From 0661d7a60d952f2fd5b7d083fcaf271b4bd900fd Mon Sep 17 00:00:00 2001 From: root Date: Fri, 13 Jul 2012 00:01:03 -0500 Subject: [PATCH] Added files from OrangeFS-2.8.6 -These files were not related to OSD -They were only present in OrangeFS-2.8.6 --- aclocal.m4 | 2303 --------- cert-utils/pvfs2-grid-proxy-init.sh | 22 + config.save | 26 +- include/orange.h | 27 + include/pvfs2-usrint.h | 329 ++ src/apps/admin/pvfs2-get-uid.c | 337 ++ src/apps/admin/pvfs2-perf-mon-snmp.c | 429 ++ src/apps/ucache/module.mk.in | 11 + src/apps/ucache/shmem_util.c | 75 + src/apps/ucache/shmem_util.h | 20 + src/apps/ucache/ucached.c | 711 +++ src/apps/ucache/ucached.h | 81 + src/apps/ucache/ucached_cmd.c | 120 + src/apps/ucache/ucached_common.c | 16 + src/apps/ucache/watch_daemons | 1 + src/apps/ucache/watch_ipcs | 1 + src/apps/ucache/watch_log | 1 + src/client/sysint/mgmt-get-uid-list.sm | 243 + src/client/usrint/mmap.c | 180 + src/client/usrint/module.mk.in | 21 + src/client/usrint/posix-ops.h | 230 + src/client/usrint/request.c | 157 + src/client/usrint/socket.c | 506 ++ src/client/usrint/stdio-ops.h | 108 + src/client/usrint/ucache.c | 2066 ++++++++ src/client/usrint/ucache.h | 251 + src/common/gen-locks/gen-win-locks.c | 748 +++ src/common/misc/pint-uid-mgmt.c | 198 + src/common/misc/pint-uid-mgmt.h | 53 + src/common/misc/pvfs2-win-util.c | 2117 +++++++++ src/common/windows/wincommon.h | 40 + src/io/bmi/bmi_wintcp/bmi-tcp-addressing.h | 102 + src/io/bmi/bmi_wintcp/bmi-wintcp.c | 4177 +++++++++++++++++ .../bmi/bmi_wintcp/socket-collection-epoll.c | 203 + .../bmi/bmi_wintcp/socket-collection-epoll.h | 120 + src/io/bmi/bmi_wintcp/socket-collection.c | 477 ++ src/io/bmi/bmi_wintcp/socket-collection.h | 126 + src/io/bmi/bmi_wintcp/sockio.c | 415 ++ src/io/bmi/bmi_wintcp/sockio.h | 130 + src/server/mgmt-get-uid.sm | 138 + test/ci/jenkins-build.sh | 203 + test/ci/jenkins-doc.sh | 48 + test/ci/jenkins-test.sh | 398 ++ test/common/gen-locks/condvar1.c | 22 + 
test/common/gen-locks/condvar2_1.c | 185 + test/common/gen-locks/condvar3.c | 174 + test/common/gen-locks/condvar3_1.c | 222 + test/common/gen-locks/condvar3_2.c | 211 + test/io/job/test-job-client.c | 158 + test/io/job/test-job-server.c | 182 + 50 files changed, 16803 insertions(+), 2316 deletions(-) delete mode 100644 aclocal.m4 create mode 100755 cert-utils/pvfs2-grid-proxy-init.sh create mode 100644 include/orange.h create mode 100644 include/pvfs2-usrint.h create mode 100644 src/apps/admin/pvfs2-get-uid.c create mode 100644 src/apps/admin/pvfs2-perf-mon-snmp.c create mode 100644 src/apps/ucache/module.mk.in create mode 100644 src/apps/ucache/shmem_util.c create mode 100644 src/apps/ucache/shmem_util.h create mode 100644 src/apps/ucache/ucached.c create mode 100644 src/apps/ucache/ucached.h create mode 100644 src/apps/ucache/ucached_cmd.c create mode 100644 src/apps/ucache/ucached_common.c create mode 100755 src/apps/ucache/watch_daemons create mode 100755 src/apps/ucache/watch_ipcs create mode 100755 src/apps/ucache/watch_log create mode 100644 src/client/sysint/mgmt-get-uid-list.sm create mode 100644 src/client/usrint/mmap.c create mode 100644 src/client/usrint/module.mk.in create mode 100644 src/client/usrint/posix-ops.h create mode 100644 src/client/usrint/request.c create mode 100644 src/client/usrint/socket.c create mode 100644 src/client/usrint/stdio-ops.h create mode 100644 src/client/usrint/ucache.c create mode 100644 src/client/usrint/ucache.h create mode 100755 src/common/gen-locks/gen-win-locks.c create mode 100644 src/common/misc/pint-uid-mgmt.c create mode 100644 src/common/misc/pint-uid-mgmt.h create mode 100755 src/common/misc/pvfs2-win-util.c create mode 100755 src/common/windows/wincommon.h create mode 100755 src/io/bmi/bmi_wintcp/bmi-tcp-addressing.h create mode 100755 src/io/bmi/bmi_wintcp/bmi-wintcp.c create mode 100755 src/io/bmi/bmi_wintcp/socket-collection-epoll.c create mode 100755 src/io/bmi/bmi_wintcp/socket-collection-epoll.h create 
mode 100755 src/io/bmi/bmi_wintcp/socket-collection.c create mode 100755 src/io/bmi/bmi_wintcp/socket-collection.h create mode 100755 src/io/bmi/bmi_wintcp/sockio.c create mode 100755 src/io/bmi/bmi_wintcp/sockio.h create mode 100644 src/server/mgmt-get-uid.sm create mode 100755 test/ci/jenkins-build.sh create mode 100755 test/ci/jenkins-doc.sh create mode 100644 test/ci/jenkins-test.sh create mode 100755 test/common/gen-locks/condvar1.c create mode 100755 test/common/gen-locks/condvar2_1.c create mode 100755 test/common/gen-locks/condvar3.c create mode 100755 test/common/gen-locks/condvar3_1.c create mode 100755 test/common/gen-locks/condvar3_2.c create mode 100755 test/io/job/test-job-client.c create mode 100755 test/io/job/test-job-server.c diff --git a/aclocal.m4 b/aclocal.m4 deleted file mode 100644 index 5af196f..0000000 --- a/aclocal.m4 +++ /dev/null @@ -1,2303 +0,0 @@ -# generated automatically by aclocal 1.7.9 -*- Autoconf -*- - -# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002 -# Free Software Foundation, Inc. -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -AC_DEFUN([AX_OPENSSL], -[ - opensslpath=ifelse([$1], ,,$1) - - if test "x$1" != "xno"; then - - AC_MSG_CHECKING([for openssl library]) - - if test "x${opensslpath}" != "x"; then - CFLAGS="${CFLAGS} -I${opensslpath}/include" - LDFLAGS="$LDFLAGS -L${opensslpath}/lib64 -L${opensslpath}/lib" - SERVER_LDFLAGS="$SERVER_LDFLAGS -L${opensslpath}/lib64 -L${opensslpath}/lib" - fi - LIBS="$LIBS -lcrypto -lssl" - - AC_COMPILE_IFELSE( - [#include "openssl/bio.h"], - [], - [AC_MSG_ERROR(Invalid openssl path specified. 
No openssl/bio.h found.)]) - - AC_TRY_LINK( - [#include "openssl/bio.h"], - [BIO * b;], - [AC_MSG_RESULT(yes)], - [AC_MSG_ERROR(could not find openssl libs)]) - - AC_DEFINE(WITH_OPENSSL, 1, [Define if openssl exists]) - - AC_CHECK_HEADERS(openssl/evp.h) - AC_CHECK_HEADERS(openssl/crypto.h) - fi -]) - -AC_DEFUN([AX_OPENSSL_OPTIONAL], -[ - AC_MSG_CHECKING([for openssl library]) - TMPLIBS=${LIBS} - LIBS="$LIBS -lcrypto -lssl" - - AC_COMPILE_IFELSE( - [#include "openssl/bio.h"], - [], - [AC_MSG_WARN(No openssl headers found.)]) - - AC_TRY_LINK( - [#include "openssl/bio.h"], - [BIO * b;], - [AC_MSG_RESULT(yes) - AC_DEFINE(WITH_OPENSSL, 1, [Define if openssl exists]) - ], - [ - AC_MSG_WARN(No openssl headers found.) - LIBS=${TMPLIBS} - ]) - - AC_CHECK_HEADERS(openssl/evp.h) - AC_CHECK_HEADERS(openssl/crypto.h) - -]) - - -AC_DEFUN([AX_KERNEL_FEATURES], -[ - dnl - dnl kernel feature tests. Set CFLAGS once here and use it for all - dnl kernel features. reset to the old value at the end. - dnl - dnl on some systems, there is a /usr/include/linux/xattr_acl.h , so the - dnl check for xattr_acl.h down below will always pass, even if it - dnl should fail. this hack (-nostdinc -isystem ...) will bring in just - dnl enough system headers dnl for kernel compilation - - dnl -Werror can be overkill, but for these kernel feature tests - dnl 'implicit function declaration' usually ends up in an undefined - dnl symbol somewhere. 
- - NOSTDINCFLAGS="-Werror-implicit-function-declaration -nostdinc -isystem `$CC -print-file-name=include`" - - CFLAGS="$USR_CFLAGS $NOSTDINCFLAGS -I$lk_src/include -I$lk_src/include/asm/mach-default -DKBUILD_STR(s)=#s -DKBUILD_BASENAME=KBUILD_STR(empty) -DKBUILD_MODNAME=KBUILD_STR(empty)" - - dnl kernels > 2.6.32 now use generated/autoconf.h - if test -f $lk_src/include/generated/autoconf.h ; then - CFLAGS="$CFLAGS -imacros $lk_src/include/generated/autoconf.h" - else - CFLAGS="$CFLAGS -imacros $lk_src/include/linux/autoconf.h" - fi - - dnl we probably need additional includes if this build is intended - dnl for a different architecture - if test -n "${ARCH}" ; then - CFLAGS="$CFLAGS -I$lk_src/arch/${ARCH}/include -I$lk_src/arch/${ARCH}/include/asm/mach-default" - else - SUBARCH=`uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ - -e s/arm.*/arm/ -e s/sa110/arm/ \ - -e s/s390x/s390/ -e s/parisc64/parisc/ \ - -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ - -e s/sh.*/sh/` - if test "x$SUBARCH" = "xi386"; then - ARCH=x86 - elif test "x$SUBARCH" = "xx86_64"; then - ARCH=x86 - elif test "x$SUBARCH" = "xsparc64"; then - ARCH=sparc - else - ARCH=$SUBARCH - fi - - CFLAGS="$CFLAGS -I$lk_src/arch/${ARCH}/include -I$lk_src/arch/${ARCH}/include/asm/mach-default" - fi - - AC_MSG_CHECKING(for i_size_write in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel already defined it - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - void i_size_write(struct inode *inode, - loff_t i_size) - { - return; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_I_SIZE_WRITE, 1, Define if kernel has i_size_write), - ) - - AC_MSG_CHECKING(for i_size_read in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel already defined it - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - loff_t i_size_read(struct inode *inode) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - 
AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_I_SIZE_READ, 1, Define if kernel has i_size_read), - ) - - AC_MSG_CHECKING(for iget_locked function in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel already defined it - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - loff_t iget_locked(struct inode *inode) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IGET_LOCKED, 1, Define if kernel has iget_locked), - ) - - AC_MSG_CHECKING(for iget4_locked function in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel already defined it - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - loff_t iget4_locked(struct inode *inode) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IGET4_LOCKED, 1, Define if kernel has iget4_locked), - ) - - AC_MSG_CHECKING(for iget5_locked function in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel already defined it - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - loff_t iget5_locked(struct inode *inode) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IGET5_LOCKED, 1, Define if kernel has iget5_locked), - ) - - dnl Check if the kernel defines the xtvec structure. - dnl This is part of a POSIX extension. - AC_MSG_CHECKING(for struct xtvec in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct xtvec xv = { 0, 0 }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_STRUCT_XTVEC, 1, Define if struct xtvec is defined in the kernel), - AC_MSG_RESULT(no) - ) - - dnl 2.6.20 deprecated kmem_cache_t; some old ones do not have struct - dnl kmem_cache, but may have kmem_cache_s. It's a mess. Just look - dnl for this, and assume _t if not found. - dnl This test relies on gcc complaining about declaring a struct - dnl in a parameter list. 
Fragile, but nothing better is available - dnl to check for the existence of a struct. We cannot see the - dnl definition of the struct in the kernel, it's private to the - dnl slab implementation. And C lets you declare structs freely as - dnl long as you don't try to deal with their contents. - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for struct kmem_cache in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - - int foo(struct kmem_cache *s) - { - return (s == NULL) ? 3 : 4; - } - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_STRUCT_KMEM_CACHE, 1, Define if struct kmem_cache is defined in kernel), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - dnl 2.6.20 removed SLAB_KERNEL. Need to use GFP_KERNEL instead - AC_MSG_CHECKING(for SLAB_KERNEL flag in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static int flags = SLAB_KERNEL; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SLAB_KERNEL, 1, Define if SLAB_KERNEL is defined in kernel), - AC_MSG_RESULT(no) - ) - - dnl The name of this field changed from memory_backed to capabilities - dnl in 2.6.12. 
- AC_MSG_CHECKING(for memory_backed in struct backing_dev_info in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - static struct backing_dev_info bdi = { - .memory_backed = 0 - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BDI_MEMORY_BACKED, 1, Define if struct backing_dev_info in kernel has memory_backed), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a sendfile callback - if test "x$enable_kernel_sendfile" = "xyes"; then - AC_MSG_CHECKING(for sendfile callback in struct file_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct file_operations fop = { - .sendfile = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SENDFILE_VFS_SUPPORT, 1, Define if struct file_operations in kernel has sendfile callback), - AC_MSG_RESULT(no) - ) - fi - - dnl checking if we have a readv callback in super_operations - AC_MSG_CHECKING(for readv callback in struct file_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct file_operations fop = { - .readv = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_READV_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has readv callback), - AC_MSG_RESULT(no) - ) - dnl checking if we have a writev callback in super_operations - AC_MSG_CHECKING(for writev callback in struct file_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct file_operations fop = { - .writev = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WRITEV_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has writev callback), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a find_inode_handle callback in super_operations - AC_MSG_CHECKING(for find_inode_handle callback in struct super_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct super_operations sop = { - .find_inode_handle = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - 
AC_DEFINE(HAVE_FIND_INODE_HANDLE_SUPER_OPERATIONS, 1, Define if struct super_operations in kernel has find_inode_handle callback), - AC_MSG_RESULT(no) - ) - - dnl 2.6.18.1 removed this member - AC_MSG_CHECKING(for i_blksize in struct inode) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct inode i = { - .i_blksize = 0, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_I_BLKSIZE_IN_STRUCT_INODE, 1, Define if struct inode in kernel has i_blksize member), - AC_MSG_RESULT(no) - ) - - dnl 2.6.16 removed this member - AC_MSG_CHECKING(for i_sem in struct inode) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct inode i = { - .i_sem = {0}, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_I_SEM_IN_STRUCT_INODE, 1, Define if struct inode in kernel has i_sem member), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a statfs_lite callback in super_operations - AC_MSG_CHECKING(for statfs_lite callback in struct super_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct super_operations sop = { - .statfs_lite = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_STATFS_LITE_SUPER_OPERATIONS, 1, Define if struct super_operations in kernel has statfs_lite callback), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a fill_handle callback in inode_operations - AC_MSG_CHECKING(for fill_handle callback in struct inode_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct inode_operations iop = { - .fill_handle = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FILL_HANDLE_INODE_OPERATIONS, 1, Define if struct inode_operations in kernel has fill_handle callback), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a getattr_lite callback in inode_operations - AC_MSG_CHECKING(for getattr_lite callback in struct inode_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct inode_operations iop = { - .getattr_lite = NULL, - }; - 
], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GETATTR_LITE_INODE_OPERATIONS, 1, Define if struct inode_operations in kernel has getattr_lite callback), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a get_fs_key callback in super_operations - AC_MSG_CHECKING(for get_fs_key callback in struct super_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct super_operations sop = { - .get_fs_key = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GET_FS_KEY_SUPER_OPERATIONS, 1, Define if struct super_operations in kernel has get_fs_key callback), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a readdirplus callback in file_operations - AC_MSG_CHECKING(for readdirplus member in file_operations structure) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct file_operations filop = { - .readdirplus = NULL - }; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_READDIRPLUS_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has readdirplus callback), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a readdirplus_lite callback in file_operations - AC_MSG_CHECKING(for readdirplus_lite member in file_operations structure) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct file_operations filop = { - .readdirplus_lite = NULL - }; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_READDIRPLUSLITE_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has readdirplus_lite callback), - AC_MSG_RESULT(no) - ) - - - dnl checking if we have a readx callback in file_operations - AC_MSG_CHECKING(for readx member in file_operations structure) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct file_operations filop = { - .readx = NULL - }; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_READX_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has readx callback), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a writex callback in file_operations - AC_MSG_CHECKING(for writex 
member in file_operations structure) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct file_operations filop = { - .writex = NULL - }; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WRITEX_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has writex callback), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(for aio support in kernel) - dnl if this test passes, the kernel has it - dnl if this test fails, the kernel does not have it - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - static struct kiocb iocb; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_AIO, 1, Define if kernel has aio support) - have_aio=yes, - AC_MSG_RESULT(no) - have_aio=no - ) - - if test "x$have_aio" = "xyes" -a "x$enable_kernel_aio" = "xyes"; then - AC_MSG_CHECKING(for ki_dtor in kiocb structure of kernel) - dnl if this test passes, the kernel does have it and we enable - dnl support for AIO. if this test fails, the kernel does not - dnl have this member and we disable support for AIO - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - static struct kiocb io_cb = { - .ki_dtor = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_AIO_VFS_SUPPORT, 1, Define if we are enabling VFS AIO support in kernel), - AC_MSG_RESULT(no) - ) - - tmp_cflags=$CFLAGS - dnl if this test passes, the signature of aio_read has changed to the new one - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for new prototype of aio_read callback of file_operations structure) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - extern ssize_t my_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); - static struct file_operations fop = { - .aio_read = my_aio_read, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_AIO_NEW_AIO_SIGNATURE, 1, Define if VFS AIO support in kernel has a new prototype), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - fi - - AC_MSG_CHECKING(for dentry argument in kernel super_operations statfs) - dnl Rely on the fact that there is an 
external vfs_statfs that is - dnl of the same type as the .statfs in struct super_operations to - dnl verify the signature of that function pointer. There is a single - dnl commit in the git history where both changed at the same time - dnl from super_block to dentry. - dnl - dnl The alternative approach of trying to define a s_op.statfs is not - dnl as nice because that only throws a warning, requiring -Werror to - dnl catch it. This is a problem if the compiler happens to spit out - dnl other spurious warnings that have nothing to do with the test. - dnl - dnl If this test passes, the kernel uses a struct dentry argument. - dnl If this test fails, the kernel uses something else (old struct - dnl super_block perhaps). - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - int vfs_statfs(struct dentry *de, struct kstatfs *kfs) - { - return 0; - } - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DENTRY_STATFS_SOP, 1, Define if super_operations statfs has dentry argument), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(for vfsmount argument in kernel file_system_type get_sb) - dnl Same trick as above. A single commit changed mayn things at once: - dnl type and signature of file_system_type.get_sb, and signature of - dnl get_sb_bdev. This test is a bit more tenuous, as get_sb_bdev - dnl isn't used directly in a file_system_type, but is a popular helper - dnl for many FSes. And it has not exactly the same signature. - dnl - dnl If this test passes, the kernel has the most modern known form, - dnl which includes a stfuct vfsmount argument. - dnl If this test fails, the kernel uses something else. 
- AC_TRY_COMPILE([ - #define __KERNEL__ - #include - int get_sb_bdev(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, - int), - struct vfsmount *vfsm) - { - return 0; - } - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFSMOUNT_GETSB, 1, Define if file_system_type get_sb has vfsmount argument), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(for xattr support in kernel) - dnl if this test passes, the kernel has it - dnl if this test fails, the kernel does not have it - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct inode_operations in_op = { - .getxattr = NULL - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR, 1, Define if kernel has xattr support) - have_xattr=yes, - AC_MSG_RESULT(no) - have_xattr=no - ) - - if test "x$have_xattr" = "xyes"; then - dnl Test to check if setxattr function has a const void * argument - AC_MSG_CHECKING(for const argument to setxattr function) - dnl if this test passes, there is a const void* argument - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], - [ - struct inode_operations inode_ops; - int ret; - struct dentry * dent = NULL; - const char * name = NULL; - const void * val = NULL; - size_t size = 0; - int flags = 0; - - ret = inode_ops.setxattr(dent, name, val, size, flags); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SETXATTR_CONST_ARG, 1, Define if kernel setxattr has const void* argument), - AC_MSG_RESULT(no) - ) - fi - - dnl the proc handler functions have changed over the years. 
- dnl pre-2.6.8: proc_handler(ctl_table *ctl, - dnl int write, - dnl struct file *filp, - dnl void *buffer, - dnl size_t *lenp) - dnl - dnl 2.6.8-2.6.31: proc_handler(ctl_table *ctl, - dnl int write, - dnl struct file *filp, - dnl void *buffer, - dnl size_t *lenp, - dnl loff_t *ppos) - dnl > 2.6.31: proc_handler(ctl_table *ctl, - dnl int write, - dnl void *buffer, - dnl size_t *lenp, - dnl loff_t *ppos) - - dnl Test to see if sysctl proc handlers have a file argument - AC_MSG_CHECKING(for file argument to sysctl proc handlers) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - ], [ - struct ctl_table * ctl = NULL; - int write = 0; - struct file * filp = NULL; - void __user * buffer = NULL; - size_t * lenp = NULL; - loff_t * ppos = NULL; - - proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PROC_HANDLER_FILE_ARG, 1, Define if sysctl proc handlers have 6th argument), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(for ppos argument to sysctl proc handlers) - dnl if this test passes, there is a ppos argument - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - ], [ - struct ctl_table * ctl = NULL; - int write = 0; - void __user * buffer = NULL; - size_t * lenp = NULL; - loff_t * ppos = NULL; - - proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PROC_HANDLER_PPOS_ARG, 1, Define if sysctl proc handlers have ppos argument), - AC_MSG_RESULT(no) - ) - - AC_CHECK_HEADERS([linux/posix_acl.h], [], [], - [#define __KERNEL__ - #include - #ifdef HAVE_XATTR - #include - #endif - ] ) - - AC_CHECK_HEADERS([linux/posix_acl_xattr.h], [], [], - [#define __KERNEL__ - #include - #ifdef HAVE_XATTR - #include - #endif - ] ) - - dnl linux-2.6.11 had xattr_acl.h, but 2.6.12 did not! 
- AC_CHECK_HEADERS([linux/xattr_acl.h], [], [], - [#define __KERNEL__ - #include - #ifdef HAVE_XATTR - #include - #endif - ] ) - - AC_CHECK_HEADERS([linux/mount.h], [], [], - [#define __KERNEL__ - #include - ] ) - AC_CHECK_HEADERS([linux/ioctl32.h], [], [], - [#define __KERNEL__ - #include - ] ) - AC_CHECK_HEADERS([linux/compat.h], [], [], - [#define __KERNEL__ - #include - ] ) - AC_CHECK_HEADERS([linux/syscalls.h], [], [], - [#define __KERNEL__ - #include - ] ) - AC_CHECK_HEADERS([asm/ioctl32.h], [], [], - [#define __KERNEL__ - #include - ] ) - AC_CHECK_HEADERS([linux/exportfs.h], [],[], - [#define __KERNEL__ - #include - ]) - - AC_MSG_CHECKING(for generic_file_readv api in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel has it defined with a different - dnl signature! deliberately, the signature for this method has been - dnl changed for it to give a compiler error. - - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - int generic_file_readv(struct inode *inode) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_FILE_READV, 1, Define if kernel has generic_file_readv), - ) - - AC_MSG_CHECKING(for generic_permission api in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel has it defined with a different - dnl signature! deliberately, the signature for this method has been - dnl changed for it to give a compiler error. 
- - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - int generic_permission(struct inode *inode) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_PERMISSION, 1, Define if kernel has generic_permission), - ) - - AC_MSG_CHECKING(for generic_getxattr api in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel has it defined - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - int generic_getxattr(struct inode *inode) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_GETXATTR, 1, Define if kernel has generic_getxattr), - ) - - AC_MSG_CHECKING(for arg member in read_descriptor_t in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - read_descriptor_t x; - x.arg.data = NULL; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_ARG_IN_READ_DESCRIPTOR_T, 1, Define if read_descriptor_t has an arg member), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(for fh_to_dentry member in export_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct export_operations x; - x.fh_to_dentry = NULL; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FHTODENTRY_EXPORT_OPERATIONS, 1, Define if export_operations has an fh_to_dentry member), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(for encode_fh member in export_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct export_operations x; - x.encode_fh = NULL; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_ENCODEFH_EXPORT_OPERATIONS, 1, Define if export_operations has an encode_fh member), - AC_MSG_RESULT(no) - ) - - dnl Using -Werror is not an option, because some arches throw lots of - dnl warnings that would trigger false negatives. 
We know that the - dnl change to the releasepage() function signature was accompanied by - dnl a similar change to the exported function try_to_release_page(), - dnl and that one we can check without using -Werror. The test fails - dnl unless the previous declaration was identical to the one we suggest - dnl below. New kernels use gfp_t, not int. - AC_MSG_CHECKING(for second arg type int in address_space_operations releasepage) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - extern int try_to_release_page(struct page *page, int gfp_mask); - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INT_ARG2_ADDRESS_SPACE_OPERATIONS_RELEASEPAGE, 1, Define if sceond argument to releasepage in address_space_operations is type int), - AC_MSG_RESULT(no) - ) - - dnl Similar logic for the follow_link member in inode_operations. New - dnl kernels return a void *, not int. - AC_MSG_CHECKING(for int return in inode_operations follow_link) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - extern int page_follow_link_light(struct dentry *, - struct nameidata *); - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INT_RETURN_INODE_OPERATIONS_FOLLOW_LINK, 1, Define if return value from follow_link in inode_operations is type int), - AC_MSG_RESULT(no) - ) - - dnl kmem_cache_destroy function may return int only on pre 2.6.19 kernels - dnl else it returns a void. - AC_MSG_CHECKING(for int return in kmem_cache_destroy) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - extern int kmem_cache_destroy(kmem_cache_t *); - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INT_RETURN_KMEM_CACHE_DESTROY, 1, Define if return value from kmem_cache_destroy is type int), - AC_MSG_RESULT(no) - ) - - dnl more 2.6 api changes. return type for the invalidatepage - dnl address_space_operation is 'void' in new kernels but 'int' in old - dnl I had to turn on -Werror for this test because i'm not sure how - dnl else to make dnl "initialization from incompatible pointer type" - dnl fail. 
- AC_MSG_CHECKING(for older int return in invalidatepage) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], - [ - struct address_space_operations aso; - - int ret; - struct page * page = NULL; - unsigned long offset; - - ret = aso.invalidatepage(page, offset); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INT_RETURN_ADDRESS_SPACE_OPERATIONS_INVALIDATEPAGE, 1, Define if return type of invalidatepage should be int), - AC_MSG_RESULT(NO) - ) - - dnl In 2.6.18.1 and newer, including will throw off a - dnl warning - tmp_cflags=${CFLAGS} - CFLAGS="${CFLAGS} -Werror" - AC_MSG_CHECKING(for warnings when including linux/config.h) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [], - AC_MSG_RESULT(no) - AC_DEFINE(HAVE_NOWARNINGS_WHEN_INCLUDING_LINUX_CONFIG_H, 1, Define if including linux/config.h gives no warnings), - AC_MSG_RESULT(yes) - ) - CFLAGS=$tmp_cflags - - AC_MSG_CHECKING(for compat_ioctl member in file_operations structure) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct file_operations filop = { - .compat_ioctl = NULL - }; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_COMPAT_IOCTL_HANDLER, 1, Define if there exists a compat_ioctl member in file_operations), - AC_MSG_RESULT(no) - ) - - dnl Gives wrong answer if header is missing; don't try then. 
- if test x$ac_cv_header_linux_ioctl32_h = xyes ; then - AC_MSG_CHECKING(for register_ioctl32_conversion kernel exports) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel has it defined - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - int register_ioctl32_conversion(void) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REGISTER_IOCTL32_CONVERSION, 1, Define if kernel has register_ioctl32_conversion), - ) - fi - - AC_MSG_CHECKING(for int return value of kmem_cache_destroy) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - int i = kmem_cache_destroy(NULL); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KMEM_CACHE_DESTROY_INT_RETURN, 1, Define if kmem_cache_destroy returns int), - AC_MSG_RESULT(no) - ) - - dnl As of 2.6.19, combined readv/writev into aio_read and aio_write - dnl functions. Detect this by not finding a readv member. - AC_MSG_CHECKING(for combined file_operations readv and aio_read) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct file_operations filop = { - .readv = NULL - }; - ], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_COMBINED_AIO_AND_VECTOR, 1, Define if struct file_operations has combined aio_read and readv functions), - ) - - dnl Check for kzalloc - AC_MSG_CHECKING(for kzalloc) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - void * a; - a = kzalloc(1024, GFP_KERNEL); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KZALLOC, 1, Define if kzalloc exists), - AC_MSG_RESULT(no) - ) - - dnl Check for two arg register_sysctl_table() - AC_MSG_CHECKING(for two arguments to register_sysctl_table) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - ], [ - register_sysctl_table(NULL, 0); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TWO_ARG_REGISTER_SYSCTL_TABLE, 1, Define if register_sysctl_table takes two arguments), - AC_MSG_RESULT(no) - ) - - dnl FS_IOC_GETFLAGS and FS_IOC_SETFLAGS appeared - dnl 
somewhere around 2.6.20.1 as generic versions of fs-specific flags - AC_MSG_CHECKING(for generic FS_IOC ioctl flags) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - int flags = FS_IOC_GETFLAGS; - ], - AC_MSG_RESULT(yes), - AC_DEFINE(HAVE_NO_FS_IOC_FLAGS, 1, Define if FS_IOC flags missing from fs.h) - AC_MSG_RESULT(no) - ) - - dnl old linux kernels define struct page with a 'count' member, whereas - dnl other kernels (since at least 2.6.20) define struct page with a - dnl '_count' - AC_MSG_CHECKING(for obsolete struct page count without underscore) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct page *p; - int foo; - foo = atomic_read(&(p)->count); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_OBSOLETE_STRUCT_PAGE_COUNT_NO_UNDERSCORE, 1, Define if struct page defines a count member without leading underscore), - AC_MSG_RESULT(no) - ) - - dnl old linux kernels do not have class_create and related functions - dnl - dnl check for class_device_destroy() to weed out RHEL4 kernels that - dnl have some class functions but not others - AC_MSG_CHECKING(if kernel has device classes) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - class_device_destroy(NULL, "pvfs2") - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_DEVICE_CLASSES, 1, Define if kernel has device classes), - AC_MSG_RESULT(no) - ) - - dnl 2.6.23 removed the destructor parameter from kmem_cache_create - AC_MSG_CHECKING(for destructor param to kmem_cache_create) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - kmem_cache_create("config-test", 0, 0, 0, NULL, NULL); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KMEM_CACHE_CREATE_DESTRUCTOR_PARAM, 1, [Define if kernel kmem_cache_create has destructor param]), - AC_MSG_RESULT(no) - ) - - dnl 2.6.27 changed the constructor parameter signature of - dnl kmem_cache_create. Check for this newer one-param style - dnl If they don't match, gcc complains about - dnl passing argument ... 
from incompatible pointer type, hence the - dnl need for the -Werror. Note that the next configure test will - dnl determine if we have a two param constructor or not. - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for one-param kmem_cache_create constructor) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - void ctor(void *req) - { - } - ], [ - kmem_cache_create("config-test", 0, 0, 0, ctor); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KMEM_CACHE_CREATE_CTOR_ONE_PARAM, 1, [Define if kernel kmem_cache_create constructor has newer-style one-parameter form]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - dnl 2.6.27 changed the parameter signature of - dnl inode_operations->permission. Check for this newer two-param style - dnl If they don't match, gcc complains about - dnl passing argument ... from incompatible pointer type, hence the - dnl need for the -Werror and -Wall. - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror -Wall" - AC_MSG_CHECKING(for two param permission) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - #include - #include - int ctor(struct inode *i, int a) - { - return 0; - } - struct inode_operations iop = { - .permission = ctor, - }; - ], [ - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TWO_PARAM_PERMISSION, 1, [Define if kernel's inode_operations has two parameters permission function]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - - dnl 2.6.24 changed the constructor parameter signature of - dnl kmem_cache_create. Check for this newer two-param style and - dnl if not, assume it is old. Note we can get away with just - dnl struct kmem_cache (and not kmem_cache_t) as that change happened - dnl in older kernels. If they don't match, gcc complains about - dnl passing argument ... from incompatible pointer type, hence the - dnl need for the -Werror. 
- tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for two-param kmem_cache_create constructor) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - void ctor(struct kmem_cache *cachep, void *req) - { - } - ], [ - kmem_cache_create("config-test", 0, 0, 0, ctor); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KMEM_CACHE_CREATE_CTOR_TWO_PARAM, 1, [Define if kernel kmem_cache_create constructor has new-style two-parameter form]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - AC_MSG_CHECKING(if kernel address_space struct has a spin_lock field named page_lock) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct address_space as; - spin_lock(&as.page_lock); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SPIN_LOCK_PAGE_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a spin_lock member named page_lock instead of rw_lock]), - AC_MSG_RESULT(no) - ) - - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(if kernel address_space struct has a rwlock_t field named tree_lock) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct address_space as; - read_lock(&as.tree_lock); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RW_LOCK_TREE_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a rw_lock_t member named tree_lock]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(if kernel address_space struct has a spinlock_t field named tree_lock) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct address_space as; - spin_lock(&as.tree_lock); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SPIN_LOCK_TREE_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a spin_lock_t member named tree_lock]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - AC_MSG_CHECKING(if kernel address_space struct has a priv_lock field - from RT linux) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct address_space as; - 
spin_lock(&as.priv_lock); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RT_PRIV_LOCK_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a spin_lock for private data instead of rw_lock -- used by RT linux]), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(if kernel defines mapping_nrpages macro - from RT linux) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct address_space idata; - int i = mapping_nrpages(&idata); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_MAPPING_NRPAGES_MACRO, 1, [Define if kernel defines mapping_nrpages macro -- defined by RT linux]), - AC_MSG_RESULT(no) - ) - - dnl Starting with 2.6.25-rc1, .read_inode goes away. - AC_MSG_CHECKING(if kernel super_operations contains read_inode field) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct super_operations sops; - sops.read_inode(NULL); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_READ_INODE, 1, [Define if kernel super_operations contains read_inode field]), - AC_MSG_RESULT(no) - ) - - dnl Starting with 2.6.26, drop_inode and put_inode go away - AC_MSG_CHECKING(if kernel super_operations contains drop_inode field) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct super_operations sops; - sops.drop_inode(NULL); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DROP_INODE, 1, [Define if kernel super_operations contains drop_inode field]), - AC_MSG_RESULT(no) - ) - - dnl Starting with 2.6.26, drop_inode and put_inode go away - AC_MSG_CHECKING(if kernel super_operations contains put_inode field) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct super_operations sops; - sops.put_inode(NULL); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PUT_INODE, 1, [Define if kernel super_operations contains put_inode field]), - AC_MSG_RESULT(no) - ) - - dnl older 2.6 kernels don't have MNT_NOATIME - AC_MSG_CHECKING(if mount.h defines MNT_NOATIME) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - int flag = MNT_NOATIME; - ], - AC_MSG_RESULT(yes) - 
AC_DEFINE(HAVE_MNT_NOATIME, 1, [Define if mount.h contains - MNT_NOATIME flags]), - AC_MSG_RESULT(no) - ) - - dnl older 2.6 kernels don't have MNT_NODIRATIME - AC_MSG_CHECKING(if mount.h defines MNT_NODIRATIME) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - int flag = MNT_NODIRATIME; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_MNT_NODIRATIME, 1, [Define if mount.h contains - MNT_NODIRATIME flags]), - AC_MSG_RESULT(no) - ) - - dnl newer 2.6 kernels (2.6.28) use d_obtain_alias instead of d_alloc_anon - AC_MSG_CHECKING(for d_alloc_anon) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct inode *i; - d_alloc_anon(i); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_D_ALLOC_ANON, 1, [Define if dcache.h contains - d_alloc_annon]), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(for s_dirty in struct super_block) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct super_block *s; - list_empty(&s->s_dirty); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SB_DIRTY_LIST, 1, [Define if struct super_block has s_dirty list]), - AC_MSG_RESULT(no) - ) - - dnl newer 2.6 kernels (2.6.29-ish) use current_fsuid() macro instead - dnl of accessing task struct fields directly - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for current_fsuid) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - ], [ - int uid = current_fsuid(); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CURRENT_FSUID, 1, [Define if cred.h contains current_fsuid]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - dnl 2.6.32 added a mandatory name field to the bdi structure - AC_MSG_CHECKING(if kernel backing_dev_info struct has a name field) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - ], [ - struct backing_dev_info foo = - { - .name = "foo" - }; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BACKING_DEV_INFO_NAME, 1, [Define if kernel backing_dev_info struct has a name field]), - AC_MSG_RESULT(no) - ) - - dnl some 2.6 kernels have functions to 
explicitly initialize bdi structs - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for bdi_init) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - ], [ - int ret = bdi_init(NULL); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BDI_INIT, 1, [Define if bdi_init function is present]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - - dnl 2.6.33 API change, - dnl Removed .ctl_name from struct ctl_table. - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING([whether struct ctl_table has ctl_name]) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct ctl_table c = { .ctl_name = 0, }; - ],[ ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CTL_NAME, 1, Define if struct ctl_table has ctl_name member), - AC_MSG_RESULT(no) - ) - - dnl Removed .strategy from struct ctl_table. - AC_MSG_CHECKING([whether struct ctl_table has strategy]) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct ctl_table c = { .strategy = 0, }; - ], [ ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_STRATEGY_NAME, 1, Define if struct ctl_table has strategy member), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - dnl 2.6.33 changed the parameter signature of xattr_handler get - dnl member functions to have a fifth argument and changed the first - dnl parameter from struct inode to struct dentry. 
if the test fails - dnl assume the old 4 param with struct inode - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for five-param xattr_handler.get) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - static struct xattr_handler x; - static int get_xattr_h( struct dentry *d, const char *n, - void *b, size_t s, int h) - { return 0; } - ], - [ - x.get = get_xattr_h; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_HANDLER_GET_FIVE_PARAM, 1, [Define if kernel xattr_handle get function has dentry as first parameter and a fifth parameter]), - AC_MSG_RESULT(no) - ) - - dnl 2.6.33 changed the parameter signature of xattr_handler set - dnl member functions to have a sixth argument and changed the first - dnl parameter from struct inode to struct dentry. if the test fails - dnl assume the old 5 param with struct inode - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for six-param xattr_handler.set) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - static struct xattr_handler x; - static int set_xattr_h( struct dentry *d, const char *n, - const void *b, size_t s, int f, int h) - { return 0; } - ], - [ - x.set = set_xattr_h; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_HANDLER_SET_SIX_PARAM, 1, [Define if kernel xattr_handle set function has dentry as first parameter and a sixth parameter]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - dnl xattr_handler is also a const - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for const s_xattr member in super_block struct) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - struct super_block sb; - const struct xattr_handler *x[] = { NULL }; - ], - [ - sb.s_xattr = x; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CONST_S_XATTR_IN_SUPERBLOCK, 1, [Define if s_xattr member of super_block struct is const]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - dnl early 2.6 kernels do not contain true/false enum in stddef.h - tmp_cflags=$CFLAGS - 
CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(stddef.h true/false enum) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - int f = true; - ], - [ ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TRUE_FALSE_ENUM, 1, [Define if kernel stddef has true/false enum]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - - dnl fsync no longer has a dentry second parameter - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for dentry argument in fsync) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct file_operations f; - static int local_fsync(struct file *f, struct dentry *d, int i) - { return 0; } - ], - [ - f.fsync = local_fsync; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FSYNC_DENTRY_PARAM, 1, [Define if fsync function in file_operations struct wants a dentry pointer as the second parameter]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - CFLAGS=$oldcflags - -]) - - -AC_DEFUN([AX_BERKELEY_DB], -[ - dbpath=ifelse([$1], ,,$1) - - DB_LDFLAGS= - dnl - dnl if the db is specified, try to link with -ldb - dnl otherwise try -ldb4, then -ldb3, then -ldb - dnl $lib set to notfound on link failure - dnl - AC_MSG_CHECKING([for db library]) - oldlibs=$LIBS - lib=notfound - - if test "x$dbpath" != "x" ; then - oldcflags=$CFLAGS - for dbheader in db4 db3 notfound; do - AC_COMPILE_IFELSE( - [#include "$dbpath/include/$dbheader/db.h"], - [DB_CFLAGS="-I$dbpath/include/$dbheader/" - break]) - done - - if test "x$dbheader" = "xnotfound"; then - AC_COMPILE_IFELSE( - [#include "$dbpath/include/db.h"], - [DB_CFLAGS="-I$dbpath/include/"], - [AC_MSG_FAILURE( - Invalid libdb path specified. 
No db.h found.)]) - fi - - DB_LDFLAGS="-L${dbpath}/lib" - LDFLAGS="$DB_LDFLAGS ${LDFLAGS}" - - LIBS="${oldlibs} -ldb -lpthread" - DB_LIB="-ldb" - CFLAGS="$DB_CFLAGS $oldcflags" - AC_TRY_LINK( - [#include ], - [DB *dbp; db_create(&dbp, NULL, 0);], - lib=db) - CFLAGS=$oldcflags - - else - for lib in db4 db3 db notfound; do - LIBS="${oldlibs} -l$lib -lpthread" - DB_LIB="-l$lib" - AC_TRY_LINK( - [#include ], - [DB *dbp; db_create(&dbp, NULL, 0);], - [break]) - done - fi - - dnl reset LIBS value and just report through DB_LIB - LIBS=$oldlibs - if test "x$lib" = "xnotfound" ; then - AC_MSG_ERROR(could not find DB libraries) - else - AC_MSG_RESULT($lib) - fi - AC_SUBST(DB_CFLAGS) - AC_SUBST(DB_LIB) - - dnl See if we have a new enough version of Berkeley DB; needed for - dnl compilation of trove-dbpf component - dnl AC_MSG_CHECKING(whether version of Berkeley DB is new enough) - dnl AC_TRY_COMPILE([ - dnl #include - dnl ], [ - dnl #if DB_VERSION_MAJOR < 4 - dnl #error "DB_VERSION_MAJOR < 4; need newer Berkeley DB implementation" - dnl #endif - dnl ], AC_MSG_RESULT(yes), - dnl AC_MSG_RESULT(no) - dnl AC_MSG_ERROR(Need newer (4.x.x or later) version of Berkeley DB. - dnl try: http://www.sleepycat.com/download/index.shtml - dnl or: /parl/pcarns/rpms/db4-4.0.14-1mdk.src.rpm (to build rpm)) - dnl ) - - dnl Test to check for DB_ENV variable to error callback fn. Then - dnl test to see if third parameter must be const (related but not - dnl exactly the same). 
- AC_MSG_CHECKING(for dbenv parameter to DB error callback function) - oldcflags=$CFLAGS - CFLAGS="$USR_CFLAGS $DB_CFLAGS -Werror" - AC_TRY_COMPILE([ - #include - - void error_callback_fn(const DB_ENV *dbenv, - const char *prefix, - const char *message) - { - return; - } - ], [ - DB *db; - - db->set_errcall(db, error_callback_fn); - ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DBENV_PARAMETER_TO_DB_ERROR_CALLBACK, 1, - Define if DB error callback function takes dbenv parameter) - have_dbenv_parameter_to_db_error_callback=yes, - AC_MSG_RESULT(no) - have_dbenv_parameter_to_db_error_callback=no) - - if test "x$have_dbenv_parameter_to_db_error_callback" = "xyes" ; then - dnl Test if compilation succeeds without const; we expect that it will - dnl not. - dnl NOTE: still using -Werror! - AC_MSG_CHECKING(if third parameter to error callback function is const) - AC_TRY_COMPILE([ - #include - - void error_callback_fn(const DB_ENV *dbenv, - const char *prefix, - char *message) - { - return; - } - ], [ - DB *db; - - db->set_errcall(db, error_callback_fn); - ], AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CONST_THIRD_PARAMETER_TO_DB_ERROR_CALLBACK, 1, - Define if third param (message) to DB error callback function is const)) - fi - - CFLAGS="$USR_CFLAGS $DB_CFLAGS -Werror" - dnl Test to check for unknown third param to DB stat (four params - dnl total). The unknown parameter is a function ptr so that the - dnl the user can pass in a replcaement for malloc. - dnl Note: this is a holdover from relatively old DB implementations, - dnl while the txnid parameter is new. So we don't test for the old - dnl unknown parameter if we found the new one. 
- AC_MSG_CHECKING(for DB stat with malloc function ptr) - AC_TRY_COMPILE([ - #include - #include - ], [ - int ret = 0; - DB *db = db; - int dummy = 0; - u_int32_t flags = 0; - - ret = db->stat(db, &dummy, malloc, flags); - ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_UNKNOWN_PARAMETER_TO_DB_STAT, 1, - Define if DB stat function takes malloc function ptr) - have_db_stat_malloc=yes, - AC_MSG_RESULT(no) - have_db_stat_malloc=no) - - dnl Test to check for txnid parameter to DB stat (DB 4.3.xx+) - if test "x$have_db_stat_malloc" = "xno" ; then - - AC_MSG_CHECKING(for txnid parameter to DB stat function) - AC_TRY_COMPILE([ - #include - ], [ - int ret = 0; - DB *db = db; - DB_TXN *txnid = txnid; - u_int32_t flags = 0; - - ret = db->stat(db, txnid, NULL, flags); - ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TXNID_PARAMETER_TO_DB_STAT, 1, - Define if DB stat function takes txnid parameter) - have_txnid_param_to_stat=yes, - AC_MSG_RESULT(no) - have_txnid_param_to_stat=no) - - fi - - dnl Test to check for txnid parameter to DB open (DB4.1+) - AC_MSG_CHECKING(for txnid parameter to DB open function) - AC_TRY_COMPILE([ - #include - ], [ - int ret = 0; - DB *db = NULL; - DB_TXN *txnid = NULL; - char *file = NULL; - char *database = NULL; - DBTYPE type = 0; - u_int32_t flags = 0; - int mode = 0; - - ret = db->open(db, txnid, file, database, type, flags, mode); - ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TXNID_PARAMETER_TO_DB_OPEN, 1, - Define if DB open function takes a txnid parameter), - AC_MSG_RESULT(no)) - - dnl check for DB_DIRTY_READ (it is not in db-3.2.9, for example) - AC_MSG_CHECKING(for DB_DIRTY_READ flag) - AC_TRY_COMPILE([ - #include - ], [ - u_int32_t flags = DB_DIRTY_READ; - ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DB_DIRTY_READ, 1, [Define if db library has DB_DIRTY_READ flag]), - AC_MSG_RESULT(no)) - - dnl check for DB_BUFFER_SMALL (it is returned by dbp->get in db-4.4 and up) - AC_MSG_CHECKING(for DB_BUFFER_SMALL error) - AC_TRY_COMPILE([ - #include - ], [ - int res = 
DB_BUFFER_SMALL; - res++; - ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DB_BUFFER_SMALL, 1, [Define if db library has DB_BUFFER_SMALL error]), - AC_MSG_RESULT(no)) - - dnl Test to check for db->get_pagesize - AC_MSG_CHECKING(for berkeley db get_pagesize function) - AC_TRY_COMPILE([ - #include - ], [ - int ret = 0; - DB *db = NULL; - int pagesize; - - ret = db->get_pagesize(db, &pagesize); - ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DB_GET_PAGESIZE, 1, [Define if DB has get_pagesize function]), - AC_MSG_RESULT(no)) - - CFLAGS="$oldcflags" -]) - - -AC_DEFUN([AX_CHECK_NEEDS_LIBRT], -[ - -AC_MSG_CHECKING([if server lib needs -lrt]) -AC_TRY_LINK( - [#include - #include - #include ], - [lio_listio(LIO_NOWAIT, NULL, 0, NULL);], - [AC_MSG_RESULT(no)], - [ - oldlibs=$LIBS - LIBS="$LIBS -lrt" - AC_TRY_LINK( - [#include - #include - #include ], - [lio_listio(LIO_NOWAIT, NULL, 0, NULL);], - [NEEDS_LIBRT=1 - AC_SUBST(NEEDS_LIBRT) - AC_MSG_RESULT(yes)], - [AC_MSG_ERROR(failed attempting to link lio_listio)]) - LIBS=$oldlibs - ]) -]) - -# -# Configure rules for GM -# -# Copyright (C) 2008 Pete Wyckoff -# -# See COPYING in top-level directory. -# -AC_DEFUN([AX_GM], -[ - dnl Configure options for GM install path. 
- dnl --with-gm= is shorthand for - dnl --with-gm-includes=/include - dnl --with-gm-libs=/lib (or lib64 if that exists) - gm_home= - AC_ARG_WITH(gm, -[ --with-gm= Location of the GM install (default no GM)], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-gm requires the path to your GM tree.]) - elif test "$withval" != no ; then - gm_home="$withval" - fi - ) - AC_ARG_WITH(gm-includes, -[ --with-gm-includes= - Location of the GM includes], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-gm-includes requires path to GM headers.]) - elif test "$withval" != no ; then - GM_INCDIR="$withval" - fi - ) - AC_ARG_WITH(gm-libs, -[ --with-gm-libs= Location of the GM libraries], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-gm-libs requires path to GM libraries.]) - elif test "$withval" != no ; then - GM_LIBDIR="$withval" - fi - ) - dnl If supplied the incls and libs explicitly, use them, else populate them - dnl using guesses from the --with-gm dir. - if test -n "$gm_home" ; then - if test -z "$GM_INCDIR"; then - GM_INCDIR=$gm_home/include - fi - if test -z "$GM_LIBDIR"; then - GM_LIBDIR=$gm_home/lib64 - if test ! -d "$GM_LIBDIR" ; then - GM_LIBDIR=$gm_home/lib - fi - fi - fi - dnl If anything GM-ish was set, go look for header. - if test -n "$GM_INCDIR$GM_LIBDIR" ; then - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$GM_INCDIR -I$GM_INCDIR/gm" - AC_CHECK_HEADER(gm.h,, AC_MSG_ERROR([Header gm.h not found.])) - dnl Run test is not possible on a machine that does not have a GM NIC. - dnl Link test would work, but just check for existence. - if test ! -f $GM_LIBDIR/libgm.so ; then - if test ! 
-f $GM_LIBDIR/libgm.a ; then - AC_MSG_ERROR([Neither GM library libgm.so or libgm.a found.]) - fi - fi - BUILD_GM=1 - CPPFLAGS="$save_cppflags" - fi - AC_SUBST(BUILD_GM) - AC_SUBST(GM_INCDIR) - AC_SUBST(GM_LIBDIR) -]) - -dnl vim: set ft=config : - -# -# Configure rules for MX -# -# Copyright (C) 2008 Pete Wyckoff -# -# See COPYING in top-level directory. -# -AC_DEFUN([AX_MX], -[ - dnl Configure options for MX install path. - dnl --with-mx= is shorthand for - dnl --with-mx-includes=/include - dnl --with-mx-libs=/lib (or lib64 if that exists) - mx_home= - AC_ARG_WITH(mx, -[ --with-mx= Location of the MX install (default no MX)], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-mx requires the path to your MX tree.]) - elif test "$withval" != no ; then - mx_home="$withval" - fi - ) - AC_ARG_WITH(mx-includes, -[ --with-mx-includes= - Location of the MX includes], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-mx-includes requires path to MX headers.]) - elif test "$withval" != no ; then - MX_INCDIR="$withval" - fi - ) - AC_ARG_WITH(mx-libs, -[ --with-mx-libs= Location of the MX libraries], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-mx-libs requires path to MX libraries.]) - elif test "$withval" != no ; then - MX_LIBDIR="$withval" - fi - ) - dnl If supplied the incls and libs explicitly, use them, else populate them - dnl using guesses from the --with-mx dir. - if test -n "$mx_home" ; then - if test -z "$MX_INCDIR"; then - MX_INCDIR=$mx_home/include - fi - if test -z "$MX_LIBDIR"; then - MX_LIBDIR=$mx_home/lib64 - if test ! -d "$MX_LIBDIR" ; then - MX_LIBDIR=$mx_home/lib - fi - fi - fi - dnl If anything MX-ish was set, go look for header. 
- if test -n "$MX_INCDIR$MX_LIBDIR" ; then - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$MX_INCDIR -I$MX_INCDIR/mx" - AC_CHECK_HEADER(myriexpress.h,, - AC_MSG_ERROR([Header myriexpress.h not found.])) - dnl Run test is not possible on a machine that does not have a MX NIC. - dnl Link test would work, but just check for existence. - if test ! -f $MX_LIBDIR/libmyriexpress.so ; then - if test ! -f $MX_LIBDIR/libmyriexpress.a ; then - AC_MSG_ERROR([Neither MX library libmyriexpress.so or libmyriexpress.a found.]) - fi - fi - BUILD_MX=1 - CPPFLAGS="$save_cppflags" - fi - AC_SUBST(BUILD_MX) - AC_SUBST(MX_INCDIR) - AC_SUBST(MX_LIBDIR) - - if test -n "$BUILD_MX" ; then - dnl Check for existence of mx_decompose_endpoint_addr2 - save_ldflags="$LDFLAGS" - LDFLAGS="-L$MX_LIBDIR $LDFLAGS" - save_libs="$LIBS" - LIBS="-lmyriexpress -lpthread $LIBS" - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$MX_INCDIR" - - AC_MSG_CHECKING(for mx_decompose_endpoint_addr2) - AC_TRY_LINK([ - #include "mx_extensions.h" - #include - ], [ - mx_endpoint_addr_t epa; - mx_decompose_endpoint_addr2(epa, NULL, NULL, NULL); - ], - AC_MSG_RESULT(yes), - AC_MSG_RESULT(no) - AC_MSG_ERROR([Function mx_decompose_endpoint_addr2() not found.]) - ) - - LDFLAGS="$save_ldflags" - CPPFLAGS="$save_cppflags" - LIBS="$save_libs" - fi -]) - -dnl vim: set ft=config : - -AC_DEFUN([AX_IB], -[ - dnl Configure options for IB install path. 
- dnl --with-ib= is shorthand for - dnl --with-ib-includes=/include - dnl --with-ib-libs=/lib (or lib64 if that exists) - ib_home= - AC_ARG_WITH(ib, - [ --with-ib= Location of the IB installation (default no IB)], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-ib requires the path to your IB tree.]) - elif test "$withval" != no ; then - ib_home="$withval" - fi - ) - AC_ARG_WITH(ib-includes, -[ --with-ib-includes= - Location of the IB includes], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-ib-includes requires path to IB headers.]) - elif test "$withval" != no ; then - IB_INCDIR="$withval" - fi - ) - AC_ARG_WITH(ib-libs, -[ --with-ib-libs= Location of the IB libraries], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-ib-libs requires path to IB libraries.]) - elif test "$withval" != no ; then - IB_LIBDIR="$withval" - fi - ) - dnl If supplied the incls and libs explicitly, use them, else populate them - dnl using guesses from the --with-ib dir. - if test -n "$ib_home" ; then - if test -z "$IB_INCDIR"; then - IB_INCDIR=$ib_home/include - fi - if test -z "$IB_LIBDIR"; then - IB_LIBDIR=$ib_home/lib64 - if test ! -d "$IB_LIBDIR" ; then - IB_LIBDIR=$ib_home/lib - fi - fi - fi - dnl If anything IB-ish was set, go look for header. - if test -n "$IB_INCDIR$IB_LIBDIR" ; then - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$IB_INCDIR" - AC_CHECK_HEADER(vapi.h,, AC_MSG_ERROR([Header vapi.h not found.])) - dnl Run test is not possible on a machine that does not have an IB NIC, - dnl and link test is hard because we need so many little libraries. Bail - dnl and just check for existence; full library list is in Makefile.in. - if test ! -f $IB_LIBDIR/libvapi.so ; then - if test ! 
-f $IB_LIBDIR/libvapi.a ; then - AC_MSG_ERROR([Infiniband library libvapi.so not found.]) - fi - fi - BUILD_IB=1 - AC_CHECK_HEADER(wrap_common.h, - AC_DEFINE(HAVE_IB_WRAP_COMMON_H, 1, Define if IB wrap_common.h exists.), - , - [#include ]) - CPPFLAGS="$save_cppflags" - fi - AC_SUBST(BUILD_IB) - AC_SUBST(IB_INCDIR) - AC_SUBST(IB_LIBDIR) - - dnl Configure options for OpenIB install path. - dnl --with-openib= is shorthand for - dnl --with-openib-includes=/include - dnl --with-openib-libs=/lib (or lib64 if that exists) - openib_home= - AC_ARG_WITH(openib, - [ --with-openib= Location of the OpenIB install (default no OpenIB)], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-openib requires the path to your OpenIB tree.]) - elif test "$withval" != no ; then - openib_home="$withval" - fi - ) - AC_ARG_WITH(openib-includes, -[ --with-openib-includes= - Location of the OpenIB includes], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-openib-includes requires path to OpenIB headers.]) - elif test "$withval" != no ; then - OPENIB_INCDIR="$withval" - fi - ) - AC_ARG_WITH(openib-libs, -[ --with-openib-libs= - Location of the OpenIB libraries], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-openib-libs requires path to OpenIB libraries.]) - elif test "$withval" != no ; then - OPENIB_LIBDIR="$withval" - fi - ) - dnl If supplied the incls and libs explicitly, use them, else populate them - dnl using guesses from the --with-openib dir. - if test -n "$openib_home" ; then - if test -z "$OPENIB_INCDIR"; then - OPENIB_INCDIR=$openib_home/include - fi - if test -z "$OPENIB_LIBDIR"; then - OPENIB_LIBDIR=$openib_home/lib64 - if test ! -d "$OPENIB_LIBDIR" ; then - OPENIB_LIBDIR=$openib_home/lib - fi - fi - fi - dnl If anything OpenIB-ish was set, go look for header. 
- if test -n "$OPENIB_INCDIR$OPENIB_LIBDIR" ; then - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$OPENIB_INCDIR" - AC_CHECK_HEADER(infiniband/verbs.h,, - AC_MSG_ERROR([Header infiniband/verbs.h not found.])) - dnl Run test is not possible on a machine that does not have an IB NIC. - dnl Link test would work, but just check for existence. - if test ! -f $OPENIB_LIBDIR/libibverbs.so ; then - if test ! -f $OPENIB_LIBDIR/libibverbs.a ; then - AC_MSG_ERROR([OpenIB library libibverbs.so not found.]) - fi - fi - BUILD_OPENIB=1 - CPPFLAGS="$save_cppflags" - fi - AC_SUBST(BUILD_OPENIB) - AC_SUBST(OPENIB_INCDIR) - AC_SUBST(OPENIB_LIBDIR) - - if test -n "$BUILD_OPENIB" ; then - dnl Check for which version of the ibverbs library; device opening is - dnl different. This format is the older one, newer is - dnl ibv_get_device_list. - save_ldflags="$LDFLAGS" - LDFLAGS="-L$OPENIB_LIBDIR -libverbs" - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$OPENIB_INCDIR" - - AC_MSG_CHECKING(for ibv_get_devices) - AC_TRY_LINK([], [ - ibv_get_devices(); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IBV_GET_DEVICES, 1, - Define if libibverbs has ibv_get_devices), - AC_MSG_RESULT(no) - ) - - dnl Check for existence of reregister event; it's somewhat new. - AC_MSG_CHECKING(for IBV_EVENT_CLIENT_REREGISTER) - AC_TRY_COMPILE([ - #include "infiniband/verbs.h" - ], [ - enum ibv_event_type x = IBV_EVENT_CLIENT_REREGISTER; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IBV_EVENT_CLIENT_REREGISTER, 1, - Define if libibverbs has reregister event), - AC_MSG_RESULT(no) - ) - - LDFLAGS="$save_ldflags" - CPPFLAGS="$save_cppflags" - fi -]) - -dnl vim: set ft=config : - -AC_DEFUN([AX_PORTALS], -[ - dnl - dnl Configure to build Portals BMI method, if requested and available. - dnl Use - dnl --with-portals To find include files and libraries in standard - dnl system paths. - dnl --with-portals= To specify a location that has include and lib - dnl (or lib64) subdirectories with the goods. 
- dnl - dnl Or specify the -I an -L and -l flags exactly using, e.g.: - dnl - dnl --with-portals-includes="-I" - dnl --with-portals-libs="-L -l" - dnl - dnl The C file uses #include , so choose your include - dnl path accordingly. If it did not do this, portals/errno.h would sit in - dnl front of the system version. - dnl - use_portals= - home= - incs= - libs= - AC_ARG_WITH(portals, - [ --with-portals[=] Location of the Portals install (default no Portals)], - if test -z "$withval" -o "$withval" = yes ; then - use_portals=yes - elif test "$withval" != no ; then - home="$withval" - fi - ) - AC_ARG_WITH(portals-includes, -[ --with-portals-includes= - Extra CFLAGS to specify Portals includes], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-portals-includes requires an argument.]) - elif test "$withval" != no ; then - incs="$withval" - fi - ) - AC_ARG_WITH(portals-libs, -[ --with-portals-libs= - Extra LIBS to link Portals libraries], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-portals-libs requires an argument.]) - elif test "$withval" != no ; then - libs="$withval" - fi - ) - dnl If supplied the incls and libs explicitly, use them, else populate them - dnl using guesses from the --with-portals dir. - if test -n "$home" ; then - if test -z "$incs"; then - incs=-I$home/include - fi - if test -z "$libs"; then - libs=-L$home/lib64 - if test ! -d "$home/lib64" ; then - libs=-L$home/lib - fi - fi - fi - - dnl - dnl Look for headers and libs. 
- dnl - BUILD_PORTALS= - PORTALS_INCS= - PORTALS_LIBS= - if test "X$use_portals$home$incs$libs" != X ; then - # Save stuff - save_cppflags="$CPPFLAGS" - save_libs="$LIBS" - - PORTALS_INCS="$incs" - CPPFLAGS="$CPPFLAGS $PORTALS_INCS" - - PORTALS_LIBS="$libs" - LIBS="$save_libs $PORTALS_LIBS" - - AC_MSG_CHECKING([for portals3.h header]) - ok=no - AC_TRY_COMPILE( - [#include ], - [int m, n; m = PtlInit(&n);], - [ok=yes]) - - if test "$ok" = yes ; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - AC_MSG_ERROR([Header portals/portals3.h not found.]) - fi - - dnl try without first, for Cray, then try TCP version - dnl Run test is not always possible, esp when cross-compiling or on - dnl a box that does not have the hardware. - AC_MSG_CHECKING([for portals libraries]) - ok=no - AC_TRY_LINK( - [#include ], - [int m, n; m = PtlInit(&n);], - [ok=yes]) - - if test "$ok" = no ; then - PORTALS_LIBS="$libs -lportals" - LIBS="$save_libs $PORTALS_LIBS" - AC_TRY_LINK( - [#include ], - [int m, n; m = PtlInit(&n);], - [ok=yes]) - fi - - if test "$ok" = no ; then - PORTALS_LIBS="$libs -lp3api -lp3lib -lp3utcp -lp3rt -lpthread" - LIBS="$save_libs $PORTALS_LIBS" - AC_TRY_LINK( - [#include ], - [int m, n; m = PtlInit(&n);], - [ok=yes]) - fi - - if test "$ok" = yes ; then - AC_MSG_RESULT([yes]) - BUILD_PORTALS=1 - else - AC_MSG_RESULT([no]) - AC_MSG_ERROR([Could not link Portals library.]) - fi - - # - # Check for API variations. - # - AC_CHECK_FUNCS(PtlErrorStr) - AC_CHECK_FUNCS(PtlEventKindStr) - - AC_TRY_COMPILE( - [#include ], - [int m; ptl_process_id_t any_pid; - m = PtlACEntry(0, 0, any_pid, (ptl_uid_t) -1, (ptl_jid_t) -1, 0);], - AC_DEFINE(HAVE_PTLACENTRY_JID, 1, - [Define if have PtlACEntry with jid argument.])) - - # Reset - CPPFLAGS="$save_cppflags" - LIBS="$save_libs" - fi - AC_SUBST(BUILD_PORTALS) - AC_SUBST(PORTALS_INCS) - AC_SUBST(PORTALS_LIBS) -]) - -dnl vim: set ft=config : - -# -# Configure rules for ZOID -# -# See COPYING in top-level directory. 
-# -AC_DEFUN([AX_ZOID], -[ - dnl Configure options for ZOID install path. - dnl --with-zoid= - AC_ARG_WITH(zoid, -[ --with-zoid= Location of the ZOID tree (default no ZOID)], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-zoid requires the path to your ZOID source tree.]) - elif test "$withval" != no ; then - ZOID_SRCDIR="$withval" - fi - ) - if test -n "$ZOID_SRCDIR" ; then - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -Isrc/io/bmi -I$ZOID_SRCDIR/include -I$ZOID_SRCDIR/zbmi -I$ZOID_SRCDIR/zbmi/implementation" - AC_CHECK_HEADER(zbmi.h,, AC_MSG_ERROR([Header zbmi.h not found.])) - AC_CHECK_HEADER(zoid_api.h,, AC_MSG_ERROR([Header zoid_api.h not found.])) - AC_CHECK_HEADER(zbmi_protocol.h,, AC_MSG_ERROR([Header zbmi_protocol.h not found.])) - CPPFLAGS="$save_cppflags" - BUILD_ZOID=1 - fi - AC_SUBST(BUILD_ZOID) - AC_SUBST(ZOID_SRCDIR) -]) - -dnl vim: set ft=config : - diff --git a/cert-utils/pvfs2-grid-proxy-init.sh b/cert-utils/pvfs2-grid-proxy-init.sh new file mode 100755 index 0000000..3080b53 --- /dev/null +++ b/cert-utils/pvfs2-grid-proxy-init.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# This script generates a proxy certificate with a policy in the format +# of {UID}/{GID}, e.g. 1000/100. The certificate is stored in /tmp/x509up_u{UID}, +# e.g. /tmp/x509up_u1000. This certificate is for use with the OrangeFS +# Windows Client. +# +# $GLOBUS_LOCATION must be set, or grid-proxy-init must be on the path. +# +# Arguments to this script will be passed to grid-proxy-init. + +echo `id -u`/`id -g` > cert-policy +if [ $? 
-ne 0 ]; then + echo Could not create cert-policy, exiting + exit 1 +fi + +if [ "$GLOBUS_LOCATION" != "" ]; then + $GLOBUS_LOCATION/bin/grid-proxy-init -policy cert-policy -pl id-ppl-anyLanguage $@ +else + grid-proxy-init -policy cert-policy -pl id-ppl-anyLanguage $@ +fi + diff --git a/config.save b/config.save index be895d8..c8eb321 100755 --- a/config.save +++ b/config.save @@ -525,17 +525,17 @@ s,@ECHO_C@,,;t t s,@ECHO_N@,-n,;t t s,@ECHO_T@,,;t t s,@LIBS@, -lcrypto -lssl -ldl,;t t -s,@PVFS2_VERSION@,2.8.6-orangefs-2012-07-03-162939,;t t +s,@PVFS2_VERSION@,2.8.6-orangefs-2012-07-13-043849,;t t s,@PVFS2_VERSION_MAJOR@,2,;t t s,@PVFS2_VERSION_MINOR@,8,;t t s,@PVFS2_VERSION_SUB@,6,;t t -s,@build@,x86_64-unknown-linux-gnu,;t t -s,@build_cpu@,x86_64,;t t -s,@build_vendor@,unknown,;t t +s,@build@,i686-pc-linux-gnu,;t t +s,@build_cpu@,i686,;t t +s,@build_vendor@,pc,;t t s,@build_os@,linux-gnu,;t t -s,@host@,x86_64-unknown-linux-gnu,;t t -s,@host_cpu@,x86_64,;t t -s,@host_vendor@,unknown,;t t +s,@host@,i686-pc-linux-gnu,;t t +s,@host_cpu@,i686,;t t +s,@host_vendor@,pc,;t t s,@host_os@,linux-gnu,;t t s,@CC@,gcc,;t t s,@CFLAGS@, -g -O2,;t t @@ -579,9 +579,9 @@ s,@THREADED_KMOD_HELPER@,,;t t s,@LINUX_KERNEL_SRC@,,;t t s,@LINUX24_KERNEL_SRC@,,;t t s,@LINUX24_KERNEL_MINOR_VER@,,;t t -s,@BUILD_ABSOLUTE_TOP@,/usr/src/orangefsosd-2.8.6,;t t +s,@BUILD_ABSOLUTE_TOP@,/usr/src/pvfs2-osd,;t t s,@SRC_RELATIVE_TOP@,./,;t t -s,@SRC_ABSOLUTE_TOP@,/usr/src/orangefsosd-2.8.6,;t t +s,@SRC_ABSOLUTE_TOP@,/usr/src/pvfs2-osd,;t t s,@FUSE_LDFLAGS@,,;t t s,@FUSE_CFLAGS@,,;t t s,@BUILD_FUSE@,,;t t @@ -983,14 +983,14 @@ CEOF t clr : clr ${ac_dA}HAVE_ARPA_INET_H${ac_dB}HAVE_ARPA_INET_H${ac_dC}1${ac_dD} -${ac_dA}SIZEOF_LONG_INT${ac_dB}SIZEOF_LONG_INT${ac_dC}8${ac_dD} +${ac_dA}SIZEOF_LONG_INT${ac_dB}SIZEOF_LONG_INT${ac_dC}4${ac_dD} ${ac_dA}WITH_OPENSSL${ac_dB}WITH_OPENSSL${ac_dC}1${ac_dD} ${ac_dA}HAVE_OPENSSL_EVP_H${ac_dB}HAVE_OPENSSL_EVP_H${ac_dC}1${ac_dD} 
${ac_dA}HAVE_OPENSSL_CRYPTO_H${ac_dB}HAVE_OPENSSL_CRYPTO_H${ac_dC}1${ac_dD} ${ac_dA}HAVE_OPENSSL_SHA_H${ac_dB}HAVE_OPENSSL_SHA_H${ac_dC}1${ac_dD} ${ac_dA}HAVE_AIOCB_ERROR_CODE${ac_dB}HAVE_AIOCB_ERROR_CODE${ac_dC}1${ac_dD} ${ac_dA}HAVE_AIOCB_RETURN_VALUE${ac_dB}HAVE_AIOCB_RETURN_VALUE${ac_dC}1${ac_dD} -${ac_dA}SIZEOF_VOID_P${ac_dB}SIZEOF_VOID_P${ac_dC}8${ac_dD} +${ac_dA}SIZEOF_VOID_P${ac_dB}SIZEOF_VOID_P${ac_dC}4${ac_dD} ${ac_dA}HAVE_SYS_EPOLL_H${ac_dB}HAVE_SYS_EPOLL_H${ac_dC}1${ac_dD} ${ac_dA}PVFS_USRINT_BUILD${ac_dB}PVFS_USRINT_BUILD${ac_dC}1${ac_dD} ${ac_dA}PVFS_USRINT_KMOUNT${ac_dB}PVFS_USRINT_KMOUNT${ac_dC}0${ac_dD} @@ -1075,14 +1075,14 @@ CEOF t clr : clr ${ac_uA}HAVE_ARPA_INET_H${ac_uB}HAVE_ARPA_INET_H${ac_uC}1${ac_uD} -${ac_uA}SIZEOF_LONG_INT${ac_uB}SIZEOF_LONG_INT${ac_uC}8${ac_uD} +${ac_uA}SIZEOF_LONG_INT${ac_uB}SIZEOF_LONG_INT${ac_uC}4${ac_uD} ${ac_uA}WITH_OPENSSL${ac_uB}WITH_OPENSSL${ac_uC}1${ac_uD} ${ac_uA}HAVE_OPENSSL_EVP_H${ac_uB}HAVE_OPENSSL_EVP_H${ac_uC}1${ac_uD} ${ac_uA}HAVE_OPENSSL_CRYPTO_H${ac_uB}HAVE_OPENSSL_CRYPTO_H${ac_uC}1${ac_uD} ${ac_uA}HAVE_OPENSSL_SHA_H${ac_uB}HAVE_OPENSSL_SHA_H${ac_uC}1${ac_uD} ${ac_uA}HAVE_AIOCB_ERROR_CODE${ac_uB}HAVE_AIOCB_ERROR_CODE${ac_uC}1${ac_uD} ${ac_uA}HAVE_AIOCB_RETURN_VALUE${ac_uB}HAVE_AIOCB_RETURN_VALUE${ac_uC}1${ac_uD} -${ac_uA}SIZEOF_VOID_P${ac_uB}SIZEOF_VOID_P${ac_uC}8${ac_uD} +${ac_uA}SIZEOF_VOID_P${ac_uB}SIZEOF_VOID_P${ac_uC}4${ac_uD} ${ac_uA}HAVE_SYS_EPOLL_H${ac_uB}HAVE_SYS_EPOLL_H${ac_uC}1${ac_uD} ${ac_uA}PVFS_USRINT_BUILD${ac_uB}PVFS_USRINT_BUILD${ac_uC}1${ac_uD} ${ac_uA}PVFS_USRINT_KMOUNT${ac_uB}PVFS_USRINT_KMOUNT${ac_uC}0${ac_uD} diff --git a/include/orange.h b/include/orange.h new file mode 100644 index 0000000..84138c7 --- /dev/null +++ b/include/orange.h @@ -0,0 +1,27 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* This is the master header file for OrangeFS. 
It pulls in all header + * files needed by client side for software that operates at or above + * the system interface level. + */ + +#ifndef __ORANGE_H +#define __ORANGE_H + +#include "pvfs2-usrint.h" +#include "pvfs2.h" + +#endif /* __ORANGE_H */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/include/pvfs2-usrint.h b/include/pvfs2-usrint.h new file mode 100644 index 0000000..08ae3dc --- /dev/null +++ b/include/pvfs2-usrint.h @@ -0,0 +1,329 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines + */ + +#ifndef PVFS_USRINT_H +#define PVFS_USRINT_H 1 + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif +#ifndef _ATFILE_SOURCE +#define _ATFILE_SOURCE 1 +#endif +#ifndef _LARGEFILE_SOURCE +#define _LARGEFILE_SOURCE 1 +#endif +#ifndef _LARGEFILE64_SOURCE +#define _LARGEFILE64_SOURCE 1 +#endif +#ifndef _USE_FILE_OFFSET64 +#define _USE_FILE_OFFSET64 1 +#endif +#ifndef _FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 64 +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* define open flags unique to PVFS here */ +#define O_HINTS 02000000 /* PVFS hints are present */ +#define O_NOTPVFS 04000000 /* Open non-PVFS files if possible */ + +/* define FD flags unique to PVFS here */ +#define PVFS_FD_NOCACHE 0x10000 + +/* Define AT_FDCWD and related flags on older systems */ +#ifndef AT_FDCWD +# define AT_FDCWD -100 /* Special value used to indicate + the *at functions should use the + current working directory. */ +#endif +#ifndef AT_SYMLINK_NOFOLLOW +# define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ +#endif +#ifndef AT_REMOVDIR +# define AT_REMOVEDIR 0x200 /* Remove directory instead of + unlinking file. 
*/ +#endif +#ifndef AT_SYMLINK_FOLLOW +# define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ +#endif +#ifndef AT_EACCESS +# define AT_EACCESS 0x200 /* Test access permitted for + effective IDs, not real IDs. */ +#endif + +/* pvfs_open */ +extern int pvfs_open(const char *path, int flags, ...); + +/* pvfs_open64 */ +extern int pvfs_open64(const char *path, int flags, ...); + +/* pvfs_openat */ +extern int pvfs_openat(int dirfd, const char *path, int flags, ...); + +/* pvfs_openat64 */ +extern int pvfs_openat64(int dirfd, const char *path, int flags, ...); + +extern int pvfs_creat(const char *path, mode_t mode, ...); + +extern int pvfs_creat64(const char *path, mode_t mode, ...); + +/* pvfs_unlink */ +extern int pvfs_unlink (const char *path); + +extern int pvfs_unlinkat (int dirfd, const char *path, int flags); + +extern int pvfs_rename(const char *oldpath, const char *newpath); + +extern int pvfs_renameat(int olddirfd, const char *oldpath, + int newdirfd, const char *newpath); + +/* pvfs_read */ +extern ssize_t pvfs_read( int fd, void *buf, size_t count ); + +/* pvfs_pread */ +extern ssize_t pvfs_pread( int fd, void *buf, size_t count, off_t offset ); + +extern ssize_t pvfs_readv(int fd, const struct iovec *vector, int count); + +/* pvfs_pread64 */ +extern ssize_t pvfs_pread64( int fd, void *buf, size_t count, off64_t offset ); + +/* pvfs_write */ +extern ssize_t pvfs_write( int fd, const void *buf, size_t count ); + +/* pvfs_pwrite */ +extern ssize_t pvfs_pwrite( int fd, const void *buf, size_t count, off_t offset ); + +extern ssize_t pvfs_writev( int fd, const struct iovec *vector, int count ); + +/* pvfs_pwrite64 */ +extern ssize_t pvfs_pwrite64( int fd, const void *buf, size_t count, off64_t offset ); + +/* pvfs_lseek */ +extern off_t pvfs_lseek(int fd, off_t offset, int whence); + +/* pvfs_lseek64 */ +extern off64_t pvfs_lseek64(int fd, off64_t offset, int whence); + +extern int pvfs_truncate(const char *path, off_t length); + +extern int 
pvfs_truncate64 (const char *path, off64_t length); + +extern int pvfs_fallocate(int fd, off_t offset, off_t length); + +extern int pvfs_ftruncate (int fd, off_t length); + +extern int pvfs_ftruncate64 (int fd, off64_t length); + +/* pvfs_close */ +extern int pvfs_close( int fd ); + +extern int pvfs_flush(int fd); + +/* various flavors of stat */ +extern int pvfs_stat(const char *path, struct stat *buf); + +extern int pvfs_stat64(const char *path, struct stat64 *buf); + +extern int pvfs_stat_mask(const char *path, struct stat *buf, uint32_t mask); + +extern int pvfs_fstat(int fd, struct stat *buf); + +extern int pvfs_fstat64(int fd, struct stat64 *buf); + +extern int pvfs_fstatat(int fd, const char *path, struct stat *buf, int flag); + +extern int pvfs_fstatat64(int fd, const char *path, struct stat64 *buf, int flag); + +extern int pvfs_fstat_mask(int fd, struct stat *buf, uint32_t mask); + +extern int pvfs_lstat(const char *path, struct stat *buf); + +extern int pvfs_lstat64(const char *path, struct stat64 *buf); + +extern int pvfs_lstat_mask(const char *path, struct stat *buf, uint32_t mask); + +extern int pvfs_futimesat(int dirfd, const char *path, const struct timeval times[2]); + +extern int pvfs_utimes(const char *path, const struct timeval times[2]); + +extern int pvfs_utime(const char *path, const struct utimbuf *buf); + +extern int pvfs_futimes(int fd, const struct timeval times[2]); + +extern int pvfs_dup(int oldfd); + +extern int pvfs_dup2(int oldfd, int newfd); + +extern int pvfs_chown (const char *path, uid_t owner, gid_t group); + +extern int pvfs_fchown (int fd, uid_t owner, gid_t group); + +extern int pvfs_fchownat(int fd, const char *path, uid_t owner, gid_t group, int flag); + +extern int pvfs_lchown (const char *path, uid_t owner, gid_t group); + +extern int pvfs_chmod (const char *path, mode_t mode); + +extern int pvfs_fchmod (int fd, mode_t mode); + +extern int pvfs_fchmodat(int fd, const char *path, mode_t mode, int flag); + +extern int 
pvfs_mkdir (const char *path, mode_t mode); + +extern int pvfs_mkdirat (int dirfd, const char *path, mode_t mode); + +extern int pvfs_rmdir (const char *path); + +extern ssize_t pvfs_readlink (const char *path, char *buf, size_t bufsiz); + +extern ssize_t pvfs_readlinkat (int dirfd, const char *path, char *buf, size_t bufsiz); + +extern int pvfs_symlink (const char *oldpath, const char *newpath); + +extern int pvfs_symlinkat (const char *oldpath, int newdirfd, const char *newpath); + +/* PVFS does not have hard links */ +extern int pvfs_link (const char *oldpath, const char *newpath); + +/* PVFS does not have hard links */ +extern int pvfs_linkat (int olddirfd, const char *oldpath, + int newdirfd, const char *newpath, int flags); + +/* this reads exactly one dirent, count is ignored */ +extern int pvfs_readdir(unsigned int fd, struct dirent *dirp, unsigned int count); + +/* this reads multiple dirents, up to count */ +extern int pvfs_getdents(unsigned int fd, struct dirent *dirp, unsigned int count); + +extern int pvfs_getdents64(unsigned int fd, struct dirent64 *dirp, unsigned int count); + +extern int pvfs_access (const char * path, int mode); + +extern int pvfs_faccessat (int dirfd, const char * path, int mode, int flags); + +extern int pvfs_flock(int fd, int op); + +extern int pvfs_fcntl(int fd, int cmd, ...); + +/* sync all disk data */ +extern void pvfs_sync(void ); + +/* sync file, but not dir it is in */ +extern int pvfs_fsync(int fd); + +/* does not sync file metadata */ +extern int pvfs_fdatasync(int fd); + +extern int pvfs_fadvise(int fd, off_t offset, off_t len, int advice); + +extern int pvfs_fadvise64(int fd, off64_t offset, off64_t len, int advice); + +extern int pvfs_statfs(const char *path, struct statfs *buf); + +extern int pvfs_statfs64(const char *path, struct statfs64 *buf); + +extern int pvfs_fstatfs(int fd, struct statfs *buf); + +extern int pvfs_fstatfs64(int fd, struct statfs64 *buf); + +extern int pvfs_statvfs(const char *path, struct 
statvfs *buf); + +extern int pvfs_fstatvfs(int fd, struct statvfs *buf); + +extern int pvfs_mknod(const char *path, mode_t mode, dev_t dev); + +extern int pvfs_mknodat(int dirfd, const char *path, mode_t mode, dev_t dev); + +extern ssize_t pvfs_sendfile(int outfd, int infd, off_t *offset, size_t count); + +extern ssize_t pvfs_sendfile64(int outfd, int infd, off64_t *offset, size_t count); + +extern int pvfs_setxattr(const char *path, const char *name, + const void *value, size_t size, int flags); + +extern int pvfs_lsetxattr(const char *path, const char *name, + const void *value, size_t size, int flags); + +extern int pvfs_fsetxattr(int fd, const char *name, + const void *value, size_t size, int flags); + +extern ssize_t pvfs_getxattr(const char *path, const char *name, + void *value, size_t size); + +extern ssize_t pvfs_lgetxattr(const char *path, const char *name, + void *value, size_t size); + +extern ssize_t pvfs_fgetxattr(int fd, const char *name, + void *value, size_t size); + +extern ssize_t pvfs_listxattr(const char *path, char *list, size_t size); + +extern ssize_t pvfs_llistxattr(const char *path, char *list, size_t size); + +extern ssize_t pvfs_flistxattr(int fd, char *list, size_t size); + +extern int pvfs_removexattr(const char *path, const char *name); + +extern int pvfs_lremovexattr(const char *path, const char *name); + +extern int pvfs_fremovexattr(int fd, const char *name); + +extern int pvfs_chdir(const char *path); + +extern int pvfs_fchdir(int fd); + +extern int pvfs_cwd_init(const char *buf, size_t size); + +extern char *pvfs_getcwd(char *buf, size_t size); + +extern char *pvfs_get_current_dir_name(void); + +extern char *pvfs_getwd(char *buf); + +extern mode_t pvfs_umask(mode_t mask); + +extern mode_t pvfs_getumask(void); + +extern int pvfs_getdtablesize(void); + +extern void *pvfs_mmap(void *start, size_t length, int prot, int flags, + int fd, off_t offset); + +extern int pvfs_munmap(void *start, size_t length); + +extern int pvfs_msync(void 
*start, size_t length, int flags); + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/apps/admin/pvfs2-get-uid.c b/src/apps/admin/pvfs2-get-uid.c new file mode 100644 index 0000000..2e8ba7b --- /dev/null +++ b/src/apps/admin/pvfs2-get-uid.c @@ -0,0 +1,337 @@ +#include +#include +#include + +#include "pvfs2.h" +#include "pvfs2-mgmt.h" +#include "bmi.h" +#include "pint-uid-mgmt.h" +#include "pint-util.h" +#include "pint-cached-config.h" + +#define UID_HISTORY_MAX_SECS 4294967295 /* max uint32_t val */ +#define UID_SERV_LIST_SIZE 25 /* maximum servers to get stats from */ + +struct options +{ + uint32_t history; + char **server_list; + int server_count; + PVFS_fs_id fs_id; +}; + +static struct options *parse_args(int argc, char *argv[]); +static void usage(int argc, char *argv[]); +static void cleanup(struct options *ptr, PVFS_BMI_addr_t *addr_array, + PVFS_uid_info_s **uid_stats); + +int main(int argc, char *argv[]) +{ + PVFS_credentials creds; + PVFS_fs_id cur_fs; + PVFS_BMI_addr_t *addr_array, server_addr; + PVFS_uid_info_s **uid_info_array; + uint32_t *uid_info_count; + char uid_timestamp[64], curTime[64]; + struct options *prog_opts = NULL; + int ret = 0; + int i, j; + struct timeval currentTime; + + /* parse command line arguments */ + prog_opts = parse_args(argc, argv); + if (!prog_opts) + { + fprintf(stderr, "Unable to allocate memory for command line args\n"); + exit(EXIT_FAILURE); + } + + if (!(prog_opts->history)) + { + prog_opts->history = UID_HISTORY_MAX_SECS; + } + + ret = PVFS_util_init_defaults(); + if (ret < 0) + { + PVFS_perror("PVFS_util_init_defaults", ret); + return (-1); + } + + PVFS_util_gen_credentials(&creds); + + /* get a default fsid or use the one given by the user */ + if (prog_opts->fs_id == -1) + { + ret = PVFS_util_get_default_fsid(&cur_fs); + if (ret < 0) + { + PVFS_perror("PVFS_util_get_default_fsid", ret); + return (-1); + } + } + 
else + { + cur_fs = prog_opts->fs_id; + } + + /* if user specifies servers, allocate memory for the BMI addrs and + * then translate the server strings to BMI addrs + */ + if (prog_opts->server_count) + { + /* allocate memory for our BMI addresses and fill them in */ + addr_array = (PVFS_BMI_addr_t *)malloc(prog_opts->server_count * + sizeof(PVFS_BMI_addr_t)); + if (!addr_array) + { + fprintf(stderr, "Unable to allocate memory for BMI addrs\n"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < prog_opts->server_count; i ++) + { + ret = BMI_addr_lookup(&server_addr, prog_opts->server_list[i]); + if (ret < 0) + { + PVFS_perror("BMI_addr_lookup", ret); + return (-1); + } + addr_array[i] = server_addr; + } + } + else + { + /* else, user specified no servers, so a list will be built */ + ret = PVFS_mgmt_count_servers(cur_fs, &creds, PINT_SERVER_TYPE_ALL, + &(prog_opts->server_count)); + if (ret < 0) + { + PVFS_perror("PVFS_mgmt_count_servers", ret); + return (-1); + } + + /* allocate memory for the number of BMI addrs found */ + addr_array = (PVFS_BMI_addr_t *)malloc(prog_opts->server_count * + sizeof(PVFS_BMI_addr_t)); + if (!addr_array) + { + fprintf(stderr, "Unable to allocate memory for BMI addrs\n"); + exit(EXIT_FAILURE); + } + + /* retrieve the list of BMI addrs for the list of servers */ + ret = PVFS_mgmt_get_server_array(cur_fs, &creds, PINT_SERVER_TYPE_ALL, + addr_array, + &(prog_opts->server_count)); + if (ret < 0) + { + PVFS_perror("PVFS_mgmt_get_server_array", ret); + return (-1); + } + + /* use reverse lookups so the server URI's can be displayed to the user */ + for (i = 0; i < prog_opts->server_count; i++) + { + prog_opts->server_list[i] = strdup(BMI_addr_rev_lookup(addr_array[i])); + } + } + + /* allocate memory to store the uid statistics from the given servers */ + uid_info_array = (PVFS_uid_info_s **)malloc(prog_opts->server_count * + sizeof(PVFS_uid_info_s *)); + if (!uid_info_array) + { + fprintf(stderr, "Unable to allocate memory for uid stats 
array\n"); + exit(EXIT_FAILURE); + } + for (i = 0; i < prog_opts->server_count; i++) + { + uid_info_array[i] = (PVFS_uid_info_s *)malloc(UID_MGMT_MAX_HISTORY * + sizeof(PVFS_uid_info_s)); + if(!uid_info_array[i]) + { + fprintf(stderr, "Unable to allocate memory for uid stats array\n"); + exit(EXIT_FAILURE); + } + } + + uid_info_count = (uint32_t *)malloc(prog_opts->server_count * sizeof(uint32_t)); + if (!uid_info_count) + { + fprintf(stderr, "Memory allocation error, out of memory\n"); + } + + /* retrieve the statistics from the servers, checking for any errors */ + ret = PVFS_mgmt_get_uid_list(cur_fs, &creds, prog_opts->server_count, + addr_array, prog_opts->history, uid_info_array, + uid_info_count, NULL, NULL); + if (ret < 0) + { + PVFS_perror("PVFS_mgmt_get_uid_list", ret); + return (-1); + } + + printf("\nFSID: %d\n", cur_fs); + + /* get a current timestamp for users to compare against */ + PINT_util_get_current_timeval(¤tTime); + PINT_util_parse_timeval(currentTime, curTime); + printf("Current Time: %s\n\n", curTime); + + /* display the uid statistics for each server to the user */ + for (i = 0; i < prog_opts->server_count; i++) + { + printf("Server: %s\n", prog_opts->server_list[i]); + for (j = 0; j < uid_info_count[i]; j++) + { + + PINT_util_parse_timeval(uid_info_array[i][j].tv, uid_timestamp); + printf("\tUID: %-10u\tcount: %-10llu\t%s\n", uid_info_array[i][j].uid, + (long long unsigned int)uid_info_array[i][j].count, + uid_timestamp); + } + printf("\n"); + } + + /* memory cleanup */ + cleanup(prog_opts, addr_array, uid_info_array); + + return 0; +} + +/* parse_args() + * + * parses command line arguments and returns pointer to program options + */ +static struct options *parse_args(int argc, char *argv[]) +{ + char flags[] = "s:t:f:h"; + int one_opt = 0; + struct options *tmp_opts = NULL; + int server_cnt = 0; + int i; + + /* allocate memory for the program options */ + tmp_opts = (struct options *)malloc(sizeof(struct options)); + if (!tmp_opts) + { + 
return NULL; + } + memset(tmp_opts, 0, sizeof(struct options)); + + /* allocate memory for storing pointers to server addrs */ + tmp_opts->server_list = (char **)malloc(UID_SERV_LIST_SIZE * sizeof(char *)); + for (i = 0; i < UID_SERV_LIST_SIZE; i++) + { + tmp_opts->server_list[i] = NULL; + } + + tmp_opts->fs_id = -1; + + /* parse args using getopt() */ + while((one_opt = getopt(argc, argv, flags)) != EOF) + { + switch(one_opt) + { + case('s'): + if (server_cnt == UID_SERV_LIST_SIZE) + { + fprintf(stderr, "Server limit exceded, using first %d servers\n", UID_SERV_LIST_SIZE); + break; + } + if (server_cnt > UID_SERV_LIST_SIZE) + { + break; + } + tmp_opts->server_list[server_cnt] = strdup(optarg); + server_cnt++; + break; + case('t'): + tmp_opts->history = atoi(optarg); + if (tmp_opts->history < 1) + { + usage(argc, argv); + exit(EXIT_FAILURE); + } + break; + case('f'): + tmp_opts->fs_id = atoi(optarg); + if (tmp_opts->fs_id < 0) + { + usage(argc, argv); + exit(EXIT_FAILURE); + } + break; + case('h'): + usage(argc, argv); + exit(EXIT_SUCCESS); + case('?'): + usage(argc, argv); + exit(EXIT_FAILURE); + } + } + + tmp_opts->server_count = server_cnt; + + return tmp_opts; +} + +/* usage() + * + * displays proper program usage to the user + */ +static void usage(int argc, char *argv[]) +{ + fprintf(stderr, "\n"); + fprintf(stderr, "Usage : %s [-s server] ... [-t history] [-f fs_id]\n", argv[0]); + fprintf(stderr, "Example: %s -s tcp://127.0.0.1:3334 -t 60 -f 135161\n", argv[0]); + fprintf(stderr, "\nOPTIONS:\n"); + fprintf(stderr, "\n-s\t specify a server address, e.g. 
tcp://127.0.0.1:3334\n"); + fprintf(stderr, "\t multiple servers can be specified by repeating -s option\n"); + fprintf(stderr, "\t if no servers are specified, a list will be generated\n"); + fprintf(stderr, "\n-t\t history measured in seconds (must be > 0)\n"); + fprintf(stderr, "\t if no history is specified, all uid history is returned\n"); + fprintf(stderr, "\n-f\t specify a PVFS_fs_id\n"); + fprintf(stderr, "\t if not specified, a default fs_id is found\n"); + fprintf(stderr, "\n-h\t display program usage\n\n"); + return; +} + +/* cleanup() + * + * This function frees all memory used by this application + */ +static void cleanup(struct options *opts, PVFS_BMI_addr_t *addr_array, + PVFS_uid_info_s **uid_stats) +{ + int i; + + for (i = 0; i < UID_SERV_LIST_SIZE; i++) + { + if (opts->server_list[i] == NULL) + { + break; + } + free(opts->server_list[i]); + } + for (i = 0; i < opts->server_count; i++) + { + free(uid_stats[i]); + } + free(opts->server_list); + free(opts); + free(addr_array); + free(uid_stats); + return; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=4 sts=4 sw=4 expandtab + */ + diff --git a/src/apps/admin/pvfs2-perf-mon-snmp.c b/src/apps/admin/pvfs2-perf-mon-snmp.c new file mode 100644 index 0000000..226ab67 --- /dev/null +++ b/src/apps/admin/pvfs2-perf-mon-snmp.c @@ -0,0 +1,429 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bmi.h" +#include "pvfs2.h" +#include "pvfs2-mgmt.h" +#include "pvfs2-internal.h" + +#define HISTORY 1 +#define CMD_BUF_SIZE 256 + +/* these defines should match the defines in include/pvfs2-mgmt.h */ +#define OID_READ ".1.3.6.1.4.1.7778.0" +#define OID_WRITE ".1.3.6.1.4.1.7778.1" +#define OID_MREAD ".1.3.6.1.4.1.7778.2" +#define OID_MWRITE ".1.3.6.1.4.1.7778.3" +#define OID_DSPACE ".1.3.6.1.4.1.7778.4" +#define OID_KEYVAL ".1.3.6.1.4.1.7778.5" +#define OID_REQSCHED ".1.3.6.1.4.1.7778.6" +#define OID_REQUESTS ".1.3.6.1.4.1.7778.7" +#define OID_SMALL_READ ".1.3.6.1.4.1.7778.8" +#define OID_SMALL_WRITE ".1.3.6.1.4.1.7778.9" +#define OID_FLOW_READ ".1.3.6.1.4.1.7778.10" +#define OID_FLOW_WRITE ".1.3.6.1.4.1.7778.11" + +#define INT_TYPE "INTEGER" +#define CNT_TYPE "COUNTER" + +#ifndef PVFS2_VERSION +#define PVFS2_VERSION "Unknown" +#endif + +struct MGMT_perf_iod +{ + const char *key_oid; + const char *key_type; + int key_number; + const char *key_name; +}; + +/* this table needs to match the list of keys in pvfs2-mgmt.h */ +static struct MGMT_perf_iod key_table[] = +{ + {OID_READ, CNT_TYPE, PINT_PERF_READ, "Bytes Read"}, + {OID_WRITE, CNT_TYPE, PINT_PERF_WRITE, "Bytes Written"}, + {OID_MREAD, CNT_TYPE, PINT_PERF_METADATA_READ, "Metadata Read Ops"}, + {OID_MWRITE, CNT_TYPE, PINT_PERF_METADATA_WRITE, "Metadata Write Ops"}, + {OID_DSPACE, CNT_TYPE, PINT_PERF_METADATA_DSPACE_OPS, "Metadata DSPACE Ops"}, + {OID_KEYVAL, CNT_TYPE, PINT_PERF_METADATA_KEYVAL_OPS, "Metadata KEYVAL Ops"}, + {OID_REQSCHED, INT_TYPE, PINT_PERF_REQSCHED, "Requests Active"}, + {OID_REQUESTS, CNT_TYPE, PINT_PERF_REQUESTS, "Requests Received"}, + {OID_SMALL_READ, CNT_TYPE, PINT_PERF_SMALL_READ, "Bytes Read by Small_IO"}, + {OID_SMALL_WRITE, CNT_TYPE, PINT_PERF_SMALL_WRITE, "Bytes Written by Small_IO"}, + {OID_FLOW_READ, CNT_TYPE, PINT_PERF_FLOW_READ, "Bytes Read by Flow"}, + 
{OID_FLOW_WRITE, CNT_TYPE, PINT_PERF_FLOW_WRITE, "Bytes Written by Flow"}, + {NULL, NULL, -1, NULL} /* this halts the key count */ +}; + +struct options +{ + char* mnt_point; + int mnt_point_set; + char* server_addr; + int server_addr_set; +}; + +static struct options* parse_args(int argc, char* argv[]); +static void usage(int argc, char** argv); + +int main(int argc, char **argv) +{ + int ret = -1; + char *retc = NULL; + PVFS_fs_id cur_fs; + struct options* user_opts = NULL; + char pvfs_path[PVFS_NAME_MAX] = {0}; + int i; + PVFS_credentials creds; + int io_server_count; + int64_t **perf_matrix; + uint64_t* end_time_ms_array; + uint32_t* next_id_array; + PVFS_BMI_addr_t *addr_array, server_addr; + char *cmd_buffer = (char *)malloc(CMD_BUF_SIZE); + int max_keys, key_count; + + /* look at command line arguments */ + user_opts = parse_args(argc, argv); + if (!user_opts) + { + fprintf(stderr, "Error: failed to parse command line arguments.\n"); + usage(argc, argv); + return(-1); + } + + ret = PVFS_util_init_defaults(); + if (ret < 0) + { + PVFS_perror("PVFS_util_init_defaults", ret); + return(-1); + } + + PVFS_util_gen_credentials(&creds); + if (user_opts->server_addr_set) + { + if (PVFS_util_get_default_fsid(&cur_fs) < 0) + { + /* Can't find a file system */ + fprintf(stderr, "Error: failed to find a file system.\n"); + usage(argc, argv); + return(-1); + } + if (user_opts->server_addr && + (BMI_addr_lookup (&server_addr, user_opts->server_addr) == 0)) + { + /* set up single server */ + addr_array = (PVFS_BMI_addr_t *)malloc(sizeof(PVFS_BMI_addr_t)); + addr_array[0] = server_addr; + io_server_count = 1; + } + else + { + /* bad argument - address not found */ + fprintf(stderr, "Error: failed to parse server address.\n"); + usage(argc, argv); + return(-1); + } + } + else + { + /* will sample all servers */ + /* translate local path into pvfs2 relative path */ + ret = PVFS_util_resolve(user_opts->mnt_point, + &cur_fs, pvfs_path, PVFS_NAME_MAX); + if (ret < 0) + { + 
PVFS_perror("PVFS_util_resolve", ret); + return(-1); + } + + /* count how many I/O servers we have */ + ret = PVFS_mgmt_count_servers(cur_fs, &creds, PVFS_MGMT_IO_SERVER, + &io_server_count); + if (ret < 0) + { + PVFS_perror("PVFS_mgmt_count_servers", ret); + return(-1); + } + + /* build a list of servers to talk to */ + addr_array = (PVFS_BMI_addr_t *) + malloc(io_server_count * sizeof(PVFS_BMI_addr_t)); + if (addr_array == NULL) + { + perror("malloc"); + return -1; + } + ret = PVFS_mgmt_get_server_array(cur_fs, + &creds, + PVFS_MGMT_IO_SERVER, + addr_array, + &io_server_count); + if (ret < 0) + { + PVFS_perror("PVFS_mgmt_get_server_array", ret); + return -1; + } + } + + /* count keys */ + for (max_keys = 0; key_table[max_keys].key_number >= 0; max_keys++); + + /* allocate a 2 dimensional array for statistics */ + perf_matrix = (int64_t **)malloc(io_server_count * sizeof(int64_t *)); + if (!perf_matrix) + { + perror("malloc"); + return(-1); + } + for(i=0; i 60) + { + snaptime = time(NULL); + key_count = max_keys; + ret = PVFS_mgmt_perf_mon_list(cur_fs, + &creds, + perf_matrix, + end_time_ms_array, + addr_array, + next_id_array, + io_server_count, + &key_count, + HISTORY, + NULL, NULL); + if (ret < 0) + { + PVFS_perror("PVFS_mgmt_perf_mon_list", ret); + return -1; + } + } + + /* format requested OID */ + if (perf_matrix[srv][key_count] != 0) + { + int k; + /* this is a valid measurement */ + for(k = 0; k < max_keys && + strcmp(cmd_buffer, key_table[k].key_oid); k++); + /* out of for loop k equals selected key */ + if (k < max_keys) + { + returnType = key_table[k].key_type; + returnValue = perf_matrix[srv][key_table[k].key_number]; + } + else + { + /* invalid command */ + fprintf(stdout,"NONE\n"); + fflush(stdout); + continue; + } + } + else + { + /* invalid measurement */ + fprintf(stdout,"NONE\n"); + fflush(stdout); + continue; + } + fprintf(stdout, "%s\n%llu\n", returnType, llu(returnValue)); + fflush(stdout); + /* return to top for next command */ + } + + 
PVFS_sys_finalize(); + + return(ret); +} + +/* parse_args() + * + * parses command line arguments + * + * returns pointer to options structure on success, NULL on failure + */ +static struct options* parse_args(int argc, char* argv[]) +{ + char flags[] = "vm:s:"; + int one_opt = 0; + int len = 0; + + struct options *tmp_opts = NULL; + int ret = -1; + + /* create storage for the command line options */ + tmp_opts = (struct options *) malloc(sizeof(struct options)); + if(tmp_opts == NULL) + { + return(NULL); + } + memset(tmp_opts, 0, sizeof(struct options)); + + /* look at command line arguments */ + while((one_opt = getopt(argc, argv, flags)) != EOF) + { + switch(one_opt) + { + case('v'): + printf("%s\n", PVFS2_VERSION); + exit(0); + case('m'): + /* we need to add a '/' to the end so cannot strdup */ + len = strlen(optarg)+1; + tmp_opts->mnt_point = (char*)malloc(len+1); + if(!tmp_opts->mnt_point) + { + free(tmp_opts); + return(NULL); + } + memset(tmp_opts->mnt_point, 0, len+1); + ret = sscanf(optarg, "%s", tmp_opts->mnt_point); + if(ret < 1) + { + free(tmp_opts); + return(NULL); + } + /* TODO: dirty hack... fix later. 
The remove_dir_prefix() + * function expects some trailing segments or at least + * a slash off of the mount point + */ + strcat(tmp_opts->mnt_point, "/"); + tmp_opts->mnt_point_set = 1; + break; + case('s'): + tmp_opts->server_addr = strdup(optarg); + if (!tmp_opts->server_addr) + { + free(tmp_opts); + return NULL; + } + tmp_opts->server_addr_set = 1; + break; + case('?'): + usage(argc, argv); + exit(EXIT_FAILURE); + } + } + + if (!(tmp_opts->mnt_point_set || tmp_opts->server_addr_set)) + { + free(tmp_opts); + return(NULL); + } + + return(tmp_opts); +} + + +static void usage(int argc, char **argv) +{ + fprintf(stderr, "\n"); + fprintf(stderr, "Usage : %s [-m fs_mount_point]\n", argv[0]); + fprintf(stderr, "Example: %s -m /mnt/pvfs2\n", argv[0]); + fprintf(stderr, "Usage : %s [-s bmi_address_string]\n", argv[0]); + fprintf(stderr, "Example: %s -s tcp://localhost:3334\n", argv[0]); + return; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=4 sts=4 sw=4 expandtab + */ diff --git a/src/apps/ucache/module.mk.in b/src/apps/ucache/module.mk.in new file mode 100644 index 0000000..fc5cb0c --- /dev/null +++ b/src/apps/ucache/module.mk.in @@ -0,0 +1,11 @@ + +ifdef BUILD_UCACHE + +DIR := src/apps/ucache + +UCACHEDSRC := \ + $(DIR)/ucached.c \ + $(DIR)/ucached_cmd.c + +endif # BUILD_UCACHE + diff --git a/src/apps/ucache/shmem_util.c b/src/apps/ucache/shmem_util.c new file mode 100644 index 0000000..a6792c9 --- /dev/null +++ b/src/apps/ucache/shmem_util.c @@ -0,0 +1,75 @@ +#include "shmem_util.h" + +/** Aquire a SysV shared memory segment. + * key_file and proj_id are identifiers used by ftok to uniquly identify the + * segment. size is the desired size in bytes. memory is an optional parameter + * that refers to a void pointer which can be set to the address of the segment. 
+ */ +int shmem_init(char *key_file, int proj_id, size_t size, void **memory) +{ + int key = 0; + int id = 0; + + /* Generate key based on key_file and proj_id */ + key = ftok(key_file, proj_id); + if(key < 0) + { + return -1; + } + + /* Allocate Shared Memory Segment */ + id = shmget(key, size, FLAGS | IPC_CREAT | IPC_EXCL); + if(id < 0) + { + return -1; + } + + /* Reference to pointer not required. */ + if(memory) + { + *memory = shmat(id, NULL, AT_FLAGS); + if(*memory == (void *) -1) + { + return -1; + } + } + else + { + if(shmat(id, NULL, AT_FLAGS) == (void *) 0) + { + return -1; + } + } + + return 0; +} + +/** Destroy SysV shared memory segment */ +int shmem_destroy(char *key_file, int proj_id) +{ + int key = 0; + int id = 0; + int rc = 0; + + /* Generate key based on key_file and proj_id */ + key = ftok(key_file, proj_id); + if(key < 0) + { + return -1; + } + + /* Allocate Shared Memory Segment */ + id = shmget(key, 0, FLAGS); + if(id < 0) + { + return -1; + } + + rc = shmctl(id, IPC_RMID, NULL); + if(rc < 0) + { + return -1; + } + return 0; +} + diff --git a/src/apps/ucache/shmem_util.h b/src/apps/ucache/shmem_util.h new file mode 100644 index 0000000..a096871 --- /dev/null +++ b/src/apps/ucache/shmem_util.h @@ -0,0 +1,20 @@ +#ifndef SHMEM_UTIL_H +#define SHMEM_UTIL_H + +#define _XOPEN_SOURCE 500 + +#include +#include +#include +#include +#include +#include + +#define SVSHM_MODE (SHM_R | SHM_W | SHM_R>>3 | SHM_R>>6) +#define FLAGS (SVSHM_MODE) +#define AT_FLAGS 0 + +int shmem_init(char *key_file, int proj_id, size_t size, void **memory); +int shmem_destroy(char *key_file, int proj_id); + +#endif diff --git a/src/apps/ucache/ucached.c b/src/apps/ucache/ucached.c new file mode 100644 index 0000000..4dd8562 --- /dev/null +++ b/src/apps/ucache/ucached.c @@ -0,0 +1,711 @@ +#include +#include +#include "ucached.h" + +/* FIFO */ +static int readfd = 0; /* Command File Descriptor */ +static int writefd = 0; /* Response File Descriptor */ +static char 
buffer[BUFF_SIZE]; /* For FIFO reads and writes */ +char buff[LOG_LEN]; + +/* Time Structures For Log +static time_t rawtime; +static struct tm * timeinfo; +*/ + +/* Booleans */ +/* 1 if ucache is available for use */ +static unsigned char ucache_avail = 0; +/* Set this to one if the ucache doesn't get created, and the + * create_ucache_shmem function should be run again. + */ +//static unsigned char tryAgain = 0; + +/* Use this global to determine if the atexit registered function (clean_up) + * needs to run. A child process is created to create shmem. This facilitates + * destruction later on, since segments hang around until their creator exits. + */ +pid_t pid = -1; + +/* Hung Lock Detection */ +time_t locked_time[BLOCKS_IN_CACHE+1]; + +/* Forward Function Declarations */ +static int run_as_child(char c); /* Run as child of ucached */ +static int execute_cmd(char command); +static int create_ucache_shmem(void); +static int destroy_ucache_shmem(char dest_locks, char dest_ucache); +static void clean_up(void); +static int ucached_lockchk(void); + +void check_rc(int rc) +{ + memset(buffer, 0, BUFF_SIZE); + if(rc >= 0) + { + strcpy(buffer, "SUCCESS"); + } + else + { + strcpy(buffer, "FAILURE: check log: " UCACHED_LOG_FILE); + } +} + +/** Function to be run upon successful termination from an exit call */ +static void clean_up(void) +{ + int rc = 0; + /* Only the parent process should execute these lines. + * Must check the pid since the atexit function registered + * clean_up. This registration is passed on to any child + * processes forked off of the parent. We don't want to execute + * these lines when any of the children exit. Run only when parent. + */ + if(pid !=0) + { + if(DEST_AT_EXIT) + { + rc = destroy_ucache_shmem(1, 1); + } + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: ucached exiting...PID=%d\n", pid); + rc = unlink(FIFO1); + rc = unlink(FIFO2); + } +} + +/** Checks ucache lock shmem region for hung locks. + * Returns 0 when no hung locks are detected. 
+ * Returns 1 when 1 or more hung locks are detected and all are gracefully + * handled. + * Returns -1 when 1 or more hung locks are detected and couldn't +* be handled properly. (error) + */ +static int ucached_lockchk(void) +{ + int rc = 0; + int i; + for(i = 0; i < (BLOCKS_IN_CACHE + 1); i++) + { + ucache_lock_t * currlock = get_lock((uint16_t)i); + if(lock_trylock(currlock) == 0) + { + /* Lock wasn't held, so set the timer to zero for this lock */ + locked_time[i] = 0; + } + else + { + /* Lock was held, so calculate if lock timeout has occured */ + /* First check to see if this lock's timer has been set at all */ + if(!locked_time[i]) + { + /* Timer for this lock isn't currently set */ + time(&locked_time[i]); + continue; + } + else + { + /* Timer was previously set meaning the block had been locked*/ + double time_diff = difftime(time((time_t *)NULL), locked_time[i]); + if((int)time_diff >= BLOCK_LOCK_TIMEOUT) + { + /* + gossip_debug(GOSSIP_UCACHED_DEBUG, + "WARNING: HUNG LOCK DETECTED @ block index = %d\n", i); + TODO: what to do with hung locks? + rc = pick_lock(ucache_lock_t * currlock); + if(rc == 1) + { + locked_time[i] = (time_t)0; + } + */ + } + } + } + } + + return rc; +} + + +/** Runs the command in a child process */ +static int run_as_child(char c) +{ + pid = fork(); + int rc = 0; + /* Fork Error? 
*/ + if(pid < 0) + { + exit(EXIT_FAILURE); + } + /* Child Process */ + else if(pid == 0) + { + rc = execute_cmd(c); + if(rc < 0) + { + exit(EXIT_FAILURE); + } + exit(EXIT_SUCCESS); + } + /* Parent Process */ + else + { + wait(&rc); + if(WIFEXITED(rc)) + { + if(WEXITSTATUS(rc) != 0) + { + return -1; + } + } + } + return rc; +} + + +static int execute_cmd(char cmd) +{ + int rc = 0; + switch(cmd) + { + /* Create the shared memory required by the ucache */ + case 'c': + rc = create_ucache_shmem(); + break; + /* Destroy the shared memory required by the ucache */ + case 'd': + rc = destroy_ucache_shmem(1, 1); + break; + case 'i': + { + char info_options[6]; + memset(info_options, 0, 6); + + /* Open FILE * to output ucache_info */ + FILE * info_out = fopen(UCACHED_INFO_FILE, "w"); + //FILE * info_out = fdopen(writefd, "w"); + + int howmany = sscanf(&buffer[1], "%s", info_options); + if(howmany > 0) + { + rc = ucache_info(info_out, info_options); + } + else + { + fprintf(info_out, "No display options specified. " + "Showing ucache contents.\n"); + rc = ucache_info(info_out, "c"); + } + rc = 1; + fclose(info_out); + //shmdt(ucache); + //shmdt(ucache_aux); + break; + } + /* Close Daemon */ + case 'x': + writefd = open(FIFO2, O_WRONLY); + rc = write(writefd, "SUCCESS\tExiting ucached", BUFF_SIZE); + while(rc <= 0) + { + rc = write(writefd, "SUCCESS\tExiting ucached", BUFF_SIZE); + } + remove(UCACHED_STARTED); + close(writefd); + close(readfd); + exit(EXIT_SUCCESS); + break; + default: + strcpy(buffer, "FAILURE\tInvalid command character"); + return -1; + } + return rc; +} + +/* Returns -1 on failure, 1 on success */ +static int create_ucache_shmem(void) +{ + int rc = 0; + + int old_aux_present = 0; + + /* attempt setup of shmem region for locks (inlcude SYSV later? 
*/ + int id = SHM_ID1; + key_t key = ftok(KEY_FILE, id); + size_t size = UCACHE_AUX_SIZE; + int shmflg = SVSHM_MODE; + int aux_shmid = shmget(key, size, shmflg); + + if(aux_shmid == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: shmget on aux_shmid returned -1 on first try\n"); + + /* Shared memory segment used for aux data was not previosly created, + * so create it. + */ + shmflg = shmflg | IPC_CREAT | IPC_EXCL; + aux_shmid = shmget(key, size, shmflg); + if(aux_shmid == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmget (IPC_CREATE, IPC_EXCL)" + " on aux_shmid returned -1\n"); + /* Couldn't create the required segment */ + return -1; + } + else + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: shmget (using IPC_CREATE, IPC_EXCL)" + " on aux_shmid returned shmid = %d\n", aux_shmid); + + /* Attach to shmem and initialize all the aux struct */ + shmflg = 0; + /* ucache_aux is defined in src/client/usrint/ucache.h */ + ucache_aux = shmat(aux_shmid, NULL, shmflg); + if (!ucache_aux) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmat on aux_shmid returned NULL"); + return -1; + } + + ucache_locks = ucache_aux->ucache_locks; + + int i; + /* Initialize Shared Block Level Locks */ + for(i = 0; i < (BLOCKS_IN_CACHE + 1); i++) + { + rc = lock_init(get_lock(i)); + if (rc == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: lock_init returned -1 @ lock index = %d\n", i); + rc = -1; + } + } + } + } + else + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: first shmget on aux_shmid found segment" + ": shmid = %d\n", aux_shmid); + old_aux_present = 1; + /* Shmem for ucache_aux was already created, so just attach to it */ + shmflg = 0; + ucache_aux = shmat(aux_shmid, NULL, shmflg); + if (!ucache_aux) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmat on aux_shmid returned NULL\n"); + return -1; + } + } + + /* At this point all the locks should be aquired and initialized. 
+ * They could also be locked or unlocked */ + + /* Set the global lock point to the address of the last lock in the locks + * shmem segment. Then lock it. + */ + ucache_lock = get_lock(BLOCKS_IN_CACHE); + lock_lock(ucache_lock); + + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: lock segment successfully retrieved and global lock locked.\n"); + + /* Set and zero out global ucache stats struct */ + ucache_stats = &(ucache_aux->ucache_stats); + *ucache_stats = (struct ucache_stats_s){ 0, 0, 0, 0, 0 }; + + /* Try to get/create the shmem required for the ucache */ + id = SHM_ID2; + key = ftok(KEY_FILE, id); + size = CACHE_SIZE; + shmflg = SVSHM_MODE; + int ucache_shmid = shmget(key, size, shmflg); + + if(ucache_shmid == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: shmget on ucache_shmid returned -1 first try\n"); + + /* Remember if there was an old lock region detected */ + if(old_aux_present) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: old ucache_aux found, attempting destruction of old" + " locks and starting\n"); + + /* Destroy old aux region and start function over */ + rc = shmctl(aux_shmid, IPC_RMID, (struct shmid_ds *) NULL); + + /* Let this child process exit, since exiting is required to get + * the shmem segment to be completely removed. Try to create the + * shmem again later in another child process. + */ + return -1; + } + + /* Shared memory segmet used for ucache was not previosly created, + * so create it. 
+ */ + shmflg = shmflg | IPC_CREAT | IPC_EXCL; + ucache_shmid = shmget(key, size, shmflg); + if(ucache_shmid == -1) + { + /* Couldn't create the required segment */ + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmget (using IPC_CREATE, IPC_EXCL)" + " on ucache_shmid returned -1\n"); + + rc = -1; + goto errout; + } + else + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: shmget (using IPC_CREATE, IPC_EXCL)" + " on ucache_shmid returned shmid = %d\n", ucache_shmid); + + /* Attach to the ucache shmem region */ + shmflg = 0; + /* ucache is defined in src/client/usrint/ucache.h */ + ucache = shmat(ucache_shmid, NULL, shmflg); + if (!ucache) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmat on ucache_shmid returned NULL\n"); + rc = -1; + goto errout; + } + + /* Initialize the file table */ + rc = ucache_init_file_table(0); + if(rc != 0) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: file table initialization failed\n"); + /* Couldn't Initialize File Table */ + rc = -1; + goto errout; + } + } + } + else + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: first shmget on ucache_shmid found segment" + ": shmid = %d\n", ucache_shmid); + + /* Previously created ucache segment present. Need more info. 
*/ + /* See if marked for deletion, but has users attached still */ + struct shmid_ds buf; + int cmd = IPC_STAT; + rc = shmctl(ucache_shmid, cmd, &buf); + if(rc == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmctl failed to IPC_STAT ucache_shmid\n"); + goto errout; + } + + /* Determine the count of processes attached to this shm segment */ + char hasAttached = (buf.shm_nattch > 0); + + /* Determine if the ucache shmem segment is marked for destruction*/ + uint16_t currentMode = buf.shm_perm.mode; + char markedForDest = ((currentMode & SHM_DEST) == SHM_DEST); + + if(markedForDest && hasAttached) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: detected previous ucache shmem segment" + " marked for destruction that still has" + " one or more processes attached to it.\n"); + + shmflg = shmflg | IPC_CREAT; /* Note: CREAT w/o EXCL */ + ucache_shmid = shmget(key, size, shmflg); + if(ucache_shmid == -1) + { + /* Couldn't create the required segment */ + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmget (using IPC_CREAT && !EXCL)" + " on ucache_shmid returned -1\n"); + rc = -1; + goto errout; + } + /* Attach to the ucache shmem region */ + shmflg = 0; + /* ucache is defined in src/client/usrint/ucache.h */ + ucache = shmat(ucache_shmid, NULL, shmflg); + if (!ucache) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmat on ucache_shmid returned NULL\n"); + rc = -1; + goto errout; + } + + /* Initialize the ftbl, and force the creation of it + * since the init boolean is set to 1. 
+ */ + rc = ucache_init_file_table(1); + if(rc != 0) + { + /* Couldn't Initialize File Table */ + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: file table initialization failed\n"); + rc = -1; + goto errout; + } + } + else + { + /* Asume we will keep using the previously allocated segment */ + /* Attach to the ucache shmem region */ + shmflg = 0; + /* ucache is defined in src/client/usrint/ucache.h */ + ucache = shmat(ucache_shmid, NULL, shmflg); + if (!ucache) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmat on ucache_shmid returned NULL\n"); + rc = -1; + goto errout; + } + } + } + + lock_unlock(ucache_lock); + return 1; + +errout: + lock_unlock(ucache_lock); + return rc; +} + +static int destroy_ucache_shmem(char dest_locks, char dest_ucache) +{ + int rc = 0; + /* Aquire the main lock then attempt to destroy the ucache shmem segment */ + if(ucache_lock) + { + lock_lock(ucache_lock); + } + + if(dest_ucache) + { +// printf("dest_ucache\n"); + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: destroying ucache shmem\n"); + + /* Destroy shmem segment containing ucache */ + int id = SHM_ID2; + key_t key = ftok(KEY_FILE, id); + int shmflg = SVSHM_MODE; + int ucache_shmid = shmget(key, 0, shmflg); + if(ucache_shmid == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmget on ucache_shmid returned -1\n"); + return -1; + } + rc = shmctl(ucache_shmid, IPC_RMID, (struct shmid_ds *) NULL); + if(rc == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "WARNING: ucache shmem_destroy: errno == %d\n", errno); + } + } + + if(dest_locks) + { +// printf("dest_locks\n"); + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: destroying locks' shmem\n"); + + /* Destroy shmem segment containing locks */ + int id = SHM_ID1; + key_t key = ftok(KEY_FILE, id); + int shmflg = SVSHM_MODE; + int lock_shmid = shmget(key, 0, shmflg); + if(lock_shmid == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmget on lock_shmid returned -1\n"); + return -1; + } + rc = shmctl(lock_shmid, IPC_RMID, 
(struct shmid_ds *) NULL); + if(rc == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "WARNING: ucache_locks shmem_destroy: errno == %d\n", errno); + } + } + + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: both shmem segments marked for destruction.\n"); + return rc; +} + +/** This program should be run as root on startup to initialize the shared + * memory segments required by the user cache in PVFS. + */ +int main(int argc, char **argv) +{ + int rc = 0; + + gossip_enable_file(UCACHED_LOG_FILE, "a"); + uint64_t curr_mask; + int debug_on; + gossip_get_debug_mask(&debug_on, &curr_mask); + /* Enable the writing of the error message and write the message to file. */ + gossip_set_debug_mask(1, GOSSIP_UCACHED_DEBUG); + //printf("now gossip_debug_mask = 0x%016lx\n", gossip_debug_mask); + /* restore previous gossip_debug_mask */ + //gossip_set_debug_mask(debug_on, curr_mask); + + memset(locked_time, 0, (sizeof(time_t) * (BLOCKS_IN_CACHE + 1))); + + /* Direct output of ucache library, TODO: change this later */ + if (!out) + { + out = stdout; + } + + /* Daemonize! 
*/ + //rc = daemon( 0, 0); + rc = daemon( 1, 1); + + if(rc != 0) + { + + perror("daemon-izing failed"); + exit(EXIT_FAILURE); + } + + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: ucached started\n"); + + /* Start up with shared memory initialized */ + if(CREATE_AT_START) + { + run_as_child('c'); + atexit(clean_up); + } + + /* Create 2 fifos */ + rc = mkfifo(FIFO1, FILE_MODE); + if(rc != 0) + { + /* Couldn't create FIFO */ + return -1; + } + rc = mkfifo(FIFO2, FILE_MODE); + if(rc != 0) + { + /* Couldn't create FIFO */ + return -1; + } + + while(1) + { + readfd = open(FIFO1, O_RDONLY | O_NONBLOCK); + struct pollfd fds[1]; + fds[0].fd = readfd; + fds[0].events = POLLIN; + + rc = poll(fds, 1, FIFO_TIMEOUT * 1000); + + if(rc == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: poll: errno = %d\n", errno); + } + + if(fds[0].revents & POLLIN) + { + /* Data to be read */ + memset(buffer, 0, BUFF_SIZE); + int count = read(readfd, buffer, BUFF_SIZE); + while(count <= 0) + { + if(count == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: caught error while trying to read cmd: errno = %d\n", + errno); + } + /* Try to read again */ + count = read(readfd, buffer, BUFF_SIZE); + } + if(count > 0) + { + /* Data read into buffer*/ + char c = buffer[0]; + /* Valid Command? 
*/ + if(c == 'c' || c == 'd' || c == 'x' || c == 'i') + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: Command Received: %c\n", c); + if(c == 'c' || c == 'i') + { + /* Run creation in child process */ + run_as_child(c); + } + else + { + execute_cmd(c); + } + check_rc(rc); + } + /* Invalid Command */ + else + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: Invalid Command Received: %c\n", c); + rc = -1; + check_rc(rc); + } + + /* Data can be written, not guaranteed anything to write */ + int responseLength = strlen(buffer); + if(responseLength != 0) + { + writefd = open(FIFO2, O_WRONLY); + if(writefd == -1) + { perror("Error Opening File"); + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: opening write FIFO: errno = %d\n", errno); + } + rc = write(writefd, buffer, BUFF_SIZE); + while(rc <= 0) + { printf("rc = %d\n", rc); + rc = write(writefd, buffer, BUFF_SIZE); + } + memset(buffer, 0, BUFF_SIZE); + } + else + { + printf("no response\n"); + } + } + } + close(readfd); + + if(ucache_avail) + { + /* TODO: write some stats to file periodically */ + + /* Write some dirty blocks out */ + /* TODO: create function to do this. 
*/ + + /* Check for hung locks */ + rc = ucached_lockchk(); + } + } +} diff --git a/src/apps/ucache/ucached.h b/src/apps/ucache/ucached.h new file mode 100644 index 0000000..e876abe --- /dev/null +++ b/src/apps/ucache/ucached.h @@ -0,0 +1,81 @@ +#ifndef UCACHED_H +#define UCACHED_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Daemon Logging */ +#ifndef UCACHED_LOG_FILE +#define UCACHED_LOG_FILE "/tmp/ucached.log" +#endif + +#ifndef UCACHED_INFO_FILE +#define UCACHED_INFO_FILE "/tmp/ucached.info" +#endif + +#ifndef UCACHED_STARTED +#define UCACHED_STARTED "/tmp/ucached.started" +#endif + +#define GOSSIP_UCACHED_DEBUG 0x0001000000000000 +#define GOSSIP_UCACHED_CMD_DEBUG 0x0000100000000000 + + +/* FIFO Defines */ +#define FILE_MODE (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH) +#define FIFO1 "/tmp/ucached.fifo.1" +#define FIFO2 "/tmp/ucached.fifo.2" +#define BUFF_SIZE 4096 +#define LOG_LEN 256 + +#ifndef LOG_TIMESTAMP +#define LOG_TIMESTAMP 0 +#endif + +#ifndef CREATE_AT_START +#define CREATE_AT_START 1 +#endif + +#ifndef DEST_AT_EXIT +#define DEST_AT_EXIT 1 +#endif + +#ifndef FIFO_TIMEOUT +#define FIFO_TIMEOUT 10 /* Second */ +#endif + +/* For shared memory for ucache and ucache locks */ +#define KEY_FILE "/etc/fstab" +#define SHM_ID1 'l' /* for ucache locks */ +#define SHM_ID2 'm' /* for ucache memory */ + +#ifndef SHM_R +#define SHM_R 0400 +#endif + +#ifndef SHM_W +#define SHM_W 0200 +#endif + +/* SVSHM Permissions */ +#ifndef SVSHM_MODE +#define SVSHM_MODE (SHM_R | SHM_W | SHM_R >> 3 | SHM_W >> 3 | SHM_R >> 6 | SHM_W >> 6) +#endif + +#ifndef BLOCK_LOCK_TIMEOUT +#define BLOCK_LOCK_TIMEOUT 100 +#endif + +#endif diff --git a/src/apps/ucache/ucached_cmd.c b/src/apps/ucache/ucached_cmd.c new file mode 100644 index 0000000..7197d9b --- /dev/null +++ b/src/apps/ucache/ucached_cmd.c @@ -0,0 +1,120 @@ + +#include +#include "ucached.h" + +/* + * s = start 
ucached + * c = create shared memory for ucache + * d = destroy shared memory for ucache + * x = exit ucached + */ +int main(int argc, char **argv) +{ + if(argc < 2 || argc > 3) + { + printf("usage: ucache_cmd \n"); + return 0; + } + + int rc = 0; + void *rp; + + char this_cmd = argv[1][0]; + if(this_cmd == 's') + { + char ps_buff[256]; + FILE *pipe = popen("ps -e | grep -w ucached", "r"); + rp = fgets(ps_buff, 256, pipe); + if(rp == NULL) + { + rc = remove(FIFO1); + rc = remove(FIFO2); + /* Crank up the daemon since it's not running */ + rc = system("ucached"); + puts("SUCCESS: Daemon started"); + } + else + { + puts("FAILURE: Daemon already started"); + puts(ps_buff); + } + return 1; + } + + char buffer[BUFF_SIZE]; + memset(buffer, 0, BUFF_SIZE); + + /* Read and Write File Descriptors */ + int readfd; + int writefd; + + /* Open FIFOs for use */ + writefd = open(FIFO1, O_WRONLY); + + if(writefd == -1) + { + perror("ucached_cmd couldn't open writefd"); + return -1; + } + + /* Send Command to Daemon */ + buffer[0] = this_cmd; + if(argc == 3) + { + strcat(buffer, " "); + strcat(buffer, argv[2]); + } + rc = write(writefd, buffer, BUFF_SIZE); + if(rc == -1) + { + perror("Error occured during write to ucached"); + } + + memset(buffer, 0, BUFF_SIZE); + readfd = open(FIFO2, O_RDONLY); + + /* Collect Response */ + int count = read(readfd, buffer, BUFF_SIZE); + while(count > 0 || ((count == -1) && (errno == EINTR))) + { + //if(count) + // printf("read: %d\n", count); + //buffer[count] = 0; + fputs(buffer, stdout); + if(strlen(buffer) < BUFF_SIZE) + { + //printf("strlen = %d\n", strlen(buffer)); + break; + } + memset(buffer, 0, BUFF_SIZE); + count = read(readfd, buffer, BUFF_SIZE); + } + printf("\n"); + /* Close FIFO when done */ + close(readfd); + close(writefd); + + if(this_cmd == 'i') + { + memset(buffer, 0, BUFF_SIZE); + FILE *info = fopen(UCACHED_INFO_FILE, "r"); + /* + while(!info) + { + info = fopen(UCACHED_INFO_FILE, "r"); + }*/ + if(!info) + { + 
perror("UCACHED_INFO_FILE"); + } + while(fread(buffer, sizeof(char), BUFF_SIZE - 1, info) > 0) + { + buffer[strlen(buffer)] = 0; + printf("%s", buffer); + memset(buffer, 0, BUFF_SIZE); + } + fclose(info); + } + return 1; +} + diff --git a/src/apps/ucache/ucached_common.c b/src/apps/ucache/ucached_common.c new file mode 100644 index 0000000..03ccde7 --- /dev/null +++ b/src/apps/ucache/ucached_common.c @@ -0,0 +1,16 @@ +/* ucached_common.c */ + +#include "ucached.h" + +int myread(int readfd, char *buffer) +{ + int count = read(readfd, buffer, BUFF_SIZE); + return count; +} + +void mywrite(int writefd, const char *src, char *buffer) +{ + strcpy(buffer, src); + write(writefd, buffer, BUFF_SIZE); + memset(buffer, 0, BUFF_SIZE); +} diff --git a/src/apps/ucache/watch_daemons b/src/apps/ucache/watch_daemons new file mode 100755 index 0000000..749f710 --- /dev/null +++ b/src/apps/ucache/watch_daemons @@ -0,0 +1 @@ +watch --interval=1 pgrep -l ucached diff --git a/src/apps/ucache/watch_ipcs b/src/apps/ucache/watch_ipcs new file mode 100755 index 0000000..7714e86 --- /dev/null +++ b/src/apps/ucache/watch_ipcs @@ -0,0 +1 @@ +watch --interval=1 ipcs -m diff --git a/src/apps/ucache/watch_log b/src/apps/ucache/watch_log new file mode 100755 index 0000000..1911bdc --- /dev/null +++ b/src/apps/ucache/watch_log @@ -0,0 +1 @@ +watch --interval=1 'cat /tmp/ucached.log | cat -n | sort -r -g' diff --git a/src/client/sysint/mgmt-get-uid-list.sm b/src/client/sysint/mgmt-get-uid-list.sm new file mode 100644 index 0000000..c1db40e --- /dev/null +++ b/src/client/sysint/mgmt-get-uid-list.sm @@ -0,0 +1,243 @@ +/* + * (C) 2003 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/** \file + * \ingroup mgmtint + * + * PVFS management interface routines for obtaining server UID (user ID) + * information. This is used to determine which user's are sending + * requests to a given server. 
+ */ +#include "client-state-machine.h" +#include "pvfs2-debug.h" +#include "job.h" +#include "gossip.h" +#include "pvfs2-mgmt.h" + +extern job_context_id pint_client_sm_context; + +static int get_uid_list_comp_fn( + void* v_p, struct PVFS_server_resp *resp_p, int i); + +%% + +machine pvfs2_client_mgmt_get_uid_list_sm +{ + state setup_msgpair + { + run mgmt_get_uid_list_setup_msgpair; + success => xfer_msgpair; + default => cleanup; + } + + state xfer_msgpair + { + jump pvfs2_msgpairarray_sm; + default => cleanup; + } + + state cleanup + { + run mgmt_get_uid_list_cleanup; + default => terminate; + } +} + +%% + +PVFS_error PVFS_imgmt_get_uid_list( + PVFS_fs_id fs_id, + PVFS_credentials *credentials, + int server_count, + PVFS_BMI_addr_t *addr_array, + uint32_t history, + PVFS_uid_info_s **uid_info_array, + uint32_t *uid_count, + PVFS_mgmt_op_id *op_id, + PVFS_hint hints, + void *user_ptr) +{ + PINT_smcb *smcb = NULL; + PINT_client_sm *sm_p = NULL; + int ret = 0; + + gossip_debug(GOSSIP_CLIENT_DEBUG, + "PVFS_imgmt_get_uid_list entered\n"); + + if ((server_count < 1) || (!addr_array) || (history < 1) || + (!uid_info_array) || (!uid_count)) + { + return -PVFS_EINVAL; + } + + PINT_smcb_alloc(&smcb, PVFS_MGMT_GET_UID_LIST, + sizeof(struct PINT_client_sm), + client_op_state_get_machine, + client_state_machine_terminate, + pint_client_sm_context); + + if (!smcb) + { + return -PVFS_ENOMEM; + } + + sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + PINT_init_msgarray_params(sm_p, fs_id); + PINT_init_sysint_credentials(sm_p->cred_p, credentials); + sm_p->u.get_uid_list.uid_statistics = uid_info_array; + sm_p->u.get_uid_list.history = history; + sm_p->u.get_uid_list.fs_id = fs_id; + sm_p->u.get_uid_list.server_count = server_count; + sm_p->u.get_uid_list.addr_array = addr_array; + sm_p->u.get_uid_list.uid_count = uid_count; + PVFS_hint_copy(hints, &sm_p->hints); + + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, server_count); + if (ret != 0) + { + PINT_smcb_free(smcb); + 
return ret; + } + + return PINT_client_state_machine_post( + smcb, op_id, user_ptr); +} + +PVFS_error PVFS_mgmt_get_uid_list( + PVFS_fs_id fs_id, + PVFS_credentials *credentials, + int server_count, + PVFS_BMI_addr_t *addr_array, + uint32_t history, + PVFS_uid_info_s **uid_info_array, + uint32_t *uid_count, + PVFS_hint hints, + void *user_ptr) +{ + PVFS_error ret = -PVFS_EINVAL, error = 0; + PVFS_mgmt_op_id op_id; + + gossip_debug(GOSSIP_CLIENT_DEBUG, + "PVFS_mgmt_get_uid_list entered\n"); + + ret = PVFS_imgmt_get_uid_list(fs_id, credentials, server_count, addr_array, + history, uid_info_array, uid_count, &op_id, hints, NULL); + if (ret) + { + PVFS_perror_gossip("PVFS_imgmt_get_uid_list call", ret); + error = ret; + } + else + { + ret = PVFS_mgmt_wait(op_id, "get_uid_list", &error); + if (ret) + { + PVFS_perror_gossip("PVFS_mgmt_wait call", ret); + error = ret; + } + } + + gossip_debug(GOSSIP_CLIENT_DEBUG, + "PVFS_mgmt_get_uid_list completed\n"); + + PINT_mgmt_release(op_id); + return error; +} + +static PINT_sm_action mgmt_get_uid_list_setup_msgpair( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int i = 0; + PINT_sm_msgpair_state *msg_p = NULL; + + gossip_debug(GOSSIP_CLIENT_DEBUG, "get_uid_list state: " + "mgmt_get_uid_list_setup_msgpair\n"); + + /* setup msgpair array */ + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) + { + PINT_SERVREQ_MGMT_GET_UID_FILL( + msg_p->req, + *sm_p->cred_p, + sm_p->u.get_uid_list.history, + sm_p->hints); + + msg_p->fs_id = sm_p->u.get_uid_list.fs_id; + msg_p->handle = PVFS_HANDLE_NULL; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = get_uid_list_comp_fn; + msg_p->svr_addr = sm_p->u.get_uid_list.addr_array[i]; + } + + /* immediate return: next state jumps to msgpairarray machine */ + js_p->error_code = 0; + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); + return SM_ACTION_COMPLETE; +} + +static PINT_sm_action mgmt_get_uid_list_cleanup( + 
struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + PINT_msgpairarray_destroy(&sm_p->msgarray_op); + + sm_p->error_code = js_p->error_code; + + PINT_SET_OP_COMPLETE; + return SM_ACTION_TERMINATE; +} + +static int get_uid_list_comp_fn(void* v_p, + struct PVFS_server_resp *resp_p, + int i) +{ + int j = 0; + PINT_smcb *smcb = v_p; + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + + /* if this particular request was successful, then store the + * performance information in an array to be returned to caller + */ + if (sm_p->msgarray_op.msgarray[i].op_status == 0) + { + (sm_p->u.get_uid_list.uid_count)[i] = + resp_p->u.mgmt_get_uid.uid_info_array_count; + memcpy(sm_p->u.get_uid_list.uid_statistics[i], + resp_p->u.mgmt_get_uid.uid_info_array, + resp_p->u.mgmt_get_uid.uid_info_array_count + * sizeof(PVFS_uid_info_s)); + } + + /* if this is the last response, check all of the status values and + * return error code if any requests failed + */ + if (i == (sm_p->msgarray_op.count -1)) + { + for (j=0; j < sm_p->msgarray_op.count; j++) + { + if (sm_p->msgarray_op.msgarray[j].op_status != 0) + { + return(sm_p->msgarray_op.msgarray[j].op_status); + } + } + } + + return 0; +} + +/* + * Local variables: + * mode: c + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/client/usrint/mmap.c b/src/client/usrint/mmap.c new file mode 100644 index 0000000..909e8f3 --- /dev/null +++ b/src/client/usrint/mmap.c @@ -0,0 +1,180 @@ + +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ +/** \file + * \ingroup usrint + * + * mmap operations for user interface + */ + +#include "usrint.h" +#include "posix-ops.h" +#include "posix-pvfs.h" +#include "openfile-util.h" +#include + +static struct qlist_head maplist = QLIST_HEAD_INIT(maplist); + +/** PVFS mmap + * + * This is a very basic implementation that reads whole mapped + * region into memory and writes it back if shared on unmap. + * + * This may not perform well or do all of the neat things mmap + * does, but it will let basic stuff work. + */ +void *pvfs_mmap(void *start, + size_t length, + int prot, + int flags, + int fd, + off_t offset) +{ + int rc = 0; + pvfs_descriptor *pd; + struct pvfs_mmap_s *mlist; + void *maddr; + + if (flags & MAP_ANONYMOUS) + { + void *maddr; + /* this isn't a file system map - just do it */ + maddr = glibc_ops.mmap(start, length, prot, flags, fd, offset); + if (maddr == MAP_FAILED) + { + return MAP_FAILED; + } + /* and done */ + return maddr; + } + /* this is a PVFS file system map */ + /* first find the open file */ + pd = pvfs_find_descriptor(fd); + if (!pd) + { + return MAP_FAILED; + } + /* we will map an ANON region and read the file into it */ + maddr = glibc_ops.mmap(start, length, prot, flags & MAP_ANONYMOUS, + -1, offset); + if (maddr == MAP_FAILED) + { + return MAP_FAILED; + } + rc = pvfs_pread(fd, maddr, length, offset); + if (rc < 0) + { + glibc_ops.munmap(maddr, length); + return MAP_FAILED; + } + /* record this in the open file descriptor */ + mlist = (struct pvfs_mmap_s *)malloc(sizeof(struct pvfs_mmap_s)); + mlist->mst = start; + mlist->mlen = length; + mlist->mprot = prot; + mlist->mflags = flags; + mlist->mfd = fd; + mlist->moff = offset; + qlist_add(&mlist->link, &maplist); + /* and done */ + return maddr; +} + +/** PVFS munmap + * + * for now only unmap whole regions mapped with mmap + */ +int pvfs_munmap(void *start, size_t length) +{ + int rc = 0; + struct pvfs_mmap_s *mapl, *temp; + long long pagesize = getpagesize(); + +#if 
__SIZEOF_POINTER__ == __SIZEOF_LONG__ + if (((long)start % pagesize) != 0 || (length % pagesize) != 0) +#else + if (((long long)start % pagesize) != 0 || (length % pagesize) != 0) +#endif + { + errno = EINVAL; + return -1; + } + qlist_for_each_entry_safe(mapl, temp, &maplist, link) + { + /* assuming we must unmap something that was mapped */ + /* and not just part of it */ + if (mapl->mst == start && mapl->mlen == length) + { + qlist_del(&mapl->link); + break; + } + } + if (!mapl) + { + errno = EINVAL; + return -1; + } + if (mapl->mflags & MAP_SHARED) + { + pvfs_pwrite(mapl->mfd, mapl->mst, mapl->mlen, mapl->moff); + } + rc = glibc_ops.munmap(start, length); + free(mapl); + return rc; +} + +/** PVFS msync + * + * We ignore flags for now - only syncronous writebacks + * can add async later - but invalidate is not likely + */ +int pvfs_msync(void *start, size_t length, int flags) +{ + int rc = 0; + struct pvfs_mmap_s *mapl, *temp; + long long pagesize = getpagesize(); + +#if __SIZEOF_POINTER__ == __SIZEOF_LONG__ + if (((long)start % pagesize) != 0 || (length % pagesize) != 0) +#else + if (((long long)start % pagesize) != 0 || (length % pagesize) != 0) +#endif + { + errno = EINVAL; + return -1; + } + qlist_for_each_entry_safe(mapl, temp, &maplist, link) + { + if ((u_char *)mapl->mst <= (u_char *)start && + (u_char *)mapl->mst + mapl->mlen >= (u_char *)start + length) + { + break; + } + } + if (!mapl) + { + errno = ENOMEM; + return -1; + } + if (mapl->mflags & MAP_SHARED) + { + /* the diff between start and mst is distance from */ + /* start of buffer, and distnace from original offset */ + rc = pvfs_pwrite(mapl->mfd, start, length, + mapl->moff + ((u_char *)start - (u_char *)mapl->mst)); + } + return rc; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/client/usrint/module.mk.in b/src/client/usrint/module.mk.in new file mode 100644 index 0000000..364aeb3 --- /dev/null +++ 
b/src/client/usrint/module.mk.in @@ -0,0 +1,21 @@ +ifdef BUILD_USRINT + +DIR := src/client/usrint + +SRC := \ + $(DIR)/posix-pvfs.c \ + $(DIR)/request.c \ + $(DIR)/iocommon.c \ + $(DIR)/openfile-util.c \ + $(DIR)/ucache.c \ + $(DIR)/mmap.c + +USRC := \ + $(DIR)/posix.c \ + $(DIR)/stdio.c + +# list of all .c files (generated or otherwise) that belong in library +LIBSRC += $(SRC) +ULIBSRC += $(USRC) + +endif # BUILD_USRINT diff --git a/src/client/usrint/posix-ops.h b/src/client/usrint/posix-ops.h new file mode 100644 index 0000000..bb6f8b5 --- /dev/null +++ b/src/client/usrint/posix-ops.h @@ -0,0 +1,230 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines - file descriptors for pvfs + */ + +#ifndef POSIX_OPS_H +#define POSIX_OPS_H 1 + +/* POSIX functions */ + +/** struct of pointers to methods for posix system calls */ +typedef struct posix_ops_s +{ + int (*open)(const char *path, int flags, ...); + int (*open64)(const char *path, int flags, ...); + int (*openat)(int dirfd, const char *path, int flags, ...); + int (*openat64)(int dirfd, const char *path, int flags, ...); + int (*creat)(const char *path, mode_t mode, ...); + int (*creat64)(const char *path, mode_t mode, ...); + int (*unlink)(const char *path); + int (*unlinkat)(int dirfd, const char *path, int flags); + int (*rename)(const char *oldpath, const char *newpath); + int (*renameat)(int olddirfd, const char *oldpath, + int newdirfd, const char *newpath); + ssize_t (*read)( int fd, void *buf, size_t count); + ssize_t (*pread)( int fd, void *buf, size_t count, off_t offset); + ssize_t (*readv)(int fd, const struct iovec *vector, int count); + ssize_t (*pread64)( int fd, void *buf, size_t count, off64_t offset); + ssize_t (*write)( int fd, const void *buf, size_t count); + ssize_t (*pwrite)( int fd, const void *buf, size_t count, off_t offset); + ssize_t (*writev)( int fd, 
const struct iovec *vector, int count); + ssize_t (*pwrite64)( int fd, const void *buf, size_t count, off64_t offset); + off_t (*lseek)(int fd, off_t offset, int whence); + off64_t (*lseek64)(int fd, off64_t offset, int whence); + int (*truncate)(const char *path, off_t length); + int (*truncate64)(const char *path, off64_t length); + int (*ftruncate)(int fd, off_t length); + int (*ftruncate64)(int fd, off64_t length); + int (*fallocate)(int fd, off_t offset, off_t length); + int (*close)( int fd); + int (*stat)(const char *path, struct stat *buf); + int (*stat64)(const char *path, struct stat64 *buf); + int (*fstat)(int fd, struct stat *buf); + int (*fstat64)(int fd, struct stat64 *buf); + int (*fstatat)(int fd, const char *path, struct stat *buf, int flag); + int (*fstatat64)(int fd, const char *path, struct stat64 *buf, int flag); + int (*lstat)(const char *path, struct stat *buf); + int (*lstat64)(const char *path, struct stat64 *buf); + int (*futimesat)(int dirfd, const char *path, const struct timeval times[2]); + int (*utimes)(const char *path, const struct timeval times[2]); + int (*utime)(const char *path, const struct utimbuf *buf); + int (*futimes)(int fd, const struct timeval times[2]); + int (*dup)(int oldfd); + int (*dup2)(int oldfd, int newfd); + int (*chown)(const char *path, uid_t owner, gid_t group); + int (*fchown)(int fd, uid_t owner, gid_t group); + int (*fchownat)(int fd, const char *path, uid_t owner, gid_t group, int flag); + int (*lchown)(const char *path, uid_t owner, gid_t group); + int (*chmod)(const char *path, mode_t mode); + int (*fchmod)(int fd, mode_t mode); + int (*fchmodat)(int fd, const char *path, mode_t mode, int flag); + int (*mkdir)(const char *path, mode_t mode); + int (*mkdirat)(int dirfd, const char *path, mode_t mode); + int (*rmdir)(const char *path); + ssize_t (*readlink)(const char *path, char *buf, size_t bufsiz); + ssize_t (*readlinkat)(int dirfd, const char *path, char *buf, size_t bufsiz); + int (*symlink)(const 
char *oldpath, const char *newpath); + int (*symlinkat)(const char *oldpath, int newdirfd, const char *newpath); + int (*link)(const char *oldpath, const char *newpath); + int (*linkat)(int olddirfd, const char *oldpath, + int newdirfd, const char *newpath, int flags); + int (*readdir)(u_int fd, struct dirent *dirp, u_int count); + int (*getdents)(u_int fd, struct dirent *dirp, u_int count); + int (*getdents64)(u_int fd, struct dirent64 *dirp, u_int count); + int (*access)(const char *path, int mode); + int (*faccessat)(int dirfd, const char *path, int mode, int flags); + int (*flock)(int fd, int op); + int (*fcntl)(int fd, int cmd, ...); + void (*sync)(void); + int (*fsync)(int fd); + int (*fdatasync)(int fd); + int (*fadvise)(int fd, off_t offset, off_t len, int advice); + int (*fadvise64)(int fd, off64_t offset, off64_t len, int advice); + int (*statfs)(const char *path, struct statfs *buf); + int (*statfs64)(const char *path, struct statfs64 *buf); + int (*fstatfs)(int fd, struct statfs *buf); + int (*fstatfs64)(int fd, struct statfs64 *buf); + int (*statvfs)(const char *path, struct statvfs *buf); + int (*fstatvfs)(int fd, struct statvfs *buf); + int (*mknod)(const char *path, mode_t mode, dev_t dev); + int (*mknodat)(int dirfd, const char *path, mode_t mode, dev_t dev); + ssize_t (*sendfile)(int outfd, int infd, off_t *offset, size_t count); + ssize_t (*sendfile64)(int outfd, int infd, off64_t *offset, size_t count); + int (*setxattr)(const char *path, const char *name, + const void *value, size_t size, int flags); + int (*lsetxattr)(const char *path, const char *name, + const void *value, size_t size, int flags); + int (*fsetxattr)(int fd, const char *name, + const void *value, size_t size, int flags); + ssize_t (*getxattr)(const char *path, const char *name, + void *value, size_t size); + ssize_t (*lgetxattr)(const char *path, const char *name, + void *value, size_t size); + ssize_t (*fgetxattr)(int fd, const char *name, void *value, size_t size); + ssize_t 
(*listxattr)(const char *path, char *list, size_t size); + ssize_t (*llistxattr)(const char *path, char *list, size_t size); + ssize_t (*flistxattr)(int fd, char *list, size_t size); + int (*removexattr)(const char *path, const char *name); + int (*lremovexattr)(const char *path, const char *name); + int (*fremovexattr)(int fd, const char *name); + mode_t (*umask)(mode_t mask); + mode_t (*getumask)(void); + int (*getdtablesize)(void); + void *(*mmap)(void *start, size_t length, int prot, + int flags, int fd, off_t offset); + int (*munmap)(void *start, size_t length); + int (*msync)(void *start, size_t length, int flags); +#if 0 + int (*acl_delete_def_file)(const char *path_p); + acl_t (*acl_get_fd)(int fd); + acl_t (*acl_get_file)(const char *path_p, acl_type_t type); + int (*acl_set_fd)(int fd, acl_t acl); + int (*acl_set_file)(const char *path_p, acl_type_t type, acl_t acl); +#endif + + /* socket operations */ + int (*socket)(int dowmain, int type, int protocol); + int (*accept)(int sockfd, struct sockaddr *addr, socklen_t *alen); + int (*bind)(int sockfd, const struct sockaddr *addr, socklen_t alen); + int (*connect)(int sockfd, const struct sockaddr *addr, socklen_t alen); + int (*getpeername)(int sockfd, struct sockaddr *addr, socklen_t *alen); + int (*getsockname)(int sockfd, struct sockaddr *addr, socklen_t *alen); + int (*getsockopt)(int sockfd, int lvl, int oname, + void *oval, socklen_t *olen); + int (*setsockopt)(int sockfd, int lvl, int oname, + const void *oval, socklen_t olen); + int (*ioctl)(int fd, int request, ...); + int (*listen)(int sockfd, int backlog); + int (*recv)(int sockfd, void *buf, size_t len, int flags); + int (*recvfrom)(int sockfd, void *buf, size_t len, int flags, + struct sockaddr *addr, socklen_t *alen); + int (*recvmsg)(int sockfd, struct msghdr *msg, int flags); + /* int (*select)(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, + struct timeval *timeout); */ + /* void (*FD_CLR)(int fd, fd_set *set); */ + /* void 
(*FD_ISSET)(int fd, fd_set *set); */ + /* void (*FD_SET)(int fd, fd_set *set); */ + /* void (*FD_ZERO)(fd_set *set); */ + /* int (*pselect)(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, + const struct timeval *timeout, const sigset_t *sigmask); */ + int (*send)(int sockfd, const void *buf, size_t len, int flags); + int (*sendto)(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *addr, socklen_t alen); + int (*sendmsg)(int sockfd, const struct msghdr *msg, int flags); + int (*shutdown)(int sockfd, int how); + int (*socketpair)(int d, int type, int prtocol, int sv[2]); + int (*pipe)(int filedes[2]); +} posix_ops; + +#ifdef BITDEFS +#define stat stat64 +#define fstat fstat64 +#define fstatat fstatat64 +#define lstat lstat64 +#define statfs statfs64 +#define fstatfs fstatfs64 +#define sendfile sendfile64 +#endif + +extern posix_ops glibc_ops; +extern posix_ops pvfs_ops; + +typedef struct pvfs_mmap_s +{ + void *mst; /**< start of mmap region */ + size_t mlen; /**< length of mmap region */ + int mprot; /**< protection of mmap region */ + int mflags; /**< flags of mmap region */ + int mfd; /**< file descriptor of mmap region */ + off_t moff; /**< offset of mmap region */ + struct qlist_head link; +} *pvfs_mmap_t; + +/** PVFS-POSIX Descriptor table entry */ +/* these items are shared between duped descrptors */ +typedef struct pvfs_descriptor_status_s +{ + gen_mutex_t lock; /**< protect struct from mult threads */ + int dup_cnt; /**< number of table slots with this des */ + posix_ops *fsops; /**< syscalls to use for this file */ + PVFS_object_ref pvfs_ref; /**< PVFS fs_id and handle for PVFS file */ + int flags; /**< the open flags used for this file */ + int mode; /**< stat mode of the file - may be volatile */ + off64_t file_pointer; /**< offset from the beginning of the file */ + PVFS_ds_position token; /**< used db Trove to iterate dirents */ + char *dpath; /**< path of an open directory for fchdir */ + struct file_ent_s *fent; /**< 
reference to cached objects */ + /**< set to NULL if not caching this file */ +} pvfs_descriptor_status; + +/* these are unique among descriptors */ +typedef struct pvfs_descriptor_s +{ + gen_mutex_t lock; /**< protect struct from mult threads */ + int is_in_use; /**< PVFS_FS if this descriptor is valid */ + int fd; /**< file number in PVFS descriptor_table */ + int true_fd; /**< the true file number depending on FS */ + int fdflags; /**< POSIX file descriptor flags */ + pvfs_descriptor_status *s; +} pvfs_descriptor; + +typedef struct pvfs_descriptor_s PFILE; /* these are for posix interface */ +typedef struct pvfs_descriptor_s PDIR; + +#endif +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/client/usrint/request.c b/src/client/usrint/request.c new file mode 100644 index 0000000..940d5cc --- /dev/null +++ b/src/client/usrint/request.c @@ -0,0 +1,157 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines - routines to convert requests for pvfs + */ +#define USRINT_SOURCE 1 +#include "usrint.h" + +int pvfs_check_vector(const struct iovec *iov, + int count, + PVFS_Request *req, + void **buf); + +/** + * converts a posix iovec into a PVFS Request + */ +int pvfs_convert_iovec (const struct iovec *vector, + int count, + PVFS_Request *req, + void **buf) +{ + /* for now just check for vectors and covert the rest */ + /* to a basic indexed struct */ + return pvfs_check_vector(vector, count, req, buf); +} + +int pvfs_check_vector(const struct iovec *iov, + int count, + PVFS_Request *req, + void **buf) +{ + int i; + int vstart; + int vlen; + int bsz; + PVFS_size stride; + int32_t *bsz_array; + PVFS_size *disp_array; + PVFS_Request *req_array; + int rblk; + + /* set up request arrays */ + bsz_array = (int32_t *)malloc(count * sizeof(int32_t)); + if (!bsz_array) + { + return -1; + } + disp_array = (PVFS_size *)malloc(count * sizeof(PVFS_size)); + if (!disp_array) + { + free(bsz_array); + return -1; + } + req_array = (PVFS_Request *)malloc(count * sizeof(PVFS_Request)); + if (!disp_array) + { + free(disp_array); + free(bsz_array); + return -1; + } + /* for now we assume that addresses in the iovec are ascending */ + /* not that otherwise won't work, but we're not sure */ + /* the first address will be assumed to be the base address of */ + /* the whole request. 
the displacement of each vector is relative */ + /* to that address */ + if (count > 0) + { + *buf = iov[0].iov_base; + } + rblk = 0; + /* start at beginning of iovec */ + i = 0; + while(i < count) + { + /* starting a new vector at position i */ + vstart = i; + vlen = 1; + bsz = iov[i].iov_len; + stride = 0; + /* vector blocks must be of equal size */ + while(++i < count && iov[i].iov_len == bsz) + { + if(vlen == 1) + { + /* two blocks of equal size are a vector of two */ + stride = (u_char *)iov[i].iov_base - + (u_char *)iov[i - 1].iov_base; + if (stride < bsz) + { + /* overlapping blocks and negative strides are problems */ + break; + } + vlen++; + } + else if (((u_char *)iov[i].iov_base - + (u_char *)iov[i - 1].iov_base) == stride) + { + /* to add more blocks, stride must match */ + vlen++; + } + else + { + /* doesn't match - end of vector */ + break; + } + } + if (vlen == 1) + { + /* trivial conversion */ + bsz_array[rblk] = iov[vstart].iov_len; + disp_array[rblk] = (PVFS_size)((u_char *)iov[vstart].iov_base - + (u_char *)*buf); + req_array[rblk] = PVFS_BYTE; + rblk++; + } + else + { + /* found a vector */ + bsz_array[rblk] = 1; + disp_array[rblk] = (PVFS_size)((u_char *)iov[vstart].iov_base - + (u_char *)*buf); + PVFS_Request_vector(vlen, bsz, stride, PVFS_BYTE, &req_array[rblk]); + rblk++; + } + } + /* now build full request */ + PVFS_Request_struct(rblk, bsz_array, disp_array, req_array, req); + PVFS_Request_commit(req); + free(bsz_array); + free(disp_array); + while (rblk--) + { + if (req_array[rblk] != PVFS_BYTE) + { + PVFS_Request_free(&req_array[rblk]); + } + } + free(req_array); + /* req is not freed, the caller is expected to do that */ + return 0; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/client/usrint/socket.c b/src/client/usrint/socket.c new file mode 100644 index 0000000..0ae1802 --- /dev/null +++ b/src/client/usrint/socket.c @@ -0,0 +1,506 @@ 
+/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines - wrappers for posix socket system calls + */ +#define USRINT_SOURCE 1 +#include "usrint.h" +#include +#include "posix-ops.h" +#include "posix-pvfs.h" +#include "openfile-util.h" + +/* + * SOCKET SYSTEM CALLS + */ + +int socket (int domain, int type, int protocol) +{ + int sockfd; + pvfs_descriptor *pd; + + /* sockfd = glibc_ops.socket(domain, type, protocol); */ + sockfd = syscall(SYS_socketcall, domain, type, protocol); + if (sockfd < 0) + { + return sockfd; + } + pd = pvfs_alloc_descriptor(&glibc_ops, sockfd, NULL, 0); + pd->mode |= S_IFSOCK; + return pd->fd; +} + +int accept (int sockfd, struct sockaddr *addr, socklen_t *alen) +{ + int rc = 0, fd; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + fd = pd->fsops->accept(pd->true_fd, addr, alen); + if (fd < 0) + { + rc = -1; + goto errorout; + } + pd = pvfs_alloc_descriptor(&glibc_ops, fd , NULL, 0); + pd->mode |= S_IFSOCK; + rc = fd; +errorout: + return rc; +} + +int bind (int sockfd, const struct sockaddr *addr, socklen_t alen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->bind(pd->true_fd, addr, alen); +errorout: + return rc; +} + +int connect (int sockfd, const struct sockaddr *addr, socklen_t alen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->connect(pd->true_fd, addr, alen); 
+errorout: + return rc; +} + +int getpeername (int sockfd, struct sockaddr *addr, socklen_t *alen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->getpeername(pd->true_fd, addr, alen); +errorout: + return rc; +} + +int getsockname (int sockfd, struct sockaddr *addr, socklen_t *alen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->getsockname(pd->true_fd, addr, alen); +errorout: + return rc; +} + +int getsockopt (int sockfd, int lvl, int oname, + void *oval, socklen_t *olen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->getsockopt(pd->true_fd, lvl, oname, oval, olen); +errorout: + return rc; +} + +int setsockopt (int sockfd, int lvl, int oname, + const void *oval, socklen_t olen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->setsockopt(pd->true_fd, lvl, oname, oval, olen); +errorout: + return rc; +} + +int ioctl (int fd, int request, ...) 
+{ + int rc; + pvfs_descriptor *pd; + va_list ap; + + va_start(ap, request); + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->ioctl(pd->true_fd, request, ap); + va_end(ap); +errorout: + return rc; +} + +int listen (int sockfd, int backlog) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->listen(pd->true_fd, backlog); +errorout: + return rc; +} + +int recv (int sockfd, void *buf, size_t len, int flags) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->recv(pd->true_fd, buf, len, flags); +errorout: + return rc; +} + +int recvfrom (int sockfd, void *buf, size_t len, int flags, + struct sockaddr *addr, socklen_t *alen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->recvfrom(pd->true_fd, buf, len, flags, addr, alen); +errorout: + return rc; +} + +int recvmsg (int sockfd, struct msghdr *msg, int flags) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->recvmsg(pd->true_fd, msg, flags); +errorout: + return rc; +} + +/* int select (int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, + struct timeval *timeout); */ +/* void FD_CLR (int fd, fd_set *set) */ +/* void 
FD_ISSET (int fd, fd_set *set) */ +/* void FD_SET (int fd, fd_set *set) */ +/* void FD_ZERO (fd_set *set); */ +/* int pselect (int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, + const struct timeval *timeout, const sigset_t *sigmask); */ + +int send (int sockfd, const void *buf, size_t len, int flags) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->send(pd->true_fd, buf, len, flags); +errorout: + return rc; +} + +int sendto (int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *addr, socklen_t alen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->sendto(pd->true_fd, buf, len, flags, addr, alen); +errorout: + return rc; +} + +int sendmsg (int sockfd, const struct msghdr *msg, int flags) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->sendmsg(pd->true_fd, msg, flags); +errorout: + return rc; +} + +int shutdown (int sockfd, int how) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->shutdown(pd->true_fd, how); +errorout: + return rc; +} + +int socketpair (int d, int type, int protocol, int sv[2]) +{ + int rc = 0; + pvfs_descriptor *pd0, *pd1; + rc = glibc_ops.socketpair(d, type, protocol, sv); + if (rc < 0) + { + goto errorout; + } + pd0 = pvfs_alloc_descriptor(&glibc_ops, 
sv[0], NULL, 0); + if (!pd0) + { + goto errorout; + } + pd1 = pvfs_alloc_descriptor(&glibc_ops, sv[1], NULL, 0); + if (!pd1) + { + pvfs_free_descriptor(pd0->fd); + errno = EMFILE; + rc = -1; + goto errorout; + } + pd0->mode |= S_IFSOCK; + pd1->mode |= S_IFSOCK; + sv[0] = pd0->true_fd; + sv[1] = pd1->true_fd; +errorout: + return rc; +} + +int pipe(int filedes[2]) +{ + int rc = 0; + pvfs_descriptor *f0, *f1; + int fa[2]; + if(!filedes) + { + errno = EFAULT; + rc = -1; + goto errorout; + } + rc = glibc_ops.pipe(fa); + if (rc < 0) + { + goto errorout; + } + f0 = pvfs_alloc_descriptor(&glibc_ops, fa[0], NULL, 0); + if (!f0) + { + goto errorout; + } + f1 = pvfs_alloc_descriptor(&glibc_ops, fa[1], NULL, 0); + if (!f1) + { + pvfs_free_descriptor(f0->fd); + errno = EMFILE; + rc = -1; + goto errorout; + } + f0->mode |= S_IFSOCK; + f1->mode |= S_IFSOCK; + filedes[0] = f0->true_fd; + filedes[1] = f1->true_fd; +errorout: + return rc; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/client/usrint/stdio-ops.h b/src/client/usrint/stdio-ops.h new file mode 100644 index 0000000..30f4407 --- /dev/null +++ b/src/client/usrint/stdio-ops.h @@ -0,0 +1,108 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines - implementation of stdio for pvfs + */ + +#ifndef STDIO_OPS_H +#define STDIO_OPS_H + +struct stdio_ops_s +{ + FILE *(*fopen)(const char *path, const char *mode); + FILE *(*fdopen)(int fd, const char *mode); + FILE *(*freopen)(const char *path, const char *mode, FILE *stream); + size_t (*fwrite)(const void *ptr, size_t size, size_t nmemb, FILE *stream); + size_t (*fwrite_unlocked)(const void *ptr, size_t size, size_t nmemb, FILE *stream); + size_t (*fread)(void *ptr, size_t size, size_t nmemb, FILE *stream); + size_t (*fread_unlocked)(void *ptr, size_t size, size_t nmemb, FILE *stream); + int (*fclose)(FILE *stream); + int (*fseek)(FILE *stream, long offset, int whence); + int (*fseek64)(FILE *stream, const off64_t offset, int whence); + int (*fsetpos)(FILE *stream, const fpos_t *pos); + void (*rewind)(FILE *stream); + long int (*ftell)(FILE *stream); + off64_t (*ftell64)(FILE *stream); + int (*fgetpos)(FILE *stream, fpos_t *pos); + int (*fflush)(FILE *stream); + int (*fflush_unlocked)(FILE *stream); + int (*fputc)(int c, FILE *stream); + int (*fputc_unlocked)(int c, FILE *stream); + int (*fputs)(const char *s, FILE *stream); + int (*fputs_unlocked)(const char *s, FILE *stream); + int (*putc)(int c, FILE *stream); + int (*putc_unlocked)(int c, FILE *stream); + int (*putchar)(int c); + int (*putchar_unlocked)(int c); + int (*puts)(const char *s); + int (*putw)(int wd, FILE *stream); + char *(*fgets)(char *s, int size, FILE *stream); + char *(*fgets_unlocked)(char *s, int size, FILE *stream); + int (*fgetc)(FILE *stream); + int (*fgetc_unlocked)(FILE *stream); + int (*getc)(FILE *stream); + int (*getc_unlocked)(FILE *stream); + int (*getchar)(void); + int (*getchar_unlocked)(void); + int (*getw)(FILE *stream); + char *(*gets)(char * s); + ssize_t (*getdelim)(char **lnptr, size_t *n, int delim, FILE *stream); + int (*ungetc)(int c, FILE *stream); + int (*vfprintf)(FILE *stream, const char 
*format, va_list ap); + int (*vprintf)(const char *format, va_list ap); + int (*fprintf)(FILE *stream, const char *format, ...); + int (*printf)(const char *format, ...); + void (*perror)(const char *s); + int (*fscanf)(FILE *stream, const char *format, ...); + int (*scanf)(const char *format, ...); + void (*clearerr)(FILE *stream); + void (*clearerr_unlocked)(FILE *stream); + int (*feof)(FILE *stream); + int (*feof_unlocked)(FILE *stream); + int (*ferror)(FILE *stream); + int (*ferror_unlocked)(FILE *stream); + int (*fileno)(FILE *stream); + int (*fileno_unlocked)(FILE *stream); + int (*remove)(const char *path); + void (*setbuf)(FILE *stream, char *buf); + void (*setbuffer)(FILE *stream, char *buf, size_t size); + void (*setlinebuf)(FILE *stream); + int (*setvbuf)(FILE *stream, char *buf, int mode, size_t size); + char *(*mkdtemp)(char *template); + int (*mkstemp)(char *template); + FILE *(*tmpfile)(void); + DIR *(*opendir)(const char *name); + DIR *(*fdopendir)(int fd); + int (*dirfd)(DIR *dir); + struct dirent *(*readdir)(DIR *dir); + struct dirent64 *(*readdir64)(DIR *dir); + void (*rewinddir)(DIR *dir); + void (*seekdir)(DIR *dir, off_t offset); + off_t (*telldir)(DIR *dir); + int (*closedir)(DIR *dir); + int (*scandir)(const char *dir, + struct dirent ***namelist, + int(*filter)(const struct dirent *), + int(*compar)(const void *, const void *)); + int (*scandir64 )(const char *dir, + struct dirent64 ***namelist, + int(*filter)(const struct dirent64 *), + int(*compar)(const void *, const void *)); +}; + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/client/usrint/ucache.c b/src/client/usrint/ucache.c new file mode 100644 index 0000000..ccb861b --- /dev/null +++ b/src/client/usrint/ucache.c @@ -0,0 +1,2066 @@ +/* + * (C) 2011 Clemson University + * + * See COPYING in top-level directory. 
 */

/**
 * \file
 * \ingroup usrint
 *
 * Experimental cache for user data.
 *
 */
#include "usrint.h"
#include "posix-ops.h"
#include "openfile-util.h"
#include "iocommon.h"
#if PVFS_UCACHE_ENABLE
#include "ucache.h"
#include /* NOTE(review): header name lost in patch extraction (an
          * angle-bracket include was stripped) — likely <sys/shm.h> or
          * similar SysV shared-memory header; restore from original source. */

/* Global Variables */
FILE *out; /* For Logging Purposes */

/* static uint32_t ucache_blk_cnt = 0; */

/* Global pointers to data in shared mem. Pointers set in ucache_initialize */
union user_cache_u *ucache = 0;
struct ucache_aux_s *ucache_aux = 0; /* All locks and stats stored here */

/* ucache_aux is a pointer to the actual data summarized by the following
 * pointers
*/
ucache_lock_t *ucache_locks = 0; /* The shmem of all ucache locks */
ucache_lock_t *ucache_lock = 0; /* Global Lock maintaining concurrency */
struct ucache_stats_s *ucache_stats = 0; /* Pointer to stats structure*/

/* Per-process (thread) execution statistics */
struct ucache_stats_s these_stats = { 0, 0, 0, 0, 0 };

/* Flags indicating ucache status */
int ucache_enabled = 0;  /* set to 1 by ucache_initialize on success */
char ftblInitialized = 0; /* set to 1 once the file table has been set up */

/* Internal Only Function Declarations */

/* Initialization */
static void add_mtbls(uint16_t blk);
static void init_memory_table(struct mem_table_s *mtbl);
static inline int init_memory_entry(struct mem_table_s *mtbl, int16_t index);

/* Gets */
static uint16_t get_next_free_mtbl(uint16_t *free_mtbl_blk, uint16_t *free_mtbl_ent);
static uint16_t get_free_fent(void);
static inline uint16_t get_free_ment(struct mem_table_s *mtbl);
static inline uint16_t get_free_blk(void);

/* Puts */
static int put_free_mtbl(struct mem_table_s *mtbl, struct file_ent_s *file);
static void put_free_fent(struct file_ent_s *fent);
static void put_free_ment(struct mem_table_s *mtbl, uint16_t ent);
static inline void put_free_blk(uint16_t blk);

/* File Entry Chain Iterator */
static unsigned char file_done(uint16_t index);
static uint16_t file_next(struct file_table_s *ftbl, uint16_t index);

/* Memory Entry Chain Iterator */
static inline unsigned char ment_done(uint16_t index);
static inline uint16_t ment_next(struct mem_table_s *mtbl, uint16_t index);

/* Dirty List Iterator */
static inline unsigned char dirty_done(uint16_t index);
static inline uint16_t dirty_next(struct mem_table_s *mtbl, uint16_t index);

/* File and Memory Insertion */
uint16_t insert_file(uint32_t fs_id, uint64_t handle);

static inline void *insert_mem(struct file_ent_s *fent,
                               uint64_t offset,
                               uint16_t *block_ndx
);

static inline void *set_item(struct file_ent_s *fent,
                             uint64_t offset,
                             uint16_t index
);

/* File and Memory Lookup */
static struct mem_table_s *lookup_file(
    uint32_t fs_id,
    uint64_t handle,
    uint16_t *file_mtbl_blk, /* Can be NULL if not desired */
    uint16_t *file_mtbl_ent,
    uint16_t *file_ent_index,
    uint16_t *file_ent_prev_index
);
static inline void *lookup_mem(struct mem_table_s *mtbl,
                               uint64_t offset,
                               uint16_t *item_index,
                               uint16_t *mem_ent_index,
                               uint16_t *mem_ent_prev_index
);

/* File and Memory Entry Removal */
static int remove_file(struct file_ent_s *fent);
static int wipe_mtbl(struct mem_table_s *mtbl);
static int remove_mem(struct file_ent_s *fent, uint64_t offset);

/* Eviction Utilities */
static uint16_t locate_max_fent(struct file_ent_s **fent);
static void update_LRU(struct mem_table_s *mtbl, uint16_t index);
static int evict_LRU(struct file_ent_s *fent);

/* Logging */
//static void log_ucache_stats(void);

/* List Printing Functions */
void print_LRU(struct mem_table_s *mtbl);
void print_dirty(struct mem_table_s *mtbl);

/* Flushing of individual files and blocks */
int flush_file(struct file_ent_s *fent);
int flush_block(struct file_ent_s *fent, struct mem_ent_s *ment);

/* Externally Visible API
 * The following functions are thread/processor safe regarding the cache
 * tables and data.
 */

/**
 * Initializes the cache.
+ * Mainly, it aquires a previously created shared memory segment used to + * cache data. The shared mem. creation and ftbl initialization should already + * have been done by the daemon at this point. + * + * The whole cache is protected globally by a locking mechanism. + * + * Locks (same type as global lock) can be used to protect block level data. + */ +int ucache_initialize(void) +{ + int rc = 0; + //gossip_set_debug_mask(1, GOSSIP_UCACHE_DEBUG); + + /* Aquire pointers to shmem segments (ucache_aux and ucache) */ + /* shmget segment containing ucache_aux */ + key_t key = ftok(KEY_FILE, SHM_ID1); + int shmflg = SVSHM_MODE; + int aux_shmid = shmget(key, 0, shmflg); + if(aux_shmid == -1) + { + //gossip_debug(GOSSIP_UCACHE_DEBUG, + // "ucache_initialize - ucache_aux shmget: errno = %d\n", errno); + return -1; + } + /* shmat ucache_aux */ + ucache_aux = shmat(aux_shmid, NULL, 0); + if((long int)ucache_aux == -1) + { + //gossip_debug(GOSSIP_UCACHE_DEBUG, + // "ucache_initialize - ucache_aux shmat: errno = %d\n", errno); + return -1; + } + + /* Set our global pointers to data in the ucache_aux struct */ + ucache_locks = ucache_aux->ucache_locks; + ucache_lock = get_lock(BLOCKS_IN_CACHE); + ucache_stats = &(ucache_aux->ucache_stats); + + /* ucache */ + key = ftok(KEY_FILE, SHM_ID2); + int ucache_shmid = shmget(key, 0, shmflg); + if(ucache_shmid == -1) + { + //gossip_debug(GOSSIP_UCACHE_DEBUG, + // "ucache_initialize - ucache shmget: errno = %d\n", errno); + return -1; + } + ucache = (union user_cache_u *)shmat(ucache_shmid, NULL, 0); + if((long int)ucache == -1) + { + //gossip_debug(GOSSIP_UCACHE_DEBUG, + // "ucache_initialize - ucache shmat: errno = %d\n", errno); + return -1; + } + + /* When this process ends we may want to dump ucache stats to a log file */ + //rc = atexit(log_ucache_stats); + + /* Declare the ucache enabled! */ + ucache_enabled = 1; + return rc; +} + +/** + * Returns a pointer to the mtbl corresponding to the blk & ent. 
+ * Input must be reliable otherwise invalid mtbl could be returned. + */ +inline struct mem_table_s *get_mtbl(uint16_t mtbl_blk, uint16_t mtbl_ent) +{ + if( mtbl_blk < BLOCKS_IN_CACHE && + mtbl_ent < MEM_TABLE_ENTRY_COUNT) + { + return &(ucache->b[mtbl_blk].mtbl[mtbl_ent]); + } + else + { + return (struct mem_table_s *)NILP; + } +} + +/** + * Initializes the ucache file table if it hasn't previously been initialized. + * Although this function is visible, DO NOT CALL THIS FUNCTION. + * It is meant to be called in the ucache daemon or during testing. + * see: src/apps/ucache/ucached.c for more info. + * + * Sets the char booelan ftblInitialized when ftbl has been successfully + * initialized. + * + * Returns 0 on success, -1 on failure. + */ +int ucache_init_file_table(char forceCreation) +{ + int i; + + /* check if already initialized? */ + if(ftblInitialized == 1 && !forceCreation) + { + return -1; + } + if(ucache) + { + memset(ucache, 0, CACHE_SIZE); + } + else + { + return -1; + } + + + /* initialize mtbl free list table */ + ucache->ftbl.free_mtbl_blk = NIL16; + ucache->ftbl.free_mtbl_ent = NIL16; + add_mtbls(0); + + /* set up list of free blocks */ + ucache->ftbl.free_blk = 1; + for (i = 1; i < (BLOCKS_IN_CACHE - 1); i++) + { + ucache->b[i].mtbl[0].free_list_blk = i + 1; + } + ucache->b[BLOCKS_IN_CACHE - 1].mtbl[0].free_list_blk = NIL16; + + /* set up file hash table */ + for (i = 0; i < FILE_TABLE_HASH_MAX; i++) + { + ucache->ftbl.file[i].tag_handle = NIL64; + ucache->ftbl.file[i].tag_id = NIL32; + ucache->ftbl.file[i].mtbl_blk = NIL16; + ucache->ftbl.file[i].mtbl_ent = NIL16; + ucache->ftbl.file[i].next = NIL16; + } + + /* set up list of free hash table entries */ + ucache->ftbl.free_list = FILE_TABLE_HASH_MAX; + for (i = FILE_TABLE_HASH_MAX; i < FILE_TABLE_ENTRY_COUNT - 1; i++) + { + ucache->ftbl.file[i].mtbl_blk = NIL16; + ucache->ftbl.file[i].mtbl_ent = NIL16; + ucache->ftbl.file[i].next = i + 1; + } + ucache->ftbl.file[FILE_TABLE_ENTRY_COUNT - 1].next = 
NIL16; + + /* Success */ + ftblInitialized = 1; + return 0; +} + +/** + * Opens a file in ucache. + */ +int ucache_open_file(PVFS_fs_id *fs_id, + PVFS_handle *handle, + struct file_ent_s **fent) +{ + int rc = -1; + uint16_t file_mtbl_blk; + uint16_t file_mtbl_ent; + uint16_t file_ent_index; + uint16_t file_ent_prev_index; + + lock_lock(ucache_lock); + + struct mem_table_s *mtbl = lookup_file((uint32_t)(*fs_id), + (uint64_t)(*handle), + &file_mtbl_blk, + &file_mtbl_ent, + &file_ent_index, + &file_ent_prev_index); + + if(mtbl == (struct mem_table_s *)NIL) + { + uint16_t fentIndex = insert_file((uint32_t)*fs_id, (uint64_t)*handle); + if(fentIndex > FILE_TABLE_ENTRY_COUNT) + { + rc = -1; + goto done; + } + *fent = &(ucache->ftbl.file[fentIndex]); + if((*fent)->mtbl_blk == NIL16 || (*fent)->mtbl_ent == NIL16) + { + rc = -1; + goto done; + } + + mtbl = get_mtbl((*fent)->mtbl_blk, (*fent)->mtbl_ent); + if(mtbl == (struct mem_table_s *)NILP) + { + /* Error - Could not insert */ + rc = -1; + goto done; + } + else + { + /* File Inserted */ + mtbl->ref_cnt = 1; + rc = 0; + goto done; + } + } + else + { + /* File was previously Inserted */ + mtbl->ref_cnt++; + *fent = &(ucache->ftbl.file[file_ent_index]); + rc = 1; + goto done; + } +done: + lock_unlock(ucache_lock); + return rc; +} + +/** + * Returns ptr to block in ucache based on file and offset + */ +inline void *ucache_lookup(struct file_ent_s *fent, uint64_t offset, + uint16_t *block_ndx) +{ + void *retVal = (void *) NIL; + if(fent) + { + lock_lock(ucache_lock); + struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, fent->mtbl_ent); + retVal = lookup_mem(mtbl, + offset, + block_ndx, + NULL, + NULL); + lock_unlock(ucache_lock); + } + return retVal; +} + +/** + * Prepares the data structures for block storage. + * On success, returns a pointer to where the block of data should be written. + * On failure, returns NIL. 
+ */ +inline void *ucache_insert(struct file_ent_s *fent, + uint64_t offset, + uint16_t *block_ndx +) +{ + lock_lock(ucache_lock); + void * retVal = insert_mem(fent, offset, block_ndx); + lock_unlock(ucache_lock); + return (retVal); +} + +#if 0 +/** + * Removes a cached block of data from mtbl + * Returns 1 on success, 0 on failure. + */ +int ucache_remove(struct file_ent_s *fent, uint64_t offset) +{ + int rc = 0; + lock_lock(ucache_lock); + rc = remove_mem(fent , offset); + lock_unlock(ucache_lock); + return rc; +} +#endif + +/** + * Flushes the entire ucache's dirty blocks (every file's dirty blocks) + * Returns 0 on success, -1 on failure + */ +int ucache_flush_cache(void) +{ + int rc = 0; + lock_lock(ucache_lock); + struct file_table_s *ftbl = &ucache->ftbl; + int i; + for(i = 0; i < FILE_TABLE_HASH_MAX; i++) + { + if((ftbl->file[i].tag_handle != NIL64) && + (ftbl->file[i].tag_handle != 0)) + { + /* Iterate accross file table chain. */ + uint16_t j; + for(j = i; !file_done(j); j = file_next(ftbl, j)) + { + rc = flush_file(&ftbl->file[j]); + if(rc !=0) + { + rc = -1; + goto done; + } + } + } + } + +done: + lock_unlock(ucache_lock); + return rc; +} + +/** + * Externally visible wrapper of the internal flush file function. + * This is intended to allow and external flush file call which locks the + * global lock, flushes the file, then releases the global lock. + * To prevent deadlock, do not call this in any function that aquires the + * global lock. + * Returns 0 on success, -1 on failure. + */ +int ucache_flush_file(struct file_ent_s *fent) +{ + int rc = 0; + lock_lock(ucache_lock); + rc = flush_file(fent); + lock_unlock(ucache_lock); + return rc; +} + +/** + * Internal only function - Flushes dirty blocks to the I/O Nodes + * Returns 0 on success and -1 on failure. 
+ */ +int flush_file(struct file_ent_s *fent) +{ + int rc = 0; + + struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, fent->mtbl_ent); + + uint16_t i; + uint16_t temp_next = NIL16; + for(i = mtbl->dirty_list; !dirty_done(i); i = temp_next) + { + struct mem_ent_s *ment = &(mtbl->mem[i]); + if(ment->tag == NIL64 || ment->item == NIL16) + { + break; + } + + /* Aquire block lock - TODO:check if this is redundant due to global lock */ + ucache_lock_t *blk_lock = get_lock(ment->item); + lock_lock(blk_lock); + + temp_next = mtbl->mem[i].dirty_next; + mtbl->mem[i].dirty_next = NIL16; + + /*#ifdef FILE_SYSTEM_ENABLED*/ + PVFS_object_ref ref = {fent->tag_handle, fent->tag_id, 0}; + struct iovec vector = {&(ucache->b[ment->item].mblk[0]), CACHE_BLOCK_SIZE_K * 1024}; + rc = iocommon_vreadorwrite(2, &ref, ment->tag, 1, &vector); + /* + #endif + #ifndef FILE_SYSTEM_ENABLED + rc = 0; + #endif + */ + + lock_unlock(blk_lock); + if(rc == -1) + { + goto done; + } + } + + mtbl->dirty_list = NIL16; + rc = 0; + +done: + return rc; +} + +/** + * This function is meant to be called only inside remove_mem. + * Returns 0 on success, -1 on failure + */ +int flush_block(struct file_ent_s *fent, struct mem_ent_s *ment) +{ + int rc = 0; + PVFS_object_ref ref = {fent->tag_handle, fent->tag_id, 0}; + struct iovec vector = {&(ucache->b[ment->item].mblk[0]), CACHE_BLOCK_SIZE_K * 1024}; + rc = iocommon_vreadorwrite(2, &ref, ment->tag, 1, &vector); + return rc; +} + + +/** + * For testing purposes only! 
+ */ +int wipe_ucache(void) +{ + int rc = 0; + + /* Aquire pointers to shmem segments (just ucache) */ + int shmflg = SVSHM_MODE; + + /* ucache */ + key_t key = ftok(KEY_FILE, SHM_ID2); + int ucache_shmid = shmget(key, 0, shmflg); + if(ucache_shmid == -1) + { + perror("wipe_ucache - ucache shmget"); + return -1; + } + ucache = (union user_cache_u *)shmat(ucache_shmid, NULL, 0); + if((long int)ucache == -1) + { + perror("wipe ucache - ucache shmat"); + return -1; + } + + /* wipe the cache, locks, and reinitialize */ + memset(ucache, 0, CACHE_SIZE); + + /* Force Re-creation of ftbl */ + rc = ucache_init_file_table(1); + return rc; +} + +/** + * Removes all memory entries in the mtbl corresponding to the file info + * provided as parameters. It also removes the mtbl and the file entry from + * the cache. + */ +int ucache_close_file(struct file_ent_s *fent) +{ + int rc = 0; + rc = lock_lock(ucache_lock); + rc = remove_file(fent); + lock_unlock(ucache_lock); + return rc; +} + +/** May dump stats to log file if the envar LOG_UCACHE_STATS is set to 1. + * + */ +#if 0 +void log_ucache_stats(void) +{ + /* Return if envar not set to 1 */ + char *var = getenv("LOG_UCACHE_STATS"); + if(!var) + { + return; + } + if(atoi(var) != 1) + { + return; + } + + float attempts = these_stats.hits + these_stats.misses; + float percentage = 0.0; + /* Don't Divide By Zero! */ + if(attempts) + { + percentage = ((float)these_stats.hits) / attempts; + } + /* + gossip_debug(GOSSIP_UCACHE_DEBUG, + "user cache statistics for this execution:\n" + "\thits=\t%llu\n" + "\tmisses=\t%llu\n" + "\thit percentage=\t%f\n" + "\tpseudo_misses=\t%llu\n" + "\tblock_count=\t%hu\n" + "\tfile_count=\t%hu\n", + (long long unsigned int) these_stats.hits, + (long long unsigned int) these_stats.misses, + percentage, + (long long unsigned int) these_stats.pseudo_misses, + these_stats.block_count, + these_stats.file_count + ); + */ +} +#endif + +/** + * Dumps all cache related information to the specified file pointer. 
 * Returns 0 on succes, -1 on failure meaning the ucache wasn't enabled
 * for some reason.
 */
int ucache_info(FILE *out, char *flags)
{
    if(!ucache_enabled)
    {
        ucache_initialize();
    }
    if(!ucache_enabled)
    {
        //fprintf(out, "ucache is not enabled. See ucache.log and ucached.log.\n");
        return -1;
    }

    /* Decide what to show */
    unsigned char show_all = 0;
    unsigned char show_summary = 0;
    unsigned char show_parameters = 0;
    unsigned char show_contents = 0;
    unsigned char show_free = 0;

    int char_ndx;
    /* NOTE(review): the flag-parsing loop body was garbled during patch
     * extraction (the loop condition and the code mapping characters in
     * 'flags' onto the show_* booleans is missing — likely an angle
     * bracket was stripped). The fragment below is preserved verbatim;
     * restore the full loop from the original source. */
    for (char_ndx=0; char_ndxhits + ucache_stats->misses;
    float percentage = 0.0;

    /* Don't Divide By Zero! */
    if(attempts)
    {
        percentage = ((float) ucache_stats->hits) / attempts;
    }

    if(show_all || show_summary)
    {
        fprintf(out,
            "user cache statistics:\n"
            "\thits=\t%llu\n"
            "\tmisses=\t%llu\n"
            "\thit percentage=\t%f\n"
            "\tpseudo_misses=\t%llu\n"
            "\tblock_count=\t%hu\n"
            "\tfile_count=\t%hu\n",
            (long long unsigned int) ucache_stats->hits,
            (long long unsigned int) ucache_stats->misses,
            percentage * 100,
            (long long unsigned int) ucache_stats->pseudo_misses,
            ucache_stats->block_count,
            ucache_stats->file_count
        );
    }

    if(show_all || show_parameters)
    {

        fprintf(out, "\n#defines:\n");
        /* First, print many of the #define values */
        fprintf(out, "MEM_TABLE_ENTRY_COUNT = %d\n", MEM_TABLE_ENTRY_COUNT);
        fprintf(out, "FILE_TABLE_ENTRY_COUNT = %d\n", FILE_TABLE_ENTRY_COUNT);
        fprintf(out, "CACHE_BLOCK_SIZE_K = %d\n", CACHE_BLOCK_SIZE_K);
        fprintf(out, "MEM_TABLE_HASH_MAX = %d\n", MEM_TABLE_HASH_MAX);
        fprintf(out, "FILE_TABLE_HASH_MAX = %d\n", FILE_TABLE_HASH_MAX);
        fprintf(out, "MTBL_PER_BLOCK = %d\n", MTBL_PER_BLOCK );
        fprintf(out, "KEY_FILE = %s\n", KEY_FILE);
        fprintf(out, "SHM_ID1 = %d\n", SHM_ID1);
        fprintf(out, "SHM_ID2 = %d\n", SHM_ID2);
        fprintf(out, "BLOCKS_IN_CACHE = %d\n", BLOCKS_IN_CACHE);
        fprintf(out, "CACHE_SIZE = %d(B)\t%d(MB)\n", CACHE_SIZE,
                (CACHE_SIZE/(1024*1024)));
        fprintf(out, "AT_FLAGS = %d\n", AT_FLAGS);
        fprintf(out, "SVSHM_MODE = %d\n", SVSHM_MODE);
        fprintf(out, "CACHE_FLAGS = %d\n", CACHE_FLAGS);
        fprintf(out, "NIL = 0X%X\n", NIL);
        fprintf(out, "NIL8 = 0X%X\n", NIL8);
        fprintf(out, "NIL16 = 0X%X\n", NIL16);
        fprintf(out, "NIL32 = 0X%X\n", NIL32);
        fprintf(out, "NIL64 = 0X%lX\n", NIL64);

        /* Print sizes of ucache elements */
        fprintf(out, "sizeof union cache_block_u = %lu\n", sizeof(union cache_block_u));
        fprintf(out, "sizeof struct file_table_s = %lu\n", sizeof(struct file_table_s));
        fprintf(out, "sizeof struct file_ent_s = %lu\n", sizeof(struct file_ent_s));
        fprintf(out, "sizeof struct mem_table_s = %lu\n", sizeof(struct mem_table_s));
        fprintf(out, "sizeof struct mem_ent_s = %lu\n", sizeof(struct mem_ent_s));
    }

    if(show_all || show_contents)
    {
        /* Auxilliary structure related to ucache */
        fprintf(out, "ucache_aux ptr:\t\t0X%lX\n", (long int)ucache_aux);

        /* ucache Shared Memory Info */
        fprintf(out, "ucache ptr:\t\t0X%lX\n", (long int)ucache);

        /* FTBL Info */
        struct file_table_s *ftbl = &(ucache->ftbl);
        fprintf(out, "ftbl ptr:\t\t0X%lX\n", (long int)&(ucache->ftbl));
        fprintf(out, "free_blk = %hu\n", ftbl->free_blk);
        fprintf(out, "free_mtbl_blk = %hu\n", ftbl->free_mtbl_blk);
        fprintf(out, "free_mtbl_ent = %hu\n", ftbl->free_mtbl_ent);
        fprintf(out, "free_list = %hu\n", ftbl->free_list);

        uint16_t i;

        if(show_all || show_free)
        {
            /* Other Free Blocks */
            fprintf(out, "\nIterating Over Free Blocks:\n\n");
            for(i = ftbl->free_blk; i < BLOCKS_IN_CACHE; i = ucache->b[i].mtbl[0].
                    free_list_blk)
            {
                fprintf(out, "Free Block:\tCurrent: %hu\tNext: %hu\n", i,
                        ucache->b[i].mtbl[0].free_list_blk);
            }
            fprintf(out, "End of Free Blocks List\n");


            /* Iterate Over Free Mtbls */
            fprintf(out, "\nIterating Over Free Mtbls:\n");
            uint16_t current_blk = (uint16_t)ftbl->free_mtbl_blk;
            uint16_t current_ent = ftbl->free_mtbl_ent;
            while(current_blk != NIL16)
            {
                fprintf(out, "free mtbl: block = %hu\tentry = %hu\n",
                        current_blk, current_ent);
                uint16_t temp_blk = ucache->b[current_blk].mtbl[current_ent].free_list_blk;
                uint16_t temp_ent = ucache->b[current_blk].mtbl[current_ent].free_list;
                current_blk = temp_blk;
                current_ent = temp_ent;
            }
            fprintf(out, "End of Free Mtbl List\n\n");

            /* Iterating Over Free File Entries */
            fprintf(out, "Iterating Over Free File Entries:\n");
            uint16_t current_fent;
            for(current_fent = ftbl->free_list; current_fent != NIL16;
                    current_fent = ftbl->file[current_fent].next)
            {
                fprintf(out, "free file entry: index = %d\n", (int16_t)current_fent);
            }
            fprintf(out, "End of Free File Entry List\n\n");
        }

        fprintf(out, "Iterating Over File Entries in Hash Table:\n\n");
        /* iterate over file table entries */
        for(i = 0; i < FILE_TABLE_HASH_MAX; i++)
        {
            if((ftbl->file[i].tag_handle != NIL64) &&
               (ftbl->file[i].tag_handle != 0))
            {
                /* iterate accross file table chain */
                uint16_t j;
                for(j = i; !file_done(j); j = file_next(ftbl, j))
                {
                    fprintf(out, "FILE ENTRY INDEX %hu ********************\n", j);
                    struct file_ent_s * fent = &(ftbl->file[j]);
                    fprintf(out, "tag_handle = 0X%llX\n",
                            (long long int)fent->tag_handle);
                    fprintf(out, "tag_id = 0X%X\n", (uint32_t)fent->tag_id);
                    fprintf(out, "mtbl_blk = %hu\n", fent->mtbl_blk);
                    fprintf(out, "mtbl_ent = %hu\n", fent->mtbl_ent);
                    fprintf(out, "next = %hu\n", fent->next);
                    fprintf(out, "index = %hu\n", fent->index);

                    struct mem_table_s * mtbl = get_mtbl(fent->mtbl_blk,
                                                         fent->mtbl_ent);

                    fprintf(out, "\tMTBL LRU List ****************\n");
                    print_LRU(mtbl);
                    print_dirty(mtbl);

                    fprintf(out, "\tMTBL INFO ********************\n");
                    fprintf(out, "\tnum_blocks = %hu\n", mtbl->num_blocks);
                    fprintf(out, "\tfree_list = %hu\n", mtbl->free_list);
                    fprintf(out, "\tfree_list_blk = %hu\n", mtbl->free_list_blk);
                    fprintf(out, "\tlru_first = %hu\n", mtbl->lru_first);
                    fprintf(out, "\tlru_last = %hu\n", mtbl->lru_last);
                    fprintf(out, "\tdirty_list = %hu\n", mtbl->dirty_list);
                    fprintf(out, "\tref_cnt = %hu\n\n", mtbl->ref_cnt);
                    fflush(out);
                    /* Iterate Over Memory Entries */
                    uint16_t k;
                    for(k = 0; k < MEM_TABLE_HASH_MAX; k++)
                    {
                        if(mtbl->bucket[k] == NIL16)
                            continue;

                        if(mtbl->mem[mtbl->bucket[k]].tag != NIL64)
                        {
                            uint16_t l;
                            for(l = mtbl->bucket[k]; !ment_done(l); l = ment_next(mtbl, l))
                            {
                                struct mem_ent_s * ment = &(mtbl->mem[l]);
                                fprintf(out, "\t\tMEMORY ENTRY INDEX %hd **********"
                                        "*********\n", l);
                                fprintf(out, "\t\ttag = 0X%lX\n",
                                        (long unsigned int)ment->tag);

                                fprintf(out, "\t\titem = %hu\n",
                                        ment->item);
                                fprintf(out, "\t\tnext = %hu\n",
                                        ment->next);
                                fprintf(out, "\t\tdirty_next = %hu\n",
                                        ment->dirty_next);
                                fprintf(out, "\t\tlru_next = %hu\n",
                                        ment->lru_next);
                                fprintf(out, "\t\tlru_prev = %hu\n\n",
                                        ment->lru_prev);
                            }
                        }
                        else
                        {
                            if(mtbl->num_blocks != 0
                               && (show_all || show_free))
                            {
                                fprintf(out, "\tvacant memory entry @ index = %d\n",
                                        mtbl->bucket[k]);
                            }
                        }
                    }
                }
                fprintf(out, "End of chain @ Hash Table Index %hu\n\n", i);
            }
            else
            {
                if(show_all || show_free)
                {
                    fprintf(out, "vacant file entry @ index = %hu\n\n", i);
                }
            }
        }
    }
    return 0;
}

/**
 * Returns a pointer to the lock corresponding to the block_index.
 * If the index is out of range, then 0 is returned.
+ */ +inline ucache_lock_t *get_lock(uint16_t block_index) +{ + if(block_index >= (BLOCKS_IN_CACHE + 1)) + { + return (ucache_lock_t *)0; + } + return &ucache_locks[block_index]; +} + +/** + * Initializes the proper lock based on the LOCK_TYPE + * Returns 0 on success, -1 on error + */ +int lock_init(ucache_lock_t * lock) +{ + int rc = -1; + /* TODO: ability to disable locking */ + #if LOCK_TYPE == 0 + rc = sem_init(lock, 1, 1); + if(rc != -1) + { + rc = 0; + } + #elif LOCK_TYPE == 1 + pthread_mutexattr_t attr; + rc = pthread_mutexattr_init(&attr); + assert(rc == 0); + rc = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); + assert(rc == 0); + rc = pthread_mutex_init(lock, &attr); + assert(rc == 0); + if(rc != 0) + { + return -1; + } + #elif LOCK_TYPE == 2 + rc = pthread_spin_init(lock, 1); + if(rc != 0) + { + return -1; + } + #elif LOCK_TYPE == 3 + *lock = (ucache_lock_t) GEN_SHARED_MUTEX_INITIALIZER_NP; //GEN_SHARED_MUTEX_INITIALIZER_NP; + rc = 0; + #endif + return rc; +} + +/** + * Returns 0 when lock is locked; otherwise, return -1 and sets errno. + */ +inline int lock_lock(ucache_lock_t * lock) +{ + int rc = 0; + #if LOCK_TYPE == 0 + return sem_wait(lock); + #elif LOCK_TYPE == 1 +/* + while(1) + { + rc = pthread_mutex_trylock(lock); + if(rc != 0) + { + printf("couldn't lock lock 0X%lX\n", (long unsigned int) lock); + fflush(stdout); + rc = -1; + } + else + { + break; + } + } +*/ + rc = pthread_mutex_lock(lock); + return rc; + #elif LOCK_TYPE == 2 + return pthread_spin_lock(lock); + #elif LOCK_TYPE == 3 + rc = gen_mutex_lock(lock); + return rc; + #endif +} + +/** + * If successful, return zero; otherwise, return -1 and sets errno. 
+ */ +inline int lock_unlock(ucache_lock_t * lock) +{ + #if LOCK_TYPE == 0 + return sem_post(lock); + #elif LOCK_TYPE == 1 + return pthread_mutex_unlock(lock); + #elif LOCK_TYPE == 2 + return pthread_spin_unlock(lock); + #elif LOCK_TYPE == 3 + return gen_mutex_unlock(lock); + #endif +} + +/** + * Upon successful completion, returns zero + * Otherwise, returns -1 and sets errno. + */ +#if (LOCK_TYPE == 0) +int ucache_lock_getvalue(ucache_lock_t * lock, int *sval) +{ + return sem_getvalue(lock, sval); +} +#endif + +/** + * Tries the lock to see if it's available: + * Returns 0 if lock has not been aquired ie: success + * Otherwise, returns -1 + */ +inline int lock_trylock(ucache_lock_t * lock) +{ + int rc = -1; + #if (LOCK_TYPE == 0) + int sval = 0; + rc = sem_getvalue(lock, &sval); + if(sval <= 0 || rc == -1){ + rc = -1; + } + else + { + rc = 0; + } + #elif (LOCK_TYPE == 1) + rc = pthread_mutex_trylock(lock); + if( rc != 0) + { + rc = -1; + } + #elif (LOCK_TYPE == 2) + rc = pthread_spin_trylock(lock); + if(rc != 0) + { + rc = -1; + } + #elif LOCK_TYPE == 3 + rc = gen_mutex_trylock(lock); + if(rc != 0) + { + rc = -1; + } + #endif + if(rc == 0) + { + /* Unlock before leaving if lock wasn't already set */ + rc = lock_unlock(lock); + } + return rc; +} +/***************************************** End of Externally Visible API */ + +/* Beginning of internal only (static) functions */ + +/* Dirty List Iterator */ +/** + * Returns true if current index is NIL, otherwise, returns 0. + */ +static inline unsigned char dirty_done(uint16_t index) +{ + return (index == NIL16); +} + +/** + * Returns the next index in the dirty list for the provided mtbl and index + */ +static inline uint16_t dirty_next(struct mem_table_s *mtbl, uint16_t index) +{ + return mtbl->mem[index].dirty_next; +} + +/* Memory Entry Chain Iterator */ +/** + * Returns true if current index is NIL, otherwise, returns 0. 
+ */ +static inline unsigned char ment_done(uint16_t index) +{ + return (index == NIL16); +} + +/** + * Returns the next index in the memory entry chain for the provided mtbl + * and index. + */ +static inline uint16_t ment_next(struct mem_table_s *mtbl, uint16_t index) +{ + return mtbl->mem[index].next; +} + +/* File Entry Chain Iterator */ +/** + * Returns true if current index is NIL, otherwise, returns 0 + */ +static unsigned char file_done(uint16_t index) +{ + return (index == NIL16); +} + +/** + * Returns the next index in the file entry chain for the provided mtbl + * and index. + */ +static uint16_t file_next(struct file_table_s *ftbl, uint16_t index) +{ + return ftbl->file[index].next; +} + +/** + * This function should only be called when the ftbl has no free mtbls. + * It initizializes MTBL_PER_BLOCK additional mtbls in the block provided, + * meaning this block will no longer be used for storing file data but + * hash table related data instead. + */ +static void add_mtbls(uint16_t blk) +{ + uint16_t i, start_mtbl; + struct file_table_s *ftbl = &(ucache->ftbl); + union cache_block_u *b = &(ucache->b[blk]); + + /* add mtbls in blk to ftbl free list */ + if (blk == 0) + { + start_mtbl = 1; /* skip blk 0 ent 0 which is ftbl */ + } + else + { + start_mtbl = 0; + } + for (i = start_mtbl; i < (MTBL_PER_BLOCK - 1); i++) + { + b->mtbl[i].free_list_blk = blk; + b->mtbl[i].free_list = i + 1; + } + b->mtbl[i].free_list_blk = NIL16; + b->mtbl[i].free_list = NIL16; + ftbl->free_mtbl_blk = blk; + ftbl->free_mtbl_ent = start_mtbl; +} +/** + * Initializes a memory entry. 
+ */ +static inline int init_memory_entry(struct mem_table_s *mtbl, int16_t index) +{ + if(index > MEM_TABLE_ENTRY_COUNT) + { + return -1; + } + mtbl->mem[index].tag = NIL64; + mtbl->mem[index].item = NIL16; + mtbl->mem[index].next = NIL16; + mtbl->mem[index].dirty_next = NIL16; + mtbl->mem[index].lru_prev = NIL16; + mtbl->mem[index].lru_next = NIL16; + return 0; +} + +/** + * Initializes a mtbl which is a hash table of memory entries. + * The mtbl will be located at the provided entry index within + * the provided block. + */ +static void init_memory_table(struct mem_table_s *mtbl) +{ + uint16_t i; + int rc = -1; + mtbl->num_blocks = 0; + mtbl->free_list_blk = NIL16; + mtbl->lru_first = NIL16; + mtbl->lru_last = NIL16; + mtbl->dirty_list = NIL16; + mtbl->ref_cnt = 0; + + /* Initialize Buckets */ + for(i = 0; i < MEM_TABLE_HASH_MAX; i++) + { + mtbl->bucket[i] = NIL16; + } + + /* set up free ments */ + mtbl->free_list = 0; + for(i = 0; i < (MEM_TABLE_ENTRY_COUNT - 1); i++) + { + rc = init_memory_entry(mtbl, i); + mtbl->mem[i].next = i + 1; + + } + /* NIL Terminate the last entries next index */ + rc = init_memory_entry(mtbl, MEM_TABLE_ENTRY_COUNT - 1); + mtbl->mem[MEM_TABLE_ENTRY_COUNT - 1].next = NIL16; +} + +/** + * This function asks the file table if a free block is avaialable. + * If so, returns the block's index; otherwise, returns NIL. + */ +static inline uint16_t get_free_blk(void) +{ + struct file_table_s *ftbl = &(ucache->ftbl); + uint16_t desired_blk = ftbl->free_blk; + if(desired_blk != NIL16 && desired_blk < BLOCKS_IN_CACHE) + { + /* Update the head of the free block list */ + /* Use mtbl index zero since free_blks have no ititialized mem tables */ + ftbl->free_blk = ucache->b[desired_blk].mtbl[0].free_list_blk; + return desired_blk; + } + return NIL16; +} + +/** + * Accepts an index corresponding to a block that is put back on the file + * table free list. 
+ */ +static inline void put_free_blk(uint16_t blk) +{ + struct file_table_s *ftbl = &(ucache->ftbl); + /* set the block's next value to the current head of the block free list */ + ucache->b[blk].mtbl[0].free_list_blk = ftbl->free_blk; + /* blk is now the head of the ftbl blk free list */ + ftbl->free_blk = blk; +} + +/** + * Consults the file table to retrieve an index corresponding to a file entry + * If available, returns the file entry index, otherwise returns NIL. + */ +static uint16_t get_free_fent(void) +{ + struct file_table_s *ftbl = &(ucache->ftbl); + uint16_t entry = ftbl->free_list; + if(entry != NIL16) + { + ftbl->free_list = ftbl->file[entry].next; + ftbl->file[entry].next = NIL16; + return entry; + } + else + { + return NIL16; + } +} + +/** + * Places the file entry located at the provided index back on the file table's + * free file entry list. If the index is < FILE_TABLE_HASH_MAX, then set next + * to NIL since this index must remain the head of the linked list. Otherwise, + * set next to the current head of fent free list and set the free list head to + * the provided index. + */ +static void put_free_fent(struct file_ent_s *fent) +{ + struct file_table_s *ftbl = &(ucache->ftbl); + fent->tag_handle = NIL64; + fent->tag_id = NIL32; + if(fent->index < FILE_TABLE_HASH_MAX) + { + fent->next = NIL16; + } + else + { + /* Set next index to the current head of the free list */ + fent->next = ftbl->free_list; + /* Set fent index as the head of the free_list */ + ftbl->free_list = fent->index; + } +} + +/** + * Consults the provided mtbl's memory entry free list to get the index of the + * next free memory entry. Returns the index if one is available, otherwise + * returns NIL. 
+ */ +static inline uint16_t get_free_ment(struct mem_table_s *mtbl) +{ + uint16_t ment = mtbl->free_list; + if(ment != NIL16) + { + mtbl->free_list = mtbl->mem[ment].next; + mtbl->mem[ment].next = NIL16; + } + return ment; +} + +/** + * Puts the memory entry corresponding to the provided mtbl and entry index + * back on the mtbl's memory entry free list. + */ +static void put_free_ment(struct mem_table_s *mtbl, uint16_t ent) +{ + /* Reset ment values */ + mtbl->mem[ent].tag = NIL64; + mtbl->mem[ent].item = NIL16; + mtbl->mem[ent].dirty_next = NIL16; + mtbl->mem[ent].lru_prev = NIL16; + mtbl->mem[ent].lru_next = NIL16; + /* Set next index to the current head of the free list */ + mtbl->mem[ent].next = mtbl->free_list; + /* Update free list to include this entry */ + mtbl->free_list = ent; +} + +/** + * Perform a file lookup on the ucache using the provided fs_id and handle. + * + * Additional parameters (references) may used that will be set to values + * pertaining to mtbl and file entry location. If NULL is passed in place of + * these parameters, then they cannot be set. + * + * If the file is found, a pointer to the mtbl is returned and the parameter + * references set accordingly. Otherwise, NIL is returned. 
+ */ +static struct mem_table_s *lookup_file( + uint32_t fs_id, + uint64_t handle, + uint16_t *file_mtbl_blk, + uint16_t *file_mtbl_ent, + uint16_t *file_ent_index, + uint16_t *file_ent_prev_index +) +{ + /* Index into file hash table */ + uint16_t index = handle % FILE_TABLE_HASH_MAX; + + struct file_table_s *ftbl = &(ucache->ftbl); + struct file_ent_s *current = &(ftbl->file[index]); + + /* previous, current, next fent index */ + uint16_t p = NIL16; + uint16_t c = index; + uint16_t n = current->next; + + while(1) + { + if((current->tag_id == fs_id) && (current->tag_handle == handle)) + { + /* If params !NULL, set their values */ + if(file_mtbl_blk!=NULL && file_mtbl_ent!=NULL && + file_ent_index!=NULL && file_ent_prev_index!=NULL) + { + *file_mtbl_blk = current->mtbl_blk; + *file_mtbl_ent = current->mtbl_ent; + *file_ent_index = c; + *file_ent_prev_index = p; + } + return (struct mem_table_s *)&(ucache->b[current->mtbl_blk].mtbl[ + current->mtbl_ent]); + } + /* No match yet */ + else + { + if(current->next == NIL16 || current->next == 0) + { + return (struct mem_table_s *)NIL; + } + else + { + current = &(ftbl->file[current->next]); + p=c; + c=n; + n=current->next; + } + + } + } +} + +/** + * Function that locates the next free mtbl. + * On success, Returns 1 and sets reference parameters to proper indexes. + * On failure, returns NIL; + */ +static uint16_t get_next_free_mtbl(uint16_t *free_mtbl_blk, uint16_t *free_mtbl_ent) +{ + struct file_table_s *ftbl = &(ucache->ftbl); + + /* Get next free mtbl_blk and ent from ftbl */ + *free_mtbl_blk = ftbl->free_mtbl_blk; + *free_mtbl_ent = ftbl->free_mtbl_ent; + + /* Is free mtbl_blk available? */ + if((*free_mtbl_blk == NIL16) || + (*free_mtbl_ent == NIL16)) + { + return NIL16; + } + + /* Update ftbl to contain new next free mtbl */ + ftbl->free_mtbl_blk = ucache->b[*free_mtbl_blk].mtbl[*free_mtbl_ent]. + free_list_blk; + ftbl->free_mtbl_ent = ucache->b[*free_mtbl_blk].mtbl[*free_mtbl_ent]. 
+ free_list; + + /* Set free info to NIL */ + ucache->b[*free_mtbl_blk].mtbl[*free_mtbl_ent].free_list = NIL16; + ucache->b[*free_mtbl_blk].mtbl[*free_mtbl_ent].free_list_blk = NIL16; + + return 1; +} + +/** + * Places memory entries' corresponding blocks + * back on the ftbl block free list. Reinitializes mtbl. + * Assumes mtbl->ref_cnt is 0. + */ +static int wipe_mtbl(struct mem_table_s *mtbl) +{ + uint16_t i; + for(i = 0; i < MEM_TABLE_HASH_MAX; i++) + { + uint16_t j; + for(j = mtbl->bucket[i]; !ment_done(j); j = ment_next(mtbl, j)) + { + /* Current Memory Entry */ + struct mem_ent_s *ment = &(mtbl->mem[j]); + /* Account for empty head of ment chain */ + if((ment->tag == NIL64) || (ment->item == NIL16)) + { + break; + } + put_free_blk(ment->item); + } + } + memset(&mtbl->mem[0], 0, sizeof(struct mem_ent_s) * MEM_TABLE_ENTRY_COUNT); + init_memory_table(mtbl); + return 1; +} + +/** + * Places the provided mtbl back on the ftbl's mtbl free list provided it + * isn't currently referenced. 
+ */ +static int put_free_mtbl(struct mem_table_s *mtbl, struct file_ent_s *file) +{ + /* Remove mtbl */ + mtbl->num_blocks = 0; /* number of used blocks in this mtbl */ + mtbl->lru_first = NIL16; /* index of first block on lru list */ + mtbl->lru_last = NIL16; /* index of last block on lru list */ + mtbl->dirty_list = NIL16; /* index of first dirty block */ + mtbl->ref_cnt = 0; /* number of clients using this record */ + + /* Add mem_table back to free list */ + /* Temporarily store copy of current head (the new next) */ + uint16_t tmp_blk = ucache->ftbl.free_mtbl_blk; + uint16_t tmp_ent = ucache->ftbl.free_mtbl_ent; + /* newly free mtbl becomes new head of free mtbl list */ + ucache->ftbl.free_mtbl_blk = file->mtbl_blk; + ucache->ftbl.free_mtbl_ent = file->mtbl_ent; + /* Point to the next free mtbl (the former head) */ + mtbl->free_list_blk = tmp_blk; + mtbl->free_list = tmp_ent; + + return 1; +} + +/** + * Insert information about file into ucache (no file data inserted) + * Returns pointer to mtbl on success. + * + * Returns NIL if necessary data structures could not be aquired from the free + * lists or through an eviction policy (meaning references are held). 
+ */ +uint16_t insert_file( + uint32_t fs_id, + uint64_t handle +) +{ + struct file_table_s *ftbl = &(ucache->ftbl); + struct file_ent_s *current; /* Current ptr for iteration */ + uint16_t free_fent = NIL16; /* Index of next free fent */ + + /* index into file hash table */ + uint16_t index = handle % FILE_TABLE_HASH_MAX; + current = &(ftbl->file[index]); + + unsigned char indexOccupied = (current->tag_handle != NIL64 && current->tag_id != NIL32); + + /* Get free mtbl */ + uint16_t free_mtbl_blk = NIL16; + uint16_t free_mtbl_ent = NIL16; + /* Create free mtbls if none are available */ + if(get_next_free_mtbl(&free_mtbl_blk, &free_mtbl_ent) != 1) + { + if(ucache->ftbl.free_blk == NIL16) + { + /* Evict a block from mtbl with most mem entries */ + struct file_ent_s *max_fent = 0; + struct mem_table_s *max_mtbl; + locate_max_fent(&max_fent); + max_mtbl = get_mtbl(max_fent->mtbl_blk, max_fent->mtbl_ent); + evict_LRU(max_fent); + } + /* TODO: other policy? */ + if(ucache->ftbl.free_blk == NIL16) + { + + } + /* Intitialize memory tables */ + if(ucache->ftbl.free_blk != NIL16) + { + int16_t free_blk = get_free_blk(); + add_mtbls(free_blk); + get_next_free_mtbl(&free_mtbl_blk, &free_mtbl_ent); + } + else + { + /* Couldn't get free mtbl - unlikely */ + return NIL16; + } + } + + /* Now, we know which hashed chain we are trying to insert into and have a + * mtbl ready to be filled. + */ + + /* Insert at the head or just after the head, since we can't change the + * indexing (only can change "nexts"). 
+ */ + if(indexOccupied) + { + /* Certain a file entry is required */ + /* get free file entry and update ftbl */ + free_fent = get_free_fent(); + if(free_fent != NIL16) + { + uint16_t temp_next = current->next; + current->next = free_fent; + current = &(ftbl->file[free_fent]); + current->next = temp_next; /* repair link */ + current->index = free_fent; + } + else + { + /* Return an error indicating the ucache is full and file couldn't + * be cached + */ + return NIL16; + } + } + else + { + current->index = index; + } + + /* Insert file data @ index */ + current->tag_id = fs_id; + current->tag_handle = handle; + /* Update fent with it's new mtbl: blk and ent */ + current->mtbl_blk = free_mtbl_blk; + current->mtbl_ent = free_mtbl_ent; + /* Initialize Memory Table */ + init_memory_table(get_mtbl(free_mtbl_blk, free_mtbl_ent)); + return current->index; +} + +/** + * Remove file entry and memory table of file identified by parameters + * Returns 1 following removal + * Returns -1 if file is referenced or if the file could not be located. + */ +static int remove_file(struct file_ent_s *fent) +{ + int rc = 0; + struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, + fent->mtbl_ent); + + if(mtbl == (struct mem_table_s *)NILP) + { + return -1; + } + + /* Flush file blocks before file removal */ + mtbl->ref_cnt--; + + if(mtbl->ref_cnt > 0) + { + return 0; + } + + /* Flush dirty blocks before file removal from cache */ + rc = flush_file(fent); + if(rc == -1) + { + return rc; + } + + /* Instead of removing individually, since memory entries are already + * flushed, just wipe the mtbl + */ + rc = wipe_mtbl(mtbl); + if(rc == -1) + { + /* Couldn't remove entries */ + return rc; + } + + rc = put_free_mtbl(mtbl, fent); + if(rc == -1) + { + return rc; + } + + put_free_fent(fent); + if(rc == -1) + { + return rc; + } + + /* Success */ + return 0; +} + +/** + * Lookup the memory location of a block of data in cache that is identified + * by the mtbl and offset parameters. 
+ * + * If located, returns a pointer to memory where the desired block of data is + * stored. Otherwise, NIL is returned. + * + * pertaining to the memory entry's location. If NULLs are passed in place of + * these parameters, then they will not be set. + */ +inline static void *lookup_mem(struct mem_table_s *mtbl, + uint64_t offset, + uint16_t *item_index, + uint16_t *mem_ent_index, + uint16_t *mem_ent_prev_index) +{ + /* index into mem hash table */ + uint16_t index = (uint16_t) ((offset / CACHE_BLOCK_SIZE) % MEM_TABLE_HASH_MAX); + + /* If the bucket is empty then go ahead and return */ + if(mtbl->bucket[index] == NIL16) + { + return (struct mem_table_s *)NIL; + } + + uint16_t bucket_index = mtbl->bucket[index]; + struct mem_ent_s *current = &(mtbl->mem[bucket_index]); + + /* previous, current, next memory entry index in mtbl */ + int16_t p = NIL16; + int16_t c = bucket_index; + int16_t n = current->next; + + while(1) + { + if(offset == current->tag) + { + /* If parameters !NULL, set their values */ + if(item_index != NULL) + { + *item_index = current->item; + } + if((mem_ent_index != NULL) && (mem_ent_prev_index != NULL)) + { + *mem_ent_index = c; + *mem_ent_prev_index = p; + } + return (void *)(&ucache->b[current->item].mblk); + } + else + { + if(current->next == NIL16) + { + return (struct mem_table_s *)NIL; + } + else + { + /* Iterate */ + current = &(mtbl->mem[current->next]); + p = c; + c = n; + n = current->next; + } + } + } +} + +/** + * Update the provided mtbl's LRU doubly-linked list by placing the memory + * entry, identified by the provided index, at the head of the list (lru_first). 
+ */ +static inline void update_LRU(struct mem_table_s *mtbl, uint16_t index) +{ + /* First memory entry used becomes the head and tail of the list */ + if((mtbl->lru_first == NIL16) && + (mtbl->lru_last == NIL16)) + { + mtbl->lru_first = index; + mtbl->lru_last = index; + mtbl->mem[index].lru_prev = NIL16; + mtbl->mem[index].lru_next = NIL16; + } + /* 2nd Memory Entry */ + else if(mtbl->lru_first == mtbl->lru_last) + { + /* Do nothing if this index is already the only entry */ + if(mtbl->lru_first == index) + { + return; + } + else + { + /* Must be 2nd unique memory entry */ + /* point tail.prev to new */ + mtbl->mem[mtbl->lru_first].lru_prev = index; + /* point new.prev to NIL */ + mtbl->mem[index].lru_prev = NIL16; + /* point the new.next to the tail */ + mtbl->mem[index].lru_next = mtbl->lru_first; + /* point the head to the new */ + mtbl->lru_first = index; + } + } + /* 3rd+ Memory Entry */ + else + { + if(mtbl->mem[index].lru_prev == NIL16 && + mtbl->mem[index].lru_next == NIL16) + { + /* First time on the LRU List, Add to the front */ + mtbl->mem[index].lru_next = mtbl->lru_first; + mtbl->mem[mtbl->lru_first].lru_prev = index; + } + else if(mtbl->mem[index].lru_prev == NIL16) + { + /* Already the head of MRU */ + return; + } + else if(mtbl->mem[index].lru_next == NIL16) + { + /* Relocate the LRU to become the MRU */ + mtbl->lru_last = mtbl->mem[index].lru_prev; + mtbl->mem[mtbl->lru_last].lru_next = NIL16; + mtbl->mem[mtbl->lru_first].lru_prev = index; + mtbl->mem[index].lru_next = mtbl->lru_first; + mtbl->mem[index].lru_prev = NIL16; + } + else + { + /* Relocate interior LRU list item to head */ + uint16_t current_prev = mtbl->mem[index].lru_prev; + uint16_t current_next = mtbl->mem[index].lru_next; + + mtbl->mem[current_prev].lru_next = current_next; + mtbl->mem[current_next].lru_prev = current_prev; + + mtbl->mem[index].lru_prev = NIL16; + mtbl->mem[index].lru_next = mtbl->lru_first; + } + mtbl->lru_first = index; + } +} + +/** + * Searches the ftbl for 
the mtbl with the most entries. + * Returns the number of memory entries the max mtbl has. The double ptr + * parameter is used to store a reference to the mtbl pointer with the most + * memory entries. + */ +static uint16_t locate_max_fent(struct file_ent_s **fent) +{ + struct file_table_s *ftbl = &(ucache->ftbl); + uint16_t index_of_max_blk = NIL16; + uint16_t index_of_max_ent = NIL16; + uint16_t value_of_max = 0; + /* Iterate over file hash table indices */ + uint16_t i; + for(i = 0; i < FILE_TABLE_HASH_MAX; i++) + { + + if((ftbl->file[i].tag_handle == NIL64) || + (ftbl->file[i].tag_handle == 0)) + continue; + + /* Iterate over hash table chain */ + uint16_t j; + for(j = i; !file_done(j); j = file_next(ftbl, j)) + { + struct file_ent_s *current_fent = &(ftbl->file[j]); + if((current_fent->mtbl_blk == NIL16) || + (current_fent->mtbl_ent == NIL16)) + { + break; + } + /* Examine the mtbl's value of num_blocks to see if it's the + * greatest. + */ + struct mem_table_s *current_mtbl = get_mtbl(current_fent->mtbl_blk, + current_fent->mtbl_ent); + + if(current_mtbl->num_blocks >= value_of_max) + { + *fent = current_fent; /* Set the parameter to this mtbl */ + index_of_max_blk = current_fent->mtbl_blk; + index_of_max_ent = current_fent->mtbl_ent; + value_of_max = current_mtbl->num_blocks; + } + } + } + return value_of_max; +} + +/** + * Evicts the LRU memory entry from the tail (lru_last) of the provided + * mtbl's LRU list. + * + * Returns 1 on success; 0 on failure, meaning there was no LRU + * or that the block's lock couldn't be aquired. 
+ */ +static int evict_LRU(struct file_ent_s *fent) +{ + int rc = -1; + + struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, fent->mtbl_ent); + + if(mtbl->num_blocks != 0 && mtbl->lru_last != NIL16) + { + //printf("evicting: %hu\n", mtbl->lru_last); + rc = remove_mem(fent, mtbl->mem[mtbl->lru_last].tag); + if(rc != 1) + { + return 0; + } + return 1; + } + else + { + return 0; + } +} + + +/** + * Used to obtain a block for storage of data identified by the offset + * parameter and maintained in the mtbl at the memory entry identified by the + * index parameter. + * + * If a free block could be aquired, returns the memory address of the block + * just inserted. Otherwise, returns NIL. + */ +static inline void *set_item(struct file_ent_s *fent, + uint64_t offset, + uint16_t index) +{ + uint16_t free_blk = get_free_blk(); + + struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, fent->mtbl_ent); + + /* No Free Blocks Available */ + if(free_blk == NIL16) + { + evict_LRU(fent); + free_blk = get_free_blk(); + } + + /* After Eviction Routine - No Free Blocks Available, Evict from mtbl + * with the most memory entries + */ + if(free_blk == NIL16) + { + struct file_ent_s *max_fent = 0; + struct mem_table_s *max_mtbl; + int ment_count = 0; + ment_count = locate_max_fent(&max_fent); + max_mtbl = get_mtbl(max_fent->mtbl_blk, max_fent->mtbl_ent); + if(ment_count == 0 || max_mtbl->lru_last == NIL16) + { + goto errout; + } + evict_LRU(max_fent); + free_blk = get_free_blk(); + } + /* TODO: other policy? 
*/ + + + /* A Free Block is Avaiable for Use */ + if(free_blk != NIL16) + { + mtbl->num_blocks++; + update_LRU(mtbl, index); + /* set item to block number */ + mtbl->mem[index].tag = offset; + mtbl->mem[index].item = free_blk; + /* add block index to head of dirty list */ + mtbl->mem[index].dirty_next = mtbl->dirty_list; + mtbl->dirty_list = index; + /* Return the address of the block where data is stored */ + return (void *)&(ucache->b[free_blk]); + } +errout: + return (void *)(NIL); +} + +/** + * Requests a location in memory to place the data identified by the mtbl and + * offset parameters. Also inserts the necessary info into the mtbl. + * + */ +static inline void *insert_mem(struct file_ent_s *fent, uint64_t offset, + uint16_t *block_ndx) +{ + void* rc = 0; + struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, fent->mtbl_ent); + + /* Lookup first */ + void *returnValue = lookup_mem(mtbl, offset, block_ndx, NULL, NULL); + if(returnValue != (void *)NIL) + { + /* Already exists in mtbl so just return a ptr to the blk */ + return returnValue; + } + + /* Index into mem hash table */ + /* Hash to a bucket */ + uint16_t index = (uint16_t) ((offset / CACHE_BLOCK_SIZE) % MEM_TABLE_HASH_MAX); + + int evict_rc = 0; + uint16_t mentIndex = get_free_ment(mtbl); + if(mentIndex == NIL16) + { /* No free ment available, so attempt eviction, and try again */ + evict_rc = evict_LRU(fent); + mentIndex = get_free_ment(mtbl); + } + + /* Eviction Failed */ + if(mentIndex == NIL16) + { + return (void *)NULL; + } + + /* Procede with memory insertion if ment aquired */ + uint16_t next_ment = NIL16; + /* Insert at head, keeping track of the previous head */ + next_ment = mtbl->bucket[index]; + mtbl->bucket[index] = mentIndex; + + rc = set_item(fent, offset, mentIndex); + if(rc != (void *)NIL) + { + mtbl->mem[mentIndex].next = next_ment; + *block_ndx = mtbl->mem[mentIndex].item; + return rc; + } + else + { + /* Restore the previous head back to head of the chain */ + mtbl->bucket[index] 
= next_ment; + return (void *)NIL; + } +} + +/** + * Removes all table info regarding the block identified by the mtbl and + * offset provided the block isn't locked. + * + * Flushing the block to fs now occurs here upon removal from cache. + * + * On success returns 1, on failure returns 0. + * + */ +static int remove_mem(struct file_ent_s *fent, uint64_t offset) +{ + struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, fent->mtbl_ent); + + /* Some Indices */ + uint16_t item_index = NIL16; /* index of cached block */ + uint16_t mem_ent_index = NIL16; + uint16_t mem_ent_prev_index = NIL16; + + void *retValue = lookup_mem(mtbl, offset, &item_index, &mem_ent_index, + &mem_ent_prev_index); + /* Verify we've recieved the necessary info */ + if(retValue == (void *)NIL) + { + return 0; + } + + /* Verify the block isn't being used by trying the corresponding lock */ + ucache_lock_t *block_lock = get_lock(mtbl->mem[mem_ent_index].item); + int rc = lock_trylock(block_lock); + if(rc != 0) + { + return -1; + } + + /* Aquire Lock */ + lock_lock(block_lock); + + /* Optionally flush block - may need to be mandatory */ + flush_block(fent, &(mtbl->mem[mem_ent_index])); + + /* Update First and Last...First */ + if(mem_ent_index == mtbl->lru_first) + { + /* Node is the head */ + mtbl->lru_first = mtbl->mem[mem_ent_index].lru_next; + } + if(mem_ent_index == mtbl->lru_last) + { + /* Node is the tail */ + mtbl->lru_last = mtbl->mem[mem_ent_index].lru_prev; + } + + /* Remove from LRU */ + /* Update each of the adjacent nodes' link */ + uint16_t lru_prev = mtbl->mem[mem_ent_index].lru_prev; + if(lru_prev != NIL16) + { + mtbl->mem[lru_prev].lru_next = mtbl->mem[mem_ent_index].lru_next; + } + uint16_t lru_next = mtbl->mem[mem_ent_index].lru_next; + if(lru_next != NIL16) + { + mtbl->mem[lru_next].lru_prev = mtbl->mem[mem_ent_index].lru_prev; + } + + /* Add memory block back to free list */ + put_free_blk(item_index); + + /* Repair link */ + if(mem_ent_prev_index != NIL16) + { + 
mtbl->mem[mem_ent_prev_index].next = mtbl->mem[mem_ent_index].next; + } + + /* Newly free mem entry becomes new head of free mem entry list if index + * is less than hash table max + */ + put_free_ment(mtbl, mem_ent_index); + mtbl->num_blocks--; + + /* Release Lock */ + lock_unlock(block_lock); + return 1; +} + +/* The following two functions are provided for error checking purposes. */ +/** + * Prints the Least Recently Used (LRU) list. + */ +void print_LRU(struct mem_table_s *mtbl) +{ + fprintf(out, "\tprinting lru list:\n"); + fprintf(out, "\t\tmru: %hu\n", mtbl->lru_first); + fprintf(out, "\t\t\tmru->lru_prev = %hu\n\t\t\tmru->lru_next = %hu\n", + mtbl->mem[mtbl->lru_first].lru_prev, mtbl->mem[mtbl->lru_first].lru_next); + uint16_t current = mtbl->mem[mtbl->lru_first].lru_next; + while(current != mtbl->lru_last && current != NIL16) + { + fprintf(out, "\t\t\tcurr->lru_prev = %hu\n", + mtbl->mem[current].lru_prev); + fprintf(out, "\t\t%hu\n", current); + fprintf(out, "\t\t\tcurr->lru_next = %hu\n", + mtbl->mem[current].lru_next); + current = mtbl->mem[current].lru_next; + } + fprintf(out, "\t\tlru: %hu\n", mtbl->lru_last); + fprintf(out, "\t\t\tlru->lru_prev = %hu\n\t\t\tlru->lru_next = %hu\n", + mtbl->mem[mtbl->lru_last].lru_prev, mtbl->mem[mtbl->lru_last].lru_next); +} + +/** + * Prints the list of dirty (modified) blocks that should eventually be + * flushed to disk. 
+ */ +void print_dirty(struct mem_table_s *mtbl) +{ + fprintf(out, "\tprinting dirty list:\n"); + int i; + for(i = 0; !dirty_done(i); i = dirty_next(mtbl, i)) + { + fprintf(out, "\t\tment index = %hu\t\t\tdirty_next = %hu\n", + i, dirty_next(mtbl, i)); + } + if(i >= MEM_TABLE_ENTRY_COUNT && i != NIL16) + { + fprintf(out, "BAD MEM_TABLE_ENTRY INDEX: %hu\n", i); + exit(0); + } + fprintf(out, "\t\tdone w/ dirty list\n"); +} + +/* End of Internal Only Functions */ +#endif /* PVFS_UCACHE_ENABLE */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/client/usrint/ucache.h b/src/client/usrint/ucache.h new file mode 100644 index 0000000..597b4ad --- /dev/null +++ b/src/client/usrint/ucache.h @@ -0,0 +1,251 @@ +/* + * (C) 2011 Clemson University + * + * See COPYING in top-level directory. + */ + +/** + * \file + * \ingroup usrint + * ucache routines + */ +#ifndef UCACHE_H +#define UCACHE_H 1 + +#include +#include +#include + +#define MEM_TABLE_ENTRY_COUNT 679 +#define FILE_TABLE_ENTRY_COUNT 682 +#define CACHE_BLOCK_SIZE_K 256 +#define CACHE_BLOCK_SIZE (CACHE_BLOCK_SIZE_K * 1024) +#define MEM_TABLE_HASH_MAX 31 +#define FILE_TABLE_HASH_MAX 31 +#define MTBL_PER_BLOCK 16 +#define KEY_FILE "/etc/fstab" +#define SHM_ID1 'l' +#define SHM_ID2 'm' +#ifndef BLOCKS_IN_CACHE +#define BLOCKS_IN_CACHE 1024 +#endif +#define CACHE_SIZE (CACHE_BLOCK_SIZE * BLOCKS_IN_CACHE) +#define AT_FLAGS 0 +#define SVSHM_MODE (SHM_R | SHM_W | SHM_R>>3 | SHM_R>>6) +#define CACHE_FLAGS (SVSHM_MODE) +#define NIL (-1) + +#ifndef UCACHE_MAX_REQ +#define UCACHE_MAX_REQ (CACHE_BLOCK_SIZE * MEM_TABLE_ENTRY_COUNT) +#endif + +/* Define multiple NILS to there's no need to cast for different types */ +#define NIL8 0XFF +#define NIL16 0XFFFF +#define NIL32 0XFFFFFFFF +#define NIL64 0XFFFFFFFFFFFFFFFF +#if (PVFS2_SIZEOF_VOIDP == 32) +#define NILP NIL32 +#elif (PVFS2_SIZEOF_VOIDP == 64) +#define NILP NIL64 +#endif + + +#ifndef DBG 
+#define DBG 0 +#endif + +#ifndef UCACHE_LOG_FILE +#define UCACHE_LOG_FILE "/tmp/ucache.log" +#endif + +/* TODO: set this to an appropriate value. */ +#define GOSSIP_UCACHE_DEBUG 0x0010000000000000 + +#ifndef LOCK_TYPE +#define LOCK_TYPE 3 /* 0 for Semaphore, 1 for Mutex, 2 for Spinlock */ +#endif + +#if (LOCK_TYPE == 0) +# include +# define ucache_lock_t sem_t +# define LOCK_SIZE sizeof(sem_t) +#elif (LOCK_TYPE == 1) +# define ucache_lock_t pthread_mutex_t /* sizeof(pthread_mutex_t)=24 */ +# define LOCK_SIZE sizeof(pthread_mutex_t) +#elif (LOCK_TYPE == 2) +# define ucache_lock_t pthread_spinlock_t +# define LOCK_SIZE sizeof(pthread_spinlock_t) +#elif (LOCK_TYPE == 3) +# define ucache_lock_t gen_mutex_t +# define LOCK_SIZE sizeof(gen_mutex_t) +#endif + +#define LOCKS_SIZE ((LOCK_SIZE) * (BLOCKS_IN_CACHE + 1)) + +#define UCACHE_STATS_64 3 +#define UCACHE_STATS_16 2 +/* This is the size of the ucache_aux auxilliary shared mem segment */ +#define UCACHE_AUX_SIZE ( LOCKS_SIZE + (UCACHE_STATS_64 * 64) + \ + (UCACHE_STATS_16 * 16)) + +/* Globals */ +extern FILE * out; +extern int ucache_enabled; +extern union user_cache_u *ucache; +extern struct ucache_aux_s *ucache_aux; +extern ucache_lock_t *ucache_locks; +extern ucache_lock_t *ucache_lock; +extern struct ucache_stats_s *ucache_stats; +extern struct ucache_stats_s these_stats; + +/** A structure containing the statistics summarizing the ucache. + * + */ +struct ucache_stats_s +{ + uint64_t hits; + uint64_t misses; + uint64_t pseudo_misses; + uint16_t block_count; + uint16_t file_count; +}; + +/** A structure containing the auxilliary data required by ucache to properly + * function. 
+ */ +struct ucache_aux_s +{ + ucache_lock_t ucache_locks[BLOCKS_IN_CACHE + 1]; /* +1 for global lock */ + struct ucache_stats_s ucache_stats; /* Summary Statistics of ucache */ +}; + +/** A link for one block of memory in a files hash table + * + */ +/* 24 bytes */ +struct mem_ent_s +{ + uint64_t tag; /* offset of data block in file */ + uint16_t item; /* index of cache block with data */ + uint16_t next; /* use for hash table chain */ + uint16_t dirty_next; /* if dirty used in dirty list */ + uint16_t lru_prev; /* used in lru list */ + uint16_t lru_next; /* used in lru list */ + char pad[6]; +}; + +/** A cache for a specific file + * + * Keyed on the address of the block of memory + */ +struct mem_table_s +{ + uint16_t num_blocks; /* number of used blocks in this mtbl */ + uint16_t free_list; /* index of next free mem entry */ + uint16_t free_list_blk; /* used when mtbl is on mtbl free list and to track free blks */ + uint16_t lru_first; /* index of first block on lru list */ + uint16_t lru_last; /* index of last block on lru list */ + uint16_t dirty_list; /* index of first dirty block */ + uint16_t ref_cnt; /* number of clients using this record */ + uint16_t bucket[MEM_TABLE_HASH_MAX]; /* bucket may contain index of ment */ + char pad[4]; + struct mem_ent_s mem[MEM_TABLE_ENTRY_COUNT]; + char pad2[8]; +}; + +/** One allocation block in the cache + * + * Either a block of memory or a block of mtbls + */ +union cache_block_u +{ + struct mem_table_s mtbl[MTBL_PER_BLOCK]; + char mblk[CACHE_BLOCK_SIZE_K * 1024]; +}; + +/** A link for one file in the top level hash table + * + */ +/* 24 bytes */ +struct file_ent_s +{ + uint64_t tag_handle; /* PVFS_handle */ + uint32_t tag_id; /* PVFS_fs_id */ + uint16_t mtbl_blk; /* block index of this mtbl */ + uint16_t mtbl_ent; /* entry index of this mtbl */ + uint16_t next; /* next fent in chain */ + uint16_t index; /* fent index in ftbl */ + char pad[4]; +}; + +/** A hash table to find caches for specific files + * + * Keyed on 
fs_id/handle of the file + */ +struct file_table_s +{ + uint16_t free_blk; /* index of the next free block */ + uint16_t free_mtbl_blk; /* block index of next free mtbl */ + uint16_t free_mtbl_ent; /* entry index of next free mtbl */ + uint16_t free_list; /* index of next free file entry */ + char pad[8]; + struct file_ent_s file[FILE_TABLE_ENTRY_COUNT]; +}; + +/** The whole system wide cache + * + */ +union user_cache_u +{ + struct file_table_s ftbl; + union cache_block_u b[0]; /* actual size of this varies */ +}; + +struct ucache_ref_s +{ + union user_cache_u *ucache; /* pointer to ucache shmem */ + ucache_lock_t *ucache_locks; /* pointer to ucache locks */ +}; + +/* externally visible API */ +union user_cache_u *get_ucache(void); +int ucache_initialize(void); +int ucache_open_file(PVFS_fs_id *fs_id, + PVFS_handle *handle, + struct file_ent_s **fent); +int ucache_close_file(struct file_ent_s *fent); +inline struct mem_table_s *get_mtbl(uint16_t mtbl_blk, uint16_t mtbl_ent); +inline void *ucache_lookup(struct file_ent_s *fent, uint64_t offset, uint16_t *block_ndx); +inline void *ucache_insert(struct file_ent_s *fent, + uint64_t offset, + uint16_t *block_ndx); +int ucache_info(FILE *out, char *flags); + +int ucache_flush_cache(void); +int ucache_flush_file(struct file_ent_s *fent); + +/* Don't call this except in ucache daemon */ +int ucache_init_file_table(char forceCreation); + +/* Used only in testing */ +int wipe_ucache(void); + +/* Lock Routines */ +inline ucache_lock_t *get_lock(uint16_t block_index); +int lock_init(ucache_lock_t * lock); +inline int lock_lock(ucache_lock_t * lock); +inline int lock_unlock(ucache_lock_t * lock); +inline int lock_trylock(ucache_lock_t * lock); + +#endif /* UCACHE_H */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/common/gen-locks/gen-win-locks.c b/src/common/gen-locks/gen-win-locks.c new file mode 100755 index 0000000..1902e54 --- 
/dev/null +++ b/src/common/gen-locks/gen-win-locks.c @@ -0,0 +1,748 @@ +/* + * (C) 2001-2011 Clemson University, The University of Chicago and + * Omnibond LLC + * + * See COPYING in top-level directory. + */ + + +/* This code implements generic locking that can be turned on or off at + * compile time. + */ + +#ifndef _WIN64 +#define _USE_32BIT_TIME_T +#endif + +#include +#include +#include +#include + +#include "gen-locks.h" + +/*************************************************************** + * visible functions + */ + +#ifndef __GEN_NULL_LOCKING__ + +/* Global variables */ +/* TODO: may need to init and delete in DLL enter/exit functions */ +LPCRITICAL_SECTION cond_list_lock = NULL; +LPCRITICAL_SECTION cond_test_init_lock = NULL; +LPCRITICAL_SECTION mutex_test_init_lock = NULL; + +gen_cond_t cond_list_head = NULL; +gen_cond_t cond_list_tail = NULL; + +/* This macro sets the value of errno + * based on the Windows error code. + */ +#define SET_ERROR(winerr) switch(winerr) { \ + case ERROR_SUCCESS: errno = 0; \ + break; \ + case ERROR_NOT_ENOUGH_MEMORY: \ + case ERROR_OUTOFMEMORY: errno = ENOMEM; \ + break; \ + case ERROR_ACCESS_DENIED: errno = EPERM; \ + break; \ + case ERROR_INVALID_HANDLE: \ + case ERROR_INVALID_PARAMETER: errno = EINVAL; \ + break; \ + case WAIT_TIMEOUT: errno = ETIMEDOUT; \ + break; \ + default: errno = winerr; \ + } + +/* + * gen_mutex_init() + * + * initializes a previously declared mutex + * + * returns 0 on success, -1 and sets errno on failure. + */ +int gen_win_mutex_init( + HANDLE *mut) +{ + if (mut == NULL) + { + errno = EINVAL; + return -1; + } + + *mut = CreateMutex(NULL, FALSE, NULL); + if (*mut == NULL) + { + DWORD err = GetLastError(); + SET_ERROR(err) + } + + return (*mut) ? 0 : -1; +} + +/* + * gen_mutex_lock() + * + * blocks until it obtains a mutex lock on the given mutex + * + * returns 0 on success, -1 and sets errno on failure. 
+ */ +int gen_win_mutex_lock( + HANDLE *mut) +{ + DWORD dwWaitResult; + int result = 0; + + if (*mut == GEN_MUTEX_INITIALIZER) + { + /* initialize default mutex */ + if (mutex_test_init_lock == NULL) + { + mutex_test_init_lock = (LPCRITICAL_SECTION) calloc(1, sizeof(CRITICAL_SECTION)); + InitializeCriticalSection(mutex_test_init_lock); + } + + EnterCriticalSection(mutex_test_init_lock); + + gen_mutex_init(mut); + + LeaveCriticalSection(mutex_test_init_lock); + } + + if (mut == NULL || *mut == NULL) + { + errno = EINVAL; + return -1; + } + + dwWaitResult = WaitForSingleObject(*mut, INFINITE); + + if (dwWaitResult != WAIT_OBJECT_0 && dwWaitResult != WAIT_ABANDONED) + { + DWORD err = GetLastError(); + result = -1; + SET_ERROR(err) + } + + return result; +} + + +/* + * gen_mutex_unlock() + * + * releases a lock held on a mutex + * + * returns 0 on success, -1 and sets errno on failure + */ +int gen_win_mutex_unlock( + HANDLE *mut) +{ + BOOL rc; + + if (mut == NULL || *mut == NULL) + { + errno = EINVAL; + return -1; + } + + rc = ReleaseMutex(*mut); + if (!rc) + { + DWORD err = GetLastError(); + SET_ERROR(err) + } + + return (rc) ? 0 : -1; +} + + +/* + * pthread_mutex_trylock() + * + * nonblocking attempt to acquire a lock. 
+ * + * returns 0 on success, -1 and sets errno on failure, sets errno to EBUSY + * if it cannot obtain the lock + */ +int gen_win_mutex_trylock( + HANDLE *mut) +{ + DWORD dwWaitResult; + int rc; + + if (*mut == GEN_MUTEX_INITIALIZER) + { + /* initialize default mutex */ + if (mutex_test_init_lock == NULL) + { + mutex_test_init_lock = (LPCRITICAL_SECTION) calloc(1, sizeof(CRITICAL_SECTION)); + InitializeCriticalSection(mutex_test_init_lock); + } + + EnterCriticalSection(mutex_test_init_lock); + + gen_mutex_init(mut); + + LeaveCriticalSection(mutex_test_init_lock); + } + + if (mut == NULL || *mut == NULL) + { + errno = EINVAL; + return -1; + } + + dwWaitResult = WaitForSingleObject(*mut, 0); + if (dwWaitResult == WAIT_OBJECT_0 || dwWaitResult == WAIT_ABANDONED) + { + rc = 0; + } + else + { + rc = -1; + if (dwWaitResult == WAIT_TIMEOUT) + { + errno = EBUSY; + } + else + { + DWORD err = GetLastError(); + SET_ERROR(err); + } + } + + return rc; +} + +/* + * gen_mutex_destroy() + * + * uninitializes the mutex and frees all memory associated with it. + * + * returns 0 on success, -errno on failure. 
+ */ +int gen_win_mutex_destroy( + HANDLE *mut) +{ + + if (mut == NULL || *mut == NULL) + { + errno = EINVAL; + return (-EINVAL); + } + + CloseHandle(*mut); + + /* set mutex back to initializer value */ + *mut = GEN_MUTEX_INITIALIZER; + + return 0; +} + +HANDLE gen_win_thread_self(void) +{ + return GetCurrentThread(); +} + +_inline int cond_check_need_init(gen_cond_t *cond) +{ + int result = 0; + + /* initialize critical section if necessary */ + if (cond_test_init_lock == NULL) + { + cond_test_init_lock = (LPCRITICAL_SECTION) calloc(1, sizeof(CRITICAL_SECTION)); + InitializeCriticalSection(cond_test_init_lock); + } + + /* initialize condition variable created with GEN_COND_INITIALIZER */ + EnterCriticalSection(cond_test_init_lock); + + if (*cond == GEN_COND_INITIALIZER) + { + result = gen_cond_init(cond); + } + else if (*cond == NULL) + { + result = EINVAL; + } + + LeaveCriticalSection(cond_test_init_lock); + + return result; +} + +int gen_win_cond_destroy(gen_cond_t *cond) +{ + gen_cond_t cv; + int result = 0, result1 = 0, result2 = 0; + + if(!cond || !(*cond)) + { + return EINVAL; + } + + if (*cond != GEN_COND_INITIALIZER) + { + EnterCriticalSection(cond_list_lock); + + cv = *cond; + + if (WaitForSingleObject(cv->semBlockLock, INFINITE) != WAIT_OBJECT_0) + { + return errno; + } + + if ((result = gen_mutex_trylock(&(cv->mtxUnblockLock))) != 0) + { + ReleaseSemaphore(cv->semBlockLock, 1, NULL); + return errno; + } + + if (cv->nWaitersBlocked > cv->nWaitersGone) + { + if (!ReleaseSemaphore(cv->semBlockLock, 1, NULL)) + { + result = GetLastError(); + SET_ERROR(result) + } + result1 = gen_mutex_unlock(&(cv->mtxUnblockLock)); + result2 = EBUSY; + } + else + { + /* Now it is safe to destroy */ + *cond = NULL; + + if (!CloseHandle(cv->semBlockLock)) + { + DWORD err = GetLastError(); + SET_ERROR(err) + result = errno; + } + if (!CloseHandle(cv->semBlockQueue)) + { + DWORD err = GetLastError(); + SET_ERROR(err) + result1 = errno; + } + if ((result2 = 
gen_mutex_unlock(&(cv->mtxUnblockLock))) == 0) + { + result2 = gen_mutex_destroy(&(cv->mtxUnblockLock)); + } + + /* Unlink the CV from the list */ + if (cond_list_head == cv) + { + cond_list_head = cv->next; + } + else + { + cv->prev->next = cv->next; + } + + if (cond_list_tail == cv) + { + cond_list_tail = cv->prev; + } + else { + cv->next->prev = cv->prev; + } + + free(cv); + } + + LeaveCriticalSection(cond_list_lock); + } + else + { + EnterCriticalSection(cond_test_init_lock); + + if (*cond == GEN_COND_INITIALIZER) + { + *cond = NULL; + } + else + { + result = EBUSY; + } + + LeaveCriticalSection(cond_test_init_lock); + } + + return ((result != 0) ? result : ((result1 != 0) ? result1 : result2)); +} + +typedef struct +{ + gen_mutex_t *mutexPtr; + gen_cond_t cv; + int *resultPtr; +} cond_wait_cleanup_args_t; + +static void __cdecl cond_wait_cleanup(void *args) +{ + cond_wait_cleanup_args_t *cleanup_args = (cond_wait_cleanup_args_t *) args; + gen_cond_t cv = cleanup_args->cv; + int *resultPtr = cleanup_args->resultPtr; + int nSignalsWasLeft; + int result; + + if ((result = gen_mutex_lock(&(cv->mtxUnblockLock))) != 0) + { + *resultPtr = result; + return; + } + + if ((nSignalsWasLeft = cv->nWaitersToUnblock) != 0) + { + --(cv->nWaitersToUnblock); + } + else if (INT_MAX / 2 == ++(cv->nWaitersGone)) + { + if (WaitForSingleObject(cv->semBlockLock, INFINITE) != WAIT_OBJECT_0) + { + *resultPtr = (int) GetLastError(); + return; + } + cv->nWaitersBlocked -= cv->nWaitersGone; + if (!ReleaseSemaphore(cv->semBlockLock, 1, NULL)) + { + *resultPtr = (int) GetLastError(); + return; + } + cv->nWaitersGone = 0; + } + + if ((result = gen_mutex_unlock(&(cv->mtxUnblockLock))) != 0) + { + *resultPtr = result; + return; + } + + if (nSignalsWasLeft == 1) + { + if (!ReleaseSemaphore(cv->semBlockLock, 1, NULL)) + { + *resultPtr = (int) GetLastError(); + return; + } + } + + if ((result = gen_mutex_lock(cleanup_args->mutexPtr)) != 0) + { + *resultPtr = result; + } + +} + +static _inline int 
cond_timedwait(gen_cond_t *cond, + HANDLE *mutex, const struct timespec *abstime) +{ + int result = 0; + gen_cond_t cv; + cond_wait_cleanup_args_t cleanup_args; + struct _timeb curtime; + unsigned int nano_ms, ms_diff; + + if (cond == NULL || *cond == NULL) + { + return EINVAL; + } + + if (*cond == GEN_COND_INITIALIZER) + { + result = cond_check_need_init(cond); + } + + if (result != 0 && result != EBUSY) + { + return result; + } + + cv = *cond; + + if ((result = WaitForSingleObject(cv->semBlockLock, INFINITE)) != 0) + { + SET_ERROR(result) + return errno; + } + + ++(cv->nWaitersBlocked); + + if (!ReleaseSemaphore(cv->semBlockLock, 1, NULL)) + { + DWORD err = GetLastError(); + SET_ERROR(err) + return errno; + } + + cleanup_args.mutexPtr = mutex; + cleanup_args.cv = cv; + cleanup_args.resultPtr = &result; + +#pragma inline_depth(0) + + /* Now we can release mutex and... */ + if ((result = gen_mutex_unlock(mutex)) == 0) + { + /* convert difference in times to milliseconds */ + DWORD ms = INFINITE; + if (abstime) + { + nano_ms = abstime->tv_nsec / 1000000L; + _ftime_s(&curtime); + ms = (abstime->tv_sec - curtime.time) > 0 ? (abstime->tv_sec - curtime.time) * 1000 : 0; + if (ms > 0) + { + if (nano_ms >= curtime.millitm) + { + ms_diff = nano_ms - curtime.millitm; + } + else + { + ms_diff = nano_ms + 1000 - curtime.millitm; + ms -= 1000; + } + } + else + { + ms_diff = (nano_ms >= curtime.millitm) ? 
nano_ms - curtime.millitm : 0; + } + ms += ms_diff; + } + /* always wait at least 1ms so we get WAIT_TIMEOUT result */ + if (ms == 0) ms = 1; + + result = WaitForSingleObject(cv->semBlockQueue, ms); + SET_ERROR(result) + result = errno; + } + else + { + result = errno; + } + + cond_wait_cleanup(&cleanup_args); + +#pragma inline_depth() + + return result; +} + +int gen_win_cond_wait(gen_cond_t *cond, HANDLE *mut) +{ + return cond_timedwait(cond, mut, NULL); +} + +int gen_win_cond_timedwait(gen_cond_t *cond, HANDLE *mut, + const struct timespec *abstime) +{ + return cond_timedwait(cond, mut, abstime); +} + +static _inline int cond_unblock(gen_cond_t *cond, int unblockAll) +{ + int result; + gen_cond_t cv; + int nSignalsToIssue; + + if (cond == NULL || *cond == NULL) + { + return EINVAL; + } + + errno = 0; + + cv = *cond; + + /* uninitialized static cv */ + if (cv == GEN_COND_INITIALIZER) + { + return 0; + } + + if ((result = gen_mutex_lock(&(cv->mtxUnblockLock))) != 0) + { + return errno; + } + + if (cv->nWaitersToUnblock != 0) + { + if (cv->nWaitersBlocked == 0) + { + result = gen_mutex_unlock(&(cv->mtxUnblockLock)); + return (result == 0) ? 
0 : errno; + } + if (unblockAll) + { + cv->nWaitersToUnblock += (nSignalsToIssue = cv->nWaitersBlocked); + cv->nWaitersBlocked = 0; + } + else + { + nSignalsToIssue = 1; + cv->nWaitersToUnblock++; + cv->nWaitersBlocked--; + } + } + else if (cv->nWaitersBlocked > cv->nWaitersGone) + { + if (WaitForSingleObject(cv->semBlockLock, INFINITE) != WAIT_OBJECT_0) + { + result = GetLastError(); + SET_ERROR(result) + gen_mutex_unlock(&(cv->mtxUnblockLock)); + return errno; + } + if (cv->nWaitersGone != 0) + { + cv->nWaitersBlocked -= cv->nWaitersGone; + } + if (unblockAll) + { + nSignalsToIssue = cv->nWaitersToUnblock = cv->nWaitersBlocked; + cv->nWaitersBlocked = 0; + } + else + { + nSignalsToIssue = cv->nWaitersToUnblock = 1; + cv->nWaitersBlocked--; + } + } + else + { + result = gen_mutex_unlock(&(cv->mtxUnblockLock)); + return (result == 0) ? 0 : errno; + } + + if ((result = gen_mutex_unlock(&(cv->mtxUnblockLock))) == 0) + { + if (!ReleaseSemaphore(cv->semBlockQueue, nSignalsToIssue, NULL)) + { + result = GetLastError(); + SET_ERROR(result) + } + } + + + return errno; +} + +int gen_win_cond_signal(gen_cond_t *cond) +{ + return cond_unblock(cond, FALSE); +} + +int gen_win_cond_broadcast(gen_cond_t *cond) +{ + return cond_unblock(cond, TRUE); +} + +int gen_win_cond_init(gen_cond_t *cond) +{ + DWORD err; + gen_cond_t cv = NULL; + + if (!cond) + { + return EINVAL; + } + + /* Allocate condition variable */ + cv = (gen_cond_t) calloc(1, sizeof(*cv)); + if (cv == NULL) + { + err = ENOMEM; + goto DONE; + } + + /* Create locking semaphore */ + cv->semBlockLock = CreateSemaphore(NULL, 1, LONG_MAX, NULL); + if (cv->semBlockLock == NULL) + { + err = GetLastError(); + SET_ERROR(err) + goto FAIL0; + } + + /* Create queue semaphore */ + cv->semBlockQueue = CreateSemaphore(NULL, 0, LONG_MAX, NULL); + if (cv->semBlockQueue == NULL) + { + err = GetLastError(); + SET_ERROR(err) + goto FAIL1; + } + + /* Create unblock/lock mutex */ + if ((err = gen_mutex_init(&(cv->mtxUnblockLock))) != 0) + 
{ + SET_ERROR(err) + goto FAIL2; + } + + err = 0; + + goto DONE; + + /* + * Error conditions + */ +FAIL2: + CloseHandle(cv->semBlockQueue); + +FAIL1: + CloseHandle(cv->semBlockLock); + +FAIL0: + free(cv); + cv = NULL; + +DONE: + if (err == 0) + { + if (cond_list_lock == NULL) + { + cond_list_lock = (LPCRITICAL_SECTION) calloc(1, sizeof(CRITICAL_SECTION)); + InitializeCriticalSection(cond_list_lock); + } + + EnterCriticalSection(cond_list_lock); + + cv->next = NULL; + cv->prev = cond_list_tail; + + if (cond_list_tail != NULL) + { + cond_list_tail->next = cv; + } + + cond_list_tail = cv; + + if (cond_list_head == NULL) + { + cond_list_head = cv; + } + + LeaveCriticalSection(cond_list_lock); + + } + + *cond = cv; + + return errno; +} + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/misc/pint-uid-mgmt.c b/src/common/misc/pint-uid-mgmt.c new file mode 100644 index 0000000..e1bb717 --- /dev/null +++ b/src/common/misc/pint-uid-mgmt.c @@ -0,0 +1,198 @@ +#include "pint-uid-mgmt.h" +#include "pint-util.h" +#include "gen-locks.h" + +static list_head_t *uid_lru_list = NULL; +static hash_table_t *uid_hash_table = NULL; + +static gen_mutex_t uid_mgmt_mutex = GEN_MUTEX_INITIALIZER; + +static int uid_hash_compare_keys(void* key, list_head_t *link); + +/* PINT_uid_mgmt_initialize() + * + * Allocate memory for the uid management interface. A linked list is + * used to implement lru eviction, and a hash table is used to locate + * existing uid entries quickly. 
+ */ +int PINT_uid_mgmt_initialize() +{ + list_head_t *list; + hash_table_t *hash_tbl; + PINT_uid_mgmt_s *tmp, *rover; + int i; + int ret = 0; + + /* free any already existing hash table and linked list */ + if (uid_lru_list) + { + qlist_for_each_entry_safe(rover, tmp, uid_lru_list, lru_link) + { + free(rover); + } + free(uid_lru_list); + uid_lru_list = NULL; + } + + if (uid_hash_table) + { + qhash_finalize(uid_hash_table); + uid_hash_table = NULL; + } + + /* initialize the linked list and the hash table */ + list = (list_head_t *)malloc(sizeof(list_head_t)); + if (!list) + { + ret = -PVFS_ENOMEM; + return ret; + } + INIT_QLIST_HEAD(list); + + hash_tbl = qhash_init(uid_hash_compare_keys, quickhash_32bit_hash, UID_HISTORY_HASH_TABLE_SIZE); + if (!hash_tbl) + { + ret = -PVFS_ENOMEM; + return ret; + } + + /* zero out the fields of uid structure, so they are not "occupied" */ + for (i = 0; i < UID_MGMT_MAX_HISTORY; i++) + { + tmp = (PINT_uid_mgmt_s *)malloc(sizeof(PINT_uid_mgmt_s)); + if (!tmp) + { + ret = -PVFS_ENOMEM; + return ret; + } + tmp->info.count = 0; + tmp->info.uid = 0; + qlist_add_tail(&(tmp->lru_link), list); + } + + uid_lru_list = list; + uid_hash_table = hash_tbl; + + return 0; +} + +/* PINT_uid_mgmt_finalize() + * + * Free all memory associated with the uid managment interface. + */ +void PINT_uid_mgmt_finalize() +{ + PINT_uid_mgmt_s *rover, *tmp; + + if (uid_lru_list) + { + qlist_for_each_entry_safe(rover, tmp, uid_lru_list, lru_link) + { + free(rover); + } + free(uid_lru_list); + uid_lru_list = NULL; + } + + if (uid_hash_table) + { + qhash_finalize(uid_hash_table); + uid_hash_table = NULL; + } + + return; +} + +/* PINT_add_user_to_uid_mgmt() + * + * This function is called to add new PVFS_uid's to the uid management + * interface. 
LRU eviction is used to keep list "recent" + */ +int PINT_add_user_to_uid_mgmt(PVFS_uid userID) +{ + list_head_t *found = NULL; + PINT_uid_mgmt_s *tmp = NULL; + int ret = 0; + + if ((!uid_hash_table) || (!uid_lru_list)) + { + ret = -PVFS_ENODATA; + return ret; + } + + /* search the hash table for our uid */ + found = qhash_search(uid_hash_table, &userID); + if (found) + { + tmp = qlist_entry(found, PINT_uid_mgmt_s, hash_link); + tmp->info.count++; + PINT_util_get_current_timeval(&(tmp->info.tv)); + } + else + { + /* evict a node from the tail of the list and add new uid */ + tmp = qlist_entry(uid_lru_list->prev, PINT_uid_mgmt_s, lru_link); + if (tmp->info.count) + { + /* make sure to remove this entry from the hash table if + the count variable has already been defined (not 0) */ + qhash_search_and_remove(uid_hash_table, &(tmp->info.uid)); + } + tmp->info.count = 1; + tmp->info.uid = userID; + PINT_util_get_current_timeval(&(tmp->info.tv)); + qhash_add(uid_hash_table, &(tmp->info.uid), &(tmp->hash_link)); + } + + /* splice the linked list around our tmp node, then move this + tmp node to the head of the lru eviction list */ + tmp->lru_link.prev->next = tmp->lru_link.next; + tmp->lru_link.next->prev = tmp->lru_link.prev; + qlist_add(&(tmp->lru_link), uid_lru_list); + + return 0; +} + +/* uid_hash_compare_keys() + * + * Compare will return true if hash entry has same uid as a given key. + */ +static int uid_hash_compare_keys(void* key, list_head_t *link) +{ + PVFS_uid uid = *(PVFS_uid *)key; + PINT_uid_mgmt_s *tmp_entry = NULL; + + tmp_entry = qhash_entry(link, PINT_uid_mgmt_s, hash_link); + + if (uid == tmp_entry->info.uid) + { + return 1; + } + return 0; +} + +/* PINT_dump_all_uid_stats() + * + * This function gathers all uid statistics (even inactive structures) + * and stores them in the array that is passed in. 
+ */ +void PINT_dump_all_uid_stats(PVFS_uid_info_s *uid_array) +{ + int i = 0; + list_head_t *rover = uid_lru_list->next; + PINT_uid_mgmt_s *tmp; + + gen_mutex_lock(&uid_mgmt_mutex); + + /* now that we have acquired the lock for the list, fill in our array + * with the uid statistics + */ + for (i = 0; i < UID_MGMT_MAX_HISTORY; i++, rover = rover->next) + { + tmp = qlist_entry(rover, PINT_uid_mgmt_s, lru_link); + uid_array[i] = tmp->info; + } + gen_mutex_unlock(&uid_mgmt_mutex); + + return; +} diff --git a/src/common/misc/pint-uid-mgmt.h b/src/common/misc/pint-uid-mgmt.h new file mode 100644 index 0000000..9f95aa4 --- /dev/null +++ b/src/common/misc/pint-uid-mgmt.h @@ -0,0 +1,53 @@ +#ifndef __PINT_UID_MGMT_H +#define __PINT_UID_MGMT_H + +#include "quicklist.h" +#include "quickhash.h" +#include "pvfs2-types.h" + +/* UID_MGMT_MAX_HISTORY is the number of UIDs stored in history + * UID_HISTORY_HASH_TABLE_SIZE is the size of the hash tbl used to store uids + */ +#define UID_MGMT_MAX_HISTORY 25 +#define UID_HISTORY_HASH_TABLE_SIZE 19 + +typedef struct qlist_head list_head_t; +typedef struct qhash_table hash_table_t; + +/* information stored in each uid management structure defined below */ +typedef struct + { + PVFS_uid uid; + uint64_t count; + struct timeval tv; + } PVFS_uid_info_s; +endecode_fields_2_struct( + timeval, + uint64_t, tv_sec, + uint32_t, tv_usec); +endecode_fields_3( + PVFS_uid_info_s, + PVFS_uid, uid, + uint64_t, count, + timeval, tv); + +/* our uid management structure */ +typedef struct + { + PVFS_uid_info_s info; + list_head_t lru_link; + list_head_t hash_link; + } PINT_uid_mgmt_s; + +/* macro helper to determine if a UID is within the history or not */ +#define IN_UID_HISTORY(current, oldest) \ + (((current.tv_sec * 1e6) + current.tv_usec) > \ + ((oldest.tv_sec * 1e6) + oldest.tv_usec)) + +/* FUNCTION PROTOTYPES */ +int PINT_uid_mgmt_initialize(void); +void PINT_uid_mgmt_finalize(void); +int PINT_add_user_to_uid_mgmt(PVFS_uid userID); +void 
PINT_dump_all_uid_stats(PVFS_uid_info_s *uid_stats); + +#endif /* __PINT_UID_MGMT_H */ diff --git a/src/common/misc/pvfs2-win-util.c b/src/common/misc/pvfs2-win-util.c new file mode 100755 index 0000000..fb293ad --- /dev/null +++ b/src/common/misc/pvfs2-win-util.c @@ -0,0 +1,2117 @@ +/* + * (C) 2001-2011 Clemson University, The University of Chicago and + * Omnibond LLC + * + * Changes by Acxiom Corporation to add relative path support to + * PVFS_util_resolve(), + * Copyright © Acxiom Corporation, 2005 + * + * See COPYING in top-level directory. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pvfs2-config.h" +#include "pvfs2-sysint.h" +#include "pvfs2-util.h" +#include "pvfs2-debug.h" +#include "gossip.h" +#include "pvfs2-attr.h" +#include "pvfs2-types-debug.h" +#include "str-utils.h" +#include "gen-locks.h" +#include "realpath.h" +#include "pint-sysint-utils.h" +#include "pvfs2-internal.h" +#include "pint-util.h" + +#ifdef HAVE_MNTENT_H + +#include +#define PINT_fstab_t FILE +#define PINT_fstab_entry_t struct mntent +#define PINT_fstab_open(_fstab, _fname) (_fstab) = setmntent(_fname, "r") +#define PINT_fstab_close(_tab) endmntent(_tab) +#define PINT_fstab_next_entry(_tab) getmntent(_tab) +#define PINT_fstab_entry_destroy(_entry) _entry = NULL +#define PINT_fstab_entry_hasopt(_entry, _opt) hasmntopt(_entry, _opt) + +#define PINT_FSTAB_NAME(_entry) (_entry)->mnt_fsname +#define PINT_FSTAB_PATH(_entry) (_entry)->mnt_dir +#define PINT_FSTAB_TYPE(_entry) (_entry)->mnt_type +#define PINT_FSTAB_OPTS(_entry) (_entry)->mnt_opts + +#elif HAVE_FSTAB_H + +#include +#define PINT_fstab_t FILE +#define PINT_fstab_entry_t struct fstab +#define PINT_fstab_open(_fstab, _fname) _fstab = fopen(_fname, "r") +#define PINT_fstab_close(_tab) fclose(_tab) +#define PINT_fstab_next_entry(_tab) PINT_util_my_get_next_fsent(_tab) +#define PINT_fstab_entry_destroy(_entry) PINT_util_fsent_destroy(_entry) +#define 
PINT_fstab_entry_hasopt(_entry, _opt) strstr((_entry)->fs_mntops, _opt) + +#define PINT_FSTAB_NAME(_entry) (_entry)->fs_spec +#define PINT_FSTAB_PATH(_entry) (_entry)->fs_file +#define PINT_FSTAB_TYPE(_entry) (_entry)->fs_vfstype +#define PINT_FSTAB_OPTS(_entry) (_entry)->fs_mntops + +#define DEFINE_MY_GET_NEXT_FSENT +static struct fstab * PINT_util_my_get_next_fsent(PINT_fstab_t * tab); +static void PINT_util_fsent_destroy(PINT_fstab_entry_t * entry); + +#elif defined(WIN32) + +/* define our own simplified fstab */ +struct fstab { + char *fs_spec; + char *fs_file; + char *fs_vfstype; + char *fs_type; + char *fs_mntops; +}; + +#define PINT_fstab_t FILE +#define PINT_fstab_entry_t struct fstab +#define PINT_fstab_open(_fstab, _fname) _fstab = fopen(_fname, "r") +#define PINT_fstab_close(_tab) fclose(_tab) +#define PINT_fstab_next_entry(_tab) PINT_util_my_get_next_fsent(_tab) +#define PINT_fstab_entry_destroy(_entry) PINT_util_fsent_destroy(_entry) +#define PINT_fstab_entry_hasopt(_entry, _opt) strstr((_entry)->fs_mntops, _opt) + +#define PINT_FSTAB_NAME(_entry) (_entry)->fs_spec +#define PINT_FSTAB_PATH(_entry) (_entry)->fs_file +#define PINT_FSTAB_TYPE(_entry) (_entry)->fs_vfstype +#define PINT_FSTAB_OPTS(_entry) (_entry)->fs_mntops + +#define DEFINE_MY_GET_NEXT_FSENT +static struct fstab * PINT_util_my_get_next_fsent(PINT_fstab_t * tab); +static void PINT_util_fsent_destroy(PINT_fstab_entry_t * entry); + +#else + +#error OS does not have mntent.h or fstab.h. +#error Add your own fstab parser macros to fix. 
+ +#endif + +#define PVFS2_MAX_INVALID_MNTENTS 256 +#define PVFS2_MAX_TABFILES 8 +#define PVFS2_DYNAMIC_TAB_INDEX (PVFS2_MAX_TABFILES - 1) +#define PVFS2_DYNAMIC_TAB_NAME "" + +static PVFS_util_tab s_stat_tab_array[PVFS2_MAX_TABFILES]; +static int s_stat_tab_count = 0; +static gen_mutex_t s_stat_tab_mutex = GEN_MUTEX_INITIALIZER; + +static int parse_flowproto_string( + const char *input, + enum PVFS_flowproto_type *flowproto); + +static int parse_encoding_string( + const char *cp, + enum PVFS_encoding_type *et); + +static int parse_num_dfiles_string(const char* cp, int* num_dfiles); + +static int PINT_util_resolve_absolute( + const char* local_path, + PVFS_fs_id* out_fs_id, + char* out_fs_path, + int out_fs_path_max); + +struct PVFS_sys_mntent* PVFS_util_gen_mntent( + char* config_server, + char* fs_name) +{ + struct PVFS_sys_mntent* tmp_ent = NULL; + + tmp_ent = (struct PVFS_sys_mntent*)malloc(sizeof(struct + PVFS_sys_mntent)); + if(!tmp_ent) + { + return(NULL); + } + memset(tmp_ent, 0, sizeof(struct PVFS_sys_mntent)); + + tmp_ent->num_pvfs_config_servers = 1; + tmp_ent->pvfs_config_servers = (char**)malloc(sizeof(char*)); + if(!tmp_ent->pvfs_config_servers) + { + free(tmp_ent); + return(NULL); + } + + tmp_ent->pvfs_config_servers[0] = strdup(config_server); + if(!tmp_ent->pvfs_config_servers[0]) + { + free(tmp_ent->pvfs_config_servers); + free(tmp_ent); + return(NULL); + } + + tmp_ent->pvfs_fs_name = strdup(fs_name); + if(!tmp_ent->pvfs_fs_name) + { + free(tmp_ent->pvfs_config_servers[0]); + free(tmp_ent->pvfs_config_servers); + free(tmp_ent); + return(NULL); + } + + tmp_ent->flowproto = FLOWPROTO_DEFAULT; + tmp_ent->encoding = PVFS2_ENCODING_DEFAULT; + + return(tmp_ent); +} + +void PVFS_util_gen_mntent_release(struct PVFS_sys_mntent* mntent) +{ + free(mntent->pvfs_config_servers[0]); + free(mntent->pvfs_config_servers); + free(mntent->pvfs_fs_name); + free(mntent); + return; +} + + +int PVFS_util_get_umask(void) +{ + static int mask = 0, set = 0; + + if (set == 
0) + { + mask = (int)_umask(0); + _umask(mask); + set = 1; + } + return mask; +} + + +PVFS_credentials *PVFS_util_dup_credentials( + const PVFS_credentials *credentials) +{ + PVFS_credentials *ret = NULL; + + if (credentials) + { + ret = (PVFS_credentials *) malloc(sizeof(PVFS_credentials)); + if (ret) + { + memcpy(ret, credentials, sizeof(PVFS_credentials)); + } + } + return ret; +} + +void PVFS_util_release_credentials( + PVFS_credentials *credentials) +{ + if (credentials) + { + free(credentials); + } +} + +int PVFS_util_copy_sys_attr( + PVFS_sys_attr *dest_attr, PVFS_sys_attr *src_attr) +{ + int ret = -PVFS_EINVAL; + + if (src_attr && dest_attr) + { + dest_attr->owner = src_attr->owner; + dest_attr->group = src_attr->group; + dest_attr->perms = src_attr->perms; + dest_attr->atime = src_attr->atime; + dest_attr->mtime = src_attr->mtime; + dest_attr->ctime = src_attr->ctime; + dest_attr->dfile_count = src_attr->dfile_count; + dest_attr->objtype = src_attr->objtype; + dest_attr->mask = src_attr->mask; + dest_attr->flags = src_attr->flags; + + if (src_attr->mask & PVFS_ATTR_SYS_SIZE) + { + dest_attr->size = src_attr->size; + } + + if((src_attr->mask & PVFS_ATTR_SYS_LNK_TARGET) && + src_attr->link_target) + { + dest_attr->link_target = strdup(src_attr->link_target); + if (!dest_attr->link_target) + { + ret = -PVFS_ENOMEM; + return ret; + } + } + else if ((src_attr->mask & PVFS_ATTR_SYS_DIR_HINT)) + { + if (src_attr->dist_name) + { + dest_attr->dist_name = strdup(src_attr->dist_name); + if (dest_attr->dist_name == NULL) + { + ret = -PVFS_ENOMEM; + return ret; + } + } + if (src_attr->dist_params) + { + dest_attr->dist_params = strdup(src_attr->dist_params); + if (dest_attr->dist_params == NULL) + { + free(dest_attr->dist_name); + ret = -PVFS_ENOMEM; + return ret; + } + } + } + ret = 0; + } + return ret; +} + +void PVFS_util_release_sys_attr(PVFS_sys_attr *attr) +{ + if (attr) + { + if ((attr->mask & PVFS_ATTR_SYS_TYPE) && + (attr->objtype == PVFS_TYPE_SYMLINK) && 
attr->link_target) + { + free(attr->link_target); + attr->link_target = NULL; + } + else if ((attr->mask & PVFS_ATTR_SYS_DIR_HINT) && + (attr->objtype == PVFS_TYPE_DIRECTORY)) + { + if (attr->dist_name) + free(attr->dist_name); + if (attr->dist_params) + free(attr->dist_params); + attr->dist_name = NULL; + attr->dist_params = NULL; + } + } +} + +/* PVFS_util_parse_pvfstab() + * + * parses either the file pointed to by the PVFS2TAB_FILE env + * variable, or /etc/fstab, or /etc/pvfs2tab or ./pvfs2tab to extract + * pvfs2 mount entries. + * + * NOTE: if tabfile argument is given at runtime to specify which + * tabfile to use, then that will be the _only_ file searched for + * pvfs2 entries. + * + * example entry: + * tcp://localhost:3334/pvfs2-fs /mnt/pvfs2 pvfs2 defaults 0 0 + * + * returns const pointer to internal tab structure on success, NULL on + * failure + */ +const PVFS_util_tab *PVFS_util_parse_pvfstab( + const char *tabfile) +{ + PINT_fstab_t *mnt_fp = NULL; + int file_count = 5; + /* NOTE: mtab should be last for clean error logic below */ +/* const char *file_list[5] = + { NULL, "/etc/fstab", "/etc/pvfs2tab", "pvfs2tab", "/etc/mtab" }; */ + /* just parse a specified file (by caller or environment) */ + const char *file_list[1] = { NULL }; + const char *targetfile = NULL; + PINT_fstab_entry_t *tmp_ent; + int i, j; + int ret = -1; + int tmp_mntent_count = 0; + PVFS_util_tab *current_tab = NULL; + char *epenv, *tmp; + + if((epenv = getenv("PVFS2EP")) != NULL) + { + struct PVFS_sys_mntent *mntent; + current_tab = &s_stat_tab_array[0]; + current_tab->mntent_array = malloc(sizeof(struct PVFS_sys_mntent)); + mntent = ¤t_tab->mntent_array[0]; + strcpy(current_tab->tabfile_name, "PVFSEP"); + current_tab->mntent_count = 1; + mntent->pvfs_config_servers = malloc(sizeof(char *)); + mntent->pvfs_config_servers[0] = strdup(strchr(epenv, '=') + 1); + mntent->num_pvfs_config_servers = 1; + mntent->the_pvfs_config_server = mntent->pvfs_config_servers[0]; + 
mntent->pvfs_fs_name = strdup(strrchr(mntent->the_pvfs_config_server, '/')); + mntent->pvfs_fs_name++; + mntent->flowproto = FLOWPROTO_DEFAULT; + mntent->encoding = PVFS2_ENCODING_DEFAULT; + mntent->mnt_dir = strdup(epenv); + tmp = strchr(mntent->mnt_dir, '='); + *tmp = 0; + mntent->mnt_opts = strdup("rw"); + mntent->fs_id = PVFS_FS_ID_NULL; + return &s_stat_tab_array[0]; + } + + if (tabfile != NULL) + { + /* + caller wants us to look in a specific location for the + tabfile + */ + file_list[0] = tabfile; + file_count = 1; + } + else + { + /* + search the system and env vars for tab files; + first check for environment variable override + */ + file_list[0] = getenv("PVFS2TAB_FILE"); + } + + gen_mutex_lock(&s_stat_tab_mutex); + + /* start by checking list of files we have already parsed */ + /*** only check one file on Windows + for (i = 0; i < s_stat_tab_count; i++) + { + for (j = 0; j < file_count; j++) + { + if (file_list[j] && + !strcmp(file_list[j], s_stat_tab_array[i].tabfile_name)) + { + /* already done */ + /*** + gen_mutex_unlock(&s_stat_tab_mutex); + return (&s_stat_tab_array[i]); + } + } + } + ***/ + + assert(s_stat_tab_count < PVFS2_DYNAMIC_TAB_INDEX); + + /* + * Open specified file + */ + if(file_list[0]) + { + PINT_fstab_open(mnt_fp, file_list[0]); + if (mnt_fp) + { + while ((tmp_ent = PINT_fstab_next_entry(mnt_fp))) + { + if(!(PINT_FSTAB_NAME(tmp_ent)) || + !(strncmp(PINT_FSTAB_NAME(tmp_ent), "#", 1))) + { + /* this entry is a comment */ + PINT_fstab_entry_destroy(tmp_ent); + continue; + } + + if (strcmp(PINT_FSTAB_TYPE(tmp_ent), "pvfs2") == 0) + { + targetfile = file_list[0]; + tmp_mntent_count++; + } + + PINT_fstab_entry_destroy(tmp_ent); + } + PINT_fstab_close(mnt_fp); + } + } + + if (!targetfile) + { + gossip_err("Error: could not find any pvfs2 tabfile entries.\n"); + if (file_list[0]) + { + gossip_err("Error: tabfile: %s\n", file_list[0]); + } + else + { + gossip_err("Error: no tabfile specified\n"); + } + gen_mutex_unlock(&s_stat_tab_mutex); + 
return (NULL); + } + gossip_debug(GOSSIP_CLIENT_DEBUG, + "Using pvfs2 tab file: %s\n", targetfile); + + /* allocate array of entries */ + current_tab = &s_stat_tab_array[s_stat_tab_count]; + current_tab->mntent_array = (struct PVFS_sys_mntent *)malloc( + (tmp_mntent_count * sizeof(struct PVFS_sys_mntent))); + if (!current_tab->mntent_array) + { + gen_mutex_unlock(&s_stat_tab_mutex); + return (NULL); + } + memset(current_tab->mntent_array, 0, + (tmp_mntent_count * sizeof(struct PVFS_sys_mntent))); + for (i = 0; i < tmp_mntent_count; i++) + { + current_tab->mntent_array[i].fs_id = PVFS_FS_ID_NULL; + } + current_tab->mntent_count = tmp_mntent_count; + + /* reopen our chosen fstab file */ + PINT_fstab_open(mnt_fp, targetfile); + + /* scan through looking for every pvfs2 entry */ + i = 0; + while ((tmp_ent = PINT_fstab_next_entry(mnt_fp))) + { + if (strcmp(PINT_FSTAB_TYPE(tmp_ent), "pvfs2") == 0) + { + struct PVFS_sys_mntent *me = ¤t_tab->mntent_array[i]; + char *cp; + int cur_server; + + /* Enable integrity checks by default */ + me->integrity_check = 1; + /* comma-separated list of ways to contact a config server */ + me->num_pvfs_config_servers = 1; + for (cp=PINT_FSTAB_NAME(tmp_ent); *cp; cp++) + if (*cp == ',') + ++me->num_pvfs_config_servers; + + /* allocate room for our copies of the strings */ + me->pvfs_config_servers = malloc(me->num_pvfs_config_servers + * sizeof(*me->pvfs_config_servers)); + if (!me->pvfs_config_servers) + goto error_exit; + memset(me->pvfs_config_servers, 0, + me->num_pvfs_config_servers * sizeof(*me->pvfs_config_servers)); + me->mnt_dir = malloc(strlen(PINT_FSTAB_PATH(tmp_ent)) + 1); + me->mnt_opts = malloc(strlen(PINT_FSTAB_OPTS(tmp_ent)) + 1); + + /* bail if any mallocs failed */ + if (!me->mnt_dir || !me->mnt_opts) + { + goto error_exit; + } + + /* parse server list and make sure fsname is same */ + cp = PINT_FSTAB_NAME(tmp_ent); + cur_server = 0; + for (;;) { + char *tok, *p; + int slashcount; + char *slash; + char *last_slash; + + /* 
tok = strsep(&cp, ","); */ + if (cp == NULL) + { + break; + } + for (p = cp; *p && *p != ','; p++) ; + tok = cp; + if (*p) + { + *p = 0; + cp = p + 1; + } + else + { + cp = NULL; + } + + slash = tok; + slashcount = 0; + while ((slash = strchr(slash, '/'))) + { + slash++; + slashcount++; + } + if (slashcount != 3) + { + /* N/A + if(!strcmp(targetfile, "/etc/mtab")) + { + gossip_err("Error: could not find any pvfs2 tabfile entries.\n"); + gossip_err("Error: tried the following tabfiles:\n"); + for (j = 0; j < file_count; j++) + { + gossip_err(" %s\n", file_list[j]); + } + goto error_exit; + } + else + { + */ + gossip_err("Error: invalid tab file entry: %s\n", + PINT_FSTAB_NAME(tmp_ent)); + gossip_err("Error: offending tab file: %s\n", + targetfile); + goto error_exit; + /* } */ + } + + /* find a reference point in the string */ + last_slash = strrchr(tok, '/'); + *last_slash = '\0'; + + /* config server and fs name are a special case, take one + * string and split it in half on "/" delimiter + */ + me->pvfs_config_servers[cur_server] = strdup(tok); + if (!me->pvfs_config_servers[cur_server]) + goto error_exit; + + ++last_slash; + + if (cur_server == 0) { + me->pvfs_fs_name = strdup(last_slash); + if (!me->pvfs_fs_name) + goto error_exit; + } else { + if (strcmp(last_slash, me->pvfs_fs_name) != 0) { + gossip_lerr( + "Error: different fs names in server addresses: %s\n", + PINT_FSTAB_NAME(tmp_ent)); + goto error_exit; + } + } + ++cur_server; + } + + /* make our own copy of parameters of interest */ + /* mnt_dir and mnt_opts are verbatim copies */ + strcpy(current_tab->mntent_array[i].mnt_dir, + PINT_FSTAB_PATH(tmp_ent)); + strcpy(current_tab->mntent_array[i].mnt_opts, + PINT_FSTAB_OPTS(tmp_ent)); + + /* find out if a particular flow protocol was specified */ + if ((PINT_fstab_entry_hasopt(tmp_ent, "flowproto"))) + { + ret = parse_flowproto_string( + PINT_FSTAB_OPTS(tmp_ent), + &(current_tab-> + mntent_array[i].flowproto)); + if (ret < 0) + { + goto error_exit; + } + } 
+ else + { + current_tab->mntent_array[i].flowproto = + FLOWPROTO_DEFAULT; + } + + /* pick an encoding to use with the server */ + current_tab->mntent_array[i].encoding = + PVFS2_ENCODING_DEFAULT; + cp = PINT_fstab_entry_hasopt(tmp_ent, "encoding"); + if (cp) + { + ret = parse_encoding_string( + cp, ¤t_tab->mntent_array[i].encoding); + if (ret < 0) + { + goto error_exit; + } + } + + /* find out if a particular flow protocol was specified */ + current_tab->mntent_array[i].default_num_dfiles = 0; + cp = PINT_fstab_entry_hasopt(tmp_ent, "num_dfiles"); + if (cp) + { + ret = parse_num_dfiles_string( + cp, + &(current_tab->mntent_array[i].default_num_dfiles)); + + if (ret < 0) + { + goto error_exit; + } + } + + /* Loop counter increment */ + i++; + + PINT_fstab_entry_destroy(tmp_ent); + } + } + s_stat_tab_count++; + strcpy(s_stat_tab_array[s_stat_tab_count-1].tabfile_name, targetfile); + PINT_fstab_close(mnt_fp); + gen_mutex_unlock(&s_stat_tab_mutex); + return (&s_stat_tab_array[s_stat_tab_count - 1]); + + error_exit: + for (; i > -1; i--) + { + struct PVFS_sys_mntent *me = ¤t_tab->mntent_array[i]; + + if (me->pvfs_config_servers) + { + int j; + for (j=0; jnum_pvfs_config_servers; j++) + if (me->pvfs_config_servers[j]) + free(me->pvfs_config_servers[j]); + free(me->pvfs_config_servers); + me->pvfs_config_servers = NULL; + me->num_pvfs_config_servers = 0; + } + + if (me->mnt_dir) + { + free(me->mnt_dir); + me->mnt_dir = NULL; + } + + if (me->mnt_opts) + { + free(me->mnt_opts); + me->mnt_opts = NULL; + } + + if (me->pvfs_fs_name) + { + free(me->pvfs_fs_name); + me->pvfs_fs_name = NULL; + } + } + PINT_fstab_close(mnt_fp); + gen_mutex_unlock(&s_stat_tab_mutex); + return (NULL); +} + +/* PVFS_util_get_default_fsid() + * + * fills in the fs identifier for the first active file system that + * the library knows about. 
Useful for test programs or admin tools + * that need default file system to access if the user has not + * specified one + * + * returns 0 on success, -PVFS_error on failure + */ +int PVFS_util_get_default_fsid(PVFS_fs_id* out_fs_id) +{ + int i = 0, j = 0; + + gen_mutex_lock(&s_stat_tab_mutex); + + for(i = 0; i < s_stat_tab_count; i++) + { + for(j = 0; j < s_stat_tab_array[i].mntent_count; j++) + { + *out_fs_id = s_stat_tab_array[i].mntent_array[j].fs_id; + if(*out_fs_id != PVFS_FS_ID_NULL) + { + gen_mutex_unlock(&s_stat_tab_mutex); + return(0); + } + } + } + + /* check the dynamic tab area if we haven't found an fs yet */ + for(j = 0; j < s_stat_tab_array[ + PVFS2_DYNAMIC_TAB_INDEX].mntent_count; j++) + { + *out_fs_id = s_stat_tab_array[ + PVFS2_DYNAMIC_TAB_INDEX].mntent_array[j].fs_id; + if(*out_fs_id != PVFS_FS_ID_NULL) + { + gen_mutex_unlock(&s_stat_tab_mutex); + return(0); + } + } + + gen_mutex_unlock(&s_stat_tab_mutex); + return(-PVFS_ENOENT); +} + +/* + * PVFS_util_add_dynamic_mntent() + * + * dynamically add mount information to our internally managed mount + * tables (used for quick fs resolution using PVFS_util_resolve). + * dynamic mnt entries can only be added to a particular dynamic + * region of our book keeping, so they're the exception, not the rule. 
+ * + * returns 0 on success, -PVFS_error on failure, and 1 if the mount + * entry already exists as a parsed entry (not dynamic) + */ +int PVFS_util_add_dynamic_mntent(struct PVFS_sys_mntent *mntent) +{ + int i = 0, j = 0, new_index = 0; + int ret = -PVFS_EINVAL; + struct PVFS_sys_mntent *current_mnt = NULL; + struct PVFS_sys_mntent *tmp_mnt_array = NULL; + + if (mntent) + { + gen_mutex_lock(&s_stat_tab_mutex); + + /* + we exhaustively scan to be sure this mnt entry doesn't exist + anywhere in our book keeping; first scan the parsed regions + */ + for(i = 0; i < s_stat_tab_count; i++) + { + for(j = 0; j < s_stat_tab_array[i].mntent_count; j++) + { + current_mnt = &(s_stat_tab_array[i].mntent_array[j]); + + if (current_mnt->fs_id == mntent->fs_id) + { + /* + no need to add the dynamic mount information + because the file system already exists as a + parsed mount entry + */ + gen_mutex_unlock(&s_stat_tab_mutex); + return 1; + } + } + } + +#if 0 + /* check the dynamic region if we haven't found a match yet */ + for(j = 0; j < s_stat_tab_array[ + PVFS2_DYNAMIC_TAB_INDEX].mntent_count; j++) + { + current_mnt = &(s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX]. 
+ mntent_array[j]); + + if ((current_mnt->fs_id == mntent->fs_id) && + (strcmp(current_mnt->pvfs_config_servers[0], + mntent->pvfs_config_servers[0]) != 0)) + { + gossip_err("Error: FS with id %d is already mounted using" + " a different config server.\n", (int)mntent->fs_id); + gossip_err("Error: This could indicate that a duplicate fsid" + " is being used.\n"); + gossip_err("Error: Please check your server configuration.\n"); + gen_mutex_unlock(&s_stat_tab_mutex); + return -PVFS_ENXIO; + } + } +#endif + + /* copy the mntent to our table in the dynamic tab area */ + new_index = s_stat_tab_array[ + PVFS2_DYNAMIC_TAB_INDEX].mntent_count; + + if (new_index == 0) + { + /* allocate and initialize the dynamic tab object */ + s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX].mntent_array = + (struct PVFS_sys_mntent *)malloc( + sizeof(struct PVFS_sys_mntent)); + if (!s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX].mntent_array) + { + return -PVFS_ENOMEM; + } + strncpy(s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX].tabfile_name, + PVFS2_DYNAMIC_TAB_NAME, PVFS_NAME_MAX); + } + else + { + /* we need to re-alloc this guy to add a new array entry */ + tmp_mnt_array = (struct PVFS_sys_mntent *)malloc( + ((new_index + 1) * sizeof(struct PVFS_sys_mntent))); + if (!tmp_mnt_array) + { + return -PVFS_ENOMEM; + } + + /* + copy all mntent entries into the new array, freeing the + original entries + */ + for(i = 0; i < new_index; i++) + { + current_mnt = &s_stat_tab_array[ + PVFS2_DYNAMIC_TAB_INDEX].mntent_array[i]; + PVFS_util_copy_mntent(&tmp_mnt_array[i], current_mnt); + PVFS_util_free_mntent(current_mnt); + } + + /* finally, swap the mntent arrays */ + free(s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX].mntent_array); + s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX].mntent_array = + tmp_mnt_array; + } + + gossip_debug(GOSSIP_CLIENT_DEBUG, "* Adding new dynamic mount " + "point %s [%d,%d]\n", mntent->mnt_dir, + PVFS2_DYNAMIC_TAB_INDEX, new_index); + + current_mnt = &s_stat_tab_array[ + 
PVFS2_DYNAMIC_TAB_INDEX].mntent_array[new_index]; + + ret = PVFS_util_copy_mntent(current_mnt, mntent); + + s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX].mntent_count++; + + gen_mutex_unlock(&s_stat_tab_mutex); + } + return ret; +} + +/* + * PVFS_util_remove_internal_mntent() + * + * dynamically remove mount information from our internally managed + * mount tables. + * + * returns 0 on success, -PVFS_error on failure + */ +int PVFS_util_remove_internal_mntent( + struct PVFS_sys_mntent *mntent) +{ + int i = 0, j = 0, new_count = 0, found = 0, found_index = 0; + int ret = -PVFS_EINVAL; + struct PVFS_sys_mntent *current_mnt = NULL; + struct PVFS_sys_mntent *tmp_mnt_array = NULL; + + if (mntent) + { + gen_mutex_lock(&s_stat_tab_mutex); + + /* + we exhaustively scan to be sure this mnt entry *does* exist + somewhere in our book keeping + */ + for(i = 0; i < s_stat_tab_count; i++) + { + for(j = 0; j < s_stat_tab_array[i].mntent_count; j++) + { + current_mnt = &(s_stat_tab_array[i].mntent_array[j]); + if ((current_mnt->fs_id == mntent->fs_id) + && (strcmp(current_mnt->mnt_dir, mntent->mnt_dir) == 0)) + { + found_index = i; + found = 1; + goto mntent_found; + } + } + } + + /* check the dynamic region if we haven't found a match yet */ + for(j = 0; j < s_stat_tab_array[ + PVFS2_DYNAMIC_TAB_INDEX].mntent_count; j++) + { + current_mnt = &(s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX]. 
+ mntent_array[j]); + + if (current_mnt->fs_id == mntent->fs_id) + { + found_index = PVFS2_DYNAMIC_TAB_INDEX; + found = 1; + goto mntent_found; + } + } + + mntent_found: + if (!found) + { + return -PVFS_EINVAL; + } + + gossip_debug(GOSSIP_CLIENT_DEBUG, "* Removing mount " + "point %s [%d,%d]\n", current_mnt->mnt_dir, + found_index, j); + + /* remove the mntent from our table in the found tab area */ + if ((s_stat_tab_array[found_index].mntent_count - 1) > 0) + { + /* + this is 1 minus the old count since there will be 1 less + mnt entries after this call + */ + new_count = s_stat_tab_array[found_index].mntent_count - 1; + + /* we need to re-alloc this guy to remove the array entry */ + tmp_mnt_array = (struct PVFS_sys_mntent *)malloc( + (new_count * sizeof(struct PVFS_sys_mntent))); + if (!tmp_mnt_array) + { + return -PVFS_ENOMEM; + } + + /* + copy all mntent entries into the new array, freeing the + original entries -- and skipping the one that we're + trying to remove + */ + for(i = 0, new_count = 0; + i < s_stat_tab_array[found_index].mntent_count; i++) + { + current_mnt = &s_stat_tab_array[found_index].mntent_array[i]; + + if ((current_mnt->fs_id == mntent->fs_id) + && (strcmp(current_mnt->mnt_dir, mntent->mnt_dir) == 0)) + { + PVFS_util_free_mntent(current_mnt); + continue; + } + PVFS_util_copy_mntent( + &tmp_mnt_array[new_count++], current_mnt); + PVFS_util_free_mntent(current_mnt); + } + + /* finally, swap the mntent arrays */ + free(s_stat_tab_array[found_index].mntent_array); + s_stat_tab_array[found_index].mntent_array = tmp_mnt_array; + + s_stat_tab_array[found_index].mntent_count--; + ret = 0; + } + else + { + /* + special case: we're removing the last mnt entry in the + array here. since this is the case, we also free the + array since we know it's now empty. 
+ */ + PVFS_util_free_mntent( + &s_stat_tab_array[found_index].mntent_array[0]); + free(s_stat_tab_array[found_index].mntent_array); + s_stat_tab_array[found_index].mntent_array = NULL; + s_stat_tab_array[found_index].mntent_count = 0; + ret = 0; + } + gen_mutex_unlock(&s_stat_tab_mutex); + } + return ret; +} + +/* + * PVFS_util_get_mntent_copy() + * + * Given a pointer to a valid mount entry, out_mntent, copy the contents of + * the mount entry for fs_id into out_mntent. + * + * returns 0 on success, -PVFS_error on failure + */ +int PVFS_util_get_mntent_copy(PVFS_fs_id fs_id, + struct PVFS_sys_mntent* out_mntent) +{ + int i = 0; + + /* Search for mntent by fsid */ + gen_mutex_lock(&s_stat_tab_mutex); + for(i = 0; i < s_stat_tab_count; i++) + { + int j; + for(j = 0; j < s_stat_tab_array[i].mntent_count; j++) + { + struct PVFS_sys_mntent* mnt_iter; + mnt_iter = &(s_stat_tab_array[i].mntent_array[j]); + + if (mnt_iter->fs_id == fs_id) + { + PVFS_util_copy_mntent(out_mntent, mnt_iter); + gen_mutex_unlock(&s_stat_tab_mutex); + return 0; + } + } + } + gen_mutex_unlock(&s_stat_tab_mutex); + return -PVFS_EINVAL; +} + +/* basename() + * + * Return the portion of a path after the last non-trailing slash + */ +char *basename(char *path) +{ + int path_len; + char *last_slash; + + if (path == NULL || path[0] == '\0') + return "."; + + if (strcmp(path, "/") == 0 || + strchr(path, '/') == NULL) + return path; + + /* remove trailing slashes */ + path_len = strlen(path); + while (path[path_len - 1] == '/') + path[--path_len] = '\0'; + + /* find last_slash */ + last_slash = strrchr(path, '/'); + + /* return base */ + if (last_slash) + return last_slash + 1; + + return path; +} + +/* dirname() + * + * Return the portion of a path before the last non-trailing slash + */ +char *dirname(char *path) +{ + int path_len; + char *last_slash; + + if (path == NULL || path[0] == '\0' || + strchr(path, '/') == NULL || + strcmp(path, "..") == 0) + return "."; + + if (strcmp(path, "/") == 0) + 
return path; + + /* remove trailing slashes */ + path_len = strlen(path); + while (path[path_len - 1] == '/') + path[--path_len] = '\0'; + + /* find last_slash */ + last_slash = strrchr(path, '/'); + + /* truncate string */ + if (last_slash) + { + /* last slash is first character */ + if (last_slash == path) + last_slash[1] = '\0'; + else + last_slash[0] = '\0'; + } + + return path; +} + +/* PVFS_util_resolve() + * + * given a local path of a file that resides on a pvfs2 volume, + * determine what the fsid and fs relative path is. + * + * returns 0 on succees, -PVFS_error on failure + */ +int PVFS_util_resolve( + const char* local_path, + PVFS_fs_id* out_fs_id, + char* out_fs_path, + int out_fs_path_max) +{ + int ret = -1; + char* tmp_path = NULL; + char* parent_path = NULL; + int base_len = 0; + + if(strlen(local_path) > (PVFS_NAME_MAX-1)) + { + gossip_err("Error: PVFS_util_resolve() input path too long.\n"); + return(-PVFS_ENAMETOOLONG); + } + + /* the most common case first; just try to resolve the path that we + * were given + */ + ret = PINT_util_resolve_absolute(local_path, out_fs_id, out_fs_path, + out_fs_path_max); + if(ret == 0) + { + /* done */ + return(0); + } + if(ret == -PVFS_ENOENT) + { + /* if the path wasn't found, try canonicalizing the path in case it + * refers to a relative path on a mounted volume or contains symlinks + */ + tmp_path = (char*)malloc(PVFS_NAME_MAX*sizeof(char)); + if(!tmp_path) + { + return(-PVFS_ENOMEM); + } + memset(tmp_path, 0, PVFS_NAME_MAX*sizeof(char)); + ret = PINT_realpath(local_path, tmp_path, (PVFS_NAME_MAX-1)); + if(ret == -PVFS_EINVAL) + { + /* one more try; canonicalize the parent in case this function + * is called before object creation; the basename + * doesn't yet exist but we still need to find the PVFS volume + */ + parent_path = (char*)malloc(PVFS_NAME_MAX*sizeof(char)); + if(!parent_path) + { + free(tmp_path); + return(-PVFS_ENOMEM); + } + /* find size of basename so we can reserve space for it */ + /* note: 
basename() and dirname() modify args, thus the strcpy */ + strcpy(parent_path, local_path); + base_len = strlen(basename(parent_path)); + strcpy(parent_path, local_path); + ret = PINT_realpath(dirname(parent_path), tmp_path, + (PVFS_NAME_MAX-base_len-2)); + if(ret < 0) + { + free(tmp_path); + free(parent_path); + /* last chance failed; this is not a valid pvfs2 path */ + return(-PVFS_ENOENT); + } + /* glue the basename back on */ + strcpy(parent_path, local_path); + strcat(tmp_path, "/"); + strcat(tmp_path, basename(parent_path)); + free(parent_path); + } + else if(ret < 0) + { + /* first canonicalize failed; this is not a valid pvfs2 path */ + free(tmp_path); + return(-PVFS_ENOENT); + } + + ret = PINT_util_resolve_absolute(tmp_path, out_fs_id, out_fs_path, + out_fs_path_max); + free(tmp_path); + + /* fall through and preserve "ret" to be returned */ + } + + return(ret); +} + + +/* PVFS_util_init_defaults() + * + * performs the standard set of initialization steps for the system + * interface, mostly just a wrapper function + * + * returns 0 on success, -PVFS_error on failure + */ +int PVFS_util_init_defaults(void) +{ + int ret = -1, i = 0, j = 0, found_one = 0; + int failed_indices[PVFS2_MAX_INVALID_MNTENTS] = {0}; + + /* use standard system tab files */ + const PVFS_util_tab* tab = PVFS_util_parse_pvfstab(NULL); + if (!tab) + { + gossip_err( + "Error: failed to find any pvfs2 file systems in the " + "standard system tab files.\n"); + return(-PVFS_ENOENT); + } + + /* initialize pvfs system interface */ + ret = PVFS_sys_initialize(GOSSIP_NO_DEBUG); + if (ret < 0) + { + return(ret); + } + + /* add in any file systems we found in the fstab */ + for(i = 0; i < tab->mntent_count; i++) + { + ret = PVFS_sys_fs_add(&tab->mntent_array[i]); + if (ret == 0) + { + found_one = 1; + } + else + { + failed_indices[j++] = i; + + if (j > (PVFS2_MAX_INVALID_MNTENTS - 1)) + { + gossip_err("*** Failed to initialize %d file systems " + "from tab file %s.\n ** If this is a valid " + 
"tabfile, please remove invalid entries.\n", + PVFS2_MAX_INVALID_MNTENTS, + tab->tabfile_name); + gossip_err("Continuing execution without remaining " + "mount entries\n"); + + break; + } + } + } + + /* remove any mount entries that couldn't be added here */ + for(i = 0; i < PVFS2_MAX_INVALID_MNTENTS; i++) + { + if (failed_indices[i]) + { + PVFS_util_remove_internal_mntent( + &tab->mntent_array[failed_indices[i]]); + } + else + { + break; + } + } + + if (found_one) + { + return 0; + } + + gossip_err("ERROR: could not initialize any file systems " + "in %s.\n", tab->tabfile_name); + + PVFS_sys_finalize(); + return -PVFS_ENODEV; +} + +/*********************/ +/* normal size units */ +/*********************/ +#define KILOBYTE 1024 +#define MEGABYTE (1024 * KILOBYTE) +#define GIGABYTE (1024 * MEGABYTE) +#define TERABYTE (1024llu * GIGABYTE) +#define PETABYTE (1024llu * TERABYTE) +#define EXABYTE (1024llu * PETABYTE) +#define ZETTABYTE (1024llu * EXABYTE) +#define YOTTABYTE (1024llu * ZETTABYTE) + +/*****************/ +/* si size units */ +/*****************/ +#define SI_KILOBYTE 1000 +#define SI_MEGABYTE (1000 * SI_KILOBYTE) +#define SI_GIGABYTE (1000 * SI_MEGABYTE) +#define SI_TERABYTE (1000llu * SI_GIGABYTE) +#define SI_PETABYTE (1000llu * SI_TERABYTE) +#define SI_EXABYTE (1000llu * SI_PETABYTE) +#define SI_ZETTABYTE (1000llu * SI_EXABYTE) +#define SI_YOTTABYTE (1000llu * SI_ZETTABYTE) + +#if SIZEOF_LONG_INT == 8 +#define NUM_SIZES 5 +#else +#define NUM_SIZES 4 +#endif + +static PVFS_size PINT_s_size_table[NUM_SIZES] = +{ + /*YOTTABYTE, ZETTABYTE, EXABYTE, */ +#if SIZEOF_LONG_INT == 8 + PETABYTE, + TERABYTE, +#endif + GIGABYTE, MEGABYTE, KILOBYTE +}; + +static PVFS_size PINT_s_si_size_table[NUM_SIZES] = +{ + /*SI_YOTTABYTE, SI_ZETTABYTE, SI_EXABYTE, */ +#if SIZEOF_LONG_INT == 8 + SI_PETABYTE, SI_TERABYTE, +#endif + SI_GIGABYTE, SI_MEGABYTE, SI_KILOBYTE +}; + +static const char *PINT_s_str_size_table[NUM_SIZES] = +{ + /*"Y", "Z", "E", */ +#if SIZEOF_LONG_INT == 8 + 
"P","T", +#endif + "G", "M", "K" +}; + +/* + * PVFS_util_make_size_human_readable + * + * converts a size value to a human readable string format + * + * size - numeric size of file + * out_str - nicely formatted string, like "3.4M" + * (caller must allocate this string) + * max_out_len - maximum lenght of out_str + * use_si_units - use units of 1000, not 1024 + */ +void PVFS_util_make_size_human_readable( + PVFS_size size, + char *out_str, + int max_out_len, + int use_si_units) +{ + int i = 0; + double tmp = 0.0f; + PVFS_size *size_table = + (use_si_units? PINT_s_si_size_table : PINT_s_size_table); + + if (out_str) + { + for (i = 0; i < NUM_SIZES; i++) + { + tmp = (double)size; + if ((PVFS_size) (tmp / size_table[i]) > 0) + { + tmp = (tmp / size_table[i]); + break; + } + } + if (i == NUM_SIZES) + { + _snprintf(out_str, 16, "%lld", lld(size)); + } + else + { + _snprintf(out_str, max_out_len, "%.1f%s", + tmp, PINT_s_str_size_table[i]); + } + } +} + +/* parse_flowproto_string() + * + * looks in the mount options string for a flowprotocol specifier and + * sets the flowproto type accordingly + * + * returns 0 on success, -PVFS_error on failure + */ +static int parse_flowproto_string( + const char *input, + enum PVFS_flowproto_type *flowproto) +{ + int ret = 0; + char *start = NULL; + char flow[256]; + char *comma = NULL; + + start = strstr(input, "flowproto"); + /* we must find a match if this function is being called... 
*/ + assert(start); + + /* scan out the option */ + ret = sscanf(start, "flowproto = %255s ,", flow); + if (ret != 1) + { + gossip_err("Error: malformed flowproto option in tab file.\n"); + return (-PVFS_EINVAL); + } + + /* chop it off at any trailing comma */ + comma = strchr(flow, ','); + if (comma) + { + comma[0] = '\0'; + } + + if (!strcmp(flow, "dump_offsets")) + { + *flowproto = FLOWPROTO_DUMP_OFFSETS; + } + else if (!strcmp(flow, "bmi_cache")) + { + *flowproto = FLOWPROTO_BMI_CACHE; + } + else if (!strcmp(flow, "multiqueue")) + { + *flowproto = FLOWPROTO_MULTIQUEUE; + } + else + { + gossip_err("Error: unrecognized flowproto option: %s\n", flow); + return (-PVFS_EINVAL); + } + return 0; +} + +void PVFS_util_free_mntent( + struct PVFS_sys_mntent *mntent) +{ + if (mntent) + { + if (mntent->pvfs_config_servers) + { + int j; + for (j=0; jnum_pvfs_config_servers; j++) + if (mntent->pvfs_config_servers[j]) + free(mntent->pvfs_config_servers[j]); + free(mntent->pvfs_config_servers); + mntent->pvfs_config_servers = NULL; + mntent->num_pvfs_config_servers = 0; + } + if (mntent->pvfs_fs_name) + { + free(mntent->pvfs_fs_name); + mntent->pvfs_fs_name = NULL; + } + if (mntent->mnt_dir) + { + free(mntent->mnt_dir); + mntent->mnt_dir = NULL; + } + if (mntent->mnt_opts) + { + free(mntent->mnt_opts); + mntent->mnt_opts = NULL; + } + + mntent->flowproto = 0; + mntent->encoding = 0; + mntent->fs_id = PVFS_FS_ID_NULL; + } +} + +int PVFS_util_copy_mntent( + struct PVFS_sys_mntent *dest_mntent, + struct PVFS_sys_mntent *src_mntent) +{ + int ret = -PVFS_EINVAL, i = 0; + + if (dest_mntent && src_mntent) + { + memset(dest_mntent, 0, sizeof(struct PVFS_sys_mntent)); + + dest_mntent->num_pvfs_config_servers = + src_mntent->num_pvfs_config_servers; + + dest_mntent->pvfs_config_servers = + malloc(dest_mntent->num_pvfs_config_servers * + sizeof(*dest_mntent->pvfs_config_servers)); + if (!dest_mntent) + { + return -PVFS_ENOMEM; + } + + memset(dest_mntent->pvfs_config_servers, 0, + 
dest_mntent->num_pvfs_config_servers * + sizeof(*dest_mntent->pvfs_config_servers)); + + for(i = 0; i < dest_mntent->num_pvfs_config_servers; i++) + { + dest_mntent->pvfs_config_servers[i] = + strdup(src_mntent->pvfs_config_servers[i]); + if (!dest_mntent->pvfs_config_servers[i]) + { + ret = -PVFS_ENOMEM; + goto error_exit; + } + } + + dest_mntent->pvfs_fs_name = strdup(src_mntent->pvfs_fs_name); + if (!dest_mntent->pvfs_fs_name) + { + ret = -PVFS_ENOMEM; + goto error_exit; + } + + if (src_mntent->mnt_dir) + { + dest_mntent->mnt_dir = strdup(src_mntent->mnt_dir); + if (!dest_mntent->mnt_dir) + { + ret = -PVFS_ENOMEM; + goto error_exit; + } + } + + if (src_mntent->mnt_opts) + { + dest_mntent->mnt_opts = strdup(src_mntent->mnt_opts); + if (!dest_mntent->mnt_opts) + { + ret = -PVFS_ENOMEM; + goto error_exit; + } + } + + dest_mntent->flowproto = src_mntent->flowproto; + dest_mntent->encoding = src_mntent->encoding; + dest_mntent->fs_id = src_mntent->fs_id; + dest_mntent->default_num_dfiles = src_mntent->default_num_dfiles; + } + return 0; + + error_exit: + + for(i = 0; i < dest_mntent->num_pvfs_config_servers; i++) + { + if (dest_mntent->pvfs_config_servers[i]) + { + free(dest_mntent->pvfs_config_servers[i]); + dest_mntent->pvfs_config_servers[i] = NULL; + } + } + + if (dest_mntent->pvfs_config_servers) + { + free(dest_mntent->pvfs_config_servers); + dest_mntent->pvfs_config_servers = NULL; + } + + if (dest_mntent->pvfs_fs_name) + { + free(dest_mntent->pvfs_fs_name); + dest_mntent->pvfs_fs_name = NULL; + } + + if (dest_mntent->mnt_dir) + { + free(dest_mntent->mnt_dir); + dest_mntent->mnt_dir = NULL; + } + + if (dest_mntent->mnt_opts) + { + free(dest_mntent->mnt_opts); + dest_mntent->mnt_opts = NULL; + } + return ret; +} + +/* + * Pull out the wire encoding specified as a mount option in the tab + * file. + * + * Input string is not modified; result goes into et. + * + * Returns 0 if all okay. 
+ */ +static int parse_encoding_string( + const char *cp, + enum PVFS_encoding_type *et) +{ + int i = 0; + const char *cq = NULL; + + struct + { + const char *name; + enum PVFS_encoding_type val; + } enc_str[] = + { { "default", PVFS2_ENCODING_DEFAULT }, + { "defaults", PVFS2_ENCODING_DEFAULT }, + { "direct", ENCODING_DIRECT }, + { "le_bfield", ENCODING_LE_BFIELD }, + { "xdr", ENCODING_XDR } }; + + gossip_debug(GOSSIP_CLIENT_DEBUG, "%s: input is %s\n", + __func__, cp); + cp += strlen("encoding"); + for (; isspace(*cp); cp++); /* optional spaces */ + if (*cp != '=') + { + gossip_err("Error: %s: malformed encoding option in tab file.\n", + __func__); + return -PVFS_EINVAL; + } + for (++cp; isspace(*cp); cp++); /* optional spaces */ + for (cq = cp; *cq && *cq != ','; cq++);/* find option end */ + + *et = -1; + for (i = 0; i < sizeof(enc_str) / sizeof(enc_str[0]); i++) + { + int n = strlen(enc_str[i].name); + if (cq - cp > n) + n = cq - cp; + if (!strncmp(enc_str[i].name, cp, n)) + { + *et = enc_str[i].val; + break; + } + } + if (*et == -1) + { + gossip_err("Error: %s: unknown encoding type in tab file.\n", + __func__); + return -PVFS_EINVAL; + } + return 0; +} + +/* PINT_release_pvfstab() + * + * frees up any resources associated with previously parsed tabfiles + * + * no return value + */ +void PINT_release_pvfstab(void) +{ + int i, j; + + gen_mutex_lock(&s_stat_tab_mutex); + for(i=0; ifs_spec = strdup(nexttok); + + + /* get the mount point */ + + /* nexttok = strtok_r(NULL, " ", &strtok_ctx); */ + nexttok = strtok(NULL, " "); + if(!nexttok) + { + goto exit; + } + fsentry->fs_file = strdup(nexttok); + + /* get the fs type */ + nexttok = strtok(NULL, " "); + if(!nexttok) + { + goto exit; + } + fsentry->fs_vfstype = strdup(nexttok); + + /* get the mount opts */ + nexttok = strtok(NULL, " "); + if(!nexttok) + { + goto exit; + } + fsentry->fs_mntops = strdup(nexttok); + + exit: + return fsentry; +} + +static void PINT_util_fsent_destroy(PINT_fstab_entry_t * entry) +{ + 
if(entry) + { + if(entry->fs_spec) + { + free(entry->fs_spec); + } + + if(entry->fs_file) + { + free(entry->fs_file); + } + + if(entry->fs_vfstype) + { + free(entry->fs_vfstype); + } + + if(entry->fs_mntops) + { + free(entry->fs_mntops); + } + + if(entry->fs_type) + { + free(entry->fs_type); + } + + free(entry); + } +} +#endif /* DEFINE_MY_GET_NEXT_FSENT */ + +int32_t PVFS_util_translate_mode(int mode, int suid) +{ + int ret = 0, i = 0; +#define NUM_MODES 11 + +#define S_IXOTH 0001 +#define S_IWOTH 0002 +#define S_IROTH 0004 +#define S_IXGRP 0010 +#define S_IWGRP 0020 +#define S_IRGRP 0040 +#define S_IXUSR 0100 +#define S_IWUSR 0200 +#define S_IRUSR 0400 +#define S_ISGID 002000 +#define S_ISUID 004000 + + static int modes[NUM_MODES] = + { + S_IXOTH, S_IWOTH, S_IROTH, + S_IXGRP, S_IWGRP, S_IRGRP, + S_IXUSR, S_IWUSR, S_IRUSR, + S_ISGID, S_ISUID + }; + static int pvfs2_modes[NUM_MODES] = + { + PVFS_O_EXECUTE, PVFS_O_WRITE, PVFS_O_READ, + PVFS_G_EXECUTE, PVFS_G_WRITE, PVFS_G_READ, + PVFS_U_EXECUTE, PVFS_U_WRITE, PVFS_U_READ, + PVFS_G_SGID, PVFS_U_SUID + }; + + for(i = 0; i < NUM_MODES; i++) + { + if (mode & modes[i]) + { + ret |= pvfs2_modes[i]; + } + } + if (suid == 0 && (ret & PVFS_U_SUID)) + { + ret &= ~PVFS_U_SUID; + } + return ret; +#undef NUM_MODES +} + +void PVFS_util_gen_credentials( + PVFS_credentials *credentials) +{ + PINT_util_gen_credentials(credentials); +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/windows/wincommon.h b/src/common/windows/wincommon.h new file mode 100755 index 0000000..f0c7a6d --- /dev/null +++ b/src/common/windows/wincommon.h @@ -0,0 +1,40 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +/* + * declarations for Windows + */ + +#ifndef __WINCOMMON_H +#define __WINCOMMON_H + +#include +#include + +#define __inline__ _inline +#define inline _inline +#define __func__ __FUNCTION__ + +/* + * gettimeofday + */ +static int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + struct _timeb timebuffer; + errno_t ret; + + memset(&timebuffer, 0, sizeof(struct _timeb)); + ret = _ftime_s(&timebuffer); + if (ret == 0) + { + tv->tv_sec = (long) timebuffer.time; + tv->tv_usec = timebuffer.millitm * 1000; + } + + return ret; +} + +#endif \ No newline at end of file diff --git a/src/io/bmi/bmi_wintcp/bmi-tcp-addressing.h b/src/io/bmi/bmi_wintcp/bmi-tcp-addressing.h new file mode 100755 index 0000000..7395a35 --- /dev/null +++ b/src/io/bmi/bmi_wintcp/bmi-tcp-addressing.h @@ -0,0 +1,102 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * tcp specific host addressing information + */ + +#ifndef __BMI_TCP_ADDRESSING_H +#define __BMI_TCP_ADDRESSING_H + +#include "bmi-types.h" +/* #include */ + +/***************************************************************** + * Information specific to tcp/ip + */ + +/* + max number of sequential zero reads to allow; usually indicates a + dead connection, but it's used for checking several times to be sure +*/ +#define BMI_TCP_ZERO_READ_LIMIT 10 + +/* wait no more than 10 seconds for a partial BMI header to arrive on a + * socket once we have detected part of it. 
+ */ +#define BMI_TCP_HEADER_WAIT_SECONDS 10 + +/* peer name types */ +#define BMI_TCP_PEER_IP 1 +#define BMI_TCP_PEER_HOSTNAME 2 + +#ifdef USE_TRUSTED + +struct tcp_allowed_connection_s { + int port_enforce; + unsigned long ports[2]; + int network_enforce; + int network_count; + struct in_addr *network; + struct in_addr *netmask; +}; + +#endif + + +/* this contains TCP/IP addressing information- it is filled in as + * connections are made */ +struct tcp_addr +{ + bmi_method_addr_p map; /* points back to generic address */ \ + BMI_addr_t bmi_addr; + /* stores error code for addresses that are broken for some reason */ + int addr_error; + char *hostname; + int port; + int socket; + /* flag that indicates this address represents a + * server port on which connections may be accepted */ + int server_port; + /* reference count of pending send operations to this address */ + int write_ref_count; + /* is the socket connected yet? */ + int not_connected; + /* socket collection link */ + struct qlist_head sc_link; + int sc_index; + /* count of the number of sequential zero read operations */ + int zero_read_limit; + /* timer for how long we wait on incomplete headers to arrive */ + int short_header_timer; + /* flag used to determine if we can reconnect this address after failure */ + int dont_reconnect; + char* peer; + int peer_type; +}; + + +/***************************************************************** + * function prototypes + */ + +#define bmi_tcp_errno_to_pvfs bmi_errno_to_pvfs + +void tcp_forget_addr(bmi_method_addr_p map, + int dealloc_flag, + int error_code); +bmi_method_addr_p alloc_tcp_method_addr(void); + +#endif /* __BMI_TCP_ADDRESSING_H */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/bmi/bmi_wintcp/bmi-wintcp.c b/src/io/bmi/bmi_wintcp/bmi-wintcp.c new file mode 100755 index 0000000..e0e69ab --- /dev/null +++ b/src/io/bmi/bmi_wintcp/bmi-wintcp.c @@ -0,0 
+1,4177 @@ +/* + * (C) 2001-2011 Clemson University, The University of Chicago and + * Omnibond LLC + * + * See COPYING in top-level directory. + */ + +/* + * Windows TCP/IP implementation of a BMI method + */ + +#include + +#include +#include +#include +//#include +#include +//#include +//#include +#include +//#include +#include +#include +//#include +//#include +//#include +//#include +#include "pint-mem.h" + +#include "pvfs2-config.h" +#ifdef HAVE_NETDB_H +#include +#endif + +#include "bmi-method-support.h" +#include "bmi-method-callback.h" +#include "bmi-tcp-addressing.h" +#ifdef __PVFS2_USE_EPOLL__ +#include "socket-collection-epoll.h" +#else +#include "socket-collection.h" +#endif +#include "op-list.h" +#include "gossip.h" +#include "sockio.h" +#include "bmi-byteswap.h" +#include "id-generator.h" +#include "pint-event.h" +#include "pvfs2-debug.h" +#ifdef USE_TRUSTED +#include "server-config.h" +#include "bmi-tcp-addressing.h" +#endif +#include "gen-locks.h" +#include "pint-hint.h" + +static gen_mutex_t interface_mutex = GEN_MUTEX_INITIALIZER; +static gen_cond_t interface_cond = GEN_COND_INITIALIZER; +static int sc_test_busy = 0; + +/*** Windows-specific additions ***/ +typedef unsigned int socklen_t; + +/* Windows Sockets doesn't have inet_aton */ +int inet_aton(const char *cp, struct in_addr *inp) +{ + unsigned long addr; + + if (cp == NULL || strlen(cp) == 0 || inp == NULL) + { + return (0); + } + + /* handle 255.255.255.255 separately */ + if (strcmp(cp, "255.255.255.255") == 0) + { + inp->S_un.S_addr = 0xFFFFFFFF; + return (1); + } + + /* use inet_addr for other addresses */ + addr = inet_addr(cp); + if (addr == INADDR_NONE) + { + return (0); + } + + inp->S_un.S_addr = addr; + + return (1); + +} +/***********************************/ + +/* function prototypes */ +int BMI_tcp_initialize(bmi_method_addr_p listen_addr, + int method_id, + int init_flags); +int BMI_tcp_finalize(void); +int BMI_tcp_set_info(int option, + void *inout_parameter); +int 
BMI_tcp_get_info(int option, + void *inout_parameter); +void *BMI_tcp_memalloc(bmi_size_t size, + enum bmi_op_type send_recv); +int BMI_tcp_memfree(void *buffer, + bmi_size_t size, + enum bmi_op_type send_recv); +int BMI_tcp_unexpected_free(void *buffer); +int BMI_tcp_post_send(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *buffer, + bmi_size_t size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +int BMI_tcp_post_sendunexpected(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *buffer, + bmi_size_t size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +int BMI_tcp_post_recv(bmi_op_id_t * id, + bmi_method_addr_p src, + void *buffer, + bmi_size_t expected_size, + bmi_size_t * actual_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +int BMI_tcp_test(bmi_op_id_t id, + int *outcount, + bmi_error_code_t * error_code, + bmi_size_t * actual_size, + void **user_ptr, + int max_idle_time_ms, + bmi_context_id context_id); +int BMI_tcp_testsome(int incount, + bmi_op_id_t * id_array, + int *outcount, + int *index_array, + bmi_error_code_t * error_code_array, + bmi_size_t * actual_size_array, + void **user_ptr_array, + int max_idle_time_ms, + bmi_context_id context_id); +int BMI_tcp_testunexpected(int incount, + int *outcount, + struct bmi_method_unexpected_info *info, + int max_idle_time_ms); +int BMI_tcp_testcontext(int incount, + bmi_op_id_t * out_id_array, + int *outcount, + bmi_error_code_t * error_code_array, + bmi_size_t * actual_size_array, + void **user_ptr_array, + int max_idle_time_ms, + bmi_context_id context_id); +bmi_method_addr_p BMI_tcp_method_addr_lookup(const char *id_string); +const char* BMI_tcp_addr_rev_lookup_unexpected(bmi_method_addr_p map); +int BMI_tcp_query_addr_range(bmi_method_addr_p, const char *, 
int); +int BMI_tcp_post_send_list(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t total_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +int BMI_tcp_post_recv_list(bmi_op_id_t * id, + bmi_method_addr_p src, + void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t total_expected_size, + bmi_size_t * total_actual_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +int BMI_tcp_post_sendunexpected_list(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t total_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +int BMI_tcp_open_context(bmi_context_id context_id); +void BMI_tcp_close_context(bmi_context_id context_id); +int BMI_tcp_cancel(bmi_op_id_t id, bmi_context_id context_id); + +char BMI_tcp_method_name[] = "bmi_tcp"; + +/* size of encoded message header */ +#define TCP_ENC_HDR_SIZE 24 + +/* structure internal to tcp for use as a message header */ +struct tcp_msg_header +{ + uint32_t magic_nr; /* magic number */ + uint32_t mode; /* eager, rendezvous, etc. 
*/ + bmi_msg_tag_t tag; /* user specified message tag */ + bmi_size_t size; /* length of trailing message */ + char enc_hdr[TCP_ENC_HDR_SIZE]; /* encoded version of header info */ +}; + +#define BMI_TCP_ENC_HDR(hdr) \ + do { \ + *((uint32_t*)&((hdr).enc_hdr[0])) = htobmi32((hdr).magic_nr); \ + *((uint32_t*)&((hdr).enc_hdr[4])) = htobmi32((hdr).mode); \ + *((uint64_t*)&((hdr).enc_hdr[8])) = htobmi64((hdr).tag); \ + *((uint64_t*)&((hdr).enc_hdr[16])) = htobmi64((hdr).size); \ + } while(0) + +#define BMI_TCP_DEC_HDR(hdr) \ + do { \ + (hdr).magic_nr = bmitoh32(*((uint32_t*)&((hdr).enc_hdr[0]))); \ + (hdr).mode = bmitoh32(*((uint32_t*)&((hdr).enc_hdr[4]))); \ + (hdr).tag = bmitoh64(*((uint64_t*)&((hdr).enc_hdr[8]))); \ + (hdr).size = bmitoh64(*((uint64_t*)&((hdr).enc_hdr[16]))); \ + } while(0) + +/* enumerate states that we care about */ +enum bmi_tcp_state +{ + BMI_TCP_INPROGRESS, + BMI_TCP_BUFFERING, + BMI_TCP_COMPLETE +}; + +/* tcp private portion of operation structure */ +struct tcp_op +{ + struct tcp_msg_header env; /* envelope for this message */ + enum bmi_tcp_state tcp_op_state; + /* these two fields are used as place holders for the buffer + * list and size list when we really don't have lists (regular + * BMI_send or BMI_recv operations); it allows us to use + * generic code to handle both cases + */ + void *buffer_list_stub; + bmi_size_t size_list_stub; +}; + +/* static io vector for use with readv and writev; we can only use + * this because BMI serializes module calls + */ +#define BMI_TCP_IOV_COUNT 10 +static WSABUF stat_io_vector[BMI_TCP_IOV_COUNT+1]; + +/* internal utility functions */ +static int tcp_server_init(void); +static void dealloc_tcp_method_addr(bmi_method_addr_p map); +static int tcp_sock_init(bmi_method_addr_p my_method_addr); +static int enqueue_operation(op_list_p target_list, + enum bmi_op_type send_recv, + bmi_method_addr_p map, + void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t amt_complete, + 
bmi_size_t env_amt_complete, + bmi_op_id_t * id, + int tcp_op_state, + struct tcp_msg_header header, + void *user_ptr, + bmi_size_t actual_size, + bmi_size_t expected_size, + bmi_context_id context_id, + int32_t event_id); +static int tcp_cleanse_addr(bmi_method_addr_p map, int error_code); +static int tcp_shutdown_addr(bmi_method_addr_p map); +static int tcp_do_work(int max_idle_time); +static int tcp_do_work_error(bmi_method_addr_p map); +static int tcp_do_work_recv(bmi_method_addr_p map, int* stall_flag); +static int tcp_do_work_send(bmi_method_addr_p map, int* stall_flag); +static int work_on_recv_op(method_op_p my_method_op, + int *stall_flag); +static int work_on_send_op(method_op_p my_method_op, + int *blocked_flag, int* stall_flag); +static int tcp_accept_init(int *socket, char** peer); +static method_op_p alloc_tcp_method_op(void); +static void dealloc_tcp_method_op(method_op_p old_op); +static int handle_new_connection(bmi_method_addr_p map); +static int tcp_post_send_generic(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + enum bmi_buffer_type buffer_type, + struct tcp_msg_header my_header, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +static int tcp_post_recv_generic(bmi_op_id_t * id, + bmi_method_addr_p src, + void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t expected_size, + bmi_size_t * actual_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +static int payload_progress(int s, void *const *buffer_list, const bmi_size_t* + size_list, int list_count, bmi_size_t total_size, int* list_index, + bmi_size_t* current_index_complete, enum bmi_op_type send_recv, + char* enc_hdr, bmi_size_t* env_amt_complete); + +#if defined(USE_TRUSTED) && defined(__PVFS2_CLIENT__) +static int tcp_enable_trusted(struct tcp_addr *tcp_addr_data); +#endif +#if 
defined(USE_TRUSTED) && defined(__PVFS2_SERVER__) +static int tcp_allow_trusted(struct sockaddr_in *peer_sockaddr); +#endif + +static void bmi_set_sock_buffers(int socket); + +/* exported method interface */ +const struct bmi_method_ops bmi_tcp_ops = { + BMI_tcp_method_name, + 0, /* flags */ + BMI_tcp_initialize, + BMI_tcp_finalize, + BMI_tcp_set_info, + BMI_tcp_get_info, + BMI_tcp_memalloc, + BMI_tcp_memfree, + BMI_tcp_unexpected_free, + BMI_tcp_post_send, + BMI_tcp_post_sendunexpected, + BMI_tcp_post_recv, + BMI_tcp_test, + BMI_tcp_testsome, + BMI_tcp_testcontext, + BMI_tcp_testunexpected, + BMI_tcp_method_addr_lookup, + BMI_tcp_post_send_list, + BMI_tcp_post_recv_list, + BMI_tcp_post_sendunexpected_list, + BMI_tcp_open_context, + BMI_tcp_close_context, + BMI_tcp_cancel, + BMI_tcp_addr_rev_lookup_unexpected, + BMI_tcp_query_addr_range +}; + +/* module parameters */ +static struct +{ + int method_flags; + int method_id; + bmi_method_addr_p listen_addr; +} tcp_method_params; + +#if defined(USE_TRUSTED) && defined(__PVFS2_SERVER__) +static struct tcp_allowed_connection_s *gtcp_allowed_connection = NULL; +#endif + +static int check_unexpected = 1; + +/* op_list_array indices */ +enum +{ + NUM_INDICES = 5, + IND_SEND = 0, + IND_RECV = 1, + IND_RECV_INFLIGHT = 2, + IND_RECV_EAGER_DONE_BUFFERING = 3, + IND_COMPLETE_RECV_UNEXP = 4, /* MAKE THIS COMES LAST */ +}; + +/* internal operation lists */ +static op_list_p op_list_array[6] = { NULL, NULL, NULL, NULL, + NULL, NULL +}; + +/* internal completion queues */ +static op_list_p completion_array[BMI_MAX_CONTEXTS] = { NULL }; + +/* internal socket collection */ +static socket_collection_p tcp_socket_collection_p = NULL; + +/* tunable parameters */ +enum +{ + /* amount of pending connections we'll allow */ + TCP_BACKLOG = 256, + /* amount of work to be done during a test. This roughly + * translates into the number of sockets that we will perform + * nonblocking operations on during one function call. 
+ */ + TCP_WORK_METRIC = 128 +}; + +/* TCP message modes */ +enum +{ + TCP_MODE_IMMED = 1, /* not used for TCP/IP */ + TCP_MODE_UNEXP = 2, + TCP_MODE_EAGER = 4, + TCP_MODE_REND = 8 +}; + +/* Allowable sizes for each mode */ +enum +{ + TCP_MODE_EAGER_LIMIT = 16384, /* 16K */ + TCP_MODE_REND_LIMIT = 16777216 /* 16M */ +}; + +/* toggles cancel mode; for bmi_tcp this will result in socket being closed + * in all cancellation cases + */ +static int forceful_cancel_mode = 0; + +/* + Socket buffer sizes, currently these default values will be used + for the clients... (TODO) + */ +static int tcp_buffer_size_receive = 0; +static int tcp_buffer_size_send = 0; + +static PINT_event_type bmi_tcp_send_event_id; +static PINT_event_type bmi_tcp_recv_event_id; + +static PINT_event_group bmi_tcp_event_group; +/* static pid_t bmi_tcp_pid */ +static HANDLE bmi_tcp_pid; + +/************************************************************************* + * Visible Interface + */ + +/* BMI_tcp_initialize() + * + * Initializes the tcp method. Must be called before any other tcp + * method functions. 
+ * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_initialize(bmi_method_addr_p listen_addr, + int method_id, + int init_flags) +{ + + int ret = -1, err; + int tmp_errno = bmi_tcp_errno_to_pvfs(-ENOSYS); + struct tcp_addr *tcp_addr_data = NULL; + int i = 0; + WORD version; + WSADATA wsaData; + + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "Initializing TCP/IP module.\n"); + + /* check args */ + if ((init_flags & BMI_INIT_SERVER) && !listen_addr) + { + gossip_lerr("Error: bad parameters given to TCP/IP module.\n"); + return (bmi_tcp_errno_to_pvfs(-EINVAL)); + } + + version = MAKEWORD(2, 2); + err = WSAStartup(version, &wsaData); + if (err != 0) + { + gossip_lerr("Error: could not initialize Windows Sockets: %d.\n", err); + return (bmi_tcp_errno_to_pvfs(-ENOSYS)); + } + + gen_mutex_lock(&interface_mutex); + + /* zero out our parameter structure and fill it in */ + memset(&tcp_method_params, 0, sizeof(tcp_method_params)); + tcp_method_params.method_id = method_id; + tcp_method_params.method_flags = init_flags; + + if (init_flags & BMI_INIT_SERVER) + { + /* hang on to our local listening address if needed */ + tcp_method_params.listen_addr = listen_addr; + /* and initialize server functions */ + ret = tcp_server_init(); + if (ret < 0) + { + tmp_errno = bmi_tcp_errno_to_pvfs(ret); + gossip_err("Error: tcp_server_init() failure.\n"); + goto initialize_failure; + } + } + + /* set up the operation lists */ + for (i = 0; i < NUM_INDICES; i++) + { + op_list_array[i] = op_list_new(); + if (!op_list_array[i]) + { + tmp_errno = bmi_tcp_errno_to_pvfs(-ENOMEM); + goto initialize_failure; + } + } + + /* set up the socket collection */ + if (tcp_method_params.method_flags & BMI_INIT_SERVER) + { + tcp_addr_data = (struct tcp_addr *) tcp_method_params.listen_addr->method_data; + tcp_socket_collection_p = BMI_socket_collection_init(tcp_addr_data->socket); + } + else + { + tcp_socket_collection_p = BMI_socket_collection_init(-1); + } + + if (!tcp_socket_collection_p) + { + 
tmp_errno = bmi_tcp_errno_to_pvfs(-ENOMEM); + goto initialize_failure; + } + + /* bmi_tcp_pid = getpid(); */ + bmi_tcp_pid = GetCurrentProcess(); + PINT_event_define_group("bmi_tcp", &bmi_tcp_event_group); + + /* Define the send event: + * START: (client_id, request_id, rank, handle, op_id, send_size) + * STOP: (size_sent) + */ + PINT_event_define_event( + &bmi_tcp_event_group, +#ifdef __PVFS2_SERVER__ + "bmi_server_send", +#else + "bmi_client_send", +#endif + "%d%d%d%llu%d%d", + "%d", &bmi_tcp_send_event_id); + + /* Define the recv event: + * START: (client_id, request_id, rank, handle, op_id, recv_size) + * STOP: (size_received) + */ + PINT_event_define_event( + &bmi_tcp_event_group, +#ifdef __PVFS2_SERVER__ + "bmi_server_recv", +#else + "bmi_client_recv", +#endif + "%d%d%d%llu%d%d", + "%d", &bmi_tcp_recv_event_id); + + gen_mutex_unlock(&interface_mutex); + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, + "TCP/IP module successfully initialized.\n"); + return (0); + + initialize_failure: + + /* cleanup data structures and bail out */ + for (i = 0; i < NUM_INDICES; i++) + { + if (op_list_array[i]) + { + op_list_cleanup(op_list_array[i]); + } + } + if (tcp_socket_collection_p) + { + BMI_socket_collection_finalize(tcp_socket_collection_p); + } + gen_mutex_unlock(&interface_mutex); + return (tmp_errno); +} + + +/* BMI_tcp_finalize() + * + * Shuts down the tcp method. 
+ * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_finalize(void) +{ + int i = 0; + + gen_mutex_lock(&interface_mutex); + + /* shut down our listen addr, if we have one */ + if ((tcp_method_params.method_flags & BMI_INIT_SERVER) + && tcp_method_params.listen_addr) + { + dealloc_tcp_method_addr(tcp_method_params.listen_addr); + } + + /* note that this forcefully shuts down operations */ + for (i = 0; i < NUM_INDICES; i++) + { + if (op_list_array[i]) + { + op_list_cleanup(op_list_array[i]); + op_list_array[i] = NULL; + } + } + + /* get rid of socket collection */ + if (tcp_socket_collection_p) + { + BMI_socket_collection_finalize(tcp_socket_collection_p); + tcp_socket_collection_p = NULL; + } + + /* NOTE: we are trusting the calling BMI layer to deallocate + * all of the method addresses (this will close any open sockets) + */ + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "TCP/IP module finalized.\n"); + gen_mutex_unlock(&interface_mutex); + return (0); +} + + +/* + * BMI_tcp_method_addr_lookup() + * + * resolves the string representation of an address into a method + * address structure. 
+ * + * returns a pointer to method_addr on success, NULL on failure + */ +bmi_method_addr_p BMI_tcp_method_addr_lookup(const char *id_string) +{ + char *tcp_string = NULL; + char *delim = NULL; + char *hostname = NULL; + bmi_method_addr_p new_addr = NULL; + struct tcp_addr *tcp_addr_data = NULL; + int ret = -1; + + tcp_string = string_key("tcp", id_string); + if (!tcp_string) + { + /* the string doesn't even have our info */ + return (NULL); + } + + /* start breaking up the method information */ + /* for normal tcp, it is simply hostname:port */ + if ((delim = strchr(tcp_string, ':')) == NULL) + { + gossip_lerr("Error: malformed tcp address.\n"); + free(tcp_string); + return (NULL); + } + + /* looks ok, so let's build the method addr structure */ + new_addr = alloc_tcp_method_addr(); + if (!new_addr) + { + free(tcp_string); + return (NULL); + } + tcp_addr_data = (struct tcp_addr *) new_addr->method_data; + + ret = sscanf((delim + 1), "%d", &(tcp_addr_data->port)); + if (ret != 1) + { + gossip_lerr("Error: malformed tcp address.\n"); + dealloc_tcp_method_addr(new_addr); + free(tcp_string); + return (NULL); + } + + hostname = (char *) malloc((delim - tcp_string + 1)); + if (!hostname) + { + dealloc_tcp_method_addr(new_addr); + free(tcp_string); + return (NULL); + } + strncpy(hostname, tcp_string, (delim - tcp_string)); + hostname[delim - tcp_string] = '\0'; + + tcp_addr_data->hostname = hostname; + + free(tcp_string); + return (new_addr); +} + + +/* BMI_tcp_memalloc() + * + * Allocates memory that can be used in native mode by tcp. + * + * returns 0 on success, -errno on failure + */ +void *BMI_tcp_memalloc(bmi_size_t size, + enum bmi_op_type send_recv) +{ + /* we really don't care what flags the caller uses, TCP/IP has no + * preferences about how the memory should be configured. 
+ */ + +/* return (calloc(1,(size_t) size)); */ + return PINT_mem_aligned_alloc(size, 4096); +} + + +/* BMI_tcp_memfree() + * + * Frees memory that was allocated with BMI_tcp_memalloc() + * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_memfree(void *buffer, + bmi_size_t size, + enum bmi_op_type send_recv) +{ + PINT_mem_aligned_free(buffer); + return (0); +} + +/* BMI_tcp_unexpected_free() + * + * Frees memory that was returned from BMI_tcp_test_unexpected() + * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_unexpected_free(void *buffer) +{ + if (buffer) + { + free(buffer); + } + return (0); +} + +#ifdef USE_TRUSTED + +static struct tcp_allowed_connection_s * +alloc_trusted_connection_info(int network_count) +{ + struct tcp_allowed_connection_s *tcp_allowed_connection_info = NULL; + + tcp_allowed_connection_info = (struct tcp_allowed_connection_s *) + calloc(1, sizeof(struct tcp_allowed_connection_s)); + if (tcp_allowed_connection_info) + { + tcp_allowed_connection_info->network = + (struct in_addr *) calloc(network_count, sizeof(struct in_addr)); + if (tcp_allowed_connection_info->network == NULL) + { + free(tcp_allowed_connection_info); + tcp_allowed_connection_info = NULL; + } + else + { + tcp_allowed_connection_info->netmask = + (struct in_addr *) calloc(network_count, sizeof(struct in_addr)); + if (tcp_allowed_connection_info->netmask == NULL) + { + free(tcp_allowed_connection_info->network); + free(tcp_allowed_connection_info); + tcp_allowed_connection_info = NULL; + } + else { + tcp_allowed_connection_info->network_count = network_count; + } + } + } + return tcp_allowed_connection_info; +} + +static void +dealloc_trusted_connection_info(void* ptcp_allowed_connection_info) +{ + struct tcp_allowed_connection_s *tcp_allowed_connection_info = + (struct tcp_allowed_connection_s *) ptcp_allowed_connection_info; + if (tcp_allowed_connection_info) + { + free(tcp_allowed_connection_info->network); + tcp_allowed_connection_info->network = 
NULL; + free(tcp_allowed_connection_info->netmask); + tcp_allowed_connection_info->netmask = NULL; + free(tcp_allowed_connection_info); + } + return; +} + +#endif + +/* + * This function will convert a mask_bits value to an in_addr + * representation. i.e for example if + * mask_bits was 24 then it would be 255.255.255.0 + * if mask_bits was 22 then it would be 255.255.252.0 + * etc + */ +static void convert_mask(int mask_bits, struct in_addr *mask) +{ + uint32_t addr = -1; + addr = addr & ~~(-1 << (mask_bits ? (32 - mask_bits) : 32)); + mask->s_addr = htonl(addr); + return; +} + +/* BMI_tcp_set_info() + * + * Pass in optional parameters. + * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_set_info(int option, + void *inout_parameter) +{ + int ret = -1; + bmi_method_addr_p tmp_addr = NULL; + + gen_mutex_lock(&interface_mutex); + + switch (option) + { + case BMI_TCP_BUFFER_SEND_SIZE: + tcp_buffer_size_send = *((int *)inout_parameter); + ret = 0; +#ifdef __PVFS2_SERVER__ + /* Set the default socket buffer sizes for the server socket */ + bmi_set_sock_buffers( + ((struct tcp_addr *) + tcp_method_params.listen_addr->method_data)->socket); +#endif + break; + case BMI_TCP_BUFFER_RECEIVE_SIZE: + tcp_buffer_size_receive = *((int *)inout_parameter); + ret = 0; +#ifdef __PVFS2_SERVER__ + /* Set the default socket buffer sizes for the server socket */ + bmi_set_sock_buffers( + ((struct tcp_addr *) + tcp_method_params.listen_addr->method_data)->socket); +#endif + break; + case BMI_TCP_CLOSE_SOCKET: + /* this should no longer make it to the bmi_tcp method; see bmi.c */ + ret = 0; + break; + case BMI_FORCEFUL_CANCEL_MODE: + forceful_cancel_mode = 1; + ret = 0; + break; + case BMI_DROP_ADDR: + if (inout_parameter == NULL) + { + ret = bmi_tcp_errno_to_pvfs(-EINVAL); + } + else + { + tmp_addr = (bmi_method_addr_p) inout_parameter; + /* take it out of the socket collection */ + tcp_forget_addr(tmp_addr, 1, 0); + ret = 0; + } + break; +#ifdef USE_TRUSTED + case 
BMI_TRUSTED_CONNECTION: + { + struct tcp_allowed_connection_s *tcp_allowed_connection = NULL; + if (inout_parameter == NULL) + { + ret = bmi_tcp_errno_to_pvfs(-EINVAL); + break; + } + else + { + int bmi_networks_count = 0; + char **bmi_networks = NULL; + int *bmi_netmasks = NULL; + struct server_configuration_s *svc_config = NULL; + + svc_config = (struct server_configuration_s *) inout_parameter; + tcp_allowed_connection = alloc_trusted_connection_info(svc_config->allowed_networks_count); + if (tcp_allowed_connection == NULL) + { + ret = bmi_tcp_errno_to_pvfs(-ENOMEM); + break; + } +#ifdef __PVFS2_SERVER__ + gtcp_allowed_connection = tcp_allowed_connection; +#endif + /* Stash this in the server_configuration_s structure. freed later on */ + svc_config->security = tcp_allowed_connection; + svc_config->security_dtor = &dealloc_trusted_connection_info; + ret = 0; + /* Fill up the list of allowed ports */ + PINT_config_get_allowed_ports(svc_config, + &tcp_allowed_connection->port_enforce, + tcp_allowed_connection->ports); + + /* if it was enabled, make sure that we know how to deal with it */ + if (tcp_allowed_connection->port_enforce == 1) + { + /* illegal ports */ + if (tcp_allowed_connection->ports[0] > 65535 + || tcp_allowed_connection->ports[1] > 65535 + || tcp_allowed_connection->ports[1] < tcp_allowed_connection->ports[0]) + { + gossip_lerr("Error: illegal trusted port values\n"); + ret = bmi_tcp_errno_to_pvfs(-EINVAL); + /* don't enforce anything! 
*/ + tcp_allowed_connection->port_enforce = 0; + } + } + ret = 0; + /* Retrieve the list of BMI network addresses and masks */ + PINT_config_get_allowed_networks(svc_config, + &tcp_allowed_connection->network_enforce, + &bmi_networks_count, + &bmi_networks, + &bmi_netmasks); + + /* if it was enabled, make sure that we know how to deal with it */ + if (tcp_allowed_connection->network_enforce == 1) + { + int i; + + for (i = 0; i < bmi_networks_count; i++) + { + char *tcp_string = NULL; + /* Convert the network string into an in_addr_t structure */ + tcp_string = string_key("tcp", bmi_networks[i]); + if (!tcp_string) + { + /* the string doesn't even have our info */ + gossip_lerr("Error: malformed tcp network address\n"); + ret = bmi_tcp_errno_to_pvfs(-EINVAL); + } + else { + /* convert this into an in_addr_t */ + inet_aton(tcp_string, &tcp_allowed_connection->network[i]); + free(tcp_string); + } + convert_mask(bmi_netmasks[i], &tcp_allowed_connection->netmask[i]); + } + /* don't enforce anything if there were any errors */ + if (ret != 0) + { + tcp_allowed_connection->network_enforce = 0; + } + } + } + break; + } +#endif + case BMI_TCP_CHECK_UNEXPECTED: + { + check_unexpected = *(int *)inout_parameter; + ret = 0; + break; + } + + default: + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, + "TCP hint %d not implemented.\n", option); + ret = 0; + break; + } + + gen_mutex_unlock(&interface_mutex); + return (ret); +} + +/* BMI_tcp_get_info() + * + * Query for optional parameters. 
+ * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_get_info(int option, + void *inout_parameter) +{ + struct method_drop_addr_query* query; + struct tcp_addr* tcp_addr_data; + int ret = 0; + + gen_mutex_lock(&interface_mutex); + + switch (option) + { + case BMI_CHECK_MAXSIZE: + *((int *) inout_parameter) = TCP_MODE_REND_LIMIT; + ret = 0; + break; + case BMI_DROP_ADDR_QUERY: + query = (struct method_drop_addr_query*)inout_parameter; + tcp_addr_data = (struct tcp_addr *) query->addr->method_data; + /* only suggest that we discard the address if we have experienced + * an error and there is no way to reconnect + */ + if(tcp_addr_data->addr_error != 0 && + tcp_addr_data->dont_reconnect == 1) + { + query->response = 1; + } + else + { + query->response = 0; + } + ret = 0; + break; + case BMI_GET_UNEXP_SIZE: + *((int *) inout_parameter) = TCP_MODE_EAGER_LIMIT; + ret = 0; + break; + + default: + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, + "TCP hint %d not implemented.\n", option); + ret = -ENOSYS; + break; + } + + gen_mutex_unlock(&interface_mutex); + return (ret < 0) ? bmi_tcp_errno_to_pvfs(ret) : ret; +} + + +/* BMI_tcp_post_send() + * + * Submits send operations. 
+ * + * returns 0 on success that requires later poll, returns 1 on instant + * completion, -errno on failure + */ +int BMI_tcp_post_send(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *buffer, + bmi_size_t size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + struct tcp_msg_header my_header; + int ret = -1; + + /* clear the id field for safety */ + *id = 0; + + /* fill in the TCP-specific message header */ + if (size > TCP_MODE_REND_LIMIT) + { + return (bmi_tcp_errno_to_pvfs(-EMSGSIZE)); + } + + if (size <= TCP_MODE_EAGER_LIMIT) + { + my_header.mode = TCP_MODE_EAGER; + } + else + { + my_header.mode = TCP_MODE_REND; + } + my_header.tag = tag; + my_header.size = size; + my_header.magic_nr = BMI_MAGIC_NR; + + gen_mutex_lock(&interface_mutex); + + ret = tcp_post_send_generic(id, dest, &buffer, + &size, 1, buffer_type, my_header, + user_ptr, context_id, hints); + + gen_mutex_unlock(&interface_mutex); + return(ret); +} + + +/* BMI_tcp_post_sendunexpected() + * + * Submits unexpected send operations. 
+ * + * returns 0 on success that requires later poll, returns 1 on instant + * completion, -errno on failure + */ +int BMI_tcp_post_sendunexpected(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *buffer, + bmi_size_t size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + struct tcp_msg_header my_header; + int ret = -1; + + /* clear the id field for safety */ + *id = 0; + + if (size > TCP_MODE_EAGER_LIMIT) + { + return (bmi_tcp_errno_to_pvfs(-EMSGSIZE)); + } + + my_header.mode = TCP_MODE_UNEXP; + my_header.tag = tag; + my_header.size = size; + my_header.magic_nr = BMI_MAGIC_NR; + + gen_mutex_lock(&interface_mutex); + + ret = tcp_post_send_generic(id, dest, &buffer, + &size, 1, buffer_type, my_header, + user_ptr, context_id, hints); + gen_mutex_unlock(&interface_mutex); + return(ret); +} + + + +/* BMI_tcp_post_recv() + * + * Submits recv operations. + * + * returns 0 on success that requires later poll, returns 1 on instant + * completion, -errno on failure + */ +int BMI_tcp_post_recv(bmi_op_id_t * id, + bmi_method_addr_p src, + void *buffer, + bmi_size_t expected_size, + bmi_size_t * actual_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + int ret = -1; + + /* A few things could happen here: + * a) rendez. recv with sender not ready yet + * b) rendez. recv with sender waiting + * c) eager recv, data not available yet + * d) eager recv, some/all data already here + * e) rendez. recv with sender in eager mode + * + * b or d could lead to completion without polling. + * we don't look for unexpected messages here. 
+ */ + + if (expected_size > TCP_MODE_REND_LIMIT) + { + return (bmi_tcp_errno_to_pvfs(-EINVAL)); + } + gen_mutex_lock(&interface_mutex); + + ret = tcp_post_recv_generic(id, src, &buffer, &expected_size, + 1, expected_size, actual_size, + buffer_type, tag, + user_ptr, context_id, hints); + + gen_mutex_unlock(&interface_mutex); + return (ret); +} + + +/* BMI_tcp_test() + * + * Checks to see if a particular message has completed. + * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_test(bmi_op_id_t id, + int *outcount, + bmi_error_code_t * error_code, + bmi_size_t * actual_size, + void **user_ptr, + int max_idle_time, + bmi_context_id context_id) +{ + int ret = -1; + method_op_p query_op = (method_op_p)id_gen_fast_lookup(id); + + assert(query_op != NULL); + + gen_mutex_lock(&interface_mutex); + + /* do some ``real work'' here */ + ret = tcp_do_work(max_idle_time); + if (ret < 0) + { + gen_mutex_unlock(&interface_mutex); + return (ret); + } + + if (((struct tcp_op*)(query_op->method_data))->tcp_op_state == + BMI_TCP_COMPLETE) + { + assert(query_op->context_id == context_id); + op_list_remove(query_op); + if (user_ptr != NULL) + { + (*user_ptr) = query_op->user_ptr; + } + (*error_code) = query_op->error_code; + (*actual_size) = query_op->actual_size; + PINT_EVENT_END( + (query_op->send_recv == BMI_SEND ? + bmi_tcp_send_event_id : bmi_tcp_recv_event_id), bmi_tcp_pid, NULL, + query_op->event_id, id, *actual_size); + + dealloc_tcp_method_op(query_op); + (*outcount)++; + } + + gen_mutex_unlock(&interface_mutex); + return (0); +} + +/* BMI_tcp_testsome() + * + * Checks to see if any messages from the specified list have completed. 
+ * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_testsome(int incount, + bmi_op_id_t * id_array, + int *outcount, + int *index_array, + bmi_error_code_t * error_code_array, + bmi_size_t * actual_size_array, + void **user_ptr_array, + int max_idle_time, + bmi_context_id context_id) +{ + int ret = -1; + method_op_p query_op = NULL; + int i; + + gen_mutex_lock(&interface_mutex); + + /* do some ``real work'' here */ + ret = tcp_do_work(max_idle_time); + if (ret < 0) + { + gen_mutex_unlock(&interface_mutex); + return (ret); + } + + for(i=0; imethod_data))->tcp_op_state == + BMI_TCP_COMPLETE) + { + assert(query_op->context_id == context_id); + /* this one's done; pop it out */ + op_list_remove(query_op); + error_code_array[*outcount] = query_op->error_code; + actual_size_array[*outcount] = query_op->actual_size; + index_array[*outcount] = i; + if (user_ptr_array != NULL) + { + user_ptr_array[*outcount] = query_op->user_ptr; + } + PINT_EVENT_END( + (query_op->send_recv == BMI_SEND ? + bmi_tcp_send_event_id : bmi_tcp_recv_event_id), + bmi_tcp_pid, NULL, + query_op->event_id, actual_size_array[*outcount]); + dealloc_tcp_method_op(query_op); + (*outcount)++; + } + } + } + + gen_mutex_unlock(&interface_mutex); + return(0); +} + + +/* BMI_tcp_testunexpected() + * + * Checks to see if any unexpected messages have completed. 
+ * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_testunexpected(int incount, + int *outcount, + struct bmi_method_unexpected_info *info, + int max_idle_time) +{ + int ret = -1; + method_op_p query_op = NULL; + + gen_mutex_lock(&interface_mutex); + + if(op_list_empty(op_list_array[IND_COMPLETE_RECV_UNEXP])) + { + /* do some ``real work'' here */ + ret = tcp_do_work(max_idle_time); + if (ret < 0) + { + gen_mutex_unlock(&interface_mutex); + return (ret); + } + } + + *outcount = 0; + + /* go through the completed/unexpected list as long as we are finding + * stuff and we have room in the info array for it + */ + while ((*outcount < incount) && + (query_op = + op_list_shownext(op_list_array[IND_COMPLETE_RECV_UNEXP]))) + { + info[*outcount].error_code = query_op->error_code; + info[*outcount].addr = query_op->addr; + info[*outcount].buffer = query_op->buffer; + info[*outcount].size = query_op->actual_size; + info[*outcount].tag = query_op->msg_tag; + op_list_remove(query_op); + dealloc_tcp_method_op(query_op); + (*outcount)++; + } + gen_mutex_unlock(&interface_mutex); + return (0); +} + + +/* BMI_tcp_testcontext() + * + * Checks to see if any messages from the specified context have completed. 
+ * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_testcontext(int incount, + bmi_op_id_t* out_id_array, + int *outcount, + bmi_error_code_t * error_code_array, + bmi_size_t * actual_size_array, + void **user_ptr_array, + int max_idle_time, + bmi_context_id context_id) +{ + int ret = -1; + method_op_p query_op = NULL; + + *outcount = 0; + + gen_mutex_lock(&interface_mutex); + + if(op_list_empty(completion_array[context_id])) + { + /* if there are unexpected ops ready to go, then short out so + * that the next testunexpected call can pick it up without + * delay + */ + if(check_unexpected && + !op_list_empty(op_list_array[IND_COMPLETE_RECV_UNEXP])) + { + gen_mutex_unlock(&interface_mutex); + return(0); + } + + /* do some ``real work'' here */ + ret = tcp_do_work(max_idle_time); + if (ret < 0) + { + gen_mutex_unlock(&interface_mutex); + return (ret); + } + } + + /* pop as many items off of the completion queue as we can */ + while((*outcount < incount) && + (query_op = + op_list_shownext(completion_array[context_id]))) + { + assert(query_op); + assert(query_op->context_id == context_id); + + /* this one's done; pop it out */ + op_list_remove(query_op); + error_code_array[*outcount] = query_op->error_code; + actual_size_array[*outcount] = query_op->actual_size; + out_id_array[*outcount] = query_op->op_id; + if (user_ptr_array != NULL) + { + user_ptr_array[*outcount] = query_op->user_ptr; + } + + PINT_EVENT_END((query_op->send_recv == BMI_SEND ? 
+ bmi_tcp_send_event_id : bmi_tcp_recv_event_id), + bmi_tcp_pid, NULL, query_op->event_id, + query_op->actual_size); + + dealloc_tcp_method_op(query_op); + query_op = NULL; + (*outcount)++; + } + + gen_mutex_unlock(&interface_mutex); + return(0); +} + + + +/* BMI_tcp_post_send_list() + * + * same as the BMI_tcp_post_send() function, except that it sends + * from an array of possibly non contiguous buffers + * + * returns 0 on success, 1 on immediate successful completion, + * -errno on failure + */ +int BMI_tcp_post_send_list(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t total_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + struct tcp_msg_header my_header; + int ret = -1; + + /* clear the id field for safety */ + *id = 0; + + /* fill in the TCP-specific message header */ + if (total_size > TCP_MODE_REND_LIMIT) + { + gossip_lerr("Error: BMI message too large!\n"); + return (bmi_tcp_errno_to_pvfs(-EMSGSIZE)); + } + + if (total_size <= TCP_MODE_EAGER_LIMIT) + { + my_header.mode = TCP_MODE_EAGER; + } + else + { + my_header.mode = TCP_MODE_REND; + } + my_header.tag = tag; + my_header.size = total_size; + my_header.magic_nr = BMI_MAGIC_NR; + + gen_mutex_lock(&interface_mutex); + + ret = tcp_post_send_generic(id, dest, buffer_list, + size_list, list_count, buffer_type, + my_header, user_ptr, context_id, hints); + gen_mutex_unlock(&interface_mutex); + return(ret); +} + +/* BMI_tcp_post_recv_list() + * + * same as the BMI_tcp_post_recv() function, except that it recvs + * into an array of possibly non contiguous buffers + * + * returns 0 on success, 1 on immediate successful completion, + * -errno on failure + */ +int BMI_tcp_post_recv_list(bmi_op_id_t * id, + bmi_method_addr_p src, + void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t total_expected_size, + 
bmi_size_t * total_actual_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + int ret = -1; + + if (total_expected_size > TCP_MODE_REND_LIMIT) + { + return (bmi_tcp_errno_to_pvfs(-EINVAL)); + } + + gen_mutex_lock(&interface_mutex); + + ret = tcp_post_recv_generic(id, src, buffer_list, size_list, + list_count, total_expected_size, + total_actual_size, buffer_type, tag, user_ptr, + context_id, hints); + + gen_mutex_unlock(&interface_mutex); + return (ret); +} + + +/* BMI_tcp_post_sendunexpected_list() + * + * same as the BMI_tcp_post_sendunexpected() function, except that + * it sends from an array of possibly non contiguous buffers + * + * returns 0 on success, 1 on immediate successful completion, + * -errno on failure + */ +int BMI_tcp_post_sendunexpected_list(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t total_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + struct tcp_msg_header my_header; + int ret = -1; + + /* clear the id field for safety */ + *id = 0; + + if (total_size > TCP_MODE_EAGER_LIMIT) + { + return (bmi_tcp_errno_to_pvfs(-EMSGSIZE)); + } + + my_header.mode = TCP_MODE_UNEXP; + my_header.tag = tag; + my_header.size = total_size; + my_header.magic_nr = BMI_MAGIC_NR; + + gen_mutex_lock(&interface_mutex); + + ret = tcp_post_send_generic(id, dest, buffer_list, + size_list, list_count, buffer_type, + my_header, user_ptr, context_id, hints); + + gen_mutex_unlock(&interface_mutex); + return(ret); +} + + +/* BMI_tcp_open_context() + * + * opens a new context with the specified context id + * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_open_context(bmi_context_id context_id) +{ + + gen_mutex_lock(&interface_mutex); + + /* start a new queue for tracking completions in this context */ + 
completion_array[context_id] = op_list_new(); + if (!completion_array[context_id]) + { + gen_mutex_unlock(&interface_mutex); + return(bmi_tcp_errno_to_pvfs(-ENOMEM)); + } + + gen_mutex_unlock(&interface_mutex); + return(0); +} + + +/* BMI_tcp_close_context() + * + * shuts down a context, previously opened with BMI_tcp_open_context() + * + * no return value + */ +void BMI_tcp_close_context(bmi_context_id context_id) +{ + op_list_p iterator = NULL; + op_list_p scratch = NULL; + method_op_p tmp_method_op = NULL; + + gen_mutex_lock(&interface_mutex); + + /* tear down completion queue for this context */ + op_list_cleanup(completion_array[context_id]); + + gen_mutex_unlock(&interface_mutex); + return; +} + + +/* BMI_tcp_cancel() + * + * attempt to cancel a pending bmi tcp operation + * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_cancel(bmi_op_id_t id, bmi_context_id context_id) +{ + method_op_p query_op = NULL; + + gen_mutex_lock(&interface_mutex); + + query_op = (method_op_p)id_gen_fast_lookup(id); + if(!query_op) + { + /* if we can't find the operattion, then assume that it has already + * completed naturally + */ + gen_mutex_unlock(&interface_mutex); + return(0); + } + + /* easy case: is the operation already completed? */ + if(((struct tcp_op*)(query_op->method_data))->tcp_op_state == + BMI_TCP_COMPLETE) + { + /* only close socket in forceful cancel mode */ + if(forceful_cancel_mode) + tcp_forget_addr(query_op->addr, 0, -BMI_ECANCEL); + /* we are done! status will be collected during test */ + gen_mutex_unlock(&interface_mutex); + return(0); + } + + /* has the operation started moving data yet? 
*/ + if(query_op->env_amt_complete) + { + /* be pessimistic and kill the socket, even if not in forceful + * cancel mode */ + /* NOTE: this may place other operations beside this one into + * EINTR error state + */ + tcp_forget_addr(query_op->addr, 0, -BMI_ECANCEL); + gen_mutex_unlock(&interface_mutex); + return(0); + } + + /* if we fall to this point, op has been posted, but no data has moved + * for it yet as far as we know + */ + + /* mark op as canceled, move to completion queue */ + query_op->error_code = -BMI_ECANCEL; + if(query_op->send_recv == BMI_SEND) + { + BMI_socket_collection_remove_write_bit(tcp_socket_collection_p, + query_op->addr); + } + op_list_remove(query_op); + ((struct tcp_op*)(query_op->method_data))->tcp_op_state = + BMI_TCP_COMPLETE; + /* only close socket in forceful cancel mode */ + if(forceful_cancel_mode) + tcp_forget_addr(query_op->addr, 0, -BMI_ECANCEL); + op_list_add(completion_array[query_op->context_id], query_op); + gen_mutex_unlock(&interface_mutex); + return(0); +} + +/* + * For now, we only support wildcard strings that are IP addresses + * and not *hostnames*! 
/* check_valid_wildcard()
 *
 * Parses a dotted-quad wildcard specification such as "192.168.0.*", or a
 * complete IPv4 address such as "10.0.0.1", into four octet values.  A '*'
 * component — and any omitted trailing component — is stored as 256, the
 * out-of-band "match anything from here on" marker consumed by
 * check_octets().
 *
 * NOTE: only numeric IP wildcards are supported, not *hostnames*.
 *
 * returns 0 on success, -EINVAL on a malformed specification
 */
static int check_valid_wildcard(const char *wildcard_string, unsigned long *octets)
{
    const char *p = wildcard_string;
    int octet_count = 0;

    if (*p == '\0')
    {
        return -EINVAL;
    }

    while (*p != '\0')
    {
        if (octet_count >= 4)
        {
            return -EINVAL;
        }
        if (*p == '*')
        {
            octets[octet_count++] = 256;
            p++;
        }
        else if (*p >= '0' && *p <= '9')
        {
            char *endptr = NULL;
            unsigned long val = strtoul(p, &endptr, 10);
            if (val >= 256)
            {
                return -EINVAL;
            }
            octets[octet_count++] = val;
            p = endptr;
        }
        else
        {
            return -EINVAL;
        }
        /* each component must be followed by a '.' (with something after
         * it) or by the end of the string
         */
        if (*p == '.')
        {
            if (*(p + 1) == '\0')
            {
                return -EINVAL;
            }
            p++;
        }
        else if (*p != '\0')
        {
            return -EINVAL;
        }
    }

    /* BUG FIX: the original parser only committed an octet when it saw the
     * *next* '.', so the component after the last dot (e.g. the "1" in
     * "10.0.0.1") was silently discarded and replaced by the wildcard
     * marker, making a fully specified address match an entire /24.
     * Parsing component by component above fixes that; genuinely
     * unspecified trailing components still default to the wildcard marker.
     * (This also removes the original's unbounded strncpy into a 16-byte
     * scratch buffer.)
     */
    for (; octet_count < 4; octet_count++)
    {
        octets[octet_count] = 256;
    }
    return 0;
}

/* check_octets()
 *
 * return 1 if the addr specified is part of the wildcard specification of
 * octets (256 in any position matches that octet and everything after it),
 * return 0 otherwise.
 */
static int check_octets(struct in_addr addr, unsigned long *octets)
{
    uint32_t host_addr = ntohl(addr.s_addr);
    int i;

    for (i = 0; i < 4; i++)
    {
        /* select octet i, most significant byte first */
        unsigned long byte = (host_addr >> (8 * (3 - i))) & 0xff;

        if (octets[i] == 256)
        {
            /* wildcard: this octet and all following ones match */
            return 1;
        }
        if (byte != octets[i])
        {
            return 0;
        }
    }
    return 1;
}
+ * or if it is part of the subnet mask specified + */ +int BMI_tcp_query_addr_range(bmi_method_addr_p map, const char *wildcard_string, int netmask) +{ + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) map->method_data; + struct sockaddr_in map_addr; + socklen_t map_addr_len = sizeof(map_addr); + const char *tcp_wildcard = wildcard_string + 6 /* strlen("tcp://") */; + int ret = -1; + + memset(&map_addr, 0, sizeof(map_addr)); + if(getpeername(tcp_addr_data->socket, (struct sockaddr *) &map_addr, (int *) &map_addr_len) < 0) + { + ret = bmi_tcp_errno_to_pvfs(-EINVAL); + gossip_err("Error: failed to retrieve peer name for client.\n"); + return(ret); + } + /* Wildcard specification */ + if (netmask == -1) + { + unsigned long octets[4]; + if (check_valid_wildcard(tcp_wildcard, octets) < 0) + { + gossip_lerr("Invalid wildcard specification: %s\n", tcp_wildcard); + return -EINVAL; + } + gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Map Address is : %s, Wildcard Octets: %lu.%lu.%lu.%lu\n", inet_ntoa(map_addr.sin_addr), + octets[0], octets[1], octets[2], octets[3]); + if (check_octets(map_addr.sin_addr, octets) == 1) + { + return 1; + } + } + /* Netmask specification */ + else { + struct sockaddr_in mask_addr, network_addr; + memset(&mask_addr, 0, sizeof(mask_addr)); + memset(&network_addr, 0, sizeof(network_addr)); + /* Convert the netmask address */ + convert_mask(netmask, &mask_addr.sin_addr); + /* Invalid network address */ + if (inet_aton(tcp_wildcard, &network_addr.sin_addr) == 0) + { + gossip_err("Invalid network specification: %s\n", tcp_wildcard); + return -EINVAL; + } + /* Matches the subnet mask! 
*/ + if ((map_addr.sin_addr.s_addr & mask_addr.sin_addr.s_addr) + == (network_addr.sin_addr.s_addr & mask_addr.sin_addr.s_addr)) + { + return 1; + } + } + return 0; +} + +/* BMI_tcp_addr_rev_lookup_unexpected() + * + * looks up an address that was initialized unexpectedly and returns a string + * hostname + * + * returns string on success, "UNKNOWN" on failure + */ +const char* BMI_tcp_addr_rev_lookup_unexpected(bmi_method_addr_p map) +{ + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) map->method_data; + int debug_on; + uint64_t mask; + socklen_t peerlen; + struct sockaddr_in peer; + int ret; + struct hostent *peerent; + char* tmp_peer; + + /* return default response if we don't have support for the right socket + * calls + */ +#if !defined(HAVE_GETHOSTBYADDR) + return(tcp_addr_data->peer); +#else + + /* Only resolve hostnames if a gossip mask is set to request it. + * Otherwise we leave it at ip address + */ + gossip_get_debug_mask(&debug_on, &mask); + + if(!debug_on || (!(mask & GOSSIP_ACCESS_HOSTNAMES))) + { + return(tcp_addr_data->peer); + } + + peerlen = sizeof(struct sockaddr_in); + + if(tcp_addr_data->peer_type == BMI_TCP_PEER_HOSTNAME) + { + /* full hostname already cached; return now */ + return(tcp_addr_data->peer); + } + + /* if we hit this point, we need to resolve hostname */ + ret = getpeername(tcp_addr_data->socket, (struct sockaddr*) &(peer), (int *) &peerlen); + if(ret < 0) + { + /* default to use IP address */ + return(tcp_addr_data->peer); + } + + peerent = gethostbyaddr((const char *) &peer.sin_addr.s_addr, + sizeof(struct in_addr), AF_INET); + if(peerent == NULL) + { + /* default to use IP address */ + return(tcp_addr_data->peer); + } + + tmp_peer = (char*)malloc(strlen(peerent->h_name) + 1); + if(!tmp_peer) + { + /* default to use IP address */ + return(tcp_addr_data->peer); + } + strcpy(tmp_peer, peerent->h_name); + if(tcp_addr_data->peer) + { + free(tcp_addr_data->peer); + } + tcp_addr_data->peer = tmp_peer; + tcp_addr_data->peer_type 
= BMI_TCP_PEER_HOSTNAME; + return(tcp_addr_data->peer); + +#endif + +} + +/* tcp_forget_addr() + * + * completely removes a tcp method address from use, and aborts any + * operations that use the address. If the + * dealloc_flag is set, the memory used by the address will be + * deallocated as well. + * + * no return value + */ +void tcp_forget_addr(bmi_method_addr_p map, + int dealloc_flag, + int error_code) +{ + struct tcp_addr* tcp_addr_data = (struct tcp_addr *) map->method_data; + BMI_addr_t bmi_addr = tcp_addr_data->bmi_addr; + int tmp_outcount; + bmi_method_addr_p tmp_addr; + int tmp_status; + + if (tcp_socket_collection_p) + { + BMI_socket_collection_remove(tcp_socket_collection_p, map); + /* perform a test to force the socket collection to act on the remove + * request before continuing + */ + if(!sc_test_busy) + { + BMI_socket_collection_testglobal(tcp_socket_collection_p, + 0, &tmp_outcount, &tmp_addr, &tmp_status, 0); + } + } + + tcp_shutdown_addr(map); + tcp_cleanse_addr(map, error_code); + tcp_addr_data->addr_error = error_code; + if (dealloc_flag) + { + dealloc_tcp_method_addr(map); + } + else + { + /* this will cause the bmi control layer to check to see if + * this address can be completely forgotten + */ + bmi_method_addr_forget_callback(bmi_addr); + } + return; +}; + +/****************************************************************** + * Internal support functions + */ + + +/* + * dealloc_tcp_method_addr() + * + * destroys method address structures generated by the TCP/IP module. + * + * no return value + */ +static void dealloc_tcp_method_addr(bmi_method_addr_p map) +{ + + struct tcp_addr *tcp_addr_data = NULL; + + tcp_addr_data = (struct tcp_addr *) map->method_data; + /* close the socket, as long as it is not the one we are listening on + * as a server. 
+ */ + if (!tcp_addr_data->server_port) + { + if (tcp_addr_data->socket > -1) + { + shutdown(tcp_addr_data->socket, SD_BOTH); + closesocket(tcp_addr_data->socket); + } + } + + if (tcp_addr_data->hostname) + free(tcp_addr_data->hostname); + if (tcp_addr_data->peer) + free(tcp_addr_data->peer); + + bmi_dealloc_method_addr(map); + + return; +} + + +/* + * alloc_tcp_method_addr() + * + * creates a new method address with defaults filled in for TCP/IP. + * + * returns pointer to struct on success, NULL on failure + */ +bmi_method_addr_p alloc_tcp_method_addr(void) +{ + + struct bmi_method_addr *my_method_addr = NULL; + struct tcp_addr *tcp_addr_data = NULL; + + my_method_addr = + bmi_alloc_method_addr(tcp_method_params.method_id, sizeof(struct tcp_addr)); + if (!my_method_addr) + { + return (NULL); + } + + /* note that we trust the alloc_method_addr() function to have zeroed + * out the structures for us already + */ + + tcp_addr_data = (struct tcp_addr *) my_method_addr->method_data; + tcp_addr_data->socket = -1; + tcp_addr_data->port = -1; + tcp_addr_data->map = my_method_addr; + tcp_addr_data->sc_index = -1; + + return (my_method_addr); +} + + +/* + * tcp_server_init() + * + * this function is used to prepare a node to recieve incoming + * connections if it is initialized in a server configuration. 
+ * + * returns 0 on succes, -errno on failure + */ +static int tcp_server_init(void) +{ + + int oldfl = 0; /* old socket flags */ + struct tcp_addr *tcp_addr_data = NULL; + int tmp_errno = bmi_tcp_errno_to_pvfs(-EINVAL); + int ret = 0; + + /* create a socket */ + tcp_addr_data = (struct tcp_addr *) tcp_method_params.listen_addr->method_data; + if ((tcp_addr_data->socket = BMI_sockio_new_sock()) < 0) + { + tmp_errno = WSAGetLastError(); + gossip_err("Error: BMI_sockio_new_sock: %d\n", tmp_errno); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + + /* set it to non-blocking operation */ + /* + oldfl = fcntl(tcp_addr_data->socket, F_GETFL, 0); + if (!(oldfl & O_NONBLOCK)) + { + fcntl(tcp_addr_data->socket, F_SETFL, oldfl | O_NONBLOCK); + } + */ + SET_NONBLOCK(tcp_addr_data->socket); + + /* setup for a fast restart to avoid bind addr in use errors */ + BMI_sockio_set_sockopt(tcp_addr_data->socket, SO_REUSEADDR, 1); + + /* bind it to the appropriate port */ + if(tcp_method_params.method_flags & BMI_TCP_BIND_SPECIFIC) + { + ret = BMI_sockio_bind_sock_specific(tcp_addr_data->socket, + tcp_addr_data->hostname, + tcp_addr_data->port); + /* NOTE: this particular function converts errno in advance */ + if(ret < 0) + { + PVFS_perror_gossip("BMI_sockio_bind_sock_specific", ret); + return(ret); + } + } + else + { + ret = BMI_sockio_bind_sock(tcp_addr_data->socket, + tcp_addr_data->port); + } + + if (ret < 0) + { + tmp_errno = WSAGetLastError(); + gossip_err("Error: BMI_sockio_bind_sock: %d\n", tmp_errno); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + + /* go ahead and listen to the socket */ + if (listen(tcp_addr_data->socket, TCP_BACKLOG) != 0) + { + tmp_errno = WSAGetLastError(); + gossip_err("Error: listen: %s\n", tmp_errno); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + + return (0); +} + + +/* find_recv_inflight() + * + * checks to see if there is a recv operation in flight (when in flight + * means that some of the data or envelope has been read) for a + * 
particular address. + * + * returns pointer to operation on success, NULL if nothing found. + */ +static method_op_p find_recv_inflight(bmi_method_addr_p map) +{ + struct op_list_search_key key; + method_op_p query_op = NULL; + + memset(&key, 0, sizeof(struct op_list_search_key)); + key.method_addr = map; + key.method_addr_yes = 1; + + query_op = op_list_search(op_list_array[IND_RECV_INFLIGHT], &key); + + return (query_op); +} + + +/* tcp_sock_init() + * + * this is an internal function which is used to build up a TCP/IP + * connection in the situation of a client side operation. + * addressing information to determine which fields need to be set. + * If the connection is already established then it does no work. + * + * NOTE: this is safe to call repeatedly. However, always check the + * value of the not_connected field in the tcp address before using the + * address. + * + * returns 0 on success, -errno on failure + */ +static int tcp_sock_init(bmi_method_addr_p my_method_addr) +{ + + int oldfl = 0; /* socket flags */ + int ret = -1; + struct pollfd poll_conn; + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) my_method_addr->method_data; + int tmp_errno = 0; + + /* check for obvious problems */ + assert(my_method_addr); + assert(my_method_addr->method_type == tcp_method_params.method_id); + assert(tcp_addr_data->server_port == 0); + + /* fail immediately if the address is in failure mode and we have no way + * to reconnect + */ + if(tcp_addr_data->addr_error && tcp_addr_data->dont_reconnect) + { + gossip_debug(GOSSIP_BMI_DEBUG_TCP, + "Warning: BMI communication attempted on an address in failure mode.\n"); + return(tcp_addr_data->addr_error); + } + + if(tcp_addr_data->addr_error) + { + gossip_debug(GOSSIP_BMI_DEBUG_TCP, "%s: attempting reconnect.\n", + __func__); + tcp_addr_data->addr_error = 0; + assert(tcp_addr_data->socket < 0); + tcp_addr_data->not_connected = 1; + } + + /* is there already a socket? 
*/ + if (tcp_addr_data->socket > -1) + { + /* check to see if we still need to work on the connect.. */ + if (tcp_addr_data->not_connected) + { + /* this is a little weird, but we complete the nonblocking + * connection by polling */ + poll_conn.fd = tcp_addr_data->socket; + poll_conn.events = POLLOUT; + ret = WSAPoll(&poll_conn, 1, 2); + if ((ret < 0) || (poll_conn.revents & POLLERR)) + { + tmp_errno = WSAGetLastError(); + gossip_lerr("Error: poll: %d\n", tmp_errno); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + if (poll_conn.revents & POLLOUT) + { + tcp_addr_data->not_connected = 0; + } + + /* use select on Windows */ + /*fd_set writefds; + struct timeval timeout; + + timeout.tv_sec = 0; + timeout.tv_usec = 2000; /* 2ms */ +/* + FD_ZERO(&writefds); + FD_SET(tcp_addr_data->socket, &writefds); + ret = select(1, NULL, &writefds, NULL, (const struct timeval *) &timeout); + if (ret == SOCKET_ERROR) + { + tmp_errno = WSAGetLastError(); + gossip_lerr("Error: select (tcp_sock_init): %d\n", tmp_errno); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + if (FD_ISSET(tcp_addr_data->socket, &writefds)) + { + tcp_addr_data->not_connected = 0; + } +*/ + } + /* return. the caller should check the "not_connected" flag to + * see if the socket is usable yet. */ + return (0); + } + + bmi_set_sock_buffers(tcp_addr_data->socket); + + /* at this point there is no socket. 
try to build it */ + if (tcp_addr_data->port < 1) + { + return (bmi_tcp_errno_to_pvfs(-EINVAL)); + } + + /* make a socket */ + if ((tcp_addr_data->socket = BMI_sockio_new_sock()) < 0) + { + tmp_errno = WSAGetLastError(); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + + /* set it to non-blocking operation */ + /* oldfl = fcntl(tcp_addr_data->socket, F_GETFL, 0); + if (!(oldfl & O_NONBLOCK)) + { + fcntl(tcp_addr_data->socket, F_SETFL, oldfl | O_NONBLOCK); + }*/ + SET_NONBLOCK(tcp_addr_data->socket); + +#if defined(USE_TRUSTED) && defined(__PVFS2_CLIENT__) + /* make sure if we need to bind or not to some local port ranges */ + tcp_enable_trusted(tcp_addr_data); +#endif + + /* turn off Nagle's algorithm */ + if (BMI_sockio_set_tcpopt(tcp_addr_data->socket, TCP_NODELAY, 1) < 0) + { + tmp_errno = WSAGetLastError(); + gossip_lerr("Error: failed to set TCP_NODELAY option.\n"); + closesocket(tcp_addr_data->socket); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + + bmi_set_sock_buffers(tcp_addr_data->socket); + + if (tcp_addr_data->hostname) + { + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, + "Connect: socket=%d, hostname=%s, port=%d\n", + tcp_addr_data->socket, tcp_addr_data->hostname, + tcp_addr_data->port); + ret = BMI_sockio_connect_sock(tcp_addr_data->socket, + tcp_addr_data->hostname, + tcp_addr_data->port); + } + else + { + return (bmi_tcp_errno_to_pvfs(-EINVAL)); + } + + if (ret < 0) + { + if (ret == -WSAEWOULDBLOCK) + { + tcp_addr_data->not_connected = 1; + /* this will have to be connected later with a poll */ + } + else + { + /* NOTE: BMI_sockio_connect_sock returns a PVFS error */ + char buff[300]; + + _snprintf(buff, 300, "Error: BMI_sockio_connect_sock: (%s):", + tcp_addr_data->hostname); + + PVFS_perror_gossip(buff, ret); + return (ret); + } + } + + return (0); +} + + +/* enqueue_operation() + * + * creates a new operation based on the arguments to the function. 
It + * then makes sure that the address is added to the socket collection, + * and the operation is added to the appropriate operation queue. + * + * Damn, what a big prototype! + * + * returns 0 on success, -errno on failure + */ +static int enqueue_operation(op_list_p target_list, + enum bmi_op_type send_recv, + bmi_method_addr_p map, + void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t amt_complete, + bmi_size_t env_amt_complete, + bmi_op_id_t * id, + int tcp_op_state, + struct tcp_msg_header header, + void *user_ptr, + bmi_size_t actual_size, + bmi_size_t expected_size, + bmi_context_id context_id, + int32_t eid) +{ + method_op_p new_method_op = NULL; + struct tcp_op *tcp_op_data = NULL; + struct tcp_addr* tcp_addr_data = NULL; + int i; + + /* allocate the operation structure */ + new_method_op = alloc_tcp_method_op(); + if (!new_method_op) + { + return (bmi_tcp_errno_to_pvfs(-ENOMEM)); + } + + *id = new_method_op->op_id; + new_method_op->event_id = eid; + + /* set the fields */ + new_method_op->send_recv = send_recv; + new_method_op->addr = map; + new_method_op->user_ptr = user_ptr; + /* this is on purpose; we want to use the buffer_list all of + * the time, no special case for one contig buffer + */ + new_method_op->buffer = NULL; + new_method_op->actual_size = actual_size; + new_method_op->expected_size = expected_size; + new_method_op->send_recv = send_recv; + new_method_op->amt_complete = amt_complete; + new_method_op->env_amt_complete = env_amt_complete; + new_method_op->msg_tag = header.tag; + new_method_op->mode = header.mode; + new_method_op->list_count = list_count; + new_method_op->context_id = context_id; + + /* set our current position in list processing */ + i=0; + new_method_op->list_index = 0; + new_method_op->cur_index_complete = 0; + while(amt_complete > 0) + { + if(amt_complete >= size_list[i]) + { + amt_complete -= size_list[i]; + new_method_op->list_index++; + i++; + } + else + { + 
new_method_op->cur_index_complete = amt_complete; + amt_complete = 0; + } + } + + tcp_op_data = (struct tcp_op *) new_method_op->method_data; + tcp_op_data->tcp_op_state = (enum bmi_tcp_state) tcp_op_state; + tcp_op_data->env = header; + + /* if there is only one item in the list, then keep the list stored + * in the op structure. This allows us to use the same code for send + * and recv as we use for send_list and recv_list, without having to + * malloc lists for those special cases + */ + if (list_count == 1) + { + new_method_op->buffer_list = &tcp_op_data->buffer_list_stub; + new_method_op->size_list = &tcp_op_data->size_list_stub; + ((void**)new_method_op->buffer_list)[0] = buffer_list[0]; + ((bmi_size_t*)new_method_op->size_list)[0] = size_list[0]; + } + else + { + new_method_op->size_list = size_list; + new_method_op->buffer_list = buffer_list; + } + + tcp_addr_data = (struct tcp_addr *) map->method_data; + + if(tcp_addr_data->addr_error) + { + /* server should always fail here, client should let receives queue + * as if nothing were wrong + */ + if(tcp_addr_data->dont_reconnect || send_recv == BMI_SEND) + { + gossip_debug(GOSSIP_BMI_DEBUG_TCP, + "Warning: BMI communication attempted on an " + "address in failure mode.\n"); + new_method_op->error_code = tcp_addr_data->addr_error; + op_list_add(op_list_array[new_method_op->context_id], + new_method_op); + return(tcp_addr_data->addr_error); + } + } + +#if 0 + if(tcp_addr_data->addr_error) + { + /* this address is bad, don't try to do anything with it */ + gossip_err("Warning: BMI communication attempted on an " + "address in failure mode.\n"); + + new_method_op->error_code = tcp_addr_data->addr_error; + op_list_add(op_list_array[new_method_op->context_id], + new_method_op); + return(tcp_addr_data->addr_error); + } +#endif + + /* add the socket to poll on */ + BMI_socket_collection_add(tcp_socket_collection_p, map); + if(send_recv == BMI_SEND) + { + BMI_socket_collection_add_write_bit(tcp_socket_collection_p, 
map); + } + + /* keep up with the operation */ + op_list_add(target_list, new_method_op); + + return (0); +} + + +/* tcp_post_recv_generic() + * + * does the real work of posting an operation - works for both + * eager and rendezvous messages + * + * returns 0 on success that requires later poll, returns 1 on instant + * completion, -errno on failure + */ +static int tcp_post_recv_generic(bmi_op_id_t * id, + bmi_method_addr_p src, + void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t expected_size, + bmi_size_t * actual_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + method_op_p query_op = NULL; + int ret = -1; + struct tcp_addr *tcp_addr_data = NULL; + struct tcp_op *tcp_op_data = NULL; + struct tcp_msg_header bogus_header; + struct op_list_search_key key; + bmi_size_t copy_size = 0; + bmi_size_t total_copied = 0; + int i; + PINT_event_id eid = 0; + + PINT_EVENT_START( + bmi_tcp_recv_event_id, bmi_tcp_pid, NULL, &eid, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + PINT_HINT_GET_HANDLE(hints), + PINT_HINT_GET_OP_ID(hints), + expected_size); + + tcp_addr_data = (struct tcp_addr *) src->method_data; + + /* short out immediately if the address is bad and we have no way to + * reconnect + */ + if(tcp_addr_data->addr_error && tcp_addr_data->dont_reconnect) + { + gossip_debug( + GOSSIP_BMI_DEBUG_TCP, + "Warning: BMI communication attempted " + "on an address in failure mode.\n"); + return(tcp_addr_data->addr_error); + } + + /* lets make sure that the message hasn't already been fully + * buffered in eager mode before doing anything else + */ + memset(&key, 0, sizeof(struct op_list_search_key)); + key.method_addr = src; + key.method_addr_yes = 1; + key.msg_tag = tag; + key.msg_tag_yes = 1; + + query_op = + op_list_search(op_list_array[IND_RECV_EAGER_DONE_BUFFERING], &key); + if (query_op) + { + /* 
make sure it isn't too big */ + if (query_op->actual_size > expected_size) + { + gossip_err("Error: message ordering violation;\n"); + gossip_err("Error: message too large for next buffer.\n"); + return (bmi_tcp_errno_to_pvfs(-EPROTO)); + } + + /* whoohoo- it is already done! */ + /* copy buffer out to list segments; handle short case */ + for (i = 0; i < list_count; i++) + { + copy_size = size_list[i]; + if (copy_size + total_copied > query_op->actual_size) + { + copy_size = query_op->actual_size - total_copied; + } + memcpy(buffer_list[i], (void *) ((char *) query_op->buffer + + total_copied), copy_size); + total_copied += copy_size; + if (total_copied == query_op->actual_size) + { + break; + } + } + /* copy out to correct memory regions */ + (*actual_size) = query_op->actual_size; + free(query_op->buffer); + *id = 0; + op_list_remove(query_op); + dealloc_tcp_method_op(query_op); + PINT_EVENT_END(bmi_tcp_recv_event_id, bmi_tcp_pid, NULL, eid, 0, + *actual_size); + + return (1); + } + + /* look for a message that is already being received */ + query_op = op_list_search(op_list_array[IND_RECV_INFLIGHT], &key); + if (query_op) + { + tcp_op_data = (struct tcp_op *) query_op->method_data; + } + + /* see if it is being buffered into a temporary memory region */ + if (query_op && tcp_op_data->tcp_op_state == BMI_TCP_BUFFERING) + { + /* make sure it isn't too big */ + if (query_op->actual_size > expected_size) + { + gossip_err("Error: message ordering violation;\n"); + gossip_err("Error: message too large for next buffer.\n"); + return (bmi_tcp_errno_to_pvfs(-EPROTO)); + } + + /* copy what we have so far into the correct buffers */ + total_copied = 0; + for (i = 0; i < list_count; i++) + { + copy_size = size_list[i]; + if (copy_size + total_copied > query_op->amt_complete) + { + copy_size = query_op->amt_complete - total_copied; + } + if (copy_size > 0) + { + memcpy(buffer_list[i], (void *) ((char *) query_op->buffer + + total_copied), copy_size); + } + total_copied += 
copy_size; + if (total_copied == query_op->amt_complete) + { + query_op->list_index = i; + query_op->cur_index_complete = copy_size; + break; + } + } + + /* see if we ended on a buffer boundary */ + if (query_op->cur_index_complete == + query_op->size_list[query_op->list_index]) + { + query_op->list_index++; + query_op->cur_index_complete = 0; + } + + /* release the old buffer */ + if (query_op->buffer) + { + free(query_op->buffer); + } + + *id = query_op->op_id; + tcp_op_data = (struct tcp_op *) query_op->method_data; + tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS; + + query_op->list_count = list_count; + query_op->user_ptr = user_ptr; + query_op->context_id = context_id; + /* if there is only one item in the list, then keep the list stored + * in the op structure. This allows us to use the same code for send + * and recv as we use for send_list and recv_list, without having to + * malloc lists for those special cases + */ + if (list_count == 1) + { + query_op->buffer_list = &tcp_op_data->buffer_list_stub; + query_op->size_list = &tcp_op_data->size_list_stub; + ((void **)query_op->buffer_list)[0] = buffer_list[0]; + ((bmi_size_t *)query_op->size_list)[0] = size_list[0]; + } + else + { + query_op->buffer_list = buffer_list; + query_op->size_list = size_list; + } + + if (query_op->amt_complete < query_op->actual_size) + { + /* try to recv some more data */ + tcp_addr_data = (struct tcp_addr *) query_op->addr->method_data; + ret = payload_progress(tcp_addr_data->socket, + query_op->buffer_list, + query_op->size_list, + query_op->list_count, + query_op->actual_size, + &(query_op->list_index), + &(query_op->cur_index_complete), + BMI_RECV, + NULL, + 0); + if (ret < 0) + { + PVFS_perror_gossip("Error: payload_progress", ret); + /* payload_progress() returns BMI error codes */ + tcp_forget_addr(query_op->addr, 0, ret); + return (ret); + } + + query_op->amt_complete += ret; + } + assert(query_op->amt_complete <= query_op->actual_size); + if (query_op->amt_complete == 
query_op->actual_size) + { + /* we are done */ + op_list_remove(query_op); + *id = 0; + (*actual_size) = query_op->actual_size; + dealloc_tcp_method_op(query_op); + PINT_EVENT_END( + bmi_tcp_recv_event_id, bmi_tcp_pid, NULL, eid, + 0, *actual_size); + + return (1); + } + else + { + /* there is still more work to do */ + tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS; + return (0); + } + } + + /* NOTE: if the message was in flight, but not buffering, then + * that means that it has already matched an earlier receive + * post or else is an unexpected message that doesn't require a + * matching receive post - at any rate it shouldn't be handled + * here + */ + + /* if we hit this point we must enqueue */ + if (expected_size <= TCP_MODE_EAGER_LIMIT) + { + bogus_header.mode = TCP_MODE_EAGER; + } + else + { + bogus_header.mode = TCP_MODE_REND; + } + bogus_header.tag = tag; + ret = enqueue_operation(op_list_array[IND_RECV], + BMI_RECV, src, buffer_list, size_list, + list_count, 0, 0, id, BMI_TCP_INPROGRESS, + bogus_header, user_ptr, 0, + expected_size, context_id, eid); + /* just for safety; this field isn't valid to the caller anymore */ + (*actual_size) = 0; + /* TODO: figure out why this causes deadlocks; observable in 2 + * scenarios: + * - pvfs2-client-core with threaded library and nptl + * - pvfs2-server threaded with nptl sending messages to itself + */ +#if 0 + if (ret >= 0) + { + /* go ahead and try to do some work while we are in this + * function since we appear to be backlogged. Make sure that + * we do not wait in the poll, however. + */ + ret = tcp_do_work(0); + } +#endif + return (ret); +} + + +/* tcp_cleanse_addr() + * + * finds all active operations matching the given address, places them + * in an error state, and moves them to the completed queue. + * + * NOTE: this function does not shut down the address. 
That should be + * handled separately + * + * returns 0 on success, -errno on failure + */ +static int tcp_cleanse_addr(bmi_method_addr_p map, int error_code) +{ + int i = 0; + struct op_list_search_key key; + method_op_p query_op = NULL; + + memset(&key, 0, sizeof(struct op_list_search_key)); + key.method_addr = map; + key.method_addr_yes = 1; + + /* NOTE: we know the unexpected completed queue is the last index! */ + for (i = 0; i < (NUM_INDICES - 1); i++) + { + if (op_list_array[i]) + { + while ((query_op = op_list_search(op_list_array[i], &key))) + { + op_list_remove(query_op); + query_op->error_code = error_code; + if (query_op->mode == TCP_MODE_UNEXP && query_op->send_recv + == BMI_RECV) + { + op_list_add(op_list_array[IND_COMPLETE_RECV_UNEXP], + query_op); + } + else + { + ((struct tcp_op*)(query_op->method_data))->tcp_op_state = + BMI_TCP_COMPLETE; + op_list_add(completion_array[query_op->context_id], query_op); + } + } + } + } + + return (0); +} + + +/* tcp_shutdown_addr() + * + * closes connections associated with a tcp method address + * + * returns 0 on success, -errno on failure + */ +static int tcp_shutdown_addr(bmi_method_addr_p map) +{ + + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) map->method_data; + if (tcp_addr_data->socket > -1) + { + shutdown(tcp_addr_data->socket, SD_BOTH); + closesocket(tcp_addr_data->socket); + } + tcp_addr_data->socket = -1; + tcp_addr_data->not_connected = 1; + + return (0); +} + + +/* tcp_do_work() + * + * this is the function that actually does communication work during + * BMI_tcp_testXXX and BMI_tcp_waitXXX functions. The amount of work + * that it does is tunable. + * + * returns 0 on success, -errno on failure. 
+ */ +static int tcp_do_work(int max_idle_time) +{ + int ret = -1; + bmi_method_addr_p addr_array[TCP_WORK_METRIC]; + int status_array[TCP_WORK_METRIC]; + int socket_count = 0; + int i = 0; + int stall_flag = 0; + int busy_flag = 1; + struct timespec req; + struct tcp_addr* tcp_addr_data = NULL; + struct timespec wait_time; + struct timeval start; + + if(sc_test_busy) + { + /* another thread is already polling or working on sockets */ + if(max_idle_time == 0) + { + /* we don't want to spend time waiting on it; return + * immediately. + */ + return(0); + } + + /* Sleep until working thread thread signals that it has finished + * its work and then return. No need for this thread to poll; + * the other thread may have already finished what we wanted. + * This condition wait is used strictly as a best effort to + * prevent busy spin. We'll sort out the results later. + */ + gettimeofday(&start, NULL); + wait_time.tv_sec = start.tv_sec + max_idle_time / 1000; + wait_time.tv_nsec = (start.tv_usec + ((max_idle_time % 1000)*1000))*1000; + if (wait_time.tv_nsec > 1000000000) + { + wait_time.tv_nsec = wait_time.tv_nsec - 1000000000; + wait_time.tv_sec++; + } + gen_cond_timedwait(&interface_cond, &interface_mutex, &wait_time); + return(0); + } + + /* this thread has gained control of the polling. 
*/ + sc_test_busy = 1; + gen_mutex_unlock(&interface_mutex); + + /* our turn to look at the socket collection */ + ret = BMI_socket_collection_testglobal(tcp_socket_collection_p, + TCP_WORK_METRIC, &socket_count, + addr_array, status_array, + max_idle_time); + + gen_mutex_lock(&interface_mutex); + sc_test_busy = 0; + + if (ret < 0) + { + /* wake up anyone else who might have been waiting */ + gen_cond_broadcast(&interface_cond); + PVFS_perror_gossip("Error: socket collection:", ret); + /* BMI_socket_collection_testglobal() returns BMI error code */ + return (ret); + } + + if(socket_count == 0) + busy_flag = 0; + + /* do different kinds of work depending on results */ + for (i = 0; i < socket_count; i++) + { + tcp_addr_data = (struct tcp_addr *) addr_array[i]->method_data; + /* skip working on addresses in failure mode */ + if(tcp_addr_data->addr_error) + { + /* addr_error field is in BMI error code format */ + tcp_forget_addr(addr_array[i], 0, tcp_addr_data->addr_error); + continue; + } + + if (status_array[i] & SC_ERROR_BIT) + { + ret = tcp_do_work_error(addr_array[i]); + if (ret < 0) + { + PVFS_perror_gossip("Warning: BMI error handling failure, continuing", ret); + } + } + else + { + if (status_array[i] & SC_WRITE_BIT) + { + ret = tcp_do_work_send(addr_array[i], &stall_flag); + if (ret < 0) + { + PVFS_perror_gossip("Warning: BMI send error, continuing", ret); + } + if(!stall_flag) + busy_flag = 0; + } + if (status_array[i] & SC_READ_BIT) + { + ret = tcp_do_work_recv(addr_array[i], &stall_flag); + if (ret < 0) + { + PVFS_perror_gossip("Warning: BMI recv error, continuing", ret); + } + if(!stall_flag) + busy_flag = 0; + } + } + } + + /* IMPORTANT NOTE: if we have set the following flag, then it indicates that + * poll() is finding data on our sockets, yet we are not able to move + * any of it right now. This means that the sockets are backlogged, and + * BMI is in danger of busy spinning during test functions. 
Let's sleep + * for a millisecond here in hopes of letting the rest of the system + * catch up somehow (either by clearing a backlog in another I/O + * component, or by posting more matching BMI recieve operations) + */ + if(busy_flag) + { + /* req.tv_sec = 0; + req.tv_nsec = 1000; */ + gen_mutex_unlock(&interface_mutex); + /* nanosleep(&req, NULL); */ + Sleep(1); + gen_mutex_lock(&interface_mutex); + } + + /* wake up anyone else who might have been waiting */ + gen_cond_broadcast(&interface_cond); + return (0); +} + + +/* tcp_do_work_send() + * + * does work on a TCP address that is ready to send data. + * + * returns 0 on success, -errno on failure + */ +static int tcp_do_work_send(bmi_method_addr_p map, int* stall_flag) +{ + method_op_p active_method_op = NULL; + struct op_list_search_key key; + int blocked_flag = 0; + int ret = 0; + int tmp_stall_flag; + + *stall_flag = 1; + + while (blocked_flag == 0 && ret == 0) + { + /* what we want to do here is find the first operation in the send + * queue for this address. + */ + memset(&key, 0, sizeof(struct op_list_search_key)); + key.method_addr = map; + key.method_addr_yes = 1; + active_method_op = op_list_search(op_list_array[IND_SEND], &key); + if (!active_method_op) + { + /* ran out of queued sends to work on */ + return (0); + } + + ret = work_on_send_op(active_method_op, &blocked_flag, &tmp_stall_flag); + if(!tmp_stall_flag) + *stall_flag = 0; + } + + return (ret); +} + + +/* handle_new_connection() + * + * this function should be called only on special tcp method addresses + * that represent local server ports. It will attempt to accept a new + * connection and create a new method address for the remote host. + * + * side effect: destroys the temporary method_address that is passed in + * to it. 
+ * + * returns 0 on success, -errno on failure + */ +static int handle_new_connection(bmi_method_addr_p map) +{ + struct tcp_addr *tcp_addr_data = NULL; + int accepted_socket = -1; + bmi_method_addr_p new_addr = NULL; + int ret = -1; + char* tmp_peer = NULL; + + ret = tcp_accept_init(&accepted_socket, &tmp_peer); + if (ret < 0) + { + return (ret); + } + if (accepted_socket < 0) + { + /* guess it wasn't ready after all */ + return (0); + } + + /* ok, we have a new socket. what now? Probably simplest + * thing to do is to create a new method_addr, add it to the + * socket collection, and return. It will get caught the next + * time around */ + new_addr = alloc_tcp_method_addr(); + if (!new_addr) + { + return (bmi_tcp_errno_to_pvfs(-ENOMEM)); + } + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, + "Assigning socket %d to new method addr.\n", + accepted_socket); + tcp_addr_data = (struct tcp_addr *) new_addr->method_data; + tcp_addr_data->socket = accepted_socket; + tcp_addr_data->peer = tmp_peer; + tcp_addr_data->peer_type = BMI_TCP_PEER_IP; + + /* set a flag to make sure that we never try to reconnect this address + * in the future + */ + tcp_addr_data->dont_reconnect = 1; + /* register this address with the method control layer */ + tcp_addr_data->bmi_addr = bmi_method_addr_reg_callback(new_addr); + if (ret < 0) + { + tcp_shutdown_addr(new_addr); + dealloc_tcp_method_addr(new_addr); + dealloc_tcp_method_addr(map); + return (ret); + } + BMI_socket_collection_add(tcp_socket_collection_p, new_addr); + + dealloc_tcp_method_addr(map); + return (0); + +} + + +/* tcp_do_work_recv() + * + * does work on a TCP address that is ready to recv data. 
/* tcp_do_work_recv()
 *
 * does work on a TCP address that is ready to recv data.
 *
 * Handles, in order: accepting new connections on listening addresses,
 * continuing a receive that is already in flight, and pulling a new
 * message header off the wire to start (or match) a receive.
 *
 * sets *stall_flag when poll reported readable but no progress was made
 * returns 0 on success, -errno on failure
 */
static int tcp_do_work_recv(bmi_method_addr_p map, int* stall_flag)
{

    method_op_p active_method_op = NULL;
    int ret = -1;
    void *new_buffer = NULL;
    struct op_list_search_key key;
    struct tcp_msg_header new_header;
    struct tcp_addr *tcp_addr_data = (struct tcp_addr *) map->method_data;
    struct tcp_op *tcp_op_data = NULL;
    int tmp_errno;
    int tmp;
    bmi_size_t old_amt_complete = 0;
    time_t current_time;

    *stall_flag = 1;

    /* figure out if this is a new connection */
    if (tcp_addr_data->server_port)
    {
        /* just try to accept connection- no work yet */
        *stall_flag = 0;
        return (handle_new_connection(map));
    }

    /* look for a recv for this address that is already in flight */
    active_method_op = find_recv_inflight(map);
    /* see if we found one in progress... */
    if (active_method_op)
    {
        tcp_op_data = (struct tcp_op *) active_method_op->method_data;
        if (active_method_op->mode == TCP_MODE_REND &&
            tcp_op_data->tcp_op_state == BMI_TCP_BUFFERING)
        {
            /* rendezvous payload is never buffered; we must wait for
             * the matching recv post before touching the socket
             */
            return (0);
        }
        else
        {
            /* remember how far along we were so we can detect a
             * zero-byte read below
             */
            old_amt_complete = active_method_op->amt_complete;
            ret = work_on_recv_op(active_method_op, stall_flag);
            gossip_debug(GOSSIP_BMI_DEBUG_TCP, "actual_size=%d, "
                         "amt_complete=%d, old_amt_complete=%d\n",
                         (int)active_method_op->actual_size,
                         (int)active_method_op->amt_complete,
                         (int)old_amt_complete);

            /* poll() said readable but we moved nothing: count these
             * and drop the connection once the limit is exceeded
             */
            if ((ret == 0) &&
                (old_amt_complete == active_method_op->amt_complete) &&
                active_method_op->actual_size &&
                (active_method_op->amt_complete <
                 active_method_op->actual_size))
            {
                gossip_debug(
                    GOSSIP_BMI_DEBUG_TCP, "Warning: bmi_tcp unable "
                    "to recv any data reported by poll(). [1]\n");

                if (tcp_addr_data->zero_read_limit++ ==
                    BMI_TCP_ZERO_READ_LIMIT)
                {
                    gossip_debug(GOSSIP_BMI_DEBUG_TCP,
                                 "...dropping connection.\n");
                    tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-EPIPE));
                }
            }
            else
            {
                /* any progress resets the zero-read counter */
                tcp_addr_data->zero_read_limit = 0;
            }
            return(ret);
        }
    }

    /* let's see if the entire header is ready to be received.  If so
     * we will go ahead and pull it.  Otherwise, we will try again later.
     * It isn't worth the complication of reading only a partial message
     * header - we really want it atomically
     */
    ret = BMI_sockio_nbpeek(tcp_addr_data->socket,
                            new_header.enc_hdr, TCP_ENC_HDR_SIZE);
    if (ret < 0)
    {
        tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-WSAGetLastError()));
        return (0);
    }

    if (ret == 0)
    {
        /* readable per poll() but zero bytes peeked; same zero-read
         * accounting as above
         */
        gossip_debug(
            GOSSIP_BMI_DEBUG_TCP, "Warning: bmi_tcp unable "
            "to recv any data reported by poll(). [2]\n");

        if (tcp_addr_data->zero_read_limit++ ==
            BMI_TCP_ZERO_READ_LIMIT)
        {
            gossip_debug(GOSSIP_BMI_DEBUG_TCP,
                         "...dropping connection.\n");
            tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-EPIPE));
        }
        return(0);
    }
    else
    {
        tcp_addr_data->zero_read_limit = 0;
    }

    if (ret < TCP_ENC_HDR_SIZE)
    {
        /* partial header: start (or check) a timer so a peer that
         * stalls mid-header eventually gets disconnected
         */
        current_time = time(NULL);
        if(!tcp_addr_data->short_header_timer)
        {
            tcp_addr_data->short_header_timer = current_time;
        }
        else if((current_time - tcp_addr_data->short_header_timer) >
                BMI_TCP_HEADER_WAIT_SECONDS)
        {
            gossip_err("Error: incomplete BMI TCP header after %d seconds, closing connection.\n",
                       BMI_TCP_HEADER_WAIT_SECONDS);
            tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-EPIPE));
            return (0);
        }

        /* header not ready yet, but we will keep hoping */
        return (0);
    }

    tcp_addr_data->short_header_timer = 0;
    *stall_flag = 0;
    gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "Reading header for new op.\n");
    /* full header is available: consume it for real this time */
    ret = BMI_sockio_nbrecv(tcp_addr_data->socket,
                            new_header.enc_hdr, TCP_ENC_HDR_SIZE);
    if (ret < TCP_ENC_HDR_SIZE)
    {
        tmp_errno = WSAGetLastError();
        gossip_err("Error: BMI_sockio_nbrecv: %d\n", tmp_errno);
        tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-tmp_errno));
        return (0);
    }

    /* decode the header */
    BMI_TCP_DEC_HDR(new_header);

    /* so we have the header.  now what?  These are the possible
     * scenarios:
     * a) unexpected message
     * b) eager message for which a recv has been posted
     * c) eager message for which a recv has not been posted
     * d) rendezvous message for which a recv has been posted
     * e) rendezvous message for which a recv has not been posted
     * f) eager message for which a rend. recv has been posted
     */

    /* check magic number of message */
    if(new_header.magic_nr != BMI_MAGIC_NR)
    {
        gossip_err("Error: bad magic in BMI TCP message.\n");
        tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-EBADMSG));
        return(0);
    }

    gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "Received new message; mode: %d.\n",
                  (int) new_header.mode);
    gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "tag: %d\n", (int) new_header.tag);

    if (new_header.mode == TCP_MODE_UNEXP)
    {
        /* scenario (a): unexpected message — no matching post needed;
         * buffer it and complete into the unexpected queue
         */
        /* allocate the operation structure */
        active_method_op = alloc_tcp_method_op();
        if (!active_method_op)
        {
            tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-ENOMEM));
            return (bmi_tcp_errno_to_pvfs(-ENOMEM));
        }
        /* create data buffer */
        new_buffer = malloc(new_header.size);
        if (!new_buffer)
        {
            dealloc_tcp_method_op(active_method_op);
            tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-ENOMEM));
            return (bmi_tcp_errno_to_pvfs(-ENOMEM));
        }

        /* set the fields */
        active_method_op->send_recv = BMI_RECV;
        active_method_op->addr = map;
        active_method_op->actual_size = new_header.size;
        active_method_op->expected_size = 0;
        active_method_op->amt_complete = 0;
        active_method_op->env_amt_complete = TCP_ENC_HDR_SIZE;
        active_method_op->msg_tag = new_header.tag;
        active_method_op->buffer = new_buffer;
        active_method_op->mode = TCP_MODE_UNEXP;
        /* single-element list aliases the op's own buffer/size fields */
        active_method_op->buffer_list = &(active_method_op->buffer);
        active_method_op->size_list = &(active_method_op->actual_size);
        active_method_op->list_count = 1;
        tcp_op_data = (struct tcp_op *) active_method_op->method_data;
        tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS;
        tcp_op_data->env = new_header;

        op_list_add(op_list_array[IND_RECV_INFLIGHT], active_method_op);
        /* grab some data if we can */
        return (work_on_recv_op(active_method_op, &tmp));
    }

    memset(&key, 0, sizeof(struct op_list_search_key));
    key.method_addr = map;
    key.method_addr_yes = 1;
    key.msg_tag = new_header.tag;
    key.msg_tag_yes = 1;

    /* look for a match within the posted operations */
    active_method_op = op_list_search(op_list_array[IND_RECV], &key);

    if (active_method_op)
    {
        /* scenarios (b), (d), (f): matching recv already posted */
        /* make sure it isn't too big */
        if (new_header.size > active_method_op->expected_size)
        {
            gossip_err("Error: message ordering violation;\n");
            gossip_err("Error: message too large for next buffer.\n");
            gossip_err("Error: incoming size: %ld, expected size: %ld\n",
                       (long) new_header.size,
                       (long) active_method_op->expected_size);
            /* TODO: return error here or do something else? */
            return (bmi_tcp_errno_to_pvfs(-EPROTO));
        }

        /* we found a match.  go work on it and return */
        op_list_remove(active_method_op);
        active_method_op->env_amt_complete = TCP_ENC_HDR_SIZE;
        active_method_op->actual_size = new_header.size;
        op_list_add(op_list_array[IND_RECV_INFLIGHT], active_method_op);
        return (work_on_recv_op(active_method_op, &tmp));
    }

    /* no match anywhere.  Start a new operation */
    /* scenarios (c), (e): no recv posted yet; eager messages are
     * buffered, rendezvous messages wait (NULL buffer) for the post
     */
    /* allocate the operation structure */
    active_method_op = alloc_tcp_method_op();
    if (!active_method_op)
    {
        tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-ENOMEM));
        return (bmi_tcp_errno_to_pvfs(-ENOMEM));
    }

    if (new_header.mode == TCP_MODE_EAGER)
    {
        /* create data buffer for eager messages */
        new_buffer = malloc(new_header.size);
        if (!new_buffer)
        {
            dealloc_tcp_method_op(active_method_op);
            tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-ENOMEM));
            return (bmi_tcp_errno_to_pvfs(-ENOMEM));
        }
    }
    else
    {
        new_buffer = NULL;
    }

    /* set the fields */
    active_method_op->send_recv = BMI_RECV;
    active_method_op->addr = map;
    active_method_op->actual_size = new_header.size;
    active_method_op->expected_size = 0;
    active_method_op->amt_complete = 0;
    active_method_op->env_amt_complete = TCP_ENC_HDR_SIZE;
    active_method_op->msg_tag = new_header.tag;
    active_method_op->buffer = new_buffer;
    active_method_op->mode = new_header.mode;
    active_method_op->buffer_list = &(active_method_op->buffer);
    active_method_op->size_list = &(active_method_op->actual_size);
    active_method_op->list_count = 1;
    tcp_op_data = (struct tcp_op *) active_method_op->method_data;
    /* BUFFERING state: payload lands in our scratch buffer until a
     * matching recv is posted
     */
    tcp_op_data->tcp_op_state = BMI_TCP_BUFFERING;
    tcp_op_data->env = new_header;

    op_list_add(op_list_array[IND_RECV_INFLIGHT], active_method_op);

    /* grab some data if we can */
    if (new_header.mode == TCP_MODE_EAGER)
    {
        return (work_on_recv_op(active_method_op, &tmp));
    }

    return (0);
}
+ */ +static int work_on_send_op(method_op_p my_method_op, + int *blocked_flag, int* stall_flag) +{ + int ret = -1; + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) my_method_op->addr->method_data; + struct tcp_op *tcp_op_data = (struct tcp_op *) my_method_op->method_data; + + *blocked_flag = 1; + *stall_flag = 0; + + /* make sure that the connection is done before we continue */ + if (tcp_addr_data->not_connected) + { + ret = tcp_sock_init(my_method_op->addr); + if (ret < 0) + { + PVFS_perror_gossip("Error: socket failed to init", ret); + /* tcp_sock_init() returns BMI error code */ + tcp_forget_addr(my_method_op->addr, 0, ret); + return (0); + } + if (tcp_addr_data->not_connected) + { + /* try again later- still could not connect */ + tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS; + return (0); + } + } + + ret = payload_progress(tcp_addr_data->socket, + my_method_op->buffer_list, + my_method_op->size_list, + my_method_op->list_count, + my_method_op->actual_size, + &(my_method_op->list_index), + &(my_method_op->cur_index_complete), + BMI_SEND, + tcp_op_data->env.enc_hdr, + &my_method_op->env_amt_complete); + if (ret < 0) + { + PVFS_perror_gossip("Error: payload_progress", ret); + /* payload_progress() returns BMI error codes */ + tcp_forget_addr(my_method_op->addr, 0, ret); + return (0); + } + + if(ret == 0) + *stall_flag = 1; + + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "Sent: %d bytes of data.\n", ret); + my_method_op->amt_complete += ret; + assert(my_method_op->amt_complete <= my_method_op->actual_size); + + if (my_method_op->amt_complete == my_method_op->actual_size && my_method_op->env_amt_complete == TCP_ENC_HDR_SIZE) + { + /* we are done */ + my_method_op->error_code = 0; + BMI_socket_collection_remove_write_bit(tcp_socket_collection_p, + my_method_op->addr); + op_list_remove(my_method_op); + ((struct tcp_op*)(my_method_op->method_data))->tcp_op_state = + BMI_TCP_COMPLETE; + op_list_add(completion_array[my_method_op->context_id], my_method_op); + 
*blocked_flag = 0; + } + else + { + /* there is still more work to do */ + tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS; + } + + return (0); +} + + +/* + * work_on_recv_op() + * + * used to perform work on a recv operation. this is called by the poll + * function. + * NOTE: this function assumes the method header has already been read. + * + * returns 0 on success, -errno on failure. + */ +static int work_on_recv_op(method_op_p my_method_op, int* stall_flag) +{ + + int ret = -1; + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) my_method_op->addr->method_data; + struct tcp_op *tcp_op_data = (struct tcp_op *) my_method_op->method_data; + + *stall_flag = 1; + + if (my_method_op->actual_size != 0) + { + /* now let's try to recv some actual data */ + ret = payload_progress(tcp_addr_data->socket, + my_method_op->buffer_list, + my_method_op->size_list, + my_method_op->list_count, + my_method_op->actual_size, + &(my_method_op->list_index), + &(my_method_op->cur_index_complete), + BMI_RECV, + NULL, + 0); + if (ret < 0) + { + PVFS_perror_gossip("Error: payload_progress", ret); + /* payload_progress() returns BMI error codes */ + tcp_forget_addr(my_method_op->addr, 0, ret); + return (0); + } + } + else + { + ret = 0; + } + + if(ret > 0) + *stall_flag = 0; + + my_method_op->amt_complete += ret; + assert(my_method_op->amt_complete <= my_method_op->actual_size); + + if (my_method_op->amt_complete == my_method_op->actual_size) + { + /* we are done */ + op_list_remove(my_method_op); + if (tcp_op_data->tcp_op_state == BMI_TCP_BUFFERING) + { + /* queue up to wait on matching post recv */ + op_list_add(op_list_array[IND_RECV_EAGER_DONE_BUFFERING], + my_method_op); + } + else + { + my_method_op->error_code = 0; + if (my_method_op->mode == TCP_MODE_UNEXP) + { + op_list_add(op_list_array[IND_COMPLETE_RECV_UNEXP], + my_method_op); + } + else + { + ((struct tcp_op*)(my_method_op->method_data))->tcp_op_state = + BMI_TCP_COMPLETE; + 
op_list_add(completion_array[my_method_op->context_id], my_method_op); + } + } + } + + return (0); +} + + +/* tcp_do_work_error() + * + * handles a tcp address that has indicated an error during polling. + * + * returns 0 on success, -errno on failure + */ +static int tcp_do_work_error(bmi_method_addr_p map) +{ + struct tcp_addr *tcp_addr_data = NULL; + int buf; + int ret; + int tmp_errno; + + tcp_addr_data = (struct tcp_addr *) map->method_data; + + /* perform a read on the socket so that we can get a real errno */ + ret = recv(tcp_addr_data->socket, &buf, sizeof(int), 0); + if (ret == 0) + tmp_errno = EPIPE; /* report other side closed socket with this */ + else + tmp_errno = WSAGetLastError(); + + gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Error: bmi_tcp: %d\n", + tmp_errno); + + if (tcp_addr_data->server_port) + { + /* Ignore this and hope it goes away... we don't want to lose + * our local socket */ + dealloc_tcp_method_addr(map); + gossip_lerr("Warning: error polling on server socket, continuing.\n"); + return (0); + } + + if(tmp_errno == 0) + tmp_errno = EPROTO; + + tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-tmp_errno)); + + return (0); +} + +#if defined(USE_TRUSTED) && defined(__PVFS2_CLIENT__) +/* + * tcp_enable_trusted() + * Ideally, this function should look up the security configuration of + * the server and determines + * if it needs to bind to any specific port locally or not.. + * For now look at the FIXME below. + */ +static int tcp_enable_trusted(struct tcp_addr *tcp_addr_data) +{ + /* + * FIXME: + * For now, there is no way for us to check if a given + * server is actually using port protection or not. + * For now we unconditionally use a trusted port range + * as long as USE_TRUSTED is #defined. + * + * Although most of the time we expect users + * to be using a range of 0-1024, it is hard to keep probing + * until one gets a port in the range specified. + * Hence this is a temporary fix. 
we will see if this + * requirement even needs to be met at all. + */ + static unsigned short my_requested_port = 1023; + unsigned short my_local_port = 0; + struct sockaddr_in my_local_sockaddr; + socklen_t len = sizeof(struct sockaddr_in); + memset(&my_local_sockaddr, 0, sizeof(struct sockaddr_in)); + + /* setup for a fast restart to avoid bind addr in use errors */ + if (BMI_sockio_set_sockopt(tcp_addr_data->socket, SO_REUSEADDR, 1) < 0) + { + gossip_lerr("Could not set SO_REUSEADDR on local socket (port %hd)\n", my_local_port); + } + if (BMI_sockio_bind_sock(tcp_addr_data->socket, my_requested_port) < 0) + { + gossip_lerr("Could not bind to local port %hd: %s\n", + my_requested_port, strerror(errno)); + } + else { + my_requested_port--; + } + my_local_sockaddr.sin_family = AF_INET; + if (getsockname(tcp_addr_data->socket, + (struct sockaddr *)&my_local_sockaddr, &len) == 0) + { + my_local_port = ntohs(my_local_sockaddr.sin_port); + } + gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Bound locally to port: %hd\n", my_local_port); + return 0; +} + +#endif + +#if defined(USE_TRUSTED) && defined(__PVFS2_SERVER__) + +static char *bad_errors[] = { + "invalid network address", + "invalid port", + "invalid network address and port" +}; + +/* + * tcp_allow_trusted() + * if trusted ports was enabled make sure + * that we can accept a particular connection from a given + * client + */ +static int tcp_allow_trusted(struct sockaddr_in *peer_sockaddr) +{ + char *peer_hostname = inet_ntoa(peer_sockaddr->sin_addr); + unsigned short peer_port = ntohs(peer_sockaddr->sin_port); + int i, what_failed = -1; + + /* Don't refuse connects if there were any + * parse errors or if it is not enabled in the config file + */ + if (gtcp_allowed_connection->port_enforce == 0 + && gtcp_allowed_connection->network_enforce == 0) + { + return 0; + } + /* make sure that the client is within the allowed network */ + if (gtcp_allowed_connection->network_enforce == 1) + { + /* Always allow localhost to connect 
*/ + if (ntohl(peer_sockaddr->sin_addr.s_addr) == INADDR_LOOPBACK) + { + goto port_check; + } + for (i = 0; i < gtcp_allowed_connection->network_count; i++) + { + /* check with all the masks */ + if ((peer_sockaddr->sin_addr.s_addr & gtcp_allowed_connection->netmask[i].s_addr) + != (gtcp_allowed_connection->network[i].s_addr & gtcp_allowed_connection->netmask[i].s_addr )) + { + continue; + } + else { + goto port_check; + } + } + /* not from a trusted network */ + what_failed = 0; + } +port_check: + /* make sure that the client port numbers are within specified limits */ + if (gtcp_allowed_connection->port_enforce == 1) + { + if (peer_port < gtcp_allowed_connection->ports[0] + || peer_port > gtcp_allowed_connection->ports[1]) + { + what_failed = (what_failed < 0) ? 1 : 2; + } + } + /* okay, we are good to go */ + if (what_failed < 0) + { + return 0; + } + /* no good */ + gossip_err("Rejecting client %s on port %d: %s\n", + peer_hostname, peer_port, bad_errors[what_failed]); + return -1; +} + +#endif + +/* + * tcp_accept_init() + * + * used to establish a connection from the server side. Attempts an + * accept call and provides the socket if it succeeds. + * + * returns 0 on success, -errno on failure. + */ +static int tcp_accept_init(int *socket, char** peer) +{ + + int ret = -1; + int tmp_errno = 0; + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) tcp_method_params.listen_addr->method_data; + int oldfl = 0; + struct sockaddr_in peer_sockaddr; + int peer_sockaddr_size = sizeof(struct sockaddr_in); + char* tmp_peer; + + /* do we have a socket on this end yet? 
*/ + if (tcp_addr_data->socket < 0) + { + ret = tcp_server_init(); + if (ret < 0) + { + return (ret); + } + } + + *socket = accept(tcp_addr_data->socket, (struct sockaddr*)&peer_sockaddr, + (int *)&peer_sockaddr_size); + + if (*socket < 0) + { + tmp_errno = WSAGetLastError(); + if ((tmp_errno == WSATRY_AGAIN) || + (tmp_errno == WSAEWOULDBLOCK) || + (tmp_errno == WSAENETDOWN) || + /* (tmp_errno == EPROTO) || */ + (tmp_errno == WSAENOPROTOOPT) || + /* (tmp_errno == EHOSTDOWN) || */ + /* (tmp_errno == ENONET) || */ + (tmp_errno == WSAEHOSTUNREACH) || + (tmp_errno == WSAEOPNOTSUPP) || + (tmp_errno == WSAENETUNREACH) || + /* (tmp_errno == WSAENFILE) || */ + (tmp_errno == WSAEMFILE)) + { + /* try again later */ + if (tmp_errno == EMFILE) + { + gossip_err("Error: accept: %d (continuing)\n", tmp_errno); + bmi_method_addr_drop_callback(BMI_tcp_method_name); + } + return (0); + } + else + { + gossip_err("Error: accept: %d\n", tmp_errno); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + } + +#if defined(USE_TRUSTED) && defined(__PVFS2_SERVER__) + + /* make sure that we are allowed to accept this connection */ + if (tcp_allow_trusted(&peer_sockaddr) < 0) + { + /* Force closure of the connection */ + close(*socket); + return (bmi_tcp_errno_to_pvfs(-EACCES)); + } + +#endif + + /* we accepted a new connection. turn off Nagle's algorithm. 
*/ + if (BMI_sockio_set_tcpopt(*socket, TCP_NODELAY, 1) < 0) + { + tmp_errno = WSAGetLastError(); + gossip_lerr("Error: failed to set TCP_NODELAY option.\n"); + closesocket(*socket); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + + /* set it to non-blocking operation */ + /*oldfl = fcntl(*socket, F_GETFL, 0); + if (!(oldfl & O_NONBLOCK)) + { + fcntl(*socket, F_SETFL, oldfl | O_NONBLOCK); + }*/ + SET_NONBLOCK(*socket); + + /* allocate ip address string */ + tmp_peer = inet_ntoa(peer_sockaddr.sin_addr); + *peer = (char*)malloc(strlen(tmp_peer)+1); + if(!(*peer)) + { + closesocket(*socket); + return(bmi_tcp_errno_to_pvfs(-BMI_ENOMEM)); + } + strcpy(*peer, tmp_peer); + + return (0); +} + + +/* alloc_tcp_method_op() + * + * creates a new method op with defaults filled in for tcp. + * + * returns pointer to structure on success, NULL on failure + */ +static method_op_p alloc_tcp_method_op(void) +{ + method_op_p my_method_op = NULL; + + my_method_op = bmi_alloc_method_op(sizeof(struct tcp_op)); + + /* we trust alloc_method_op to zero it out */ + + return (my_method_op); +} + + +/* dealloc_tcp_method_op() + * + * destroys an existing tcp method op, freeing segment lists if + * needed + * + * no return value + */ +static void dealloc_tcp_method_op(method_op_p old_op) +{ + bmi_dealloc_method_op(old_op); + return; +} + +/* tcp_post_send_generic() + * + * Submits send operations (low level). 
+ * + * returns 0 on success that requires later poll, returns 1 on instant + * completion, -errno on failure + */ +static int tcp_post_send_generic(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + enum bmi_buffer_type buffer_type, + struct tcp_msg_header my_header, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) dest->method_data; + method_op_p query_op = NULL; + int ret = -1; + bmi_size_t total_size = 0; + bmi_size_t amt_complete = 0; + bmi_size_t env_amt_complete = 0; + struct op_list_search_key key; + int list_index = 0; + bmi_size_t cur_index_complete = 0; + PINT_event_id eid = 0; + + if(PINT_EVENT_ENABLED) + { + int i = 0; + for(; i < list_count; ++i) + { + total_size += size_list[i]; + } + } + + PINT_EVENT_START( + bmi_tcp_send_event_id, bmi_tcp_pid, NULL, &eid, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + PINT_HINT_GET_HANDLE(hints), + PINT_HINT_GET_OP_ID(hints), + total_size); + + /* Three things can happen here: + * a) another op is already in queue for the address, so we just + * queue up + * b) we can send the whole message and return + * c) we send part of the message and queue the rest + */ + + /* NOTE: on the post_send side of an operation, it doesn't really + * matter whether the op is going to be eager or rendezvous. It is + * handled the same way (except for how the header is filled in). + * The difference is in the recv processing for TCP. + */ + + /* NOTE: we also don't care what the buffer_type says, TCP could care + * less what buffers it is using. + */ + + /* encode the message header */ + BMI_TCP_ENC_HDR(my_header); + + /* the first thing we must do is find out if another send is queued + * up for this address so that we don't mess up our ordering. 
*/ + memset(&key, 0, sizeof(struct op_list_search_key)); + key.method_addr = dest; + key.method_addr_yes = 1; + query_op = op_list_search(op_list_array[IND_SEND], &key); + if (query_op) + { + /* queue up operation */ + ret = enqueue_operation(op_list_array[IND_SEND], BMI_SEND, + dest, (void **) buffer_list, + size_list, list_count, 0, 0, + id, BMI_TCP_INPROGRESS, my_header, user_ptr, + my_header.size, 0, + context_id, + eid); + + /* TODO: is this causing deadlocks? See similar call in recv + * path for another example. This particular one seems to be an + * issue under a heavy bonnie++ load that Neill has been + * debugging. Comment out for now to see if the problem goes + * away. + */ +#if 0 + if (ret >= 0) + { + /* go ahead and try to do some work while we are in this + * function since we appear to be backlogged. Make sure that + * we do not wait in the poll, however. + */ + ret = tcp_do_work(0); + } +#endif + if (ret < 0) + { + gossip_err("Error: enqueue_operation() or tcp_do_work() returned: %d\n", ret); + } + return (ret); + } + + /* make sure the connection is established */ + ret = tcp_sock_init(dest); + if (ret < 0) + { + gossip_debug(GOSSIP_BMI_DEBUG_TCP, "tcp_sock_init() failure.\n"); + /* tcp_sock_init() returns BMI error code */ + tcp_forget_addr(dest, 0, ret); + PINT_EVENT_END(bmi_tcp_send_event_id, bmi_tcp_pid, NULL, 0, ret); + return (ret); + } + + tcp_addr_data = (struct tcp_addr *) dest->method_data; + +#if 0 + /* TODO: this is a hack for testing! */ + /* disables immediate send completion... 
*/ + ret = enqueue_operation(op_list_array[IND_SEND], BMI_SEND, + dest, buffer_list, size_list, list_count, 0, 0, + id, BMI_TCP_INPROGRESS, my_header, user_ptr, + my_header.size, 0, + context_id); + return(ret); +#endif + + if (tcp_addr_data->not_connected) + { + /* if the connection is not completed, queue up for later work */ + ret = enqueue_operation(op_list_array[IND_SEND], BMI_SEND, + dest, (void **) buffer_list, size_list, + list_count, 0, 0, + id, BMI_TCP_INPROGRESS, my_header, user_ptr, + my_header.size, 0, + context_id, + eid); + if(ret < 0) + { + gossip_err("Error: enqueue_operation() returned: %d\n", ret); + } + return (ret); + } + + /* try to send some data */ + env_amt_complete = 0; + ret = payload_progress(tcp_addr_data->socket, + (void **) buffer_list, + size_list, list_count, my_header.size, &list_index, + &cur_index_complete, BMI_SEND, my_header.enc_hdr, &env_amt_complete); + if (ret < 0) + { + PVFS_perror_gossip("Error: payload_progress", ret); + /* payload_progress() returns BMI error codes */ + tcp_forget_addr(dest, 0, ret); + PINT_EVENT_END(bmi_tcp_send_event_id, bmi_tcp_pid, NULL, eid, 0, ret); + return (ret); + } + + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "Sent: %d bytes of data.\n", ret); + amt_complete = ret; + assert(amt_complete <= my_header.size); + if (amt_complete == my_header.size && env_amt_complete == TCP_ENC_HDR_SIZE) + { + /* we are already done */ + PINT_EVENT_END(bmi_tcp_send_event_id, bmi_tcp_pid, + NULL, eid, 0, amt_complete); + return (1); + } + + /* queue up the remainder */ + ret = enqueue_operation(op_list_array[IND_SEND], BMI_SEND, + dest, (void **) buffer_list, + size_list, list_count, + amt_complete, env_amt_complete, id, + BMI_TCP_INPROGRESS, my_header, user_ptr, + my_header.size, 0, context_id, eid); + + if(ret < 0) + { + gossip_err("Error: enqueue_operation() returned: %d\n", ret); + } + return (ret); +} + + +/* payload_progress() + * + * makes progress on sending/recving data payload portion of a message + * + * 
returns amount completed on success, -errno on failure + */ +static int payload_progress(int s, void *const *buffer_list, const bmi_size_t* + size_list, int list_count, bmi_size_t total_size, int* list_index, + bmi_size_t* current_index_complete, enum bmi_op_type send_recv, + char* enc_hdr, bmi_size_t* env_amt_complete) +{ + int i; + int count = 0; + int ret; + int completed; + /* used for finding the stopping point on short receives */ + int final_index = list_count-1; + bmi_size_t final_size = size_list[list_count-1]; + bmi_size_t sum = 0; + int vector_index = 0; + int header_flag = 0; + int tmp_env_done = 0; + + if(send_recv == BMI_RECV) + { + /* find out if we should stop short in list processing */ + for(i=0; i= total_size) + { + final_index = i; + final_size = size_list[i] - (sum-total_size); + break; + } + } + } + + assert(list_count > *list_index); + + /* make sure we don't overrun our preallocated iovec array */ + if((list_count - (*list_index)) > BMI_TCP_IOV_COUNT) + { + list_count = (*list_index) + BMI_TCP_IOV_COUNT; + } + + /* do we need to send any of the header? 
*/ + if(send_recv == BMI_SEND && *env_amt_complete < TCP_ENC_HDR_SIZE) + { + stat_io_vector[vector_index].buf = &enc_hdr[*env_amt_complete]; + stat_io_vector[vector_index].len = TCP_ENC_HDR_SIZE - *env_amt_complete; + count++; + vector_index++; + header_flag = 1; + } + + /* setup vector */ + stat_io_vector[vector_index].buf = + (char*)buffer_list[*list_index] + *current_index_complete; + count++; + if(final_index == 0) + { + stat_io_vector[vector_index].len = final_size - *current_index_complete; + } + else + { + stat_io_vector[vector_index].len = + size_list[*list_index] - *current_index_complete; + for(i = (*list_index + 1); i < list_count; i++) + { + vector_index++; + count++; + stat_io_vector[vector_index].buf = (CHAR *) buffer_list[i]; + if(i == final_index) + { + stat_io_vector[vector_index].len = final_size; + break; + } + else + { + stat_io_vector[vector_index].len = size_list[i]; + } + } + } + + assert(count > 0); + + if(send_recv == BMI_RECV) + { + ret = BMI_sockio_nbvector(s, stat_io_vector, count, 1); + } + else + { + ret = BMI_sockio_nbvector(s, stat_io_vector, count, 0); + } + + /* if error or nothing done, return now */ + if(ret == 0) + return(0); + if(ret <= 0) + return(bmi_tcp_errno_to_pvfs(-WSAGetLastError())); + + completed = ret; + if(header_flag && (completed >= 0)) + { + /* take care of completed header status */ + tmp_env_done = TCP_ENC_HDR_SIZE - *env_amt_complete; + if(tmp_env_done > completed) + tmp_env_done = completed; + completed -= tmp_env_done; + ret -= tmp_env_done; + (*env_amt_complete) += tmp_env_done; + } + + i=header_flag; + while(completed > 0) + { + /* take care of completed data payload */ + if(completed >= stat_io_vector[i].len) + { + completed -= stat_io_vector[i].len; + *current_index_complete = 0; + (*list_index)++; + i++; + } + else + { + *current_index_complete += completed; + completed = 0; + } + } + + return(ret); +} + +static void bmi_set_sock_buffers(int socket){ + //Set socket buffer sizes: + 
gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Default socket buffers send:%d receive:%d\n", + GET_SENDBUFSIZE(socket), GET_RECVBUFSIZE(socket)); + gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Setting socket buffer size for send:%d receive:%d \n", + tcp_buffer_size_send, tcp_buffer_size_receive); + if( tcp_buffer_size_receive != 0) + SET_RECVBUFSIZE(socket,tcp_buffer_size_receive); + if( tcp_buffer_size_send != 0) + SET_SENDBUFSIZE(socket,tcp_buffer_size_send); + gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Reread socket buffers send:%d receive:%d\n", + GET_SENDBUFSIZE(socket), GET_RECVBUFSIZE(socket)); +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/bmi/bmi_wintcp/socket-collection-epoll.c b/src/io/bmi/bmi_wintcp/socket-collection-epoll.c new file mode 100755 index 0000000..3683846 --- /dev/null +++ b/src/io/bmi/bmi_wintcp/socket-collection-epoll.c @@ -0,0 +1,203 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * this is an implementation of a socket collection library. It can be + * used to maintain a dynamic list of sockets and perform polling + * operations. + */ + +/* + * NOTE: I am making read bits implicit in the implementation. A poll + * will always check to see if there is data to be read on a socket. + */ + +#include +#include +#include +#include +#include + +#include "gossip.h" +#include "socket-collection-epoll.h" +#include "bmi-method-support.h" +#include "bmi-tcp-addressing.h" +#include "gen-locks.h" + +/* errors that can occur on a poll socket */ +#define ERRMASK (EPOLLERR|EPOLLHUP) + +/* hint to kernel about how many sockets we expect to poll over */ +#define EPOLL_CREATE_SIZE 128 + +/* socket_collection_init() + * + * creates a new socket collection. It also acquires the server socket + * from the caller if it is available. 
Passing in a negative value + * indicates that this is being used on a client node and there is no + * server socket. + * + * returns a pointer to the collection on success, NULL on failure. + */ +socket_collection_p BMI_socket_collection_init(int new_server_socket) +{ + struct epoll_event event; + socket_collection_p tmp_scp = NULL; + int ret = -1; + + tmp_scp = (struct socket_collection*) malloc(sizeof(struct + socket_collection)); + if(!tmp_scp) + { + return(NULL); + } + + memset(tmp_scp, 0, sizeof(struct socket_collection)); + + tmp_scp->epfd = epoll_create(EPOLL_CREATE_SIZE); + if(tmp_scp->epfd < 0) + { + gossip_err("Error: epoll_create() failure: %s.\n", strerror(errno)); + free(tmp_scp); + return(NULL); + } + + tmp_scp->server_socket = new_server_socket; + + if(new_server_socket > -1) + { + memset(&event, 0, sizeof(event)); + event.events = (EPOLLIN|EPOLLERR|EPOLLHUP); + event.data.ptr = NULL; + ret = epoll_ctl(tmp_scp->epfd, EPOLL_CTL_ADD, new_server_socket, + &event); + if(ret < 0 && errno != EEXIST) + { + gossip_err("Error: epoll_ctl() failure: %s.\n", strerror(errno)); + free(tmp_scp); + return(NULL); + } + } + + return (tmp_scp); +} + +/* socket_collection_finalize() + * + * destroys a socket collection. IMPORTANT: It DOES NOT destroy the + * addresses contained within the collection, nor does it terminate + * connections. This must be handled elsewhere. + * + * no return values. + */ +void BMI_socket_collection_finalize(socket_collection_p scp) +{ + free(scp); + return; +} + + +/* socket_collection_testglobal() + * + * this function is used to poll to see if any of the new sockets are + * available for work. The array of method addresses and array of + * status fields must be passed into the function by the caller. + * incount specifies the size of these arrays. outcount + * specifies the number of ready addresses. + * + * returns 0 on success, -errno on failure. 
+ */ +int BMI_socket_collection_testglobal(socket_collection_p scp, + int incount, + int *outcount, + bmi_method_addr_p * maps, + int * status, + int poll_timeout) +{ + struct tcp_addr* tcp_addr_data = NULL; + int ret = -1; + int old_errno; + int tmp_count; + int i; + + /* init the outgoing arguments for safety */ + *outcount = 0; + memset(maps, 0, (sizeof(bmi_method_addr_p) * incount)); + memset(status, 0, (sizeof(int) * incount)); + + if(incount == 0) + { + return(0); + } + + /* actually do the epoll_wait() here */ + do + { + tmp_count = incount; + if(tmp_count > BMI_EPOLL_MAX_PER_CYCLE) + tmp_count = BMI_EPOLL_MAX_PER_CYCLE; + + ret = epoll_wait(scp->epfd, scp->event_array, tmp_count, + poll_timeout); + + } while(ret < 0 && errno == EINTR); + old_errno = errno; + + if(ret < 0) + { + return(-old_errno); + } + + /* nothing ready, just return */ + if(ret == 0) + { + return(0); + } + + tmp_count = ret; + + for(i=0; ievent_array[i].events); + + if(scp->event_array[i].events & ERRMASK) + status[*outcount] |= SC_ERROR_BIT; + if(scp->event_array[i].events & POLLIN) + status[*outcount] |= SC_READ_BIT; + if(scp->event_array[i].events & POLLOUT) + status[*outcount] |= SC_WRITE_BIT; + + if(scp->event_array[i].data.ptr == NULL) + { + /* server socket */ + maps[*outcount] = alloc_tcp_method_addr(); + /* TODO: handle this */ + assert(maps[*outcount]); + tcp_addr_data = (maps[*outcount])->method_data; + tcp_addr_data->server_port = 1; + tcp_addr_data->socket = scp->server_socket; + tcp_addr_data->port = -1; + } + else + { + /* normal case */ + maps[*outcount] = scp->event_array[i].data.ptr; + } + + *outcount = (*outcount) + 1; + } + + return (0); +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/bmi/bmi_wintcp/socket-collection-epoll.h b/src/io/bmi/bmi_wintcp/socket-collection-epoll.h new file mode 100755 index 0000000..a4c6ac9 --- /dev/null +++ 
b/src/io/bmi/bmi_wintcp/socket-collection-epoll.h @@ -0,0 +1,120 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * This file contains the visible data structures and function interface + * for a socket collection library. This library can maintain lists of + * sockets and perform polling operations on them. + */ + +/* + * NOTE: I am making read bits implicit in the implementation. A poll + * will always check to see if there is data to be read on a socket. + */ + +#ifndef __SOCKET_COLLECTION_EPOLL_H +#define __SOCKET_COLLECTION_EPOLL_H + +#include +#include + +#include "bmi-method-support.h" +#include "bmi-tcp-addressing.h" +#include "quicklist.h" +#include "gen-locks.h" + +#define BMI_EPOLL_MAX_PER_CYCLE 16 + +struct socket_collection +{ + int epfd; + + struct epoll_event event_array[BMI_EPOLL_MAX_PER_CYCLE]; + + int server_socket; +}; +typedef struct socket_collection* socket_collection_p; + +enum +{ + SC_READ_BIT = 1, + SC_WRITE_BIT = 2, + SC_ERROR_BIT = 4 +}; + +socket_collection_p BMI_socket_collection_init(int new_server_socket); + +/* the bmi_tcp code may try to add a socket to the collection before + * it is fully connected, just ignore in this case + */ +#define BMI_socket_collection_add(s, m) \ +do { \ + struct tcp_addr* tcp_data = (m)->method_data; \ + if(tcp_data->socket > -1){ \ + struct epoll_event event;\ + memset(&event, 0, sizeof(event));\ + event.events = EPOLLIN|EPOLLERR|EPOLLHUP;\ + event.data.ptr = tcp_data->map;\ + epoll_ctl(s->epfd, EPOLL_CTL_ADD, tcp_data->socket, &event);\ + } \ +} while(0) + +#define BMI_socket_collection_remove(s, m) \ +do { \ + struct epoll_event event;\ + struct tcp_addr* tcp_data = (m)->method_data; \ + tcp_data->write_ref_count = 0; \ + memset(&event, 0, sizeof(event));\ + event.events = 0;\ + event.data.ptr = tcp_data->map;\ + epoll_ctl(s->epfd, EPOLL_CTL_DEL, tcp_data->socket, &event);\ +} while(0) + +/* we _must_ have a valid socket at 
this point if we want to write data */ +#define BMI_socket_collection_add_write_bit(s, m) \ +do { \ + struct tcp_addr* tcp_data = (m)->method_data; \ + struct epoll_event event;\ + assert(tcp_data->socket > -1); \ + tcp_data->write_ref_count++; \ + memset(&event, 0, sizeof(event));\ + event.events = EPOLLIN|EPOLLERR|EPOLLHUP|EPOLLOUT;\ + event.data.ptr = tcp_data->map;\ + epoll_ctl(s->epfd, EPOLL_CTL_MOD, tcp_data->socket, &event);\ +} while(0) + +#define BMI_socket_collection_remove_write_bit(s, m) \ +do { \ + struct tcp_addr* tcp_data = (m)->method_data; \ + struct epoll_event event;\ + tcp_data->write_ref_count--; \ + assert(tcp_data->write_ref_count > -1); \ + if (tcp_data->write_ref_count == 0) { \ + memset(&event, 0, sizeof(event));\ + event.events = EPOLLIN|EPOLLERR|EPOLLHUP;\ + event.data.ptr = tcp_data->map;\ + epoll_ctl(s->epfd, EPOLL_CTL_MOD, tcp_data->socket, &event);\ + }\ +} while(0) + +void BMI_socket_collection_finalize(socket_collection_p scp); +int BMI_socket_collection_testglobal(socket_collection_p scp, + int incount, + int *outcount, + bmi_method_addr_p * maps, + int * status, + int poll_timeout); + +#endif /* __SOCKET_COLLECTION_EPOLL_H */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/bmi/bmi_wintcp/socket-collection.c b/src/io/bmi/bmi_wintcp/socket-collection.c new file mode 100755 index 0000000..8a6c812 --- /dev/null +++ b/src/io/bmi/bmi_wintcp/socket-collection.c @@ -0,0 +1,477 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * this is an implementation of a socket collection library. It can be + * used to maintain a dynamic list of sockets and perform polling + * operations. + */ + +/* + * NOTE: I am making read bits implicit in the implementation. A poll + * will always check to see if there is data to be read on a socket. 
+ */ + +#include + +#include +#include +#include +#include + +#include "gossip.h" +#include "socket-collection.h" +#include "bmi-method-support.h" +#include "bmi-tcp-addressing.h" +#include "gen-locks.h" + +#include "pvfs2-debug.h" + +/* errors that can occur on a poll socket */ +#define ERRMASK (POLLERR+POLLHUP+POLLNVAL) + +#define POLLFD_ARRAY_START 32 +#define POLLFD_ARRAY_INC 32 + +/* socket_collection_init() + * + * creates a new socket collection. It also acquires the server socket + * from the caller if it is available. Passing in a negative value + * indicates that this is being used on a client node and there is no + * server socket. + * + * returns a pointer to the collection on success, NULL on failure. + */ +socket_collection_p BMI_socket_collection_init(int new_server_socket) +{ + + socket_collection_p tmp_scp = NULL; + + tmp_scp = (struct socket_collection*) malloc(sizeof(struct + socket_collection)); + if(!tmp_scp) + { + return(NULL); + } + + memset(tmp_scp, 0, sizeof(struct socket_collection)); + + gen_mutex_init(&tmp_scp->queue_mutex); + + tmp_scp->pollfd_array = (struct + pollfd*)malloc(POLLFD_ARRAY_START*sizeof(WSAPOLLFD)); + + tmp_scp->addr_array = + (bmi_method_addr_p*)malloc(POLLFD_ARRAY_START*sizeof(bmi_method_addr_p)); + if(!tmp_scp->addr_array) + { + free(tmp_scp->pollfd_array); + free(tmp_scp); + return NULL; + } + /* not used on Windows + if (pipe(tmp_scp->pipe_fd) < 0) + if (!CreatePipe(&(tmp_scp->pipe_fd[0]), + &(tmp_scp->pipe_fd[1]), + NULL, 128*1024)) + { + perror("pipe failed:"); + BMI_socket_collection_finalize(tmp_scp); + return NULL; + } + */ + + tmp_scp->array_max = POLLFD_ARRAY_START; + tmp_scp->array_count = 0; + INIT_QLIST_HEAD(&tmp_scp->remove_queue); + INIT_QLIST_HEAD(&tmp_scp->add_queue); + tmp_scp->server_socket = new_server_socket; + + if(new_server_socket > -1) + { + tmp_scp->pollfd_array[tmp_scp->array_count].fd = new_server_socket; + tmp_scp->pollfd_array[tmp_scp->array_count].events = POLLIN; + 
tmp_scp->addr_array[tmp_scp->array_count] = NULL; + tmp_scp->array_count++; + } + + /* Add the pipe_fd[0] fd to the poll in set always */ + /* -- must be handled separately on Windows + tmp_scp->pollfd_array[tmp_scp->array_count].fd = tmp_scp->pipe_fd[0]; + tmp_scp->pollfd_array[tmp_scp->array_count].events = POLLIN; + tmp_scp->addr_array[tmp_scp->array_count] = NULL; + tmp_scp->array_count++; + */ + + return (tmp_scp); +} + +/* socket_collection_queue() + * + * queues a tcp method_addr for addition or removal from the collection. + * + * returns 0 on success, -errno on failure. + */ +void BMI_socket_collection_queue(socket_collection_p scp, + bmi_method_addr_p map, struct qlist_head* queue) +{ + struct qlist_head* iterator = NULL; + struct qlist_head* scratch = NULL; + struct tcp_addr* tcp_addr_data = NULL; + + /* make sure that this address isn't already slated for addition/removal */ + qlist_for_each_safe(iterator, scratch, &scp->remove_queue) + { + tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); + if(tcp_addr_data->map == map) + { + qlist_del(&tcp_addr_data->sc_link); + break; + } + } + qlist_for_each_safe(iterator, scratch, &scp->add_queue) + { + tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); + if(tcp_addr_data->map == map) + { + qlist_del(&tcp_addr_data->sc_link); + break; + } + } + + /* add it on to the appropriate queue */ + tcp_addr_data = map->method_data; + /* add to head, we are likely to access it again soon */ + qlist_add(&tcp_addr_data->sc_link, queue); + + return; +} + + +/* socket_collection_finalize() + * + * destroys a socket collection. IMPORTANT: It DOES NOT destroy the + * addresses contained within the collection, nor does it terminate + * connections. This must be handled elsewhere. + * + * no return values. 
+ */ +void BMI_socket_collection_finalize(socket_collection_p scp) +{ + free(scp->addr_array); + free(scp->pollfd_array); + free(scp); + return; +} + +/* socket_collection_testglobal() + * + * this function is used to poll to see if any of the new sockets are + * available for work. The array of method addresses and array of + * status fields must be passed into the function by the caller. + * incount specifies the size of these arrays. outcount + * specifies the number of ready addresses. + * + * returns 0 on success, -errno on failure. + */ +int BMI_socket_collection_testglobal(socket_collection_p scp, + int incount, + int *outcount, + bmi_method_addr_p * maps, + int * status, + int poll_timeout) +{ + struct qlist_head* iterator = NULL; + struct qlist_head* scratch = NULL; + struct tcp_addr* tcp_addr_data = NULL; + struct tcp_addr* shifted_tcp_addr_data = NULL; + WSAPOLLFD* tmp_pollfd_array = NULL; + bmi_method_addr_p* tmp_addr_array = NULL; + int ret = -1; + int old_errno; + /* int tmp_count; */ + int i; + int skip_flag; + int out_flag; + /* int pipe_notify = 0; + struct timeval start, end; */ + int allowed_poll_time = poll_timeout; + /* DWORD bytes; */ + +/* + gettimeofday(&start, NULL); +do_again: +*/ + /* pipe_notify = 0; */ + /* init the outgoing arguments for safety */ + *outcount = 0; + memset(maps, 0, (sizeof(bmi_method_addr_p) * incount)); + memset(status, 0, (sizeof(int) * incount)); + + gen_mutex_lock(&scp->queue_mutex); + + /* look for addresses slated for removal */ + qlist_for_each_safe(iterator, scratch, &scp->remove_queue) + { + tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); + qlist_del(&tcp_addr_data->sc_link); + /* take out of poll array, shift last entry into its place */ + if(tcp_addr_data->sc_index > -1) + { + scp->pollfd_array[tcp_addr_data->sc_index] = + scp->pollfd_array[scp->array_count-1]; + scp->addr_array[tcp_addr_data->sc_index] = + scp->addr_array[scp->array_count-1]; + shifted_tcp_addr_data = + 
scp->addr_array[tcp_addr_data->sc_index]->method_data; + shifted_tcp_addr_data->sc_index = tcp_addr_data->sc_index; + scp->array_count--; + tcp_addr_data->sc_index = -1; + tcp_addr_data->write_ref_count = 0; + } + } + + /* look for addresses slated for addition */ + qlist_for_each_safe(iterator, scratch, &scp->add_queue) + { + tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); + qlist_del(&tcp_addr_data->sc_link); + if(tcp_addr_data->sc_index > -1) + { + /* update existing entry */ +#if 0 + gossip_err("HELLO: updating addr: %p, index: %d, ref: %d.\n", + scp->addr_array[tcp_addr_data->sc_index], + tcp_addr_data->sc_index, + tcp_addr_data->write_ref_count); +#endif + scp->pollfd_array[tcp_addr_data->sc_index].events = POLLIN; + if(tcp_addr_data->write_ref_count > 0) + scp->pollfd_array[tcp_addr_data->sc_index].events |= POLLOUT; + } + else + { + /* new entry */ + if(scp->array_count == scp->array_max) + { + /* we must enlarge the poll arrays */ + tmp_pollfd_array = (WSAPOLLFD*)malloc( + (scp->array_max+POLLFD_ARRAY_INC)*sizeof(WSAPOLLFD)); + /* TODO: handle this */ + assert(tmp_pollfd_array); + tmp_addr_array = (bmi_method_addr_p*)malloc( + (scp->array_max+POLLFD_ARRAY_INC)*sizeof(bmi_method_addr_p)); + /* TODO: handle this */ + assert(tmp_addr_array); + memcpy(tmp_pollfd_array, scp->pollfd_array, + scp->array_max*sizeof(WSAPOLLFD)); + free(scp->pollfd_array); + scp->pollfd_array = tmp_pollfd_array; + memcpy(tmp_addr_array, scp->addr_array, + scp->array_max*sizeof(bmi_method_addr_p)); + free(scp->addr_array); + scp->addr_array = tmp_addr_array; + scp->array_max = scp->array_max+POLLFD_ARRAY_INC; + } + /* add into pollfd array */ + tcp_addr_data->sc_index = scp->array_count; + scp->array_count++; + scp->addr_array[tcp_addr_data->sc_index] = tcp_addr_data->map; + scp->pollfd_array[tcp_addr_data->sc_index].fd = + tcp_addr_data->socket; + scp->pollfd_array[tcp_addr_data->sc_index].events = POLLIN; + if(tcp_addr_data->write_ref_count > 0) + 
scp->pollfd_array[tcp_addr_data->sc_index].events |= POLLOUT; + } + } + gen_mutex_unlock(&scp->queue_mutex); + + /* actually do the poll() work */ + /* + do + { + DWORD bytes; + + /* poll for 1ms */ + /* ret = WSAPoll(scp->pollfd_array, scp->array_count, 1); + old_errno = WSAGetLastError(); + allowed_poll_time--; + + } while(ret == 0 && allowed_poll_time > 0); + */ + /* ignore the request if no sockets are available */ + if (scp->array_count > 0) + { + ret = WSAPoll(scp->pollfd_array, scp->array_count, allowed_poll_time); + old_errno = WSAGetLastError(); + } + else + { + ret = old_errno = 0; + } + + if(ret < 0) + { + return(bmi_tcp_errno_to_pvfs(-old_errno)); + } + + /* check our pipe */ + /* + if (PeekNamedPipe(scp->pipe_fd[0], NULL, 0, NULL, &bytes, NULL)) + { + if (bytes) + { + char c; + DWORD count; + + pipe_notify = 1; + /* drain the pipe */ + /* ReadFile(scp->pipe_fd[0], &c, 1, &count, NULL); + + } + } + else + { + return(bmi_tcp_errno_to_pvfs(GetLastError())); + } + */ + + /* nothing ready, just return + -- there may actually be an error: see below */ + /* + if(ret == 0 && !pipe_notify) + { + return(0); + } + */ + + /* tmp_count = ret; */ + + for(i=0; iarray_count; i++) + { + /* short out if we hit count limit */ + if(*outcount == incount /* || *outcount == tmp_count */) + { + break; + } + + skip_flag = out_flag = 0; + + /* make sure that this addr hasn't been removed */ + gen_mutex_lock(&scp->queue_mutex); + qlist_for_each_safe(iterator, scratch, &scp->remove_queue) + { + tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); + if(tcp_addr_data->map == scp->addr_array[i]) + { + skip_flag = 1; + break; + } + } + gen_mutex_unlock(&scp->queue_mutex); + if(skip_flag) + continue; + + /* anything ready on this socket? 
*/ + if (scp->pollfd_array[i].revents) + { + + if(scp->pollfd_array[i].revents & ERRMASK) + status[*outcount] |= SC_ERROR_BIT; + if(scp->pollfd_array[i].revents & POLLIN) + status[*outcount] |= SC_READ_BIT; + if(scp->pollfd_array[i].revents & POLLOUT) + status[*outcount] |= SC_WRITE_BIT; + + /* Special case--POLLHUP has been received but data + is available. A graceful close has been initiated. + Clear the error flag so data is read/sent normally. */ + if ((scp->pollfd_array[i].revents & POLLHUP) && + (!(scp->pollfd_array[i].revents & POLLERR+POLLNVAL)) && + ((scp->pollfd_array[i].revents & POLLIN) || + (scp->pollfd_array[i].revents & POLLOUT))) + { + status[*outcount] &= ~SC_ERROR_BIT; + } + + out_flag = 1; + } + else + { + /* on Windows there may be an error on the socket that WSAPoll + doesn't report--use getsockopt to find */ + int rc, optval, optlen = sizeof(int); + + rc = getsockopt(scp->pollfd_array[i].fd, SOL_SOCKET, SO_ERROR, (char *) &optval, &optlen); + + if (rc != 0) + { + return(bmi_tcp_errno_to_pvfs(-WSAGetLastError())); + } + + if (optval) + { + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, + "Socket %d error: %d\n", + scp->pollfd_array[i].fd, + optval); + status[*outcount] |= SC_ERROR_BIT; + + out_flag = 1; + } + } + + if (out_flag) + { + if(scp->addr_array[i] == NULL) + { + /* server socket */ + maps[*outcount] = alloc_tcp_method_addr(); + /* TODO: handle this */ + assert(maps[*outcount]); + tcp_addr_data = (maps[*outcount])->method_data; + tcp_addr_data->server_port = 1; + tcp_addr_data->socket = scp->server_socket; + tcp_addr_data->port = -1; + } + else + { + /* normal case */ + maps[*outcount] = scp->addr_array[i]; + } + + *outcount = (*outcount) + 1; + } + } + + /* Under the following conditions (i.e. 
all of them must be true) we go back to redoing poll + * a) There were no outstanding sockets/fds that had data + * b) There was a pipe notification that our socket sets have changed + * c) we havent exhausted our allotted time + */ + /* + if (*outcount == 0 && pipe_notify == 1) + { + gettimeofday(&end, NULL); + timersub(&end, &start, &end); + allowed_poll_time -= (end.tv_sec * 1000 + end.tv_usec/1000); + if (allowed_poll_time > 0) + goto do_again; + } + */ + + return (0); +} + + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/bmi/bmi_wintcp/socket-collection.h b/src/io/bmi/bmi_wintcp/socket-collection.h new file mode 100755 index 0000000..d0ec57e --- /dev/null +++ b/src/io/bmi/bmi_wintcp/socket-collection.h @@ -0,0 +1,126 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * This file contains the visible data structures and function interface + * for a socket collection library. This library can maintain lists of + * sockets and perform polling operations on them. + */ + +/* + * NOTE: I am making read bits implicit in the implementation. A poll + * will always check to see if there is data to be read on a socket. 
+ */ + +#ifndef __SOCKET_COLLECTION_H +#define __SOCKET_COLLECTION_H + +#include +#include "bmi-method-support.h" +#include "bmi-tcp-addressing.h" +#include "quicklist.h" +#include "gen-locks.h" + +struct socket_collection +{ + /*struct pollfd* pollfd_array;*/ + WSAPOLLFD *pollfd_array; + bmi_method_addr_p* addr_array; + int array_max; + int array_count; + + gen_mutex_t queue_mutex; + struct qlist_head remove_queue; + struct qlist_head add_queue; + + int server_socket; + HANDLE pipe_fd[2]; +}; +typedef struct socket_collection* socket_collection_p; + +enum +{ + SC_READ_BIT = 1, + SC_WRITE_BIT = 2, + SC_ERROR_BIT = 4 +}; + +socket_collection_p BMI_socket_collection_init(int new_server_socket); +void BMI_socket_collection_queue(socket_collection_p scp, + bmi_method_addr_p map, struct qlist_head* queue); + +/* the bmi_tcp code may try to add a socket to the collection before + * it is fully connected, just ignore in this case + */ +/* write a byte on the pipe_fd[1] so that poll breaks out in case it is idling */ +#define BMI_socket_collection_add(s, m) \ +do { \ + struct tcp_addr* tcp_data = (struct tcp_addr *) (m)->method_data; \ + if(tcp_data->socket > -1){ \ + char c; \ + DWORD count; \ + gen_mutex_lock(&((s)->queue_mutex)); \ + BMI_socket_collection_queue(s, m, &((s)->add_queue)); \ + gen_mutex_unlock(&((s)->queue_mutex)); \ + /*WriteFile(s->pipe_fd[1], &c, 1, &count, NULL);*/\ + } \ +} while(0) + +#define BMI_socket_collection_remove(s, m) \ +do { \ + char c; \ + DWORD count; \ + gen_mutex_lock(&((s)->queue_mutex)); \ + BMI_socket_collection_queue(s, m, &((s)->remove_queue)); \ + gen_mutex_unlock(&((s)->queue_mutex)); \ + /*WriteFile(s->pipe_fd[1], &c, 1, &count, NULL);*/\ +} while(0) + +/* we _must_ have a valid socket at this point if we want to write data */ +#define BMI_socket_collection_add_write_bit(s, m) \ +do { \ + char c;\ + DWORD count; \ + struct tcp_addr* tcp_data = (struct tcp_addr *) (m)->method_data; \ + assert(tcp_data->socket > -1); \ + 
gen_mutex_lock(&((s)->queue_mutex)); \ + tcp_data->write_ref_count++; \ + BMI_socket_collection_queue((s),(m), &((s)->add_queue)); \ + gen_mutex_unlock(&((s)->queue_mutex)); \ + /*WriteFile(s->pipe_fd[1], &c, 1, &count, NULL);*/\ +} while(0) + +#define BMI_socket_collection_remove_write_bit(s, m) \ +do { \ + char c;\ + DWORD count; \ + struct tcp_addr* tcp_data = (struct tcp_addr *) (m)->method_data; \ + gen_mutex_lock(&((s)->queue_mutex)); \ + tcp_data->write_ref_count--; \ + assert(tcp_data->write_ref_count > -1); \ + BMI_socket_collection_queue((s),(m), &((s)->add_queue)); \ + gen_mutex_unlock(&((s)->queue_mutex)); \ + /*WriteFile(s->pipe_fd[1], &c, 1, &count, NULL);*/\ +} while(0) + +void BMI_socket_collection_finalize(socket_collection_p scp); +int BMI_socket_collection_testglobal(socket_collection_p scp, + int incount, + int *outcount, + bmi_method_addr_p * maps, + int * status, + int poll_timeout); + +#endif /* __SOCKET_COLLECTION_H */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/bmi/bmi_wintcp/sockio.c b/src/io/bmi/bmi_wintcp/sockio.c new file mode 100755 index 0000000..a6ae5ca --- /dev/null +++ b/src/io/bmi/bmi_wintcp/sockio.c @@ -0,0 +1,415 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#include "pvfs2-config.h" + +#include +/* #include */ +#include +#include +#include +#include +/* #include */ +/* #include */ +#include +/* #include +#include +*/ +#ifdef HAVE_NETDB_H +#include +#endif +#ifdef HAVE_ARPA_INET_H +#include +#endif +/* #include +#include */ +#include + +#include "sockio.h" +#include "gossip.h" + +typedef unsigned int socklen_t; + +/* if the platform provides a MSG_NOSIGNAL option (which disables the + * generation of signals on broken pipe), then use it + */ +#ifdef MSG_NOSIGNAL +#define DEFAULT_MSG_FLAGS MSG_NOSIGNAL +#else +#define DEFAULT_MSG_FLAGS 0 +#endif + +int BMI_sockio_new_sock() +{ + return(socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)); +} + +int BMI_sockio_bind_sock(int sockd, + int service) +{ + struct sockaddr_in saddr; + + memset((char *) &saddr, 0, sizeof(saddr)); + saddr.sin_family = AF_INET; + saddr.sin_port = htons((u_short) service); + saddr.sin_addr.s_addr = INADDR_ANY; + bind_sock_restart: + if (bind(sockd, (struct sockaddr *) &saddr, sizeof(saddr)) < 0) + { + if (WSAGetLastError() == WSAEINTR) + goto bind_sock_restart; + return (-1); + } + return (sockd); +} + +/* NOTE: this function returns BMI error codes */ +int BMI_sockio_bind_sock_specific(int sockd, + const char *name, + int service) +{ + struct sockaddr saddr; + int ret; + + if ((ret = BMI_sockio_init_sock(&saddr, name, service)) != 0) + return (ret); /* converted to PVFS error code below */ + + bind_sock_restart: + if (bind(sockd, &saddr, sizeof(saddr)) < 0) + { + if (WSAGetLastError() == WSAEINTR) + goto bind_sock_restart; + return(bmi_errno_to_pvfs(-WSAGetLastError())); + } + return (sockd); +} + + +/* NOTE: this function returns BMI error codes */ +int BMI_sockio_connect_sock(int sockd, + const char *name, + int service) +{ + struct sockaddr saddr; + int ret; + + if ((ret = BMI_sockio_init_sock(&saddr, name, service)) != 0) + return (ret); + connect_sock_restart: + if (connect(sockd, (struct sockaddr *) &saddr, sizeof(saddr)) < 0) + { + if 
(WSAGetLastError() == WSAEINTR) + goto connect_sock_restart; + return(bmi_errno_to_pvfs(-WSAGetLastError())); + } + return (sockd); +} + +#ifdef HAVE_GETHOSTBYNAME +static int conv_h_errno(int herr) +{ + switch (herr) + { + case WSAHOST_NOT_FOUND : + return BMI_EHOSTNTFD; + case WSANO_ADDRESS : + return BMI_EADDRNTFD; + case WSANO_RECOVERY : + return BMI_ENORECVR; + case WSATRY_AGAIN : + return BMI_ETRYAGAIN; + default : + return herr; + } +} + +/* gethostbyname version */ +int BMI_sockio_init_sock(struct sockaddr *saddrp, + const char *name, + int service) +{ + struct hostent *hep; + + memset((char *) saddrp, 0, sizeof(struct sockaddr_in)); + if (name == NULL) + { + if ((hep = gethostbyname("localhost")) == NULL) + { + return (-conv_h_errno(WSAGetLastError())); + } + } + else if ((hep = gethostbyname(name)) == NULL) + { + return (-conv_h_errno(WSAGetLastError())); + } + ((struct sockaddr_in *) saddrp)->sin_family = AF_INET; + ((struct sockaddr_in *) saddrp)->sin_port = htons((u_short) service); + memcpy((char *) &(((struct sockaddr_in *) saddrp)->sin_addr), hep->h_addr, + hep->h_length); + return (0); +} +#else +/* inet_aton version */ +int BMI_sockio_init_sock(struct sockaddr *saddrp, + const char *name, + int service) +{ + int ret; + struct in_addr addr; + + bzero((char *) saddrp, sizeof(struct sockaddr_in)); + if (name == NULL) + { + ret = inet_aton("127.0.0.1", &addr); + } + else + { + ret = inet_aton(name, &addr); + } + + if (ret == 0) return -1; + + ((struct sockaddr_in *) saddrp)->sin_family = AF_INET; + ((struct sockaddr_in *) saddrp)->sin_port = htons((u_short) service); + memcpy((char *) &(((struct sockaddr_in *) saddrp)->sin_addr), &addr, + sizeof(addr)); + + return 0; +} +#endif + + +/* nonblocking receive */ +int BMI_sockio_nbrecv(int s, + void *buf, + int len) +{ + int ret, comp = len, err; + + /* We can't read the blocking state on Windows */ + /* assert(fcntl(s, F_GETFL, 0) & O_NONBLOCK); */ + + while (comp) + { + nbrecv_restart: + ret = recv(s, 
(char *) buf, comp, DEFAULT_MSG_FLAGS); + err = WSAGetLastError(); + if (ret == 0) /* socket closed */ + { + errno = EPIPE; + return (-1); + } + if (ret == -1 && err == WSAEINTR) + { + goto nbrecv_restart; + } + else if (ret == -1 && err == WSAEWOULDBLOCK) + { + /* return what we got so far, this is a nonblocking call */ + return(len-comp); + } + else if (ret == -1) + { + return (-1); + } + comp -= ret; + buf = (char *)buf + ret; + } + return (len - comp); +} + +/* BMI_sockio_nbpeek() + * + * performs a nonblocking check to see if the amount of data requested + * is actually available in a socket. Does not actually read the data + * out. + * + * returns number of bytes available on succes, -1 on failure. + */ +int BMI_sockio_nbpeek(int s, void* buf, int len) +{ + int ret, err; + + /* We can't read the blocking state on Windows */ + /* assert(fcntl(s, F_GETFL, 0) & O_NONBLOCK); */ + + nbpeek_restart: + ret = recv(s, (char *) buf, len, (MSG_PEEK|DEFAULT_MSG_FLAGS)); + err = WSAGetLastError(); + if(ret == 0) + { + /* errno = EPIPE; */ + return (-1); + } + else if (ret == -1 && err == WSAEWOULDBLOCK) + { + return(0); + } + else if (ret == -1 && err == WSAEINTR) + { + goto nbpeek_restart; + } + else if (ret == -1) + { + return (-1); + } + + return(ret); +} + + +/* nonblocking send */ +/* should always return 0 when nothing gets done! 
*/ +int BMI_sockio_nbsend(int s, + void *buf, + int len) +{ + int ret, comp = len, err; + + while (comp) + { + nbsend_restart: + ret = send(s, (char *) buf, comp, DEFAULT_MSG_FLAGS); + err = WSAGetLastError(); + if (ret == 0 || (ret == -1 && err == WSAEWOULDBLOCK)) + return (len - comp); /* return amount completed */ + if (ret == -1 && err == WSAEINTR) + { + goto nbsend_restart; + } + else if (ret == -1) + return (-1); + comp -= ret; + buf = (char *)buf + ret; + } + return (len - comp); +} + +/* nonblocking vector send */ +int BMI_sockio_nbvector(int s, + LPWSABUF vector, + int count, + int recv_flag) +{ + int ret, err; + DWORD bytes, flags; + + /* NOTE: this function is different from the others that will + * keep making the I/O system call until EWOULDBLOCK is encountered; we + * give up after one call + */ + + /* loop over if interrupted */ + do + { + if (recv_flag) + { + /* ret = readv(s, vector, count); */ + flags = MSG_PARTIAL; + ret = WSARecv(s, vector, count, &bytes, &flags, NULL, NULL); + err = WSAGetLastError(); + } + else + { + /* ret = writev(s, vector, count); */ + flags = 0; + ret = WSASend(s, vector, count, &bytes, flags, NULL, NULL); + err = WSAGetLastError(); + } + } while ((ret == 0 && flags & MSG_PARTIAL) || (ret == -1 && err == WSAEINTR)); + + /* return zero if can't do any work at all */ + if (ret == -1 && err == WSAEWOULDBLOCK) + return(0); + + /* if data transferred or an error */ + return ret == -1 ? -1 : bytes; +} + +#ifdef __USE_SENDFILE__ +/* NBSENDFILE() - nonblocking (on the socket) send from file + * + * Here we are going to take advantage of the sendfile() call provided + * in the linux 2.2 kernel to send from an open file directly (ie. w/out + * explicitly reading into user space memory or memory mapping). + * + * We are going to set the non-block flag on the socket, but leave the + * file as is. + * + * Boy, that type on the offset for sockfile() sure is lame, isn't it? 
+ * That's going to cause us some headaches when we want to do 64-bit + * I/O... + * + * Returns -1 on error, amount of data written to socket on success. + */ +int BMI_sockio_nbsendfile(int s, + int f, + int off, + int len) +{ + int ret, comp = len, myoff; + + while (comp) + { + nbsendfile_restart: + myoff = off; + ret = sendfile(s, f, &myoff, comp); + if (ret == 0 || (ret == -1 && errno == EWOULDBLOCK)) + return (len - comp); /* return amount completed */ + if (ret == -1 && errno == EINTR) + { + goto nbsendfile_restart; + } + else if (ret == -1) + return (-1); + comp -= ret; + off += ret; + } + return (len - comp); +} +#endif + +/* routines to get and set socket options */ +int BMI_sockio_get_sockopt(int s, + int optname) +{ + int val; + socklen_t len = sizeof(val); + + if (getsockopt(s, SOL_SOCKET, optname, (char *) &val, (int *) &len) == -1) + return (-1); + else + return (val); +} + +int BMI_sockio_set_tcpopt(int s, + int optname, + int val) +{ + if (setsockopt(s, IPPROTO_TCP, optname, (char *) &val, sizeof(val)) == -1) + return (-1); + else + return (val); +} + +int BMI_sockio_set_sockopt(int s, + int optname, + int val) +{ + if (setsockopt(s, SOL_SOCKET, optname, (char *) &val, sizeof(val)) == -1) + return (-1); + else + return (val); +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/bmi/bmi_wintcp/sockio.h b/src/io/bmi/bmi_wintcp/sockio.h new file mode 100755 index 0000000..41d7535 --- /dev/null +++ b/src/io/bmi/bmi_wintcp/sockio.h @@ -0,0 +1,130 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + + +/* + * These are the exported functions from the sockio library. They + * provide a simple intuitive interface to the TCP/IP sockets API. 
+ */ + +/* + * Defines which may be set at compile time to determine functionality: + * + * __USE_SENDFILE__ turns on the use of sendfile() in the library and + * makes the BMI_sockio_nbsendfile function available to the application. + * Older glibc systems do not have this functionality so we leave it to + * be turned on manually. + */ + +#ifndef SOCKIO_H +#define SOCKIO_H + +#include +#include +/* #include */ +/* #include */ +#include + +#include "bmi-types.h" + +int BMI_sockio_new_sock(void); +int BMI_sockio_bind_sock(int, + int); +int BMI_sockio_bind_sock_specific(int sockd, + const char *name, + int service); +int BMI_sockio_connect_sock(int, + const char *, + int); +int BMI_sockio_init_sock(struct sockaddr *, + const char *, + int); +int BMI_sockio_nbrecv(int s, + void *buf, + int len); +int BMI_sockio_nbsend(int s, + void *buf, + int len); +int BMI_sockio_nbvector(int s, + LPWSABUF vector, + int count, + int recv_flag); +int BMI_sockio_get_sockopt(int s, + int optname); +int BMI_sockio_set_tcpopt(int s, + int optname, + int val); +int BMI_sockio_set_sockopt(int s, + int optname, + int size); +int BMI_sockio_nbpeek(int s, + void* buf, + int len); +#ifdef __USE_SENDFILE__ +int BMI_sockio_nbsendfile(int s, + int f, + int off, + int len); +#endif + +#define GET_RECVBUFSIZE(s) BMI_sockio_get_sockopt(s, SO_RCVBUF) +#define GET_SENDBUFSIZE(s) BMI_sockio_get_sockopt(s, SO_SNDBUF) + +/* some OS's (ie. 
Linux 1.3.xx) can't handle buffer sizes of certain + * sizes, and will hang up + */ +#ifdef BRAINDEADSOCKS +/* setting socket buffer sizes can do bad things */ +#define SET_RECVBUFSIZE(s, size) +#define SET_SENDBUFSIZE(s, size) +#else +#define SET_RECVBUFSIZE(s, size) BMI_sockio_set_sockopt(s, SO_RCVBUF, size) +#define SET_SENDBUFSIZE(s, size) BMI_sockio_set_sockopt(s, SO_SNDBUF, size) +#endif + +#define GET_MINSENDSIZE(s) BMI_sockio_get_sockopt(s, SO_SNDLOWAT) +#define GET_MINRECVSIZE(s) BMI_sockio_get_sockopt(s, SO_RCVLOWAT) +#define SET_MINSENDSIZE(s, size) BMI_sockio_set_sockopt(s, SO_SNDLOWAT, size) +#define SET_MINRECVSIZE(s, size) BMI_sockio_set_sockopt(s, SO_RCVLOWAT, size) + +/* BLOCKING / NONBLOCKING MACROS */ + +/* Windows uses ioctlsocket */ +/* #define SET_NONBLOCK(x_fd) fcntl((x_fd), F_SETFL, O_NONBLOCK | \ + fcntl((x_fd), F_GETFL, 0)) */ +#define SET_NONBLOCK(x_fd) \ +do { \ + u_long enable = 1; \ + ioctlsocket((x_fd), FIONBIO, &enable); \ +} while (0) + +/* There is no equivalent for FASYNC on Windows, so just set blocking mode */ +/*#define SET_NONBLOCK_AND_SIGIO(x_fd) \ +do { \ + fcntl((x_fd), F_SETOWN, getpid()); \ + fcntl((x_fd), F_SETFL, FASYNC | O_NONBLOCK | fcntl((x_fd), F_GETFL, 0)); \ +} while (0) */ +#define SET_NONBLOCK_AND_SIGIO(x_fd) SET_NONBLOCK(x_fd) + +/* #define CLR_NONBLOCK(x_fd) fcntl((x_fd), F_SETFL, fcntl((x_fd), F_GETFL, 0) & \ + (~O_NONBLOCK)) */ +#define CLR_NONBLOCK(x_fd) \ +do { \ + u_long enable = 0; \ + ioctlsocket((x_fd), FIONBIO, &enable); \ +} while (0) + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/server/mgmt-get-uid.sm b/src/server/mgmt-get-uid.sm new file mode 100644 index 0000000..5285208 --- /dev/null +++ b/src/server/mgmt-get-uid.sm @@ -0,0 +1,138 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ +#include + +#include "pvfs2-server.h" +#include "pint-uid-mgmt.h" +#include "pint-util.h" + +/* static array used to quickly pull uid stats from the server */ +static PVFS_uid_info_s *static_array = NULL; + +%% + +machine pvfs2_uid_mgmt_sm +{ + state prelude + { + jump pvfs2_prelude_sm; + default => do_work; + } + + state do_work + { + run uid_mgmt_do_work; + default => final_response; + } + + state final_response + { + jump pvfs2_final_response_sm; + default => cleanup; + } + + state cleanup + { + run uid_mgmt_cleanup; + default => terminate; + } +} + +%% + +/** uid_mgmt_cleanup() + * + * cleans up any resources consumed by this state machine and ends + * execution of the machine + */ +static PINT_sm_action uid_mgmt_cleanup( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + if(s_op->resp.u.mgmt_get_uid.uid_info_array) + free(s_op->resp.u.mgmt_get_uid.uid_info_array); + + return(server_state_machine_complete(smcb)); +} + +/** uid_mgmt_do_work() + * + * gathers uid statistics from server and builds response + */ +static PINT_sm_action uid_mgmt_do_work( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int i; + struct timeval oldest; + + /* allocate memory for a static array, used to quickly pull the uid + * statistics from the server without blocking access to the uid lists + */ + if (!static_array) + { + static_array = (PVFS_uid_info_s *) + malloc(UID_MGMT_MAX_HISTORY * sizeof(PVFS_uid_info_s)); + if (!static_array) + { + s_op->resp.u.mgmt_get_uid.uid_info_array = NULL; + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + } + + /* gather all uid statistics and store them in the static array */ + PINT_dump_all_uid_stats(static_array); + + /* get a timestamp for the max history we want to look through */ + PINT_util_get_current_timeval(&oldest); + oldest.tv_sec -= 
s_op->req->u.mgmt_get_uid.history; + + /* scan uid stats to determine how much info we need to send back */ + for (i = 0; i < UID_MGMT_MAX_HISTORY; i++) + { + if((static_array[i].count == 0) || + !(IN_UID_HISTORY(static_array[i].tv, oldest))) + { + break; + } + } + + /* allocate memory for and fill in our response back */ + s_op->resp.u.mgmt_get_uid.uid_info_array_count = i; + s_op->resp.u.mgmt_get_uid.uid_info_array = (PVFS_uid_info_s *) + malloc(i * sizeof(PVFS_uid_info_s)); + if (!(s_op->resp.u.mgmt_get_uid.uid_info_array)) + { + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + + memcpy(s_op->resp.u.mgmt_get_uid.uid_info_array, static_array, + (s_op->resp.u.mgmt_get_uid.uid_info_array_count * sizeof(PVFS_uid_info_s))); + + js_p->error_code = 0; + return SM_ACTION_COMPLETE; +} + +struct PINT_server_req_params pvfs2_uid_mgmt_params = +{ + .string_name = "mgmt_get_uid", + .perm = PINT_SERVER_CHECK_NONE, + .state_machine = &pvfs2_uid_mgmt_sm +}; + + +/* + * Local variables: + * mode: c + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/test/ci/jenkins-build.sh b/test/ci/jenkins-build.sh new file mode 100755 index 0000000..bc760e7 --- /dev/null +++ b/test/ci/jenkins-build.sh @@ -0,0 +1,203 @@ +#!/bin/bash + +# build parameters coming in as arguments +# for jenkins should correspond to appropriate job and matrix parameters +if [ $# -ne 3 ] +then + echo "usage: $0 " + exit +fi + +# NAME is just a friendly, no-white space name that should match the jenkins +# job so the nightly test run can get the right artifact +# OS is just the distro name +# VFS is what interface to use for VFS, valid values are: +# kernel +# kernel_helper +# fuse +NAME=$1 +OS=$2 +VFS=$3 +TEST_NAME="${1}.${2}.${3}" + +JENKINS_BUILD=1 +# use BUILD_NUMBER to guess if this is running under Jenkins or not. 
If not, +# just assume we should build what's in pwd +if [ -z "${BUILD_NUMBER}" ] +then + WORKSPACE=`pwd` + JENKINS_BUILD=0 +fi + +INSTALL_PATH="${WORKSPACE}/install" +BUILD_PATH="${WORKSPACE}/build" + +## jenkins gives us the CVS branch, otherwise it default to main +CVS_BRANCH=${CVS_BRANCH:="main"} + +## setup appropriate configure flags +# common flags +flags=" --prefix=${INSTALL_PATH} --enable-shared" + +# make sure we have our local db4 version available +if [ -d /opt/db4 ] +then + flags="${flags} --with-db=/opt/db4" +else + # see if standard location works (ubuntu with db4.8) + maj=`cat /usr/include/db.h | grep DB_VERSION_MAJOR | awk '{ print $3 }'` + min=`cat /usr/include/db.h | grep DB_VERSION_MINOR | awk '{ print $3 }'` + if [ "${maj}" -eq 4 -a "${min}" -ge 8 ] + then + echo "Using default system db library" + else + echo "No /opt/db4 directory" + exit 1 + fi +fi + +# find kernel sources and set appropriate flags taking 2.4/2.6 into account +# centos 3.9 has a 2.4 kernel, and has the same changes as redhat24 +if [ "${VFS:0:6}" = "kernel" ] +then + kern="" + if [ -f /etc/redhat-release ] && + [ "3.9" = "$(cat /etc/redhat-release | awk '{ print $3 }')" ] + then + kern=`find /usr/src -maxdepth 1 -type d -name "*$(uname -r)*"` + flags="${flags} --enable-redhat24 --with-kernel24=${kern}" + elif [ -f /etc/SuSE-release ] + then + flavor="desktop" + kern=`find /usr/src/ -maxdepth 5 -type d -name ${flavor} | head -n 1` + flags="${flags} --with-kernel=${kern}" + elif [ -f /etc/debian_version ] + then + kconf=`find /usr/src -maxdepth 3 -type f -path "*linux-source*" -name ".config"` + kern=`dirname "${kconf}"` + flags="${flags} --with-kernel=${kern}" + else + kern=`find /usr/src/kernels/ -maxdepth 1 -type d -name "*$(uname -r)*"` + flags="${flags} --with-kernel=${kern}" + fi + + if [ -z "${kern}" ] + then + echo "No kernel source found" + exit 1 + else + echo "Using kernel source at ${kern}" + fi +elif [ "${VFS}" = "fuse" ] +then + flags="${flags} --enable-fuse" 
+else + echo "Unknown interface type!" + exit 1 +fi + +## just set enable kmod_helper if that's what we want +if [ "${VFS}" = "kernel_helper" ] +then + flags="${flags} --enable-threaded-kmod-helper" +fi + +## build exceptions for distros +# centos3 apparently doesn't have epoll +if [ -f /etc/redhat-release ] && + [ "3.9" = "$(cat /etc/redhat-release | awk '{ print $3 }')" ] +then + flags="${flags} --disable-epoll" +fi + + + +## configure and build it! +# run configure, clean out previous builds, and build it +rm -rf ${BUILD_PATH} +rm -rf ${INSTALL_PATH} + +mkdir ${BUILD_PATH} +cd ${BUILD_PATH} + +echo "Configuring with flags \"$flags\"" +${WORKSPACE}/./configure $flags +if [ $? -ne 0 ] +then + echo "configure failed, failing" + exit 1 +fi + +make all +if [ $? -ne 0 ] +then + echo "make all failed, failing" + exit 1 +fi + +if [ "${VFS:0:6}" = "kernel" ] +then + # build kmod, handling 2.4/2.6 cases + if [ -f /etc/redhat-release ] && + [ "3.9" = "$(cat /etc/redhat-release | awk '{ print $3 }')" ] + then + make kmod24 + else + make kmod + fi +fi +if [ $? -ne 0 ] +then + echo "make kmod failed, failing" + exit 1 +fi + +make install +if [ $? -ne 0 ] +then + echo "make install failed, failing" + exit 1 +fi + +if [ "${VFS:0:6}" = "kernel" ] +then + # install kmod, handling 2.4/2.6 cases + if [ -f /etc/redhat-release ] && + [ "3.9" = "$(cat /etc/redhat-release | awk '{ print $3 }')" ] + then + make "DESTDIR=${INSTALL_PATH}" just_kmod24_install + make just_kmod24_apps_install + else + make kmod_prefix=${INSTALL_PATH} kmod_install + fi +fi + +# now make the tests +cd test +../../test/./configure $flags +make all +if [ $? -ne 0 ] +then + echo "configure tests failed, failing" + exit 1 +fi + +make install +if [ $? -ne 0 ] +then + echo "make install tests failed, failing" + exit 1 +fi +# back to build directory +cd ../ + +# back to original workspace +cd ../ +tar -cjf ${TEST_NAME}-bin.tar.bz2 install +if [ $? 
-ne 0 ] +then + echo "Failure creating tar of installed binaries" + exit 1 +fi + +exit 0 diff --git a/test/ci/jenkins-doc.sh b/test/ci/jenkins-doc.sh new file mode 100755 index 0000000..e4d38a1 --- /dev/null +++ b/test/ci/jenkins-doc.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +cd ${WORKSPACE} + +rm -rf build +mkdir build && cd build +if [ $? -ne 0 ] +then + echo "failure creating build directory" +fi + +echo -n "Configuring source ... " +.././configure >/dev/null 2>&1 +if [ $? -ne 0 ] +then + echo "failure configuring source" + exit 1 +fi +echo "okay" + +echo -n "Making docs ... " +make docs >/dev/null 2>&1 +if [ $? -ne 0 ] +then + echo "failure making docs" + exit 1 +fi +echo "okay" + +files=`find doc/ -regextype posix-egrep -regex ".+\.(pdf|html)"` +file_count=`echo ${files} | wc -l` +if [ ${file_count} -ne 60 ] +then + echo "Not enough documents, only ${file_count}" +fi + +echo -n "Creating tar of docs ... " +tar -cjvf ${WORKSPACE}/orange-branch-docs.tar.bz2 ${files} +if [ $? -ne 0 ] +then + echo "failed" +fi +echo "okay" + +cd ${WORKSPACE} +rm -rf build + +exit 0 diff --git a/test/ci/jenkins-test.sh b/test/ci/jenkins-test.sh new file mode 100644 index 0000000..222ecc3 --- /dev/null +++ b/test/ci/jenkins-test.sh @@ -0,0 +1,398 @@ +#!/bin/bash + +export TEST_NAME=${1}.${2}.${3}.${4}.${5} # unique name +export BUILD_TEST_NAME=${1}.${2}.${3} # unique name +export OS=$2 +export VFS=$3 +export IO=$4 +export FS=$5 + +# change the tests portion of the job URL to the build portion so we can find +# the artifact with the binaries from the last successful build +export BIN_NAME="${BUILD_TEST_NAME}-bin.tar.bz2" +export BIN_URL="$(echo ${JOB_URL} | sed s/-tests/-build/ | sed s/io=[^,]*,//)lastSuccessfulBuild/artifact/${BIN_NAME}" + +export PVFS2_LOCATION=${WORKSPACE}/install # install location +export PVFS2_SRC=${WORKSPACE} # source tree +export VERBOSE="yes" + +## server/client related locations ## +export BASE="/tmp/jenkins/nightly/${TEST_NAME}" # base dir. 
of all tests +export PVFS2_MOUNT="${BASE}/mount" # VFS mount +export PVFS2_LOG="${BASE}/logs/" # all logs go here +export PVFS2_STORAGE="${BASE}/storage/" # server backing storage + +## extra files copied in by jenkins +export PVFS2_TESTS_NAME="pvfs2-tests-nextgen.tar.bz2" +export PVFS2_EXTRA_TESTS_NAME="benchmarks-20110616.tar.bz2" +export PVFS2_TESTS="${WORKSPACE}/new_tests/" +export PVFS2_EXTRA_TESTS="${BASE}/extra/" # work dir. of extra tests + +## tests to run ## +export PVFS2_SYSINT_TESTS="${PVFS2_TESTS}/sysint-tests.d" +export PVFS2_VFS_TESTS="${PVFS2_TESTS}/vfs-tests.d/" + +export LD_LIBRARY_PATH="${PVFS2_LOCATION}/lib:/opt/db4/lib:${LD_LIBRARY_PATH}" + +log() { + if [ -n "${VERBOSE}" ] + then + echo $1 $2 | tee -a ${PVFS2_LOG}/all_tests.log + fi +} + +echo_tee() { + echo -e "$1" "$2" | tee -a ${PVFS2_LOG}/all_tests.log +} + +check_return() { + rc=$1 + msg=$2 + if [ ${rc} -ne 0 ] + then + echo_tee "aborting tests due to failed return code of ${msg}" + exit 1 + fi +} + +## create log location right off the bat +mkdir -p ${PVFS2_LOG} +echo_tee "Running tests on $(uname -n -r -m)" + +rm -rf ${BIN_URL}* +echo_tee -n "Retrieving artifact [\"$BIN_URL\"]... " +wget -q ${BIN_URL} &>/dev/null +check_return $? "wget binary tarball artifact failed" +echo_tee "ok" + +# remove previous binaries +rm -rf ${PVFS2_LOCATION} +echo_tee -n "Extracting binaries ... " +tar -xjf ${BIN_NAME} >/dev/null +check_return $? "extract binary tarball failed" +echo_tee "ok" +rm -f ${BIN_NAME} > /dev/null + +if [ ! -d ${PVFS2_LOCATION} -o ! -f ${PVFS2_LOCATION}/sbin/pvfs2-server ] +then + echo_tee "no install directory" + exit 1 +fi + +if [ ! -d ${PVFS_SRC} ] +then + echo_tee "no source directory" + exit 1 +fi + +if [ ! -f ${PVFS2_TESTS_NAME} ] +then + echo_tee "Test tarball ${PVFS2_TESTS_NAME} doesn't exist" + exit 1 +fi + +# remove previous and extract current tests +rm -rf ${PVFS2_TESTS} +mkdir -p ${PVFS2_TESTS} +echo_tee -n "Extracting tests ... 
" +tar -xjvf ${PVFS2_TESTS_NAME} -C ${PVFS2_TESTS} >/dev/null +check_return $? "extract tests failed" +echo_tee "ok" + +pvfs2_client_kernel_cleanup() { + # unmount anything, kill client processes, wait, then rmmod + sudo /bin/umount $PVFS2_MOUNT &>/dev/null + sleep 2 + sudo killall -9 pvfs2-client &>/dev/null + sleep 2 + sudo killall -9 pvfs2-client-core &>/dev/null + sleep 2 + sudo /sbin/rmmod pvfs2 &>/dev/null + return 0 +} + +pvfs2_client_fuse_cleanup() { + sudo /bin/umount $PVFS2_MOUNT &>/dev/null + sleep 2 + sudo /sbin/rmmod fuse &>/dev/null + return 0 +} + +pvfs2_server_cleanup() { + sudo killall -9 pvfs2-server &>/dev/null + return 0 +} + +pvfs2_client_kernel_start() { + + # add kernel module + # 2.4 is .o, 2.6 is .ko + mod_base=`find ${PVFS2_LOCATION}/lib -type d -name kernel` + mod_loc=`find ${mod_base} -type f -name "pvfs2.*o"` + sudo /sbin/insmod ${mod_loc} + check_return $? "insmod failed" + + # start client-core, some older sudo version won't let LD_* pass through + if [ -z "$(sudo sudo -V | grep "Environment variables to preserve")" ] + then + echo "export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}; ${PVFS2_LOCATION}/sbin/pvfs2-client -p ${PVFS2_LOCATION}/sbin/pvfs2-client-core -L ${PVFS2_LOG}/pvfs2-client-core.log" > client_run.sh + chmod +x client_run.sh + sudo ./client_run.sh + rm client_run.sh + else + sudo LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \ + ${PVFS2_LOCATION}/sbin/pvfs2-client \ + -p ${PVFS2_LOCATION}/sbin/pvfs2-client-core \ + -L ${PVFS2_LOG}/pvfs2-client-core.log + fi + check_return $? 
"client core start failed" + + # make sure we can read the log + sudo chmod 777 ${PVFS2_LOG}/pvfs2-client-core.log + sleep 3 + + # mount it up + if [ -f /etc/redhat-release ] && + [ "3.9" = "$(cat /etc/redhat-release | awk '{ print $3 }')" ] + then + echo "export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}; ${PVFS2_LOCATION}/sbin/mount.pvfs2 tcp://`hostname -s`:3396/pvfs2-fs ${PVFS2_MOUNT}" > client_mount.sh + chmod +x client_mount.sh + sudo ./client_mount.sh + rm client_mount.sh + else + sudo /bin/mount -t pvfs2 tcp://`hostname -s`:3396/pvfs2-fs \ + ${PVFS2_MOUNT} + fi + check_return $? "mount failed" + return 0 +} + +pvfs2_client_fuse_start() { + + mod=`/sbin/lsmod | grep fuse` + if [ -z "${mod}" ] + then + sudo /sbin/modprobe fuse + check_return $? "modprobe failed" + fi + + # mount it up + # start pvfs2fuse, some older sudo version won't let LD_* pass through + if [ -z "$(sudo sudo -V | grep "Environment variables to preserve")" ] + then + echo "export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}; ${PVFS2_LOCATION}/bin/pvfs2fuse -o fs_spec=tcp://`hostname -s`:3396/pvfs2-fs,allow_other ${PVFS2_MOUNT}" > client_run.sh + chmod +x client_run.sh + sudo ./client_run.sh + else + sudo LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \ + ${PVFS2_LOCATION}/bin/pvfs2fuse \ + -o fs_spec=tcp://`hostname -s`:3396/pvfs2-fs,allow_other \ + ${PVFS2_MOUNT} + fi +} + +pvfs2_server_start() { + cd $BASE + ${PVFS2_LOCATION}/bin/pvfs2-genconfig ${BASE}/fs.conf \ + --protocol tcp \ + --iospec="`hostname -s`:{3396-3399}" \ + --metaspec="`hostname -s`:{3396-3399}" \ + --storage ${PVFS2_STORAGE} \ + --trove-method=${IO:=alt-aio} \ + --logging "none" \ + --logfile=${PVFS2_LOG}/pvfs2-server-${TEST_NAME}.log --quiet + check_return $? "pvfs2-genconfig failed" + + for alias in `grep 'Alias ' fs.conf | cut -d ' ' -f 2` + do + # create the space + ${PVFS2_LOCATION}/sbin/pvfs2-server \ + -p ${BASE}/pvfs2-server-${alias}.pid \ + -f ${BASE}/fs.conf -a $alias \ + &>${PVFS2_LOG}/pvfs2-server-create-${alias}.log + check_return $? 
"pvfs2-server -f failed for $alias" + + # start the server + ${PVFS2_LOCATION}/sbin/pvfs2-server \ + -p ${BASE}/pvfs2-server-${alias}.pid \ + ${BASE}/fs.conf $server_conf -a $alias \ + &>${PVFS2_LOG}/pvfs2-server-start-${alias}.log + check_return $? "pvfs2-server failed for $alias" + done + + # store out mount path + echo "tcp://`hostname -s`:3396/pvfs2-fs ${PVFS2_MOUNT} pvfs2 defaults 0 0" \ + > ${BASE}/pvfs2tab + export PVFS2TAB_FILE=${BASE}/pvfs2tab + + # up the logging chatter + ${PVFS2_LOCATION}/bin/pvfs2-set-debugmask -m ${PVFS2_MOUNT} "all" \ + &>/dev/null + check_return $? "pvfs2-set-debugmask failed" + return 0 +} + +external_tests_setup() +{ + rm -rf ${PVFS2_EXTRA_TESTS}/* + + tar -xjf ${WORKSPACE}/${PVFS2_EXTRA_TESTS_NAME} -C ${PVFS2_EXTRA_TESTS} \ + >/dev/null + check_return $? "extra tests untar extra failed" + + mv ${PVFS2_EXTRA_TESTS}/benchmarks/* ${PVFS2_EXTRA_TESTS}/ + check_return $? "extra tests mv failed" +} + +run_parts() { + cd $1 + echo_tee "* TEST GROUP START *" + for f in * + do + [ -d $f ] && continue + if [ -x $f ] + then + echo_tee -n "* TEST $f: " + ./$f > ${PVFS2_LOG}/${f}-${TEST_NAME}.log + rc=$? + if [ ${rc} -eq 0 ] + then + nr_passed=$((nr_passed + 1)) + echo_tee "OK" + else + nr_failed=$((nr_failed + 1)) + echo_tee "FAILED (${rc})" + fi + fi + done + echo_tee "* TEST GROUP DONE *" +} + +echo_tee "Running test ${TEST_NAME} in ${BASE}" +# clean up if a nasty failure from the last test +if [ "${VFS:0:6}" = "kernel" ] +then + pvfs2_client_kernel_cleanup +elif [ "${VFS}" = "fuse" ] +then + pvfs2_client_fuse_cleanup +fi + +pvfs2_server_cleanup + +sudo rm -rf $BASE +mkdir -p ${PVFS2_MOUNT}; check_return $? "mkdir on ${PVFS2_MOUNT}" +chmod 777 ${PVFS2_MOUNT} +mkdir -p ${PVFS2_LOG}; check_return $? "mkdir on ${PVFS2_LOG}" +chmod 777 ${PVFS2_MOUNT} +mkdir -p ${PVFS2_STORAGE}; check_return $? "mkdir on ${PVFS2_STORAGE}" +chmod 777 ${PVFS2_MOUNT} +mkdir -p ${PVFS2_EXTRA_TESTS}; check_return $? 
"mkdir on ${PVFS2_EXTRA_TESTS}" +chmod 777 ${PVFS2_MOUNT} + +#exec 6<&1 +#exec 7<&2 +exec 2>&1 + +echo_tee -n "setup external tests ... " +external_tests_setup +if [ $? -eq 0 ] +then + echo_tee "okay" +else + echo_tee "failed" + exit 1 +fi + +echo_tee -n "setup servers... " +pvfs2_server_start +if [ $? -eq 0 ] +then + echo_tee "okay" +else + echo_tee "failed" + exit 1 +fi + +echo_tee -n "starting client... " +if [ "${VFS:0:6}" = "kernel" ] +then + pvfs2_client_kernel_start +elif [ "${VFS}" = "fuse" ] +then + pvfs2_client_fuse_start +else + echo_tee "Unknown VFS test type: ${VFS}, exiting" + exit 1 +fi +if [ $? -eq 0 ] +then + echo_tee "okay" +else + echo_tee "failed" + exit 1 +fi + +nr_passed=0 +nr_failed=0 + +echo_tee "running SYSINT tests" +run_parts ${PVFS2_SYSINT_TESTS} +sleep 3 +echo_tee "running VFS tests" +run_parts ${PVFS2_VFS_TESTS} + +# cleanup client and then server + +echo_tee -n "stopping client... " +if [ "${VFS:0:6}" = "kernel" ] +then + pvfs2_client_kernel_cleanup +elif [ "${VFS}" = "fuse" ] +then + pvfs2_client_fuse_cleanup +else + echo_tee "Unknown test, exiting" + exit 1 +fi + +if [ $? -eq 0 ] +then + echo_tee "okay" +fi + +echo_tee -n "stopping server... " +pvfs2_server_cleanup +if [ $? -eq 0 ] +then + echo_tee "okay" +fi + +# restore file descriptors and close temporary fds +#exec 1<&6 6<&- +#exec 2<&7 7<&- + +echo_tee "Total Failed: ${nr_failed}" + +# remove extracted binaries and artifact +rm -rf ${PVFS2_LOCATION} +rm -rf ${BIN_NAME}* + +# remove extraced tests and tarball +rm -rf ${WORKSPACE}/${PVFS2_TESTS_NAME}* +rm -rf ${PVFS2_TESTS} + +# remove benchmarks tar ball +rm -rf ${WORKSPACE}/${PVFS2_EXTRA_TESTS_NAME} + +echo_tee -n "creating log tarball... 
" +cd ${WORKSPACE} +tar -cjvf ${WORKSPACE}/test-logs.tar.bz2 ${PVFS2_LOG} >/dev/null 2>&1 +echo_tee "done" + +if [ ${nr_failed} -gt 0 ] +then + exit 1 +else + exit 0 +fi diff --git a/test/common/gen-locks/condvar1.c b/test/common/gen-locks/condvar1.c new file mode 100755 index 0000000..2f85cad --- /dev/null +++ b/test/common/gen-locks/condvar1.c @@ -0,0 +1,22 @@ + +#include +#include + +#include "gen-locks.h" + +static gen_cond_t cv = NULL; + +int main() +{ + assert(cv == NULL); + + assert(gen_cond_init(&cv) == 0); + + assert(cv != NULL); + + assert(gen_cond_destroy(&cv) == 0); + + assert(cv == NULL); + + return 0; +} \ No newline at end of file diff --git a/test/common/gen-locks/condvar2_1.c b/test/common/gen-locks/condvar2_1.c new file mode 100755 index 0000000..3aa74c2 --- /dev/null +++ b/test/common/gen-locks/condvar2_1.c @@ -0,0 +1,185 @@ +/* + * File: condvar2_1.c + * + * + * -------------------------------------------------------------------------- + * + * Pthreads-win32 - POSIX Threads Library for Win32 + * Copyright(C) 1998 John E. Bossom + * Copyright(C) 1999,2005 Pthreads-win32 contributors + * + * Contact Email: rpj@callisto.canberra.edu.au + * + * The current list of contributors is contained + * in the file CONTRIBUTORS included with the source + * code distribution. The list can also be seen at the + * following World Wide Web location: + * http://sources.redhat.com/pthreads-win32/contributors.html + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library in the file COPYING.LIB; + * if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * -------------------------------------------------------------------------- + * + * Test Synopsis: + * - Test timeout of multiple waits on a CV with no signal/broadcast. + * + * Test Method (Validation or Falsification): + * - Validation + * + * Requirements Tested: + * - + * + * Features Tested: + * - + * + * Cases Tested: + * - + * + * Description: + * - Because the CV is never signaled, we expect the waits to time out. + * + * Environment: + * - + * + * Input: + * - None. + * + * Output: + * - File name, Line number, and failed expression on failure. + * - No output on success. + * + * Assumptions: + * - + * + * Pass Criteria: + * - pthread_cond_timedwait returns ETIMEDOUT. + * - Process returns zero exit status. + * + * Fail Criteria: + * - pthread_cond_timedwait does not return ETIMEDOUT. + * - Process returns non-zero exit status. + */ + +// #define _WIN32_WINNT 0x400 + +/* #include "test.h" */ +#define _USE_32BIT_TIME_T + +#include +#include +#include +#include +#include + +#include "gen-locks.h" + +static gen_cond_t cv; +static gen_mutex_t mutex; +static struct timespec abstime = { 0, 0 }; + +enum { + NUMTHREADS = 5 +}; + +DWORD WINAPI +mythread(void * arg) +{ + assert(gen_mutex_lock(&mutex) == 0); + fprintf(stderr, "thread %d locked mutex\n", (DWORD) arg); + + assert(gen_cond_timedwait(&cv, &mutex, &abstime) == ETIMEDOUT); + + assert(gen_mutex_unlock(&mutex) == 0); + fprintf(stderr, "thread %d unlocked mutex\n", (DWORD) arg); + + return (DWORD) arg; +} + +int thread_join(gen_thread_t thread, LPDWORD retval) +{ + BOOL rc; + DWORD iretval; + LPDWORD pretval = (retval) ? 
retval : &iretval; + + do + { + rc = GetExitCodeThread(thread, pretval); + if (rc && *pretval == STILL_ACTIVE) + { + Sleep(500); + } + } while (rc && *pretval == STILL_ACTIVE); + + return 0; +} + +int +main() +{ + int i; + gen_thread_t t[NUMTHREADS + 1]; + int result = 0; + struct _timeb currSysTime; + const DWORD NANOSEC_PER_MILLISEC = 1000000; + + assert(gen_cond_init(&cv) == 0); + + assert(gen_mutex_init(&mutex) == 0); + + /* get current system time */ + _ftime_s(&currSysTime); + + abstime.tv_sec = currSysTime.time; + abstime.tv_nsec = NANOSEC_PER_MILLISEC * currSysTime.millitm; + + abstime.tv_sec += 5; + + assert(gen_mutex_lock(&mutex) == 0); + fprintf(stderr, "main thread locked mutex\n"); + + for (i = 1; i <= NUMTHREADS; i++) + { + /* assert(pthread_create(&t[i], NULL, mythread, (void *) i) == 0); */ + assert((t[i] = CreateThread(NULL, 0, mythread, (void *) i, 0, NULL)) != NULL); + } + + assert(gen_mutex_unlock(&mutex) == 0); + fprintf(stderr, "main thread unlocked mutex\n"); + + for (i = 1; i <= NUMTHREADS; i++) + { + assert(thread_join(t[i], (LPDWORD) &result) == 0); + fprintf(stderr, "i = %d result = %ld\n", i, result); + assert(result == i); + } + + { + int result = gen_cond_destroy(&cv); + if (result != 0) + { + fprintf(stderr, "Result = %d\n", result); + fprintf(stderr, "\tWaitersBlocked = %ld\n", cv->nWaitersBlocked); + fprintf(stderr, "\tWaitersGone = %ld\n", cv->nWaitersGone); + fprintf(stderr, "\tWaitersToUnblock = %ld\n", cv->nWaitersToUnblock); + fflush(stderr); + } + assert(result == 0); + } + + getchar(); + + return 0; +} diff --git a/test/common/gen-locks/condvar3.c b/test/common/gen-locks/condvar3.c new file mode 100755 index 0000000..d3012f6 --- /dev/null +++ b/test/common/gen-locks/condvar3.c @@ -0,0 +1,174 @@ +/* + * File: condvar3.c + * + * + * -------------------------------------------------------------------------- + * + * Pthreads-win32 - POSIX Threads Library for Win32 + * Copyright(C) 1998 John E. 
Bossom + * Copyright(C) 1999,2005 Pthreads-win32 contributors + * + * Contact Email: rpj@callisto.canberra.edu.au + * + * The current list of contributors is contained + * in the file CONTRIBUTORS included with the source + * code distribution. The list can also be seen at the + * following World Wide Web location: + * http://sources.redhat.com/pthreads-win32/contributors.html + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library in the file COPYING.LIB; + * if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * -------------------------------------------------------------------------- + * + * Test Synopsis: + * - Test basic function of a CV + * + * Test Method (Validation or Falsification): + * - Validation + * + * Requirements Tested: + * - + * + * Features Tested: + * - + * + * Cases Tested: + * - + * + * Description: + * - The primary thread takes the lock before creating any threads. + * The secondary thread blocks on the lock allowing the primary + * thread to enter the cv wait state which releases the lock. + * The secondary thread then takes the lock and signals the waiting + * primary thread. + * + * Environment: + * - + * + * Input: + * - None. + * + * Output: + * - File name, Line number, and failed expression on failure. + * - No output on success. 
+ * + * Assumptions: + * - + * + * Pass Criteria: + * - pthread_cond_timedwait returns 0. + * - Process returns zero exit status. + * + * Fail Criteria: + * - pthread_cond_timedwait returns ETIMEDOUT. + * - Process returns non-zero exit status. + */ + +#define _USE_32BIT_TIME_T +#include +#include +#include +#include +#include + +#include "gen-locks.h" + +static gen_cond_t cv; +static gen_mutex_t mutex; +static int shared = 0; + +enum { + NUMTHREADS = 2 /* Including the primary thread. */ +}; + +DWORD WINAPI +mythread(void * arg) +{ + int result = 0; + + assert(gen_mutex_lock(&mutex) == 0); + shared++; + assert(gen_mutex_unlock(&mutex) == 0); + + if ((result = gen_cond_signal(&cv)) != 0) + { + printf("Error = %d\n", result); + } + assert(result == 0); + + + return 0; +} + +int thread_join(gen_thread_t thread, LPDWORD retval) +{ + BOOL rc; + DWORD iretval; + LPDWORD pretval = (retval) ? retval : &iretval; + + do + { + rc = GetExitCodeThread(thread, pretval); + if (rc && *pretval == STILL_ACTIVE) + { + Sleep(500); + } + } while (rc && *pretval == STILL_ACTIVE); + + return 0; +} + +int +main() +{ + gen_thread_t t[NUMTHREADS]; + struct timespec abstime = { 0, 0 }; + struct _timeb currSysTime; + const DWORD NANOSEC_PER_MILLISEC = 1000000; + + /* assert((t[0] = pthread_self()).p != NULL); */ + t[0] = gen_thread_self(); + + assert(gen_cond_init(&cv) == 0); + + assert(gen_mutex_init(&mutex) == 0); + + assert(gen_mutex_lock(&mutex) == 0); + + /* get current system time */ + _ftime_s(&currSysTime); + + abstime.tv_sec = currSysTime.time; + abstime.tv_nsec = NANOSEC_PER_MILLISEC * currSysTime.millitm; + + /* assert(pthread_create(&t[1], NULL, mythread, (void *) 1) == 0); */ + assert((t[1] = CreateThread(NULL, 0, mythread, (void *) 1, 0, NULL)) != NULL); + + abstime.tv_sec += 5; + + while (! 
(shared > 0)) + assert(gen_cond_timedwait(&cv, &mutex, &abstime) == 0); + + assert(shared > 0); + + assert(gen_mutex_unlock(&mutex) == 0); + + assert(thread_join(t[1], NULL) == 0); + + assert(gen_cond_destroy(&cv) == 0); + + return 0; +} diff --git a/test/common/gen-locks/condvar3_1.c b/test/common/gen-locks/condvar3_1.c new file mode 100755 index 0000000..6c19854 --- /dev/null +++ b/test/common/gen-locks/condvar3_1.c @@ -0,0 +1,222 @@ +/* + * File: condvar3_1.c + * + * + * -------------------------------------------------------------------------- + * + * Pthreads-win32 - POSIX Threads Library for Win32 + * Copyright(C) 1998 John E. Bossom + * Copyright(C) 1999,2005 Pthreads-win32 contributors + * + * Contact Email: rpj@callisto.canberra.edu.au + * + * The current list of contributors is contained + * in the file CONTRIBUTORS included with the source + * code distribution. The list can also be seen at the + * following World Wide Web location: + * http://sources.redhat.com/pthreads-win32/contributors.html + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library in the file COPYING.LIB; + * if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * -------------------------------------------------------------------------- + * + * Test Synopsis: + * - Test timeout of multiple waits on a CV with some signaled. 
+ * + * Test Method (Validation or Falsification): + * - Validation + * + * Requirements Tested: + * - + * + * Features Tested: + * - + * + * Cases Tested: + * - + * + * Description: + * - Because some CVs are never signaled, we expect their waits to time out. + * Some are signaled, the rest time out. Pthread_cond_destroy() will fail + * unless all are accounted for, either signaled or timedout. + * + * Environment: + * - + * + * Input: + * - None. + * + * Output: + * - File name, Line number, and failed expression on failure. + * - No output on success. + * + * Assumptions: + * - + * + * Pass Criteria: + * - pthread_cond_timedwait returns ETIMEDOUT. + * - Process returns zero exit status. + * + * Fail Criteria: + * - pthread_cond_timedwait does not return ETIMEDOUT. + * - Process returns non-zero exit status. + */ + +//#define _WIN32_WINNT 0x400 +#define _USE_32BIT_TIME_T + +#include +#include +#include +#include +#include + +#include "gen-locks.h" + +static gen_cond_t cv; +static gen_cond_t cv1; +static gen_mutex_t mutex; +static gen_mutex_t mutex1; +static struct timespec abstime = { 0, 0 }; +static int timedout = 0; +static int signaled = 0; +static int awoken = 0; +static int waiting = 0; + +enum { + NUMTHREADS = 30 +}; + +DWORD WINAPI +mythread(void * arg) +{ + int result; + + assert(gen_mutex_lock(&mutex1) == 0); + ++waiting; + assert(gen_mutex_unlock(&mutex1) == 0); + assert(gen_cond_signal(&cv1) == 0); + + assert(gen_mutex_lock(&mutex) == 0); + result = gen_cond_timedwait(&cv, &mutex, &abstime); + if (result == ETIMEDOUT) + { + timedout++; + } + else + { + awoken++; + } + assert(gen_mutex_unlock(&mutex) == 0); + + return (DWORD) arg; +} + +int thread_join(gen_thread_t thread, LPDWORD retval) +{ + BOOL rc; + DWORD iretval; + LPDWORD pretval = (retval) ? 
retval : &iretval; + + do + { + rc = GetExitCodeThread(thread, pretval); + if (rc && *pretval == STILL_ACTIVE) + { + Sleep(500); + } + } while (rc && *pretval == STILL_ACTIVE); + + return 0; +} + +int +main() +{ + int i; + gen_thread_t t[NUMTHREADS + 1]; + int result = 0; + struct _timeb currSysTime; + const DWORD NANOSEC_PER_MILLISEC = 1000000; + + assert(gen_cond_init(&cv) == 0); + assert(gen_cond_init(&cv1) == 0); + + assert(gen_mutex_init(&mutex) == 0); + assert(gen_mutex_init(&mutex1) == 0); + + /* get current system time */ + _ftime_s(&currSysTime); + + abstime.tv_sec = currSysTime.time; + abstime.tv_nsec = NANOSEC_PER_MILLISEC * currSysTime.millitm; + + abstime.tv_sec += 5; + + assert(gen_mutex_lock(&mutex1) == 0); + + for (i = 1; i <= NUMTHREADS; i++) + { + /* assert(pthread_create(&t[i], NULL, mythread, (void *) i) == 0); */ + assert((t[i] = CreateThread(NULL, 0, mythread, (void *) i, 0, NULL)) != NULL); + } + + do { + assert(gen_cond_wait(&cv1, &mutex1) == 0); + } while ( NUMTHREADS > waiting ); + + assert(gen_mutex_unlock(&mutex1) == 0); + + for (i = NUMTHREADS/3; i <= 2*NUMTHREADS/3; i++) + { + assert(gen_cond_signal(&cv) == 0); + + signaled++; + } + + for (i = 1; i <= NUMTHREADS; i++) + { + assert(thread_join(t[i], (LPDWORD) &result) == 0); + assert(result == i); + } + + fprintf(stderr, "awk = %d\n", awoken); + fprintf(stderr, "sig = %d\n", signaled); + fprintf(stderr, "tot = %d\n", timedout); + + assert(signaled == awoken); + assert(timedout == NUMTHREADS - signaled); + + assert(gen_cond_destroy(&cv1) == 0); + + { + int result = gen_cond_destroy(&cv); + if (result != 0) + { + fprintf(stderr, "Result = %d\n", result); + fprintf(stderr, "\tWaitersBlocked = %ld\n", cv->nWaitersBlocked); + fprintf(stderr, "\tWaitersGone = %ld\n", cv->nWaitersGone); + fprintf(stderr, "\tWaitersToUnblock = %ld\n", cv->nWaitersToUnblock); + fflush(stderr); + } + assert(result == 0); + } + + assert(gen_mutex_destroy(&mutex1) == 0); + assert(gen_mutex_destroy(&mutex) == 0); + 
+ return 0; +} diff --git a/test/common/gen-locks/condvar3_2.c b/test/common/gen-locks/condvar3_2.c new file mode 100755 index 0000000..9a673cf --- /dev/null +++ b/test/common/gen-locks/condvar3_2.c @@ -0,0 +1,211 @@ +/* + * File: condvar3_2.c + * + * + * -------------------------------------------------------------------------- + * + * Pthreads-win32 - POSIX Threads Library for Win32 + * Copyright(C) 1998 John E. Bossom + * Copyright(C) 1999,2005 Pthreads-win32 contributors + * + * Contact Email: rpj@callisto.canberra.edu.au + * + * The current list of contributors is contained + * in the file CONTRIBUTORS included with the source + * code distribution. The list can also be seen at the + * following World Wide Web location: + * http://sources.redhat.com/pthreads-win32/contributors.html + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library in the file COPYING.LIB; + * if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * -------------------------------------------------------------------------- + * + * Test Synopsis: + * - Test timeout of multiple waits on a CV with remainder broadcast awoken. 
+ * + * Test Method (Validation or Falsification): + * - Validation + * + * Requirements Tested: + * - + * + * Features Tested: + * - + * + * Cases Tested: + * - + * + * Description: + * - Because some CVs are never signaled, we expect their waits to time out. + * Some time out, the rest are broadcast signaled. Pthread_cond_destroy() will fail + * unless all are accounted for, either signaled or timedout. + * + * Environment: + * - + * + * Input: + * - None. + * + * Output: + * - File name, Line number, and failed expression on failure. + * - No output on success. + * + * Assumptions: + * - + * + * Pass Criteria: + * - pthread_cond_timedwait returns ETIMEDOUT. + * - Process returns zero exit status. + * + * Fail Criteria: + * - pthread_cond_timedwait does not return ETIMEDOUT. + * - Process returns non-zero exit status. + */ + +#define _USE_32BIT_TIME_T + +#include +#include +#include +#include +#include + +#include "gen-locks.h" + +static gen_cond_t cv; +static gen_mutex_t mutex; +static struct timespec abstime = { 0, 0 }; +static struct timespec abstime2 = { 0, 0 }; +static int timedout = 0; +static int awoken = 0; + +enum { + NUMTHREADS = 30 +}; + +DWORD WINAPI +mythread(void * arg) +{ + int result; + + assert(gen_mutex_lock(&mutex) == 0); + + abstime2.tv_sec = abstime.tv_sec; + + if ((int) arg % 3 == 0) + { + abstime2.tv_sec += 2; + } + + result = gen_cond_timedwait(&cv, &mutex, &abstime2); + assert(gen_mutex_unlock(&mutex) == 0); + if (result == ETIMEDOUT) + { + InterlockedIncrement((LPLONG)&timedout); + } + else + { + InterlockedIncrement((LPLONG)&awoken); + } + + + return (DWORD) arg; +} + +int thread_join(gen_thread_t thread, LPDWORD retval) +{ + BOOL rc; + DWORD iretval; + LPDWORD pretval = (retval) ? 
retval : &iretval; + + do + { + rc = GetExitCodeThread(thread, pretval); + if (rc && *pretval == STILL_ACTIVE) + { + Sleep(500); + } + } while (rc && *pretval == STILL_ACTIVE); + + return 0; +} + + +int +main() +{ + int i; + gen_thread_t t[NUMTHREADS + 1]; + int result = 0; + struct _timeb currSysTime; + const DWORD NANOSEC_PER_MILLISEC = 1000000; + + assert(gen_cond_init(&cv) == 0); + + assert(gen_mutex_init(&mutex) == 0); + + /* get current system time */ + _ftime_s(&currSysTime); + + abstime.tv_sec = abstime2.tv_sec = currSysTime.time + 5; + abstime.tv_nsec = abstime2.tv_nsec = NANOSEC_PER_MILLISEC * currSysTime.millitm; + + assert(gen_mutex_lock(&mutex) == 0); + + for (i = 1; i <= NUMTHREADS; i++) + { + assert((t[i] = CreateThread(NULL, 0, mythread, (void *) i, 0, NULL)) != NULL); + } + + assert(gen_mutex_unlock(&mutex) == 0); + + for (i = 1; i <= NUMTHREADS; i++) + { + assert(thread_join(t[i], (LPDWORD) &result) == 0); + assert(result == i); + /* + * Approximately 2/3rds of the threads are expected to time out. + * Signal the remainder after some threads have woken up and exited + * and while some are still waking up after timeout. + * Also tests that redundant broadcasts don't return errors. 
+ */ + + if (InterlockedExchangeAdd((LPLONG)&awoken, 0L) > NUMTHREADS/3) + { + assert(gen_cond_broadcast(&cv) == 0); + } + + } + + assert(awoken == NUMTHREADS - timedout); + + { + int result = gen_cond_destroy(&cv); + if (result != 0) + { + fprintf(stderr, "Result = %d\n", result); + fprintf(stderr, "\tWaitersBlocked = %ld\n", cv->nWaitersBlocked); + fprintf(stderr, "\tWaitersGone = %ld\n", cv->nWaitersGone); + fprintf(stderr, "\tWaitersToUnblock = %ld\n", cv->nWaitersToUnblock); + fflush(stderr); + } + assert(result == 0); + } + + assert(gen_mutex_destroy(&mutex) == 0); + + return 0; +} diff --git a/test/io/job/test-job-client.c b/test/io/job/test-job-client.c new file mode 100755 index 0000000..91e2c85 --- /dev/null +++ b/test/io/job/test-job-client.c @@ -0,0 +1,158 @@ +/* This test is mainly to check that the job library can be linked */ + +#include + +#include "job.h" +#include "gossip.h" + +/* some fake items to send around */ +struct request_foo +{ + int x; +}; +struct ack_foo +{ + int x; +}; + +int main(int argc, char **argv) +{ + + int ret = -1; + struct request_foo* req = NULL; + struct ack_foo* ack = NULL; + PVFS_BMI_addr_t server_addr; + job_status_s status1; + job_id_t tmp_id; + job_context_id context; + + /* set debugging level */ + gossip_enable_stderr(); + gossip_set_debug_mask(0, 0); + + /* start the BMI interface */ + ret = BMI_initialize(NULL, NULL, 0); + if(ret < 0) + { + fprintf(stderr, "BMI_initialize failure.\n"); + return(-1); + } + +/* + ret = trove_initialize( + TROVE_METHOD_DBPF, NULL, "/tmp/pvfs2-test-space", "/tmp/pvfs2-test-space", 0); + if(ret < 0) + { + fprintf(stderr, "trove_initialize failure.\n"); + return(-1); + } +*/ + /* start the job interface */ + ret = job_initialize(0); + if(ret < 0) + { + fprintf(stderr, "job_initialize failure.\n"); + return(-1); + } + + ret = job_open_context(&context); + if(ret < 0) + { + fprintf(stderr, "job_open_context() failure.\n"); + return(-1); + } + + /* lookup the server to get a BMI style 
address for it */ + ret = BMI_addr_lookup(&server_addr, "tcp://localhost:3334"); + if(ret < 0) + { + fprintf(stderr, "BMI_addr_lookup failure.\n"); + return(-1); + } + + /* allocate some buffers for the req and ack */ + req = BMI_memalloc(server_addr, sizeof(struct request_foo), + BMI_SEND); + ack = BMI_memalloc(server_addr, sizeof(struct ack_foo), + BMI_RECV); + if(!ack || ! req) + { + fprintf(stderr, "BMI_memalloc failure.\n"); + return(-1); + } + + /* send a message */ + ret = job_bmi_send(server_addr, req, sizeof(struct request_foo), + 0, BMI_PRE_ALLOC, 1, NULL, 0, &status1, &tmp_id, context, + JOB_TIMEOUT_INF, NULL); + if(ret < 0) + { + fprintf(stderr, "job_bmi_send() failure.\n"); + return(-1); + } + if(ret == 0) + { + int count = 0; + ret = job_test(tmp_id, &count, NULL, &status1, -1, context); + if(ret < 0) + { + fprintf(stderr, "job_test() failure.\n"); + return(-1); + } + } + + /* check status */ + if(status1.error_code != 0) + { + fprintf(stderr, "job failure.\n"); + return(-1); + } + + /* receive a message */ + ret = job_bmi_recv(server_addr, ack, sizeof(struct ack_foo), + 0, BMI_PRE_ALLOC, NULL, 0, &status1, &tmp_id, context, + JOB_TIMEOUT_INF, NULL); + if(ret < 0) + { + fprintf(stderr, "job_bmi_recv() failure.\n"); + return(-1); + } + if(ret == 0) + { + int count = 0; + ret = job_test(tmp_id, &count, NULL, &status1, -1, context); + if(ret < 0) + { + fprintf(stderr, "job_test() failure.\n"); + return(-1); + } + } + + /* check status */ + if(status1.error_code != 0) + { + fprintf(stderr, "job failure.\n"); + return(-1); + } + + /* check the size */ + if(status1.actual_size != sizeof(struct ack_foo)) + { + fprintf(stderr, "short recv.\n"); + return(-1); + } + + /* free memory buffers */ + BMI_memfree(server_addr, req, sizeof(struct request_foo), + BMI_SEND); + BMI_memfree(server_addr, ack, sizeof(struct ack_foo), + BMI_RECV); + + /* shut down the interfaces */ + job_close_context(context); + job_finalize(); + BMI_finalize(); +/* 
trove_finalize(TROVE_METHOD_DBPF); */ + + return(0); +} \ No newline at end of file diff --git a/test/io/job/test-job-server.c b/test/io/job/test-job-server.c new file mode 100755 index 0000000..9442fca --- /dev/null +++ b/test/io/job/test-job-server.c @@ -0,0 +1,182 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* this is an example server application that uses the job interface */ + +#include +#include + +#include "job.h" +#include "gossip.h" + +/* some fake items to send around */ +struct request_foo +{ + int x; +}; +struct ack_foo +{ + int x; +}; + + + +int main(int argc, char **argv) +{ + + int ret = -1; + struct ack_foo* ack = NULL; + job_status_s status1; + struct BMI_unexpected_info req_info; + job_id_t job_id; + int outcount; + job_id_t tmp_id; + job_context_id context; + + /* set debugging level */ + gossip_enable_stderr(); + gossip_set_debug_mask(0, 0); + + + /* start the BMI interface */ + ret = BMI_initialize("bmi_tcp", "tcp://localhost:3334", BMI_INIT_SERVER); + if(ret < 0) + { + fprintf(stderr, "BMI_initialize failure.\n"); + return(-1); + } + + /* + ret = trove_initialize( + TROVE_METHOD_DBPF, NULL, "/tmp/pvfs2-test-space", "/tmp/pvfs2-test-space", 0); + if(ret < 0) + { + fprintf(stderr, "trove_initialize failure.\n"); + return(-1); + } + */ + + /* start the flow interface */ + ret = PINT_flow_initialize("flowproto_multiqueue", 0); + if(ret < 0) + { + fprintf(stderr, "flow_init failure.\n"); + return(-1); + } + + /* start the job interface */ + ret = job_initialize(0); + if(ret < 0) + { + fprintf(stderr, "job_initialize failure.\n"); + return(-1); + } + + ret = job_open_context(&context); + if(ret < 0) + { + fprintf(stderr, "job_open_context() failure.\n"); + return(-1); + } + + + + /* post a job for unexpected receive */ + ret = job_bmi_unexp(&req_info, NULL, 0, &status1, &job_id, 0, context); + if(ret < 0) + { + fprintf(stderr, "job_bmi_unexp() failure.\n"); + return(-1); + 
} + if(ret != 1) + { +#if 0 + /* exercise testworld() interface, block indefinitely */ + outcount = 1; + ret = job_testworld(&job_id, &outcount, NULL, &status1, -1); + if(ret < 0 || outcount == 0) + { + fprintf(stderr, "job_testworld() failure.\n"); + return(-1); + } + + /* alternatively, try out the testsome interface */ + outcount = 1; + ret = job_testsome(&job_id, &outcount, &foo, NULL, &status1, -1); + if(ret < 0 || outcount == 0) + { + fprintf(stderr, "job_testsome() failure.\n"); + return(-1); + } +#else + + /* ... or maybe even give job_test() a whirl */ + ret = job_test(job_id, &outcount, NULL, &status1, -1, context); + if(ret < 0 || outcount == 0) + { + fprintf(stderr, "job_test() failure.\n"); + return(-1); + } + +#endif + } + + /* check status */ + if(status1.error_code != 0) + { + fprintf(stderr, "Bad status in unexp recv.\n"); + return(-1); + } + + /* allocate a buffer for the ack */ + ack = BMI_memalloc(req_info.addr, sizeof(struct ack_foo), + BMI_SEND); + if(!ack) + { + fprintf(stderr, "BMI_memalloc failure.\n"); + return(-1); + } + + /* send a message */ + ret = job_bmi_send(req_info.addr, ack, sizeof(struct ack_foo), + 0, BMI_PRE_ALLOC, 0, NULL, 0, &status1, &tmp_id, context, + JOB_TIMEOUT_INF, NULL); + if(ret < 0) + { + fprintf(stderr, "job_bmi_send() failure.\n"); + return(-1); + } + if(ret == 0) + { + int count = 0; + ret = job_test(tmp_id, &count, NULL, &status1, -1, context); + if(ret < 0) + { + fprintf(stderr, "job_test() failure.\n"); + return(-1); + } + } + + + /* check status */ + if(status1.error_code != 0) + { + fprintf(stderr, "job failure.\n"); + return(-1); + } + + BMI_memfree(req_info.addr, ack, sizeof(struct ack_foo), BMI_RECV); + BMI_unexpected_free(req_info.addr, req_info.buffer); + + /* shut down the interfaces */ + job_close_context(context); + job_finalize(); + PINT_flow_finalize(); + BMI_finalize(); + /* trove_finalize(TROVE_METHOD_DBPF); */ + + return(0); +}