From 0661d7a60d952f2fd5b7d083fcaf271b4bd900fd Mon Sep 17 00:00:00 2001 From: root Date: Fri, 13 Jul 2012 00:01:03 -0500 Subject: [PATCH] Added files from OrangeFS-2.8.6 -These files were not related to OSD -They were only present in OrangeFS-2.8.6 --- aclocal.m4 | 2303 --------- cert-utils/pvfs2-grid-proxy-init.sh | 22 + config.save | 26 +- include/orange.h | 27 + include/pvfs2-usrint.h | 329 ++ src/apps/admin/pvfs2-get-uid.c | 337 ++ src/apps/admin/pvfs2-perf-mon-snmp.c | 429 ++ src/apps/ucache/module.mk.in | 11 + src/apps/ucache/shmem_util.c | 75 + src/apps/ucache/shmem_util.h | 20 + src/apps/ucache/ucached.c | 711 +++ src/apps/ucache/ucached.h | 81 + src/apps/ucache/ucached_cmd.c | 120 + src/apps/ucache/ucached_common.c | 16 + src/apps/ucache/watch_daemons | 1 + src/apps/ucache/watch_ipcs | 1 + src/apps/ucache/watch_log | 1 + src/client/sysint/mgmt-get-uid-list.sm | 243 + src/client/usrint/mmap.c | 180 + src/client/usrint/module.mk.in | 21 + src/client/usrint/posix-ops.h | 230 + src/client/usrint/request.c | 157 + src/client/usrint/socket.c | 506 ++ src/client/usrint/stdio-ops.h | 108 + src/client/usrint/ucache.c | 2066 ++++++++ src/client/usrint/ucache.h | 251 + src/common/gen-locks/gen-win-locks.c | 748 +++ src/common/misc/pint-uid-mgmt.c | 198 + src/common/misc/pint-uid-mgmt.h | 53 + src/common/misc/pvfs2-win-util.c | 2117 +++++++++ src/common/windows/wincommon.h | 40 + src/io/bmi/bmi_wintcp/bmi-tcp-addressing.h | 102 + src/io/bmi/bmi_wintcp/bmi-wintcp.c | 4177 +++++++++++++++++ .../bmi/bmi_wintcp/socket-collection-epoll.c | 203 + .../bmi/bmi_wintcp/socket-collection-epoll.h | 120 + src/io/bmi/bmi_wintcp/socket-collection.c | 477 ++ src/io/bmi/bmi_wintcp/socket-collection.h | 126 + src/io/bmi/bmi_wintcp/sockio.c | 415 ++ src/io/bmi/bmi_wintcp/sockio.h | 130 + src/server/mgmt-get-uid.sm | 138 + test/ci/jenkins-build.sh | 203 + test/ci/jenkins-doc.sh | 48 + test/ci/jenkins-test.sh | 398 ++ test/common/gen-locks/condvar1.c | 22 + 
test/common/gen-locks/condvar2_1.c | 185 + test/common/gen-locks/condvar3.c | 174 + test/common/gen-locks/condvar3_1.c | 222 + test/common/gen-locks/condvar3_2.c | 211 + test/io/job/test-job-client.c | 158 + test/io/job/test-job-server.c | 182 + 50 files changed, 16803 insertions(+), 2316 deletions(-) delete mode 100644 aclocal.m4 create mode 100755 cert-utils/pvfs2-grid-proxy-init.sh create mode 100644 include/orange.h create mode 100644 include/pvfs2-usrint.h create mode 100644 src/apps/admin/pvfs2-get-uid.c create mode 100644 src/apps/admin/pvfs2-perf-mon-snmp.c create mode 100644 src/apps/ucache/module.mk.in create mode 100644 src/apps/ucache/shmem_util.c create mode 100644 src/apps/ucache/shmem_util.h create mode 100644 src/apps/ucache/ucached.c create mode 100644 src/apps/ucache/ucached.h create mode 100644 src/apps/ucache/ucached_cmd.c create mode 100644 src/apps/ucache/ucached_common.c create mode 100755 src/apps/ucache/watch_daemons create mode 100755 src/apps/ucache/watch_ipcs create mode 100755 src/apps/ucache/watch_log create mode 100644 src/client/sysint/mgmt-get-uid-list.sm create mode 100644 src/client/usrint/mmap.c create mode 100644 src/client/usrint/module.mk.in create mode 100644 src/client/usrint/posix-ops.h create mode 100644 src/client/usrint/request.c create mode 100644 src/client/usrint/socket.c create mode 100644 src/client/usrint/stdio-ops.h create mode 100644 src/client/usrint/ucache.c create mode 100644 src/client/usrint/ucache.h create mode 100755 src/common/gen-locks/gen-win-locks.c create mode 100644 src/common/misc/pint-uid-mgmt.c create mode 100644 src/common/misc/pint-uid-mgmt.h create mode 100755 src/common/misc/pvfs2-win-util.c create mode 100755 src/common/windows/wincommon.h create mode 100755 src/io/bmi/bmi_wintcp/bmi-tcp-addressing.h create mode 100755 src/io/bmi/bmi_wintcp/bmi-wintcp.c create mode 100755 src/io/bmi/bmi_wintcp/socket-collection-epoll.c create mode 100755 src/io/bmi/bmi_wintcp/socket-collection-epoll.h create 
mode 100755 src/io/bmi/bmi_wintcp/socket-collection.c create mode 100755 src/io/bmi/bmi_wintcp/socket-collection.h create mode 100755 src/io/bmi/bmi_wintcp/sockio.c create mode 100755 src/io/bmi/bmi_wintcp/sockio.h create mode 100644 src/server/mgmt-get-uid.sm create mode 100755 test/ci/jenkins-build.sh create mode 100755 test/ci/jenkins-doc.sh create mode 100644 test/ci/jenkins-test.sh create mode 100755 test/common/gen-locks/condvar1.c create mode 100755 test/common/gen-locks/condvar2_1.c create mode 100755 test/common/gen-locks/condvar3.c create mode 100755 test/common/gen-locks/condvar3_1.c create mode 100755 test/common/gen-locks/condvar3_2.c create mode 100755 test/io/job/test-job-client.c create mode 100755 test/io/job/test-job-server.c diff --git a/aclocal.m4 b/aclocal.m4 deleted file mode 100644 index 5af196f..0000000 --- a/aclocal.m4 +++ /dev/null @@ -1,2303 +0,0 @@ -# generated automatically by aclocal 1.7.9 -*- Autoconf -*- - -# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002 -# Free Software Foundation, Inc. -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -AC_DEFUN([AX_OPENSSL], -[ - opensslpath=ifelse([$1], ,,$1) - - if test "x$1" != "xno"; then - - AC_MSG_CHECKING([for openssl library]) - - if test "x${opensslpath}" != "x"; then - CFLAGS="${CFLAGS} -I${opensslpath}/include" - LDFLAGS="$LDFLAGS -L${opensslpath}/lib64 -L${opensslpath}/lib" - SERVER_LDFLAGS="$SERVER_LDFLAGS -L${opensslpath}/lib64 -L${opensslpath}/lib" - fi - LIBS="$LIBS -lcrypto -lssl" - - AC_COMPILE_IFELSE( - [#include "openssl/bio.h"], - [], - [AC_MSG_ERROR(Invalid openssl path specified. 
No openssl/bio.h found.)]) - - AC_TRY_LINK( - [#include "openssl/bio.h"], - [BIO * b;], - [AC_MSG_RESULT(yes)], - [AC_MSG_ERROR(could not find openssl libs)]) - - AC_DEFINE(WITH_OPENSSL, 1, [Define if openssl exists]) - - AC_CHECK_HEADERS(openssl/evp.h) - AC_CHECK_HEADERS(openssl/crypto.h) - fi -]) - -AC_DEFUN([AX_OPENSSL_OPTIONAL], -[ - AC_MSG_CHECKING([for openssl library]) - TMPLIBS=${LIBS} - LIBS="$LIBS -lcrypto -lssl" - - AC_COMPILE_IFELSE( - [#include "openssl/bio.h"], - [], - [AC_MSG_WARN(No openssl headers found.)]) - - AC_TRY_LINK( - [#include "openssl/bio.h"], - [BIO * b;], - [AC_MSG_RESULT(yes) - AC_DEFINE(WITH_OPENSSL, 1, [Define if openssl exists]) - ], - [ - AC_MSG_WARN(No openssl headers found.) - LIBS=${TMPLIBS} - ]) - - AC_CHECK_HEADERS(openssl/evp.h) - AC_CHECK_HEADERS(openssl/crypto.h) - -]) - - -AC_DEFUN([AX_KERNEL_FEATURES], -[ - dnl - dnl kernel feature tests. Set CFLAGS once here and use it for all - dnl kernel features. reset to the old value at the end. - dnl - dnl on some systems, there is a /usr/include/linux/xattr_acl.h , so the - dnl check for xattr_acl.h down below will always pass, even if it - dnl should fail. this hack (-nostdinc -isystem ...) will bring in just - dnl enough system headers dnl for kernel compilation - - dnl -Werror can be overkill, but for these kernel feature tests - dnl 'implicit function declaration' usually ends up in an undefined - dnl symbol somewhere. 
- - NOSTDINCFLAGS="-Werror-implicit-function-declaration -nostdinc -isystem `$CC -print-file-name=include`" - - CFLAGS="$USR_CFLAGS $NOSTDINCFLAGS -I$lk_src/include -I$lk_src/include/asm/mach-default -DKBUILD_STR(s)=#s -DKBUILD_BASENAME=KBUILD_STR(empty) -DKBUILD_MODNAME=KBUILD_STR(empty)" - - dnl kernels > 2.6.32 now use generated/autoconf.h - if test -f $lk_src/include/generated/autoconf.h ; then - CFLAGS="$CFLAGS -imacros $lk_src/include/generated/autoconf.h" - else - CFLAGS="$CFLAGS -imacros $lk_src/include/linux/autoconf.h" - fi - - dnl we probably need additional includes if this build is intended - dnl for a different architecture - if test -n "${ARCH}" ; then - CFLAGS="$CFLAGS -I$lk_src/arch/${ARCH}/include -I$lk_src/arch/${ARCH}/include/asm/mach-default" - else - SUBARCH=`uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ - -e s/arm.*/arm/ -e s/sa110/arm/ \ - -e s/s390x/s390/ -e s/parisc64/parisc/ \ - -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ - -e s/sh.*/sh/` - if test "x$SUBARCH" = "xi386"; then - ARCH=x86 - elif test "x$SUBARCH" = "xx86_64"; then - ARCH=x86 - elif test "x$SUBARCH" = "xsparc64"; then - ARCH=sparc - else - ARCH=$SUBARCH - fi - - CFLAGS="$CFLAGS -I$lk_src/arch/${ARCH}/include -I$lk_src/arch/${ARCH}/include/asm/mach-default" - fi - - AC_MSG_CHECKING(for i_size_write in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel already defined it - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - void i_size_write(struct inode *inode, - loff_t i_size) - { - return; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_I_SIZE_WRITE, 1, Define if kernel has i_size_write), - ) - - AC_MSG_CHECKING(for i_size_read in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel already defined it - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - loff_t i_size_read(struct inode *inode) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - 
AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_I_SIZE_READ, 1, Define if kernel has i_size_read), - ) - - AC_MSG_CHECKING(for iget_locked function in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel already defined it - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - loff_t iget_locked(struct inode *inode) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IGET_LOCKED, 1, Define if kernel has iget_locked), - ) - - AC_MSG_CHECKING(for iget4_locked function in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel already defined it - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - loff_t iget4_locked(struct inode *inode) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IGET4_LOCKED, 1, Define if kernel has iget4_locked), - ) - - AC_MSG_CHECKING(for iget5_locked function in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel already defined it - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - loff_t iget5_locked(struct inode *inode) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IGET5_LOCKED, 1, Define if kernel has iget5_locked), - ) - - dnl Check if the kernel defines the xtvec structure. - dnl This is part of a POSIX extension. - AC_MSG_CHECKING(for struct xtvec in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct xtvec xv = { 0, 0 }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_STRUCT_XTVEC, 1, Define if struct xtvec is defined in the kernel), - AC_MSG_RESULT(no) - ) - - dnl 2.6.20 deprecated kmem_cache_t; some old ones do not have struct - dnl kmem_cache, but may have kmem_cache_s. It's a mess. Just look - dnl for this, and assume _t if not found. - dnl This test relies on gcc complaining about declaring a struct - dnl in a parameter list. 
Fragile, but nothing better is available - dnl to check for the existence of a struct. We cannot see the - dnl definition of the struct in the kernel, it's private to the - dnl slab implementation. And C lets you declare structs freely as - dnl long as you don't try to deal with their contents. - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for struct kmem_cache in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - - int foo(struct kmem_cache *s) - { - return (s == NULL) ? 3 : 4; - } - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_STRUCT_KMEM_CACHE, 1, Define if struct kmem_cache is defined in kernel), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - dnl 2.6.20 removed SLAB_KERNEL. Need to use GFP_KERNEL instead - AC_MSG_CHECKING(for SLAB_KERNEL flag in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static int flags = SLAB_KERNEL; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SLAB_KERNEL, 1, Define if SLAB_KERNEL is defined in kernel), - AC_MSG_RESULT(no) - ) - - dnl The name of this field changed from memory_backed to capabilities - dnl in 2.6.12. 
- AC_MSG_CHECKING(for memory_backed in struct backing_dev_info in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - static struct backing_dev_info bdi = { - .memory_backed = 0 - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BDI_MEMORY_BACKED, 1, Define if struct backing_dev_info in kernel has memory_backed), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a sendfile callback - if test "x$enable_kernel_sendfile" = "xyes"; then - AC_MSG_CHECKING(for sendfile callback in struct file_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct file_operations fop = { - .sendfile = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SENDFILE_VFS_SUPPORT, 1, Define if struct file_operations in kernel has sendfile callback), - AC_MSG_RESULT(no) - ) - fi - - dnl checking if we have a readv callback in super_operations - AC_MSG_CHECKING(for readv callback in struct file_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct file_operations fop = { - .readv = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_READV_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has readv callback), - AC_MSG_RESULT(no) - ) - dnl checking if we have a writev callback in super_operations - AC_MSG_CHECKING(for writev callback in struct file_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct file_operations fop = { - .writev = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WRITEV_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has writev callback), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a find_inode_handle callback in super_operations - AC_MSG_CHECKING(for find_inode_handle callback in struct super_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct super_operations sop = { - .find_inode_handle = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - 
AC_DEFINE(HAVE_FIND_INODE_HANDLE_SUPER_OPERATIONS, 1, Define if struct super_operations in kernel has find_inode_handle callback), - AC_MSG_RESULT(no) - ) - - dnl 2.6.18.1 removed this member - AC_MSG_CHECKING(for i_blksize in struct inode) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct inode i = { - .i_blksize = 0, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_I_BLKSIZE_IN_STRUCT_INODE, 1, Define if struct inode in kernel has i_blksize member), - AC_MSG_RESULT(no) - ) - - dnl 2.6.16 removed this member - AC_MSG_CHECKING(for i_sem in struct inode) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct inode i = { - .i_sem = {0}, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_I_SEM_IN_STRUCT_INODE, 1, Define if struct inode in kernel has i_sem member), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a statfs_lite callback in super_operations - AC_MSG_CHECKING(for statfs_lite callback in struct super_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct super_operations sop = { - .statfs_lite = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_STATFS_LITE_SUPER_OPERATIONS, 1, Define if struct super_operations in kernel has statfs_lite callback), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a fill_handle callback in inode_operations - AC_MSG_CHECKING(for fill_handle callback in struct inode_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct inode_operations iop = { - .fill_handle = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FILL_HANDLE_INODE_OPERATIONS, 1, Define if struct inode_operations in kernel has fill_handle callback), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a getattr_lite callback in inode_operations - AC_MSG_CHECKING(for getattr_lite callback in struct inode_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct inode_operations iop = { - .getattr_lite = NULL, - }; - 
], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GETATTR_LITE_INODE_OPERATIONS, 1, Define if struct inode_operations in kernel has getattr_lite callback), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a get_fs_key callback in super_operations - AC_MSG_CHECKING(for get_fs_key callback in struct super_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct super_operations sop = { - .get_fs_key = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GET_FS_KEY_SUPER_OPERATIONS, 1, Define if struct super_operations in kernel has get_fs_key callback), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a readdirplus callback in file_operations - AC_MSG_CHECKING(for readdirplus member in file_operations structure) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct file_operations filop = { - .readdirplus = NULL - }; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_READDIRPLUS_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has readdirplus callback), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a readdirplus_lite callback in file_operations - AC_MSG_CHECKING(for readdirplus_lite member in file_operations structure) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct file_operations filop = { - .readdirplus_lite = NULL - }; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_READDIRPLUSLITE_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has readdirplus_lite callback), - AC_MSG_RESULT(no) - ) - - - dnl checking if we have a readx callback in file_operations - AC_MSG_CHECKING(for readx member in file_operations structure) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct file_operations filop = { - .readx = NULL - }; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_READX_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has readx callback), - AC_MSG_RESULT(no) - ) - - dnl checking if we have a writex callback in file_operations - AC_MSG_CHECKING(for writex 
member in file_operations structure) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct file_operations filop = { - .writex = NULL - }; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WRITEX_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has writex callback), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(for aio support in kernel) - dnl if this test passes, the kernel has it - dnl if this test fails, the kernel does not have it - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - static struct kiocb iocb; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_AIO, 1, Define if kernel has aio support) - have_aio=yes, - AC_MSG_RESULT(no) - have_aio=no - ) - - if test "x$have_aio" = "xyes" -a "x$enable_kernel_aio" = "xyes"; then - AC_MSG_CHECKING(for ki_dtor in kiocb structure of kernel) - dnl if this test passes, the kernel does have it and we enable - dnl support for AIO. if this test fails, the kernel does not - dnl have this member and we disable support for AIO - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - static struct kiocb io_cb = { - .ki_dtor = NULL, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_AIO_VFS_SUPPORT, 1, Define if we are enabling VFS AIO support in kernel), - AC_MSG_RESULT(no) - ) - - tmp_cflags=$CFLAGS - dnl if this test passes, the signature of aio_read has changed to the new one - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for new prototype of aio_read callback of file_operations structure) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - extern ssize_t my_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); - static struct file_operations fop = { - .aio_read = my_aio_read, - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_AIO_NEW_AIO_SIGNATURE, 1, Define if VFS AIO support in kernel has a new prototype), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - fi - - AC_MSG_CHECKING(for dentry argument in kernel super_operations statfs) - dnl Rely on the fact that there is an 
external vfs_statfs that is - dnl of the same type as the .statfs in struct super_operations to - dnl verify the signature of that function pointer. There is a single - dnl commit in the git history where both changed at the same time - dnl from super_block to dentry. - dnl - dnl The alternative approach of trying to define a s_op.statfs is not - dnl as nice because that only throws a warning, requiring -Werror to - dnl catch it. This is a problem if the compiler happens to spit out - dnl other spurious warnings that have nothing to do with the test. - dnl - dnl If this test passes, the kernel uses a struct dentry argument. - dnl If this test fails, the kernel uses something else (old struct - dnl super_block perhaps). - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - int vfs_statfs(struct dentry *de, struct kstatfs *kfs) - { - return 0; - } - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DENTRY_STATFS_SOP, 1, Define if super_operations statfs has dentry argument), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(for vfsmount argument in kernel file_system_type get_sb) - dnl Same trick as above. A single commit changed mayn things at once: - dnl type and signature of file_system_type.get_sb, and signature of - dnl get_sb_bdev. This test is a bit more tenuous, as get_sb_bdev - dnl isn't used directly in a file_system_type, but is a popular helper - dnl for many FSes. And it has not exactly the same signature. - dnl - dnl If this test passes, the kernel has the most modern known form, - dnl which includes a stfuct vfsmount argument. - dnl If this test fails, the kernel uses something else. 
- AC_TRY_COMPILE([ - #define __KERNEL__ - #include - int get_sb_bdev(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, - int), - struct vfsmount *vfsm) - { - return 0; - } - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFSMOUNT_GETSB, 1, Define if file_system_type get_sb has vfsmount argument), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(for xattr support in kernel) - dnl if this test passes, the kernel has it - dnl if this test fails, the kernel does not have it - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct inode_operations in_op = { - .getxattr = NULL - }; - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR, 1, Define if kernel has xattr support) - have_xattr=yes, - AC_MSG_RESULT(no) - have_xattr=no - ) - - if test "x$have_xattr" = "xyes"; then - dnl Test to check if setxattr function has a const void * argument - AC_MSG_CHECKING(for const argument to setxattr function) - dnl if this test passes, there is a const void* argument - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], - [ - struct inode_operations inode_ops; - int ret; - struct dentry * dent = NULL; - const char * name = NULL; - const void * val = NULL; - size_t size = 0; - int flags = 0; - - ret = inode_ops.setxattr(dent, name, val, size, flags); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SETXATTR_CONST_ARG, 1, Define if kernel setxattr has const void* argument), - AC_MSG_RESULT(no) - ) - fi - - dnl the proc handler functions have changed over the years. 
- dnl pre-2.6.8: proc_handler(ctl_table *ctl, - dnl int write, - dnl struct file *filp, - dnl void *buffer, - dnl size_t *lenp) - dnl - dnl 2.6.8-2.6.31: proc_handler(ctl_table *ctl, - dnl int write, - dnl struct file *filp, - dnl void *buffer, - dnl size_t *lenp, - dnl loff_t *ppos) - dnl > 2.6.31: proc_handler(ctl_table *ctl, - dnl int write, - dnl void *buffer, - dnl size_t *lenp, - dnl loff_t *ppos) - - dnl Test to see if sysctl proc handlers have a file argument - AC_MSG_CHECKING(for file argument to sysctl proc handlers) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - ], [ - struct ctl_table * ctl = NULL; - int write = 0; - struct file * filp = NULL; - void __user * buffer = NULL; - size_t * lenp = NULL; - loff_t * ppos = NULL; - - proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PROC_HANDLER_FILE_ARG, 1, Define if sysctl proc handlers have 6th argument), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(for ppos argument to sysctl proc handlers) - dnl if this test passes, there is a ppos argument - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - ], [ - struct ctl_table * ctl = NULL; - int write = 0; - void __user * buffer = NULL; - size_t * lenp = NULL; - loff_t * ppos = NULL; - - proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PROC_HANDLER_PPOS_ARG, 1, Define if sysctl proc handlers have ppos argument), - AC_MSG_RESULT(no) - ) - - AC_CHECK_HEADERS([linux/posix_acl.h], [], [], - [#define __KERNEL__ - #include - #ifdef HAVE_XATTR - #include - #endif - ] ) - - AC_CHECK_HEADERS([linux/posix_acl_xattr.h], [], [], - [#define __KERNEL__ - #include - #ifdef HAVE_XATTR - #include - #endif - ] ) - - dnl linux-2.6.11 had xattr_acl.h, but 2.6.12 did not! 
- AC_CHECK_HEADERS([linux/xattr_acl.h], [], [], - [#define __KERNEL__ - #include - #ifdef HAVE_XATTR - #include - #endif - ] ) - - AC_CHECK_HEADERS([linux/mount.h], [], [], - [#define __KERNEL__ - #include - ] ) - AC_CHECK_HEADERS([linux/ioctl32.h], [], [], - [#define __KERNEL__ - #include - ] ) - AC_CHECK_HEADERS([linux/compat.h], [], [], - [#define __KERNEL__ - #include - ] ) - AC_CHECK_HEADERS([linux/syscalls.h], [], [], - [#define __KERNEL__ - #include - ] ) - AC_CHECK_HEADERS([asm/ioctl32.h], [], [], - [#define __KERNEL__ - #include - ] ) - AC_CHECK_HEADERS([linux/exportfs.h], [],[], - [#define __KERNEL__ - #include - ]) - - AC_MSG_CHECKING(for generic_file_readv api in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel has it defined with a different - dnl signature! deliberately, the signature for this method has been - dnl changed for it to give a compiler error. - - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - int generic_file_readv(struct inode *inode) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_FILE_READV, 1, Define if kernel has generic_file_readv), - ) - - AC_MSG_CHECKING(for generic_permission api in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel has it defined with a different - dnl signature! deliberately, the signature for this method has been - dnl changed for it to give a compiler error. 
- - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - int generic_permission(struct inode *inode) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_PERMISSION, 1, Define if kernel has generic_permission), - ) - - AC_MSG_CHECKING(for generic_getxattr api in kernel) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel has it defined - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - int generic_getxattr(struct inode *inode) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_GETXATTR, 1, Define if kernel has generic_getxattr), - ) - - AC_MSG_CHECKING(for arg member in read_descriptor_t in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - read_descriptor_t x; - x.arg.data = NULL; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_ARG_IN_READ_DESCRIPTOR_T, 1, Define if read_descriptor_t has an arg member), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(for fh_to_dentry member in export_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct export_operations x; - x.fh_to_dentry = NULL; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FHTODENTRY_EXPORT_OPERATIONS, 1, Define if export_operations has an fh_to_dentry member), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(for encode_fh member in export_operations in kernel) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct export_operations x; - x.encode_fh = NULL; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_ENCODEFH_EXPORT_OPERATIONS, 1, Define if export_operations has an encode_fh member), - AC_MSG_RESULT(no) - ) - - dnl Using -Werror is not an option, because some arches throw lots of - dnl warnings that would trigger false negatives. 
We know that the - dnl change to the releasepage() function signature was accompanied by - dnl a similar change to the exported function try_to_release_page(), - dnl and that one we can check without using -Werror. The test fails - dnl unless the previous declaration was identical to the one we suggest - dnl below. New kernels use gfp_t, not int. - AC_MSG_CHECKING(for second arg type int in address_space_operations releasepage) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - extern int try_to_release_page(struct page *page, int gfp_mask); - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INT_ARG2_ADDRESS_SPACE_OPERATIONS_RELEASEPAGE, 1, Define if sceond argument to releasepage in address_space_operations is type int), - AC_MSG_RESULT(no) - ) - - dnl Similar logic for the follow_link member in inode_operations. New - dnl kernels return a void *, not int. - AC_MSG_CHECKING(for int return in inode_operations follow_link) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - extern int page_follow_link_light(struct dentry *, - struct nameidata *); - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INT_RETURN_INODE_OPERATIONS_FOLLOW_LINK, 1, Define if return value from follow_link in inode_operations is type int), - AC_MSG_RESULT(no) - ) - - dnl kmem_cache_destroy function may return int only on pre 2.6.19 kernels - dnl else it returns a void. - AC_MSG_CHECKING(for int return in kmem_cache_destroy) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - extern int kmem_cache_destroy(kmem_cache_t *); - ], [], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INT_RETURN_KMEM_CACHE_DESTROY, 1, Define if return value from kmem_cache_destroy is type int), - AC_MSG_RESULT(no) - ) - - dnl more 2.6 api changes. return type for the invalidatepage - dnl address_space_operation is 'void' in new kernels but 'int' in old - dnl I had to turn on -Werror for this test because i'm not sure how - dnl else to make dnl "initialization from incompatible pointer type" - dnl fail. 
- AC_MSG_CHECKING(for older int return in invalidatepage) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], - [ - struct address_space_operations aso; - - int ret; - struct page * page = NULL; - unsigned long offset; - - ret = aso.invalidatepage(page, offset); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INT_RETURN_ADDRESS_SPACE_OPERATIONS_INVALIDATEPAGE, 1, Define if return type of invalidatepage should be int), - AC_MSG_RESULT(NO) - ) - - dnl In 2.6.18.1 and newer, including will throw off a - dnl warning - tmp_cflags=${CFLAGS} - CFLAGS="${CFLAGS} -Werror" - AC_MSG_CHECKING(for warnings when including linux/config.h) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [], - AC_MSG_RESULT(no) - AC_DEFINE(HAVE_NOWARNINGS_WHEN_INCLUDING_LINUX_CONFIG_H, 1, Define if including linux/config.h gives no warnings), - AC_MSG_RESULT(yes) - ) - CFLAGS=$tmp_cflags - - AC_MSG_CHECKING(for compat_ioctl member in file_operations structure) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct file_operations filop = { - .compat_ioctl = NULL - }; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_COMPAT_IOCTL_HANDLER, 1, Define if there exists a compat_ioctl member in file_operations), - AC_MSG_RESULT(no) - ) - - dnl Gives wrong answer if header is missing; don't try then. 
- if test x$ac_cv_header_linux_ioctl32_h = xyes ; then - AC_MSG_CHECKING(for register_ioctl32_conversion kernel exports) - dnl if this test passes, the kernel does not have it - dnl if this test fails, the kernel has it defined - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - int register_ioctl32_conversion(void) - { - return 0; - } - ], [], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REGISTER_IOCTL32_CONVERSION, 1, Define if kernel has register_ioctl32_conversion), - ) - fi - - AC_MSG_CHECKING(for int return value of kmem_cache_destroy) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - int i = kmem_cache_destroy(NULL); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KMEM_CACHE_DESTROY_INT_RETURN, 1, Define if kmem_cache_destroy returns int), - AC_MSG_RESULT(no) - ) - - dnl As of 2.6.19, combined readv/writev into aio_read and aio_write - dnl functions. Detect this by not finding a readv member. - AC_MSG_CHECKING(for combined file_operations readv and aio_read) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct file_operations filop = { - .readv = NULL - }; - ], - AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_COMBINED_AIO_AND_VECTOR, 1, Define if struct file_operations has combined aio_read and readv functions), - ) - - dnl Check for kzalloc - AC_MSG_CHECKING(for kzalloc) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - void * a; - a = kzalloc(1024, GFP_KERNEL); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KZALLOC, 1, Define if kzalloc exists), - AC_MSG_RESULT(no) - ) - - dnl Check for two arg register_sysctl_table() - AC_MSG_CHECKING(for two arguments to register_sysctl_table) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - ], [ - register_sysctl_table(NULL, 0); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TWO_ARG_REGISTER_SYSCTL_TABLE, 1, Define if register_sysctl_table takes two arguments), - AC_MSG_RESULT(no) - ) - - dnl FS_IOC_GETFLAGS and FS_IOC_SETFLAGS appeared - dnl 
somewhere around 2.6.20.1 as generic versions of fs-specific flags - AC_MSG_CHECKING(for generic FS_IOC ioctl flags) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - int flags = FS_IOC_GETFLAGS; - ], - AC_MSG_RESULT(yes), - AC_DEFINE(HAVE_NO_FS_IOC_FLAGS, 1, Define if FS_IOC flags missing from fs.h) - AC_MSG_RESULT(no) - ) - - dnl old linux kernels define struct page with a 'count' member, whereas - dnl other kernels (since at least 2.6.20) define struct page with a - dnl '_count' - AC_MSG_CHECKING(for obsolete struct page count without underscore) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct page *p; - int foo; - foo = atomic_read(&(p)->count); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_OBSOLETE_STRUCT_PAGE_COUNT_NO_UNDERSCORE, 1, Define if struct page defines a count member without leading underscore), - AC_MSG_RESULT(no) - ) - - dnl old linux kernels do not have class_create and related functions - dnl - dnl check for class_device_destroy() to weed out RHEL4 kernels that - dnl have some class functions but not others - AC_MSG_CHECKING(if kernel has device classes) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - class_device_destroy(NULL, "pvfs2") - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_DEVICE_CLASSES, 1, Define if kernel has device classes), - AC_MSG_RESULT(no) - ) - - dnl 2.6.23 removed the destructor parameter from kmem_cache_create - AC_MSG_CHECKING(for destructor param to kmem_cache_create) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - kmem_cache_create("config-test", 0, 0, 0, NULL, NULL); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KMEM_CACHE_CREATE_DESTRUCTOR_PARAM, 1, [Define if kernel kmem_cache_create has destructor param]), - AC_MSG_RESULT(no) - ) - - dnl 2.6.27 changed the constructor parameter signature of - dnl kmem_cache_create. Check for this newer one-param style - dnl If they don't match, gcc complains about - dnl passing argument ... 
from incompatible pointer type, hence the - dnl need for the -Werror. Note that the next configure test will - dnl determine if we have a two param constructor or not. - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for one-param kmem_cache_create constructor) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - void ctor(void *req) - { - } - ], [ - kmem_cache_create("config-test", 0, 0, 0, ctor); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KMEM_CACHE_CREATE_CTOR_ONE_PARAM, 1, [Define if kernel kmem_cache_create constructor has newer-style one-parameter form]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - dnl 2.6.27 changed the parameter signature of - dnl inode_operations->permission. Check for this newer two-param style - dnl If they don't match, gcc complains about - dnl passing argument ... from incompatible pointer type, hence the - dnl need for the -Werror and -Wall. - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror -Wall" - AC_MSG_CHECKING(for two param permission) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - #include - #include - int ctor(struct inode *i, int a) - { - return 0; - } - struct inode_operations iop = { - .permission = ctor, - }; - ], [ - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TWO_PARAM_PERMISSION, 1, [Define if kernel's inode_operations has two parameters permission function]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - - dnl 2.6.24 changed the constructor parameter signature of - dnl kmem_cache_create. Check for this newer two-param style and - dnl if not, assume it is old. Note we can get away with just - dnl struct kmem_cache (and not kmem_cache_t) as that change happened - dnl in older kernels. If they don't match, gcc complains about - dnl passing argument ... from incompatible pointer type, hence the - dnl need for the -Werror. 
- tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for two-param kmem_cache_create constructor) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - void ctor(struct kmem_cache *cachep, void *req) - { - } - ], [ - kmem_cache_create("config-test", 0, 0, 0, ctor); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KMEM_CACHE_CREATE_CTOR_TWO_PARAM, 1, [Define if kernel kmem_cache_create constructor has new-style two-parameter form]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - AC_MSG_CHECKING(if kernel address_space struct has a spin_lock field named page_lock) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct address_space as; - spin_lock(&as.page_lock); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SPIN_LOCK_PAGE_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a spin_lock member named page_lock instead of rw_lock]), - AC_MSG_RESULT(no) - ) - - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(if kernel address_space struct has a rwlock_t field named tree_lock) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct address_space as; - read_lock(&as.tree_lock); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RW_LOCK_TREE_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a rw_lock_t member named tree_lock]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(if kernel address_space struct has a spinlock_t field named tree_lock) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct address_space as; - spin_lock(&as.tree_lock); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SPIN_LOCK_TREE_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a spin_lock_t member named tree_lock]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - AC_MSG_CHECKING(if kernel address_space struct has a priv_lock field - from RT linux) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct address_space as; - 
spin_lock(&as.priv_lock); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RT_PRIV_LOCK_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a spin_lock for private data instead of rw_lock -- used by RT linux]), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(if kernel defines mapping_nrpages macro - from RT linux) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct address_space idata; - int i = mapping_nrpages(&idata); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_MAPPING_NRPAGES_MACRO, 1, [Define if kernel defines mapping_nrpages macro -- defined by RT linux]), - AC_MSG_RESULT(no) - ) - - dnl Starting with 2.6.25-rc1, .read_inode goes away. - AC_MSG_CHECKING(if kernel super_operations contains read_inode field) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct super_operations sops; - sops.read_inode(NULL); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_READ_INODE, 1, [Define if kernel super_operations contains read_inode field]), - AC_MSG_RESULT(no) - ) - - dnl Starting with 2.6.26, drop_inode and put_inode go away - AC_MSG_CHECKING(if kernel super_operations contains drop_inode field) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct super_operations sops; - sops.drop_inode(NULL); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DROP_INODE, 1, [Define if kernel super_operations contains drop_inode field]), - AC_MSG_RESULT(no) - ) - - dnl Starting with 2.6.26, drop_inode and put_inode go away - AC_MSG_CHECKING(if kernel super_operations contains put_inode field) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct super_operations sops; - sops.put_inode(NULL); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PUT_INODE, 1, [Define if kernel super_operations contains put_inode field]), - AC_MSG_RESULT(no) - ) - - dnl older 2.6 kernels don't have MNT_NOATIME - AC_MSG_CHECKING(if mount.h defines MNT_NOATIME) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - int flag = MNT_NOATIME; - ], - AC_MSG_RESULT(yes) - 
AC_DEFINE(HAVE_MNT_NOATIME, 1, [Define if mount.h contains - MNT_NOATIME flags]), - AC_MSG_RESULT(no) - ) - - dnl older 2.6 kernels don't have MNT_NODIRATIME - AC_MSG_CHECKING(if mount.h defines MNT_NODIRATIME) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - int flag = MNT_NODIRATIME; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_MNT_NODIRATIME, 1, [Define if mount.h contains - MNT_NODIRATIME flags]), - AC_MSG_RESULT(no) - ) - - dnl newer 2.6 kernels (2.6.28) use d_obtain_alias instead of d_alloc_anon - AC_MSG_CHECKING(for d_alloc_anon) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct inode *i; - d_alloc_anon(i); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_D_ALLOC_ANON, 1, [Define if dcache.h contains - d_alloc_annon]), - AC_MSG_RESULT(no) - ) - - AC_MSG_CHECKING(for s_dirty in struct super_block) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - ], [ - struct super_block *s; - list_empty(&s->s_dirty); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SB_DIRTY_LIST, 1, [Define if struct super_block has s_dirty list]), - AC_MSG_RESULT(no) - ) - - dnl newer 2.6 kernels (2.6.29-ish) use current_fsuid() macro instead - dnl of accessing task struct fields directly - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for current_fsuid) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - ], [ - int uid = current_fsuid(); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CURRENT_FSUID, 1, [Define if cred.h contains current_fsuid]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - dnl 2.6.32 added a mandatory name field to the bdi structure - AC_MSG_CHECKING(if kernel backing_dev_info struct has a name field) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - ], [ - struct backing_dev_info foo = - { - .name = "foo" - }; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BACKING_DEV_INFO_NAME, 1, [Define if kernel backing_dev_info struct has a name field]), - AC_MSG_RESULT(no) - ) - - dnl some 2.6 kernels have functions to 
explicitly initialize bdi structs - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for bdi_init) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - ], [ - int ret = bdi_init(NULL); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BDI_INIT, 1, [Define if bdi_init function is present]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - - dnl 2.6.33 API change, - dnl Removed .ctl_name from struct ctl_table. - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING([whether struct ctl_table has ctl_name]) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct ctl_table c = { .ctl_name = 0, }; - ],[ ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CTL_NAME, 1, Define if struct ctl_table has ctl_name member), - AC_MSG_RESULT(no) - ) - - dnl Removed .strategy from struct ctl_table. - AC_MSG_CHECKING([whether struct ctl_table has strategy]) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct ctl_table c = { .strategy = 0, }; - ], [ ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_STRATEGY_NAME, 1, Define if struct ctl_table has strategy member), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - dnl 2.6.33 changed the parameter signature of xattr_handler get - dnl member functions to have a fifth argument and changed the first - dnl parameter from struct inode to struct dentry. 
if the test fails - dnl assume the old 4 param with struct inode - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for five-param xattr_handler.get) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - static struct xattr_handler x; - static int get_xattr_h( struct dentry *d, const char *n, - void *b, size_t s, int h) - { return 0; } - ], - [ - x.get = get_xattr_h; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_HANDLER_GET_FIVE_PARAM, 1, [Define if kernel xattr_handle get function has dentry as first parameter and a fifth parameter]), - AC_MSG_RESULT(no) - ) - - dnl 2.6.33 changed the parameter signature of xattr_handler set - dnl member functions to have a sixth argument and changed the first - dnl parameter from struct inode to struct dentry. if the test fails - dnl assume the old 5 param with struct inode - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for six-param xattr_handler.set) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - static struct xattr_handler x; - static int set_xattr_h( struct dentry *d, const char *n, - const void *b, size_t s, int f, int h) - { return 0; } - ], - [ - x.set = set_xattr_h; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_HANDLER_SET_SIX_PARAM, 1, [Define if kernel xattr_handle set function has dentry as first parameter and a sixth parameter]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - dnl xattr_handler is also a const - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for const s_xattr member in super_block struct) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - #include - struct super_block sb; - const struct xattr_handler *x[] = { NULL }; - ], - [ - sb.s_xattr = x; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CONST_S_XATTR_IN_SUPERBLOCK, 1, [Define if s_xattr member of super_block struct is const]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - dnl early 2.6 kernels do not contain true/false enum in stddef.h - tmp_cflags=$CFLAGS - 
CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(stddef.h true/false enum) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - int f = true; - ], - [ ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TRUE_FALSE_ENUM, 1, [Define if kernel stddef has true/false enum]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - - dnl fsync no longer has a dentry second parameter - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - AC_MSG_CHECKING(for dentry argument in fsync) - AC_TRY_COMPILE([ - #define __KERNEL__ - #include - static struct file_operations f; - static int local_fsync(struct file *f, struct dentry *d, int i) - { return 0; } - ], - [ - f.fsync = local_fsync; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FSYNC_DENTRY_PARAM, 1, [Define if fsync function in file_operations struct wants a dentry pointer as the second parameter]), - AC_MSG_RESULT(no) - ) - CFLAGS=$tmp_cflags - - CFLAGS=$oldcflags - -]) - - -AC_DEFUN([AX_BERKELEY_DB], -[ - dbpath=ifelse([$1], ,,$1) - - DB_LDFLAGS= - dnl - dnl if the db is specified, try to link with -ldb - dnl otherwise try -ldb4, then -ldb3, then -ldb - dnl $lib set to notfound on link failure - dnl - AC_MSG_CHECKING([for db library]) - oldlibs=$LIBS - lib=notfound - - if test "x$dbpath" != "x" ; then - oldcflags=$CFLAGS - for dbheader in db4 db3 notfound; do - AC_COMPILE_IFELSE( - [#include "$dbpath/include/$dbheader/db.h"], - [DB_CFLAGS="-I$dbpath/include/$dbheader/" - break]) - done - - if test "x$dbheader" = "xnotfound"; then - AC_COMPILE_IFELSE( - [#include "$dbpath/include/db.h"], - [DB_CFLAGS="-I$dbpath/include/"], - [AC_MSG_FAILURE( - Invalid libdb path specified. 
No db.h found.)]) - fi - - DB_LDFLAGS="-L${dbpath}/lib" - LDFLAGS="$DB_LDFLAGS ${LDFLAGS}" - - LIBS="${oldlibs} -ldb -lpthread" - DB_LIB="-ldb" - CFLAGS="$DB_CFLAGS $oldcflags" - AC_TRY_LINK( - [#include ], - [DB *dbp; db_create(&dbp, NULL, 0);], - lib=db) - CFLAGS=$oldcflags - - else - for lib in db4 db3 db notfound; do - LIBS="${oldlibs} -l$lib -lpthread" - DB_LIB="-l$lib" - AC_TRY_LINK( - [#include ], - [DB *dbp; db_create(&dbp, NULL, 0);], - [break]) - done - fi - - dnl reset LIBS value and just report through DB_LIB - LIBS=$oldlibs - if test "x$lib" = "xnotfound" ; then - AC_MSG_ERROR(could not find DB libraries) - else - AC_MSG_RESULT($lib) - fi - AC_SUBST(DB_CFLAGS) - AC_SUBST(DB_LIB) - - dnl See if we have a new enough version of Berkeley DB; needed for - dnl compilation of trove-dbpf component - dnl AC_MSG_CHECKING(whether version of Berkeley DB is new enough) - dnl AC_TRY_COMPILE([ - dnl #include - dnl ], [ - dnl #if DB_VERSION_MAJOR < 4 - dnl #error "DB_VERSION_MAJOR < 4; need newer Berkeley DB implementation" - dnl #endif - dnl ], AC_MSG_RESULT(yes), - dnl AC_MSG_RESULT(no) - dnl AC_MSG_ERROR(Need newer (4.x.x or later) version of Berkeley DB. - dnl try: http://www.sleepycat.com/download/index.shtml - dnl or: /parl/pcarns/rpms/db4-4.0.14-1mdk.src.rpm (to build rpm)) - dnl ) - - dnl Test to check for DB_ENV variable to error callback fn. Then - dnl test to see if third parameter must be const (related but not - dnl exactly the same). 
- AC_MSG_CHECKING(for dbenv parameter to DB error callback function) - oldcflags=$CFLAGS - CFLAGS="$USR_CFLAGS $DB_CFLAGS -Werror" - AC_TRY_COMPILE([ - #include - - void error_callback_fn(const DB_ENV *dbenv, - const char *prefix, - const char *message) - { - return; - } - ], [ - DB *db; - - db->set_errcall(db, error_callback_fn); - ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DBENV_PARAMETER_TO_DB_ERROR_CALLBACK, 1, - Define if DB error callback function takes dbenv parameter) - have_dbenv_parameter_to_db_error_callback=yes, - AC_MSG_RESULT(no) - have_dbenv_parameter_to_db_error_callback=no) - - if test "x$have_dbenv_parameter_to_db_error_callback" = "xyes" ; then - dnl Test if compilation succeeds without const; we expect that it will - dnl not. - dnl NOTE: still using -Werror! - AC_MSG_CHECKING(if third parameter to error callback function is const) - AC_TRY_COMPILE([ - #include - - void error_callback_fn(const DB_ENV *dbenv, - const char *prefix, - char *message) - { - return; - } - ], [ - DB *db; - - db->set_errcall(db, error_callback_fn); - ], AC_MSG_RESULT(no), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CONST_THIRD_PARAMETER_TO_DB_ERROR_CALLBACK, 1, - Define if third param (message) to DB error callback function is const)) - fi - - CFLAGS="$USR_CFLAGS $DB_CFLAGS -Werror" - dnl Test to check for unknown third param to DB stat (four params - dnl total). The unknown parameter is a function ptr so that the - dnl the user can pass in a replcaement for malloc. - dnl Note: this is a holdover from relatively old DB implementations, - dnl while the txnid parameter is new. So we don't test for the old - dnl unknown parameter if we found the new one. 
- AC_MSG_CHECKING(for DB stat with malloc function ptr) - AC_TRY_COMPILE([ - #include - #include - ], [ - int ret = 0; - DB *db = db; - int dummy = 0; - u_int32_t flags = 0; - - ret = db->stat(db, &dummy, malloc, flags); - ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_UNKNOWN_PARAMETER_TO_DB_STAT, 1, - Define if DB stat function takes malloc function ptr) - have_db_stat_malloc=yes, - AC_MSG_RESULT(no) - have_db_stat_malloc=no) - - dnl Test to check for txnid parameter to DB stat (DB 4.3.xx+) - if test "x$have_db_stat_malloc" = "xno" ; then - - AC_MSG_CHECKING(for txnid parameter to DB stat function) - AC_TRY_COMPILE([ - #include - ], [ - int ret = 0; - DB *db = db; - DB_TXN *txnid = txnid; - u_int32_t flags = 0; - - ret = db->stat(db, txnid, NULL, flags); - ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TXNID_PARAMETER_TO_DB_STAT, 1, - Define if DB stat function takes txnid parameter) - have_txnid_param_to_stat=yes, - AC_MSG_RESULT(no) - have_txnid_param_to_stat=no) - - fi - - dnl Test to check for txnid parameter to DB open (DB4.1+) - AC_MSG_CHECKING(for txnid parameter to DB open function) - AC_TRY_COMPILE([ - #include - ], [ - int ret = 0; - DB *db = NULL; - DB_TXN *txnid = NULL; - char *file = NULL; - char *database = NULL; - DBTYPE type = 0; - u_int32_t flags = 0; - int mode = 0; - - ret = db->open(db, txnid, file, database, type, flags, mode); - ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TXNID_PARAMETER_TO_DB_OPEN, 1, - Define if DB open function takes a txnid parameter), - AC_MSG_RESULT(no)) - - dnl check for DB_DIRTY_READ (it is not in db-3.2.9, for example) - AC_MSG_CHECKING(for DB_DIRTY_READ flag) - AC_TRY_COMPILE([ - #include - ], [ - u_int32_t flags = DB_DIRTY_READ; - ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DB_DIRTY_READ, 1, [Define if db library has DB_DIRTY_READ flag]), - AC_MSG_RESULT(no)) - - dnl check for DB_BUFFER_SMALL (it is returned by dbp->get in db-4.4 and up) - AC_MSG_CHECKING(for DB_BUFFER_SMALL error) - AC_TRY_COMPILE([ - #include - ], [ - int res = 
DB_BUFFER_SMALL; - res++; - ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DB_BUFFER_SMALL, 1, [Define if db library has DB_BUFFER_SMALL error]), - AC_MSG_RESULT(no)) - - dnl Test to check for db->get_pagesize - AC_MSG_CHECKING(for berkeley db get_pagesize function) - AC_TRY_COMPILE([ - #include - ], [ - int ret = 0; - DB *db = NULL; - int pagesize; - - ret = db->get_pagesize(db, &pagesize); - ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DB_GET_PAGESIZE, 1, [Define if DB has get_pagesize function]), - AC_MSG_RESULT(no)) - - CFLAGS="$oldcflags" -]) - - -AC_DEFUN([AX_CHECK_NEEDS_LIBRT], -[ - -AC_MSG_CHECKING([if server lib needs -lrt]) -AC_TRY_LINK( - [#include - #include - #include ], - [lio_listio(LIO_NOWAIT, NULL, 0, NULL);], - [AC_MSG_RESULT(no)], - [ - oldlibs=$LIBS - LIBS="$LIBS -lrt" - AC_TRY_LINK( - [#include - #include - #include ], - [lio_listio(LIO_NOWAIT, NULL, 0, NULL);], - [NEEDS_LIBRT=1 - AC_SUBST(NEEDS_LIBRT) - AC_MSG_RESULT(yes)], - [AC_MSG_ERROR(failed attempting to link lio_listio)]) - LIBS=$oldlibs - ]) -]) - -# -# Configure rules for GM -# -# Copyright (C) 2008 Pete Wyckoff -# -# See COPYING in top-level directory. -# -AC_DEFUN([AX_GM], -[ - dnl Configure options for GM install path. 
- dnl --with-gm= is shorthand for - dnl --with-gm-includes=/include - dnl --with-gm-libs=/lib (or lib64 if that exists) - gm_home= - AC_ARG_WITH(gm, -[ --with-gm= Location of the GM install (default no GM)], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-gm requires the path to your GM tree.]) - elif test "$withval" != no ; then - gm_home="$withval" - fi - ) - AC_ARG_WITH(gm-includes, -[ --with-gm-includes= - Location of the GM includes], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-gm-includes requires path to GM headers.]) - elif test "$withval" != no ; then - GM_INCDIR="$withval" - fi - ) - AC_ARG_WITH(gm-libs, -[ --with-gm-libs= Location of the GM libraries], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-gm-libs requires path to GM libraries.]) - elif test "$withval" != no ; then - GM_LIBDIR="$withval" - fi - ) - dnl If supplied the incls and libs explicitly, use them, else populate them - dnl using guesses from the --with-gm dir. - if test -n "$gm_home" ; then - if test -z "$GM_INCDIR"; then - GM_INCDIR=$gm_home/include - fi - if test -z "$GM_LIBDIR"; then - GM_LIBDIR=$gm_home/lib64 - if test ! -d "$GM_LIBDIR" ; then - GM_LIBDIR=$gm_home/lib - fi - fi - fi - dnl If anything GM-ish was set, go look for header. - if test -n "$GM_INCDIR$GM_LIBDIR" ; then - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$GM_INCDIR -I$GM_INCDIR/gm" - AC_CHECK_HEADER(gm.h,, AC_MSG_ERROR([Header gm.h not found.])) - dnl Run test is not possible on a machine that does not have a GM NIC. - dnl Link test would work, but just check for existence. - if test ! -f $GM_LIBDIR/libgm.so ; then - if test ! 
-f $GM_LIBDIR/libgm.a ; then - AC_MSG_ERROR([Neither GM library libgm.so or libgm.a found.]) - fi - fi - BUILD_GM=1 - CPPFLAGS="$save_cppflags" - fi - AC_SUBST(BUILD_GM) - AC_SUBST(GM_INCDIR) - AC_SUBST(GM_LIBDIR) -]) - -dnl vim: set ft=config : - -# -# Configure rules for MX -# -# Copyright (C) 2008 Pete Wyckoff -# -# See COPYING in top-level directory. -# -AC_DEFUN([AX_MX], -[ - dnl Configure options for MX install path. - dnl --with-mx= is shorthand for - dnl --with-mx-includes=/include - dnl --with-mx-libs=/lib (or lib64 if that exists) - mx_home= - AC_ARG_WITH(mx, -[ --with-mx= Location of the MX install (default no MX)], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-mx requires the path to your MX tree.]) - elif test "$withval" != no ; then - mx_home="$withval" - fi - ) - AC_ARG_WITH(mx-includes, -[ --with-mx-includes= - Location of the MX includes], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-mx-includes requires path to MX headers.]) - elif test "$withval" != no ; then - MX_INCDIR="$withval" - fi - ) - AC_ARG_WITH(mx-libs, -[ --with-mx-libs= Location of the MX libraries], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-mx-libs requires path to MX libraries.]) - elif test "$withval" != no ; then - MX_LIBDIR="$withval" - fi - ) - dnl If supplied the incls and libs explicitly, use them, else populate them - dnl using guesses from the --with-mx dir. - if test -n "$mx_home" ; then - if test -z "$MX_INCDIR"; then - MX_INCDIR=$mx_home/include - fi - if test -z "$MX_LIBDIR"; then - MX_LIBDIR=$mx_home/lib64 - if test ! -d "$MX_LIBDIR" ; then - MX_LIBDIR=$mx_home/lib - fi - fi - fi - dnl If anything MX-ish was set, go look for header. 
- if test -n "$MX_INCDIR$MX_LIBDIR" ; then - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$MX_INCDIR -I$MX_INCDIR/mx" - AC_CHECK_HEADER(myriexpress.h,, - AC_MSG_ERROR([Header myriexpress.h not found.])) - dnl Run test is not possible on a machine that does not have a MX NIC. - dnl Link test would work, but just check for existence. - if test ! -f $MX_LIBDIR/libmyriexpress.so ; then - if test ! -f $MX_LIBDIR/libmyriexpress.a ; then - AC_MSG_ERROR([Neither MX library libmyriexpress.so or libmyriexpress.a found.]) - fi - fi - BUILD_MX=1 - CPPFLAGS="$save_cppflags" - fi - AC_SUBST(BUILD_MX) - AC_SUBST(MX_INCDIR) - AC_SUBST(MX_LIBDIR) - - if test -n "$BUILD_MX" ; then - dnl Check for existence of mx_decompose_endpoint_addr2 - save_ldflags="$LDFLAGS" - LDFLAGS="-L$MX_LIBDIR $LDFLAGS" - save_libs="$LIBS" - LIBS="-lmyriexpress -lpthread $LIBS" - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$MX_INCDIR" - - AC_MSG_CHECKING(for mx_decompose_endpoint_addr2) - AC_TRY_LINK([ - #include "mx_extensions.h" - #include - ], [ - mx_endpoint_addr_t epa; - mx_decompose_endpoint_addr2(epa, NULL, NULL, NULL); - ], - AC_MSG_RESULT(yes), - AC_MSG_RESULT(no) - AC_MSG_ERROR([Function mx_decompose_endpoint_addr2() not found.]) - ) - - LDFLAGS="$save_ldflags" - CPPFLAGS="$save_cppflags" - LIBS="$save_libs" - fi -]) - -dnl vim: set ft=config : - -AC_DEFUN([AX_IB], -[ - dnl Configure options for IB install path. 
- dnl --with-ib= is shorthand for - dnl --with-ib-includes=/include - dnl --with-ib-libs=/lib (or lib64 if that exists) - ib_home= - AC_ARG_WITH(ib, - [ --with-ib= Location of the IB installation (default no IB)], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-ib requires the path to your IB tree.]) - elif test "$withval" != no ; then - ib_home="$withval" - fi - ) - AC_ARG_WITH(ib-includes, -[ --with-ib-includes= - Location of the IB includes], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-ib-includes requires path to IB headers.]) - elif test "$withval" != no ; then - IB_INCDIR="$withval" - fi - ) - AC_ARG_WITH(ib-libs, -[ --with-ib-libs= Location of the IB libraries], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-ib-libs requires path to IB libraries.]) - elif test "$withval" != no ; then - IB_LIBDIR="$withval" - fi - ) - dnl If supplied the incls and libs explicitly, use them, else populate them - dnl using guesses from the --with-ib dir. - if test -n "$ib_home" ; then - if test -z "$IB_INCDIR"; then - IB_INCDIR=$ib_home/include - fi - if test -z "$IB_LIBDIR"; then - IB_LIBDIR=$ib_home/lib64 - if test ! -d "$IB_LIBDIR" ; then - IB_LIBDIR=$ib_home/lib - fi - fi - fi - dnl If anything IB-ish was set, go look for header. - if test -n "$IB_INCDIR$IB_LIBDIR" ; then - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$IB_INCDIR" - AC_CHECK_HEADER(vapi.h,, AC_MSG_ERROR([Header vapi.h not found.])) - dnl Run test is not possible on a machine that does not have an IB NIC, - dnl and link test is hard because we need so many little libraries. Bail - dnl and just check for existence; full library list is in Makefile.in. - if test ! -f $IB_LIBDIR/libvapi.so ; then - if test ! 
-f $IB_LIBDIR/libvapi.a ; then - AC_MSG_ERROR([Infiniband library libvapi.so not found.]) - fi - fi - BUILD_IB=1 - AC_CHECK_HEADER(wrap_common.h, - AC_DEFINE(HAVE_IB_WRAP_COMMON_H, 1, Define if IB wrap_common.h exists.), - , - [#include ]) - CPPFLAGS="$save_cppflags" - fi - AC_SUBST(BUILD_IB) - AC_SUBST(IB_INCDIR) - AC_SUBST(IB_LIBDIR) - - dnl Configure options for OpenIB install path. - dnl --with-openib= is shorthand for - dnl --with-openib-includes=/include - dnl --with-openib-libs=/lib (or lib64 if that exists) - openib_home= - AC_ARG_WITH(openib, - [ --with-openib= Location of the OpenIB install (default no OpenIB)], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-openib requires the path to your OpenIB tree.]) - elif test "$withval" != no ; then - openib_home="$withval" - fi - ) - AC_ARG_WITH(openib-includes, -[ --with-openib-includes= - Location of the OpenIB includes], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-openib-includes requires path to OpenIB headers.]) - elif test "$withval" != no ; then - OPENIB_INCDIR="$withval" - fi - ) - AC_ARG_WITH(openib-libs, -[ --with-openib-libs= - Location of the OpenIB libraries], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-openib-libs requires path to OpenIB libraries.]) - elif test "$withval" != no ; then - OPENIB_LIBDIR="$withval" - fi - ) - dnl If supplied the incls and libs explicitly, use them, else populate them - dnl using guesses from the --with-openib dir. - if test -n "$openib_home" ; then - if test -z "$OPENIB_INCDIR"; then - OPENIB_INCDIR=$openib_home/include - fi - if test -z "$OPENIB_LIBDIR"; then - OPENIB_LIBDIR=$openib_home/lib64 - if test ! -d "$OPENIB_LIBDIR" ; then - OPENIB_LIBDIR=$openib_home/lib - fi - fi - fi - dnl If anything OpenIB-ish was set, go look for header. 
- if test -n "$OPENIB_INCDIR$OPENIB_LIBDIR" ; then - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$OPENIB_INCDIR" - AC_CHECK_HEADER(infiniband/verbs.h,, - AC_MSG_ERROR([Header infiniband/verbs.h not found.])) - dnl Run test is not possible on a machine that does not have an IB NIC. - dnl Link test would work, but just check for existence. - if test ! -f $OPENIB_LIBDIR/libibverbs.so ; then - if test ! -f $OPENIB_LIBDIR/libibverbs.a ; then - AC_MSG_ERROR([OpenIB library libibverbs.so not found.]) - fi - fi - BUILD_OPENIB=1 - CPPFLAGS="$save_cppflags" - fi - AC_SUBST(BUILD_OPENIB) - AC_SUBST(OPENIB_INCDIR) - AC_SUBST(OPENIB_LIBDIR) - - if test -n "$BUILD_OPENIB" ; then - dnl Check for which version of the ibverbs library; device opening is - dnl different. This format is the older one, newer is - dnl ibv_get_device_list. - save_ldflags="$LDFLAGS" - LDFLAGS="-L$OPENIB_LIBDIR -libverbs" - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$OPENIB_INCDIR" - - AC_MSG_CHECKING(for ibv_get_devices) - AC_TRY_LINK([], [ - ibv_get_devices(); - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IBV_GET_DEVICES, 1, - Define if libibverbs has ibv_get_devices), - AC_MSG_RESULT(no) - ) - - dnl Check for existence of reregister event; it's somewhat new. - AC_MSG_CHECKING(for IBV_EVENT_CLIENT_REREGISTER) - AC_TRY_COMPILE([ - #include "infiniband/verbs.h" - ], [ - enum ibv_event_type x = IBV_EVENT_CLIENT_REREGISTER; - ], - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IBV_EVENT_CLIENT_REREGISTER, 1, - Define if libibverbs has reregister event), - AC_MSG_RESULT(no) - ) - - LDFLAGS="$save_ldflags" - CPPFLAGS="$save_cppflags" - fi -]) - -dnl vim: set ft=config : - -AC_DEFUN([AX_PORTALS], -[ - dnl - dnl Configure to build Portals BMI method, if requested and available. - dnl Use - dnl --with-portals To find include files and libraries in standard - dnl system paths. - dnl --with-portals= To specify a location that has include and lib - dnl (or lib64) subdirectories with the goods. 
- dnl - dnl Or specify the -I an -L and -l flags exactly using, e.g.: - dnl - dnl --with-portals-includes="-I" - dnl --with-portals-libs="-L -l" - dnl - dnl The C file uses #include , so choose your include - dnl path accordingly. If it did not do this, portals/errno.h would sit in - dnl front of the system version. - dnl - use_portals= - home= - incs= - libs= - AC_ARG_WITH(portals, - [ --with-portals[=] Location of the Portals install (default no Portals)], - if test -z "$withval" -o "$withval" = yes ; then - use_portals=yes - elif test "$withval" != no ; then - home="$withval" - fi - ) - AC_ARG_WITH(portals-includes, -[ --with-portals-includes= - Extra CFLAGS to specify Portals includes], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-portals-includes requires an argument.]) - elif test "$withval" != no ; then - incs="$withval" - fi - ) - AC_ARG_WITH(portals-libs, -[ --with-portals-libs= - Extra LIBS to link Portals libraries], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-portals-libs requires an argument.]) - elif test "$withval" != no ; then - libs="$withval" - fi - ) - dnl If supplied the incls and libs explicitly, use them, else populate them - dnl using guesses from the --with-portals dir. - if test -n "$home" ; then - if test -z "$incs"; then - incs=-I$home/include - fi - if test -z "$libs"; then - libs=-L$home/lib64 - if test ! -d "$home/lib64" ; then - libs=-L$home/lib - fi - fi - fi - - dnl - dnl Look for headers and libs. 
- dnl - BUILD_PORTALS= - PORTALS_INCS= - PORTALS_LIBS= - if test "X$use_portals$home$incs$libs" != X ; then - # Save stuff - save_cppflags="$CPPFLAGS" - save_libs="$LIBS" - - PORTALS_INCS="$incs" - CPPFLAGS="$CPPFLAGS $PORTALS_INCS" - - PORTALS_LIBS="$libs" - LIBS="$save_libs $PORTALS_LIBS" - - AC_MSG_CHECKING([for portals3.h header]) - ok=no - AC_TRY_COMPILE( - [#include ], - [int m, n; m = PtlInit(&n);], - [ok=yes]) - - if test "$ok" = yes ; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - AC_MSG_ERROR([Header portals/portals3.h not found.]) - fi - - dnl try without first, for Cray, then try TCP version - dnl Run test is not always possible, esp when cross-compiling or on - dnl a box that does not have the hardware. - AC_MSG_CHECKING([for portals libraries]) - ok=no - AC_TRY_LINK( - [#include ], - [int m, n; m = PtlInit(&n);], - [ok=yes]) - - if test "$ok" = no ; then - PORTALS_LIBS="$libs -lportals" - LIBS="$save_libs $PORTALS_LIBS" - AC_TRY_LINK( - [#include ], - [int m, n; m = PtlInit(&n);], - [ok=yes]) - fi - - if test "$ok" = no ; then - PORTALS_LIBS="$libs -lp3api -lp3lib -lp3utcp -lp3rt -lpthread" - LIBS="$save_libs $PORTALS_LIBS" - AC_TRY_LINK( - [#include ], - [int m, n; m = PtlInit(&n);], - [ok=yes]) - fi - - if test "$ok" = yes ; then - AC_MSG_RESULT([yes]) - BUILD_PORTALS=1 - else - AC_MSG_RESULT([no]) - AC_MSG_ERROR([Could not link Portals library.]) - fi - - # - # Check for API variations. - # - AC_CHECK_FUNCS(PtlErrorStr) - AC_CHECK_FUNCS(PtlEventKindStr) - - AC_TRY_COMPILE( - [#include ], - [int m; ptl_process_id_t any_pid; - m = PtlACEntry(0, 0, any_pid, (ptl_uid_t) -1, (ptl_jid_t) -1, 0);], - AC_DEFINE(HAVE_PTLACENTRY_JID, 1, - [Define if have PtlACEntry with jid argument.])) - - # Reset - CPPFLAGS="$save_cppflags" - LIBS="$save_libs" - fi - AC_SUBST(BUILD_PORTALS) - AC_SUBST(PORTALS_INCS) - AC_SUBST(PORTALS_LIBS) -]) - -dnl vim: set ft=config : - -# -# Configure rules for ZOID -# -# See COPYING in top-level directory. 
-# -AC_DEFUN([AX_ZOID], -[ - dnl Configure options for ZOID install path. - dnl --with-zoid= - AC_ARG_WITH(zoid, -[ --with-zoid= Location of the ZOID tree (default no ZOID)], - if test -z "$withval" -o "$withval" = yes ; then - AC_MSG_ERROR([Option --with-zoid requires the path to your ZOID source tree.]) - elif test "$withval" != no ; then - ZOID_SRCDIR="$withval" - fi - ) - if test -n "$ZOID_SRCDIR" ; then - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -Isrc/io/bmi -I$ZOID_SRCDIR/include -I$ZOID_SRCDIR/zbmi -I$ZOID_SRCDIR/zbmi/implementation" - AC_CHECK_HEADER(zbmi.h,, AC_MSG_ERROR([Header zbmi.h not found.])) - AC_CHECK_HEADER(zoid_api.h,, AC_MSG_ERROR([Header zoid_api.h not found.])) - AC_CHECK_HEADER(zbmi_protocol.h,, AC_MSG_ERROR([Header zbmi_protocol.h not found.])) - CPPFLAGS="$save_cppflags" - BUILD_ZOID=1 - fi - AC_SUBST(BUILD_ZOID) - AC_SUBST(ZOID_SRCDIR) -]) - -dnl vim: set ft=config : - diff --git a/cert-utils/pvfs2-grid-proxy-init.sh b/cert-utils/pvfs2-grid-proxy-init.sh new file mode 100755 index 0000000..3080b53 --- /dev/null +++ b/cert-utils/pvfs2-grid-proxy-init.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# This script generates a proxy certificate with a policy in the format +# of {UID}/{GID}, e.g. 1000/100. The certificate is stored in /tmp/x509up_u{UID}, +# e.g. /tmp/x509up_u1000. This certificate is for use with the OrangeFS +# Windows Client. +# +# $GLOBUS_LOCATION must be set, or grid-proxy-init must be on the path. +# +# Arguments to this script will be passed to grid-proxy-init. + +echo `id -u`/`id -g` > cert-policy +if [ $? 
-ne 0 ]; then + echo Could not create cert-policy, exiting + exit 1 +fi + +if [ "$GLOBUS_LOCATION" != "" ]; then + $GLOBUS_LOCATION/bin/grid-proxy-init -policy cert-policy -pl id-ppl-anyLanguage $@ +else + grid-proxy-init -policy cert-policy -pl id-ppl-anyLanguage $@ +fi + diff --git a/config.save b/config.save index be895d8..c8eb321 100755 --- a/config.save +++ b/config.save @@ -525,17 +525,17 @@ s,@ECHO_C@,,;t t s,@ECHO_N@,-n,;t t s,@ECHO_T@,,;t t s,@LIBS@, -lcrypto -lssl -ldl,;t t -s,@PVFS2_VERSION@,2.8.6-orangefs-2012-07-03-162939,;t t +s,@PVFS2_VERSION@,2.8.6-orangefs-2012-07-13-043849,;t t s,@PVFS2_VERSION_MAJOR@,2,;t t s,@PVFS2_VERSION_MINOR@,8,;t t s,@PVFS2_VERSION_SUB@,6,;t t -s,@build@,x86_64-unknown-linux-gnu,;t t -s,@build_cpu@,x86_64,;t t -s,@build_vendor@,unknown,;t t +s,@build@,i686-pc-linux-gnu,;t t +s,@build_cpu@,i686,;t t +s,@build_vendor@,pc,;t t s,@build_os@,linux-gnu,;t t -s,@host@,x86_64-unknown-linux-gnu,;t t -s,@host_cpu@,x86_64,;t t -s,@host_vendor@,unknown,;t t +s,@host@,i686-pc-linux-gnu,;t t +s,@host_cpu@,i686,;t t +s,@host_vendor@,pc,;t t s,@host_os@,linux-gnu,;t t s,@CC@,gcc,;t t s,@CFLAGS@, -g -O2,;t t @@ -579,9 +579,9 @@ s,@THREADED_KMOD_HELPER@,,;t t s,@LINUX_KERNEL_SRC@,,;t t s,@LINUX24_KERNEL_SRC@,,;t t s,@LINUX24_KERNEL_MINOR_VER@,,;t t -s,@BUILD_ABSOLUTE_TOP@,/usr/src/orangefsosd-2.8.6,;t t +s,@BUILD_ABSOLUTE_TOP@,/usr/src/pvfs2-osd,;t t s,@SRC_RELATIVE_TOP@,./,;t t -s,@SRC_ABSOLUTE_TOP@,/usr/src/orangefsosd-2.8.6,;t t +s,@SRC_ABSOLUTE_TOP@,/usr/src/pvfs2-osd,;t t s,@FUSE_LDFLAGS@,,;t t s,@FUSE_CFLAGS@,,;t t s,@BUILD_FUSE@,,;t t @@ -983,14 +983,14 @@ CEOF t clr : clr ${ac_dA}HAVE_ARPA_INET_H${ac_dB}HAVE_ARPA_INET_H${ac_dC}1${ac_dD} -${ac_dA}SIZEOF_LONG_INT${ac_dB}SIZEOF_LONG_INT${ac_dC}8${ac_dD} +${ac_dA}SIZEOF_LONG_INT${ac_dB}SIZEOF_LONG_INT${ac_dC}4${ac_dD} ${ac_dA}WITH_OPENSSL${ac_dB}WITH_OPENSSL${ac_dC}1${ac_dD} ${ac_dA}HAVE_OPENSSL_EVP_H${ac_dB}HAVE_OPENSSL_EVP_H${ac_dC}1${ac_dD} 
${ac_dA}HAVE_OPENSSL_CRYPTO_H${ac_dB}HAVE_OPENSSL_CRYPTO_H${ac_dC}1${ac_dD} ${ac_dA}HAVE_OPENSSL_SHA_H${ac_dB}HAVE_OPENSSL_SHA_H${ac_dC}1${ac_dD} ${ac_dA}HAVE_AIOCB_ERROR_CODE${ac_dB}HAVE_AIOCB_ERROR_CODE${ac_dC}1${ac_dD} ${ac_dA}HAVE_AIOCB_RETURN_VALUE${ac_dB}HAVE_AIOCB_RETURN_VALUE${ac_dC}1${ac_dD} -${ac_dA}SIZEOF_VOID_P${ac_dB}SIZEOF_VOID_P${ac_dC}8${ac_dD} +${ac_dA}SIZEOF_VOID_P${ac_dB}SIZEOF_VOID_P${ac_dC}4${ac_dD} ${ac_dA}HAVE_SYS_EPOLL_H${ac_dB}HAVE_SYS_EPOLL_H${ac_dC}1${ac_dD} ${ac_dA}PVFS_USRINT_BUILD${ac_dB}PVFS_USRINT_BUILD${ac_dC}1${ac_dD} ${ac_dA}PVFS_USRINT_KMOUNT${ac_dB}PVFS_USRINT_KMOUNT${ac_dC}0${ac_dD} @@ -1075,14 +1075,14 @@ CEOF t clr : clr ${ac_uA}HAVE_ARPA_INET_H${ac_uB}HAVE_ARPA_INET_H${ac_uC}1${ac_uD} -${ac_uA}SIZEOF_LONG_INT${ac_uB}SIZEOF_LONG_INT${ac_uC}8${ac_uD} +${ac_uA}SIZEOF_LONG_INT${ac_uB}SIZEOF_LONG_INT${ac_uC}4${ac_uD} ${ac_uA}WITH_OPENSSL${ac_uB}WITH_OPENSSL${ac_uC}1${ac_uD} ${ac_uA}HAVE_OPENSSL_EVP_H${ac_uB}HAVE_OPENSSL_EVP_H${ac_uC}1${ac_uD} ${ac_uA}HAVE_OPENSSL_CRYPTO_H${ac_uB}HAVE_OPENSSL_CRYPTO_H${ac_uC}1${ac_uD} ${ac_uA}HAVE_OPENSSL_SHA_H${ac_uB}HAVE_OPENSSL_SHA_H${ac_uC}1${ac_uD} ${ac_uA}HAVE_AIOCB_ERROR_CODE${ac_uB}HAVE_AIOCB_ERROR_CODE${ac_uC}1${ac_uD} ${ac_uA}HAVE_AIOCB_RETURN_VALUE${ac_uB}HAVE_AIOCB_RETURN_VALUE${ac_uC}1${ac_uD} -${ac_uA}SIZEOF_VOID_P${ac_uB}SIZEOF_VOID_P${ac_uC}8${ac_uD} +${ac_uA}SIZEOF_VOID_P${ac_uB}SIZEOF_VOID_P${ac_uC}4${ac_uD} ${ac_uA}HAVE_SYS_EPOLL_H${ac_uB}HAVE_SYS_EPOLL_H${ac_uC}1${ac_uD} ${ac_uA}PVFS_USRINT_BUILD${ac_uB}PVFS_USRINT_BUILD${ac_uC}1${ac_uD} ${ac_uA}PVFS_USRINT_KMOUNT${ac_uB}PVFS_USRINT_KMOUNT${ac_uC}0${ac_uD} diff --git a/include/orange.h b/include/orange.h new file mode 100644 index 0000000..84138c7 --- /dev/null +++ b/include/orange.h @@ -0,0 +1,27 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* This is the master header file for OrangeFS. 
It pulls in all header + * files needed by client side for software that operates at or above + * the system interface level. + */ + +#ifndef __ORANGE_H +#define __ORANGE_H + +#include "pvfs2-usrint.h" +#include "pvfs2.h" + +#endif /* __ORANGE_H */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/include/pvfs2-usrint.h b/include/pvfs2-usrint.h new file mode 100644 index 0000000..08ae3dc --- /dev/null +++ b/include/pvfs2-usrint.h @@ -0,0 +1,329 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines + */ + +#ifndef PVFS_USRINT_H +#define PVFS_USRINT_H 1 + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif +#ifndef _ATFILE_SOURCE +#define _ATFILE_SOURCE 1 +#endif +#ifndef _LARGEFILE_SOURCE +#define _LARGEFILE_SOURCE 1 +#endif +#ifndef _LARGEFILE64_SOURCE +#define _LARGEFILE64_SOURCE 1 +#endif +#ifndef _USE_FILE_OFFSET64 +#define _USE_FILE_OFFSET64 1 +#endif +#ifndef _FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 64 +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* define open flags unique to PVFS here */ +#define O_HINTS 02000000 /* PVFS hints are present */ +#define O_NOTPVFS 04000000 /* Open non-PVFS files if possible */ + +/* define FD flags unique to PVFS here */ +#define PVFS_FD_NOCACHE 0x10000 + +/* Define AT_FDCWD and related flags on older systems */ +#ifndef AT_FDCWD +# define AT_FDCWD -100 /* Special value used to indicate + the *at functions should use the + current working directory. */ +#endif +#ifndef AT_SYMLINK_NOFOLLOW +# define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ +#endif +#ifndef AT_REMOVDIR +# define AT_REMOVEDIR 0x200 /* Remove directory instead of + unlinking file. 
*/ +#endif +#ifndef AT_SYMLINK_FOLLOW +# define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ +#endif +#ifndef AT_EACCESS +# define AT_EACCESS 0x200 /* Test access permitted for + effective IDs, not real IDs. */ +#endif + +/* pvfs_open */ +extern int pvfs_open(const char *path, int flags, ...); + +/* pvfs_open64 */ +extern int pvfs_open64(const char *path, int flags, ...); + +/* pvfs_openat */ +extern int pvfs_openat(int dirfd, const char *path, int flags, ...); + +/* pvfs_openat64 */ +extern int pvfs_openat64(int dirfd, const char *path, int flags, ...); + +extern int pvfs_creat(const char *path, mode_t mode, ...); + +extern int pvfs_creat64(const char *path, mode_t mode, ...); + +/* pvfs_unlink */ +extern int pvfs_unlink (const char *path); + +extern int pvfs_unlinkat (int dirfd, const char *path, int flags); + +extern int pvfs_rename(const char *oldpath, const char *newpath); + +extern int pvfs_renameat(int olddirfd, const char *oldpath, + int newdirfd, const char *newpath); + +/* pvfs_read */ +extern ssize_t pvfs_read( int fd, void *buf, size_t count ); + +/* pvfs_pread */ +extern ssize_t pvfs_pread( int fd, void *buf, size_t count, off_t offset ); + +extern ssize_t pvfs_readv(int fd, const struct iovec *vector, int count); + +/* pvfs_pread64 */ +extern ssize_t pvfs_pread64( int fd, void *buf, size_t count, off64_t offset ); + +/* pvfs_write */ +extern ssize_t pvfs_write( int fd, const void *buf, size_t count ); + +/* pvfs_pwrite */ +extern ssize_t pvfs_pwrite( int fd, const void *buf, size_t count, off_t offset ); + +extern ssize_t pvfs_writev( int fd, const struct iovec *vector, int count ); + +/* pvfs_pwrite64 */ +extern ssize_t pvfs_pwrite64( int fd, const void *buf, size_t count, off64_t offset ); + +/* pvfs_lseek */ +extern off_t pvfs_lseek(int fd, off_t offset, int whence); + +/* pvfs_lseek64 */ +extern off64_t pvfs_lseek64(int fd, off64_t offset, int whence); + +extern int pvfs_truncate(const char *path, off_t length); + +extern int 
pvfs_truncate64 (const char *path, off64_t length); + +extern int pvfs_fallocate(int fd, off_t offset, off_t length); + +extern int pvfs_ftruncate (int fd, off_t length); + +extern int pvfs_ftruncate64 (int fd, off64_t length); + +/* pvfs_close */ +extern int pvfs_close( int fd ); + +extern int pvfs_flush(int fd); + +/* various flavors of stat */ +extern int pvfs_stat(const char *path, struct stat *buf); + +extern int pvfs_stat64(const char *path, struct stat64 *buf); + +extern int pvfs_stat_mask(const char *path, struct stat *buf, uint32_t mask); + +extern int pvfs_fstat(int fd, struct stat *buf); + +extern int pvfs_fstat64(int fd, struct stat64 *buf); + +extern int pvfs_fstatat(int fd, const char *path, struct stat *buf, int flag); + +extern int pvfs_fstatat64(int fd, const char *path, struct stat64 *buf, int flag); + +extern int pvfs_fstat_mask(int fd, struct stat *buf, uint32_t mask); + +extern int pvfs_lstat(const char *path, struct stat *buf); + +extern int pvfs_lstat64(const char *path, struct stat64 *buf); + +extern int pvfs_lstat_mask(const char *path, struct stat *buf, uint32_t mask); + +extern int pvfs_futimesat(int dirfd, const char *path, const struct timeval times[2]); + +extern int pvfs_utimes(const char *path, const struct timeval times[2]); + +extern int pvfs_utime(const char *path, const struct utimbuf *buf); + +extern int pvfs_futimes(int fd, const struct timeval times[2]); + +extern int pvfs_dup(int oldfd); + +extern int pvfs_dup2(int oldfd, int newfd); + +extern int pvfs_chown (const char *path, uid_t owner, gid_t group); + +extern int pvfs_fchown (int fd, uid_t owner, gid_t group); + +extern int pvfs_fchownat(int fd, const char *path, uid_t owner, gid_t group, int flag); + +extern int pvfs_lchown (const char *path, uid_t owner, gid_t group); + +extern int pvfs_chmod (const char *path, mode_t mode); + +extern int pvfs_fchmod (int fd, mode_t mode); + +extern int pvfs_fchmodat(int fd, const char *path, mode_t mode, int flag); + +extern int 
pvfs_mkdir (const char *path, mode_t mode); + +extern int pvfs_mkdirat (int dirfd, const char *path, mode_t mode); + +extern int pvfs_rmdir (const char *path); + +extern ssize_t pvfs_readlink (const char *path, char *buf, size_t bufsiz); + +extern ssize_t pvfs_readlinkat (int dirfd, const char *path, char *buf, size_t bufsiz); + +extern int pvfs_symlink (const char *oldpath, const char *newpath); + +extern int pvfs_symlinkat (const char *oldpath, int newdirfd, const char *newpath); + +/* PVFS does not have hard links */ +extern int pvfs_link (const char *oldpath, const char *newpath); + +/* PVFS does not have hard links */ +extern int pvfs_linkat (int olddirfd, const char *oldpath, + int newdirfd, const char *newpath, int flags); + +/* this reads exactly one dirent, count is ignored */ +extern int pvfs_readdir(unsigned int fd, struct dirent *dirp, unsigned int count); + +/* this reads multiple dirents, up to count */ +extern int pvfs_getdents(unsigned int fd, struct dirent *dirp, unsigned int count); + +extern int pvfs_getdents64(unsigned int fd, struct dirent64 *dirp, unsigned int count); + +extern int pvfs_access (const char * path, int mode); + +extern int pvfs_faccessat (int dirfd, const char * path, int mode, int flags); + +extern int pvfs_flock(int fd, int op); + +extern int pvfs_fcntl(int fd, int cmd, ...); + +/* sync all disk data */ +extern void pvfs_sync(void ); + +/* sync file, but not dir it is in */ +extern int pvfs_fsync(int fd); + +/* does not sync file metadata */ +extern int pvfs_fdatasync(int fd); + +extern int pvfs_fadvise(int fd, off_t offset, off_t len, int advice); + +extern int pvfs_fadvise64(int fd, off64_t offset, off64_t len, int advice); + +extern int pvfs_statfs(const char *path, struct statfs *buf); + +extern int pvfs_statfs64(const char *path, struct statfs64 *buf); + +extern int pvfs_fstatfs(int fd, struct statfs *buf); + +extern int pvfs_fstatfs64(int fd, struct statfs64 *buf); + +extern int pvfs_statvfs(const char *path, struct 
statvfs *buf); + +extern int pvfs_fstatvfs(int fd, struct statvfs *buf); + +extern int pvfs_mknod(const char *path, mode_t mode, dev_t dev); + +extern int pvfs_mknodat(int dirfd, const char *path, mode_t mode, dev_t dev); + +extern ssize_t pvfs_sendfile(int outfd, int infd, off_t *offset, size_t count); + +extern ssize_t pvfs_sendfile64(int outfd, int infd, off64_t *offset, size_t count); + +extern int pvfs_setxattr(const char *path, const char *name, + const void *value, size_t size, int flags); + +extern int pvfs_lsetxattr(const char *path, const char *name, + const void *value, size_t size, int flags); + +extern int pvfs_fsetxattr(int fd, const char *name, + const void *value, size_t size, int flags); + +extern ssize_t pvfs_getxattr(const char *path, const char *name, + void *value, size_t size); + +extern ssize_t pvfs_lgetxattr(const char *path, const char *name, + void *value, size_t size); + +extern ssize_t pvfs_fgetxattr(int fd, const char *name, + void *value, size_t size); + +extern ssize_t pvfs_listxattr(const char *path, char *list, size_t size); + +extern ssize_t pvfs_llistxattr(const char *path, char *list, size_t size); + +extern ssize_t pvfs_flistxattr(int fd, char *list, size_t size); + +extern int pvfs_removexattr(const char *path, const char *name); + +extern int pvfs_lremovexattr(const char *path, const char *name); + +extern int pvfs_fremovexattr(int fd, const char *name); + +extern int pvfs_chdir(const char *path); + +extern int pvfs_fchdir(int fd); + +extern int pvfs_cwd_init(const char *buf, size_t size); + +extern char *pvfs_getcwd(char *buf, size_t size); + +extern char *pvfs_get_current_dir_name(void); + +extern char *pvfs_getwd(char *buf); + +extern mode_t pvfs_umask(mode_t mask); + +extern mode_t pvfs_getumask(void); + +extern int pvfs_getdtablesize(void); + +extern void *pvfs_mmap(void *start, size_t length, int prot, int flags, + int fd, off_t offset); + +extern int pvfs_munmap(void *start, size_t length); + +extern int pvfs_msync(void 
*start, size_t length, int flags); + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/apps/admin/pvfs2-get-uid.c b/src/apps/admin/pvfs2-get-uid.c new file mode 100644 index 0000000..2e8ba7b --- /dev/null +++ b/src/apps/admin/pvfs2-get-uid.c @@ -0,0 +1,337 @@ +#include +#include +#include + +#include "pvfs2.h" +#include "pvfs2-mgmt.h" +#include "bmi.h" +#include "pint-uid-mgmt.h" +#include "pint-util.h" +#include "pint-cached-config.h" + +#define UID_HISTORY_MAX_SECS 4294967295 /* max uint32_t val */ +#define UID_SERV_LIST_SIZE 25 /* maximum servers to get stats from */ + +struct options +{ + uint32_t history; + char **server_list; + int server_count; + PVFS_fs_id fs_id; +}; + +static struct options *parse_args(int argc, char *argv[]); +static void usage(int argc, char *argv[]); +static void cleanup(struct options *ptr, PVFS_BMI_addr_t *addr_array, + PVFS_uid_info_s **uid_stats); + +int main(int argc, char *argv[]) +{ + PVFS_credentials creds; + PVFS_fs_id cur_fs; + PVFS_BMI_addr_t *addr_array, server_addr; + PVFS_uid_info_s **uid_info_array; + uint32_t *uid_info_count; + char uid_timestamp[64], curTime[64]; + struct options *prog_opts = NULL; + int ret = 0; + int i, j; + struct timeval currentTime; + + /* parse command line arguments */ + prog_opts = parse_args(argc, argv); + if (!prog_opts) + { + fprintf(stderr, "Unable to allocate memory for command line args\n"); + exit(EXIT_FAILURE); + } + + if (!(prog_opts->history)) + { + prog_opts->history = UID_HISTORY_MAX_SECS; + } + + ret = PVFS_util_init_defaults(); + if (ret < 0) + { + PVFS_perror("PVFS_util_init_defaults", ret); + return (-1); + } + + PVFS_util_gen_credentials(&creds); + + /* get a default fsid or use the one given by the user */ + if (prog_opts->fs_id == -1) + { + ret = PVFS_util_get_default_fsid(&cur_fs); + if (ret < 0) + { + PVFS_perror("PVFS_util_get_default_fsid", ret); + return (-1); + } + } + 
else + { + cur_fs = prog_opts->fs_id; + } + + /* if user specifies servers, allocate memory for the BMI addrs and + * then translate the server strings to BMI addrs + */ + if (prog_opts->server_count) + { + /* allocate memory for our BMI addresses and fill them in */ + addr_array = (PVFS_BMI_addr_t *)malloc(prog_opts->server_count * + sizeof(PVFS_BMI_addr_t)); + if (!addr_array) + { + fprintf(stderr, "Unable to allocate memory for BMI addrs\n"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < prog_opts->server_count; i ++) + { + ret = BMI_addr_lookup(&server_addr, prog_opts->server_list[i]); + if (ret < 0) + { + PVFS_perror("BMI_addr_lookup", ret); + return (-1); + } + addr_array[i] = server_addr; + } + } + else + { + /* else, user specified no servers, so a list will be built */ + ret = PVFS_mgmt_count_servers(cur_fs, &creds, PINT_SERVER_TYPE_ALL, + &(prog_opts->server_count)); + if (ret < 0) + { + PVFS_perror("PVFS_mgmt_count_servers", ret); + return (-1); + } + + /* allocate memory for the number of BMI addrs found */ + addr_array = (PVFS_BMI_addr_t *)malloc(prog_opts->server_count * + sizeof(PVFS_BMI_addr_t)); + if (!addr_array) + { + fprintf(stderr, "Unable to allocate memory for BMI addrs\n"); + exit(EXIT_FAILURE); + } + + /* retrieve the list of BMI addrs for the list of servers */ + ret = PVFS_mgmt_get_server_array(cur_fs, &creds, PINT_SERVER_TYPE_ALL, + addr_array, + &(prog_opts->server_count)); + if (ret < 0) + { + PVFS_perror("PVFS_mgmt_get_server_array", ret); + return (-1); + } + + /* use reverse lookups so the server URI's can be displayed to the user */ + for (i = 0; i < prog_opts->server_count; i++) + { + prog_opts->server_list[i] = strdup(BMI_addr_rev_lookup(addr_array[i])); + } + } + + /* allocate memory to store the uid statistics from the given servers */ + uid_info_array = (PVFS_uid_info_s **)malloc(prog_opts->server_count * + sizeof(PVFS_uid_info_s *)); + if (!uid_info_array) + { + fprintf(stderr, "Unable to allocate memory for uid stats 
array\n"); + exit(EXIT_FAILURE); + } + for (i = 0; i < prog_opts->server_count; i++) + { + uid_info_array[i] = (PVFS_uid_info_s *)malloc(UID_MGMT_MAX_HISTORY * + sizeof(PVFS_uid_info_s)); + if(!uid_info_array[i]) + { + fprintf(stderr, "Unable to allocate memory for uid stats array\n"); + exit(EXIT_FAILURE); + } + } + + uid_info_count = (uint32_t *)malloc(prog_opts->server_count * sizeof(uint32_t)); + if (!uid_info_count) + { + fprintf(stderr, "Memory allocation error, out of memory\n"); + } + + /* retrieve the statistics from the servers, checking for any errors */ + ret = PVFS_mgmt_get_uid_list(cur_fs, &creds, prog_opts->server_count, + addr_array, prog_opts->history, uid_info_array, + uid_info_count, NULL, NULL); + if (ret < 0) + { + PVFS_perror("PVFS_mgmt_get_uid_list", ret); + return (-1); + } + + printf("\nFSID: %d\n", cur_fs); + + /* get a current timestamp for users to compare against */ + PINT_util_get_current_timeval(¤tTime); + PINT_util_parse_timeval(currentTime, curTime); + printf("Current Time: %s\n\n", curTime); + + /* display the uid statistics for each server to the user */ + for (i = 0; i < prog_opts->server_count; i++) + { + printf("Server: %s\n", prog_opts->server_list[i]); + for (j = 0; j < uid_info_count[i]; j++) + { + + PINT_util_parse_timeval(uid_info_array[i][j].tv, uid_timestamp); + printf("\tUID: %-10u\tcount: %-10llu\t%s\n", uid_info_array[i][j].uid, + (long long unsigned int)uid_info_array[i][j].count, + uid_timestamp); + } + printf("\n"); + } + + /* memory cleanup */ + cleanup(prog_opts, addr_array, uid_info_array); + + return 0; +} + +/* parse_args() + * + * parses command line arguments and returns pointer to program options + */ +static struct options *parse_args(int argc, char *argv[]) +{ + char flags[] = "s:t:f:h"; + int one_opt = 0; + struct options *tmp_opts = NULL; + int server_cnt = 0; + int i; + + /* allocate memory for the program options */ + tmp_opts = (struct options *)malloc(sizeof(struct options)); + if (!tmp_opts) + { + 
return NULL; + } + memset(tmp_opts, 0, sizeof(struct options)); + + /* allocate memory for storing pointers to server addrs */ + tmp_opts->server_list = (char **)malloc(UID_SERV_LIST_SIZE * sizeof(char *)); + for (i = 0; i < UID_SERV_LIST_SIZE; i++) + { + tmp_opts->server_list[i] = NULL; + } + + tmp_opts->fs_id = -1; + + /* parse args using getopt() */ + while((one_opt = getopt(argc, argv, flags)) != EOF) + { + switch(one_opt) + { + case('s'): + if (server_cnt == UID_SERV_LIST_SIZE) + { + fprintf(stderr, "Server limit exceded, using first %d servers\n", UID_SERV_LIST_SIZE); + break; + } + if (server_cnt > UID_SERV_LIST_SIZE) + { + break; + } + tmp_opts->server_list[server_cnt] = strdup(optarg); + server_cnt++; + break; + case('t'): + tmp_opts->history = atoi(optarg); + if (tmp_opts->history < 1) + { + usage(argc, argv); + exit(EXIT_FAILURE); + } + break; + case('f'): + tmp_opts->fs_id = atoi(optarg); + if (tmp_opts->fs_id < 0) + { + usage(argc, argv); + exit(EXIT_FAILURE); + } + break; + case('h'): + usage(argc, argv); + exit(EXIT_SUCCESS); + case('?'): + usage(argc, argv); + exit(EXIT_FAILURE); + } + } + + tmp_opts->server_count = server_cnt; + + return tmp_opts; +} + +/* usage() + * + * displays proper program usage to the user + */ +static void usage(int argc, char *argv[]) +{ + fprintf(stderr, "\n"); + fprintf(stderr, "Usage : %s [-s server] ... [-t history] [-f fs_id]\n", argv[0]); + fprintf(stderr, "Example: %s -s tcp://127.0.0.1:3334 -t 60 -f 135161\n", argv[0]); + fprintf(stderr, "\nOPTIONS:\n"); + fprintf(stderr, "\n-s\t specify a server address, e.g. 
tcp://127.0.0.1:3334\n"); + fprintf(stderr, "\t multiple servers can be specified by repeating -s option\n"); + fprintf(stderr, "\t if no servers are specified, a list will be generated\n"); + fprintf(stderr, "\n-t\t history measured in seconds (must be > 0)\n"); + fprintf(stderr, "\t if no history is specified, all uid history is returned\n"); + fprintf(stderr, "\n-f\t specify a PVFS_fs_id\n"); + fprintf(stderr, "\t if not specified, a default fs_id is found\n"); + fprintf(stderr, "\n-h\t display program usage\n\n"); + return; +} + +/* cleanup() + * + * This function frees all memory used by this application + */ +static void cleanup(struct options *opts, PVFS_BMI_addr_t *addr_array, + PVFS_uid_info_s **uid_stats) +{ + int i; + + for (i = 0; i < UID_SERV_LIST_SIZE; i++) + { + if (opts->server_list[i] == NULL) + { + break; + } + free(opts->server_list[i]); + } + for (i = 0; i < opts->server_count; i++) + { + free(uid_stats[i]); + } + free(opts->server_list); + free(opts); + free(addr_array); + free(uid_stats); + return; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=4 sts=4 sw=4 expandtab + */ + diff --git a/src/apps/admin/pvfs2-perf-mon-snmp.c b/src/apps/admin/pvfs2-perf-mon-snmp.c new file mode 100644 index 0000000..226ab67 --- /dev/null +++ b/src/apps/admin/pvfs2-perf-mon-snmp.c @@ -0,0 +1,429 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bmi.h" +#include "pvfs2.h" +#include "pvfs2-mgmt.h" +#include "pvfs2-internal.h" + +#define HISTORY 1 +#define CMD_BUF_SIZE 256 + +/* these defines should match the defines in include/pvfs2-mgmt.h */ +#define OID_READ ".1.3.6.1.4.1.7778.0" +#define OID_WRITE ".1.3.6.1.4.1.7778.1" +#define OID_MREAD ".1.3.6.1.4.1.7778.2" +#define OID_MWRITE ".1.3.6.1.4.1.7778.3" +#define OID_DSPACE ".1.3.6.1.4.1.7778.4" +#define OID_KEYVAL ".1.3.6.1.4.1.7778.5" +#define OID_REQSCHED ".1.3.6.1.4.1.7778.6" +#define OID_REQUESTS ".1.3.6.1.4.1.7778.7" +#define OID_SMALL_READ ".1.3.6.1.4.1.7778.8" +#define OID_SMALL_WRITE ".1.3.6.1.4.1.7778.9" +#define OID_FLOW_READ ".1.3.6.1.4.1.7778.10" +#define OID_FLOW_WRITE ".1.3.6.1.4.1.7778.11" + +#define INT_TYPE "INTEGER" +#define CNT_TYPE "COUNTER" + +#ifndef PVFS2_VERSION +#define PVFS2_VERSION "Unknown" +#endif + +struct MGMT_perf_iod +{ + const char *key_oid; + const char *key_type; + int key_number; + const char *key_name; +}; + +/* this table needs to match the list of keys in pvfs2-mgmt.h */ +static struct MGMT_perf_iod key_table[] = +{ + {OID_READ, CNT_TYPE, PINT_PERF_READ, "Bytes Read"}, + {OID_WRITE, CNT_TYPE, PINT_PERF_WRITE, "Bytes Written"}, + {OID_MREAD, CNT_TYPE, PINT_PERF_METADATA_READ, "Metadata Read Ops"}, + {OID_MWRITE, CNT_TYPE, PINT_PERF_METADATA_WRITE, "Metadata Write Ops"}, + {OID_DSPACE, CNT_TYPE, PINT_PERF_METADATA_DSPACE_OPS, "Metadata DSPACE Ops"}, + {OID_KEYVAL, CNT_TYPE, PINT_PERF_METADATA_KEYVAL_OPS, "Metadata KEYVAL Ops"}, + {OID_REQSCHED, INT_TYPE, PINT_PERF_REQSCHED, "Requests Active"}, + {OID_REQUESTS, CNT_TYPE, PINT_PERF_REQUESTS, "Requests Received"}, + {OID_SMALL_READ, CNT_TYPE, PINT_PERF_SMALL_READ, "Bytes Read by Small_IO"}, + {OID_SMALL_WRITE, CNT_TYPE, PINT_PERF_SMALL_WRITE, "Bytes Written by Small_IO"}, + {OID_FLOW_READ, CNT_TYPE, PINT_PERF_FLOW_READ, "Bytes Read by Flow"}, + 
{OID_FLOW_WRITE, CNT_TYPE, PINT_PERF_FLOW_WRITE, "Bytes Written by Flow"}, + {NULL, NULL, -1, NULL} /* this halts the key count */ +}; + +struct options +{ + char* mnt_point; + int mnt_point_set; + char* server_addr; + int server_addr_set; +}; + +static struct options* parse_args(int argc, char* argv[]); +static void usage(int argc, char** argv); + +int main(int argc, char **argv) +{ + int ret = -1; + char *retc = NULL; + PVFS_fs_id cur_fs; + struct options* user_opts = NULL; + char pvfs_path[PVFS_NAME_MAX] = {0}; + int i; + PVFS_credentials creds; + int io_server_count; + int64_t **perf_matrix; + uint64_t* end_time_ms_array; + uint32_t* next_id_array; + PVFS_BMI_addr_t *addr_array, server_addr; + char *cmd_buffer = (char *)malloc(CMD_BUF_SIZE); + int max_keys, key_count; + + /* look at command line arguments */ + user_opts = parse_args(argc, argv); + if (!user_opts) + { + fprintf(stderr, "Error: failed to parse command line arguments.\n"); + usage(argc, argv); + return(-1); + } + + ret = PVFS_util_init_defaults(); + if (ret < 0) + { + PVFS_perror("PVFS_util_init_defaults", ret); + return(-1); + } + + PVFS_util_gen_credentials(&creds); + if (user_opts->server_addr_set) + { + if (PVFS_util_get_default_fsid(&cur_fs) < 0) + { + /* Can't find a file system */ + fprintf(stderr, "Error: failed to find a file system.\n"); + usage(argc, argv); + return(-1); + } + if (user_opts->server_addr && + (BMI_addr_lookup (&server_addr, user_opts->server_addr) == 0)) + { + /* set up single server */ + addr_array = (PVFS_BMI_addr_t *)malloc(sizeof(PVFS_BMI_addr_t)); + addr_array[0] = server_addr; + io_server_count = 1; + } + else + { + /* bad argument - address not found */ + fprintf(stderr, "Error: failed to parse server address.\n"); + usage(argc, argv); + return(-1); + } + } + else + { + /* will sample all servers */ + /* translate local path into pvfs2 relative path */ + ret = PVFS_util_resolve(user_opts->mnt_point, + &cur_fs, pvfs_path, PVFS_NAME_MAX); + if (ret < 0) + { + 
PVFS_perror("PVFS_util_resolve", ret); + return(-1); + } + + /* count how many I/O servers we have */ + ret = PVFS_mgmt_count_servers(cur_fs, &creds, PVFS_MGMT_IO_SERVER, + &io_server_count); + if (ret < 0) + { + PVFS_perror("PVFS_mgmt_count_servers", ret); + return(-1); + } + + /* build a list of servers to talk to */ + addr_array = (PVFS_BMI_addr_t *) + malloc(io_server_count * sizeof(PVFS_BMI_addr_t)); + if (addr_array == NULL) + { + perror("malloc"); + return -1; + } + ret = PVFS_mgmt_get_server_array(cur_fs, + &creds, + PVFS_MGMT_IO_SERVER, + addr_array, + &io_server_count); + if (ret < 0) + { + PVFS_perror("PVFS_mgmt_get_server_array", ret); + return -1; + } + } + + /* count keys */ + for (max_keys = 0; key_table[max_keys].key_number >= 0; max_keys++); + + /* allocate a 2 dimensional array for statistics */ + perf_matrix = (int64_t **)malloc(io_server_count * sizeof(int64_t *)); + if (!perf_matrix) + { + perror("malloc"); + return(-1); + } + for(i=0; i 60) + { + snaptime = time(NULL); + key_count = max_keys; + ret = PVFS_mgmt_perf_mon_list(cur_fs, + &creds, + perf_matrix, + end_time_ms_array, + addr_array, + next_id_array, + io_server_count, + &key_count, + HISTORY, + NULL, NULL); + if (ret < 0) + { + PVFS_perror("PVFS_mgmt_perf_mon_list", ret); + return -1; + } + } + + /* format requested OID */ + if (perf_matrix[srv][key_count] != 0) + { + int k; + /* this is a valid measurement */ + for(k = 0; k < max_keys && + strcmp(cmd_buffer, key_table[k].key_oid); k++); + /* out of for loop k equals selected key */ + if (k < max_keys) + { + returnType = key_table[k].key_type; + returnValue = perf_matrix[srv][key_table[k].key_number]; + } + else + { + /* invalid command */ + fprintf(stdout,"NONE\n"); + fflush(stdout); + continue; + } + } + else + { + /* invalid measurement */ + fprintf(stdout,"NONE\n"); + fflush(stdout); + continue; + } + fprintf(stdout, "%s\n%llu\n", returnType, llu(returnValue)); + fflush(stdout); + /* return to top for next command */ + } + + 
PVFS_sys_finalize(); + + return(ret); +} + +/* parse_args() + * + * parses command line arguments + * + * returns pointer to options structure on success, NULL on failure + */ +static struct options* parse_args(int argc, char* argv[]) +{ + char flags[] = "vm:s:"; + int one_opt = 0; + int len = 0; + + struct options *tmp_opts = NULL; + int ret = -1; + + /* create storage for the command line options */ + tmp_opts = (struct options *) malloc(sizeof(struct options)); + if(tmp_opts == NULL) + { + return(NULL); + } + memset(tmp_opts, 0, sizeof(struct options)); + + /* look at command line arguments */ + while((one_opt = getopt(argc, argv, flags)) != EOF) + { + switch(one_opt) + { + case('v'): + printf("%s\n", PVFS2_VERSION); + exit(0); + case('m'): + /* we need to add a '/' to the end so cannot strdup */ + len = strlen(optarg)+1; + tmp_opts->mnt_point = (char*)malloc(len+1); + if(!tmp_opts->mnt_point) + { + free(tmp_opts); + return(NULL); + } + memset(tmp_opts->mnt_point, 0, len+1); + ret = sscanf(optarg, "%s", tmp_opts->mnt_point); + if(ret < 1) + { + free(tmp_opts); + return(NULL); + } + /* TODO: dirty hack... fix later. 
The remove_dir_prefix() + * function expects some trailing segments or at least + * a slash off of the mount point + */ + strcat(tmp_opts->mnt_point, "/"); + tmp_opts->mnt_point_set = 1; + break; + case('s'): + tmp_opts->server_addr = strdup(optarg); + if (!tmp_opts->server_addr) + { + free(tmp_opts); + return NULL; + } + tmp_opts->server_addr_set = 1; + break; + case('?'): + usage(argc, argv); + exit(EXIT_FAILURE); + } + } + + if (!(tmp_opts->mnt_point_set || tmp_opts->server_addr_set)) + { + free(tmp_opts); + return(NULL); + } + + return(tmp_opts); +} + + +static void usage(int argc, char **argv) +{ + fprintf(stderr, "\n"); + fprintf(stderr, "Usage : %s [-m fs_mount_point]\n", argv[0]); + fprintf(stderr, "Example: %s -m /mnt/pvfs2\n", argv[0]); + fprintf(stderr, "Usage : %s [-s bmi_address_string]\n", argv[0]); + fprintf(stderr, "Example: %s -s tcp://localhost:3334\n", argv[0]); + return; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=4 sts=4 sw=4 expandtab + */ diff --git a/src/apps/ucache/module.mk.in b/src/apps/ucache/module.mk.in new file mode 100644 index 0000000..fc5cb0c --- /dev/null +++ b/src/apps/ucache/module.mk.in @@ -0,0 +1,11 @@ + +ifdef BUILD_UCACHE + +DIR := src/apps/ucache + +UCACHEDSRC := \ + $(DIR)/ucached.c \ + $(DIR)/ucached_cmd.c + +endif # BUILD_UCACHE + diff --git a/src/apps/ucache/shmem_util.c b/src/apps/ucache/shmem_util.c new file mode 100644 index 0000000..a6792c9 --- /dev/null +++ b/src/apps/ucache/shmem_util.c @@ -0,0 +1,75 @@ +#include "shmem_util.h" + +/** Aquire a SysV shared memory segment. + * key_file and proj_id are identifiers used by ftok to uniquly identify the + * segment. size is the desired size in bytes. memory is an optional parameter + * that refers to a void pointer which can be set to the address of the segment. 
+ */ +int shmem_init(char *key_file, int proj_id, size_t size, void **memory) +{ + int key = 0; + int id = 0; + + /* Generate key based on key_file and proj_id */ + key = ftok(key_file, proj_id); + if(key < 0) + { + return -1; + } + + /* Allocate Shared Memory Segment */ + id = shmget(key, size, FLAGS | IPC_CREAT | IPC_EXCL); + if(id < 0) + { + return -1; + } + + /* Reference to pointer not required. */ + if(memory) + { + *memory = shmat(id, NULL, AT_FLAGS); + if(*memory == (void *) -1) + { + return -1; + } + } + else + { + if(shmat(id, NULL, AT_FLAGS) == (void *) 0) + { + return -1; + } + } + + return 0; +} + +/** Destroy SysV shared memory segment */ +int shmem_destroy(char *key_file, int proj_id) +{ + int key = 0; + int id = 0; + int rc = 0; + + /* Generate key based on key_file and proj_id */ + key = ftok(key_file, proj_id); + if(key < 0) + { + return -1; + } + + /* Allocate Shared Memory Segment */ + id = shmget(key, 0, FLAGS); + if(id < 0) + { + return -1; + } + + rc = shmctl(id, IPC_RMID, NULL); + if(rc < 0) + { + return -1; + } + return 0; +} + diff --git a/src/apps/ucache/shmem_util.h b/src/apps/ucache/shmem_util.h new file mode 100644 index 0000000..a096871 --- /dev/null +++ b/src/apps/ucache/shmem_util.h @@ -0,0 +1,20 @@ +#ifndef SHMEM_UTIL_H +#define SHMEM_UTIL_H + +#define _XOPEN_SOURCE 500 + +#include +#include +#include +#include +#include +#include + +#define SVSHM_MODE (SHM_R | SHM_W | SHM_R>>3 | SHM_R>>6) +#define FLAGS (SVSHM_MODE) +#define AT_FLAGS 0 + +int shmem_init(char *key_file, int proj_id, size_t size, void **memory); +int shmem_destroy(char *key_file, int proj_id); + +#endif diff --git a/src/apps/ucache/ucached.c b/src/apps/ucache/ucached.c new file mode 100644 index 0000000..4dd8562 --- /dev/null +++ b/src/apps/ucache/ucached.c @@ -0,0 +1,711 @@ +#include +#include +#include "ucached.h" + +/* FIFO */ +static int readfd = 0; /* Command File Descriptor */ +static int writefd = 0; /* Response File Descriptor */ +static char 
buffer[BUFF_SIZE]; /* For FIFO reads and writes */ +char buff[LOG_LEN]; + +/* Time Structures For Log +static time_t rawtime; +static struct tm * timeinfo; +*/ + +/* Booleans */ +/* 1 if ucache is available for use */ +static unsigned char ucache_avail = 0; +/* Set this to one if the ucache doesn't get created, and the + * create_ucache_shmem function should be run again. + */ +//static unsigned char tryAgain = 0; + +/* Use this global to determine if the atexit registered function (clean_up) + * needs to run. A child process is created to create shmem. This facilitates + * destruction later on, since segments hang around until their creator exits. + */ +pid_t pid = -1; + +/* Hung Lock Detection */ +time_t locked_time[BLOCKS_IN_CACHE+1]; + +/* Forward Function Declarations */ +static int run_as_child(char c); /* Run as child of ucached */ +static int execute_cmd(char command); +static int create_ucache_shmem(void); +static int destroy_ucache_shmem(char dest_locks, char dest_ucache); +static void clean_up(void); +static int ucached_lockchk(void); + +void check_rc(int rc) +{ + memset(buffer, 0, BUFF_SIZE); + if(rc >= 0) + { + strcpy(buffer, "SUCCESS"); + } + else + { + strcpy(buffer, "FAILURE: check log: " UCACHED_LOG_FILE); + } +} + +/** Function to be run upon successful termination from an exit call */ +static void clean_up(void) +{ + int rc = 0; + /* Only the parent process should execute these lines. + * Must check the pid since the atexit function registered + * clean_up. This registration is passed on to any child + * processes forked off of the parent. We don't want to execute + * these lines when any of the children exit. Run only when parent. + */ + if(pid !=0) + { + if(DEST_AT_EXIT) + { + rc = destroy_ucache_shmem(1, 1); + } + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: ucached exiting...PID=%d\n", pid); + rc = unlink(FIFO1); + rc = unlink(FIFO2); + } +} + +/** Checks ucache lock shmem region for hung locks. + * Returns 0 when no hung locks are detected. 
+ * Returns 1 when 1 or more hung locks are detected and all are gracefully + * handled. + * Returns -1 when 1 or more hung locks are detected and couldn't +* be handled properly. (error) + */ +static int ucached_lockchk(void) +{ + int rc = 0; + int i; + for(i = 0; i < (BLOCKS_IN_CACHE + 1); i++) + { + ucache_lock_t * currlock = get_lock((uint16_t)i); + if(lock_trylock(currlock) == 0) + { + /* Lock wasn't held, so set the timer to zero for this lock */ + locked_time[i] = 0; + } + else + { + /* Lock was held, so calculate if lock timeout has occured */ + /* First check to see if this lock's timer has been set at all */ + if(!locked_time[i]) + { + /* Timer for this lock isn't currently set */ + time(&locked_time[i]); + continue; + } + else + { + /* Timer was previously set meaning the block had been locked*/ + double time_diff = difftime(time((time_t *)NULL), locked_time[i]); + if((int)time_diff >= BLOCK_LOCK_TIMEOUT) + { + /* + gossip_debug(GOSSIP_UCACHED_DEBUG, + "WARNING: HUNG LOCK DETECTED @ block index = %d\n", i); + TODO: what to do with hung locks? + rc = pick_lock(ucache_lock_t * currlock); + if(rc == 1) + { + locked_time[i] = (time_t)0; + } + */ + } + } + } + } + + return rc; +} + + +/** Runs the command in a child process */ +static int run_as_child(char c) +{ + pid = fork(); + int rc = 0; + /* Fork Error? 
*/ + if(pid < 0) + { + exit(EXIT_FAILURE); + } + /* Child Process */ + else if(pid == 0) + { + rc = execute_cmd(c); + if(rc < 0) + { + exit(EXIT_FAILURE); + } + exit(EXIT_SUCCESS); + } + /* Parent Process */ + else + { + wait(&rc); + if(WIFEXITED(rc)) + { + if(WEXITSTATUS(rc) != 0) + { + return -1; + } + } + } + return rc; +} + + +static int execute_cmd(char cmd) +{ + int rc = 0; + switch(cmd) + { + /* Create the shared memory required by the ucache */ + case 'c': + rc = create_ucache_shmem(); + break; + /* Destroy the shared memory required by the ucache */ + case 'd': + rc = destroy_ucache_shmem(1, 1); + break; + case 'i': + { + char info_options[6]; + memset(info_options, 0, 6); + + /* Open FILE * to output ucache_info */ + FILE * info_out = fopen(UCACHED_INFO_FILE, "w"); + //FILE * info_out = fdopen(writefd, "w"); + + int howmany = sscanf(&buffer[1], "%s", info_options); + if(howmany > 0) + { + rc = ucache_info(info_out, info_options); + } + else + { + fprintf(info_out, "No display options specified. " + "Showing ucache contents.\n"); + rc = ucache_info(info_out, "c"); + } + rc = 1; + fclose(info_out); + //shmdt(ucache); + //shmdt(ucache_aux); + break; + } + /* Close Daemon */ + case 'x': + writefd = open(FIFO2, O_WRONLY); + rc = write(writefd, "SUCCESS\tExiting ucached", BUFF_SIZE); + while(rc <= 0) + { + rc = write(writefd, "SUCCESS\tExiting ucached", BUFF_SIZE); + } + remove(UCACHED_STARTED); + close(writefd); + close(readfd); + exit(EXIT_SUCCESS); + break; + default: + strcpy(buffer, "FAILURE\tInvalid command character"); + return -1; + } + return rc; +} + +/* Returns -1 on failure, 1 on success */ +static int create_ucache_shmem(void) +{ + int rc = 0; + + int old_aux_present = 0; + + /* attempt setup of shmem region for locks (inlcude SYSV later? 
*/ + int id = SHM_ID1; + key_t key = ftok(KEY_FILE, id); + size_t size = UCACHE_AUX_SIZE; + int shmflg = SVSHM_MODE; + int aux_shmid = shmget(key, size, shmflg); + + if(aux_shmid == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: shmget on aux_shmid returned -1 on first try\n"); + + /* Shared memory segment used for aux data was not previosly created, + * so create it. + */ + shmflg = shmflg | IPC_CREAT | IPC_EXCL; + aux_shmid = shmget(key, size, shmflg); + if(aux_shmid == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmget (IPC_CREATE, IPC_EXCL)" + " on aux_shmid returned -1\n"); + /* Couldn't create the required segment */ + return -1; + } + else + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: shmget (using IPC_CREATE, IPC_EXCL)" + " on aux_shmid returned shmid = %d\n", aux_shmid); + + /* Attach to shmem and initialize all the aux struct */ + shmflg = 0; + /* ucache_aux is defined in src/client/usrint/ucache.h */ + ucache_aux = shmat(aux_shmid, NULL, shmflg); + if (!ucache_aux) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmat on aux_shmid returned NULL"); + return -1; + } + + ucache_locks = ucache_aux->ucache_locks; + + int i; + /* Initialize Shared Block Level Locks */ + for(i = 0; i < (BLOCKS_IN_CACHE + 1); i++) + { + rc = lock_init(get_lock(i)); + if (rc == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: lock_init returned -1 @ lock index = %d\n", i); + rc = -1; + } + } + } + } + else + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: first shmget on aux_shmid found segment" + ": shmid = %d\n", aux_shmid); + old_aux_present = 1; + /* Shmem for ucache_aux was already created, so just attach to it */ + shmflg = 0; + ucache_aux = shmat(aux_shmid, NULL, shmflg); + if (!ucache_aux) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmat on aux_shmid returned NULL\n"); + return -1; + } + } + + /* At this point all the locks should be aquired and initialized. 
+ * They could also be locked or unlocked */ + + /* Set the global lock point to the address of the last lock in the locks + * shmem segment. Then lock it. + */ + ucache_lock = get_lock(BLOCKS_IN_CACHE); + lock_lock(ucache_lock); + + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: lock segment successfully retrieved and global lock locked.\n"); + + /* Set and zero out global ucache stats struct */ + ucache_stats = &(ucache_aux->ucache_stats); + *ucache_stats = (struct ucache_stats_s){ 0, 0, 0, 0, 0 }; + + /* Try to get/create the shmem required for the ucache */ + id = SHM_ID2; + key = ftok(KEY_FILE, id); + size = CACHE_SIZE; + shmflg = SVSHM_MODE; + int ucache_shmid = shmget(key, size, shmflg); + + if(ucache_shmid == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: shmget on ucache_shmid returned -1 first try\n"); + + /* Remember if there was an old lock region detected */ + if(old_aux_present) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: old ucache_aux found, attempting destruction of old" + " locks and starting\n"); + + /* Destroy old aux region and start function over */ + rc = shmctl(aux_shmid, IPC_RMID, (struct shmid_ds *) NULL); + + /* Let this child process exit, since exiting is required to get + * the shmem segment to be completely removed. Try to create the + * shmem again later in another child process. + */ + return -1; + } + + /* Shared memory segmet used for ucache was not previosly created, + * so create it. 
+ */ + shmflg = shmflg | IPC_CREAT | IPC_EXCL; + ucache_shmid = shmget(key, size, shmflg); + if(ucache_shmid == -1) + { + /* Couldn't create the required segment */ + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmget (using IPC_CREATE, IPC_EXCL)" + " on ucache_shmid returned -1\n"); + + rc = -1; + goto errout; + } + else + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: shmget (using IPC_CREATE, IPC_EXCL)" + " on ucache_shmid returned shmid = %d\n", ucache_shmid); + + /* Attach to the ucache shmem region */ + shmflg = 0; + /* ucache is defined in src/client/usrint/ucache.h */ + ucache = shmat(ucache_shmid, NULL, shmflg); + if (!ucache) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmat on ucache_shmid returned NULL\n"); + rc = -1; + goto errout; + } + + /* Initialize the file table */ + rc = ucache_init_file_table(0); + if(rc != 0) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: file table initialization failed\n"); + /* Couldn't Initialize File Table */ + rc = -1; + goto errout; + } + } + } + else + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: first shmget on ucache_shmid found segment" + ": shmid = %d\n", ucache_shmid); + + /* Previously created ucache segment present. Need more info. 
*/ + /* See if marked for deletion, but has users attached still */ + struct shmid_ds buf; + int cmd = IPC_STAT; + rc = shmctl(ucache_shmid, cmd, &buf); + if(rc == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmctl failed to IPC_STAT ucache_shmid\n"); + goto errout; + } + + /* Determine the count of processes attached to this shm segment */ + char hasAttached = (buf.shm_nattch > 0); + + /* Determine if the ucache shmem segment is marked for destruction*/ + uint16_t currentMode = buf.shm_perm.mode; + char markedForDest = ((currentMode & SHM_DEST) == SHM_DEST); + + if(markedForDest && hasAttached) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: detected previous ucache shmem segment" + " marked for destruction that still has" + " one or more processes attached to it.\n"); + + shmflg = shmflg | IPC_CREAT; /* Note: CREAT w/o EXCL */ + ucache_shmid = shmget(key, size, shmflg); + if(ucache_shmid == -1) + { + /* Couldn't create the required segment */ + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmget (using IPC_CREAT && !EXCL)" + " on ucache_shmid returned -1\n"); + rc = -1; + goto errout; + } + /* Attach to the ucache shmem region */ + shmflg = 0; + /* ucache is defined in src/client/usrint/ucache.h */ + ucache = shmat(ucache_shmid, NULL, shmflg); + if (!ucache) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmat on ucache_shmid returned NULL\n"); + rc = -1; + goto errout; + } + + /* Initialize the ftbl, and force the creation of it + * since the init boolean is set to 1. 
+ */ + rc = ucache_init_file_table(1); + if(rc != 0) + { + /* Couldn't Initialize File Table */ + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: file table initialization failed\n"); + rc = -1; + goto errout; + } + } + else + { + /* Asume we will keep using the previously allocated segment */ + /* Attach to the ucache shmem region */ + shmflg = 0; + /* ucache is defined in src/client/usrint/ucache.h */ + ucache = shmat(ucache_shmid, NULL, shmflg); + if (!ucache) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmat on ucache_shmid returned NULL\n"); + rc = -1; + goto errout; + } + } + } + + lock_unlock(ucache_lock); + return 1; + +errout: + lock_unlock(ucache_lock); + return rc; +} + +static int destroy_ucache_shmem(char dest_locks, char dest_ucache) +{ + int rc = 0; + /* Aquire the main lock then attempt to destroy the ucache shmem segment */ + if(ucache_lock) + { + lock_lock(ucache_lock); + } + + if(dest_ucache) + { +// printf("dest_ucache\n"); + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: destroying ucache shmem\n"); + + /* Destroy shmem segment containing ucache */ + int id = SHM_ID2; + key_t key = ftok(KEY_FILE, id); + int shmflg = SVSHM_MODE; + int ucache_shmid = shmget(key, 0, shmflg); + if(ucache_shmid == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmget on ucache_shmid returned -1\n"); + return -1; + } + rc = shmctl(ucache_shmid, IPC_RMID, (struct shmid_ds *) NULL); + if(rc == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "WARNING: ucache shmem_destroy: errno == %d\n", errno); + } + } + + if(dest_locks) + { +// printf("dest_locks\n"); + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: destroying locks' shmem\n"); + + /* Destroy shmem segment containing locks */ + int id = SHM_ID1; + key_t key = ftok(KEY_FILE, id); + int shmflg = SVSHM_MODE; + int lock_shmid = shmget(key, 0, shmflg); + if(lock_shmid == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: shmget on lock_shmid returned -1\n"); + return -1; + } + rc = shmctl(lock_shmid, IPC_RMID, 
(struct shmid_ds *) NULL); + if(rc == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "WARNING: ucache_locks shmem_destroy: errno == %d\n", errno); + } + } + + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: both shmem segments marked for destruction.\n"); + return rc; +} + +/** This program should be run as root on startup to initialize the shared + * memory segments required by the user cache in PVFS. + */ +int main(int argc, char **argv) +{ + int rc = 0; + + gossip_enable_file(UCACHED_LOG_FILE, "a"); + uint64_t curr_mask; + int debug_on; + gossip_get_debug_mask(&debug_on, &curr_mask); + /* Enable the writing of the error message and write the message to file. */ + gossip_set_debug_mask(1, GOSSIP_UCACHED_DEBUG); + //printf("now gossip_debug_mask = 0x%016lx\n", gossip_debug_mask); + /* restore previous gossip_debug_mask */ + //gossip_set_debug_mask(debug_on, curr_mask); + + memset(locked_time, 0, (sizeof(time_t) * (BLOCKS_IN_CACHE + 1))); + + /* Direct output of ucache library, TODO: change this later */ + if (!out) + { + out = stdout; + } + + /* Daemonize! 
*/ + //rc = daemon( 0, 0); + rc = daemon( 1, 1); + + if(rc != 0) + { + + perror("daemon-izing failed"); + exit(EXIT_FAILURE); + } + + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: ucached started\n"); + + /* Start up with shared memory initialized */ + if(CREATE_AT_START) + { + run_as_child('c'); + atexit(clean_up); + } + + /* Create 2 fifos */ + rc = mkfifo(FIFO1, FILE_MODE); + if(rc != 0) + { + /* Couldn't create FIFO */ + return -1; + } + rc = mkfifo(FIFO2, FILE_MODE); + if(rc != 0) + { + /* Couldn't create FIFO */ + return -1; + } + + while(1) + { + readfd = open(FIFO1, O_RDONLY | O_NONBLOCK); + struct pollfd fds[1]; + fds[0].fd = readfd; + fds[0].events = POLLIN; + + rc = poll(fds, 1, FIFO_TIMEOUT * 1000); + + if(rc == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: poll: errno = %d\n", errno); + } + + if(fds[0].revents & POLLIN) + { + /* Data to be read */ + memset(buffer, 0, BUFF_SIZE); + int count = read(readfd, buffer, BUFF_SIZE); + while(count <= 0) + { + if(count == -1) + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: caught error while trying to read cmd: errno = %d\n", + errno); + } + /* Try to read again */ + count = read(readfd, buffer, BUFF_SIZE); + } + if(count > 0) + { + /* Data read into buffer*/ + char c = buffer[0]; + /* Valid Command? 
*/ + if(c == 'c' || c == 'd' || c == 'x' || c == 'i') + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "INFO: Command Received: %c\n", c); + if(c == 'c' || c == 'i') + { + /* Run creation in child process */ + run_as_child(c); + } + else + { + execute_cmd(c); + } + check_rc(rc); + } + /* Invalid Command */ + else + { + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: Invalid Command Received: %c\n", c); + rc = -1; + check_rc(rc); + } + + /* Data can be written, not guaranteed anything to write */ + int responseLength = strlen(buffer); + if(responseLength != 0) + { + writefd = open(FIFO2, O_WRONLY); + if(writefd == -1) + { perror("Error Opening File"); + gossip_debug(GOSSIP_UCACHED_DEBUG, + "ERROR: opening write FIFO: errno = %d\n", errno); + } + rc = write(writefd, buffer, BUFF_SIZE); + while(rc <= 0) + { printf("rc = %d\n", rc); + rc = write(writefd, buffer, BUFF_SIZE); + } + memset(buffer, 0, BUFF_SIZE); + } + else + { + printf("no response\n"); + } + } + } + close(readfd); + + if(ucache_avail) + { + /* TODO: write some stats to file periodically */ + + /* Write some dirty blocks out */ + /* TODO: create function to do this. 
*/ + + /* Check for hung locks */ + rc = ucached_lockchk(); + } + } +} diff --git a/src/apps/ucache/ucached.h b/src/apps/ucache/ucached.h new file mode 100644 index 0000000..e876abe --- /dev/null +++ b/src/apps/ucache/ucached.h @@ -0,0 +1,81 @@ +#ifndef UCACHED_H +#define UCACHED_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Daemon Logging */ +#ifndef UCACHED_LOG_FILE +#define UCACHED_LOG_FILE "/tmp/ucached.log" +#endif + +#ifndef UCACHED_INFO_FILE +#define UCACHED_INFO_FILE "/tmp/ucached.info" +#endif + +#ifndef UCACHED_STARTED +#define UCACHED_STARTED "/tmp/ucached.started" +#endif + +#define GOSSIP_UCACHED_DEBUG 0x0001000000000000 +#define GOSSIP_UCACHED_CMD_DEBUG 0x0000100000000000 + + +/* FIFO Defines */ +#define FILE_MODE (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH) +#define FIFO1 "/tmp/ucached.fifo.1" +#define FIFO2 "/tmp/ucached.fifo.2" +#define BUFF_SIZE 4096 +#define LOG_LEN 256 + +#ifndef LOG_TIMESTAMP +#define LOG_TIMESTAMP 0 +#endif + +#ifndef CREATE_AT_START +#define CREATE_AT_START 1 +#endif + +#ifndef DEST_AT_EXIT +#define DEST_AT_EXIT 1 +#endif + +#ifndef FIFO_TIMEOUT +#define FIFO_TIMEOUT 10 /* Second */ +#endif + +/* For shared memory for ucache and ucache locks */ +#define KEY_FILE "/etc/fstab" +#define SHM_ID1 'l' /* for ucache locks */ +#define SHM_ID2 'm' /* for ucache memory */ + +#ifndef SHM_R +#define SHM_R 0400 +#endif + +#ifndef SHM_W +#define SHM_W 0200 +#endif + +/* SVSHM Permissions */ +#ifndef SVSHM_MODE +#define SVSHM_MODE (SHM_R | SHM_W | SHM_R >> 3 | SHM_W >> 3 | SHM_R >> 6 | SHM_W >> 6) +#endif + +#ifndef BLOCK_LOCK_TIMEOUT +#define BLOCK_LOCK_TIMEOUT 100 +#endif + +#endif diff --git a/src/apps/ucache/ucached_cmd.c b/src/apps/ucache/ucached_cmd.c new file mode 100644 index 0000000..7197d9b --- /dev/null +++ b/src/apps/ucache/ucached_cmd.c @@ -0,0 +1,120 @@ + +#include +#include "ucached.h" + +/* + * s = start 
ucached + * c = create shared memory for ucache + * d = destroy shared memory for ucache + * x = exit ucached + */ +int main(int argc, char **argv) +{ + if(argc < 2 || argc > 3) + { + printf("usage: ucache_cmd \n"); + return 0; + } + + int rc = 0; + void *rp; + + char this_cmd = argv[1][0]; + if(this_cmd == 's') + { + char ps_buff[256]; + FILE *pipe = popen("ps -e | grep -w ucached", "r"); + rp = fgets(ps_buff, 256, pipe); + if(rp == NULL) + { + rc = remove(FIFO1); + rc = remove(FIFO2); + /* Crank up the daemon since it's not running */ + rc = system("ucached"); + puts("SUCCESS: Daemon started"); + } + else + { + puts("FAILURE: Daemon already started"); + puts(ps_buff); + } + return 1; + } + + char buffer[BUFF_SIZE]; + memset(buffer, 0, BUFF_SIZE); + + /* Read and Write File Descriptors */ + int readfd; + int writefd; + + /* Open FIFOs for use */ + writefd = open(FIFO1, O_WRONLY); + + if(writefd == -1) + { + perror("ucached_cmd couldn't open writefd"); + return -1; + } + + /* Send Command to Daemon */ + buffer[0] = this_cmd; + if(argc == 3) + { + strcat(buffer, " "); + strcat(buffer, argv[2]); + } + rc = write(writefd, buffer, BUFF_SIZE); + if(rc == -1) + { + perror("Error occured during write to ucached"); + } + + memset(buffer, 0, BUFF_SIZE); + readfd = open(FIFO2, O_RDONLY); + + /* Collect Response */ + int count = read(readfd, buffer, BUFF_SIZE); + while(count > 0 || ((count == -1) && (errno == EINTR))) + { + //if(count) + // printf("read: %d\n", count); + //buffer[count] = 0; + fputs(buffer, stdout); + if(strlen(buffer) < BUFF_SIZE) + { + //printf("strlen = %d\n", strlen(buffer)); + break; + } + memset(buffer, 0, BUFF_SIZE); + count = read(readfd, buffer, BUFF_SIZE); + } + printf("\n"); + /* Close FIFO when done */ + close(readfd); + close(writefd); + + if(this_cmd == 'i') + { + memset(buffer, 0, BUFF_SIZE); + FILE *info = fopen(UCACHED_INFO_FILE, "r"); + /* + while(!info) + { + info = fopen(UCACHED_INFO_FILE, "r"); + }*/ + if(!info) + { + 
perror("UCACHED_INFO_FILE"); + } + while(fread(buffer, sizeof(char), BUFF_SIZE - 1, info) > 0) + { + buffer[strlen(buffer)] = 0; + printf("%s", buffer); + memset(buffer, 0, BUFF_SIZE); + } + fclose(info); + } + return 1; +} + diff --git a/src/apps/ucache/ucached_common.c b/src/apps/ucache/ucached_common.c new file mode 100644 index 0000000..03ccde7 --- /dev/null +++ b/src/apps/ucache/ucached_common.c @@ -0,0 +1,16 @@ +/* ucached_common.c */ + +#include "ucached.h" + +int myread(int readfd, char *buffer) +{ + int count = read(readfd, buffer, BUFF_SIZE); + return count; +} + +void mywrite(int writefd, const char *src, char *buffer) +{ + strcpy(buffer, src); + write(writefd, buffer, BUFF_SIZE); + memset(buffer, 0, BUFF_SIZE); +} diff --git a/src/apps/ucache/watch_daemons b/src/apps/ucache/watch_daemons new file mode 100755 index 0000000..749f710 --- /dev/null +++ b/src/apps/ucache/watch_daemons @@ -0,0 +1 @@ +watch --interval=1 pgrep -l ucached diff --git a/src/apps/ucache/watch_ipcs b/src/apps/ucache/watch_ipcs new file mode 100755 index 0000000..7714e86 --- /dev/null +++ b/src/apps/ucache/watch_ipcs @@ -0,0 +1 @@ +watch --interval=1 ipcs -m diff --git a/src/apps/ucache/watch_log b/src/apps/ucache/watch_log new file mode 100755 index 0000000..1911bdc --- /dev/null +++ b/src/apps/ucache/watch_log @@ -0,0 +1 @@ +watch --interval=1 'cat /tmp/ucached.log | cat -n | sort -r -g' diff --git a/src/client/sysint/mgmt-get-uid-list.sm b/src/client/sysint/mgmt-get-uid-list.sm new file mode 100644 index 0000000..c1db40e --- /dev/null +++ b/src/client/sysint/mgmt-get-uid-list.sm @@ -0,0 +1,243 @@ +/* + * (C) 2003 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/** \file + * \ingroup mgmtint + * + * PVFS management interface routines for obtaining server UID (user ID) + * information. This is used to determine which user's are sending + * requests to a given server. 
+ */ +#include "client-state-machine.h" +#include "pvfs2-debug.h" +#include "job.h" +#include "gossip.h" +#include "pvfs2-mgmt.h" + +extern job_context_id pint_client_sm_context; + +static int get_uid_list_comp_fn( + void* v_p, struct PVFS_server_resp *resp_p, int i); + +%% + +machine pvfs2_client_mgmt_get_uid_list_sm +{ + state setup_msgpair + { + run mgmt_get_uid_list_setup_msgpair; + success => xfer_msgpair; + default => cleanup; + } + + state xfer_msgpair + { + jump pvfs2_msgpairarray_sm; + default => cleanup; + } + + state cleanup + { + run mgmt_get_uid_list_cleanup; + default => terminate; + } +} + +%% + +PVFS_error PVFS_imgmt_get_uid_list( + PVFS_fs_id fs_id, + PVFS_credentials *credentials, + int server_count, + PVFS_BMI_addr_t *addr_array, + uint32_t history, + PVFS_uid_info_s **uid_info_array, + uint32_t *uid_count, + PVFS_mgmt_op_id *op_id, + PVFS_hint hints, + void *user_ptr) +{ + PINT_smcb *smcb = NULL; + PINT_client_sm *sm_p = NULL; + int ret = 0; + + gossip_debug(GOSSIP_CLIENT_DEBUG, + "PVFS_imgmt_get_uid_list entered\n"); + + if ((server_count < 1) || (!addr_array) || (history < 1) || + (!uid_info_array) || (!uid_count)) + { + return -PVFS_EINVAL; + } + + PINT_smcb_alloc(&smcb, PVFS_MGMT_GET_UID_LIST, + sizeof(struct PINT_client_sm), + client_op_state_get_machine, + client_state_machine_terminate, + pint_client_sm_context); + + if (!smcb) + { + return -PVFS_ENOMEM; + } + + sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + PINT_init_msgarray_params(sm_p, fs_id); + PINT_init_sysint_credentials(sm_p->cred_p, credentials); + sm_p->u.get_uid_list.uid_statistics = uid_info_array; + sm_p->u.get_uid_list.history = history; + sm_p->u.get_uid_list.fs_id = fs_id; + sm_p->u.get_uid_list.server_count = server_count; + sm_p->u.get_uid_list.addr_array = addr_array; + sm_p->u.get_uid_list.uid_count = uid_count; + PVFS_hint_copy(hints, &sm_p->hints); + + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, server_count); + if (ret != 0) + { + PINT_smcb_free(smcb); + 
return ret; + } + + return PINT_client_state_machine_post( + smcb, op_id, user_ptr); +} + +PVFS_error PVFS_mgmt_get_uid_list( + PVFS_fs_id fs_id, + PVFS_credentials *credentials, + int server_count, + PVFS_BMI_addr_t *addr_array, + uint32_t history, + PVFS_uid_info_s **uid_info_array, + uint32_t *uid_count, + PVFS_hint hints, + void *user_ptr) +{ + PVFS_error ret = -PVFS_EINVAL, error = 0; + PVFS_mgmt_op_id op_id; + + gossip_debug(GOSSIP_CLIENT_DEBUG, + "PVFS_mgmt_get_uid_list entered\n"); + + ret = PVFS_imgmt_get_uid_list(fs_id, credentials, server_count, addr_array, + history, uid_info_array, uid_count, &op_id, hints, NULL); + if (ret) + { + PVFS_perror_gossip("PVFS_imgmt_get_uid_list call", ret); + error = ret; + } + else + { + ret = PVFS_mgmt_wait(op_id, "get_uid_list", &error); + if (ret) + { + PVFS_perror_gossip("PVFS_mgmt_wait call", ret); + error = ret; + } + } + + gossip_debug(GOSSIP_CLIENT_DEBUG, + "PVFS_mgmt_get_uid_list completed\n"); + + PINT_mgmt_release(op_id); + return error; +} + +static PINT_sm_action mgmt_get_uid_list_setup_msgpair( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int i = 0; + PINT_sm_msgpair_state *msg_p = NULL; + + gossip_debug(GOSSIP_CLIENT_DEBUG, "get_uid_list state: " + "mgmt_get_uid_list_setup_msgpair\n"); + + /* setup msgpair array */ + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) + { + PINT_SERVREQ_MGMT_GET_UID_FILL( + msg_p->req, + *sm_p->cred_p, + sm_p->u.get_uid_list.history, + sm_p->hints); + + msg_p->fs_id = sm_p->u.get_uid_list.fs_id; + msg_p->handle = PVFS_HANDLE_NULL; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = get_uid_list_comp_fn; + msg_p->svr_addr = sm_p->u.get_uid_list.addr_array[i]; + } + + /* immediate return: next state jumps to msgpairarray machine */ + js_p->error_code = 0; + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); + return SM_ACTION_COMPLETE; +} + +static PINT_sm_action mgmt_get_uid_list_cleanup( + 
struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + PINT_msgpairarray_destroy(&sm_p->msgarray_op); + + sm_p->error_code = js_p->error_code; + + PINT_SET_OP_COMPLETE; + return SM_ACTION_TERMINATE; +} + +static int get_uid_list_comp_fn(void* v_p, + struct PVFS_server_resp *resp_p, + int i) +{ + int j = 0; + PINT_smcb *smcb = v_p; + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + + /* if this particular request was successful, then store the + * performance information in an array to be returned to caller + */ + if (sm_p->msgarray_op.msgarray[i].op_status == 0) + { + (sm_p->u.get_uid_list.uid_count)[i] = + resp_p->u.mgmt_get_uid.uid_info_array_count; + memcpy(sm_p->u.get_uid_list.uid_statistics[i], + resp_p->u.mgmt_get_uid.uid_info_array, + resp_p->u.mgmt_get_uid.uid_info_array_count + * sizeof(PVFS_uid_info_s)); + } + + /* if this is the last response, check all of the status values and + * return error code if any requests failed + */ + if (i == (sm_p->msgarray_op.count -1)) + { + for (j=0; j < sm_p->msgarray_op.count; j++) + { + if (sm_p->msgarray_op.msgarray[j].op_status != 0) + { + return(sm_p->msgarray_op.msgarray[j].op_status); + } + } + } + + return 0; +} + +/* + * Local variables: + * mode: c + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/client/usrint/mmap.c b/src/client/usrint/mmap.c new file mode 100644 index 0000000..909e8f3 --- /dev/null +++ b/src/client/usrint/mmap.c @@ -0,0 +1,180 @@ + +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ +/** \file + * \ingroup usrint + * + * mmap operations for user interface + */ + +#include "usrint.h" +#include "posix-ops.h" +#include "posix-pvfs.h" +#include "openfile-util.h" +#include + +static struct qlist_head maplist = QLIST_HEAD_INIT(maplist); + +/** PVFS mmap + * + * This is a very basic implementation that reads whole mapped + * region into memory and writes it back if shared on unmap. + * + * This may not perform well or do all of the neat things mmap + * does, but it will let basic stuff work. + */ +void *pvfs_mmap(void *start, + size_t length, + int prot, + int flags, + int fd, + off_t offset) +{ + int rc = 0; + pvfs_descriptor *pd; + struct pvfs_mmap_s *mlist; + void *maddr; + + if (flags & MAP_ANONYMOUS) + { + void *maddr; + /* this isn't a file system map - just do it */ + maddr = glibc_ops.mmap(start, length, prot, flags, fd, offset); + if (maddr == MAP_FAILED) + { + return MAP_FAILED; + } + /* and done */ + return maddr; + } + /* this is a PVFS file system map */ + /* first find the open file */ + pd = pvfs_find_descriptor(fd); + if (!pd) + { + return MAP_FAILED; + } + /* we will map an ANON region and read the file into it */ + maddr = glibc_ops.mmap(start, length, prot, flags & MAP_ANONYMOUS, + -1, offset); + if (maddr == MAP_FAILED) + { + return MAP_FAILED; + } + rc = pvfs_pread(fd, maddr, length, offset); + if (rc < 0) + { + glibc_ops.munmap(maddr, length); + return MAP_FAILED; + } + /* record this in the open file descriptor */ + mlist = (struct pvfs_mmap_s *)malloc(sizeof(struct pvfs_mmap_s)); + mlist->mst = start; + mlist->mlen = length; + mlist->mprot = prot; + mlist->mflags = flags; + mlist->mfd = fd; + mlist->moff = offset; + qlist_add(&mlist->link, &maplist); + /* and done */ + return maddr; +} + +/** PVFS munmap + * + * for now only unmap whole regions mapped with mmap + */ +int pvfs_munmap(void *start, size_t length) +{ + int rc = 0; + struct pvfs_mmap_s *mapl, *temp; + long long pagesize = getpagesize(); + +#if 
__SIZEOF_POINTER__ == __SIZEOF_LONG__ + if (((long)start % pagesize) != 0 || (length % pagesize) != 0) +#else + if (((long long)start % pagesize) != 0 || (length % pagesize) != 0) +#endif + { + errno = EINVAL; + return -1; + } + qlist_for_each_entry_safe(mapl, temp, &maplist, link) + { + /* assuming we must unmap something that was mapped */ + /* and not just part of it */ + if (mapl->mst == start && mapl->mlen == length) + { + qlist_del(&mapl->link); + break; + } + } + if (!mapl) + { + errno = EINVAL; + return -1; + } + if (mapl->mflags & MAP_SHARED) + { + pvfs_pwrite(mapl->mfd, mapl->mst, mapl->mlen, mapl->moff); + } + rc = glibc_ops.munmap(start, length); + free(mapl); + return rc; +} + +/** PVFS msync + * + * We ignore flags for now - only syncronous writebacks + * can add async later - but invalidate is not likely + */ +int pvfs_msync(void *start, size_t length, int flags) +{ + int rc = 0; + struct pvfs_mmap_s *mapl, *temp; + long long pagesize = getpagesize(); + +#if __SIZEOF_POINTER__ == __SIZEOF_LONG__ + if (((long)start % pagesize) != 0 || (length % pagesize) != 0) +#else + if (((long long)start % pagesize) != 0 || (length % pagesize) != 0) +#endif + { + errno = EINVAL; + return -1; + } + qlist_for_each_entry_safe(mapl, temp, &maplist, link) + { + if ((u_char *)mapl->mst <= (u_char *)start && + (u_char *)mapl->mst + mapl->mlen >= (u_char *)start + length) + { + break; + } + } + if (!mapl) + { + errno = ENOMEM; + return -1; + } + if (mapl->mflags & MAP_SHARED) + { + /* the diff between start and mst is distance from */ + /* start of buffer, and distnace from original offset */ + rc = pvfs_pwrite(mapl->mfd, start, length, + mapl->moff + ((u_char *)start - (u_char *)mapl->mst)); + } + return rc; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/client/usrint/module.mk.in b/src/client/usrint/module.mk.in new file mode 100644 index 0000000..364aeb3 --- /dev/null +++ 
b/src/client/usrint/module.mk.in @@ -0,0 +1,21 @@ +ifdef BUILD_USRINT + +DIR := src/client/usrint + +SRC := \ + $(DIR)/posix-pvfs.c \ + $(DIR)/request.c \ + $(DIR)/iocommon.c \ + $(DIR)/openfile-util.c \ + $(DIR)/ucache.c \ + $(DIR)/mmap.c + +USRC := \ + $(DIR)/posix.c \ + $(DIR)/stdio.c + +# list of all .c files (generated or otherwise) that belong in library +LIBSRC += $(SRC) +ULIBSRC += $(USRC) + +endif # BUILD_USRINT diff --git a/src/client/usrint/posix-ops.h b/src/client/usrint/posix-ops.h new file mode 100644 index 0000000..bb6f8b5 --- /dev/null +++ b/src/client/usrint/posix-ops.h @@ -0,0 +1,230 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines - file descriptors for pvfs + */ + +#ifndef POSIX_OPS_H +#define POSIX_OPS_H 1 + +/* POSIX functions */ + +/** struct of pointers to methods for posix system calls */ +typedef struct posix_ops_s +{ + int (*open)(const char *path, int flags, ...); + int (*open64)(const char *path, int flags, ...); + int (*openat)(int dirfd, const char *path, int flags, ...); + int (*openat64)(int dirfd, const char *path, int flags, ...); + int (*creat)(const char *path, mode_t mode, ...); + int (*creat64)(const char *path, mode_t mode, ...); + int (*unlink)(const char *path); + int (*unlinkat)(int dirfd, const char *path, int flags); + int (*rename)(const char *oldpath, const char *newpath); + int (*renameat)(int olddirfd, const char *oldpath, + int newdirfd, const char *newpath); + ssize_t (*read)( int fd, void *buf, size_t count); + ssize_t (*pread)( int fd, void *buf, size_t count, off_t offset); + ssize_t (*readv)(int fd, const struct iovec *vector, int count); + ssize_t (*pread64)( int fd, void *buf, size_t count, off64_t offset); + ssize_t (*write)( int fd, const void *buf, size_t count); + ssize_t (*pwrite)( int fd, const void *buf, size_t count, off_t offset); + ssize_t (*writev)( int fd, 
const struct iovec *vector, int count); + ssize_t (*pwrite64)( int fd, const void *buf, size_t count, off64_t offset); + off_t (*lseek)(int fd, off_t offset, int whence); + off64_t (*lseek64)(int fd, off64_t offset, int whence); + int (*truncate)(const char *path, off_t length); + int (*truncate64)(const char *path, off64_t length); + int (*ftruncate)(int fd, off_t length); + int (*ftruncate64)(int fd, off64_t length); + int (*fallocate)(int fd, off_t offset, off_t length); + int (*close)( int fd); + int (*stat)(const char *path, struct stat *buf); + int (*stat64)(const char *path, struct stat64 *buf); + int (*fstat)(int fd, struct stat *buf); + int (*fstat64)(int fd, struct stat64 *buf); + int (*fstatat)(int fd, const char *path, struct stat *buf, int flag); + int (*fstatat64)(int fd, const char *path, struct stat64 *buf, int flag); + int (*lstat)(const char *path, struct stat *buf); + int (*lstat64)(const char *path, struct stat64 *buf); + int (*futimesat)(int dirfd, const char *path, const struct timeval times[2]); + int (*utimes)(const char *path, const struct timeval times[2]); + int (*utime)(const char *path, const struct utimbuf *buf); + int (*futimes)(int fd, const struct timeval times[2]); + int (*dup)(int oldfd); + int (*dup2)(int oldfd, int newfd); + int (*chown)(const char *path, uid_t owner, gid_t group); + int (*fchown)(int fd, uid_t owner, gid_t group); + int (*fchownat)(int fd, const char *path, uid_t owner, gid_t group, int flag); + int (*lchown)(const char *path, uid_t owner, gid_t group); + int (*chmod)(const char *path, mode_t mode); + int (*fchmod)(int fd, mode_t mode); + int (*fchmodat)(int fd, const char *path, mode_t mode, int flag); + int (*mkdir)(const char *path, mode_t mode); + int (*mkdirat)(int dirfd, const char *path, mode_t mode); + int (*rmdir)(const char *path); + ssize_t (*readlink)(const char *path, char *buf, size_t bufsiz); + ssize_t (*readlinkat)(int dirfd, const char *path, char *buf, size_t bufsiz); + int (*symlink)(const 
char *oldpath, const char *newpath); + int (*symlinkat)(const char *oldpath, int newdirfd, const char *newpath); + int (*link)(const char *oldpath, const char *newpath); + int (*linkat)(int olddirfd, const char *oldpath, + int newdirfd, const char *newpath, int flags); + int (*readdir)(u_int fd, struct dirent *dirp, u_int count); + int (*getdents)(u_int fd, struct dirent *dirp, u_int count); + int (*getdents64)(u_int fd, struct dirent64 *dirp, u_int count); + int (*access)(const char *path, int mode); + int (*faccessat)(int dirfd, const char *path, int mode, int flags); + int (*flock)(int fd, int op); + int (*fcntl)(int fd, int cmd, ...); + void (*sync)(void); + int (*fsync)(int fd); + int (*fdatasync)(int fd); + int (*fadvise)(int fd, off_t offset, off_t len, int advice); + int (*fadvise64)(int fd, off64_t offset, off64_t len, int advice); + int (*statfs)(const char *path, struct statfs *buf); + int (*statfs64)(const char *path, struct statfs64 *buf); + int (*fstatfs)(int fd, struct statfs *buf); + int (*fstatfs64)(int fd, struct statfs64 *buf); + int (*statvfs)(const char *path, struct statvfs *buf); + int (*fstatvfs)(int fd, struct statvfs *buf); + int (*mknod)(const char *path, mode_t mode, dev_t dev); + int (*mknodat)(int dirfd, const char *path, mode_t mode, dev_t dev); + ssize_t (*sendfile)(int outfd, int infd, off_t *offset, size_t count); + ssize_t (*sendfile64)(int outfd, int infd, off64_t *offset, size_t count); + int (*setxattr)(const char *path, const char *name, + const void *value, size_t size, int flags); + int (*lsetxattr)(const char *path, const char *name, + const void *value, size_t size, int flags); + int (*fsetxattr)(int fd, const char *name, + const void *value, size_t size, int flags); + ssize_t (*getxattr)(const char *path, const char *name, + void *value, size_t size); + ssize_t (*lgetxattr)(const char *path, const char *name, + void *value, size_t size); + ssize_t (*fgetxattr)(int fd, const char *name, void *value, size_t size); + ssize_t 
(*listxattr)(const char *path, char *list, size_t size); + ssize_t (*llistxattr)(const char *path, char *list, size_t size); + ssize_t (*flistxattr)(int fd, char *list, size_t size); + int (*removexattr)(const char *path, const char *name); + int (*lremovexattr)(const char *path, const char *name); + int (*fremovexattr)(int fd, const char *name); + mode_t (*umask)(mode_t mask); + mode_t (*getumask)(void); + int (*getdtablesize)(void); + void *(*mmap)(void *start, size_t length, int prot, + int flags, int fd, off_t offset); + int (*munmap)(void *start, size_t length); + int (*msync)(void *start, size_t length, int flags); +#if 0 + int (*acl_delete_def_file)(const char *path_p); + acl_t (*acl_get_fd)(int fd); + acl_t (*acl_get_file)(const char *path_p, acl_type_t type); + int (*acl_set_fd)(int fd, acl_t acl); + int (*acl_set_file)(const char *path_p, acl_type_t type, acl_t acl); +#endif + + /* socket operations */ + int (*socket)(int dowmain, int type, int protocol); + int (*accept)(int sockfd, struct sockaddr *addr, socklen_t *alen); + int (*bind)(int sockfd, const struct sockaddr *addr, socklen_t alen); + int (*connect)(int sockfd, const struct sockaddr *addr, socklen_t alen); + int (*getpeername)(int sockfd, struct sockaddr *addr, socklen_t *alen); + int (*getsockname)(int sockfd, struct sockaddr *addr, socklen_t *alen); + int (*getsockopt)(int sockfd, int lvl, int oname, + void *oval, socklen_t *olen); + int (*setsockopt)(int sockfd, int lvl, int oname, + const void *oval, socklen_t olen); + int (*ioctl)(int fd, int request, ...); + int (*listen)(int sockfd, int backlog); + int (*recv)(int sockfd, void *buf, size_t len, int flags); + int (*recvfrom)(int sockfd, void *buf, size_t len, int flags, + struct sockaddr *addr, socklen_t *alen); + int (*recvmsg)(int sockfd, struct msghdr *msg, int flags); + /* int (*select)(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, + struct timeval *timeout); */ + /* void (*FD_CLR)(int fd, fd_set *set); */ + /* void 
(*FD_ISSET)(int fd, fd_set *set); */ + /* void (*FD_SET)(int fd, fd_set *set); */ + /* void (*FD_ZERO)(fd_set *set); */ + /* int (*pselect)(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, + const struct timeval *timeout, const sigset_t *sigmask); */ + int (*send)(int sockfd, const void *buf, size_t len, int flags); + int (*sendto)(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *addr, socklen_t alen); + int (*sendmsg)(int sockfd, const struct msghdr *msg, int flags); + int (*shutdown)(int sockfd, int how); + int (*socketpair)(int d, int type, int prtocol, int sv[2]); + int (*pipe)(int filedes[2]); +} posix_ops; + +#ifdef BITDEFS +#define stat stat64 +#define fstat fstat64 +#define fstatat fstatat64 +#define lstat lstat64 +#define statfs statfs64 +#define fstatfs fstatfs64 +#define sendfile sendfile64 +#endif + +extern posix_ops glibc_ops; +extern posix_ops pvfs_ops; + +typedef struct pvfs_mmap_s +{ + void *mst; /**< start of mmap region */ + size_t mlen; /**< length of mmap region */ + int mprot; /**< protection of mmap region */ + int mflags; /**< flags of mmap region */ + int mfd; /**< file descriptor of mmap region */ + off_t moff; /**< offset of mmap region */ + struct qlist_head link; +} *pvfs_mmap_t; + +/** PVFS-POSIX Descriptor table entry */ +/* these items are shared between duped descrptors */ +typedef struct pvfs_descriptor_status_s +{ + gen_mutex_t lock; /**< protect struct from mult threads */ + int dup_cnt; /**< number of table slots with this des */ + posix_ops *fsops; /**< syscalls to use for this file */ + PVFS_object_ref pvfs_ref; /**< PVFS fs_id and handle for PVFS file */ + int flags; /**< the open flags used for this file */ + int mode; /**< stat mode of the file - may be volatile */ + off64_t file_pointer; /**< offset from the beginning of the file */ + PVFS_ds_position token; /**< used db Trove to iterate dirents */ + char *dpath; /**< path of an open directory for fchdir */ + struct file_ent_s *fent; /**< 
reference to cached objects */ + /**< set to NULL if not caching this file */ +} pvfs_descriptor_status; + +/* these are unique among descriptors */ +typedef struct pvfs_descriptor_s +{ + gen_mutex_t lock; /**< protect struct from mult threads */ + int is_in_use; /**< PVFS_FS if this descriptor is valid */ + int fd; /**< file number in PVFS descriptor_table */ + int true_fd; /**< the true file number depending on FS */ + int fdflags; /**< POSIX file descriptor flags */ + pvfs_descriptor_status *s; +} pvfs_descriptor; + +typedef struct pvfs_descriptor_s PFILE; /* these are for posix interface */ +typedef struct pvfs_descriptor_s PDIR; + +#endif +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/client/usrint/request.c b/src/client/usrint/request.c new file mode 100644 index 0000000..940d5cc --- /dev/null +++ b/src/client/usrint/request.c @@ -0,0 +1,157 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines - routines to convert requests for pvfs + */ +#define USRINT_SOURCE 1 +#include "usrint.h" + +int pvfs_check_vector(const struct iovec *iov, + int count, + PVFS_Request *req, + void **buf); + +/** + * converts a posix iovec into a PVFS Request + */ +int pvfs_convert_iovec (const struct iovec *vector, + int count, + PVFS_Request *req, + void **buf) +{ + /* for now just check for vectors and covert the rest */ + /* to a basic indexed struct */ + return pvfs_check_vector(vector, count, req, buf); +} + +int pvfs_check_vector(const struct iovec *iov, + int count, + PVFS_Request *req, + void **buf) +{ + int i; + int vstart; + int vlen; + int bsz; + PVFS_size stride; + int32_t *bsz_array; + PVFS_size *disp_array; + PVFS_Request *req_array; + int rblk; + + /* set up request arrays */ + bsz_array = (int32_t *)malloc(count * sizeof(int32_t)); + if (!bsz_array) + { + return -1; + } + disp_array = (PVFS_size *)malloc(count * sizeof(PVFS_size)); + if (!disp_array) + { + free(bsz_array); + return -1; + } + req_array = (PVFS_Request *)malloc(count * sizeof(PVFS_Request)); + if (!disp_array) + { + free(disp_array); + free(bsz_array); + return -1; + } + /* for now we assume that addresses in the iovec are ascending */ + /* not that otherwise won't work, but we're not sure */ + /* the first address will be assumed to be the base address of */ + /* the whole request. 
the displacement of each vector is relative */ + /* to that address */ + if (count > 0) + { + *buf = iov[0].iov_base; + } + rblk = 0; + /* start at beginning of iovec */ + i = 0; + while(i < count) + { + /* starting a new vector at position i */ + vstart = i; + vlen = 1; + bsz = iov[i].iov_len; + stride = 0; + /* vector blocks must be of equal size */ + while(++i < count && iov[i].iov_len == bsz) + { + if(vlen == 1) + { + /* two blocks of equal size are a vector of two */ + stride = (u_char *)iov[i].iov_base - + (u_char *)iov[i - 1].iov_base; + if (stride < bsz) + { + /* overlapping blocks and negative strides are problems */ + break; + } + vlen++; + } + else if (((u_char *)iov[i].iov_base - + (u_char *)iov[i - 1].iov_base) == stride) + { + /* to add more blocks, stride must match */ + vlen++; + } + else + { + /* doesn't match - end of vector */ + break; + } + } + if (vlen == 1) + { + /* trivial conversion */ + bsz_array[rblk] = iov[vstart].iov_len; + disp_array[rblk] = (PVFS_size)((u_char *)iov[vstart].iov_base - + (u_char *)*buf); + req_array[rblk] = PVFS_BYTE; + rblk++; + } + else + { + /* found a vector */ + bsz_array[rblk] = 1; + disp_array[rblk] = (PVFS_size)((u_char *)iov[vstart].iov_base - + (u_char *)*buf); + PVFS_Request_vector(vlen, bsz, stride, PVFS_BYTE, &req_array[rblk]); + rblk++; + } + } + /* now build full request */ + PVFS_Request_struct(rblk, bsz_array, disp_array, req_array, req); + PVFS_Request_commit(req); + free(bsz_array); + free(disp_array); + while (rblk--) + { + if (req_array[rblk] != PVFS_BYTE) + { + PVFS_Request_free(&req_array[rblk]); + } + } + free(req_array); + /* req is not freed, the caller is expected to do that */ + return 0; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/client/usrint/socket.c b/src/client/usrint/socket.c new file mode 100644 index 0000000..0ae1802 --- /dev/null +++ b/src/client/usrint/socket.c @@ -0,0 +1,506 @@ 
+/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines - wrappers for posix socket system calls + */ +#define USRINT_SOURCE 1 +#include "usrint.h" +#include +#include "posix-ops.h" +#include "posix-pvfs.h" +#include "openfile-util.h" + +/* + * SOCKET SYSTEM CALLS + */ + +int socket (int domain, int type, int protocol) +{ + int sockfd; + pvfs_descriptor *pd; + + /* sockfd = glibc_ops.socket(domain, type, protocol); */ + sockfd = syscall(SYS_socketcall, domain, type, protocol); + if (sockfd < 0) + { + return sockfd; + } + pd = pvfs_alloc_descriptor(&glibc_ops, sockfd, NULL, 0); + pd->mode |= S_IFSOCK; + return pd->fd; +} + +int accept (int sockfd, struct sockaddr *addr, socklen_t *alen) +{ + int rc = 0, fd; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + fd = pd->fsops->accept(pd->true_fd, addr, alen); + if (fd < 0) + { + rc = -1; + goto errorout; + } + pd = pvfs_alloc_descriptor(&glibc_ops, fd , NULL, 0); + pd->mode |= S_IFSOCK; + rc = fd; +errorout: + return rc; +} + +int bind (int sockfd, const struct sockaddr *addr, socklen_t alen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->bind(pd->true_fd, addr, alen); +errorout: + return rc; +} + +int connect (int sockfd, const struct sockaddr *addr, socklen_t alen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->connect(pd->true_fd, addr, alen); 
+errorout: + return rc; +} + +int getpeername (int sockfd, struct sockaddr *addr, socklen_t *alen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->getpeername(pd->true_fd, addr, alen); +errorout: + return rc; +} + +int getsockname (int sockfd, struct sockaddr *addr, socklen_t *alen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->getsockname(pd->true_fd, addr, alen); +errorout: + return rc; +} + +int getsockopt (int sockfd, int lvl, int oname, + void *oval, socklen_t *olen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->getsockopt(pd->true_fd, lvl, oname, oval, olen); +errorout: + return rc; +} + +int setsockopt (int sockfd, int lvl, int oname, + const void *oval, socklen_t olen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->setsockopt(pd->true_fd, lvl, oname, oval, olen); +errorout: + return rc; +} + +int ioctl (int fd, int request, ...) 
+{ + int rc; + pvfs_descriptor *pd; + va_list ap; + + va_start(ap, request); + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->ioctl(pd->true_fd, request, ap); + va_end(ap); +errorout: + return rc; +} + +int listen (int sockfd, int backlog) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->listen(pd->true_fd, backlog); +errorout: + return rc; +} + +int recv (int sockfd, void *buf, size_t len, int flags) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->recv(pd->true_fd, buf, len, flags); +errorout: + return rc; +} + +int recvfrom (int sockfd, void *buf, size_t len, int flags, + struct sockaddr *addr, socklen_t *alen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->recvfrom(pd->true_fd, buf, len, flags, addr, alen); +errorout: + return rc; +} + +int recvmsg (int sockfd, struct msghdr *msg, int flags) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->recvmsg(pd->true_fd, msg, flags); +errorout: + return rc; +} + +/* int select (int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, + struct timeval *timeout); */ +/* void FD_CLR (int fd, fd_set *set) */ +/* void 
FD_ISSET (int fd, fd_set *set) */ +/* void FD_SET (int fd, fd_set *set) */ +/* void FD_ZERO (fd_set *set); */ +/* int pselect (int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, + const struct timeval *timeout, const sigset_t *sigmask); */ + +int send (int sockfd, const void *buf, size_t len, int flags) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->send(pd->true_fd, buf, len, flags); +errorout: + return rc; +} + +int sendto (int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *addr, socklen_t alen) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->sendto(pd->true_fd, buf, len, flags, addr, alen); +errorout: + return rc; +} + +int sendmsg (int sockfd, const struct msghdr *msg, int flags) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->sendmsg(pd->true_fd, msg, flags); +errorout: + return rc; +} + +int shutdown (int sockfd, int how) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(sockfd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + if (!S_ISSOCK(pd->mode)) + { + errno = ENOTSOCK; + rc = -1; + goto errorout; + } + rc = pd->fsops->shutdown(pd->true_fd, how); +errorout: + return rc; +} + +int socketpair (int d, int type, int protocol, int sv[2]) +{ + int rc = 0; + pvfs_descriptor *pd0, *pd1; + rc = glibc_ops.socketpair(d, type, protocol, sv); + if (rc < 0) + { + goto errorout; + } + pd0 = pvfs_alloc_descriptor(&glibc_ops, 
sv[0], NULL, 0); + if (!pd0) + { + goto errorout; + } + pd1 = pvfs_alloc_descriptor(&glibc_ops, sv[1], NULL, 0); + if (!pd1) + { + pvfs_free_descriptor(pd0->fd); + errno = EMFILE; + rc = -1; + goto errorout; + } + pd0->mode |= S_IFSOCK; + pd1->mode |= S_IFSOCK; + sv[0] = pd0->true_fd; + sv[1] = pd1->true_fd; +errorout: + return rc; +} + +int pipe(int filedes[2]) +{ + int rc = 0; + pvfs_descriptor *f0, *f1; + int fa[2]; + if(!filedes) + { + errno = EFAULT; + rc = -1; + goto errorout; + } + rc = glibc_ops.pipe(fa); + if (rc < 0) + { + goto errorout; + } + f0 = pvfs_alloc_descriptor(&glibc_ops, fa[0], NULL, 0); + if (!f0) + { + goto errorout; + } + f1 = pvfs_alloc_descriptor(&glibc_ops, fa[1], NULL, 0); + if (!f1) + { + pvfs_free_descriptor(f0->fd); + errno = EMFILE; + rc = -1; + goto errorout; + } + f0->mode |= S_IFSOCK; + f1->mode |= S_IFSOCK; + filedes[0] = f0->true_fd; + filedes[1] = f1->true_fd; +errorout: + return rc; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/client/usrint/stdio-ops.h b/src/client/usrint/stdio-ops.h new file mode 100644 index 0000000..30f4407 --- /dev/null +++ b/src/client/usrint/stdio-ops.h @@ -0,0 +1,108 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines - implementation of stdio for pvfs + */ + +#ifndef STDIO_OPS_H +#define STDIO_OPS_H + +struct stdio_ops_s +{ + FILE *(*fopen)(const char *path, const char *mode); + FILE *(*fdopen)(int fd, const char *mode); + FILE *(*freopen)(const char *path, const char *mode, FILE *stream); + size_t (*fwrite)(const void *ptr, size_t size, size_t nmemb, FILE *stream); + size_t (*fwrite_unlocked)(const void *ptr, size_t size, size_t nmemb, FILE *stream); + size_t (*fread)(void *ptr, size_t size, size_t nmemb, FILE *stream); + size_t (*fread_unlocked)(void *ptr, size_t size, size_t nmemb, FILE *stream); + int (*fclose)(FILE *stream); + int (*fseek)(FILE *stream, long offset, int whence); + int (*fseek64)(FILE *stream, const off64_t offset, int whence); + int (*fsetpos)(FILE *stream, const fpos_t *pos); + void (*rewind)(FILE *stream); + long int (*ftell)(FILE *stream); + off64_t (*ftell64)(FILE *stream); + int (*fgetpos)(FILE *stream, fpos_t *pos); + int (*fflush)(FILE *stream); + int (*fflush_unlocked)(FILE *stream); + int (*fputc)(int c, FILE *stream); + int (*fputc_unlocked)(int c, FILE *stream); + int (*fputs)(const char *s, FILE *stream); + int (*fputs_unlocked)(const char *s, FILE *stream); + int (*putc)(int c, FILE *stream); + int (*putc_unlocked)(int c, FILE *stream); + int (*putchar)(int c); + int (*putchar_unlocked)(int c); + int (*puts)(const char *s); + int (*putw)(int wd, FILE *stream); + char *(*fgets)(char *s, int size, FILE *stream); + char *(*fgets_unlocked)(char *s, int size, FILE *stream); + int (*fgetc)(FILE *stream); + int (*fgetc_unlocked)(FILE *stream); + int (*getc)(FILE *stream); + int (*getc_unlocked)(FILE *stream); + int (*getchar)(void); + int (*getchar_unlocked)(void); + int (*getw)(FILE *stream); + char *(*gets)(char * s); + ssize_t (*getdelim)(char **lnptr, size_t *n, int delim, FILE *stream); + int (*ungetc)(int c, FILE *stream); + int (*vfprintf)(FILE *stream, const char 
*format, va_list ap); + int (*vprintf)(const char *format, va_list ap); + int (*fprintf)(FILE *stream, const char *format, ...); + int (*printf)(const char *format, ...); + void (*perror)(const char *s); + int (*fscanf)(FILE *stream, const char *format, ...); + int (*scanf)(const char *format, ...); + void (*clearerr)(FILE *stream); + void (*clearerr_unlocked)(FILE *stream); + int (*feof)(FILE *stream); + int (*feof_unlocked)(FILE *stream); + int (*ferror)(FILE *stream); + int (*ferror_unlocked)(FILE *stream); + int (*fileno)(FILE *stream); + int (*fileno_unlocked)(FILE *stream); + int (*remove)(const char *path); + void (*setbuf)(FILE *stream, char *buf); + void (*setbuffer)(FILE *stream, char *buf, size_t size); + void (*setlinebuf)(FILE *stream); + int (*setvbuf)(FILE *stream, char *buf, int mode, size_t size); + char *(*mkdtemp)(char *template); + int (*mkstemp)(char *template); + FILE *(*tmpfile)(void); + DIR *(*opendir)(const char *name); + DIR *(*fdopendir)(int fd); + int (*dirfd)(DIR *dir); + struct dirent *(*readdir)(DIR *dir); + struct dirent64 *(*readdir64)(DIR *dir); + void (*rewinddir)(DIR *dir); + void (*seekdir)(DIR *dir, off_t offset); + off_t (*telldir)(DIR *dir); + int (*closedir)(DIR *dir); + int (*scandir)(const char *dir, + struct dirent ***namelist, + int(*filter)(const struct dirent *), + int(*compar)(const void *, const void *)); + int (*scandir64 )(const char *dir, + struct dirent64 ***namelist, + int(*filter)(const struct dirent64 *), + int(*compar)(const void *, const void *)); +}; + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/client/usrint/ucache.c b/src/client/usrint/ucache.c new file mode 100644 index 0000000..ccb861b --- /dev/null +++ b/src/client/usrint/ucache.c @@ -0,0 +1,2066 @@ +/* + * (C) 2011 Clemson University + * + * See COPYING in top-level directory. 
 */

/**
 * \file
 * \ingroup usrint
 *
 * Experimental cache for user data.
 *
 */
#include "usrint.h"
#include "posix-ops.h"
#include "openfile-util.h"
#include "iocommon.h"
#if PVFS_UCACHE_ENABLE
#include "ucache.h"
#include /* NOTE(review): header name lost in patch extraction (an
          * angle-bracket include was stripped) — likely <sys/shm.h> or
          * similar SysV shared-memory header; restore from original source. */

/* Global Variables */
FILE *out; /* For Logging Purposes */

/* static uint32_t ucache_blk_cnt = 0; */

/* Global pointers to data in shared mem. Pointers set in ucache_initialize */
union user_cache_u *ucache = 0;
struct ucache_aux_s *ucache_aux = 0; /* All locks and stats stored here */

/* ucache_aux is a pointer to the actual data summarized by the following
 * pointers
*/
ucache_lock_t *ucache_locks = 0; /* The shmem of all ucache locks */
ucache_lock_t *ucache_lock = 0; /* Global Lock maintaining concurrency */
struct ucache_stats_s *ucache_stats = 0; /* Pointer to stats structure*/

/* Per-process (thread) execution statistics */
struct ucache_stats_s these_stats = { 0, 0, 0, 0, 0 };

/* Flags indicating ucache status */
int ucache_enabled = 0;  /* set to 1 by ucache_initialize on success */
char ftblInitialized = 0; /* set to 1 once the file table has been set up */

/* Internal Only Function Declarations */

/* Initialization */
static void add_mtbls(uint16_t blk);
static void init_memory_table(struct mem_table_s *mtbl);
static inline int init_memory_entry(struct mem_table_s *mtbl, int16_t index);

/* Gets */
static uint16_t get_next_free_mtbl(uint16_t *free_mtbl_blk, uint16_t *free_mtbl_ent);
static uint16_t get_free_fent(void);
static inline uint16_t get_free_ment(struct mem_table_s *mtbl);
static inline uint16_t get_free_blk(void);

/* Puts */
static int put_free_mtbl(struct mem_table_s *mtbl, struct file_ent_s *file);
static void put_free_fent(struct file_ent_s *fent);
static void put_free_ment(struct mem_table_s *mtbl, uint16_t ent);
static inline void put_free_blk(uint16_t blk);

/* File Entry Chain Iterator */
static unsigned char file_done(uint16_t index);
static uint16_t file_next(struct file_table_s *ftbl, uint16_t index);

/* Memory Entry Chain Iterator */
static inline unsigned char ment_done(uint16_t index);
static inline uint16_t ment_next(struct mem_table_s *mtbl, uint16_t index);

/* Dirty List Iterator */
static inline unsigned char dirty_done(uint16_t index);
static inline uint16_t dirty_next(struct mem_table_s *mtbl, uint16_t index);

/* File and Memory Insertion */
uint16_t insert_file(uint32_t fs_id, uint64_t handle);

static inline void *insert_mem(struct file_ent_s *fent,
                               uint64_t offset,
                               uint16_t *block_ndx
);

static inline void *set_item(struct file_ent_s *fent,
                             uint64_t offset,
                             uint16_t index
);

/* File and Memory Lookup */
static struct mem_table_s *lookup_file(
    uint32_t fs_id,
    uint64_t handle,
    uint16_t *file_mtbl_blk, /* Can be NULL if not desired */
    uint16_t *file_mtbl_ent,
    uint16_t *file_ent_index,
    uint16_t *file_ent_prev_index
);
static inline void *lookup_mem(struct mem_table_s *mtbl,
                               uint64_t offset,
                               uint16_t *item_index,
                               uint16_t *mem_ent_index,
                               uint16_t *mem_ent_prev_index
);

/* File and Memory Entry Removal */
static int remove_file(struct file_ent_s *fent);
static int wipe_mtbl(struct mem_table_s *mtbl);
static int remove_mem(struct file_ent_s *fent, uint64_t offset);

/* Eviction Utilities */
static uint16_t locate_max_fent(struct file_ent_s **fent);
static void update_LRU(struct mem_table_s *mtbl, uint16_t index);
static int evict_LRU(struct file_ent_s *fent);

/* Logging */
//static void log_ucache_stats(void);

/* List Printing Functions */
void print_LRU(struct mem_table_s *mtbl);
void print_dirty(struct mem_table_s *mtbl);

/* Flushing of individual files and blocks */
int flush_file(struct file_ent_s *fent);
int flush_block(struct file_ent_s *fent, struct mem_ent_s *ment);

/* Externally Visible API
 * The following functions are thread/processor safe regarding the cache
 * tables and data.
 */

/**
 * Initializes the cache.
+ * Mainly, it aquires a previously created shared memory segment used to + * cache data. The shared mem. creation and ftbl initialization should already + * have been done by the daemon at this point. + * + * The whole cache is protected globally by a locking mechanism. + * + * Locks (same type as global lock) can be used to protect block level data. + */ +int ucache_initialize(void) +{ + int rc = 0; + //gossip_set_debug_mask(1, GOSSIP_UCACHE_DEBUG); + + /* Aquire pointers to shmem segments (ucache_aux and ucache) */ + /* shmget segment containing ucache_aux */ + key_t key = ftok(KEY_FILE, SHM_ID1); + int shmflg = SVSHM_MODE; + int aux_shmid = shmget(key, 0, shmflg); + if(aux_shmid == -1) + { + //gossip_debug(GOSSIP_UCACHE_DEBUG, + // "ucache_initialize - ucache_aux shmget: errno = %d\n", errno); + return -1; + } + /* shmat ucache_aux */ + ucache_aux = shmat(aux_shmid, NULL, 0); + if((long int)ucache_aux == -1) + { + //gossip_debug(GOSSIP_UCACHE_DEBUG, + // "ucache_initialize - ucache_aux shmat: errno = %d\n", errno); + return -1; + } + + /* Set our global pointers to data in the ucache_aux struct */ + ucache_locks = ucache_aux->ucache_locks; + ucache_lock = get_lock(BLOCKS_IN_CACHE); + ucache_stats = &(ucache_aux->ucache_stats); + + /* ucache */ + key = ftok(KEY_FILE, SHM_ID2); + int ucache_shmid = shmget(key, 0, shmflg); + if(ucache_shmid == -1) + { + //gossip_debug(GOSSIP_UCACHE_DEBUG, + // "ucache_initialize - ucache shmget: errno = %d\n", errno); + return -1; + } + ucache = (union user_cache_u *)shmat(ucache_shmid, NULL, 0); + if((long int)ucache == -1) + { + //gossip_debug(GOSSIP_UCACHE_DEBUG, + // "ucache_initialize - ucache shmat: errno = %d\n", errno); + return -1; + } + + /* When this process ends we may want to dump ucache stats to a log file */ + //rc = atexit(log_ucache_stats); + + /* Declare the ucache enabled! */ + ucache_enabled = 1; + return rc; +} + +/** + * Returns a pointer to the mtbl corresponding to the blk & ent. 
+ * Input must be reliable otherwise invalid mtbl could be returned. + */ +inline struct mem_table_s *get_mtbl(uint16_t mtbl_blk, uint16_t mtbl_ent) +{ + if( mtbl_blk < BLOCKS_IN_CACHE && + mtbl_ent < MEM_TABLE_ENTRY_COUNT) + { + return &(ucache->b[mtbl_blk].mtbl[mtbl_ent]); + } + else + { + return (struct mem_table_s *)NILP; + } +} + +/** + * Initializes the ucache file table if it hasn't previously been initialized. + * Although this function is visible, DO NOT CALL THIS FUNCTION. + * It is meant to be called in the ucache daemon or during testing. + * see: src/apps/ucache/ucached.c for more info. + * + * Sets the char booelan ftblInitialized when ftbl has been successfully + * initialized. + * + * Returns 0 on success, -1 on failure. + */ +int ucache_init_file_table(char forceCreation) +{ + int i; + + /* check if already initialized? */ + if(ftblInitialized == 1 && !forceCreation) + { + return -1; + } + if(ucache) + { + memset(ucache, 0, CACHE_SIZE); + } + else + { + return -1; + } + + + /* initialize mtbl free list table */ + ucache->ftbl.free_mtbl_blk = NIL16; + ucache->ftbl.free_mtbl_ent = NIL16; + add_mtbls(0); + + /* set up list of free blocks */ + ucache->ftbl.free_blk = 1; + for (i = 1; i < (BLOCKS_IN_CACHE - 1); i++) + { + ucache->b[i].mtbl[0].free_list_blk = i + 1; + } + ucache->b[BLOCKS_IN_CACHE - 1].mtbl[0].free_list_blk = NIL16; + + /* set up file hash table */ + for (i = 0; i < FILE_TABLE_HASH_MAX; i++) + { + ucache->ftbl.file[i].tag_handle = NIL64; + ucache->ftbl.file[i].tag_id = NIL32; + ucache->ftbl.file[i].mtbl_blk = NIL16; + ucache->ftbl.file[i].mtbl_ent = NIL16; + ucache->ftbl.file[i].next = NIL16; + } + + /* set up list of free hash table entries */ + ucache->ftbl.free_list = FILE_TABLE_HASH_MAX; + for (i = FILE_TABLE_HASH_MAX; i < FILE_TABLE_ENTRY_COUNT - 1; i++) + { + ucache->ftbl.file[i].mtbl_blk = NIL16; + ucache->ftbl.file[i].mtbl_ent = NIL16; + ucache->ftbl.file[i].next = i + 1; + } + ucache->ftbl.file[FILE_TABLE_ENTRY_COUNT - 1].next = 
NIL16; + + /* Success */ + ftblInitialized = 1; + return 0; +} + +/** + * Opens a file in ucache. + */ +int ucache_open_file(PVFS_fs_id *fs_id, + PVFS_handle *handle, + struct file_ent_s **fent) +{ + int rc = -1; + uint16_t file_mtbl_blk; + uint16_t file_mtbl_ent; + uint16_t file_ent_index; + uint16_t file_ent_prev_index; + + lock_lock(ucache_lock); + + struct mem_table_s *mtbl = lookup_file((uint32_t)(*fs_id), + (uint64_t)(*handle), + &file_mtbl_blk, + &file_mtbl_ent, + &file_ent_index, + &file_ent_prev_index); + + if(mtbl == (struct mem_table_s *)NIL) + { + uint16_t fentIndex = insert_file((uint32_t)*fs_id, (uint64_t)*handle); + if(fentIndex > FILE_TABLE_ENTRY_COUNT) + { + rc = -1; + goto done; + } + *fent = &(ucache->ftbl.file[fentIndex]); + if((*fent)->mtbl_blk == NIL16 || (*fent)->mtbl_ent == NIL16) + { + rc = -1; + goto done; + } + + mtbl = get_mtbl((*fent)->mtbl_blk, (*fent)->mtbl_ent); + if(mtbl == (struct mem_table_s *)NILP) + { + /* Error - Could not insert */ + rc = -1; + goto done; + } + else + { + /* File Inserted */ + mtbl->ref_cnt = 1; + rc = 0; + goto done; + } + } + else + { + /* File was previously Inserted */ + mtbl->ref_cnt++; + *fent = &(ucache->ftbl.file[file_ent_index]); + rc = 1; + goto done; + } +done: + lock_unlock(ucache_lock); + return rc; +} + +/** + * Returns ptr to block in ucache based on file and offset + */ +inline void *ucache_lookup(struct file_ent_s *fent, uint64_t offset, + uint16_t *block_ndx) +{ + void *retVal = (void *) NIL; + if(fent) + { + lock_lock(ucache_lock); + struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, fent->mtbl_ent); + retVal = lookup_mem(mtbl, + offset, + block_ndx, + NULL, + NULL); + lock_unlock(ucache_lock); + } + return retVal; +} + +/** + * Prepares the data structures for block storage. + * On success, returns a pointer to where the block of data should be written. + * On failure, returns NIL. 
+ */ +inline void *ucache_insert(struct file_ent_s *fent, + uint64_t offset, + uint16_t *block_ndx +) +{ + lock_lock(ucache_lock); + void * retVal = insert_mem(fent, offset, block_ndx); + lock_unlock(ucache_lock); + return (retVal); +} + +#if 0 +/** + * Removes a cached block of data from mtbl + * Returns 1 on success, 0 on failure. + */ +int ucache_remove(struct file_ent_s *fent, uint64_t offset) +{ + int rc = 0; + lock_lock(ucache_lock); + rc = remove_mem(fent , offset); + lock_unlock(ucache_lock); + return rc; +} +#endif + +/** + * Flushes the entire ucache's dirty blocks (every file's dirty blocks) + * Returns 0 on success, -1 on failure + */ +int ucache_flush_cache(void) +{ + int rc = 0; + lock_lock(ucache_lock); + struct file_table_s *ftbl = &ucache->ftbl; + int i; + for(i = 0; i < FILE_TABLE_HASH_MAX; i++) + { + if((ftbl->file[i].tag_handle != NIL64) && + (ftbl->file[i].tag_handle != 0)) + { + /* Iterate accross file table chain. */ + uint16_t j; + for(j = i; !file_done(j); j = file_next(ftbl, j)) + { + rc = flush_file(&ftbl->file[j]); + if(rc !=0) + { + rc = -1; + goto done; + } + } + } + } + +done: + lock_unlock(ucache_lock); + return rc; +} + +/** + * Externally visible wrapper of the internal flush file function. + * This is intended to allow and external flush file call which locks the + * global lock, flushes the file, then releases the global lock. + * To prevent deadlock, do not call this in any function that aquires the + * global lock. + * Returns 0 on success, -1 on failure. + */ +int ucache_flush_file(struct file_ent_s *fent) +{ + int rc = 0; + lock_lock(ucache_lock); + rc = flush_file(fent); + lock_unlock(ucache_lock); + return rc; +} + +/** + * Internal only function - Flushes dirty blocks to the I/O Nodes + * Returns 0 on success and -1 on failure. 
+ */ +int flush_file(struct file_ent_s *fent) +{ + int rc = 0; + + struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, fent->mtbl_ent); + + uint16_t i; + uint16_t temp_next = NIL16; + for(i = mtbl->dirty_list; !dirty_done(i); i = temp_next) + { + struct mem_ent_s *ment = &(mtbl->mem[i]); + if(ment->tag == NIL64 || ment->item == NIL16) + { + break; + } + + /* Aquire block lock - TODO:check if this is redundant due to global lock */ + ucache_lock_t *blk_lock = get_lock(ment->item); + lock_lock(blk_lock); + + temp_next = mtbl->mem[i].dirty_next; + mtbl->mem[i].dirty_next = NIL16; + + /*#ifdef FILE_SYSTEM_ENABLED*/ + PVFS_object_ref ref = {fent->tag_handle, fent->tag_id, 0}; + struct iovec vector = {&(ucache->b[ment->item].mblk[0]), CACHE_BLOCK_SIZE_K * 1024}; + rc = iocommon_vreadorwrite(2, &ref, ment->tag, 1, &vector); + /* + #endif + #ifndef FILE_SYSTEM_ENABLED + rc = 0; + #endif + */ + + lock_unlock(blk_lock); + if(rc == -1) + { + goto done; + } + } + + mtbl->dirty_list = NIL16; + rc = 0; + +done: + return rc; +} + +/** + * This function is meant to be called only inside remove_mem. + * Returns 0 on success, -1 on failure + */ +int flush_block(struct file_ent_s *fent, struct mem_ent_s *ment) +{ + int rc = 0; + PVFS_object_ref ref = {fent->tag_handle, fent->tag_id, 0}; + struct iovec vector = {&(ucache->b[ment->item].mblk[0]), CACHE_BLOCK_SIZE_K * 1024}; + rc = iocommon_vreadorwrite(2, &ref, ment->tag, 1, &vector); + return rc; +} + + +/** + * For testing purposes only! 
+ */ +int wipe_ucache(void) +{ + int rc = 0; + + /* Aquire pointers to shmem segments (just ucache) */ + int shmflg = SVSHM_MODE; + + /* ucache */ + key_t key = ftok(KEY_FILE, SHM_ID2); + int ucache_shmid = shmget(key, 0, shmflg); + if(ucache_shmid == -1) + { + perror("wipe_ucache - ucache shmget"); + return -1; + } + ucache = (union user_cache_u *)shmat(ucache_shmid, NULL, 0); + if((long int)ucache == -1) + { + perror("wipe ucache - ucache shmat"); + return -1; + } + + /* wipe the cache, locks, and reinitialize */ + memset(ucache, 0, CACHE_SIZE); + + /* Force Re-creation of ftbl */ + rc = ucache_init_file_table(1); + return rc; +} + +/** + * Removes all memory entries in the mtbl corresponding to the file info + * provided as parameters. It also removes the mtbl and the file entry from + * the cache. + */ +int ucache_close_file(struct file_ent_s *fent) +{ + int rc = 0; + rc = lock_lock(ucache_lock); + rc = remove_file(fent); + lock_unlock(ucache_lock); + return rc; +} + +/** May dump stats to log file if the envar LOG_UCACHE_STATS is set to 1. + * + */ +#if 0 +void log_ucache_stats(void) +{ + /* Return if envar not set to 1 */ + char *var = getenv("LOG_UCACHE_STATS"); + if(!var) + { + return; + } + if(atoi(var) != 1) + { + return; + } + + float attempts = these_stats.hits + these_stats.misses; + float percentage = 0.0; + /* Don't Divide By Zero! */ + if(attempts) + { + percentage = ((float)these_stats.hits) / attempts; + } + /* + gossip_debug(GOSSIP_UCACHE_DEBUG, + "user cache statistics for this execution:\n" + "\thits=\t%llu\n" + "\tmisses=\t%llu\n" + "\thit percentage=\t%f\n" + "\tpseudo_misses=\t%llu\n" + "\tblock_count=\t%hu\n" + "\tfile_count=\t%hu\n", + (long long unsigned int) these_stats.hits, + (long long unsigned int) these_stats.misses, + percentage, + (long long unsigned int) these_stats.pseudo_misses, + these_stats.block_count, + these_stats.file_count + ); + */ +} +#endif + +/** + * Dumps all cache related information to the specified file pointer. 
 * Returns 0 on succes, -1 on failure meaning the ucache wasn't enabled
 * for some reason.
 */
int ucache_info(FILE *out, char *flags)
{
    if(!ucache_enabled)
    {
        ucache_initialize();
    }
    if(!ucache_enabled)
    {
        //fprintf(out, "ucache is not enabled. See ucache.log and ucached.log.\n");
        return -1;
    }

    /* Decide what to show */
    unsigned char show_all = 0;
    unsigned char show_summary = 0;
    unsigned char show_parameters = 0;
    unsigned char show_contents = 0;
    unsigned char show_free = 0;

    int char_ndx;
    /* NOTE(review): the flag-parsing loop body was garbled during patch
     * extraction (the loop condition and the code mapping characters in
     * 'flags' onto the show_* booleans is missing — likely an angle
     * bracket was stripped). The fragment below is preserved verbatim;
     * restore the full loop from the original source. */
    for (char_ndx=0; char_ndxhits + ucache_stats->misses;
    float percentage = 0.0;

    /* Don't Divide By Zero! */
    if(attempts)
    {
        percentage = ((float) ucache_stats->hits) / attempts;
    }

    if(show_all || show_summary)
    {
        fprintf(out,
            "user cache statistics:\n"
            "\thits=\t%llu\n"
            "\tmisses=\t%llu\n"
            "\thit percentage=\t%f\n"
            "\tpseudo_misses=\t%llu\n"
            "\tblock_count=\t%hu\n"
            "\tfile_count=\t%hu\n",
            (long long unsigned int) ucache_stats->hits,
            (long long unsigned int) ucache_stats->misses,
            percentage * 100,
            (long long unsigned int) ucache_stats->pseudo_misses,
            ucache_stats->block_count,
            ucache_stats->file_count
        );
    }

    if(show_all || show_parameters)
    {

        fprintf(out, "\n#defines:\n");
        /* First, print many of the #define values */
        fprintf(out, "MEM_TABLE_ENTRY_COUNT = %d\n", MEM_TABLE_ENTRY_COUNT);
        fprintf(out, "FILE_TABLE_ENTRY_COUNT = %d\n", FILE_TABLE_ENTRY_COUNT);
        fprintf(out, "CACHE_BLOCK_SIZE_K = %d\n", CACHE_BLOCK_SIZE_K);
        fprintf(out, "MEM_TABLE_HASH_MAX = %d\n", MEM_TABLE_HASH_MAX);
        fprintf(out, "FILE_TABLE_HASH_MAX = %d\n", FILE_TABLE_HASH_MAX);
        fprintf(out, "MTBL_PER_BLOCK = %d\n", MTBL_PER_BLOCK );
        fprintf(out, "KEY_FILE = %s\n", KEY_FILE);
        fprintf(out, "SHM_ID1 = %d\n", SHM_ID1);
        fprintf(out, "SHM_ID2 = %d\n", SHM_ID2);
        fprintf(out, "BLOCKS_IN_CACHE = %d\n", BLOCKS_IN_CACHE);
        fprintf(out, "CACHE_SIZE = %d(B)\t%d(MB)\n", CACHE_SIZE,
                (CACHE_SIZE/(1024*1024)));
        fprintf(out, "AT_FLAGS = %d\n", AT_FLAGS);
        fprintf(out, "SVSHM_MODE = %d\n", SVSHM_MODE);
        fprintf(out, "CACHE_FLAGS = %d\n", CACHE_FLAGS);
        fprintf(out, "NIL = 0X%X\n", NIL);
        fprintf(out, "NIL8 = 0X%X\n", NIL8);
        fprintf(out, "NIL16 = 0X%X\n", NIL16);
        fprintf(out, "NIL32 = 0X%X\n", NIL32);
        fprintf(out, "NIL64 = 0X%lX\n", NIL64);

        /* Print sizes of ucache elements */
        fprintf(out, "sizeof union cache_block_u = %lu\n", sizeof(union cache_block_u));
        fprintf(out, "sizeof struct file_table_s = %lu\n", sizeof(struct file_table_s));
        fprintf(out, "sizeof struct file_ent_s = %lu\n", sizeof(struct file_ent_s));
        fprintf(out, "sizeof struct mem_table_s = %lu\n", sizeof(struct mem_table_s));
        fprintf(out, "sizeof struct mem_ent_s = %lu\n", sizeof(struct mem_ent_s));
    }

    if(show_all || show_contents)
    {
        /* Auxilliary structure related to ucache */
        fprintf(out, "ucache_aux ptr:\t\t0X%lX\n", (long int)ucache_aux);

        /* ucache Shared Memory Info */
        fprintf(out, "ucache ptr:\t\t0X%lX\n", (long int)ucache);

        /* FTBL Info */
        struct file_table_s *ftbl = &(ucache->ftbl);
        fprintf(out, "ftbl ptr:\t\t0X%lX\n", (long int)&(ucache->ftbl));
        fprintf(out, "free_blk = %hu\n", ftbl->free_blk);
        fprintf(out, "free_mtbl_blk = %hu\n", ftbl->free_mtbl_blk);
        fprintf(out, "free_mtbl_ent = %hu\n", ftbl->free_mtbl_ent);
        fprintf(out, "free_list = %hu\n", ftbl->free_list);

        uint16_t i;

        if(show_all || show_free)
        {
            /* Other Free Blocks */
            fprintf(out, "\nIterating Over Free Blocks:\n\n");
            for(i = ftbl->free_blk; i < BLOCKS_IN_CACHE; i = ucache->b[i].mtbl[0].
                    free_list_blk)
            {
                fprintf(out, "Free Block:\tCurrent: %hu\tNext: %hu\n", i,
                        ucache->b[i].mtbl[0].free_list_blk);
            }
            fprintf(out, "End of Free Blocks List\n");


            /* Iterate Over Free Mtbls */
            fprintf(out, "\nIterating Over Free Mtbls:\n");
            uint16_t current_blk = (uint16_t)ftbl->free_mtbl_blk;
            uint16_t current_ent = ftbl->free_mtbl_ent;
            while(current_blk != NIL16)
            {
                fprintf(out, "free mtbl: block = %hu\tentry = %hu\n",
                        current_blk, current_ent);
                uint16_t temp_blk = ucache->b[current_blk].mtbl[current_ent].free_list_blk;
                uint16_t temp_ent = ucache->b[current_blk].mtbl[current_ent].free_list;
                current_blk = temp_blk;
                current_ent = temp_ent;
            }
            fprintf(out, "End of Free Mtbl List\n\n");

            /* Iterating Over Free File Entries */
            fprintf(out, "Iterating Over Free File Entries:\n");
            uint16_t current_fent;
            for(current_fent = ftbl->free_list; current_fent != NIL16;
                    current_fent = ftbl->file[current_fent].next)
            {
                fprintf(out, "free file entry: index = %d\n", (int16_t)current_fent);
            }
            fprintf(out, "End of Free File Entry List\n\n");
        }

        fprintf(out, "Iterating Over File Entries in Hash Table:\n\n");
        /* iterate over file table entries */
        for(i = 0; i < FILE_TABLE_HASH_MAX; i++)
        {
            if((ftbl->file[i].tag_handle != NIL64) &&
               (ftbl->file[i].tag_handle != 0))
            {
                /* iterate accross file table chain */
                uint16_t j;
                for(j = i; !file_done(j); j = file_next(ftbl, j))
                {
                    fprintf(out, "FILE ENTRY INDEX %hu ********************\n", j);
                    struct file_ent_s * fent = &(ftbl->file[j]);
                    fprintf(out, "tag_handle = 0X%llX\n",
                            (long long int)fent->tag_handle);
                    fprintf(out, "tag_id = 0X%X\n", (uint32_t)fent->tag_id);
                    fprintf(out, "mtbl_blk = %hu\n", fent->mtbl_blk);
                    fprintf(out, "mtbl_ent = %hu\n", fent->mtbl_ent);
                    fprintf(out, "next = %hu\n", fent->next);
                    fprintf(out, "index = %hu\n", fent->index);

                    struct mem_table_s * mtbl = get_mtbl(fent->mtbl_blk,
                                                         fent->mtbl_ent);

                    fprintf(out, "\tMTBL LRU List ****************\n");
                    print_LRU(mtbl);
                    print_dirty(mtbl);

                    fprintf(out, "\tMTBL INFO ********************\n");
                    fprintf(out, "\tnum_blocks = %hu\n", mtbl->num_blocks);
                    fprintf(out, "\tfree_list = %hu\n", mtbl->free_list);
                    fprintf(out, "\tfree_list_blk = %hu\n", mtbl->free_list_blk);
                    fprintf(out, "\tlru_first = %hu\n", mtbl->lru_first);
                    fprintf(out, "\tlru_last = %hu\n", mtbl->lru_last);
                    fprintf(out, "\tdirty_list = %hu\n", mtbl->dirty_list);
                    fprintf(out, "\tref_cnt = %hu\n\n", mtbl->ref_cnt);
                    fflush(out);
                    /* Iterate Over Memory Entries */
                    uint16_t k;
                    for(k = 0; k < MEM_TABLE_HASH_MAX; k++)
                    {
                        if(mtbl->bucket[k] == NIL16)
                            continue;

                        if(mtbl->mem[mtbl->bucket[k]].tag != NIL64)
                        {
                            uint16_t l;
                            for(l = mtbl->bucket[k]; !ment_done(l); l = ment_next(mtbl, l))
                            {
                                struct mem_ent_s * ment = &(mtbl->mem[l]);
                                fprintf(out, "\t\tMEMORY ENTRY INDEX %hd **********"
                                        "*********\n", l);
                                fprintf(out, "\t\ttag = 0X%lX\n",
                                        (long unsigned int)ment->tag);

                                fprintf(out, "\t\titem = %hu\n",
                                        ment->item);
                                fprintf(out, "\t\tnext = %hu\n",
                                        ment->next);
                                fprintf(out, "\t\tdirty_next = %hu\n",
                                        ment->dirty_next);
                                fprintf(out, "\t\tlru_next = %hu\n",
                                        ment->lru_next);
                                fprintf(out, "\t\tlru_prev = %hu\n\n",
                                        ment->lru_prev);
                            }
                        }
                        else
                        {
                            if(mtbl->num_blocks != 0
                               && (show_all || show_free))
                            {
                                fprintf(out, "\tvacant memory entry @ index = %d\n",
                                        mtbl->bucket[k]);
                            }
                        }
                    }
                }
                fprintf(out, "End of chain @ Hash Table Index %hu\n\n", i);
            }
            else
            {
                if(show_all || show_free)
                {
                    fprintf(out, "vacant file entry @ index = %hu\n\n", i);
                }
            }
        }
    }
    return 0;
}

/**
 * Returns a pointer to the lock corresponding to the block_index.
 * If the index is out of range, then 0 is returned.
+ */ +inline ucache_lock_t *get_lock(uint16_t block_index) +{ + if(block_index >= (BLOCKS_IN_CACHE + 1)) + { + return (ucache_lock_t *)0; + } + return &ucache_locks[block_index]; +} + +/** + * Initializes the proper lock based on the LOCK_TYPE + * Returns 0 on success, -1 on error + */ +int lock_init(ucache_lock_t * lock) +{ + int rc = -1; + /* TODO: ability to disable locking */ + #if LOCK_TYPE == 0 + rc = sem_init(lock, 1, 1); + if(rc != -1) + { + rc = 0; + } + #elif LOCK_TYPE == 1 + pthread_mutexattr_t attr; + rc = pthread_mutexattr_init(&attr); + assert(rc == 0); + rc = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); + assert(rc == 0); + rc = pthread_mutex_init(lock, &attr); + assert(rc == 0); + if(rc != 0) + { + return -1; + } + #elif LOCK_TYPE == 2 + rc = pthread_spin_init(lock, 1); + if(rc != 0) + { + return -1; + } + #elif LOCK_TYPE == 3 + *lock = (ucache_lock_t) GEN_SHARED_MUTEX_INITIALIZER_NP; //GEN_SHARED_MUTEX_INITIALIZER_NP; + rc = 0; + #endif + return rc; +} + +/** + * Returns 0 when lock is locked; otherwise, return -1 and sets errno. + */ +inline int lock_lock(ucache_lock_t * lock) +{ + int rc = 0; + #if LOCK_TYPE == 0 + return sem_wait(lock); + #elif LOCK_TYPE == 1 +/* + while(1) + { + rc = pthread_mutex_trylock(lock); + if(rc != 0) + { + printf("couldn't lock lock 0X%lX\n", (long unsigned int) lock); + fflush(stdout); + rc = -1; + } + else + { + break; + } + } +*/ + rc = pthread_mutex_lock(lock); + return rc; + #elif LOCK_TYPE == 2 + return pthread_spin_lock(lock); + #elif LOCK_TYPE == 3 + rc = gen_mutex_lock(lock); + return rc; + #endif +} + +/** + * If successful, return zero; otherwise, return -1 and sets errno. 
+ */ +inline int lock_unlock(ucache_lock_t * lock) +{ + #if LOCK_TYPE == 0 + return sem_post(lock); + #elif LOCK_TYPE == 1 + return pthread_mutex_unlock(lock); + #elif LOCK_TYPE == 2 + return pthread_spin_unlock(lock); + #elif LOCK_TYPE == 3 + return gen_mutex_unlock(lock); + #endif +} + +/** + * Upon successful completion, returns zero + * Otherwise, returns -1 and sets errno. + */ +#if (LOCK_TYPE == 0) +int ucache_lock_getvalue(ucache_lock_t * lock, int *sval) +{ + return sem_getvalue(lock, sval); +} +#endif + +/** + * Tries the lock to see if it's available: + * Returns 0 if lock has not been aquired ie: success + * Otherwise, returns -1 + */ +inline int lock_trylock(ucache_lock_t * lock) +{ + int rc = -1; + #if (LOCK_TYPE == 0) + int sval = 0; + rc = sem_getvalue(lock, &sval); + if(sval <= 0 || rc == -1){ + rc = -1; + } + else + { + rc = 0; + } + #elif (LOCK_TYPE == 1) + rc = pthread_mutex_trylock(lock); + if( rc != 0) + { + rc = -1; + } + #elif (LOCK_TYPE == 2) + rc = pthread_spin_trylock(lock); + if(rc != 0) + { + rc = -1; + } + #elif LOCK_TYPE == 3 + rc = gen_mutex_trylock(lock); + if(rc != 0) + { + rc = -1; + } + #endif + if(rc == 0) + { + /* Unlock before leaving if lock wasn't already set */ + rc = lock_unlock(lock); + } + return rc; +} +/***************************************** End of Externally Visible API */ + +/* Beginning of internal only (static) functions */ + +/* Dirty List Iterator */ +/** + * Returns true if current index is NIL, otherwise, returns 0. + */ +static inline unsigned char dirty_done(uint16_t index) +{ + return (index == NIL16); +} + +/** + * Returns the next index in the dirty list for the provided mtbl and index + */ +static inline uint16_t dirty_next(struct mem_table_s *mtbl, uint16_t index) +{ + return mtbl->mem[index].dirty_next; +} + +/* Memory Entry Chain Iterator */ +/** + * Returns true if current index is NIL, otherwise, returns 0. 
+ */ +static inline unsigned char ment_done(uint16_t index) +{ + return (index == NIL16); +} + +/** + * Returns the next index in the memory entry chain for the provided mtbl + * and index. + */ +static inline uint16_t ment_next(struct mem_table_s *mtbl, uint16_t index) +{ + return mtbl->mem[index].next; +} + +/* File Entry Chain Iterator */ +/** + * Returns true if current index is NIL, otherwise, returns 0 + */ +static unsigned char file_done(uint16_t index) +{ + return (index == NIL16); +} + +/** + * Returns the next index in the file entry chain for the provided mtbl + * and index. + */ +static uint16_t file_next(struct file_table_s *ftbl, uint16_t index) +{ + return ftbl->file[index].next; +} + +/** + * This function should only be called when the ftbl has no free mtbls. + * It initizializes MTBL_PER_BLOCK additional mtbls in the block provided, + * meaning this block will no longer be used for storing file data but + * hash table related data instead. + */ +static void add_mtbls(uint16_t blk) +{ + uint16_t i, start_mtbl; + struct file_table_s *ftbl = &(ucache->ftbl); + union cache_block_u *b = &(ucache->b[blk]); + + /* add mtbls in blk to ftbl free list */ + if (blk == 0) + { + start_mtbl = 1; /* skip blk 0 ent 0 which is ftbl */ + } + else + { + start_mtbl = 0; + } + for (i = start_mtbl; i < (MTBL_PER_BLOCK - 1); i++) + { + b->mtbl[i].free_list_blk = blk; + b->mtbl[i].free_list = i + 1; + } + b->mtbl[i].free_list_blk = NIL16; + b->mtbl[i].free_list = NIL16; + ftbl->free_mtbl_blk = blk; + ftbl->free_mtbl_ent = start_mtbl; +} +/** + * Initializes a memory entry. 
+ */ +static inline int init_memory_entry(struct mem_table_s *mtbl, int16_t index) +{ + if(index > MEM_TABLE_ENTRY_COUNT) + { + return -1; + } + mtbl->mem[index].tag = NIL64; + mtbl->mem[index].item = NIL16; + mtbl->mem[index].next = NIL16; + mtbl->mem[index].dirty_next = NIL16; + mtbl->mem[index].lru_prev = NIL16; + mtbl->mem[index].lru_next = NIL16; + return 0; +} + +/** + * Initializes a mtbl which is a hash table of memory entries. + * The mtbl will be located at the provided entry index within + * the provided block. + */ +static void init_memory_table(struct mem_table_s *mtbl) +{ + uint16_t i; + int rc = -1; + mtbl->num_blocks = 0; + mtbl->free_list_blk = NIL16; + mtbl->lru_first = NIL16; + mtbl->lru_last = NIL16; + mtbl->dirty_list = NIL16; + mtbl->ref_cnt = 0; + + /* Initialize Buckets */ + for(i = 0; i < MEM_TABLE_HASH_MAX; i++) + { + mtbl->bucket[i] = NIL16; + } + + /* set up free ments */ + mtbl->free_list = 0; + for(i = 0; i < (MEM_TABLE_ENTRY_COUNT - 1); i++) + { + rc = init_memory_entry(mtbl, i); + mtbl->mem[i].next = i + 1; + + } + /* NIL Terminate the last entries next index */ + rc = init_memory_entry(mtbl, MEM_TABLE_ENTRY_COUNT - 1); + mtbl->mem[MEM_TABLE_ENTRY_COUNT - 1].next = NIL16; +} + +/** + * This function asks the file table if a free block is avaialable. + * If so, returns the block's index; otherwise, returns NIL. + */ +static inline uint16_t get_free_blk(void) +{ + struct file_table_s *ftbl = &(ucache->ftbl); + uint16_t desired_blk = ftbl->free_blk; + if(desired_blk != NIL16 && desired_blk < BLOCKS_IN_CACHE) + { + /* Update the head of the free block list */ + /* Use mtbl index zero since free_blks have no ititialized mem tables */ + ftbl->free_blk = ucache->b[desired_blk].mtbl[0].free_list_blk; + return desired_blk; + } + return NIL16; +} + +/** + * Accepts an index corresponding to a block that is put back on the file + * table free list. 
+ */ +static inline void put_free_blk(uint16_t blk) +{ + struct file_table_s *ftbl = &(ucache->ftbl); + /* set the block's next value to the current head of the block free list */ + ucache->b[blk].mtbl[0].free_list_blk = ftbl->free_blk; + /* blk is now the head of the ftbl blk free list */ + ftbl->free_blk = blk; +} + +/** + * Consults the file table to retrieve an index corresponding to a file entry + * If available, returns the file entry index, otherwise returns NIL. + */ +static uint16_t get_free_fent(void) +{ + struct file_table_s *ftbl = &(ucache->ftbl); + uint16_t entry = ftbl->free_list; + if(entry != NIL16) + { + ftbl->free_list = ftbl->file[entry].next; + ftbl->file[entry].next = NIL16; + return entry; + } + else + { + return NIL16; + } +} + +/** + * Places the file entry located at the provided index back on the file table's + * free file entry list. If the index is < FILE_TABLE_HASH_MAX, then set next + * to NIL since this index must remain the head of the linked list. Otherwise, + * set next to the current head of fent free list and set the free list head to + * the provided index. + */ +static void put_free_fent(struct file_ent_s *fent) +{ + struct file_table_s *ftbl = &(ucache->ftbl); + fent->tag_handle = NIL64; + fent->tag_id = NIL32; + if(fent->index < FILE_TABLE_HASH_MAX) + { + fent->next = NIL16; + } + else + { + /* Set next index to the current head of the free list */ + fent->next = ftbl->free_list; + /* Set fent index as the head of the free_list */ + ftbl->free_list = fent->index; + } +} + +/** + * Consults the provided mtbl's memory entry free list to get the index of the + * next free memory entry. Returns the index if one is available, otherwise + * returns NIL. 
+ */ +static inline uint16_t get_free_ment(struct mem_table_s *mtbl) +{ + uint16_t ment = mtbl->free_list; + if(ment != NIL16) + { + mtbl->free_list = mtbl->mem[ment].next; + mtbl->mem[ment].next = NIL16; + } + return ment; +} + +/** + * Puts the memory entry corresponding to the provided mtbl and entry index + * back on the mtbl's memory entry free list. + */ +static void put_free_ment(struct mem_table_s *mtbl, uint16_t ent) +{ + /* Reset ment values */ + mtbl->mem[ent].tag = NIL64; + mtbl->mem[ent].item = NIL16; + mtbl->mem[ent].dirty_next = NIL16; + mtbl->mem[ent].lru_prev = NIL16; + mtbl->mem[ent].lru_next = NIL16; + /* Set next index to the current head of the free list */ + mtbl->mem[ent].next = mtbl->free_list; + /* Update free list to include this entry */ + mtbl->free_list = ent; +} + +/** + * Perform a file lookup on the ucache using the provided fs_id and handle. + * + * Additional parameters (references) may used that will be set to values + * pertaining to mtbl and file entry location. If NULL is passed in place of + * these parameters, then they cannot be set. + * + * If the file is found, a pointer to the mtbl is returned and the parameter + * references set accordingly. Otherwise, NIL is returned. 
+ */ +static struct mem_table_s *lookup_file( + uint32_t fs_id, + uint64_t handle, + uint16_t *file_mtbl_blk, + uint16_t *file_mtbl_ent, + uint16_t *file_ent_index, + uint16_t *file_ent_prev_index +) +{ + /* Index into file hash table */ + uint16_t index = handle % FILE_TABLE_HASH_MAX; + + struct file_table_s *ftbl = &(ucache->ftbl); + struct file_ent_s *current = &(ftbl->file[index]); + + /* previous, current, next fent index */ + uint16_t p = NIL16; + uint16_t c = index; + uint16_t n = current->next; + + while(1) + { + if((current->tag_id == fs_id) && (current->tag_handle == handle)) + { + /* If params !NULL, set their values */ + if(file_mtbl_blk!=NULL && file_mtbl_ent!=NULL && + file_ent_index!=NULL && file_ent_prev_index!=NULL) + { + *file_mtbl_blk = current->mtbl_blk; + *file_mtbl_ent = current->mtbl_ent; + *file_ent_index = c; + *file_ent_prev_index = p; + } + return (struct mem_table_s *)&(ucache->b[current->mtbl_blk].mtbl[ + current->mtbl_ent]); + } + /* No match yet */ + else + { + if(current->next == NIL16 || current->next == 0) + { + return (struct mem_table_s *)NIL; + } + else + { + current = &(ftbl->file[current->next]); + p=c; + c=n; + n=current->next; + } + + } + } +} + +/** + * Function that locates the next free mtbl. + * On success, Returns 1 and sets reference parameters to proper indexes. + * On failure, returns NIL; + */ +static uint16_t get_next_free_mtbl(uint16_t *free_mtbl_blk, uint16_t *free_mtbl_ent) +{ + struct file_table_s *ftbl = &(ucache->ftbl); + + /* Get next free mtbl_blk and ent from ftbl */ + *free_mtbl_blk = ftbl->free_mtbl_blk; + *free_mtbl_ent = ftbl->free_mtbl_ent; + + /* Is free mtbl_blk available? */ + if((*free_mtbl_blk == NIL16) || + (*free_mtbl_ent == NIL16)) + { + return NIL16; + } + + /* Update ftbl to contain new next free mtbl */ + ftbl->free_mtbl_blk = ucache->b[*free_mtbl_blk].mtbl[*free_mtbl_ent]. + free_list_blk; + ftbl->free_mtbl_ent = ucache->b[*free_mtbl_blk].mtbl[*free_mtbl_ent]. 
+ free_list; + + /* Set free info to NIL */ + ucache->b[*free_mtbl_blk].mtbl[*free_mtbl_ent].free_list = NIL16; + ucache->b[*free_mtbl_blk].mtbl[*free_mtbl_ent].free_list_blk = NIL16; + + return 1; +} + +/** + * Places memory entries' corresponding blocks + * back on the ftbl block free list. Reinitializes mtbl. + * Assumes mtbl->ref_cnt is 0. + */ +static int wipe_mtbl(struct mem_table_s *mtbl) +{ + uint16_t i; + for(i = 0; i < MEM_TABLE_HASH_MAX; i++) + { + uint16_t j; + for(j = mtbl->bucket[i]; !ment_done(j); j = ment_next(mtbl, j)) + { + /* Current Memory Entry */ + struct mem_ent_s *ment = &(mtbl->mem[j]); + /* Account for empty head of ment chain */ + if((ment->tag == NIL64) || (ment->item == NIL16)) + { + break; + } + put_free_blk(ment->item); + } + } + memset(&mtbl->mem[0], 0, sizeof(struct mem_ent_s) * MEM_TABLE_ENTRY_COUNT); + init_memory_table(mtbl); + return 1; +} + +/** + * Places the provided mtbl back on the ftbl's mtbl free list provided it + * isn't currently referenced. 
+ */ +static int put_free_mtbl(struct mem_table_s *mtbl, struct file_ent_s *file) +{ + /* Remove mtbl */ + mtbl->num_blocks = 0; /* number of used blocks in this mtbl */ + mtbl->lru_first = NIL16; /* index of first block on lru list */ + mtbl->lru_last = NIL16; /* index of last block on lru list */ + mtbl->dirty_list = NIL16; /* index of first dirty block */ + mtbl->ref_cnt = 0; /* number of clients using this record */ + + /* Add mem_table back to free list */ + /* Temporarily store copy of current head (the new next) */ + uint16_t tmp_blk = ucache->ftbl.free_mtbl_blk; + uint16_t tmp_ent = ucache->ftbl.free_mtbl_ent; + /* newly free mtbl becomes new head of free mtbl list */ + ucache->ftbl.free_mtbl_blk = file->mtbl_blk; + ucache->ftbl.free_mtbl_ent = file->mtbl_ent; + /* Point to the next free mtbl (the former head) */ + mtbl->free_list_blk = tmp_blk; + mtbl->free_list = tmp_ent; + + return 1; +} + +/** + * Insert information about file into ucache (no file data inserted) + * Returns pointer to mtbl on success. + * + * Returns NIL if necessary data structures could not be aquired from the free + * lists or through an eviction policy (meaning references are held). 
+ */ +uint16_t insert_file( + uint32_t fs_id, + uint64_t handle +) +{ + struct file_table_s *ftbl = &(ucache->ftbl); + struct file_ent_s *current; /* Current ptr for iteration */ + uint16_t free_fent = NIL16; /* Index of next free fent */ + + /* index into file hash table */ + uint16_t index = handle % FILE_TABLE_HASH_MAX; + current = &(ftbl->file[index]); + + unsigned char indexOccupied = (current->tag_handle != NIL64 && current->tag_id != NIL32); + + /* Get free mtbl */ + uint16_t free_mtbl_blk = NIL16; + uint16_t free_mtbl_ent = NIL16; + /* Create free mtbls if none are available */ + if(get_next_free_mtbl(&free_mtbl_blk, &free_mtbl_ent) != 1) + { + if(ucache->ftbl.free_blk == NIL16) + { + /* Evict a block from mtbl with most mem entries */ + struct file_ent_s *max_fent = 0; + struct mem_table_s *max_mtbl; + locate_max_fent(&max_fent); + max_mtbl = get_mtbl(max_fent->mtbl_blk, max_fent->mtbl_ent); + evict_LRU(max_fent); + } + /* TODO: other policy? */ + if(ucache->ftbl.free_blk == NIL16) + { + + } + /* Intitialize memory tables */ + if(ucache->ftbl.free_blk != NIL16) + { + int16_t free_blk = get_free_blk(); + add_mtbls(free_blk); + get_next_free_mtbl(&free_mtbl_blk, &free_mtbl_ent); + } + else + { + /* Couldn't get free mtbl - unlikely */ + return NIL16; + } + } + + /* Now, we know which hashed chain we are trying to insert into and have a + * mtbl ready to be filled. + */ + + /* Insert at the head or just after the head, since we can't change the + * indexing (only can change "nexts"). 
+ */ + if(indexOccupied) + { + /* Certain a file entry is required */ + /* get free file entry and update ftbl */ + free_fent = get_free_fent(); + if(free_fent != NIL16) + { + uint16_t temp_next = current->next; + current->next = free_fent; + current = &(ftbl->file[free_fent]); + current->next = temp_next; /* repair link */ + current->index = free_fent; + } + else + { + /* Return an error indicating the ucache is full and file couldn't + * be cached + */ + return NIL16; + } + } + else + { + current->index = index; + } + + /* Insert file data @ index */ + current->tag_id = fs_id; + current->tag_handle = handle; + /* Update fent with it's new mtbl: blk and ent */ + current->mtbl_blk = free_mtbl_blk; + current->mtbl_ent = free_mtbl_ent; + /* Initialize Memory Table */ + init_memory_table(get_mtbl(free_mtbl_blk, free_mtbl_ent)); + return current->index; +} + +/** + * Remove file entry and memory table of file identified by parameters + * Returns 1 following removal + * Returns -1 if file is referenced or if the file could not be located. + */ +static int remove_file(struct file_ent_s *fent) +{ + int rc = 0; + struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, + fent->mtbl_ent); + + if(mtbl == (struct mem_table_s *)NILP) + { + return -1; + } + + /* Flush file blocks before file removal */ + mtbl->ref_cnt--; + + if(mtbl->ref_cnt > 0) + { + return 0; + } + + /* Flush dirty blocks before file removal from cache */ + rc = flush_file(fent); + if(rc == -1) + { + return rc; + } + + /* Instead of removing individually, since memory entries are already + * flushed, just wipe the mtbl + */ + rc = wipe_mtbl(mtbl); + if(rc == -1) + { + /* Couldn't remove entries */ + return rc; + } + + rc = put_free_mtbl(mtbl, fent); + if(rc == -1) + { + return rc; + } + + put_free_fent(fent); + if(rc == -1) + { + return rc; + } + + /* Success */ + return 0; +} + +/** + * Lookup the memory location of a block of data in cache that is identified + * by the mtbl and offset parameters. 
+ * + * If located, returns a pointer to memory where the desired block of data is + * stored. Otherwise, NIL is returned. + * + * pertaining to the memory entry's location. If NULLs are passed in place of + * these parameters, then they will not be set. + */ +inline static void *lookup_mem(struct mem_table_s *mtbl, + uint64_t offset, + uint16_t *item_index, + uint16_t *mem_ent_index, + uint16_t *mem_ent_prev_index) +{ + /* index into mem hash table */ + uint16_t index = (uint16_t) ((offset / CACHE_BLOCK_SIZE) % MEM_TABLE_HASH_MAX); + + /* If the bucket is empty then go ahead and return */ + if(mtbl->bucket[index] == NIL16) + { + return (struct mem_table_s *)NIL; + } + + uint16_t bucket_index = mtbl->bucket[index]; + struct mem_ent_s *current = &(mtbl->mem[bucket_index]); + + /* previous, current, next memory entry index in mtbl */ + int16_t p = NIL16; + int16_t c = bucket_index; + int16_t n = current->next; + + while(1) + { + if(offset == current->tag) + { + /* If parameters !NULL, set their values */ + if(item_index != NULL) + { + *item_index = current->item; + } + if((mem_ent_index != NULL) && (mem_ent_prev_index != NULL)) + { + *mem_ent_index = c; + *mem_ent_prev_index = p; + } + return (void *)(&ucache->b[current->item].mblk); + } + else + { + if(current->next == NIL16) + { + return (struct mem_table_s *)NIL; + } + else + { + /* Iterate */ + current = &(mtbl->mem[current->next]); + p = c; + c = n; + n = current->next; + } + } + } +} + +/** + * Update the provided mtbl's LRU doubly-linked list by placing the memory + * entry, identified by the provided index, at the head of the list (lru_first). 
+ */ +static inline void update_LRU(struct mem_table_s *mtbl, uint16_t index) +{ + /* First memory entry used becomes the head and tail of the list */ + if((mtbl->lru_first == NIL16) && + (mtbl->lru_last == NIL16)) + { + mtbl->lru_first = index; + mtbl->lru_last = index; + mtbl->mem[index].lru_prev = NIL16; + mtbl->mem[index].lru_next = NIL16; + } + /* 2nd Memory Entry */ + else if(mtbl->lru_first == mtbl->lru_last) + { + /* Do nothing if this index is already the only entry */ + if(mtbl->lru_first == index) + { + return; + } + else + { + /* Must be 2nd unique memory entry */ + /* point tail.prev to new */ + mtbl->mem[mtbl->lru_first].lru_prev = index; + /* point new.prev to NIL */ + mtbl->mem[index].lru_prev = NIL16; + /* point the new.next to the tail */ + mtbl->mem[index].lru_next = mtbl->lru_first; + /* point the head to the new */ + mtbl->lru_first = index; + } + } + /* 3rd+ Memory Entry */ + else + { + if(mtbl->mem[index].lru_prev == NIL16 && + mtbl->mem[index].lru_next == NIL16) + { + /* First time on the LRU List, Add to the front */ + mtbl->mem[index].lru_next = mtbl->lru_first; + mtbl->mem[mtbl->lru_first].lru_prev = index; + } + else if(mtbl->mem[index].lru_prev == NIL16) + { + /* Already the head of MRU */ + return; + } + else if(mtbl->mem[index].lru_next == NIL16) + { + /* Relocate the LRU to become the MRU */ + mtbl->lru_last = mtbl->mem[index].lru_prev; + mtbl->mem[mtbl->lru_last].lru_next = NIL16; + mtbl->mem[mtbl->lru_first].lru_prev = index; + mtbl->mem[index].lru_next = mtbl->lru_first; + mtbl->mem[index].lru_prev = NIL16; + } + else + { + /* Relocate interior LRU list item to head */ + uint16_t current_prev = mtbl->mem[index].lru_prev; + uint16_t current_next = mtbl->mem[index].lru_next; + + mtbl->mem[current_prev].lru_next = current_next; + mtbl->mem[current_next].lru_prev = current_prev; + + mtbl->mem[index].lru_prev = NIL16; + mtbl->mem[index].lru_next = mtbl->lru_first; + } + mtbl->lru_first = index; + } +} + +/** + * Searches the ftbl for 
the mtbl with the most entries. + * Returns the number of memory entries the max mtbl has. The double ptr + * parameter is used to store a reference to the mtbl pointer with the most + * memory entries. + */ +static uint16_t locate_max_fent(struct file_ent_s **fent) +{ + struct file_table_s *ftbl = &(ucache->ftbl); + uint16_t index_of_max_blk = NIL16; + uint16_t index_of_max_ent = NIL16; + uint16_t value_of_max = 0; + /* Iterate over file hash table indices */ + uint16_t i; + for(i = 0; i < FILE_TABLE_HASH_MAX; i++) + { + + if((ftbl->file[i].tag_handle == NIL64) || + (ftbl->file[i].tag_handle == 0)) + continue; + + /* Iterate over hash table chain */ + uint16_t j; + for(j = i; !file_done(j); j = file_next(ftbl, j)) + { + struct file_ent_s *current_fent = &(ftbl->file[j]); + if((current_fent->mtbl_blk == NIL16) || + (current_fent->mtbl_ent == NIL16)) + { + break; + } + /* Examine the mtbl's value of num_blocks to see if it's the + * greatest. + */ + struct mem_table_s *current_mtbl = get_mtbl(current_fent->mtbl_blk, + current_fent->mtbl_ent); + + if(current_mtbl->num_blocks >= value_of_max) + { + *fent = current_fent; /* Set the parameter to this mtbl */ + index_of_max_blk = current_fent->mtbl_blk; + index_of_max_ent = current_fent->mtbl_ent; + value_of_max = current_mtbl->num_blocks; + } + } + } + return value_of_max; +} + +/** + * Evicts the LRU memory entry from the tail (lru_last) of the provided + * mtbl's LRU list. + * + * Returns 1 on success; 0 on failure, meaning there was no LRU + * or that the block's lock couldn't be aquired. 
+ */ +static int evict_LRU(struct file_ent_s *fent) +{ + int rc = -1; + + struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, fent->mtbl_ent); + + if(mtbl->num_blocks != 0 && mtbl->lru_last != NIL16) + { + //printf("evicting: %hu\n", mtbl->lru_last); + rc = remove_mem(fent, mtbl->mem[mtbl->lru_last].tag); + if(rc != 1) + { + return 0; + } + return 1; + } + else + { + return 0; + } +} + + +/** + * Used to obtain a block for storage of data identified by the offset + * parameter and maintained in the mtbl at the memory entry identified by the + * index parameter. + * + * If a free block could be aquired, returns the memory address of the block + * just inserted. Otherwise, returns NIL. + */ +static inline void *set_item(struct file_ent_s *fent, + uint64_t offset, + uint16_t index) +{ + uint16_t free_blk = get_free_blk(); + + struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, fent->mtbl_ent); + + /* No Free Blocks Available */ + if(free_blk == NIL16) + { + evict_LRU(fent); + free_blk = get_free_blk(); + } + + /* After Eviction Routine - No Free Blocks Available, Evict from mtbl + * with the most memory entries + */ + if(free_blk == NIL16) + { + struct file_ent_s *max_fent = 0; + struct mem_table_s *max_mtbl; + int ment_count = 0; + ment_count = locate_max_fent(&max_fent); + max_mtbl = get_mtbl(max_fent->mtbl_blk, max_fent->mtbl_ent); + if(ment_count == 0 || max_mtbl->lru_last == NIL16) + { + goto errout; + } + evict_LRU(max_fent); + free_blk = get_free_blk(); + } + /* TODO: other policy? 
*/ + + + /* A Free Block is Avaiable for Use */ + if(free_blk != NIL16) + { + mtbl->num_blocks++; + update_LRU(mtbl, index); + /* set item to block number */ + mtbl->mem[index].tag = offset; + mtbl->mem[index].item = free_blk; + /* add block index to head of dirty list */ + mtbl->mem[index].dirty_next = mtbl->dirty_list; + mtbl->dirty_list = index; + /* Return the address of the block where data is stored */ + return (void *)&(ucache->b[free_blk]); + } +errout: + return (void *)(NIL); +} + +/** + * Requests a location in memory to place the data identified by the mtbl and + * offset parameters. Also inserts the necessary info into the mtbl. + * + */ +static inline void *insert_mem(struct file_ent_s *fent, uint64_t offset, + uint16_t *block_ndx) +{ + void* rc = 0; + struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, fent->mtbl_ent); + + /* Lookup first */ + void *returnValue = lookup_mem(mtbl, offset, block_ndx, NULL, NULL); + if(returnValue != (void *)NIL) + { + /* Already exists in mtbl so just return a ptr to the blk */ + return returnValue; + } + + /* Index into mem hash table */ + /* Hash to a bucket */ + uint16_t index = (uint16_t) ((offset / CACHE_BLOCK_SIZE) % MEM_TABLE_HASH_MAX); + + int evict_rc = 0; + uint16_t mentIndex = get_free_ment(mtbl); + if(mentIndex == NIL16) + { /* No free ment available, so attempt eviction, and try again */ + evict_rc = evict_LRU(fent); + mentIndex = get_free_ment(mtbl); + } + + /* Eviction Failed */ + if(mentIndex == NIL16) + { + return (void *)NULL; + } + + /* Procede with memory insertion if ment aquired */ + uint16_t next_ment = NIL16; + /* Insert at head, keeping track of the previous head */ + next_ment = mtbl->bucket[index]; + mtbl->bucket[index] = mentIndex; + + rc = set_item(fent, offset, mentIndex); + if(rc != (void *)NIL) + { + mtbl->mem[mentIndex].next = next_ment; + *block_ndx = mtbl->mem[mentIndex].item; + return rc; + } + else + { + /* Restore the previous head back to head of the chain */ + mtbl->bucket[index] 
= next_ment; + return (void *)NIL; + } +} + +/** + * Removes all table info regarding the block identified by the mtbl and + * offset provided the block isn't locked. + * + * Flushing the block to fs now occurs here upon removal from cache. + * + * On success returns 1, on failure returns 0. + * + */ +static int remove_mem(struct file_ent_s *fent, uint64_t offset) +{ + struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, fent->mtbl_ent); + + /* Some Indices */ + uint16_t item_index = NIL16; /* index of cached block */ + uint16_t mem_ent_index = NIL16; + uint16_t mem_ent_prev_index = NIL16; + + void *retValue = lookup_mem(mtbl, offset, &item_index, &mem_ent_index, + &mem_ent_prev_index); + /* Verify we've recieved the necessary info */ + if(retValue == (void *)NIL) + { + return 0; + } + + /* Verify the block isn't being used by trying the corresponding lock */ + ucache_lock_t *block_lock = get_lock(mtbl->mem[mem_ent_index].item); + int rc = lock_trylock(block_lock); + if(rc != 0) + { + return -1; + } + + /* Aquire Lock */ + lock_lock(block_lock); + + /* Optionally flush block - may need to be mandatory */ + flush_block(fent, &(mtbl->mem[mem_ent_index])); + + /* Update First and Last...First */ + if(mem_ent_index == mtbl->lru_first) + { + /* Node is the head */ + mtbl->lru_first = mtbl->mem[mem_ent_index].lru_next; + } + if(mem_ent_index == mtbl->lru_last) + { + /* Node is the tail */ + mtbl->lru_last = mtbl->mem[mem_ent_index].lru_prev; + } + + /* Remove from LRU */ + /* Update each of the adjacent nodes' link */ + uint16_t lru_prev = mtbl->mem[mem_ent_index].lru_prev; + if(lru_prev != NIL16) + { + mtbl->mem[lru_prev].lru_next = mtbl->mem[mem_ent_index].lru_next; + } + uint16_t lru_next = mtbl->mem[mem_ent_index].lru_next; + if(lru_next != NIL16) + { + mtbl->mem[lru_next].lru_prev = mtbl->mem[mem_ent_index].lru_prev; + } + + /* Add memory block back to free list */ + put_free_blk(item_index); + + /* Repair link */ + if(mem_ent_prev_index != NIL16) + { + 
mtbl->mem[mem_ent_prev_index].next = mtbl->mem[mem_ent_index].next; + } + + /* Newly free mem entry becomes new head of free mem entry list if index + * is less than hash table max + */ + put_free_ment(mtbl, mem_ent_index); + mtbl->num_blocks--; + + /* Release Lock */ + lock_unlock(block_lock); + return 1; +} + +/* The following two functions are provided for error checking purposes. */ +/** + * Prints the Least Recently Used (LRU) list. + */ +void print_LRU(struct mem_table_s *mtbl) +{ + fprintf(out, "\tprinting lru list:\n"); + fprintf(out, "\t\tmru: %hu\n", mtbl->lru_first); + fprintf(out, "\t\t\tmru->lru_prev = %hu\n\t\t\tmru->lru_next = %hu\n", + mtbl->mem[mtbl->lru_first].lru_prev, mtbl->mem[mtbl->lru_first].lru_next); + uint16_t current = mtbl->mem[mtbl->lru_first].lru_next; + while(current != mtbl->lru_last && current != NIL16) + { + fprintf(out, "\t\t\tcurr->lru_prev = %hu\n", + mtbl->mem[current].lru_prev); + fprintf(out, "\t\t%hu\n", current); + fprintf(out, "\t\t\tcurr->lru_next = %hu\n", + mtbl->mem[current].lru_next); + current = mtbl->mem[current].lru_next; + } + fprintf(out, "\t\tlru: %hu\n", mtbl->lru_last); + fprintf(out, "\t\t\tlru->lru_prev = %hu\n\t\t\tlru->lru_next = %hu\n", + mtbl->mem[mtbl->lru_last].lru_prev, mtbl->mem[mtbl->lru_last].lru_next); +} + +/** + * Prints the list of dirty (modified) blocks that should eventually be + * flushed to disk. 
+ */ +void print_dirty(struct mem_table_s *mtbl) +{ + fprintf(out, "\tprinting dirty list:\n"); + int i; + for(i = 0; !dirty_done(i); i = dirty_next(mtbl, i)) + { + fprintf(out, "\t\tment index = %hu\t\t\tdirty_next = %hu\n", + i, dirty_next(mtbl, i)); + } + if(i >= MEM_TABLE_ENTRY_COUNT && i != NIL16) + { + fprintf(out, "BAD MEM_TABLE_ENTRY INDEX: %hu\n", i); + exit(0); + } + fprintf(out, "\t\tdone w/ dirty list\n"); +} + +/* End of Internal Only Functions */ +#endif /* PVFS_UCACHE_ENABLE */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/client/usrint/ucache.h b/src/client/usrint/ucache.h new file mode 100644 index 0000000..597b4ad --- /dev/null +++ b/src/client/usrint/ucache.h @@ -0,0 +1,251 @@ +/* + * (C) 2011 Clemson University + * + * See COPYING in top-level directory. + */ + +/** + * \file + * \ingroup usrint + * ucache routines + */ +#ifndef UCACHE_H +#define UCACHE_H 1 + +#include +#include +#include + +#define MEM_TABLE_ENTRY_COUNT 679 +#define FILE_TABLE_ENTRY_COUNT 682 +#define CACHE_BLOCK_SIZE_K 256 +#define CACHE_BLOCK_SIZE (CACHE_BLOCK_SIZE_K * 1024) +#define MEM_TABLE_HASH_MAX 31 +#define FILE_TABLE_HASH_MAX 31 +#define MTBL_PER_BLOCK 16 +#define KEY_FILE "/etc/fstab" +#define SHM_ID1 'l' +#define SHM_ID2 'm' +#ifndef BLOCKS_IN_CACHE +#define BLOCKS_IN_CACHE 1024 +#endif +#define CACHE_SIZE (CACHE_BLOCK_SIZE * BLOCKS_IN_CACHE) +#define AT_FLAGS 0 +#define SVSHM_MODE (SHM_R | SHM_W | SHM_R>>3 | SHM_R>>6) +#define CACHE_FLAGS (SVSHM_MODE) +#define NIL (-1) + +#ifndef UCACHE_MAX_REQ +#define UCACHE_MAX_REQ (CACHE_BLOCK_SIZE * MEM_TABLE_ENTRY_COUNT) +#endif + +/* Define multiple NILS to there's no need to cast for different types */ +#define NIL8 0XFF +#define NIL16 0XFFFF +#define NIL32 0XFFFFFFFF +#define NIL64 0XFFFFFFFFFFFFFFFF +#if (PVFS2_SIZEOF_VOIDP == 32) +#define NILP NIL32 +#elif (PVFS2_SIZEOF_VOIDP == 64) +#define NILP NIL64 +#endif + + +#ifndef DBG 
+#define DBG 0 +#endif + +#ifndef UCACHE_LOG_FILE +#define UCACHE_LOG_FILE "/tmp/ucache.log" +#endif + +/* TODO: set this to an appropriate value. */ +#define GOSSIP_UCACHE_DEBUG 0x0010000000000000 + +#ifndef LOCK_TYPE +#define LOCK_TYPE 3 /* 0 for Semaphore, 1 for Mutex, 2 for Spinlock */ +#endif + +#if (LOCK_TYPE == 0) +# include +# define ucache_lock_t sem_t +# define LOCK_SIZE sizeof(sem_t) +#elif (LOCK_TYPE == 1) +# define ucache_lock_t pthread_mutex_t /* sizeof(pthread_mutex_t)=24 */ +# define LOCK_SIZE sizeof(pthread_mutex_t) +#elif (LOCK_TYPE == 2) +# define ucache_lock_t pthread_spinlock_t +# define LOCK_SIZE sizeof(pthread_spinlock_t) +#elif (LOCK_TYPE == 3) +# define ucache_lock_t gen_mutex_t +# define LOCK_SIZE sizeof(gen_mutex_t) +#endif + +#define LOCKS_SIZE ((LOCK_SIZE) * (BLOCKS_IN_CACHE + 1)) + +#define UCACHE_STATS_64 3 +#define UCACHE_STATS_16 2 +/* This is the size of the ucache_aux auxilliary shared mem segment */ +#define UCACHE_AUX_SIZE ( LOCKS_SIZE + (UCACHE_STATS_64 * 64) + \ + (UCACHE_STATS_16 * 16)) + +/* Globals */ +extern FILE * out; +extern int ucache_enabled; +extern union user_cache_u *ucache; +extern struct ucache_aux_s *ucache_aux; +extern ucache_lock_t *ucache_locks; +extern ucache_lock_t *ucache_lock; +extern struct ucache_stats_s *ucache_stats; +extern struct ucache_stats_s these_stats; + +/** A structure containing the statistics summarizing the ucache. + * + */ +struct ucache_stats_s +{ + uint64_t hits; + uint64_t misses; + uint64_t pseudo_misses; + uint16_t block_count; + uint16_t file_count; +}; + +/** A structure containing the auxilliary data required by ucache to properly + * function. 
+ */ +struct ucache_aux_s +{ + ucache_lock_t ucache_locks[BLOCKS_IN_CACHE + 1]; /* +1 for global lock */ + struct ucache_stats_s ucache_stats; /* Summary Statistics of ucache */ +}; + +/** A link for one block of memory in a files hash table + * + */ +/* 24 bytes */ +struct mem_ent_s +{ + uint64_t tag; /* offset of data block in file */ + uint16_t item; /* index of cache block with data */ + uint16_t next; /* use for hash table chain */ + uint16_t dirty_next; /* if dirty used in dirty list */ + uint16_t lru_prev; /* used in lru list */ + uint16_t lru_next; /* used in lru list */ + char pad[6]; +}; + +/** A cache for a specific file + * + * Keyed on the address of the block of memory + */ +struct mem_table_s +{ + uint16_t num_blocks; /* number of used blocks in this mtbl */ + uint16_t free_list; /* index of next free mem entry */ + uint16_t free_list_blk; /* used when mtbl is on mtbl free list and to track free blks */ + uint16_t lru_first; /* index of first block on lru list */ + uint16_t lru_last; /* index of last block on lru list */ + uint16_t dirty_list; /* index of first dirty block */ + uint16_t ref_cnt; /* number of clients using this record */ + uint16_t bucket[MEM_TABLE_HASH_MAX]; /* bucket may contain index of ment */ + char pad[4]; + struct mem_ent_s mem[MEM_TABLE_ENTRY_COUNT]; + char pad2[8]; +}; + +/** One allocation block in the cache + * + * Either a block of memory or a block of mtbls + */ +union cache_block_u +{ + struct mem_table_s mtbl[MTBL_PER_BLOCK]; + char mblk[CACHE_BLOCK_SIZE_K * 1024]; +}; + +/** A link for one file in the top level hash table + * + */ +/* 24 bytes */ +struct file_ent_s +{ + uint64_t tag_handle; /* PVFS_handle */ + uint32_t tag_id; /* PVFS_fs_id */ + uint16_t mtbl_blk; /* block index of this mtbl */ + uint16_t mtbl_ent; /* entry index of this mtbl */ + uint16_t next; /* next fent in chain */ + uint16_t index; /* fent index in ftbl */ + char pad[4]; +}; + +/** A hash table to find caches for specific files + * + * Keyed on 
fs_id/handle of the file + */ +struct file_table_s +{ + uint16_t free_blk; /* index of the next free block */ + uint16_t free_mtbl_blk; /* block index of next free mtbl */ + uint16_t free_mtbl_ent; /* entry index of next free mtbl */ + uint16_t free_list; /* index of next free file entry */ + char pad[8]; + struct file_ent_s file[FILE_TABLE_ENTRY_COUNT]; +}; + +/** The whole system wide cache + * + */ +union user_cache_u +{ + struct file_table_s ftbl; + union cache_block_u b[0]; /* actual size of this varies */ +}; + +struct ucache_ref_s +{ + union user_cache_u *ucache; /* pointer to ucache shmem */ + ucache_lock_t *ucache_locks; /* pointer to ucache locks */ +}; + +/* externally visible API */ +union user_cache_u *get_ucache(void); +int ucache_initialize(void); +int ucache_open_file(PVFS_fs_id *fs_id, + PVFS_handle *handle, + struct file_ent_s **fent); +int ucache_close_file(struct file_ent_s *fent); +inline struct mem_table_s *get_mtbl(uint16_t mtbl_blk, uint16_t mtbl_ent); +inline void *ucache_lookup(struct file_ent_s *fent, uint64_t offset, uint16_t *block_ndx); +inline void *ucache_insert(struct file_ent_s *fent, + uint64_t offset, + uint16_t *block_ndx); +int ucache_info(FILE *out, char *flags); + +int ucache_flush_cache(void); +int ucache_flush_file(struct file_ent_s *fent); + +/* Don't call this except in ucache daemon */ +int ucache_init_file_table(char forceCreation); + +/* Used only in testing */ +int wipe_ucache(void); + +/* Lock Routines */ +inline ucache_lock_t *get_lock(uint16_t block_index); +int lock_init(ucache_lock_t * lock); +inline int lock_lock(ucache_lock_t * lock); +inline int lock_unlock(ucache_lock_t * lock); +inline int lock_trylock(ucache_lock_t * lock); + +#endif /* UCACHE_H */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/common/gen-locks/gen-win-locks.c b/src/common/gen-locks/gen-win-locks.c new file mode 100755 index 0000000..1902e54 --- 
/dev/null +++ b/src/common/gen-locks/gen-win-locks.c @@ -0,0 +1,748 @@ +/* + * (C) 2001-2011 Clemson University, The University of Chicago and + * Omnibond LLC + * + * See COPYING in top-level directory. + */ + + +/* This code implements generic locking that can be turned on or off at + * compile time. + */ + +#ifndef _WIN64 +#define _USE_32BIT_TIME_T +#endif + +#include +#include +#include +#include + +#include "gen-locks.h" + +/*************************************************************** + * visible functions + */ + +#ifndef __GEN_NULL_LOCKING__ + +/* Global variables */ +/* TODO: may need to init and delete in DLL enter/exit functions */ +LPCRITICAL_SECTION cond_list_lock = NULL; +LPCRITICAL_SECTION cond_test_init_lock = NULL; +LPCRITICAL_SECTION mutex_test_init_lock = NULL; + +gen_cond_t cond_list_head = NULL; +gen_cond_t cond_list_tail = NULL; + +/* This macro sets the value of errno + * based on the Windows error code. + */ +#define SET_ERROR(winerr) switch(winerr) { \ + case ERROR_SUCCESS: errno = 0; \ + break; \ + case ERROR_NOT_ENOUGH_MEMORY: \ + case ERROR_OUTOFMEMORY: errno = ENOMEM; \ + break; \ + case ERROR_ACCESS_DENIED: errno = EPERM; \ + break; \ + case ERROR_INVALID_HANDLE: \ + case ERROR_INVALID_PARAMETER: errno = EINVAL; \ + break; \ + case WAIT_TIMEOUT: errno = ETIMEDOUT; \ + break; \ + default: errno = winerr; \ + } + +/* + * gen_mutex_init() + * + * initializes a previously declared mutex + * + * returns 0 on success, -1 and sets errno on failure. + */ +int gen_win_mutex_init( + HANDLE *mut) +{ + if (mut == NULL) + { + errno = EINVAL; + return -1; + } + + *mut = CreateMutex(NULL, FALSE, NULL); + if (*mut == NULL) + { + DWORD err = GetLastError(); + SET_ERROR(err) + } + + return (*mut) ? 0 : -1; +} + +/* + * gen_mutex_lock() + * + * blocks until it obtains a mutex lock on the given mutex + * + * returns 0 on success, -1 and sets errno on failure. 
+ */ +int gen_win_mutex_lock( + HANDLE *mut) +{ + DWORD dwWaitResult; + int result = 0; + + if (*mut == GEN_MUTEX_INITIALIZER) + { + /* initialize default mutex */ + if (mutex_test_init_lock == NULL) + { + mutex_test_init_lock = (LPCRITICAL_SECTION) calloc(1, sizeof(CRITICAL_SECTION)); + InitializeCriticalSection(mutex_test_init_lock); + } + + EnterCriticalSection(mutex_test_init_lock); + + gen_mutex_init(mut); + + LeaveCriticalSection(mutex_test_init_lock); + } + + if (mut == NULL || *mut == NULL) + { + errno = EINVAL; + return -1; + } + + dwWaitResult = WaitForSingleObject(*mut, INFINITE); + + if (dwWaitResult != WAIT_OBJECT_0 && dwWaitResult != WAIT_ABANDONED) + { + DWORD err = GetLastError(); + result = -1; + SET_ERROR(err) + } + + return result; +} + + +/* + * gen_mutex_unlock() + * + * releases a lock held on a mutex + * + * returns 0 on success, -1 and sets errno on failure + */ +int gen_win_mutex_unlock( + HANDLE *mut) +{ + BOOL rc; + + if (mut == NULL || *mut == NULL) + { + errno = EINVAL; + return -1; + } + + rc = ReleaseMutex(*mut); + if (!rc) + { + DWORD err = GetLastError(); + SET_ERROR(err) + } + + return (rc) ? 0 : -1; +} + + +/* + * pthread_mutex_trylock() + * + * nonblocking attempt to acquire a lock. 
+ * + * returns 0 on success, -1 and sets errno on failure, sets errno to EBUSY + * if it cannot obtain the lock + */ +int gen_win_mutex_trylock( + HANDLE *mut) +{ + DWORD dwWaitResult; + int rc; + + if (*mut == GEN_MUTEX_INITIALIZER) + { + /* initialize default mutex */ + if (mutex_test_init_lock == NULL) + { + mutex_test_init_lock = (LPCRITICAL_SECTION) calloc(1, sizeof(CRITICAL_SECTION)); + InitializeCriticalSection(mutex_test_init_lock); + } + + EnterCriticalSection(mutex_test_init_lock); + + gen_mutex_init(mut); + + LeaveCriticalSection(mutex_test_init_lock); + } + + if (mut == NULL || *mut == NULL) + { + errno = EINVAL; + return -1; + } + + dwWaitResult = WaitForSingleObject(*mut, 0); + if (dwWaitResult == WAIT_OBJECT_0 || dwWaitResult == WAIT_ABANDONED) + { + rc = 0; + } + else + { + rc = -1; + if (dwWaitResult == WAIT_TIMEOUT) + { + errno = EBUSY; + } + else + { + DWORD err = GetLastError(); + SET_ERROR(err); + } + } + + return rc; +} + +/* + * gen_mutex_destroy() + * + * uninitializes the mutex and frees all memory associated with it. + * + * returns 0 on success, -errno on failure. 
+ */ +int gen_win_mutex_destroy( + HANDLE *mut) +{ + + if (mut == NULL || *mut == NULL) + { + errno = EINVAL; + return (-EINVAL); + } + + CloseHandle(*mut); + + /* set mutex back to initializer value */ + *mut = GEN_MUTEX_INITIALIZER; + + return 0; +} + +HANDLE gen_win_thread_self(void) +{ + return GetCurrentThread(); +} + +_inline int cond_check_need_init(gen_cond_t *cond) +{ + int result = 0; + + /* initialize critical section if necessary */ + if (cond_test_init_lock == NULL) + { + cond_test_init_lock = (LPCRITICAL_SECTION) calloc(1, sizeof(CRITICAL_SECTION)); + InitializeCriticalSection(cond_test_init_lock); + } + + /* initialize condition variable created with GEN_COND_INITIALIZER */ + EnterCriticalSection(cond_test_init_lock); + + if (*cond == GEN_COND_INITIALIZER) + { + result = gen_cond_init(cond); + } + else if (*cond == NULL) + { + result = EINVAL; + } + + LeaveCriticalSection(cond_test_init_lock); + + return result; +} + +int gen_win_cond_destroy(gen_cond_t *cond) +{ + gen_cond_t cv; + int result = 0, result1 = 0, result2 = 0; + + if(!cond || !(*cond)) + { + return EINVAL; + } + + if (*cond != GEN_COND_INITIALIZER) + { + EnterCriticalSection(cond_list_lock); + + cv = *cond; + + if (WaitForSingleObject(cv->semBlockLock, INFINITE) != WAIT_OBJECT_0) + { + return errno; + } + + if ((result = gen_mutex_trylock(&(cv->mtxUnblockLock))) != 0) + { + ReleaseSemaphore(cv->semBlockLock, 1, NULL); + return errno; + } + + if (cv->nWaitersBlocked > cv->nWaitersGone) + { + if (!ReleaseSemaphore(cv->semBlockLock, 1, NULL)) + { + result = GetLastError(); + SET_ERROR(result) + } + result1 = gen_mutex_unlock(&(cv->mtxUnblockLock)); + result2 = EBUSY; + } + else + { + /* Now it is safe to destroy */ + *cond = NULL; + + if (!CloseHandle(cv->semBlockLock)) + { + DWORD err = GetLastError(); + SET_ERROR(err) + result = errno; + } + if (!CloseHandle(cv->semBlockQueue)) + { + DWORD err = GetLastError(); + SET_ERROR(err) + result1 = errno; + } + if ((result2 = 
gen_mutex_unlock(&(cv->mtxUnblockLock))) == 0) + { + result2 = gen_mutex_destroy(&(cv->mtxUnblockLock)); + } + + /* Unlink the CV from the list */ + if (cond_list_head == cv) + { + cond_list_head = cv->next; + } + else + { + cv->prev->next = cv->next; + } + + if (cond_list_tail == cv) + { + cond_list_tail = cv->prev; + } + else { + cv->next->prev = cv->prev; + } + + free(cv); + } + + LeaveCriticalSection(cond_list_lock); + } + else + { + EnterCriticalSection(cond_test_init_lock); + + if (*cond == GEN_COND_INITIALIZER) + { + *cond = NULL; + } + else + { + result = EBUSY; + } + + LeaveCriticalSection(cond_test_init_lock); + } + + return ((result != 0) ? result : ((result1 != 0) ? result1 : result2)); +} + +typedef struct +{ + gen_mutex_t *mutexPtr; + gen_cond_t cv; + int *resultPtr; +} cond_wait_cleanup_args_t; + +static void __cdecl cond_wait_cleanup(void *args) +{ + cond_wait_cleanup_args_t *cleanup_args = (cond_wait_cleanup_args_t *) args; + gen_cond_t cv = cleanup_args->cv; + int *resultPtr = cleanup_args->resultPtr; + int nSignalsWasLeft; + int result; + + if ((result = gen_mutex_lock(&(cv->mtxUnblockLock))) != 0) + { + *resultPtr = result; + return; + } + + if ((nSignalsWasLeft = cv->nWaitersToUnblock) != 0) + { + --(cv->nWaitersToUnblock); + } + else if (INT_MAX / 2 == ++(cv->nWaitersGone)) + { + if (WaitForSingleObject(cv->semBlockLock, INFINITE) != WAIT_OBJECT_0) + { + *resultPtr = (int) GetLastError(); + return; + } + cv->nWaitersBlocked -= cv->nWaitersGone; + if (!ReleaseSemaphore(cv->semBlockLock, 1, NULL)) + { + *resultPtr = (int) GetLastError(); + return; + } + cv->nWaitersGone = 0; + } + + if ((result = gen_mutex_unlock(&(cv->mtxUnblockLock))) != 0) + { + *resultPtr = result; + return; + } + + if (nSignalsWasLeft == 1) + { + if (!ReleaseSemaphore(cv->semBlockLock, 1, NULL)) + { + *resultPtr = (int) GetLastError(); + return; + } + } + + if ((result = gen_mutex_lock(cleanup_args->mutexPtr)) != 0) + { + *resultPtr = result; + } + +} + +static _inline int 
cond_timedwait(gen_cond_t *cond, + HANDLE *mutex, const struct timespec *abstime) +{ + int result = 0; + gen_cond_t cv; + cond_wait_cleanup_args_t cleanup_args; + struct _timeb curtime; + unsigned int nano_ms, ms_diff; + + if (cond == NULL || *cond == NULL) + { + return EINVAL; + } + + if (*cond == GEN_COND_INITIALIZER) + { + result = cond_check_need_init(cond); + } + + if (result != 0 && result != EBUSY) + { + return result; + } + + cv = *cond; + + if ((result = WaitForSingleObject(cv->semBlockLock, INFINITE)) != 0) + { + SET_ERROR(result) + return errno; + } + + ++(cv->nWaitersBlocked); + + if (!ReleaseSemaphore(cv->semBlockLock, 1, NULL)) + { + DWORD err = GetLastError(); + SET_ERROR(err) + return errno; + } + + cleanup_args.mutexPtr = mutex; + cleanup_args.cv = cv; + cleanup_args.resultPtr = &result; + +#pragma inline_depth(0) + + /* Now we can release mutex and... */ + if ((result = gen_mutex_unlock(mutex)) == 0) + { + /* convert difference in times to milliseconds */ + DWORD ms = INFINITE; + if (abstime) + { + nano_ms = abstime->tv_nsec / 1000000L; + _ftime_s(&curtime); + ms = (abstime->tv_sec - curtime.time) > 0 ? (abstime->tv_sec - curtime.time) * 1000 : 0; + if (ms > 0) + { + if (nano_ms >= curtime.millitm) + { + ms_diff = nano_ms - curtime.millitm; + } + else + { + ms_diff = nano_ms + 1000 - curtime.millitm; + ms -= 1000; + } + } + else + { + ms_diff = (nano_ms >= curtime.millitm) ? 
nano_ms - curtime.millitm : 0; + } + ms += ms_diff; + } + /* always wait at least 1ms so we get WAIT_TIMEOUT result */ + if (ms == 0) ms = 1; + + result = WaitForSingleObject(cv->semBlockQueue, ms); + SET_ERROR(result) + result = errno; + } + else + { + result = errno; + } + + cond_wait_cleanup(&cleanup_args); + +#pragma inline_depth() + + return result; +} + +int gen_win_cond_wait(gen_cond_t *cond, HANDLE *mut) +{ + return cond_timedwait(cond, mut, NULL); +} + +int gen_win_cond_timedwait(gen_cond_t *cond, HANDLE *mut, + const struct timespec *abstime) +{ + return cond_timedwait(cond, mut, abstime); +} + +static _inline int cond_unblock(gen_cond_t *cond, int unblockAll) +{ + int result; + gen_cond_t cv; + int nSignalsToIssue; + + if (cond == NULL || *cond == NULL) + { + return EINVAL; + } + + errno = 0; + + cv = *cond; + + /* uninitialized static cv */ + if (cv == GEN_COND_INITIALIZER) + { + return 0; + } + + if ((result = gen_mutex_lock(&(cv->mtxUnblockLock))) != 0) + { + return errno; + } + + if (cv->nWaitersToUnblock != 0) + { + if (cv->nWaitersBlocked == 0) + { + result = gen_mutex_unlock(&(cv->mtxUnblockLock)); + return (result == 0) ? 
0 : errno; + } + if (unblockAll) + { + cv->nWaitersToUnblock += (nSignalsToIssue = cv->nWaitersBlocked); + cv->nWaitersBlocked = 0; + } + else + { + nSignalsToIssue = 1; + cv->nWaitersToUnblock++; + cv->nWaitersBlocked--; + } + } + else if (cv->nWaitersBlocked > cv->nWaitersGone) + { + if (WaitForSingleObject(cv->semBlockLock, INFINITE) != WAIT_OBJECT_0) + { + result = GetLastError(); + SET_ERROR(result) + gen_mutex_unlock(&(cv->mtxUnblockLock)); + return errno; + } + if (cv->nWaitersGone != 0) + { + cv->nWaitersBlocked -= cv->nWaitersGone; + } + if (unblockAll) + { + nSignalsToIssue = cv->nWaitersToUnblock = cv->nWaitersBlocked; + cv->nWaitersBlocked = 0; + } + else + { + nSignalsToIssue = cv->nWaitersToUnblock = 1; + cv->nWaitersBlocked--; + } + } + else + { + result = gen_mutex_unlock(&(cv->mtxUnblockLock)); + return (result == 0) ? 0 : errno; + } + + if ((result = gen_mutex_unlock(&(cv->mtxUnblockLock))) == 0) + { + if (!ReleaseSemaphore(cv->semBlockQueue, nSignalsToIssue, NULL)) + { + result = GetLastError(); + SET_ERROR(result) + } + } + + + return errno; +} + +int gen_win_cond_signal(gen_cond_t *cond) +{ + return cond_unblock(cond, FALSE); +} + +int gen_win_cond_broadcast(gen_cond_t *cond) +{ + return cond_unblock(cond, TRUE); +} + +int gen_win_cond_init(gen_cond_t *cond) +{ + DWORD err; + gen_cond_t cv = NULL; + + if (!cond) + { + return EINVAL; + } + + /* Allocate condition variable */ + cv = (gen_cond_t) calloc(1, sizeof(*cv)); + if (cv == NULL) + { + err = ENOMEM; + goto DONE; + } + + /* Create locking semaphore */ + cv->semBlockLock = CreateSemaphore(NULL, 1, LONG_MAX, NULL); + if (cv->semBlockLock == NULL) + { + err = GetLastError(); + SET_ERROR(err) + goto FAIL0; + } + + /* Create queue semaphore */ + cv->semBlockQueue = CreateSemaphore(NULL, 0, LONG_MAX, NULL); + if (cv->semBlockQueue == NULL) + { + err = GetLastError(); + SET_ERROR(err) + goto FAIL1; + } + + /* Create unblock/lock mutex */ + if ((err = gen_mutex_init(&(cv->mtxUnblockLock))) != 0) + 
{ + SET_ERROR(err) + goto FAIL2; + } + + err = 0; + + goto DONE; + + /* + * Error conditions + */ +FAIL2: + CloseHandle(cv->semBlockQueue); + +FAIL1: + CloseHandle(cv->semBlockLock); + +FAIL0: + free(cv); + cv = NULL; + +DONE: + if (err == 0) + { + if (cond_list_lock == NULL) + { + cond_list_lock = (LPCRITICAL_SECTION) calloc(1, sizeof(CRITICAL_SECTION)); + InitializeCriticalSection(cond_list_lock); + } + + EnterCriticalSection(cond_list_lock); + + cv->next = NULL; + cv->prev = cond_list_tail; + + if (cond_list_tail != NULL) + { + cond_list_tail->next = cv; + } + + cond_list_tail = cv; + + if (cond_list_head == NULL) + { + cond_list_head = cv; + } + + LeaveCriticalSection(cond_list_lock); + + } + + *cond = cv; + + return errno; +} + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/misc/pint-uid-mgmt.c b/src/common/misc/pint-uid-mgmt.c new file mode 100644 index 0000000..e1bb717 --- /dev/null +++ b/src/common/misc/pint-uid-mgmt.c @@ -0,0 +1,198 @@ +#include "pint-uid-mgmt.h" +#include "pint-util.h" +#include "gen-locks.h" + +static list_head_t *uid_lru_list = NULL; +static hash_table_t *uid_hash_table = NULL; + +static gen_mutex_t uid_mgmt_mutex = GEN_MUTEX_INITIALIZER; + +static int uid_hash_compare_keys(void* key, list_head_t *link); + +/* PINT_uid_mgmt_initialize() + * + * Allocate memory for the uid management interface. A linked list is + * used to implement lru eviction, and a hash table is used to locate + * existing uid entries quickly. 
+ */ +int PINT_uid_mgmt_initialize() +{ + list_head_t *list; + hash_table_t *hash_tbl; + PINT_uid_mgmt_s *tmp, *rover; + int i; + int ret = 0; + + /* free any already existing hash table and linked list */ + if (uid_lru_list) + { + qlist_for_each_entry_safe(rover, tmp, uid_lru_list, lru_link) + { + free(rover); + } + free(uid_lru_list); + uid_lru_list = NULL; + } + + if (uid_hash_table) + { + qhash_finalize(uid_hash_table); + uid_hash_table = NULL; + } + + /* initialize the linked list and the hash table */ + list = (list_head_t *)malloc(sizeof(list_head_t)); + if (!list) + { + ret = -PVFS_ENOMEM; + return ret; + } + INIT_QLIST_HEAD(list); + + hash_tbl = qhash_init(uid_hash_compare_keys, quickhash_32bit_hash, UID_HISTORY_HASH_TABLE_SIZE); + if (!hash_tbl) + { + ret = -PVFS_ENOMEM; + return ret; + } + + /* zero out the fields of uid structure, so they are not "occupied" */ + for (i = 0; i < UID_MGMT_MAX_HISTORY; i++) + { + tmp = (PINT_uid_mgmt_s *)malloc(sizeof(PINT_uid_mgmt_s)); + if (!tmp) + { + ret = -PVFS_ENOMEM; + return ret; + } + tmp->info.count = 0; + tmp->info.uid = 0; + qlist_add_tail(&(tmp->lru_link), list); + } + + uid_lru_list = list; + uid_hash_table = hash_tbl; + + return 0; +} + +/* PINT_uid_mgmt_finalize() + * + * Free all memory associated with the uid managment interface. + */ +void PINT_uid_mgmt_finalize() +{ + PINT_uid_mgmt_s *rover, *tmp; + + if (uid_lru_list) + { + qlist_for_each_entry_safe(rover, tmp, uid_lru_list, lru_link) + { + free(rover); + } + free(uid_lru_list); + uid_lru_list = NULL; + } + + if (uid_hash_table) + { + qhash_finalize(uid_hash_table); + uid_hash_table = NULL; + } + + return; +} + +/* PINT_add_user_to_uid_mgmt() + * + * This function is called to add new PVFS_uid's to the uid management + * interface. 
LRU eviction is used to keep list "recent" + */ +int PINT_add_user_to_uid_mgmt(PVFS_uid userID) +{ + list_head_t *found = NULL; + PINT_uid_mgmt_s *tmp = NULL; + int ret = 0; + + if ((!uid_hash_table) || (!uid_lru_list)) + { + ret = -PVFS_ENODATA; + return ret; + } + + /* search the hash table for our uid */ + found = qhash_search(uid_hash_table, &userID); + if (found) + { + tmp = qlist_entry(found, PINT_uid_mgmt_s, hash_link); + tmp->info.count++; + PINT_util_get_current_timeval(&(tmp->info.tv)); + } + else + { + /* evict a node from the tail of the list and add new uid */ + tmp = qlist_entry(uid_lru_list->prev, PINT_uid_mgmt_s, lru_link); + if (tmp->info.count) + { + /* make sure to remove this entry from the hash table if + the count variable has already been defined (not 0) */ + qhash_search_and_remove(uid_hash_table, &(tmp->info.uid)); + } + tmp->info.count = 1; + tmp->info.uid = userID; + PINT_util_get_current_timeval(&(tmp->info.tv)); + qhash_add(uid_hash_table, &(tmp->info.uid), &(tmp->hash_link)); + } + + /* splice the linked list around our tmp node, then move this + tmp node to the head of the lru eviction list */ + tmp->lru_link.prev->next = tmp->lru_link.next; + tmp->lru_link.next->prev = tmp->lru_link.prev; + qlist_add(&(tmp->lru_link), uid_lru_list); + + return 0; +} + +/* uid_hash_compare_keys() + * + * Compare will return true if hash entry has same uid as a given key. + */ +static int uid_hash_compare_keys(void* key, list_head_t *link) +{ + PVFS_uid uid = *(PVFS_uid *)key; + PINT_uid_mgmt_s *tmp_entry = NULL; + + tmp_entry = qhash_entry(link, PINT_uid_mgmt_s, hash_link); + + if (uid == tmp_entry->info.uid) + { + return 1; + } + return 0; +} + +/* PINT_dump_all_uid_stats() + * + * This function gathers all uid statistics (even inactive structures) + * and stores them in the array that is passed in. 
+ */ +void PINT_dump_all_uid_stats(PVFS_uid_info_s *uid_array) +{ + int i = 0; + list_head_t *rover = uid_lru_list->next; + PINT_uid_mgmt_s *tmp; + + gen_mutex_lock(&uid_mgmt_mutex); + + /* now that we have acquired the lock for the list, fill in our array + * with the uid statistics + */ + for (i = 0; i < UID_MGMT_MAX_HISTORY; i++, rover = rover->next) + { + tmp = qlist_entry(rover, PINT_uid_mgmt_s, lru_link); + uid_array[i] = tmp->info; + } + gen_mutex_unlock(&uid_mgmt_mutex); + + return; +} diff --git a/src/common/misc/pint-uid-mgmt.h b/src/common/misc/pint-uid-mgmt.h new file mode 100644 index 0000000..9f95aa4 --- /dev/null +++ b/src/common/misc/pint-uid-mgmt.h @@ -0,0 +1,53 @@ +#ifndef __PINT_UID_MGMT_H +#define __PINT_UID_MGMT_H + +#include "quicklist.h" +#include "quickhash.h" +#include "pvfs2-types.h" + +/* UID_MGMT_MAX_HISTORY is the number of UIDs stored in history + * UID_HISTORY_HASH_TABLE_SIZE is the size of the hash tbl used to store uids + */ +#define UID_MGMT_MAX_HISTORY 25 +#define UID_HISTORY_HASH_TABLE_SIZE 19 + +typedef struct qlist_head list_head_t; +typedef struct qhash_table hash_table_t; + +/* information stored in each uid management structure defined below */ +typedef struct + { + PVFS_uid uid; + uint64_t count; + struct timeval tv; + } PVFS_uid_info_s; +endecode_fields_2_struct( + timeval, + uint64_t, tv_sec, + uint32_t, tv_usec); +endecode_fields_3( + PVFS_uid_info_s, + PVFS_uid, uid, + uint64_t, count, + timeval, tv); + +/* our uid management structure */ +typedef struct + { + PVFS_uid_info_s info; + list_head_t lru_link; + list_head_t hash_link; + } PINT_uid_mgmt_s; + +/* macro helper to determine if a UID is within the history or not */ +#define IN_UID_HISTORY(current, oldest) \ + (((current.tv_sec * 1e6) + current.tv_usec) > \ + ((oldest.tv_sec * 1e6) + oldest.tv_usec)) + +/* FUNCTION PROTOTYPES */ +int PINT_uid_mgmt_initialize(void); +void PINT_uid_mgmt_finalize(void); +int PINT_add_user_to_uid_mgmt(PVFS_uid userID); +void 
PINT_dump_all_uid_stats(PVFS_uid_info_s *uid_stats); + +#endif /* __PINT_UID_MGMT_H */ diff --git a/src/common/misc/pvfs2-win-util.c b/src/common/misc/pvfs2-win-util.c new file mode 100755 index 0000000..fb293ad --- /dev/null +++ b/src/common/misc/pvfs2-win-util.c @@ -0,0 +1,2117 @@ +/* + * (C) 2001-2011 Clemson University, The University of Chicago and + * Omnibond LLC + * + * Changes by Acxiom Corporation to add relative path support to + * PVFS_util_resolve(), + * Copyright © Acxiom Corporation, 2005 + * + * See COPYING in top-level directory. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pvfs2-config.h" +#include "pvfs2-sysint.h" +#include "pvfs2-util.h" +#include "pvfs2-debug.h" +#include "gossip.h" +#include "pvfs2-attr.h" +#include "pvfs2-types-debug.h" +#include "str-utils.h" +#include "gen-locks.h" +#include "realpath.h" +#include "pint-sysint-utils.h" +#include "pvfs2-internal.h" +#include "pint-util.h" + +#ifdef HAVE_MNTENT_H + +#include +#define PINT_fstab_t FILE +#define PINT_fstab_entry_t struct mntent +#define PINT_fstab_open(_fstab, _fname) (_fstab) = setmntent(_fname, "r") +#define PINT_fstab_close(_tab) endmntent(_tab) +#define PINT_fstab_next_entry(_tab) getmntent(_tab) +#define PINT_fstab_entry_destroy(_entry) _entry = NULL +#define PINT_fstab_entry_hasopt(_entry, _opt) hasmntopt(_entry, _opt) + +#define PINT_FSTAB_NAME(_entry) (_entry)->mnt_fsname +#define PINT_FSTAB_PATH(_entry) (_entry)->mnt_dir +#define PINT_FSTAB_TYPE(_entry) (_entry)->mnt_type +#define PINT_FSTAB_OPTS(_entry) (_entry)->mnt_opts + +#elif HAVE_FSTAB_H + +#include +#define PINT_fstab_t FILE +#define PINT_fstab_entry_t struct fstab +#define PINT_fstab_open(_fstab, _fname) _fstab = fopen(_fname, "r") +#define PINT_fstab_close(_tab) fclose(_tab) +#define PINT_fstab_next_entry(_tab) PINT_util_my_get_next_fsent(_tab) +#define PINT_fstab_entry_destroy(_entry) PINT_util_fsent_destroy(_entry) +#define 
PINT_fstab_entry_hasopt(_entry, _opt) strstr((_entry)->fs_mntops, _opt) + +#define PINT_FSTAB_NAME(_entry) (_entry)->fs_spec +#define PINT_FSTAB_PATH(_entry) (_entry)->fs_file +#define PINT_FSTAB_TYPE(_entry) (_entry)->fs_vfstype +#define PINT_FSTAB_OPTS(_entry) (_entry)->fs_mntops + +#define DEFINE_MY_GET_NEXT_FSENT +static struct fstab * PINT_util_my_get_next_fsent(PINT_fstab_t * tab); +static void PINT_util_fsent_destroy(PINT_fstab_entry_t * entry); + +#elif defined(WIN32) + +/* define our own simplified fstab */ +struct fstab { + char *fs_spec; + char *fs_file; + char *fs_vfstype; + char *fs_type; + char *fs_mntops; +}; + +#define PINT_fstab_t FILE +#define PINT_fstab_entry_t struct fstab +#define PINT_fstab_open(_fstab, _fname) _fstab = fopen(_fname, "r") +#define PINT_fstab_close(_tab) fclose(_tab) +#define PINT_fstab_next_entry(_tab) PINT_util_my_get_next_fsent(_tab) +#define PINT_fstab_entry_destroy(_entry) PINT_util_fsent_destroy(_entry) +#define PINT_fstab_entry_hasopt(_entry, _opt) strstr((_entry)->fs_mntops, _opt) + +#define PINT_FSTAB_NAME(_entry) (_entry)->fs_spec +#define PINT_FSTAB_PATH(_entry) (_entry)->fs_file +#define PINT_FSTAB_TYPE(_entry) (_entry)->fs_vfstype +#define PINT_FSTAB_OPTS(_entry) (_entry)->fs_mntops + +#define DEFINE_MY_GET_NEXT_FSENT +static struct fstab * PINT_util_my_get_next_fsent(PINT_fstab_t * tab); +static void PINT_util_fsent_destroy(PINT_fstab_entry_t * entry); + +#else + +#error OS does not have mntent.h or fstab.h. +#error Add your own fstab parser macros to fix. 
+ +#endif + +#define PVFS2_MAX_INVALID_MNTENTS 256 +#define PVFS2_MAX_TABFILES 8 +#define PVFS2_DYNAMIC_TAB_INDEX (PVFS2_MAX_TABFILES - 1) +#define PVFS2_DYNAMIC_TAB_NAME "" + +static PVFS_util_tab s_stat_tab_array[PVFS2_MAX_TABFILES]; +static int s_stat_tab_count = 0; +static gen_mutex_t s_stat_tab_mutex = GEN_MUTEX_INITIALIZER; + +static int parse_flowproto_string( + const char *input, + enum PVFS_flowproto_type *flowproto); + +static int parse_encoding_string( + const char *cp, + enum PVFS_encoding_type *et); + +static int parse_num_dfiles_string(const char* cp, int* num_dfiles); + +static int PINT_util_resolve_absolute( + const char* local_path, + PVFS_fs_id* out_fs_id, + char* out_fs_path, + int out_fs_path_max); + +struct PVFS_sys_mntent* PVFS_util_gen_mntent( + char* config_server, + char* fs_name) +{ + struct PVFS_sys_mntent* tmp_ent = NULL; + + tmp_ent = (struct PVFS_sys_mntent*)malloc(sizeof(struct + PVFS_sys_mntent)); + if(!tmp_ent) + { + return(NULL); + } + memset(tmp_ent, 0, sizeof(struct PVFS_sys_mntent)); + + tmp_ent->num_pvfs_config_servers = 1; + tmp_ent->pvfs_config_servers = (char**)malloc(sizeof(char*)); + if(!tmp_ent->pvfs_config_servers) + { + free(tmp_ent); + return(NULL); + } + + tmp_ent->pvfs_config_servers[0] = strdup(config_server); + if(!tmp_ent->pvfs_config_servers[0]) + { + free(tmp_ent->pvfs_config_servers); + free(tmp_ent); + return(NULL); + } + + tmp_ent->pvfs_fs_name = strdup(fs_name); + if(!tmp_ent->pvfs_fs_name) + { + free(tmp_ent->pvfs_config_servers[0]); + free(tmp_ent->pvfs_config_servers); + free(tmp_ent); + return(NULL); + } + + tmp_ent->flowproto = FLOWPROTO_DEFAULT; + tmp_ent->encoding = PVFS2_ENCODING_DEFAULT; + + return(tmp_ent); +} + +void PVFS_util_gen_mntent_release(struct PVFS_sys_mntent* mntent) +{ + free(mntent->pvfs_config_servers[0]); + free(mntent->pvfs_config_servers); + free(mntent->pvfs_fs_name); + free(mntent); + return; +} + + +int PVFS_util_get_umask(void) +{ + static int mask = 0, set = 0; + + if (set == 
0) + { + mask = (int)_umask(0); + _umask(mask); + set = 1; + } + return mask; +} + + +PVFS_credentials *PVFS_util_dup_credentials( + const PVFS_credentials *credentials) +{ + PVFS_credentials *ret = NULL; + + if (credentials) + { + ret = (PVFS_credentials *) malloc(sizeof(PVFS_credentials)); + if (ret) + { + memcpy(ret, credentials, sizeof(PVFS_credentials)); + } + } + return ret; +} + +void PVFS_util_release_credentials( + PVFS_credentials *credentials) +{ + if (credentials) + { + free(credentials); + } +} + +int PVFS_util_copy_sys_attr( + PVFS_sys_attr *dest_attr, PVFS_sys_attr *src_attr) +{ + int ret = -PVFS_EINVAL; + + if (src_attr && dest_attr) + { + dest_attr->owner = src_attr->owner; + dest_attr->group = src_attr->group; + dest_attr->perms = src_attr->perms; + dest_attr->atime = src_attr->atime; + dest_attr->mtime = src_attr->mtime; + dest_attr->ctime = src_attr->ctime; + dest_attr->dfile_count = src_attr->dfile_count; + dest_attr->objtype = src_attr->objtype; + dest_attr->mask = src_attr->mask; + dest_attr->flags = src_attr->flags; + + if (src_attr->mask & PVFS_ATTR_SYS_SIZE) + { + dest_attr->size = src_attr->size; + } + + if((src_attr->mask & PVFS_ATTR_SYS_LNK_TARGET) && + src_attr->link_target) + { + dest_attr->link_target = strdup(src_attr->link_target); + if (!dest_attr->link_target) + { + ret = -PVFS_ENOMEM; + return ret; + } + } + else if ((src_attr->mask & PVFS_ATTR_SYS_DIR_HINT)) + { + if (src_attr->dist_name) + { + dest_attr->dist_name = strdup(src_attr->dist_name); + if (dest_attr->dist_name == NULL) + { + ret = -PVFS_ENOMEM; + return ret; + } + } + if (src_attr->dist_params) + { + dest_attr->dist_params = strdup(src_attr->dist_params); + if (dest_attr->dist_params == NULL) + { + free(dest_attr->dist_name); + ret = -PVFS_ENOMEM; + return ret; + } + } + } + ret = 0; + } + return ret; +} + +void PVFS_util_release_sys_attr(PVFS_sys_attr *attr) +{ + if (attr) + { + if ((attr->mask & PVFS_ATTR_SYS_TYPE) && + (attr->objtype == PVFS_TYPE_SYMLINK) && 
attr->link_target) + { + free(attr->link_target); + attr->link_target = NULL; + } + else if ((attr->mask & PVFS_ATTR_SYS_DIR_HINT) && + (attr->objtype == PVFS_TYPE_DIRECTORY)) + { + if (attr->dist_name) + free(attr->dist_name); + if (attr->dist_params) + free(attr->dist_params); + attr->dist_name = NULL; + attr->dist_params = NULL; + } + } +} + +/* PVFS_util_parse_pvfstab() + * + * parses either the file pointed to by the PVFS2TAB_FILE env + * variable, or /etc/fstab, or /etc/pvfs2tab or ./pvfs2tab to extract + * pvfs2 mount entries. + * + * NOTE: if tabfile argument is given at runtime to specify which + * tabfile to use, then that will be the _only_ file searched for + * pvfs2 entries. + * + * example entry: + * tcp://localhost:3334/pvfs2-fs /mnt/pvfs2 pvfs2 defaults 0 0 + * + * returns const pointer to internal tab structure on success, NULL on + * failure + */ +const PVFS_util_tab *PVFS_util_parse_pvfstab( + const char *tabfile) +{ + PINT_fstab_t *mnt_fp = NULL; + int file_count = 5; + /* NOTE: mtab should be last for clean error logic below */ +/* const char *file_list[5] = + { NULL, "/etc/fstab", "/etc/pvfs2tab", "pvfs2tab", "/etc/mtab" }; */ + /* just parse a specified file (by caller or environment) */ + const char *file_list[1] = { NULL }; + const char *targetfile = NULL; + PINT_fstab_entry_t *tmp_ent; + int i, j; + int ret = -1; + int tmp_mntent_count = 0; + PVFS_util_tab *current_tab = NULL; + char *epenv, *tmp; + + if((epenv = getenv("PVFS2EP")) != NULL) + { + struct PVFS_sys_mntent *mntent; + current_tab = &s_stat_tab_array[0]; + current_tab->mntent_array = malloc(sizeof(struct PVFS_sys_mntent)); + mntent = ¤t_tab->mntent_array[0]; + strcpy(current_tab->tabfile_name, "PVFSEP"); + current_tab->mntent_count = 1; + mntent->pvfs_config_servers = malloc(sizeof(char *)); + mntent->pvfs_config_servers[0] = strdup(strchr(epenv, '=') + 1); + mntent->num_pvfs_config_servers = 1; + mntent->the_pvfs_config_server = mntent->pvfs_config_servers[0]; + 
mntent->pvfs_fs_name = strdup(strrchr(mntent->the_pvfs_config_server, '/')); + mntent->pvfs_fs_name++; + mntent->flowproto = FLOWPROTO_DEFAULT; + mntent->encoding = PVFS2_ENCODING_DEFAULT; + mntent->mnt_dir = strdup(epenv); + tmp = strchr(mntent->mnt_dir, '='); + *tmp = 0; + mntent->mnt_opts = strdup("rw"); + mntent->fs_id = PVFS_FS_ID_NULL; + return &s_stat_tab_array[0]; + } + + if (tabfile != NULL) + { + /* + caller wants us to look in a specific location for the + tabfile + */ + file_list[0] = tabfile; + file_count = 1; + } + else + { + /* + search the system and env vars for tab files; + first check for environment variable override + */ + file_list[0] = getenv("PVFS2TAB_FILE"); + } + + gen_mutex_lock(&s_stat_tab_mutex); + + /* start by checking list of files we have already parsed */ + /*** only check one file on Windows + for (i = 0; i < s_stat_tab_count; i++) + { + for (j = 0; j < file_count; j++) + { + if (file_list[j] && + !strcmp(file_list[j], s_stat_tab_array[i].tabfile_name)) + { + /* already done */ + /*** + gen_mutex_unlock(&s_stat_tab_mutex); + return (&s_stat_tab_array[i]); + } + } + } + ***/ + + assert(s_stat_tab_count < PVFS2_DYNAMIC_TAB_INDEX); + + /* + * Open specified file + */ + if(file_list[0]) + { + PINT_fstab_open(mnt_fp, file_list[0]); + if (mnt_fp) + { + while ((tmp_ent = PINT_fstab_next_entry(mnt_fp))) + { + if(!(PINT_FSTAB_NAME(tmp_ent)) || + !(strncmp(PINT_FSTAB_NAME(tmp_ent), "#", 1))) + { + /* this entry is a comment */ + PINT_fstab_entry_destroy(tmp_ent); + continue; + } + + if (strcmp(PINT_FSTAB_TYPE(tmp_ent), "pvfs2") == 0) + { + targetfile = file_list[0]; + tmp_mntent_count++; + } + + PINT_fstab_entry_destroy(tmp_ent); + } + PINT_fstab_close(mnt_fp); + } + } + + if (!targetfile) + { + gossip_err("Error: could not find any pvfs2 tabfile entries.\n"); + if (file_list[0]) + { + gossip_err("Error: tabfile: %s\n", file_list[0]); + } + else + { + gossip_err("Error: no tabfile specified\n"); + } + gen_mutex_unlock(&s_stat_tab_mutex); + 
return (NULL); + } + gossip_debug(GOSSIP_CLIENT_DEBUG, + "Using pvfs2 tab file: %s\n", targetfile); + + /* allocate array of entries */ + current_tab = &s_stat_tab_array[s_stat_tab_count]; + current_tab->mntent_array = (struct PVFS_sys_mntent *)malloc( + (tmp_mntent_count * sizeof(struct PVFS_sys_mntent))); + if (!current_tab->mntent_array) + { + gen_mutex_unlock(&s_stat_tab_mutex); + return (NULL); + } + memset(current_tab->mntent_array, 0, + (tmp_mntent_count * sizeof(struct PVFS_sys_mntent))); + for (i = 0; i < tmp_mntent_count; i++) + { + current_tab->mntent_array[i].fs_id = PVFS_FS_ID_NULL; + } + current_tab->mntent_count = tmp_mntent_count; + + /* reopen our chosen fstab file */ + PINT_fstab_open(mnt_fp, targetfile); + + /* scan through looking for every pvfs2 entry */ + i = 0; + while ((tmp_ent = PINT_fstab_next_entry(mnt_fp))) + { + if (strcmp(PINT_FSTAB_TYPE(tmp_ent), "pvfs2") == 0) + { + struct PVFS_sys_mntent *me = ¤t_tab->mntent_array[i]; + char *cp; + int cur_server; + + /* Enable integrity checks by default */ + me->integrity_check = 1; + /* comma-separated list of ways to contact a config server */ + me->num_pvfs_config_servers = 1; + for (cp=PINT_FSTAB_NAME(tmp_ent); *cp; cp++) + if (*cp == ',') + ++me->num_pvfs_config_servers; + + /* allocate room for our copies of the strings */ + me->pvfs_config_servers = malloc(me->num_pvfs_config_servers + * sizeof(*me->pvfs_config_servers)); + if (!me->pvfs_config_servers) + goto error_exit; + memset(me->pvfs_config_servers, 0, + me->num_pvfs_config_servers * sizeof(*me->pvfs_config_servers)); + me->mnt_dir = malloc(strlen(PINT_FSTAB_PATH(tmp_ent)) + 1); + me->mnt_opts = malloc(strlen(PINT_FSTAB_OPTS(tmp_ent)) + 1); + + /* bail if any mallocs failed */ + if (!me->mnt_dir || !me->mnt_opts) + { + goto error_exit; + } + + /* parse server list and make sure fsname is same */ + cp = PINT_FSTAB_NAME(tmp_ent); + cur_server = 0; + for (;;) { + char *tok, *p; + int slashcount; + char *slash; + char *last_slash; + + /* 
tok = strsep(&cp, ","); */ + if (cp == NULL) + { + break; + } + for (p = cp; *p && *p != ','; p++) ; + tok = cp; + if (*p) + { + *p = 0; + cp = p + 1; + } + else + { + cp = NULL; + } + + slash = tok; + slashcount = 0; + while ((slash = strchr(slash, '/'))) + { + slash++; + slashcount++; + } + if (slashcount != 3) + { + /* N/A + if(!strcmp(targetfile, "/etc/mtab")) + { + gossip_err("Error: could not find any pvfs2 tabfile entries.\n"); + gossip_err("Error: tried the following tabfiles:\n"); + for (j = 0; j < file_count; j++) + { + gossip_err(" %s\n", file_list[j]); + } + goto error_exit; + } + else + { + */ + gossip_err("Error: invalid tab file entry: %s\n", + PINT_FSTAB_NAME(tmp_ent)); + gossip_err("Error: offending tab file: %s\n", + targetfile); + goto error_exit; + /* } */ + } + + /* find a reference point in the string */ + last_slash = strrchr(tok, '/'); + *last_slash = '\0'; + + /* config server and fs name are a special case, take one + * string and split it in half on "/" delimiter + */ + me->pvfs_config_servers[cur_server] = strdup(tok); + if (!me->pvfs_config_servers[cur_server]) + goto error_exit; + + ++last_slash; + + if (cur_server == 0) { + me->pvfs_fs_name = strdup(last_slash); + if (!me->pvfs_fs_name) + goto error_exit; + } else { + if (strcmp(last_slash, me->pvfs_fs_name) != 0) { + gossip_lerr( + "Error: different fs names in server addresses: %s\n", + PINT_FSTAB_NAME(tmp_ent)); + goto error_exit; + } + } + ++cur_server; + } + + /* make our own copy of parameters of interest */ + /* mnt_dir and mnt_opts are verbatim copies */ + strcpy(current_tab->mntent_array[i].mnt_dir, + PINT_FSTAB_PATH(tmp_ent)); + strcpy(current_tab->mntent_array[i].mnt_opts, + PINT_FSTAB_OPTS(tmp_ent)); + + /* find out if a particular flow protocol was specified */ + if ((PINT_fstab_entry_hasopt(tmp_ent, "flowproto"))) + { + ret = parse_flowproto_string( + PINT_FSTAB_OPTS(tmp_ent), + &(current_tab-> + mntent_array[i].flowproto)); + if (ret < 0) + { + goto error_exit; + } + } 
+ else + { + current_tab->mntent_array[i].flowproto = + FLOWPROTO_DEFAULT; + } + + /* pick an encoding to use with the server */ + current_tab->mntent_array[i].encoding = + PVFS2_ENCODING_DEFAULT; + cp = PINT_fstab_entry_hasopt(tmp_ent, "encoding"); + if (cp) + { + ret = parse_encoding_string( + cp, ¤t_tab->mntent_array[i].encoding); + if (ret < 0) + { + goto error_exit; + } + } + + /* find out if a particular flow protocol was specified */ + current_tab->mntent_array[i].default_num_dfiles = 0; + cp = PINT_fstab_entry_hasopt(tmp_ent, "num_dfiles"); + if (cp) + { + ret = parse_num_dfiles_string( + cp, + &(current_tab->mntent_array[i].default_num_dfiles)); + + if (ret < 0) + { + goto error_exit; + } + } + + /* Loop counter increment */ + i++; + + PINT_fstab_entry_destroy(tmp_ent); + } + } + s_stat_tab_count++; + strcpy(s_stat_tab_array[s_stat_tab_count-1].tabfile_name, targetfile); + PINT_fstab_close(mnt_fp); + gen_mutex_unlock(&s_stat_tab_mutex); + return (&s_stat_tab_array[s_stat_tab_count - 1]); + + error_exit: + for (; i > -1; i--) + { + struct PVFS_sys_mntent *me = ¤t_tab->mntent_array[i]; + + if (me->pvfs_config_servers) + { + int j; + for (j=0; jnum_pvfs_config_servers; j++) + if (me->pvfs_config_servers[j]) + free(me->pvfs_config_servers[j]); + free(me->pvfs_config_servers); + me->pvfs_config_servers = NULL; + me->num_pvfs_config_servers = 0; + } + + if (me->mnt_dir) + { + free(me->mnt_dir); + me->mnt_dir = NULL; + } + + if (me->mnt_opts) + { + free(me->mnt_opts); + me->mnt_opts = NULL; + } + + if (me->pvfs_fs_name) + { + free(me->pvfs_fs_name); + me->pvfs_fs_name = NULL; + } + } + PINT_fstab_close(mnt_fp); + gen_mutex_unlock(&s_stat_tab_mutex); + return (NULL); +} + +/* PVFS_util_get_default_fsid() + * + * fills in the fs identifier for the first active file system that + * the library knows about. 
Useful for test programs or admin tools + * that need default file system to access if the user has not + * specified one + * + * returns 0 on success, -PVFS_error on failure + */ +int PVFS_util_get_default_fsid(PVFS_fs_id* out_fs_id) +{ + int i = 0, j = 0; + + gen_mutex_lock(&s_stat_tab_mutex); + + for(i = 0; i < s_stat_tab_count; i++) + { + for(j = 0; j < s_stat_tab_array[i].mntent_count; j++) + { + *out_fs_id = s_stat_tab_array[i].mntent_array[j].fs_id; + if(*out_fs_id != PVFS_FS_ID_NULL) + { + gen_mutex_unlock(&s_stat_tab_mutex); + return(0); + } + } + } + + /* check the dynamic tab area if we haven't found an fs yet */ + for(j = 0; j < s_stat_tab_array[ + PVFS2_DYNAMIC_TAB_INDEX].mntent_count; j++) + { + *out_fs_id = s_stat_tab_array[ + PVFS2_DYNAMIC_TAB_INDEX].mntent_array[j].fs_id; + if(*out_fs_id != PVFS_FS_ID_NULL) + { + gen_mutex_unlock(&s_stat_tab_mutex); + return(0); + } + } + + gen_mutex_unlock(&s_stat_tab_mutex); + return(-PVFS_ENOENT); +} + +/* + * PVFS_util_add_dynamic_mntent() + * + * dynamically add mount information to our internally managed mount + * tables (used for quick fs resolution using PVFS_util_resolve). + * dynamic mnt entries can only be added to a particular dynamic + * region of our book keeping, so they're the exception, not the rule. 
+ * + * returns 0 on success, -PVFS_error on failure, and 1 if the mount + * entry already exists as a parsed entry (not dynamic) + */ +int PVFS_util_add_dynamic_mntent(struct PVFS_sys_mntent *mntent) +{ + int i = 0, j = 0, new_index = 0; + int ret = -PVFS_EINVAL; + struct PVFS_sys_mntent *current_mnt = NULL; + struct PVFS_sys_mntent *tmp_mnt_array = NULL; + + if (mntent) + { + gen_mutex_lock(&s_stat_tab_mutex); + + /* + we exhaustively scan to be sure this mnt entry doesn't exist + anywhere in our book keeping; first scan the parsed regions + */ + for(i = 0; i < s_stat_tab_count; i++) + { + for(j = 0; j < s_stat_tab_array[i].mntent_count; j++) + { + current_mnt = &(s_stat_tab_array[i].mntent_array[j]); + + if (current_mnt->fs_id == mntent->fs_id) + { + /* + no need to add the dynamic mount information + because the file system already exists as a + parsed mount entry + */ + gen_mutex_unlock(&s_stat_tab_mutex); + return 1; + } + } + } + +#if 0 + /* check the dynamic region if we haven't found a match yet */ + for(j = 0; j < s_stat_tab_array[ + PVFS2_DYNAMIC_TAB_INDEX].mntent_count; j++) + { + current_mnt = &(s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX]. 
+ mntent_array[j]); + + if ((current_mnt->fs_id == mntent->fs_id) && + (strcmp(current_mnt->pvfs_config_servers[0], + mntent->pvfs_config_servers[0]) != 0)) + { + gossip_err("Error: FS with id %d is already mounted using" + " a different config server.\n", (int)mntent->fs_id); + gossip_err("Error: This could indicate that a duplicate fsid" + " is being used.\n"); + gossip_err("Error: Please check your server configuration.\n"); + gen_mutex_unlock(&s_stat_tab_mutex); + return -PVFS_ENXIO; + } + } +#endif + + /* copy the mntent to our table in the dynamic tab area */ + new_index = s_stat_tab_array[ + PVFS2_DYNAMIC_TAB_INDEX].mntent_count; + + if (new_index == 0) + { + /* allocate and initialize the dynamic tab object */ + s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX].mntent_array = + (struct PVFS_sys_mntent *)malloc( + sizeof(struct PVFS_sys_mntent)); + if (!s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX].mntent_array) + { + return -PVFS_ENOMEM; + } + strncpy(s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX].tabfile_name, + PVFS2_DYNAMIC_TAB_NAME, PVFS_NAME_MAX); + } + else + { + /* we need to re-alloc this guy to add a new array entry */ + tmp_mnt_array = (struct PVFS_sys_mntent *)malloc( + ((new_index + 1) * sizeof(struct PVFS_sys_mntent))); + if (!tmp_mnt_array) + { + return -PVFS_ENOMEM; + } + + /* + copy all mntent entries into the new array, freeing the + original entries + */ + for(i = 0; i < new_index; i++) + { + current_mnt = &s_stat_tab_array[ + PVFS2_DYNAMIC_TAB_INDEX].mntent_array[i]; + PVFS_util_copy_mntent(&tmp_mnt_array[i], current_mnt); + PVFS_util_free_mntent(current_mnt); + } + + /* finally, swap the mntent arrays */ + free(s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX].mntent_array); + s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX].mntent_array = + tmp_mnt_array; + } + + gossip_debug(GOSSIP_CLIENT_DEBUG, "* Adding new dynamic mount " + "point %s [%d,%d]\n", mntent->mnt_dir, + PVFS2_DYNAMIC_TAB_INDEX, new_index); + + current_mnt = &s_stat_tab_array[ + 
PVFS2_DYNAMIC_TAB_INDEX].mntent_array[new_index]; + + ret = PVFS_util_copy_mntent(current_mnt, mntent); + + s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX].mntent_count++; + + gen_mutex_unlock(&s_stat_tab_mutex); + } + return ret; +} + +/* + * PVFS_util_remove_internal_mntent() + * + * dynamically remove mount information from our internally managed + * mount tables. + * + * returns 0 on success, -PVFS_error on failure + */ +int PVFS_util_remove_internal_mntent( + struct PVFS_sys_mntent *mntent) +{ + int i = 0, j = 0, new_count = 0, found = 0, found_index = 0; + int ret = -PVFS_EINVAL; + struct PVFS_sys_mntent *current_mnt = NULL; + struct PVFS_sys_mntent *tmp_mnt_array = NULL; + + if (mntent) + { + gen_mutex_lock(&s_stat_tab_mutex); + + /* + we exhaustively scan to be sure this mnt entry *does* exist + somewhere in our book keeping + */ + for(i = 0; i < s_stat_tab_count; i++) + { + for(j = 0; j < s_stat_tab_array[i].mntent_count; j++) + { + current_mnt = &(s_stat_tab_array[i].mntent_array[j]); + if ((current_mnt->fs_id == mntent->fs_id) + && (strcmp(current_mnt->mnt_dir, mntent->mnt_dir) == 0)) + { + found_index = i; + found = 1; + goto mntent_found; + } + } + } + + /* check the dynamic region if we haven't found a match yet */ + for(j = 0; j < s_stat_tab_array[ + PVFS2_DYNAMIC_TAB_INDEX].mntent_count; j++) + { + current_mnt = &(s_stat_tab_array[PVFS2_DYNAMIC_TAB_INDEX]. 
+ mntent_array[j]); + + if (current_mnt->fs_id == mntent->fs_id) + { + found_index = PVFS2_DYNAMIC_TAB_INDEX; + found = 1; + goto mntent_found; + } + } + + mntent_found: + if (!found) + { + return -PVFS_EINVAL; + } + + gossip_debug(GOSSIP_CLIENT_DEBUG, "* Removing mount " + "point %s [%d,%d]\n", current_mnt->mnt_dir, + found_index, j); + + /* remove the mntent from our table in the found tab area */ + if ((s_stat_tab_array[found_index].mntent_count - 1) > 0) + { + /* + this is 1 minus the old count since there will be 1 less + mnt entries after this call + */ + new_count = s_stat_tab_array[found_index].mntent_count - 1; + + /* we need to re-alloc this guy to remove the array entry */ + tmp_mnt_array = (struct PVFS_sys_mntent *)malloc( + (new_count * sizeof(struct PVFS_sys_mntent))); + if (!tmp_mnt_array) + { + return -PVFS_ENOMEM; + } + + /* + copy all mntent entries into the new array, freeing the + original entries -- and skipping the one that we're + trying to remove + */ + for(i = 0, new_count = 0; + i < s_stat_tab_array[found_index].mntent_count; i++) + { + current_mnt = &s_stat_tab_array[found_index].mntent_array[i]; + + if ((current_mnt->fs_id == mntent->fs_id) + && (strcmp(current_mnt->mnt_dir, mntent->mnt_dir) == 0)) + { + PVFS_util_free_mntent(current_mnt); + continue; + } + PVFS_util_copy_mntent( + &tmp_mnt_array[new_count++], current_mnt); + PVFS_util_free_mntent(current_mnt); + } + + /* finally, swap the mntent arrays */ + free(s_stat_tab_array[found_index].mntent_array); + s_stat_tab_array[found_index].mntent_array = tmp_mnt_array; + + s_stat_tab_array[found_index].mntent_count--; + ret = 0; + } + else + { + /* + special case: we're removing the last mnt entry in the + array here. since this is the case, we also free the + array since we know it's now empty. 
+ */ + PVFS_util_free_mntent( + &s_stat_tab_array[found_index].mntent_array[0]); + free(s_stat_tab_array[found_index].mntent_array); + s_stat_tab_array[found_index].mntent_array = NULL; + s_stat_tab_array[found_index].mntent_count = 0; + ret = 0; + } + gen_mutex_unlock(&s_stat_tab_mutex); + } + return ret; +} + +/* + * PVFS_util_get_mntent_copy() + * + * Given a pointer to a valid mount entry, out_mntent, copy the contents of + * the mount entry for fs_id into out_mntent. + * + * returns 0 on success, -PVFS_error on failure + */ +int PVFS_util_get_mntent_copy(PVFS_fs_id fs_id, + struct PVFS_sys_mntent* out_mntent) +{ + int i = 0; + + /* Search for mntent by fsid */ + gen_mutex_lock(&s_stat_tab_mutex); + for(i = 0; i < s_stat_tab_count; i++) + { + int j; + for(j = 0; j < s_stat_tab_array[i].mntent_count; j++) + { + struct PVFS_sys_mntent* mnt_iter; + mnt_iter = &(s_stat_tab_array[i].mntent_array[j]); + + if (mnt_iter->fs_id == fs_id) + { + PVFS_util_copy_mntent(out_mntent, mnt_iter); + gen_mutex_unlock(&s_stat_tab_mutex); + return 0; + } + } + } + gen_mutex_unlock(&s_stat_tab_mutex); + return -PVFS_EINVAL; +} + +/* basename() + * + * Return the portion of a path after the last non-trailing slash + */ +char *basename(char *path) +{ + int path_len; + char *last_slash; + + if (path == NULL || path[0] == '\0') + return "."; + + if (strcmp(path, "/") == 0 || + strchr(path, '/') == NULL) + return path; + + /* remove trailing slashes */ + path_len = strlen(path); + while (path[path_len - 1] == '/') + path[--path_len] = '\0'; + + /* find last_slash */ + last_slash = strrchr(path, '/'); + + /* return base */ + if (last_slash) + return last_slash + 1; + + return path; +} + +/* dirname() + * + * Return the portion of a path before the last non-trailing slash + */ +char *dirname(char *path) +{ + int path_len; + char *last_slash; + + if (path == NULL || path[0] == '\0' || + strchr(path, '/') == NULL || + strcmp(path, "..") == 0) + return "."; + + if (strcmp(path, "/") == 0) + 
return path; + + /* remove trailing slashes */ + path_len = strlen(path); + while (path[path_len - 1] == '/') + path[--path_len] = '\0'; + + /* find last_slash */ + last_slash = strrchr(path, '/'); + + /* truncate string */ + if (last_slash) + { + /* last slash is first character */ + if (last_slash == path) + last_slash[1] = '\0'; + else + last_slash[0] = '\0'; + } + + return path; +} + +/* PVFS_util_resolve() + * + * given a local path of a file that resides on a pvfs2 volume, + * determine what the fsid and fs relative path is. + * + * returns 0 on succees, -PVFS_error on failure + */ +int PVFS_util_resolve( + const char* local_path, + PVFS_fs_id* out_fs_id, + char* out_fs_path, + int out_fs_path_max) +{ + int ret = -1; + char* tmp_path = NULL; + char* parent_path = NULL; + int base_len = 0; + + if(strlen(local_path) > (PVFS_NAME_MAX-1)) + { + gossip_err("Error: PVFS_util_resolve() input path too long.\n"); + return(-PVFS_ENAMETOOLONG); + } + + /* the most common case first; just try to resolve the path that we + * were given + */ + ret = PINT_util_resolve_absolute(local_path, out_fs_id, out_fs_path, + out_fs_path_max); + if(ret == 0) + { + /* done */ + return(0); + } + if(ret == -PVFS_ENOENT) + { + /* if the path wasn't found, try canonicalizing the path in case it + * refers to a relative path on a mounted volume or contains symlinks + */ + tmp_path = (char*)malloc(PVFS_NAME_MAX*sizeof(char)); + if(!tmp_path) + { + return(-PVFS_ENOMEM); + } + memset(tmp_path, 0, PVFS_NAME_MAX*sizeof(char)); + ret = PINT_realpath(local_path, tmp_path, (PVFS_NAME_MAX-1)); + if(ret == -PVFS_EINVAL) + { + /* one more try; canonicalize the parent in case this function + * is called before object creation; the basename + * doesn't yet exist but we still need to find the PVFS volume + */ + parent_path = (char*)malloc(PVFS_NAME_MAX*sizeof(char)); + if(!parent_path) + { + free(tmp_path); + return(-PVFS_ENOMEM); + } + /* find size of basename so we can reserve space for it */ + /* note: 
basename() and dirname() modify args, thus the strcpy */ + strcpy(parent_path, local_path); + base_len = strlen(basename(parent_path)); + strcpy(parent_path, local_path); + ret = PINT_realpath(dirname(parent_path), tmp_path, + (PVFS_NAME_MAX-base_len-2)); + if(ret < 0) + { + free(tmp_path); + free(parent_path); + /* last chance failed; this is not a valid pvfs2 path */ + return(-PVFS_ENOENT); + } + /* glue the basename back on */ + strcpy(parent_path, local_path); + strcat(tmp_path, "/"); + strcat(tmp_path, basename(parent_path)); + free(parent_path); + } + else if(ret < 0) + { + /* first canonicalize failed; this is not a valid pvfs2 path */ + free(tmp_path); + return(-PVFS_ENOENT); + } + + ret = PINT_util_resolve_absolute(tmp_path, out_fs_id, out_fs_path, + out_fs_path_max); + free(tmp_path); + + /* fall through and preserve "ret" to be returned */ + } + + return(ret); +} + + +/* PVFS_util_init_defaults() + * + * performs the standard set of initialization steps for the system + * interface, mostly just a wrapper function + * + * returns 0 on success, -PVFS_error on failure + */ +int PVFS_util_init_defaults(void) +{ + int ret = -1, i = 0, j = 0, found_one = 0; + int failed_indices[PVFS2_MAX_INVALID_MNTENTS] = {0}; + + /* use standard system tab files */ + const PVFS_util_tab* tab = PVFS_util_parse_pvfstab(NULL); + if (!tab) + { + gossip_err( + "Error: failed to find any pvfs2 file systems in the " + "standard system tab files.\n"); + return(-PVFS_ENOENT); + } + + /* initialize pvfs system interface */ + ret = PVFS_sys_initialize(GOSSIP_NO_DEBUG); + if (ret < 0) + { + return(ret); + } + + /* add in any file systems we found in the fstab */ + for(i = 0; i < tab->mntent_count; i++) + { + ret = PVFS_sys_fs_add(&tab->mntent_array[i]); + if (ret == 0) + { + found_one = 1; + } + else + { + failed_indices[j++] = i; + + if (j > (PVFS2_MAX_INVALID_MNTENTS - 1)) + { + gossip_err("*** Failed to initialize %d file systems " + "from tab file %s.\n ** If this is a valid " + 
"tabfile, please remove invalid entries.\n", + PVFS2_MAX_INVALID_MNTENTS, + tab->tabfile_name); + gossip_err("Continuing execution without remaining " + "mount entries\n"); + + break; + } + } + } + + /* remove any mount entries that couldn't be added here */ + for(i = 0; i < PVFS2_MAX_INVALID_MNTENTS; i++) + { + if (failed_indices[i]) + { + PVFS_util_remove_internal_mntent( + &tab->mntent_array[failed_indices[i]]); + } + else + { + break; + } + } + + if (found_one) + { + return 0; + } + + gossip_err("ERROR: could not initialize any file systems " + "in %s.\n", tab->tabfile_name); + + PVFS_sys_finalize(); + return -PVFS_ENODEV; +} + +/*********************/ +/* normal size units */ +/*********************/ +#define KILOBYTE 1024 +#define MEGABYTE (1024 * KILOBYTE) +#define GIGABYTE (1024 * MEGABYTE) +#define TERABYTE (1024llu * GIGABYTE) +#define PETABYTE (1024llu * TERABYTE) +#define EXABYTE (1024llu * PETABYTE) +#define ZETTABYTE (1024llu * EXABYTE) +#define YOTTABYTE (1024llu * ZETTABYTE) + +/*****************/ +/* si size units */ +/*****************/ +#define SI_KILOBYTE 1000 +#define SI_MEGABYTE (1000 * SI_KILOBYTE) +#define SI_GIGABYTE (1000 * SI_MEGABYTE) +#define SI_TERABYTE (1000llu * SI_GIGABYTE) +#define SI_PETABYTE (1000llu * SI_TERABYTE) +#define SI_EXABYTE (1000llu * SI_PETABYTE) +#define SI_ZETTABYTE (1000llu * SI_EXABYTE) +#define SI_YOTTABYTE (1000llu * SI_ZETTABYTE) + +#if SIZEOF_LONG_INT == 8 +#define NUM_SIZES 5 +#else +#define NUM_SIZES 4 +#endif + +static PVFS_size PINT_s_size_table[NUM_SIZES] = +{ + /*YOTTABYTE, ZETTABYTE, EXABYTE, */ +#if SIZEOF_LONG_INT == 8 + PETABYTE, + TERABYTE, +#endif + GIGABYTE, MEGABYTE, KILOBYTE +}; + +static PVFS_size PINT_s_si_size_table[NUM_SIZES] = +{ + /*SI_YOTTABYTE, SI_ZETTABYTE, SI_EXABYTE, */ +#if SIZEOF_LONG_INT == 8 + SI_PETABYTE, SI_TERABYTE, +#endif + SI_GIGABYTE, SI_MEGABYTE, SI_KILOBYTE +}; + +static const char *PINT_s_str_size_table[NUM_SIZES] = +{ + /*"Y", "Z", "E", */ +#if SIZEOF_LONG_INT == 8 + 
"P","T", +#endif + "G", "M", "K" +}; + +/* + * PVFS_util_make_size_human_readable + * + * converts a size value to a human readable string format + * + * size - numeric size of file + * out_str - nicely formatted string, like "3.4M" + * (caller must allocate this string) + * max_out_len - maximum lenght of out_str + * use_si_units - use units of 1000, not 1024 + */ +void PVFS_util_make_size_human_readable( + PVFS_size size, + char *out_str, + int max_out_len, + int use_si_units) +{ + int i = 0; + double tmp = 0.0f; + PVFS_size *size_table = + (use_si_units? PINT_s_si_size_table : PINT_s_size_table); + + if (out_str) + { + for (i = 0; i < NUM_SIZES; i++) + { + tmp = (double)size; + if ((PVFS_size) (tmp / size_table[i]) > 0) + { + tmp = (tmp / size_table[i]); + break; + } + } + if (i == NUM_SIZES) + { + _snprintf(out_str, 16, "%lld", lld(size)); + } + else + { + _snprintf(out_str, max_out_len, "%.1f%s", + tmp, PINT_s_str_size_table[i]); + } + } +} + +/* parse_flowproto_string() + * + * looks in the mount options string for a flowprotocol specifier and + * sets the flowproto type accordingly + * + * returns 0 on success, -PVFS_error on failure + */ +static int parse_flowproto_string( + const char *input, + enum PVFS_flowproto_type *flowproto) +{ + int ret = 0; + char *start = NULL; + char flow[256]; + char *comma = NULL; + + start = strstr(input, "flowproto"); + /* we must find a match if this function is being called... 
*/ + assert(start); + + /* scan out the option */ + ret = sscanf(start, "flowproto = %255s ,", flow); + if (ret != 1) + { + gossip_err("Error: malformed flowproto option in tab file.\n"); + return (-PVFS_EINVAL); + } + + /* chop it off at any trailing comma */ + comma = strchr(flow, ','); + if (comma) + { + comma[0] = '\0'; + } + + if (!strcmp(flow, "dump_offsets")) + { + *flowproto = FLOWPROTO_DUMP_OFFSETS; + } + else if (!strcmp(flow, "bmi_cache")) + { + *flowproto = FLOWPROTO_BMI_CACHE; + } + else if (!strcmp(flow, "multiqueue")) + { + *flowproto = FLOWPROTO_MULTIQUEUE; + } + else + { + gossip_err("Error: unrecognized flowproto option: %s\n", flow); + return (-PVFS_EINVAL); + } + return 0; +} + +void PVFS_util_free_mntent( + struct PVFS_sys_mntent *mntent) +{ + if (mntent) + { + if (mntent->pvfs_config_servers) + { + int j; + for (j=0; jnum_pvfs_config_servers; j++) + if (mntent->pvfs_config_servers[j]) + free(mntent->pvfs_config_servers[j]); + free(mntent->pvfs_config_servers); + mntent->pvfs_config_servers = NULL; + mntent->num_pvfs_config_servers = 0; + } + if (mntent->pvfs_fs_name) + { + free(mntent->pvfs_fs_name); + mntent->pvfs_fs_name = NULL; + } + if (mntent->mnt_dir) + { + free(mntent->mnt_dir); + mntent->mnt_dir = NULL; + } + if (mntent->mnt_opts) + { + free(mntent->mnt_opts); + mntent->mnt_opts = NULL; + } + + mntent->flowproto = 0; + mntent->encoding = 0; + mntent->fs_id = PVFS_FS_ID_NULL; + } +} + +int PVFS_util_copy_mntent( + struct PVFS_sys_mntent *dest_mntent, + struct PVFS_sys_mntent *src_mntent) +{ + int ret = -PVFS_EINVAL, i = 0; + + if (dest_mntent && src_mntent) + { + memset(dest_mntent, 0, sizeof(struct PVFS_sys_mntent)); + + dest_mntent->num_pvfs_config_servers = + src_mntent->num_pvfs_config_servers; + + dest_mntent->pvfs_config_servers = + malloc(dest_mntent->num_pvfs_config_servers * + sizeof(*dest_mntent->pvfs_config_servers)); + if (!dest_mntent) + { + return -PVFS_ENOMEM; + } + + memset(dest_mntent->pvfs_config_servers, 0, + 
dest_mntent->num_pvfs_config_servers * + sizeof(*dest_mntent->pvfs_config_servers)); + + for(i = 0; i < dest_mntent->num_pvfs_config_servers; i++) + { + dest_mntent->pvfs_config_servers[i] = + strdup(src_mntent->pvfs_config_servers[i]); + if (!dest_mntent->pvfs_config_servers[i]) + { + ret = -PVFS_ENOMEM; + goto error_exit; + } + } + + dest_mntent->pvfs_fs_name = strdup(src_mntent->pvfs_fs_name); + if (!dest_mntent->pvfs_fs_name) + { + ret = -PVFS_ENOMEM; + goto error_exit; + } + + if (src_mntent->mnt_dir) + { + dest_mntent->mnt_dir = strdup(src_mntent->mnt_dir); + if (!dest_mntent->mnt_dir) + { + ret = -PVFS_ENOMEM; + goto error_exit; + } + } + + if (src_mntent->mnt_opts) + { + dest_mntent->mnt_opts = strdup(src_mntent->mnt_opts); + if (!dest_mntent->mnt_opts) + { + ret = -PVFS_ENOMEM; + goto error_exit; + } + } + + dest_mntent->flowproto = src_mntent->flowproto; + dest_mntent->encoding = src_mntent->encoding; + dest_mntent->fs_id = src_mntent->fs_id; + dest_mntent->default_num_dfiles = src_mntent->default_num_dfiles; + } + return 0; + + error_exit: + + for(i = 0; i < dest_mntent->num_pvfs_config_servers; i++) + { + if (dest_mntent->pvfs_config_servers[i]) + { + free(dest_mntent->pvfs_config_servers[i]); + dest_mntent->pvfs_config_servers[i] = NULL; + } + } + + if (dest_mntent->pvfs_config_servers) + { + free(dest_mntent->pvfs_config_servers); + dest_mntent->pvfs_config_servers = NULL; + } + + if (dest_mntent->pvfs_fs_name) + { + free(dest_mntent->pvfs_fs_name); + dest_mntent->pvfs_fs_name = NULL; + } + + if (dest_mntent->mnt_dir) + { + free(dest_mntent->mnt_dir); + dest_mntent->mnt_dir = NULL; + } + + if (dest_mntent->mnt_opts) + { + free(dest_mntent->mnt_opts); + dest_mntent->mnt_opts = NULL; + } + return ret; +} + +/* + * Pull out the wire encoding specified as a mount option in the tab + * file. + * + * Input string is not modified; result goes into et. + * + * Returns 0 if all okay. 
+ */ +static int parse_encoding_string( + const char *cp, + enum PVFS_encoding_type *et) +{ + int i = 0; + const char *cq = NULL; + + struct + { + const char *name; + enum PVFS_encoding_type val; + } enc_str[] = + { { "default", PVFS2_ENCODING_DEFAULT }, + { "defaults", PVFS2_ENCODING_DEFAULT }, + { "direct", ENCODING_DIRECT }, + { "le_bfield", ENCODING_LE_BFIELD }, + { "xdr", ENCODING_XDR } }; + + gossip_debug(GOSSIP_CLIENT_DEBUG, "%s: input is %s\n", + __func__, cp); + cp += strlen("encoding"); + for (; isspace(*cp); cp++); /* optional spaces */ + if (*cp != '=') + { + gossip_err("Error: %s: malformed encoding option in tab file.\n", + __func__); + return -PVFS_EINVAL; + } + for (++cp; isspace(*cp); cp++); /* optional spaces */ + for (cq = cp; *cq && *cq != ','; cq++);/* find option end */ + + *et = -1; + for (i = 0; i < sizeof(enc_str) / sizeof(enc_str[0]); i++) + { + int n = strlen(enc_str[i].name); + if (cq - cp > n) + n = cq - cp; + if (!strncmp(enc_str[i].name, cp, n)) + { + *et = enc_str[i].val; + break; + } + } + if (*et == -1) + { + gossip_err("Error: %s: unknown encoding type in tab file.\n", + __func__); + return -PVFS_EINVAL; + } + return 0; +} + +/* PINT_release_pvfstab() + * + * frees up any resources associated with previously parsed tabfiles + * + * no return value + */ +void PINT_release_pvfstab(void) +{ + int i, j; + + gen_mutex_lock(&s_stat_tab_mutex); + for(i=0; ifs_spec = strdup(nexttok); + + + /* get the mount point */ + + /* nexttok = strtok_r(NULL, " ", &strtok_ctx); */ + nexttok = strtok(NULL, " "); + if(!nexttok) + { + goto exit; + } + fsentry->fs_file = strdup(nexttok); + + /* get the fs type */ + nexttok = strtok(NULL, " "); + if(!nexttok) + { + goto exit; + } + fsentry->fs_vfstype = strdup(nexttok); + + /* get the mount opts */ + nexttok = strtok(NULL, " "); + if(!nexttok) + { + goto exit; + } + fsentry->fs_mntops = strdup(nexttok); + + exit: + return fsentry; +} + +static void PINT_util_fsent_destroy(PINT_fstab_entry_t * entry) +{ + 
if(entry) + { + if(entry->fs_spec) + { + free(entry->fs_spec); + } + + if(entry->fs_file) + { + free(entry->fs_file); + } + + if(entry->fs_vfstype) + { + free(entry->fs_vfstype); + } + + if(entry->fs_mntops) + { + free(entry->fs_mntops); + } + + if(entry->fs_type) + { + free(entry->fs_type); + } + + free(entry); + } +} +#endif /* DEFINE_MY_GET_NEXT_FSENT */ + +int32_t PVFS_util_translate_mode(int mode, int suid) +{ + int ret = 0, i = 0; +#define NUM_MODES 11 + +#define S_IXOTH 0001 +#define S_IWOTH 0002 +#define S_IROTH 0004 +#define S_IXGRP 0010 +#define S_IWGRP 0020 +#define S_IRGRP 0040 +#define S_IXUSR 0100 +#define S_IWUSR 0200 +#define S_IRUSR 0400 +#define S_ISGID 002000 +#define S_ISUID 004000 + + static int modes[NUM_MODES] = + { + S_IXOTH, S_IWOTH, S_IROTH, + S_IXGRP, S_IWGRP, S_IRGRP, + S_IXUSR, S_IWUSR, S_IRUSR, + S_ISGID, S_ISUID + }; + static int pvfs2_modes[NUM_MODES] = + { + PVFS_O_EXECUTE, PVFS_O_WRITE, PVFS_O_READ, + PVFS_G_EXECUTE, PVFS_G_WRITE, PVFS_G_READ, + PVFS_U_EXECUTE, PVFS_U_WRITE, PVFS_U_READ, + PVFS_G_SGID, PVFS_U_SUID + }; + + for(i = 0; i < NUM_MODES; i++) + { + if (mode & modes[i]) + { + ret |= pvfs2_modes[i]; + } + } + if (suid == 0 && (ret & PVFS_U_SUID)) + { + ret &= ~PVFS_U_SUID; + } + return ret; +#undef NUM_MODES +} + +void PVFS_util_gen_credentials( + PVFS_credentials *credentials) +{ + PINT_util_gen_credentials(credentials); +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/windows/wincommon.h b/src/common/windows/wincommon.h new file mode 100755 index 0000000..f0c7a6d --- /dev/null +++ b/src/common/windows/wincommon.h @@ -0,0 +1,40 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +/* + * declarations for Windows + */ + +#ifndef __WINCOMMON_H +#define __WINCOMMON_H + +#include +#include + +#define __inline__ _inline +#define inline _inline +#define __func__ __FUNCTION__ + +/* + * gettimeofday + */ +static int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + struct _timeb timebuffer; + errno_t ret; + + memset(&timebuffer, 0, sizeof(struct _timeb)); + ret = _ftime_s(&timebuffer); + if (ret == 0) + { + tv->tv_sec = (long) timebuffer.time; + tv->tv_usec = timebuffer.millitm * 1000; + } + + return ret; +} + +#endif \ No newline at end of file diff --git a/src/io/bmi/bmi_wintcp/bmi-tcp-addressing.h b/src/io/bmi/bmi_wintcp/bmi-tcp-addressing.h new file mode 100755 index 0000000..7395a35 --- /dev/null +++ b/src/io/bmi/bmi_wintcp/bmi-tcp-addressing.h @@ -0,0 +1,102 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * tcp specific host addressing information + */ + +#ifndef __BMI_TCP_ADDRESSING_H +#define __BMI_TCP_ADDRESSING_H + +#include "bmi-types.h" +/* #include */ + +/***************************************************************** + * Information specific to tcp/ip + */ + +/* + max number of sequential zero reads to allow; usually indicates a + dead connection, but it's used for checking several times to be sure +*/ +#define BMI_TCP_ZERO_READ_LIMIT 10 + +/* wait no more than 10 seconds for a partial BMI header to arrive on a + * socket once we have detected part of it. 
+ */ +#define BMI_TCP_HEADER_WAIT_SECONDS 10 + +/* peer name types */ +#define BMI_TCP_PEER_IP 1 +#define BMI_TCP_PEER_HOSTNAME 2 + +#ifdef USE_TRUSTED + +struct tcp_allowed_connection_s { + int port_enforce; + unsigned long ports[2]; + int network_enforce; + int network_count; + struct in_addr *network; + struct in_addr *netmask; +}; + +#endif + + +/* this contains TCP/IP addressing information- it is filled in as + * connections are made */ +struct tcp_addr +{ + bmi_method_addr_p map; /* points back to generic address */ \ + BMI_addr_t bmi_addr; + /* stores error code for addresses that are broken for some reason */ + int addr_error; + char *hostname; + int port; + int socket; + /* flag that indicates this address represents a + * server port on which connections may be accepted */ + int server_port; + /* reference count of pending send operations to this address */ + int write_ref_count; + /* is the socket connected yet? */ + int not_connected; + /* socket collection link */ + struct qlist_head sc_link; + int sc_index; + /* count of the number of sequential zero read operations */ + int zero_read_limit; + /* timer for how long we wait on incomplete headers to arrive */ + int short_header_timer; + /* flag used to determine if we can reconnect this address after failure */ + int dont_reconnect; + char* peer; + int peer_type; +}; + + +/***************************************************************** + * function prototypes + */ + +#define bmi_tcp_errno_to_pvfs bmi_errno_to_pvfs + +void tcp_forget_addr(bmi_method_addr_p map, + int dealloc_flag, + int error_code); +bmi_method_addr_p alloc_tcp_method_addr(void); + +#endif /* __BMI_TCP_ADDRESSING_H */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/bmi/bmi_wintcp/bmi-wintcp.c b/src/io/bmi/bmi_wintcp/bmi-wintcp.c new file mode 100755 index 0000000..e0e69ab --- /dev/null +++ b/src/io/bmi/bmi_wintcp/bmi-wintcp.c @@ -0,0 
+1,4177 @@ +/* + * (C) 2001-2011 Clemson University, The University of Chicago and + * Omnibond LLC + * + * See COPYING in top-level directory. + */ + +/* + * Windows TCP/IP implementation of a BMI method + */ + +#include + +#include +#include +#include +//#include +#include +//#include +//#include +#include +//#include +#include +#include +//#include +//#include +//#include +//#include +#include "pint-mem.h" + +#include "pvfs2-config.h" +#ifdef HAVE_NETDB_H +#include +#endif + +#include "bmi-method-support.h" +#include "bmi-method-callback.h" +#include "bmi-tcp-addressing.h" +#ifdef __PVFS2_USE_EPOLL__ +#include "socket-collection-epoll.h" +#else +#include "socket-collection.h" +#endif +#include "op-list.h" +#include "gossip.h" +#include "sockio.h" +#include "bmi-byteswap.h" +#include "id-generator.h" +#include "pint-event.h" +#include "pvfs2-debug.h" +#ifdef USE_TRUSTED +#include "server-config.h" +#include "bmi-tcp-addressing.h" +#endif +#include "gen-locks.h" +#include "pint-hint.h" + +static gen_mutex_t interface_mutex = GEN_MUTEX_INITIALIZER; +static gen_cond_t interface_cond = GEN_COND_INITIALIZER; +static int sc_test_busy = 0; + +/*** Windows-specific additions ***/ +typedef unsigned int socklen_t; + +/* Windows Sockets doesn't have inet_aton */ +int inet_aton(const char *cp, struct in_addr *inp) +{ + unsigned long addr; + + if (cp == NULL || strlen(cp) == 0 || inp == NULL) + { + return (0); + } + + /* handle 255.255.255.255 separately */ + if (strcmp(cp, "255.255.255.255") == 0) + { + inp->S_un.S_addr = 0xFFFFFFFF; + return (1); + } + + /* use inet_addr for other addresses */ + addr = inet_addr(cp); + if (addr == INADDR_NONE) + { + return (0); + } + + inp->S_un.S_addr = addr; + + return (1); + +} +/***********************************/ + +/* function prototypes */ +int BMI_tcp_initialize(bmi_method_addr_p listen_addr, + int method_id, + int init_flags); +int BMI_tcp_finalize(void); +int BMI_tcp_set_info(int option, + void *inout_parameter); +int 
BMI_tcp_get_info(int option, + void *inout_parameter); +void *BMI_tcp_memalloc(bmi_size_t size, + enum bmi_op_type send_recv); +int BMI_tcp_memfree(void *buffer, + bmi_size_t size, + enum bmi_op_type send_recv); +int BMI_tcp_unexpected_free(void *buffer); +int BMI_tcp_post_send(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *buffer, + bmi_size_t size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +int BMI_tcp_post_sendunexpected(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *buffer, + bmi_size_t size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +int BMI_tcp_post_recv(bmi_op_id_t * id, + bmi_method_addr_p src, + void *buffer, + bmi_size_t expected_size, + bmi_size_t * actual_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +int BMI_tcp_test(bmi_op_id_t id, + int *outcount, + bmi_error_code_t * error_code, + bmi_size_t * actual_size, + void **user_ptr, + int max_idle_time_ms, + bmi_context_id context_id); +int BMI_tcp_testsome(int incount, + bmi_op_id_t * id_array, + int *outcount, + int *index_array, + bmi_error_code_t * error_code_array, + bmi_size_t * actual_size_array, + void **user_ptr_array, + int max_idle_time_ms, + bmi_context_id context_id); +int BMI_tcp_testunexpected(int incount, + int *outcount, + struct bmi_method_unexpected_info *info, + int max_idle_time_ms); +int BMI_tcp_testcontext(int incount, + bmi_op_id_t * out_id_array, + int *outcount, + bmi_error_code_t * error_code_array, + bmi_size_t * actual_size_array, + void **user_ptr_array, + int max_idle_time_ms, + bmi_context_id context_id); +bmi_method_addr_p BMI_tcp_method_addr_lookup(const char *id_string); +const char* BMI_tcp_addr_rev_lookup_unexpected(bmi_method_addr_p map); +int BMI_tcp_query_addr_range(bmi_method_addr_p, const char *, 
int); +int BMI_tcp_post_send_list(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t total_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +int BMI_tcp_post_recv_list(bmi_op_id_t * id, + bmi_method_addr_p src, + void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t total_expected_size, + bmi_size_t * total_actual_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +int BMI_tcp_post_sendunexpected_list(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t total_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +int BMI_tcp_open_context(bmi_context_id context_id); +void BMI_tcp_close_context(bmi_context_id context_id); +int BMI_tcp_cancel(bmi_op_id_t id, bmi_context_id context_id); + +char BMI_tcp_method_name[] = "bmi_tcp"; + +/* size of encoded message header */ +#define TCP_ENC_HDR_SIZE 24 + +/* structure internal to tcp for use as a message header */ +struct tcp_msg_header +{ + uint32_t magic_nr; /* magic number */ + uint32_t mode; /* eager, rendezvous, etc. 
*/ + bmi_msg_tag_t tag; /* user specified message tag */ + bmi_size_t size; /* length of trailing message */ + char enc_hdr[TCP_ENC_HDR_SIZE]; /* encoded version of header info */ +}; + +#define BMI_TCP_ENC_HDR(hdr) \ + do { \ + *((uint32_t*)&((hdr).enc_hdr[0])) = htobmi32((hdr).magic_nr); \ + *((uint32_t*)&((hdr).enc_hdr[4])) = htobmi32((hdr).mode); \ + *((uint64_t*)&((hdr).enc_hdr[8])) = htobmi64((hdr).tag); \ + *((uint64_t*)&((hdr).enc_hdr[16])) = htobmi64((hdr).size); \ + } while(0) + +#define BMI_TCP_DEC_HDR(hdr) \ + do { \ + (hdr).magic_nr = bmitoh32(*((uint32_t*)&((hdr).enc_hdr[0]))); \ + (hdr).mode = bmitoh32(*((uint32_t*)&((hdr).enc_hdr[4]))); \ + (hdr).tag = bmitoh64(*((uint64_t*)&((hdr).enc_hdr[8]))); \ + (hdr).size = bmitoh64(*((uint64_t*)&((hdr).enc_hdr[16]))); \ + } while(0) + +/* enumerate states that we care about */ +enum bmi_tcp_state +{ + BMI_TCP_INPROGRESS, + BMI_TCP_BUFFERING, + BMI_TCP_COMPLETE +}; + +/* tcp private portion of operation structure */ +struct tcp_op +{ + struct tcp_msg_header env; /* envelope for this message */ + enum bmi_tcp_state tcp_op_state; + /* these two fields are used as place holders for the buffer + * list and size list when we really don't have lists (regular + * BMI_send or BMI_recv operations); it allows us to use + * generic code to handle both cases + */ + void *buffer_list_stub; + bmi_size_t size_list_stub; +}; + +/* static io vector for use with readv and writev; we can only use + * this because BMI serializes module calls + */ +#define BMI_TCP_IOV_COUNT 10 +static WSABUF stat_io_vector[BMI_TCP_IOV_COUNT+1]; + +/* internal utility functions */ +static int tcp_server_init(void); +static void dealloc_tcp_method_addr(bmi_method_addr_p map); +static int tcp_sock_init(bmi_method_addr_p my_method_addr); +static int enqueue_operation(op_list_p target_list, + enum bmi_op_type send_recv, + bmi_method_addr_p map, + void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t amt_complete, + 
bmi_size_t env_amt_complete, + bmi_op_id_t * id, + int tcp_op_state, + struct tcp_msg_header header, + void *user_ptr, + bmi_size_t actual_size, + bmi_size_t expected_size, + bmi_context_id context_id, + int32_t event_id); +static int tcp_cleanse_addr(bmi_method_addr_p map, int error_code); +static int tcp_shutdown_addr(bmi_method_addr_p map); +static int tcp_do_work(int max_idle_time); +static int tcp_do_work_error(bmi_method_addr_p map); +static int tcp_do_work_recv(bmi_method_addr_p map, int* stall_flag); +static int tcp_do_work_send(bmi_method_addr_p map, int* stall_flag); +static int work_on_recv_op(method_op_p my_method_op, + int *stall_flag); +static int work_on_send_op(method_op_p my_method_op, + int *blocked_flag, int* stall_flag); +static int tcp_accept_init(int *socket, char** peer); +static method_op_p alloc_tcp_method_op(void); +static void dealloc_tcp_method_op(method_op_p old_op); +static int handle_new_connection(bmi_method_addr_p map); +static int tcp_post_send_generic(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + enum bmi_buffer_type buffer_type, + struct tcp_msg_header my_header, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +static int tcp_post_recv_generic(bmi_op_id_t * id, + bmi_method_addr_p src, + void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t expected_size, + bmi_size_t * actual_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); +static int payload_progress(int s, void *const *buffer_list, const bmi_size_t* + size_list, int list_count, bmi_size_t total_size, int* list_index, + bmi_size_t* current_index_complete, enum bmi_op_type send_recv, + char* enc_hdr, bmi_size_t* env_amt_complete); + +#if defined(USE_TRUSTED) && defined(__PVFS2_CLIENT__) +static int tcp_enable_trusted(struct tcp_addr *tcp_addr_data); +#endif +#if 
defined(USE_TRUSTED) && defined(__PVFS2_SERVER__) +static int tcp_allow_trusted(struct sockaddr_in *peer_sockaddr); +#endif + +static void bmi_set_sock_buffers(int socket); + +/* exported method interface */ +const struct bmi_method_ops bmi_tcp_ops = { + BMI_tcp_method_name, + 0, /* flags */ + BMI_tcp_initialize, + BMI_tcp_finalize, + BMI_tcp_set_info, + BMI_tcp_get_info, + BMI_tcp_memalloc, + BMI_tcp_memfree, + BMI_tcp_unexpected_free, + BMI_tcp_post_send, + BMI_tcp_post_sendunexpected, + BMI_tcp_post_recv, + BMI_tcp_test, + BMI_tcp_testsome, + BMI_tcp_testcontext, + BMI_tcp_testunexpected, + BMI_tcp_method_addr_lookup, + BMI_tcp_post_send_list, + BMI_tcp_post_recv_list, + BMI_tcp_post_sendunexpected_list, + BMI_tcp_open_context, + BMI_tcp_close_context, + BMI_tcp_cancel, + BMI_tcp_addr_rev_lookup_unexpected, + BMI_tcp_query_addr_range +}; + +/* module parameters */ +static struct +{ + int method_flags; + int method_id; + bmi_method_addr_p listen_addr; +} tcp_method_params; + +#if defined(USE_TRUSTED) && defined(__PVFS2_SERVER__) +static struct tcp_allowed_connection_s *gtcp_allowed_connection = NULL; +#endif + +static int check_unexpected = 1; + +/* op_list_array indices */ +enum +{ + NUM_INDICES = 5, + IND_SEND = 0, + IND_RECV = 1, + IND_RECV_INFLIGHT = 2, + IND_RECV_EAGER_DONE_BUFFERING = 3, + IND_COMPLETE_RECV_UNEXP = 4, /* MAKE THIS COMES LAST */ +}; + +/* internal operation lists */ +static op_list_p op_list_array[6] = { NULL, NULL, NULL, NULL, + NULL, NULL +}; + +/* internal completion queues */ +static op_list_p completion_array[BMI_MAX_CONTEXTS] = { NULL }; + +/* internal socket collection */ +static socket_collection_p tcp_socket_collection_p = NULL; + +/* tunable parameters */ +enum +{ + /* amount of pending connections we'll allow */ + TCP_BACKLOG = 256, + /* amount of work to be done during a test. This roughly + * translates into the number of sockets that we will perform + * nonblocking operations on during one function call. 
+ */ + TCP_WORK_METRIC = 128 +}; + +/* TCP message modes */ +enum +{ + TCP_MODE_IMMED = 1, /* not used for TCP/IP */ + TCP_MODE_UNEXP = 2, + TCP_MODE_EAGER = 4, + TCP_MODE_REND = 8 +}; + +/* Allowable sizes for each mode */ +enum +{ + TCP_MODE_EAGER_LIMIT = 16384, /* 16K */ + TCP_MODE_REND_LIMIT = 16777216 /* 16M */ +}; + +/* toggles cancel mode; for bmi_tcp this will result in socket being closed + * in all cancellation cases + */ +static int forceful_cancel_mode = 0; + +/* + Socket buffer sizes, currently these default values will be used + for the clients... (TODO) + */ +static int tcp_buffer_size_receive = 0; +static int tcp_buffer_size_send = 0; + +static PINT_event_type bmi_tcp_send_event_id; +static PINT_event_type bmi_tcp_recv_event_id; + +static PINT_event_group bmi_tcp_event_group; +/* static pid_t bmi_tcp_pid */ +static HANDLE bmi_tcp_pid; + +/************************************************************************* + * Visible Interface + */ + +/* BMI_tcp_initialize() + * + * Initializes the tcp method. Must be called before any other tcp + * method functions. 
+ * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_initialize(bmi_method_addr_p listen_addr, + int method_id, + int init_flags) +{ + + int ret = -1, err; + int tmp_errno = bmi_tcp_errno_to_pvfs(-ENOSYS); + struct tcp_addr *tcp_addr_data = NULL; + int i = 0; + WORD version; + WSADATA wsaData; + + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "Initializing TCP/IP module.\n"); + + /* check args */ + if ((init_flags & BMI_INIT_SERVER) && !listen_addr) + { + gossip_lerr("Error: bad parameters given to TCP/IP module.\n"); + return (bmi_tcp_errno_to_pvfs(-EINVAL)); + } + + version = MAKEWORD(2, 2); + err = WSAStartup(version, &wsaData); + if (err != 0) + { + gossip_lerr("Error: could not initialize Windows Sockets: %d.\n", err); + return (bmi_tcp_errno_to_pvfs(-ENOSYS)); + } + + gen_mutex_lock(&interface_mutex); + + /* zero out our parameter structure and fill it in */ + memset(&tcp_method_params, 0, sizeof(tcp_method_params)); + tcp_method_params.method_id = method_id; + tcp_method_params.method_flags = init_flags; + + if (init_flags & BMI_INIT_SERVER) + { + /* hang on to our local listening address if needed */ + tcp_method_params.listen_addr = listen_addr; + /* and initialize server functions */ + ret = tcp_server_init(); + if (ret < 0) + { + tmp_errno = bmi_tcp_errno_to_pvfs(ret); + gossip_err("Error: tcp_server_init() failure.\n"); + goto initialize_failure; + } + } + + /* set up the operation lists */ + for (i = 0; i < NUM_INDICES; i++) + { + op_list_array[i] = op_list_new(); + if (!op_list_array[i]) + { + tmp_errno = bmi_tcp_errno_to_pvfs(-ENOMEM); + goto initialize_failure; + } + } + + /* set up the socket collection */ + if (tcp_method_params.method_flags & BMI_INIT_SERVER) + { + tcp_addr_data = (struct tcp_addr *) tcp_method_params.listen_addr->method_data; + tcp_socket_collection_p = BMI_socket_collection_init(tcp_addr_data->socket); + } + else + { + tcp_socket_collection_p = BMI_socket_collection_init(-1); + } + + if (!tcp_socket_collection_p) + { + 
tmp_errno = bmi_tcp_errno_to_pvfs(-ENOMEM); + goto initialize_failure; + } + + /* bmi_tcp_pid = getpid(); */ + bmi_tcp_pid = GetCurrentProcess(); + PINT_event_define_group("bmi_tcp", &bmi_tcp_event_group); + + /* Define the send event: + * START: (client_id, request_id, rank, handle, op_id, send_size) + * STOP: (size_sent) + */ + PINT_event_define_event( + &bmi_tcp_event_group, +#ifdef __PVFS2_SERVER__ + "bmi_server_send", +#else + "bmi_client_send", +#endif + "%d%d%d%llu%d%d", + "%d", &bmi_tcp_send_event_id); + + /* Define the recv event: + * START: (client_id, request_id, rank, handle, op_id, recv_size) + * STOP: (size_received) + */ + PINT_event_define_event( + &bmi_tcp_event_group, +#ifdef __PVFS2_SERVER__ + "bmi_server_recv", +#else + "bmi_client_recv", +#endif + "%d%d%d%llu%d%d", + "%d", &bmi_tcp_recv_event_id); + + gen_mutex_unlock(&interface_mutex); + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, + "TCP/IP module successfully initialized.\n"); + return (0); + + initialize_failure: + + /* cleanup data structures and bail out */ + for (i = 0; i < NUM_INDICES; i++) + { + if (op_list_array[i]) + { + op_list_cleanup(op_list_array[i]); + } + } + if (tcp_socket_collection_p) + { + BMI_socket_collection_finalize(tcp_socket_collection_p); + } + gen_mutex_unlock(&interface_mutex); + return (tmp_errno); +} + + +/* BMI_tcp_finalize() + * + * Shuts down the tcp method. 
+ * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_finalize(void) +{ + int i = 0; + + gen_mutex_lock(&interface_mutex); + + /* shut down our listen addr, if we have one */ + if ((tcp_method_params.method_flags & BMI_INIT_SERVER) + && tcp_method_params.listen_addr) + { + dealloc_tcp_method_addr(tcp_method_params.listen_addr); + } + + /* note that this forcefully shuts down operations */ + for (i = 0; i < NUM_INDICES; i++) + { + if (op_list_array[i]) + { + op_list_cleanup(op_list_array[i]); + op_list_array[i] = NULL; + } + } + + /* get rid of socket collection */ + if (tcp_socket_collection_p) + { + BMI_socket_collection_finalize(tcp_socket_collection_p); + tcp_socket_collection_p = NULL; + } + + /* NOTE: we are trusting the calling BMI layer to deallocate + * all of the method addresses (this will close any open sockets) + */ + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "TCP/IP module finalized.\n"); + gen_mutex_unlock(&interface_mutex); + return (0); +} + + +/* + * BMI_tcp_method_addr_lookup() + * + * resolves the string representation of an address into a method + * address structure. 
+ * + * returns a pointer to method_addr on success, NULL on failure + */ +bmi_method_addr_p BMI_tcp_method_addr_lookup(const char *id_string) +{ + char *tcp_string = NULL; + char *delim = NULL; + char *hostname = NULL; + bmi_method_addr_p new_addr = NULL; + struct tcp_addr *tcp_addr_data = NULL; + int ret = -1; + + tcp_string = string_key("tcp", id_string); + if (!tcp_string) + { + /* the string doesn't even have our info */ + return (NULL); + } + + /* start breaking up the method information */ + /* for normal tcp, it is simply hostname:port */ + if ((delim = strchr(tcp_string, ':')) == NULL) + { + gossip_lerr("Error: malformed tcp address.\n"); + free(tcp_string); + return (NULL); + } + + /* looks ok, so let's build the method addr structure */ + new_addr = alloc_tcp_method_addr(); + if (!new_addr) + { + free(tcp_string); + return (NULL); + } + tcp_addr_data = (struct tcp_addr *) new_addr->method_data; + + ret = sscanf((delim + 1), "%d", &(tcp_addr_data->port)); + if (ret != 1) + { + gossip_lerr("Error: malformed tcp address.\n"); + dealloc_tcp_method_addr(new_addr); + free(tcp_string); + return (NULL); + } + + hostname = (char *) malloc((delim - tcp_string + 1)); + if (!hostname) + { + dealloc_tcp_method_addr(new_addr); + free(tcp_string); + return (NULL); + } + strncpy(hostname, tcp_string, (delim - tcp_string)); + hostname[delim - tcp_string] = '\0'; + + tcp_addr_data->hostname = hostname; + + free(tcp_string); + return (new_addr); +} + + +/* BMI_tcp_memalloc() + * + * Allocates memory that can be used in native mode by tcp. + * + * returns 0 on success, -errno on failure + */ +void *BMI_tcp_memalloc(bmi_size_t size, + enum bmi_op_type send_recv) +{ + /* we really don't care what flags the caller uses, TCP/IP has no + * preferences about how the memory should be configured. 
+ */ + +/* return (calloc(1,(size_t) size)); */ + return PINT_mem_aligned_alloc(size, 4096); +} + + +/* BMI_tcp_memfree() + * + * Frees memory that was allocated with BMI_tcp_memalloc() + * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_memfree(void *buffer, + bmi_size_t size, + enum bmi_op_type send_recv) +{ + PINT_mem_aligned_free(buffer); + return (0); +} + +/* BMI_tcp_unexpected_free() + * + * Frees memory that was returned from BMI_tcp_test_unexpected() + * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_unexpected_free(void *buffer) +{ + if (buffer) + { + free(buffer); + } + return (0); +} + +#ifdef USE_TRUSTED + +static struct tcp_allowed_connection_s * +alloc_trusted_connection_info(int network_count) +{ + struct tcp_allowed_connection_s *tcp_allowed_connection_info = NULL; + + tcp_allowed_connection_info = (struct tcp_allowed_connection_s *) + calloc(1, sizeof(struct tcp_allowed_connection_s)); + if (tcp_allowed_connection_info) + { + tcp_allowed_connection_info->network = + (struct in_addr *) calloc(network_count, sizeof(struct in_addr)); + if (tcp_allowed_connection_info->network == NULL) + { + free(tcp_allowed_connection_info); + tcp_allowed_connection_info = NULL; + } + else + { + tcp_allowed_connection_info->netmask = + (struct in_addr *) calloc(network_count, sizeof(struct in_addr)); + if (tcp_allowed_connection_info->netmask == NULL) + { + free(tcp_allowed_connection_info->network); + free(tcp_allowed_connection_info); + tcp_allowed_connection_info = NULL; + } + else { + tcp_allowed_connection_info->network_count = network_count; + } + } + } + return tcp_allowed_connection_info; +} + +static void +dealloc_trusted_connection_info(void* ptcp_allowed_connection_info) +{ + struct tcp_allowed_connection_s *tcp_allowed_connection_info = + (struct tcp_allowed_connection_s *) ptcp_allowed_connection_info; + if (tcp_allowed_connection_info) + { + free(tcp_allowed_connection_info->network); + tcp_allowed_connection_info->network = 
NULL; + free(tcp_allowed_connection_info->netmask); + tcp_allowed_connection_info->netmask = NULL; + free(tcp_allowed_connection_info); + } + return; +} + +#endif + +/* + * This function will convert a mask_bits value to an in_addr + * representation. i.e for example if + * mask_bits was 24 then it would be 255.255.255.0 + * if mask_bits was 22 then it would be 255.255.252.0 + * etc + */ +static void convert_mask(int mask_bits, struct in_addr *mask) +{ + uint32_t addr = -1; + addr = addr & ~~(-1 << (mask_bits ? (32 - mask_bits) : 32)); + mask->s_addr = htonl(addr); + return; +} + +/* BMI_tcp_set_info() + * + * Pass in optional parameters. + * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_set_info(int option, + void *inout_parameter) +{ + int ret = -1; + bmi_method_addr_p tmp_addr = NULL; + + gen_mutex_lock(&interface_mutex); + + switch (option) + { + case BMI_TCP_BUFFER_SEND_SIZE: + tcp_buffer_size_send = *((int *)inout_parameter); + ret = 0; +#ifdef __PVFS2_SERVER__ + /* Set the default socket buffer sizes for the server socket */ + bmi_set_sock_buffers( + ((struct tcp_addr *) + tcp_method_params.listen_addr->method_data)->socket); +#endif + break; + case BMI_TCP_BUFFER_RECEIVE_SIZE: + tcp_buffer_size_receive = *((int *)inout_parameter); + ret = 0; +#ifdef __PVFS2_SERVER__ + /* Set the default socket buffer sizes for the server socket */ + bmi_set_sock_buffers( + ((struct tcp_addr *) + tcp_method_params.listen_addr->method_data)->socket); +#endif + break; + case BMI_TCP_CLOSE_SOCKET: + /* this should no longer make it to the bmi_tcp method; see bmi.c */ + ret = 0; + break; + case BMI_FORCEFUL_CANCEL_MODE: + forceful_cancel_mode = 1; + ret = 0; + break; + case BMI_DROP_ADDR: + if (inout_parameter == NULL) + { + ret = bmi_tcp_errno_to_pvfs(-EINVAL); + } + else + { + tmp_addr = (bmi_method_addr_p) inout_parameter; + /* take it out of the socket collection */ + tcp_forget_addr(tmp_addr, 1, 0); + ret = 0; + } + break; +#ifdef USE_TRUSTED + case 
BMI_TRUSTED_CONNECTION: + { + struct tcp_allowed_connection_s *tcp_allowed_connection = NULL; + if (inout_parameter == NULL) + { + ret = bmi_tcp_errno_to_pvfs(-EINVAL); + break; + } + else + { + int bmi_networks_count = 0; + char **bmi_networks = NULL; + int *bmi_netmasks = NULL; + struct server_configuration_s *svc_config = NULL; + + svc_config = (struct server_configuration_s *) inout_parameter; + tcp_allowed_connection = alloc_trusted_connection_info(svc_config->allowed_networks_count); + if (tcp_allowed_connection == NULL) + { + ret = bmi_tcp_errno_to_pvfs(-ENOMEM); + break; + } +#ifdef __PVFS2_SERVER__ + gtcp_allowed_connection = tcp_allowed_connection; +#endif + /* Stash this in the server_configuration_s structure. freed later on */ + svc_config->security = tcp_allowed_connection; + svc_config->security_dtor = &dealloc_trusted_connection_info; + ret = 0; + /* Fill up the list of allowed ports */ + PINT_config_get_allowed_ports(svc_config, + &tcp_allowed_connection->port_enforce, + tcp_allowed_connection->ports); + + /* if it was enabled, make sure that we know how to deal with it */ + if (tcp_allowed_connection->port_enforce == 1) + { + /* illegal ports */ + if (tcp_allowed_connection->ports[0] > 65535 + || tcp_allowed_connection->ports[1] > 65535 + || tcp_allowed_connection->ports[1] < tcp_allowed_connection->ports[0]) + { + gossip_lerr("Error: illegal trusted port values\n"); + ret = bmi_tcp_errno_to_pvfs(-EINVAL); + /* don't enforce anything! 
*/ + tcp_allowed_connection->port_enforce = 0; + } + } + ret = 0; + /* Retrieve the list of BMI network addresses and masks */ + PINT_config_get_allowed_networks(svc_config, + &tcp_allowed_connection->network_enforce, + &bmi_networks_count, + &bmi_networks, + &bmi_netmasks); + + /* if it was enabled, make sure that we know how to deal with it */ + if (tcp_allowed_connection->network_enforce == 1) + { + int i; + + for (i = 0; i < bmi_networks_count; i++) + { + char *tcp_string = NULL; + /* Convert the network string into an in_addr_t structure */ + tcp_string = string_key("tcp", bmi_networks[i]); + if (!tcp_string) + { + /* the string doesn't even have our info */ + gossip_lerr("Error: malformed tcp network address\n"); + ret = bmi_tcp_errno_to_pvfs(-EINVAL); + } + else { + /* convert this into an in_addr_t */ + inet_aton(tcp_string, &tcp_allowed_connection->network[i]); + free(tcp_string); + } + convert_mask(bmi_netmasks[i], &tcp_allowed_connection->netmask[i]); + } + /* don't enforce anything if there were any errors */ + if (ret != 0) + { + tcp_allowed_connection->network_enforce = 0; + } + } + } + break; + } +#endif + case BMI_TCP_CHECK_UNEXPECTED: + { + check_unexpected = *(int *)inout_parameter; + ret = 0; + break; + } + + default: + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, + "TCP hint %d not implemented.\n", option); + ret = 0; + break; + } + + gen_mutex_unlock(&interface_mutex); + return (ret); +} + +/* BMI_tcp_get_info() + * + * Query for optional parameters. 
+ * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_get_info(int option, + void *inout_parameter) +{ + struct method_drop_addr_query* query; + struct tcp_addr* tcp_addr_data; + int ret = 0; + + gen_mutex_lock(&interface_mutex); + + switch (option) + { + case BMI_CHECK_MAXSIZE: + *((int *) inout_parameter) = TCP_MODE_REND_LIMIT; + ret = 0; + break; + case BMI_DROP_ADDR_QUERY: + query = (struct method_drop_addr_query*)inout_parameter; + tcp_addr_data = (struct tcp_addr *) query->addr->method_data; + /* only suggest that we discard the address if we have experienced + * an error and there is no way to reconnect + */ + if(tcp_addr_data->addr_error != 0 && + tcp_addr_data->dont_reconnect == 1) + { + query->response = 1; + } + else + { + query->response = 0; + } + ret = 0; + break; + case BMI_GET_UNEXP_SIZE: + *((int *) inout_parameter) = TCP_MODE_EAGER_LIMIT; + ret = 0; + break; + + default: + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, + "TCP hint %d not implemented.\n", option); + ret = -ENOSYS; + break; + } + + gen_mutex_unlock(&interface_mutex); + return (ret < 0) ? bmi_tcp_errno_to_pvfs(ret) : ret; +} + + +/* BMI_tcp_post_send() + * + * Submits send operations. 
+ * + * returns 0 on success that requires later poll, returns 1 on instant + * completion, -errno on failure + */ +int BMI_tcp_post_send(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *buffer, + bmi_size_t size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + struct tcp_msg_header my_header; + int ret = -1; + + /* clear the id field for safety */ + *id = 0; + + /* fill in the TCP-specific message header */ + if (size > TCP_MODE_REND_LIMIT) + { + return (bmi_tcp_errno_to_pvfs(-EMSGSIZE)); + } + + if (size <= TCP_MODE_EAGER_LIMIT) + { + my_header.mode = TCP_MODE_EAGER; + } + else + { + my_header.mode = TCP_MODE_REND; + } + my_header.tag = tag; + my_header.size = size; + my_header.magic_nr = BMI_MAGIC_NR; + + gen_mutex_lock(&interface_mutex); + + ret = tcp_post_send_generic(id, dest, &buffer, + &size, 1, buffer_type, my_header, + user_ptr, context_id, hints); + + gen_mutex_unlock(&interface_mutex); + return(ret); +} + + +/* BMI_tcp_post_sendunexpected() + * + * Submits unexpected send operations. 
+ * + * returns 0 on success that requires later poll, returns 1 on instant + * completion, -errno on failure + */ +int BMI_tcp_post_sendunexpected(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *buffer, + bmi_size_t size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + struct tcp_msg_header my_header; + int ret = -1; + + /* clear the id field for safety */ + *id = 0; + + if (size > TCP_MODE_EAGER_LIMIT) + { + return (bmi_tcp_errno_to_pvfs(-EMSGSIZE)); + } + + my_header.mode = TCP_MODE_UNEXP; + my_header.tag = tag; + my_header.size = size; + my_header.magic_nr = BMI_MAGIC_NR; + + gen_mutex_lock(&interface_mutex); + + ret = tcp_post_send_generic(id, dest, &buffer, + &size, 1, buffer_type, my_header, + user_ptr, context_id, hints); + gen_mutex_unlock(&interface_mutex); + return(ret); +} + + + +/* BMI_tcp_post_recv() + * + * Submits recv operations. + * + * returns 0 on success that requires later poll, returns 1 on instant + * completion, -errno on failure + */ +int BMI_tcp_post_recv(bmi_op_id_t * id, + bmi_method_addr_p src, + void *buffer, + bmi_size_t expected_size, + bmi_size_t * actual_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + int ret = -1; + + /* A few things could happen here: + * a) rendez. recv with sender not ready yet + * b) rendez. recv with sender waiting + * c) eager recv, data not available yet + * d) eager recv, some/all data already here + * e) rendez. recv with sender in eager mode + * + * b or d could lead to completion without polling. + * we don't look for unexpected messages here. 
+ */ + + if (expected_size > TCP_MODE_REND_LIMIT) + { + return (bmi_tcp_errno_to_pvfs(-EINVAL)); + } + gen_mutex_lock(&interface_mutex); + + ret = tcp_post_recv_generic(id, src, &buffer, &expected_size, + 1, expected_size, actual_size, + buffer_type, tag, + user_ptr, context_id, hints); + + gen_mutex_unlock(&interface_mutex); + return (ret); +} + + +/* BMI_tcp_test() + * + * Checks to see if a particular message has completed. + * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_test(bmi_op_id_t id, + int *outcount, + bmi_error_code_t * error_code, + bmi_size_t * actual_size, + void **user_ptr, + int max_idle_time, + bmi_context_id context_id) +{ + int ret = -1; + method_op_p query_op = (method_op_p)id_gen_fast_lookup(id); + + assert(query_op != NULL); + + gen_mutex_lock(&interface_mutex); + + /* do some ``real work'' here */ + ret = tcp_do_work(max_idle_time); + if (ret < 0) + { + gen_mutex_unlock(&interface_mutex); + return (ret); + } + + if (((struct tcp_op*)(query_op->method_data))->tcp_op_state == + BMI_TCP_COMPLETE) + { + assert(query_op->context_id == context_id); + op_list_remove(query_op); + if (user_ptr != NULL) + { + (*user_ptr) = query_op->user_ptr; + } + (*error_code) = query_op->error_code; + (*actual_size) = query_op->actual_size; + PINT_EVENT_END( + (query_op->send_recv == BMI_SEND ? + bmi_tcp_send_event_id : bmi_tcp_recv_event_id), bmi_tcp_pid, NULL, + query_op->event_id, id, *actual_size); + + dealloc_tcp_method_op(query_op); + (*outcount)++; + } + + gen_mutex_unlock(&interface_mutex); + return (0); +} + +/* BMI_tcp_testsome() + * + * Checks to see if any messages from the specified list have completed. 
+ * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_testsome(int incount, + bmi_op_id_t * id_array, + int *outcount, + int *index_array, + bmi_error_code_t * error_code_array, + bmi_size_t * actual_size_array, + void **user_ptr_array, + int max_idle_time, + bmi_context_id context_id) +{ + int ret = -1; + method_op_p query_op = NULL; + int i; + + gen_mutex_lock(&interface_mutex); + + /* do some ``real work'' here */ + ret = tcp_do_work(max_idle_time); + if (ret < 0) + { + gen_mutex_unlock(&interface_mutex); + return (ret); + } + + for(i=0; imethod_data))->tcp_op_state == + BMI_TCP_COMPLETE) + { + assert(query_op->context_id == context_id); + /* this one's done; pop it out */ + op_list_remove(query_op); + error_code_array[*outcount] = query_op->error_code; + actual_size_array[*outcount] = query_op->actual_size; + index_array[*outcount] = i; + if (user_ptr_array != NULL) + { + user_ptr_array[*outcount] = query_op->user_ptr; + } + PINT_EVENT_END( + (query_op->send_recv == BMI_SEND ? + bmi_tcp_send_event_id : bmi_tcp_recv_event_id), + bmi_tcp_pid, NULL, + query_op->event_id, actual_size_array[*outcount]); + dealloc_tcp_method_op(query_op); + (*outcount)++; + } + } + } + + gen_mutex_unlock(&interface_mutex); + return(0); +} + + +/* BMI_tcp_testunexpected() + * + * Checks to see if any unexpected messages have completed. 
+ * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_testunexpected(int incount, + int *outcount, + struct bmi_method_unexpected_info *info, + int max_idle_time) +{ + int ret = -1; + method_op_p query_op = NULL; + + gen_mutex_lock(&interface_mutex); + + if(op_list_empty(op_list_array[IND_COMPLETE_RECV_UNEXP])) + { + /* do some ``real work'' here */ + ret = tcp_do_work(max_idle_time); + if (ret < 0) + { + gen_mutex_unlock(&interface_mutex); + return (ret); + } + } + + *outcount = 0; + + /* go through the completed/unexpected list as long as we are finding + * stuff and we have room in the info array for it + */ + while ((*outcount < incount) && + (query_op = + op_list_shownext(op_list_array[IND_COMPLETE_RECV_UNEXP]))) + { + info[*outcount].error_code = query_op->error_code; + info[*outcount].addr = query_op->addr; + info[*outcount].buffer = query_op->buffer; + info[*outcount].size = query_op->actual_size; + info[*outcount].tag = query_op->msg_tag; + op_list_remove(query_op); + dealloc_tcp_method_op(query_op); + (*outcount)++; + } + gen_mutex_unlock(&interface_mutex); + return (0); +} + + +/* BMI_tcp_testcontext() + * + * Checks to see if any messages from the specified context have completed. 
+ * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_testcontext(int incount, + bmi_op_id_t* out_id_array, + int *outcount, + bmi_error_code_t * error_code_array, + bmi_size_t * actual_size_array, + void **user_ptr_array, + int max_idle_time, + bmi_context_id context_id) +{ + int ret = -1; + method_op_p query_op = NULL; + + *outcount = 0; + + gen_mutex_lock(&interface_mutex); + + if(op_list_empty(completion_array[context_id])) + { + /* if there are unexpected ops ready to go, then short out so + * that the next testunexpected call can pick it up without + * delay + */ + if(check_unexpected && + !op_list_empty(op_list_array[IND_COMPLETE_RECV_UNEXP])) + { + gen_mutex_unlock(&interface_mutex); + return(0); + } + + /* do some ``real work'' here */ + ret = tcp_do_work(max_idle_time); + if (ret < 0) + { + gen_mutex_unlock(&interface_mutex); + return (ret); + } + } + + /* pop as many items off of the completion queue as we can */ + while((*outcount < incount) && + (query_op = + op_list_shownext(completion_array[context_id]))) + { + assert(query_op); + assert(query_op->context_id == context_id); + + /* this one's done; pop it out */ + op_list_remove(query_op); + error_code_array[*outcount] = query_op->error_code; + actual_size_array[*outcount] = query_op->actual_size; + out_id_array[*outcount] = query_op->op_id; + if (user_ptr_array != NULL) + { + user_ptr_array[*outcount] = query_op->user_ptr; + } + + PINT_EVENT_END((query_op->send_recv == BMI_SEND ? 
+ bmi_tcp_send_event_id : bmi_tcp_recv_event_id), + bmi_tcp_pid, NULL, query_op->event_id, + query_op->actual_size); + + dealloc_tcp_method_op(query_op); + query_op = NULL; + (*outcount)++; + } + + gen_mutex_unlock(&interface_mutex); + return(0); +} + + + +/* BMI_tcp_post_send_list() + * + * same as the BMI_tcp_post_send() function, except that it sends + * from an array of possibly non contiguous buffers + * + * returns 0 on success, 1 on immediate successful completion, + * -errno on failure + */ +int BMI_tcp_post_send_list(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t total_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + struct tcp_msg_header my_header; + int ret = -1; + + /* clear the id field for safety */ + *id = 0; + + /* fill in the TCP-specific message header */ + if (total_size > TCP_MODE_REND_LIMIT) + { + gossip_lerr("Error: BMI message too large!\n"); + return (bmi_tcp_errno_to_pvfs(-EMSGSIZE)); + } + + if (total_size <= TCP_MODE_EAGER_LIMIT) + { + my_header.mode = TCP_MODE_EAGER; + } + else + { + my_header.mode = TCP_MODE_REND; + } + my_header.tag = tag; + my_header.size = total_size; + my_header.magic_nr = BMI_MAGIC_NR; + + gen_mutex_lock(&interface_mutex); + + ret = tcp_post_send_generic(id, dest, buffer_list, + size_list, list_count, buffer_type, + my_header, user_ptr, context_id, hints); + gen_mutex_unlock(&interface_mutex); + return(ret); +} + +/* BMI_tcp_post_recv_list() + * + * same as the BMI_tcp_post_recv() function, except that it recvs + * into an array of possibly non contiguous buffers + * + * returns 0 on success, 1 on immediate successful completion, + * -errno on failure + */ +int BMI_tcp_post_recv_list(bmi_op_id_t * id, + bmi_method_addr_p src, + void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t total_expected_size, + 
bmi_size_t * total_actual_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + int ret = -1; + + if (total_expected_size > TCP_MODE_REND_LIMIT) + { + return (bmi_tcp_errno_to_pvfs(-EINVAL)); + } + + gen_mutex_lock(&interface_mutex); + + ret = tcp_post_recv_generic(id, src, buffer_list, size_list, + list_count, total_expected_size, + total_actual_size, buffer_type, tag, user_ptr, + context_id, hints); + + gen_mutex_unlock(&interface_mutex); + return (ret); +} + + +/* BMI_tcp_post_sendunexpected_list() + * + * same as the BMI_tcp_post_sendunexpected() function, except that + * it sends from an array of possibly non contiguous buffers + * + * returns 0 on success, 1 on immediate successful completion, + * -errno on failure + */ +int BMI_tcp_post_sendunexpected_list(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t total_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + struct tcp_msg_header my_header; + int ret = -1; + + /* clear the id field for safety */ + *id = 0; + + if (total_size > TCP_MODE_EAGER_LIMIT) + { + return (bmi_tcp_errno_to_pvfs(-EMSGSIZE)); + } + + my_header.mode = TCP_MODE_UNEXP; + my_header.tag = tag; + my_header.size = total_size; + my_header.magic_nr = BMI_MAGIC_NR; + + gen_mutex_lock(&interface_mutex); + + ret = tcp_post_send_generic(id, dest, buffer_list, + size_list, list_count, buffer_type, + my_header, user_ptr, context_id, hints); + + gen_mutex_unlock(&interface_mutex); + return(ret); +} + + +/* BMI_tcp_open_context() + * + * opens a new context with the specified context id + * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_open_context(bmi_context_id context_id) +{ + + gen_mutex_lock(&interface_mutex); + + /* start a new queue for tracking completions in this context */ + 
completion_array[context_id] = op_list_new(); + if (!completion_array[context_id]) + { + gen_mutex_unlock(&interface_mutex); + return(bmi_tcp_errno_to_pvfs(-ENOMEM)); + } + + gen_mutex_unlock(&interface_mutex); + return(0); +} + + +/* BMI_tcp_close_context() + * + * shuts down a context, previously opened with BMI_tcp_open_context() + * + * no return value + */ +void BMI_tcp_close_context(bmi_context_id context_id) +{ + op_list_p iterator = NULL; + op_list_p scratch = NULL; + method_op_p tmp_method_op = NULL; + + gen_mutex_lock(&interface_mutex); + + /* tear down completion queue for this context */ + op_list_cleanup(completion_array[context_id]); + + gen_mutex_unlock(&interface_mutex); + return; +} + + +/* BMI_tcp_cancel() + * + * attempt to cancel a pending bmi tcp operation + * + * returns 0 on success, -errno on failure + */ +int BMI_tcp_cancel(bmi_op_id_t id, bmi_context_id context_id) +{ + method_op_p query_op = NULL; + + gen_mutex_lock(&interface_mutex); + + query_op = (method_op_p)id_gen_fast_lookup(id); + if(!query_op) + { + /* if we can't find the operattion, then assume that it has already + * completed naturally + */ + gen_mutex_unlock(&interface_mutex); + return(0); + } + + /* easy case: is the operation already completed? */ + if(((struct tcp_op*)(query_op->method_data))->tcp_op_state == + BMI_TCP_COMPLETE) + { + /* only close socket in forceful cancel mode */ + if(forceful_cancel_mode) + tcp_forget_addr(query_op->addr, 0, -BMI_ECANCEL); + /* we are done! status will be collected during test */ + gen_mutex_unlock(&interface_mutex); + return(0); + } + + /* has the operation started moving data yet? 
*/ + if(query_op->env_amt_complete) + { + /* be pessimistic and kill the socket, even if not in forceful + * cancel mode */ + /* NOTE: this may place other operations beside this one into + * EINTR error state + */ + tcp_forget_addr(query_op->addr, 0, -BMI_ECANCEL); + gen_mutex_unlock(&interface_mutex); + return(0); + } + + /* if we fall to this point, op has been posted, but no data has moved + * for it yet as far as we know + */ + + /* mark op as canceled, move to completion queue */ + query_op->error_code = -BMI_ECANCEL; + if(query_op->send_recv == BMI_SEND) + { + BMI_socket_collection_remove_write_bit(tcp_socket_collection_p, + query_op->addr); + } + op_list_remove(query_op); + ((struct tcp_op*)(query_op->method_data))->tcp_op_state = + BMI_TCP_COMPLETE; + /* only close socket in forceful cancel mode */ + if(forceful_cancel_mode) + tcp_forget_addr(query_op->addr, 0, -BMI_ECANCEL); + op_list_add(completion_array[query_op->context_id], query_op); + gen_mutex_unlock(&interface_mutex); + return(0); +} + +/* + * For now, we only support wildcard strings that are IP addresses + * and not *hostnames*! 
/* check_valid_wildcard()
 *
 * Parses a dotted-quad wildcard specification such as "192.168.0.*", or a
 * complete IPv4 address such as "10.0.0.1", into four octet values.  A '*'
 * component — and any omitted trailing component — is stored as 256, the
 * out-of-band "match anything from here on" marker consumed by
 * check_octets().
 *
 * NOTE: only numeric IP wildcards are supported, not *hostnames*.
 *
 * returns 0 on success, -EINVAL on a malformed specification
 */
static int check_valid_wildcard(const char *wildcard_string, unsigned long *octets)
{
    const char *p = wildcard_string;
    int octet_count = 0;

    if (*p == '\0')
    {
        return -EINVAL;
    }

    while (*p != '\0')
    {
        if (octet_count >= 4)
        {
            return -EINVAL;
        }
        if (*p == '*')
        {
            octets[octet_count++] = 256;
            p++;
        }
        else if (*p >= '0' && *p <= '9')
        {
            char *endptr = NULL;
            unsigned long val = strtoul(p, &endptr, 10);
            if (val >= 256)
            {
                return -EINVAL;
            }
            octets[octet_count++] = val;
            p = endptr;
        }
        else
        {
            return -EINVAL;
        }
        /* each component must be followed by a '.' (with something after
         * it) or by the end of the string
         */
        if (*p == '.')
        {
            if (*(p + 1) == '\0')
            {
                return -EINVAL;
            }
            p++;
        }
        else if (*p != '\0')
        {
            return -EINVAL;
        }
    }

    /* BUG FIX: the original parser only committed an octet when it saw the
     * *next* '.', so the component after the last dot (e.g. the "1" in
     * "10.0.0.1") was silently discarded and replaced by the wildcard
     * marker, making a fully specified address match an entire /24.
     * Parsing component by component above fixes that; genuinely
     * unspecified trailing components still default to the wildcard marker.
     * (This also removes the original's unbounded strncpy into a 16-byte
     * scratch buffer.)
     */
    for (; octet_count < 4; octet_count++)
    {
        octets[octet_count] = 256;
    }
    return 0;
}

/* check_octets()
 *
 * return 1 if the addr specified is part of the wildcard specification of
 * octets (256 in any position matches that octet and everything after it),
 * return 0 otherwise.
 */
static int check_octets(struct in_addr addr, unsigned long *octets)
{
    uint32_t host_addr = ntohl(addr.s_addr);
    int i;

    for (i = 0; i < 4; i++)
    {
        /* select octet i, most significant byte first */
        unsigned long byte = (host_addr >> (8 * (3 - i))) & 0xff;

        if (octets[i] == 256)
        {
            /* wildcard: this octet and all following ones match */
            return 1;
        }
        if (byte != octets[i])
        {
            return 0;
        }
    }
    return 1;
}
+ * or if it is part of the subnet mask specified + */ +int BMI_tcp_query_addr_range(bmi_method_addr_p map, const char *wildcard_string, int netmask) +{ + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) map->method_data; + struct sockaddr_in map_addr; + socklen_t map_addr_len = sizeof(map_addr); + const char *tcp_wildcard = wildcard_string + 6 /* strlen("tcp://") */; + int ret = -1; + + memset(&map_addr, 0, sizeof(map_addr)); + if(getpeername(tcp_addr_data->socket, (struct sockaddr *) &map_addr, (int *) &map_addr_len) < 0) + { + ret = bmi_tcp_errno_to_pvfs(-EINVAL); + gossip_err("Error: failed to retrieve peer name for client.\n"); + return(ret); + } + /* Wildcard specification */ + if (netmask == -1) + { + unsigned long octets[4]; + if (check_valid_wildcard(tcp_wildcard, octets) < 0) + { + gossip_lerr("Invalid wildcard specification: %s\n", tcp_wildcard); + return -EINVAL; + } + gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Map Address is : %s, Wildcard Octets: %lu.%lu.%lu.%lu\n", inet_ntoa(map_addr.sin_addr), + octets[0], octets[1], octets[2], octets[3]); + if (check_octets(map_addr.sin_addr, octets) == 1) + { + return 1; + } + } + /* Netmask specification */ + else { + struct sockaddr_in mask_addr, network_addr; + memset(&mask_addr, 0, sizeof(mask_addr)); + memset(&network_addr, 0, sizeof(network_addr)); + /* Convert the netmask address */ + convert_mask(netmask, &mask_addr.sin_addr); + /* Invalid network address */ + if (inet_aton(tcp_wildcard, &network_addr.sin_addr) == 0) + { + gossip_err("Invalid network specification: %s\n", tcp_wildcard); + return -EINVAL; + } + /* Matches the subnet mask! 
*/ + if ((map_addr.sin_addr.s_addr & mask_addr.sin_addr.s_addr) + == (network_addr.sin_addr.s_addr & mask_addr.sin_addr.s_addr)) + { + return 1; + } + } + return 0; +} + +/* BMI_tcp_addr_rev_lookup_unexpected() + * + * looks up an address that was initialized unexpectedly and returns a string + * hostname + * + * returns string on success, "UNKNOWN" on failure + */ +const char* BMI_tcp_addr_rev_lookup_unexpected(bmi_method_addr_p map) +{ + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) map->method_data; + int debug_on; + uint64_t mask; + socklen_t peerlen; + struct sockaddr_in peer; + int ret; + struct hostent *peerent; + char* tmp_peer; + + /* return default response if we don't have support for the right socket + * calls + */ +#if !defined(HAVE_GETHOSTBYADDR) + return(tcp_addr_data->peer); +#else + + /* Only resolve hostnames if a gossip mask is set to request it. + * Otherwise we leave it at ip address + */ + gossip_get_debug_mask(&debug_on, &mask); + + if(!debug_on || (!(mask & GOSSIP_ACCESS_HOSTNAMES))) + { + return(tcp_addr_data->peer); + } + + peerlen = sizeof(struct sockaddr_in); + + if(tcp_addr_data->peer_type == BMI_TCP_PEER_HOSTNAME) + { + /* full hostname already cached; return now */ + return(tcp_addr_data->peer); + } + + /* if we hit this point, we need to resolve hostname */ + ret = getpeername(tcp_addr_data->socket, (struct sockaddr*) &(peer), (int *) &peerlen); + if(ret < 0) + { + /* default to use IP address */ + return(tcp_addr_data->peer); + } + + peerent = gethostbyaddr((const char *) &peer.sin_addr.s_addr, + sizeof(struct in_addr), AF_INET); + if(peerent == NULL) + { + /* default to use IP address */ + return(tcp_addr_data->peer); + } + + tmp_peer = (char*)malloc(strlen(peerent->h_name) + 1); + if(!tmp_peer) + { + /* default to use IP address */ + return(tcp_addr_data->peer); + } + strcpy(tmp_peer, peerent->h_name); + if(tcp_addr_data->peer) + { + free(tcp_addr_data->peer); + } + tcp_addr_data->peer = tmp_peer; + tcp_addr_data->peer_type 
= BMI_TCP_PEER_HOSTNAME; + return(tcp_addr_data->peer); + +#endif + +} + +/* tcp_forget_addr() + * + * completely removes a tcp method address from use, and aborts any + * operations that use the address. If the + * dealloc_flag is set, the memory used by the address will be + * deallocated as well. + * + * no return value + */ +void tcp_forget_addr(bmi_method_addr_p map, + int dealloc_flag, + int error_code) +{ + struct tcp_addr* tcp_addr_data = (struct tcp_addr *) map->method_data; + BMI_addr_t bmi_addr = tcp_addr_data->bmi_addr; + int tmp_outcount; + bmi_method_addr_p tmp_addr; + int tmp_status; + + if (tcp_socket_collection_p) + { + BMI_socket_collection_remove(tcp_socket_collection_p, map); + /* perform a test to force the socket collection to act on the remove + * request before continuing + */ + if(!sc_test_busy) + { + BMI_socket_collection_testglobal(tcp_socket_collection_p, + 0, &tmp_outcount, &tmp_addr, &tmp_status, 0); + } + } + + tcp_shutdown_addr(map); + tcp_cleanse_addr(map, error_code); + tcp_addr_data->addr_error = error_code; + if (dealloc_flag) + { + dealloc_tcp_method_addr(map); + } + else + { + /* this will cause the bmi control layer to check to see if + * this address can be completely forgotten + */ + bmi_method_addr_forget_callback(bmi_addr); + } + return; +}; + +/****************************************************************** + * Internal support functions + */ + + +/* + * dealloc_tcp_method_addr() + * + * destroys method address structures generated by the TCP/IP module. + * + * no return value + */ +static void dealloc_tcp_method_addr(bmi_method_addr_p map) +{ + + struct tcp_addr *tcp_addr_data = NULL; + + tcp_addr_data = (struct tcp_addr *) map->method_data; + /* close the socket, as long as it is not the one we are listening on + * as a server. 
+ */ + if (!tcp_addr_data->server_port) + { + if (tcp_addr_data->socket > -1) + { + shutdown(tcp_addr_data->socket, SD_BOTH); + closesocket(tcp_addr_data->socket); + } + } + + if (tcp_addr_data->hostname) + free(tcp_addr_data->hostname); + if (tcp_addr_data->peer) + free(tcp_addr_data->peer); + + bmi_dealloc_method_addr(map); + + return; +} + + +/* + * alloc_tcp_method_addr() + * + * creates a new method address with defaults filled in for TCP/IP. + * + * returns pointer to struct on success, NULL on failure + */ +bmi_method_addr_p alloc_tcp_method_addr(void) +{ + + struct bmi_method_addr *my_method_addr = NULL; + struct tcp_addr *tcp_addr_data = NULL; + + my_method_addr = + bmi_alloc_method_addr(tcp_method_params.method_id, sizeof(struct tcp_addr)); + if (!my_method_addr) + { + return (NULL); + } + + /* note that we trust the alloc_method_addr() function to have zeroed + * out the structures for us already + */ + + tcp_addr_data = (struct tcp_addr *) my_method_addr->method_data; + tcp_addr_data->socket = -1; + tcp_addr_data->port = -1; + tcp_addr_data->map = my_method_addr; + tcp_addr_data->sc_index = -1; + + return (my_method_addr); +} + + +/* + * tcp_server_init() + * + * this function is used to prepare a node to recieve incoming + * connections if it is initialized in a server configuration. 
+ * + * returns 0 on succes, -errno on failure + */ +static int tcp_server_init(void) +{ + + int oldfl = 0; /* old socket flags */ + struct tcp_addr *tcp_addr_data = NULL; + int tmp_errno = bmi_tcp_errno_to_pvfs(-EINVAL); + int ret = 0; + + /* create a socket */ + tcp_addr_data = (struct tcp_addr *) tcp_method_params.listen_addr->method_data; + if ((tcp_addr_data->socket = BMI_sockio_new_sock()) < 0) + { + tmp_errno = WSAGetLastError(); + gossip_err("Error: BMI_sockio_new_sock: %d\n", tmp_errno); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + + /* set it to non-blocking operation */ + /* + oldfl = fcntl(tcp_addr_data->socket, F_GETFL, 0); + if (!(oldfl & O_NONBLOCK)) + { + fcntl(tcp_addr_data->socket, F_SETFL, oldfl | O_NONBLOCK); + } + */ + SET_NONBLOCK(tcp_addr_data->socket); + + /* setup for a fast restart to avoid bind addr in use errors */ + BMI_sockio_set_sockopt(tcp_addr_data->socket, SO_REUSEADDR, 1); + + /* bind it to the appropriate port */ + if(tcp_method_params.method_flags & BMI_TCP_BIND_SPECIFIC) + { + ret = BMI_sockio_bind_sock_specific(tcp_addr_data->socket, + tcp_addr_data->hostname, + tcp_addr_data->port); + /* NOTE: this particular function converts errno in advance */ + if(ret < 0) + { + PVFS_perror_gossip("BMI_sockio_bind_sock_specific", ret); + return(ret); + } + } + else + { + ret = BMI_sockio_bind_sock(tcp_addr_data->socket, + tcp_addr_data->port); + } + + if (ret < 0) + { + tmp_errno = WSAGetLastError(); + gossip_err("Error: BMI_sockio_bind_sock: %d\n", tmp_errno); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + + /* go ahead and listen to the socket */ + if (listen(tcp_addr_data->socket, TCP_BACKLOG) != 0) + { + tmp_errno = WSAGetLastError(); + gossip_err("Error: listen: %s\n", tmp_errno); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + + return (0); +} + + +/* find_recv_inflight() + * + * checks to see if there is a recv operation in flight (when in flight + * means that some of the data or envelope has been read) for a + * 
particular address. + * + * returns pointer to operation on success, NULL if nothing found. + */ +static method_op_p find_recv_inflight(bmi_method_addr_p map) +{ + struct op_list_search_key key; + method_op_p query_op = NULL; + + memset(&key, 0, sizeof(struct op_list_search_key)); + key.method_addr = map; + key.method_addr_yes = 1; + + query_op = op_list_search(op_list_array[IND_RECV_INFLIGHT], &key); + + return (query_op); +} + + +/* tcp_sock_init() + * + * this is an internal function which is used to build up a TCP/IP + * connection in the situation of a client side operation. + * addressing information to determine which fields need to be set. + * If the connection is already established then it does no work. + * + * NOTE: this is safe to call repeatedly. However, always check the + * value of the not_connected field in the tcp address before using the + * address. + * + * returns 0 on success, -errno on failure + */ +static int tcp_sock_init(bmi_method_addr_p my_method_addr) +{ + + int oldfl = 0; /* socket flags */ + int ret = -1; + struct pollfd poll_conn; + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) my_method_addr->method_data; + int tmp_errno = 0; + + /* check for obvious problems */ + assert(my_method_addr); + assert(my_method_addr->method_type == tcp_method_params.method_id); + assert(tcp_addr_data->server_port == 0); + + /* fail immediately if the address is in failure mode and we have no way + * to reconnect + */ + if(tcp_addr_data->addr_error && tcp_addr_data->dont_reconnect) + { + gossip_debug(GOSSIP_BMI_DEBUG_TCP, + "Warning: BMI communication attempted on an address in failure mode.\n"); + return(tcp_addr_data->addr_error); + } + + if(tcp_addr_data->addr_error) + { + gossip_debug(GOSSIP_BMI_DEBUG_TCP, "%s: attempting reconnect.\n", + __func__); + tcp_addr_data->addr_error = 0; + assert(tcp_addr_data->socket < 0); + tcp_addr_data->not_connected = 1; + } + + /* is there already a socket? 
*/ + if (tcp_addr_data->socket > -1) + { + /* check to see if we still need to work on the connect.. */ + if (tcp_addr_data->not_connected) + { + /* this is a little weird, but we complete the nonblocking + * connection by polling */ + poll_conn.fd = tcp_addr_data->socket; + poll_conn.events = POLLOUT; + ret = WSAPoll(&poll_conn, 1, 2); + if ((ret < 0) || (poll_conn.revents & POLLERR)) + { + tmp_errno = WSAGetLastError(); + gossip_lerr("Error: poll: %d\n", tmp_errno); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + if (poll_conn.revents & POLLOUT) + { + tcp_addr_data->not_connected = 0; + } + + /* use select on Windows */ + /*fd_set writefds; + struct timeval timeout; + + timeout.tv_sec = 0; + timeout.tv_usec = 2000; /* 2ms */ +/* + FD_ZERO(&writefds); + FD_SET(tcp_addr_data->socket, &writefds); + ret = select(1, NULL, &writefds, NULL, (const struct timeval *) &timeout); + if (ret == SOCKET_ERROR) + { + tmp_errno = WSAGetLastError(); + gossip_lerr("Error: select (tcp_sock_init): %d\n", tmp_errno); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + if (FD_ISSET(tcp_addr_data->socket, &writefds)) + { + tcp_addr_data->not_connected = 0; + } +*/ + } + /* return. the caller should check the "not_connected" flag to + * see if the socket is usable yet. */ + return (0); + } + + bmi_set_sock_buffers(tcp_addr_data->socket); + + /* at this point there is no socket. 
try to build it */ + if (tcp_addr_data->port < 1) + { + return (bmi_tcp_errno_to_pvfs(-EINVAL)); + } + + /* make a socket */ + if ((tcp_addr_data->socket = BMI_sockio_new_sock()) < 0) + { + tmp_errno = WSAGetLastError(); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + + /* set it to non-blocking operation */ + /* oldfl = fcntl(tcp_addr_data->socket, F_GETFL, 0); + if (!(oldfl & O_NONBLOCK)) + { + fcntl(tcp_addr_data->socket, F_SETFL, oldfl | O_NONBLOCK); + }*/ + SET_NONBLOCK(tcp_addr_data->socket); + +#if defined(USE_TRUSTED) && defined(__PVFS2_CLIENT__) + /* make sure if we need to bind or not to some local port ranges */ + tcp_enable_trusted(tcp_addr_data); +#endif + + /* turn off Nagle's algorithm */ + if (BMI_sockio_set_tcpopt(tcp_addr_data->socket, TCP_NODELAY, 1) < 0) + { + tmp_errno = WSAGetLastError(); + gossip_lerr("Error: failed to set TCP_NODELAY option.\n"); + closesocket(tcp_addr_data->socket); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + + bmi_set_sock_buffers(tcp_addr_data->socket); + + if (tcp_addr_data->hostname) + { + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, + "Connect: socket=%d, hostname=%s, port=%d\n", + tcp_addr_data->socket, tcp_addr_data->hostname, + tcp_addr_data->port); + ret = BMI_sockio_connect_sock(tcp_addr_data->socket, + tcp_addr_data->hostname, + tcp_addr_data->port); + } + else + { + return (bmi_tcp_errno_to_pvfs(-EINVAL)); + } + + if (ret < 0) + { + if (ret == -WSAEWOULDBLOCK) + { + tcp_addr_data->not_connected = 1; + /* this will have to be connected later with a poll */ + } + else + { + /* NOTE: BMI_sockio_connect_sock returns a PVFS error */ + char buff[300]; + + _snprintf(buff, 300, "Error: BMI_sockio_connect_sock: (%s):", + tcp_addr_data->hostname); + + PVFS_perror_gossip(buff, ret); + return (ret); + } + } + + return (0); +} + + +/* enqueue_operation() + * + * creates a new operation based on the arguments to the function. 
It + * then makes sure that the address is added to the socket collection, + * and the operation is added to the appropriate operation queue. + * + * Damn, what a big prototype! + * + * returns 0 on success, -errno on failure + */ +static int enqueue_operation(op_list_p target_list, + enum bmi_op_type send_recv, + bmi_method_addr_p map, + void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t amt_complete, + bmi_size_t env_amt_complete, + bmi_op_id_t * id, + int tcp_op_state, + struct tcp_msg_header header, + void *user_ptr, + bmi_size_t actual_size, + bmi_size_t expected_size, + bmi_context_id context_id, + int32_t eid) +{ + method_op_p new_method_op = NULL; + struct tcp_op *tcp_op_data = NULL; + struct tcp_addr* tcp_addr_data = NULL; + int i; + + /* allocate the operation structure */ + new_method_op = alloc_tcp_method_op(); + if (!new_method_op) + { + return (bmi_tcp_errno_to_pvfs(-ENOMEM)); + } + + *id = new_method_op->op_id; + new_method_op->event_id = eid; + + /* set the fields */ + new_method_op->send_recv = send_recv; + new_method_op->addr = map; + new_method_op->user_ptr = user_ptr; + /* this is on purpose; we want to use the buffer_list all of + * the time, no special case for one contig buffer + */ + new_method_op->buffer = NULL; + new_method_op->actual_size = actual_size; + new_method_op->expected_size = expected_size; + new_method_op->send_recv = send_recv; + new_method_op->amt_complete = amt_complete; + new_method_op->env_amt_complete = env_amt_complete; + new_method_op->msg_tag = header.tag; + new_method_op->mode = header.mode; + new_method_op->list_count = list_count; + new_method_op->context_id = context_id; + + /* set our current position in list processing */ + i=0; + new_method_op->list_index = 0; + new_method_op->cur_index_complete = 0; + while(amt_complete > 0) + { + if(amt_complete >= size_list[i]) + { + amt_complete -= size_list[i]; + new_method_op->list_index++; + i++; + } + else + { + 
new_method_op->cur_index_complete = amt_complete; + amt_complete = 0; + } + } + + tcp_op_data = (struct tcp_op *) new_method_op->method_data; + tcp_op_data->tcp_op_state = (enum bmi_tcp_state) tcp_op_state; + tcp_op_data->env = header; + + /* if there is only one item in the list, then keep the list stored + * in the op structure. This allows us to use the same code for send + * and recv as we use for send_list and recv_list, without having to + * malloc lists for those special cases + */ + if (list_count == 1) + { + new_method_op->buffer_list = &tcp_op_data->buffer_list_stub; + new_method_op->size_list = &tcp_op_data->size_list_stub; + ((void**)new_method_op->buffer_list)[0] = buffer_list[0]; + ((bmi_size_t*)new_method_op->size_list)[0] = size_list[0]; + } + else + { + new_method_op->size_list = size_list; + new_method_op->buffer_list = buffer_list; + } + + tcp_addr_data = (struct tcp_addr *) map->method_data; + + if(tcp_addr_data->addr_error) + { + /* server should always fail here, client should let receives queue + * as if nothing were wrong + */ + if(tcp_addr_data->dont_reconnect || send_recv == BMI_SEND) + { + gossip_debug(GOSSIP_BMI_DEBUG_TCP, + "Warning: BMI communication attempted on an " + "address in failure mode.\n"); + new_method_op->error_code = tcp_addr_data->addr_error; + op_list_add(op_list_array[new_method_op->context_id], + new_method_op); + return(tcp_addr_data->addr_error); + } + } + +#if 0 + if(tcp_addr_data->addr_error) + { + /* this address is bad, don't try to do anything with it */ + gossip_err("Warning: BMI communication attempted on an " + "address in failure mode.\n"); + + new_method_op->error_code = tcp_addr_data->addr_error; + op_list_add(op_list_array[new_method_op->context_id], + new_method_op); + return(tcp_addr_data->addr_error); + } +#endif + + /* add the socket to poll on */ + BMI_socket_collection_add(tcp_socket_collection_p, map); + if(send_recv == BMI_SEND) + { + BMI_socket_collection_add_write_bit(tcp_socket_collection_p, 
map); + } + + /* keep up with the operation */ + op_list_add(target_list, new_method_op); + + return (0); +} + + +/* tcp_post_recv_generic() + * + * does the real work of posting an operation - works for both + * eager and rendezvous messages + * + * returns 0 on success that requires later poll, returns 1 on instant + * completion, -errno on failure + */ +static int tcp_post_recv_generic(bmi_op_id_t * id, + bmi_method_addr_p src, + void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t expected_size, + bmi_size_t * actual_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + method_op_p query_op = NULL; + int ret = -1; + struct tcp_addr *tcp_addr_data = NULL; + struct tcp_op *tcp_op_data = NULL; + struct tcp_msg_header bogus_header; + struct op_list_search_key key; + bmi_size_t copy_size = 0; + bmi_size_t total_copied = 0; + int i; + PINT_event_id eid = 0; + + PINT_EVENT_START( + bmi_tcp_recv_event_id, bmi_tcp_pid, NULL, &eid, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + PINT_HINT_GET_HANDLE(hints), + PINT_HINT_GET_OP_ID(hints), + expected_size); + + tcp_addr_data = (struct tcp_addr *) src->method_data; + + /* short out immediately if the address is bad and we have no way to + * reconnect + */ + if(tcp_addr_data->addr_error && tcp_addr_data->dont_reconnect) + { + gossip_debug( + GOSSIP_BMI_DEBUG_TCP, + "Warning: BMI communication attempted " + "on an address in failure mode.\n"); + return(tcp_addr_data->addr_error); + } + + /* lets make sure that the message hasn't already been fully + * buffered in eager mode before doing anything else + */ + memset(&key, 0, sizeof(struct op_list_search_key)); + key.method_addr = src; + key.method_addr_yes = 1; + key.msg_tag = tag; + key.msg_tag_yes = 1; + + query_op = + op_list_search(op_list_array[IND_RECV_EAGER_DONE_BUFFERING], &key); + if (query_op) + { + /* 
make sure it isn't too big */ + if (query_op->actual_size > expected_size) + { + gossip_err("Error: message ordering violation;\n"); + gossip_err("Error: message too large for next buffer.\n"); + return (bmi_tcp_errno_to_pvfs(-EPROTO)); + } + + /* whoohoo- it is already done! */ + /* copy buffer out to list segments; handle short case */ + for (i = 0; i < list_count; i++) + { + copy_size = size_list[i]; + if (copy_size + total_copied > query_op->actual_size) + { + copy_size = query_op->actual_size - total_copied; + } + memcpy(buffer_list[i], (void *) ((char *) query_op->buffer + + total_copied), copy_size); + total_copied += copy_size; + if (total_copied == query_op->actual_size) + { + break; + } + } + /* copy out to correct memory regions */ + (*actual_size) = query_op->actual_size; + free(query_op->buffer); + *id = 0; + op_list_remove(query_op); + dealloc_tcp_method_op(query_op); + PINT_EVENT_END(bmi_tcp_recv_event_id, bmi_tcp_pid, NULL, eid, 0, + *actual_size); + + return (1); + } + + /* look for a message that is already being received */ + query_op = op_list_search(op_list_array[IND_RECV_INFLIGHT], &key); + if (query_op) + { + tcp_op_data = (struct tcp_op *) query_op->method_data; + } + + /* see if it is being buffered into a temporary memory region */ + if (query_op && tcp_op_data->tcp_op_state == BMI_TCP_BUFFERING) + { + /* make sure it isn't too big */ + if (query_op->actual_size > expected_size) + { + gossip_err("Error: message ordering violation;\n"); + gossip_err("Error: message too large for next buffer.\n"); + return (bmi_tcp_errno_to_pvfs(-EPROTO)); + } + + /* copy what we have so far into the correct buffers */ + total_copied = 0; + for (i = 0; i < list_count; i++) + { + copy_size = size_list[i]; + if (copy_size + total_copied > query_op->amt_complete) + { + copy_size = query_op->amt_complete - total_copied; + } + if (copy_size > 0) + { + memcpy(buffer_list[i], (void *) ((char *) query_op->buffer + + total_copied), copy_size); + } + total_copied += 
copy_size; + if (total_copied == query_op->amt_complete) + { + query_op->list_index = i; + query_op->cur_index_complete = copy_size; + break; + } + } + + /* see if we ended on a buffer boundary */ + if (query_op->cur_index_complete == + query_op->size_list[query_op->list_index]) + { + query_op->list_index++; + query_op->cur_index_complete = 0; + } + + /* release the old buffer */ + if (query_op->buffer) + { + free(query_op->buffer); + } + + *id = query_op->op_id; + tcp_op_data = (struct tcp_op *) query_op->method_data; + tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS; + + query_op->list_count = list_count; + query_op->user_ptr = user_ptr; + query_op->context_id = context_id; + /* if there is only one item in the list, then keep the list stored + * in the op structure. This allows us to use the same code for send + * and recv as we use for send_list and recv_list, without having to + * malloc lists for those special cases + */ + if (list_count == 1) + { + query_op->buffer_list = &tcp_op_data->buffer_list_stub; + query_op->size_list = &tcp_op_data->size_list_stub; + ((void **)query_op->buffer_list)[0] = buffer_list[0]; + ((bmi_size_t *)query_op->size_list)[0] = size_list[0]; + } + else + { + query_op->buffer_list = buffer_list; + query_op->size_list = size_list; + } + + if (query_op->amt_complete < query_op->actual_size) + { + /* try to recv some more data */ + tcp_addr_data = (struct tcp_addr *) query_op->addr->method_data; + ret = payload_progress(tcp_addr_data->socket, + query_op->buffer_list, + query_op->size_list, + query_op->list_count, + query_op->actual_size, + &(query_op->list_index), + &(query_op->cur_index_complete), + BMI_RECV, + NULL, + 0); + if (ret < 0) + { + PVFS_perror_gossip("Error: payload_progress", ret); + /* payload_progress() returns BMI error codes */ + tcp_forget_addr(query_op->addr, 0, ret); + return (ret); + } + + query_op->amt_complete += ret; + } + assert(query_op->amt_complete <= query_op->actual_size); + if (query_op->amt_complete == 
query_op->actual_size) + { + /* we are done */ + op_list_remove(query_op); + *id = 0; + (*actual_size) = query_op->actual_size; + dealloc_tcp_method_op(query_op); + PINT_EVENT_END( + bmi_tcp_recv_event_id, bmi_tcp_pid, NULL, eid, + 0, *actual_size); + + return (1); + } + else + { + /* there is still more work to do */ + tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS; + return (0); + } + } + + /* NOTE: if the message was in flight, but not buffering, then + * that means that it has already matched an earlier receive + * post or else is an unexpected message that doesn't require a + * matching receive post - at any rate it shouldn't be handled + * here + */ + + /* if we hit this point we must enqueue */ + if (expected_size <= TCP_MODE_EAGER_LIMIT) + { + bogus_header.mode = TCP_MODE_EAGER; + } + else + { + bogus_header.mode = TCP_MODE_REND; + } + bogus_header.tag = tag; + ret = enqueue_operation(op_list_array[IND_RECV], + BMI_RECV, src, buffer_list, size_list, + list_count, 0, 0, id, BMI_TCP_INPROGRESS, + bogus_header, user_ptr, 0, + expected_size, context_id, eid); + /* just for safety; this field isn't valid to the caller anymore */ + (*actual_size) = 0; + /* TODO: figure out why this causes deadlocks; observable in 2 + * scenarios: + * - pvfs2-client-core with threaded library and nptl + * - pvfs2-server threaded with nptl sending messages to itself + */ +#if 0 + if (ret >= 0) + { + /* go ahead and try to do some work while we are in this + * function since we appear to be backlogged. Make sure that + * we do not wait in the poll, however. + */ + ret = tcp_do_work(0); + } +#endif + return (ret); +} + + +/* tcp_cleanse_addr() + * + * finds all active operations matching the given address, places them + * in an error state, and moves them to the completed queue. + * + * NOTE: this function does not shut down the address. 
That should be + * handled separately + * + * returns 0 on success, -errno on failure + */ +static int tcp_cleanse_addr(bmi_method_addr_p map, int error_code) +{ + int i = 0; + struct op_list_search_key key; + method_op_p query_op = NULL; + + memset(&key, 0, sizeof(struct op_list_search_key)); + key.method_addr = map; + key.method_addr_yes = 1; + + /* NOTE: we know the unexpected completed queue is the last index! */ + for (i = 0; i < (NUM_INDICES - 1); i++) + { + if (op_list_array[i]) + { + while ((query_op = op_list_search(op_list_array[i], &key))) + { + op_list_remove(query_op); + query_op->error_code = error_code; + if (query_op->mode == TCP_MODE_UNEXP && query_op->send_recv + == BMI_RECV) + { + op_list_add(op_list_array[IND_COMPLETE_RECV_UNEXP], + query_op); + } + else + { + ((struct tcp_op*)(query_op->method_data))->tcp_op_state = + BMI_TCP_COMPLETE; + op_list_add(completion_array[query_op->context_id], query_op); + } + } + } + } + + return (0); +} + + +/* tcp_shutdown_addr() + * + * closes connections associated with a tcp method address + * + * returns 0 on success, -errno on failure + */ +static int tcp_shutdown_addr(bmi_method_addr_p map) +{ + + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) map->method_data; + if (tcp_addr_data->socket > -1) + { + shutdown(tcp_addr_data->socket, SD_BOTH); + closesocket(tcp_addr_data->socket); + } + tcp_addr_data->socket = -1; + tcp_addr_data->not_connected = 1; + + return (0); +} + + +/* tcp_do_work() + * + * this is the function that actually does communication work during + * BMI_tcp_testXXX and BMI_tcp_waitXXX functions. The amount of work + * that it does is tunable. + * + * returns 0 on success, -errno on failure. 
+ */ +static int tcp_do_work(int max_idle_time) +{ + int ret = -1; + bmi_method_addr_p addr_array[TCP_WORK_METRIC]; + int status_array[TCP_WORK_METRIC]; + int socket_count = 0; + int i = 0; + int stall_flag = 0; + int busy_flag = 1; + struct timespec req; + struct tcp_addr* tcp_addr_data = NULL; + struct timespec wait_time; + struct timeval start; + + if(sc_test_busy) + { + /* another thread is already polling or working on sockets */ + if(max_idle_time == 0) + { + /* we don't want to spend time waiting on it; return + * immediately. + */ + return(0); + } + + /* Sleep until working thread thread signals that it has finished + * its work and then return. No need for this thread to poll; + * the other thread may have already finished what we wanted. + * This condition wait is used strictly as a best effort to + * prevent busy spin. We'll sort out the results later. + */ + gettimeofday(&start, NULL); + wait_time.tv_sec = start.tv_sec + max_idle_time / 1000; + wait_time.tv_nsec = (start.tv_usec + ((max_idle_time % 1000)*1000))*1000; + if (wait_time.tv_nsec > 1000000000) + { + wait_time.tv_nsec = wait_time.tv_nsec - 1000000000; + wait_time.tv_sec++; + } + gen_cond_timedwait(&interface_cond, &interface_mutex, &wait_time); + return(0); + } + + /* this thread has gained control of the polling. 
*/ + sc_test_busy = 1; + gen_mutex_unlock(&interface_mutex); + + /* our turn to look at the socket collection */ + ret = BMI_socket_collection_testglobal(tcp_socket_collection_p, + TCP_WORK_METRIC, &socket_count, + addr_array, status_array, + max_idle_time); + + gen_mutex_lock(&interface_mutex); + sc_test_busy = 0; + + if (ret < 0) + { + /* wake up anyone else who might have been waiting */ + gen_cond_broadcast(&interface_cond); + PVFS_perror_gossip("Error: socket collection:", ret); + /* BMI_socket_collection_testglobal() returns BMI error code */ + return (ret); + } + + if(socket_count == 0) + busy_flag = 0; + + /* do different kinds of work depending on results */ + for (i = 0; i < socket_count; i++) + { + tcp_addr_data = (struct tcp_addr *) addr_array[i]->method_data; + /* skip working on addresses in failure mode */ + if(tcp_addr_data->addr_error) + { + /* addr_error field is in BMI error code format */ + tcp_forget_addr(addr_array[i], 0, tcp_addr_data->addr_error); + continue; + } + + if (status_array[i] & SC_ERROR_BIT) + { + ret = tcp_do_work_error(addr_array[i]); + if (ret < 0) + { + PVFS_perror_gossip("Warning: BMI error handling failure, continuing", ret); + } + } + else + { + if (status_array[i] & SC_WRITE_BIT) + { + ret = tcp_do_work_send(addr_array[i], &stall_flag); + if (ret < 0) + { + PVFS_perror_gossip("Warning: BMI send error, continuing", ret); + } + if(!stall_flag) + busy_flag = 0; + } + if (status_array[i] & SC_READ_BIT) + { + ret = tcp_do_work_recv(addr_array[i], &stall_flag); + if (ret < 0) + { + PVFS_perror_gossip("Warning: BMI recv error, continuing", ret); + } + if(!stall_flag) + busy_flag = 0; + } + } + } + + /* IMPORTANT NOTE: if we have set the following flag, then it indicates that + * poll() is finding data on our sockets, yet we are not able to move + * any of it right now. This means that the sockets are backlogged, and + * BMI is in danger of busy spinning during test functions. 
Let's sleep + * for a millisecond here in hopes of letting the rest of the system + * catch up somehow (either by clearing a backlog in another I/O + * component, or by posting more matching BMI recieve operations) + */ + if(busy_flag) + { + /* req.tv_sec = 0; + req.tv_nsec = 1000; */ + gen_mutex_unlock(&interface_mutex); + /* nanosleep(&req, NULL); */ + Sleep(1); + gen_mutex_lock(&interface_mutex); + } + + /* wake up anyone else who might have been waiting */ + gen_cond_broadcast(&interface_cond); + return (0); +} + + +/* tcp_do_work_send() + * + * does work on a TCP address that is ready to send data. + * + * returns 0 on success, -errno on failure + */ +static int tcp_do_work_send(bmi_method_addr_p map, int* stall_flag) +{ + method_op_p active_method_op = NULL; + struct op_list_search_key key; + int blocked_flag = 0; + int ret = 0; + int tmp_stall_flag; + + *stall_flag = 1; + + while (blocked_flag == 0 && ret == 0) + { + /* what we want to do here is find the first operation in the send + * queue for this address. + */ + memset(&key, 0, sizeof(struct op_list_search_key)); + key.method_addr = map; + key.method_addr_yes = 1; + active_method_op = op_list_search(op_list_array[IND_SEND], &key); + if (!active_method_op) + { + /* ran out of queued sends to work on */ + return (0); + } + + ret = work_on_send_op(active_method_op, &blocked_flag, &tmp_stall_flag); + if(!tmp_stall_flag) + *stall_flag = 0; + } + + return (ret); +} + + +/* handle_new_connection() + * + * this function should be called only on special tcp method addresses + * that represent local server ports. It will attempt to accept a new + * connection and create a new method address for the remote host. + * + * side effect: destroys the temporary method_address that is passed in + * to it. 
+ * + * returns 0 on success, -errno on failure + */ +static int handle_new_connection(bmi_method_addr_p map) +{ + struct tcp_addr *tcp_addr_data = NULL; + int accepted_socket = -1; + bmi_method_addr_p new_addr = NULL; + int ret = -1; + char* tmp_peer = NULL; + + ret = tcp_accept_init(&accepted_socket, &tmp_peer); + if (ret < 0) + { + return (ret); + } + if (accepted_socket < 0) + { + /* guess it wasn't ready after all */ + return (0); + } + + /* ok, we have a new socket. what now? Probably simplest + * thing to do is to create a new method_addr, add it to the + * socket collection, and return. It will get caught the next + * time around */ + new_addr = alloc_tcp_method_addr(); + if (!new_addr) + { + return (bmi_tcp_errno_to_pvfs(-ENOMEM)); + } + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, + "Assigning socket %d to new method addr.\n", + accepted_socket); + tcp_addr_data = (struct tcp_addr *) new_addr->method_data; + tcp_addr_data->socket = accepted_socket; + tcp_addr_data->peer = tmp_peer; + tcp_addr_data->peer_type = BMI_TCP_PEER_IP; + + /* set a flag to make sure that we never try to reconnect this address + * in the future + */ + tcp_addr_data->dont_reconnect = 1; + /* register this address with the method control layer */ + tcp_addr_data->bmi_addr = bmi_method_addr_reg_callback(new_addr); + if (ret < 0) + { + tcp_shutdown_addr(new_addr); + dealloc_tcp_method_addr(new_addr); + dealloc_tcp_method_addr(map); + return (ret); + } + BMI_socket_collection_add(tcp_socket_collection_p, new_addr); + + dealloc_tcp_method_addr(map); + return (0); + +} + + +/* tcp_do_work_recv() + * + * does work on a TCP address that is ready to recv data. 
/* tcp_do_work_recv()
 *
 * does work on a TCP address that is ready to recv data.
 *
 * Handles, in order: accepting new connections on listening addresses,
 * continuing a receive that is already in flight, and pulling a new
 * message header off the wire to start (or match) a receive.
 *
 * sets *stall_flag when poll reported readable but no progress was made
 * returns 0 on success, -errno on failure
 */
static int tcp_do_work_recv(bmi_method_addr_p map, int* stall_flag)
{

    method_op_p active_method_op = NULL;
    int ret = -1;
    void *new_buffer = NULL;
    struct op_list_search_key key;
    struct tcp_msg_header new_header;
    struct tcp_addr *tcp_addr_data = (struct tcp_addr *) map->method_data;
    struct tcp_op *tcp_op_data = NULL;
    int tmp_errno;
    int tmp;
    bmi_size_t old_amt_complete = 0;
    time_t current_time;

    *stall_flag = 1;

    /* figure out if this is a new connection */
    if (tcp_addr_data->server_port)
    {
        /* just try to accept connection- no work yet */
        *stall_flag = 0;
        return (handle_new_connection(map));
    }

    /* look for a recv for this address that is already in flight */
    active_method_op = find_recv_inflight(map);
    /* see if we found one in progress... */
    if (active_method_op)
    {
        tcp_op_data = (struct tcp_op *) active_method_op->method_data;
        if (active_method_op->mode == TCP_MODE_REND &&
            tcp_op_data->tcp_op_state == BMI_TCP_BUFFERING)
        {
            /* rendezvous payload is never buffered; we must wait for
             * the matching recv post before touching the socket
             */
            return (0);
        }
        else
        {
            /* remember how far along we were so we can detect a
             * zero-byte read below
             */
            old_amt_complete = active_method_op->amt_complete;
            ret = work_on_recv_op(active_method_op, stall_flag);
            gossip_debug(GOSSIP_BMI_DEBUG_TCP, "actual_size=%d, "
                         "amt_complete=%d, old_amt_complete=%d\n",
                         (int)active_method_op->actual_size,
                         (int)active_method_op->amt_complete,
                         (int)old_amt_complete);

            /* poll() said readable but we moved nothing: count these
             * and drop the connection once the limit is exceeded
             */
            if ((ret == 0) &&
                (old_amt_complete == active_method_op->amt_complete) &&
                active_method_op->actual_size &&
                (active_method_op->amt_complete <
                 active_method_op->actual_size))
            {
                gossip_debug(
                    GOSSIP_BMI_DEBUG_TCP, "Warning: bmi_tcp unable "
                    "to recv any data reported by poll(). [1]\n");

                if (tcp_addr_data->zero_read_limit++ ==
                    BMI_TCP_ZERO_READ_LIMIT)
                {
                    gossip_debug(GOSSIP_BMI_DEBUG_TCP,
                                 "...dropping connection.\n");
                    tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-EPIPE));
                }
            }
            else
            {
                /* any progress resets the zero-read counter */
                tcp_addr_data->zero_read_limit = 0;
            }
            return(ret);
        }
    }

    /* let's see if the entire header is ready to be received.  If so
     * we will go ahead and pull it.  Otherwise, we will try again later.
     * It isn't worth the complication of reading only a partial message
     * header - we really want it atomically
     */
    ret = BMI_sockio_nbpeek(tcp_addr_data->socket,
                            new_header.enc_hdr, TCP_ENC_HDR_SIZE);
    if (ret < 0)
    {
        tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-WSAGetLastError()));
        return (0);
    }

    if (ret == 0)
    {
        /* readable per poll() but zero bytes peeked; same zero-read
         * accounting as above
         */
        gossip_debug(
            GOSSIP_BMI_DEBUG_TCP, "Warning: bmi_tcp unable "
            "to recv any data reported by poll(). [2]\n");

        if (tcp_addr_data->zero_read_limit++ ==
            BMI_TCP_ZERO_READ_LIMIT)
        {
            gossip_debug(GOSSIP_BMI_DEBUG_TCP,
                         "...dropping connection.\n");
            tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-EPIPE));
        }
        return(0);
    }
    else
    {
        tcp_addr_data->zero_read_limit = 0;
    }

    if (ret < TCP_ENC_HDR_SIZE)
    {
        /* partial header: start (or check) a timer so a peer that
         * stalls mid-header eventually gets disconnected
         */
        current_time = time(NULL);
        if(!tcp_addr_data->short_header_timer)
        {
            tcp_addr_data->short_header_timer = current_time;
        }
        else if((current_time - tcp_addr_data->short_header_timer) >
                BMI_TCP_HEADER_WAIT_SECONDS)
        {
            gossip_err("Error: incomplete BMI TCP header after %d seconds, closing connection.\n",
                       BMI_TCP_HEADER_WAIT_SECONDS);
            tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-EPIPE));
            return (0);
        }

        /* header not ready yet, but we will keep hoping */
        return (0);
    }

    tcp_addr_data->short_header_timer = 0;
    *stall_flag = 0;
    gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "Reading header for new op.\n");
    /* full header is available: consume it for real this time */
    ret = BMI_sockio_nbrecv(tcp_addr_data->socket,
                            new_header.enc_hdr, TCP_ENC_HDR_SIZE);
    if (ret < TCP_ENC_HDR_SIZE)
    {
        tmp_errno = WSAGetLastError();
        gossip_err("Error: BMI_sockio_nbrecv: %d\n", tmp_errno);
        tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-tmp_errno));
        return (0);
    }

    /* decode the header */
    BMI_TCP_DEC_HDR(new_header);

    /* so we have the header.  now what?  These are the possible
     * scenarios:
     * a) unexpected message
     * b) eager message for which a recv has been posted
     * c) eager message for which a recv has not been posted
     * d) rendezvous message for which a recv has been posted
     * e) rendezvous message for which a recv has not been posted
     * f) eager message for which a rend. recv has been posted
     */

    /* check magic number of message */
    if(new_header.magic_nr != BMI_MAGIC_NR)
    {
        gossip_err("Error: bad magic in BMI TCP message.\n");
        tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-EBADMSG));
        return(0);
    }

    gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "Received new message; mode: %d.\n",
                  (int) new_header.mode);
    gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "tag: %d\n", (int) new_header.tag);

    if (new_header.mode == TCP_MODE_UNEXP)
    {
        /* scenario (a): unexpected message — no matching post needed;
         * buffer it and complete into the unexpected queue
         */
        /* allocate the operation structure */
        active_method_op = alloc_tcp_method_op();
        if (!active_method_op)
        {
            tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-ENOMEM));
            return (bmi_tcp_errno_to_pvfs(-ENOMEM));
        }
        /* create data buffer */
        new_buffer = malloc(new_header.size);
        if (!new_buffer)
        {
            dealloc_tcp_method_op(active_method_op);
            tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-ENOMEM));
            return (bmi_tcp_errno_to_pvfs(-ENOMEM));
        }

        /* set the fields */
        active_method_op->send_recv = BMI_RECV;
        active_method_op->addr = map;
        active_method_op->actual_size = new_header.size;
        active_method_op->expected_size = 0;
        active_method_op->amt_complete = 0;
        active_method_op->env_amt_complete = TCP_ENC_HDR_SIZE;
        active_method_op->msg_tag = new_header.tag;
        active_method_op->buffer = new_buffer;
        active_method_op->mode = TCP_MODE_UNEXP;
        /* single-element list aliases the op's own buffer/size fields */
        active_method_op->buffer_list = &(active_method_op->buffer);
        active_method_op->size_list = &(active_method_op->actual_size);
        active_method_op->list_count = 1;
        tcp_op_data = (struct tcp_op *) active_method_op->method_data;
        tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS;
        tcp_op_data->env = new_header;

        op_list_add(op_list_array[IND_RECV_INFLIGHT], active_method_op);
        /* grab some data if we can */
        return (work_on_recv_op(active_method_op, &tmp));
    }

    memset(&key, 0, sizeof(struct op_list_search_key));
    key.method_addr = map;
    key.method_addr_yes = 1;
    key.msg_tag = new_header.tag;
    key.msg_tag_yes = 1;

    /* look for a match within the posted operations */
    active_method_op = op_list_search(op_list_array[IND_RECV], &key);

    if (active_method_op)
    {
        /* scenarios (b), (d), (f): matching recv already posted */
        /* make sure it isn't too big */
        if (new_header.size > active_method_op->expected_size)
        {
            gossip_err("Error: message ordering violation;\n");
            gossip_err("Error: message too large for next buffer.\n");
            gossip_err("Error: incoming size: %ld, expected size: %ld\n",
                       (long) new_header.size,
                       (long) active_method_op->expected_size);
            /* TODO: return error here or do something else? */
            return (bmi_tcp_errno_to_pvfs(-EPROTO));
        }

        /* we found a match.  go work on it and return */
        op_list_remove(active_method_op);
        active_method_op->env_amt_complete = TCP_ENC_HDR_SIZE;
        active_method_op->actual_size = new_header.size;
        op_list_add(op_list_array[IND_RECV_INFLIGHT], active_method_op);
        return (work_on_recv_op(active_method_op, &tmp));
    }

    /* no match anywhere.  Start a new operation */
    /* scenarios (c), (e): no recv posted yet; eager messages are
     * buffered, rendezvous messages wait (NULL buffer) for the post
     */
    /* allocate the operation structure */
    active_method_op = alloc_tcp_method_op();
    if (!active_method_op)
    {
        tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-ENOMEM));
        return (bmi_tcp_errno_to_pvfs(-ENOMEM));
    }

    if (new_header.mode == TCP_MODE_EAGER)
    {
        /* create data buffer for eager messages */
        new_buffer = malloc(new_header.size);
        if (!new_buffer)
        {
            dealloc_tcp_method_op(active_method_op);
            tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-ENOMEM));
            return (bmi_tcp_errno_to_pvfs(-ENOMEM));
        }
    }
    else
    {
        new_buffer = NULL;
    }

    /* set the fields */
    active_method_op->send_recv = BMI_RECV;
    active_method_op->addr = map;
    active_method_op->actual_size = new_header.size;
    active_method_op->expected_size = 0;
    active_method_op->amt_complete = 0;
    active_method_op->env_amt_complete = TCP_ENC_HDR_SIZE;
    active_method_op->msg_tag = new_header.tag;
    active_method_op->buffer = new_buffer;
    active_method_op->mode = new_header.mode;
    active_method_op->buffer_list = &(active_method_op->buffer);
    active_method_op->size_list = &(active_method_op->actual_size);
    active_method_op->list_count = 1;
    tcp_op_data = (struct tcp_op *) active_method_op->method_data;
    /* BUFFERING state: payload lands in our scratch buffer until a
     * matching recv is posted
     */
    tcp_op_data->tcp_op_state = BMI_TCP_BUFFERING;
    tcp_op_data->env = new_header;

    op_list_add(op_list_array[IND_RECV_INFLIGHT], active_method_op);

    /* grab some data if we can */
    if (new_header.mode == TCP_MODE_EAGER)
    {
        return (work_on_recv_op(active_method_op, &tmp));
    }

    return (0);
}
+ */ +static int work_on_send_op(method_op_p my_method_op, + int *blocked_flag, int* stall_flag) +{ + int ret = -1; + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) my_method_op->addr->method_data; + struct tcp_op *tcp_op_data = (struct tcp_op *) my_method_op->method_data; + + *blocked_flag = 1; + *stall_flag = 0; + + /* make sure that the connection is done before we continue */ + if (tcp_addr_data->not_connected) + { + ret = tcp_sock_init(my_method_op->addr); + if (ret < 0) + { + PVFS_perror_gossip("Error: socket failed to init", ret); + /* tcp_sock_init() returns BMI error code */ + tcp_forget_addr(my_method_op->addr, 0, ret); + return (0); + } + if (tcp_addr_data->not_connected) + { + /* try again later- still could not connect */ + tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS; + return (0); + } + } + + ret = payload_progress(tcp_addr_data->socket, + my_method_op->buffer_list, + my_method_op->size_list, + my_method_op->list_count, + my_method_op->actual_size, + &(my_method_op->list_index), + &(my_method_op->cur_index_complete), + BMI_SEND, + tcp_op_data->env.enc_hdr, + &my_method_op->env_amt_complete); + if (ret < 0) + { + PVFS_perror_gossip("Error: payload_progress", ret); + /* payload_progress() returns BMI error codes */ + tcp_forget_addr(my_method_op->addr, 0, ret); + return (0); + } + + if(ret == 0) + *stall_flag = 1; + + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "Sent: %d bytes of data.\n", ret); + my_method_op->amt_complete += ret; + assert(my_method_op->amt_complete <= my_method_op->actual_size); + + if (my_method_op->amt_complete == my_method_op->actual_size && my_method_op->env_amt_complete == TCP_ENC_HDR_SIZE) + { + /* we are done */ + my_method_op->error_code = 0; + BMI_socket_collection_remove_write_bit(tcp_socket_collection_p, + my_method_op->addr); + op_list_remove(my_method_op); + ((struct tcp_op*)(my_method_op->method_data))->tcp_op_state = + BMI_TCP_COMPLETE; + op_list_add(completion_array[my_method_op->context_id], my_method_op); + 
*blocked_flag = 0; + } + else + { + /* there is still more work to do */ + tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS; + } + + return (0); +} + + +/* + * work_on_recv_op() + * + * used to perform work on a recv operation. this is called by the poll + * function. + * NOTE: this function assumes the method header has already been read. + * + * returns 0 on success, -errno on failure. + */ +static int work_on_recv_op(method_op_p my_method_op, int* stall_flag) +{ + + int ret = -1; + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) my_method_op->addr->method_data; + struct tcp_op *tcp_op_data = (struct tcp_op *) my_method_op->method_data; + + *stall_flag = 1; + + if (my_method_op->actual_size != 0) + { + /* now let's try to recv some actual data */ + ret = payload_progress(tcp_addr_data->socket, + my_method_op->buffer_list, + my_method_op->size_list, + my_method_op->list_count, + my_method_op->actual_size, + &(my_method_op->list_index), + &(my_method_op->cur_index_complete), + BMI_RECV, + NULL, + 0); + if (ret < 0) + { + PVFS_perror_gossip("Error: payload_progress", ret); + /* payload_progress() returns BMI error codes */ + tcp_forget_addr(my_method_op->addr, 0, ret); + return (0); + } + } + else + { + ret = 0; + } + + if(ret > 0) + *stall_flag = 0; + + my_method_op->amt_complete += ret; + assert(my_method_op->amt_complete <= my_method_op->actual_size); + + if (my_method_op->amt_complete == my_method_op->actual_size) + { + /* we are done */ + op_list_remove(my_method_op); + if (tcp_op_data->tcp_op_state == BMI_TCP_BUFFERING) + { + /* queue up to wait on matching post recv */ + op_list_add(op_list_array[IND_RECV_EAGER_DONE_BUFFERING], + my_method_op); + } + else + { + my_method_op->error_code = 0; + if (my_method_op->mode == TCP_MODE_UNEXP) + { + op_list_add(op_list_array[IND_COMPLETE_RECV_UNEXP], + my_method_op); + } + else + { + ((struct tcp_op*)(my_method_op->method_data))->tcp_op_state = + BMI_TCP_COMPLETE; + 
op_list_add(completion_array[my_method_op->context_id], my_method_op); + } + } + } + + return (0); +} + + +/* tcp_do_work_error() + * + * handles a tcp address that has indicated an error during polling. + * + * returns 0 on success, -errno on failure + */ +static int tcp_do_work_error(bmi_method_addr_p map) +{ + struct tcp_addr *tcp_addr_data = NULL; + int buf; + int ret; + int tmp_errno; + + tcp_addr_data = (struct tcp_addr *) map->method_data; + + /* perform a read on the socket so that we can get a real errno */ + ret = recv(tcp_addr_data->socket, &buf, sizeof(int), 0); + if (ret == 0) + tmp_errno = EPIPE; /* report other side closed socket with this */ + else + tmp_errno = WSAGetLastError(); + + gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Error: bmi_tcp: %d\n", + tmp_errno); + + if (tcp_addr_data->server_port) + { + /* Ignore this and hope it goes away... we don't want to lose + * our local socket */ + dealloc_tcp_method_addr(map); + gossip_lerr("Warning: error polling on server socket, continuing.\n"); + return (0); + } + + if(tmp_errno == 0) + tmp_errno = EPROTO; + + tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-tmp_errno)); + + return (0); +} + +#if defined(USE_TRUSTED) && defined(__PVFS2_CLIENT__) +/* + * tcp_enable_trusted() + * Ideally, this function should look up the security configuration of + * the server and determines + * if it needs to bind to any specific port locally or not.. + * For now look at the FIXME below. + */ +static int tcp_enable_trusted(struct tcp_addr *tcp_addr_data) +{ + /* + * FIXME: + * For now, there is no way for us to check if a given + * server is actually using port protection or not. + * For now we unconditionally use a trusted port range + * as long as USE_TRUSTED is #defined. + * + * Although most of the time we expect users + * to be using a range of 0-1024, it is hard to keep probing + * until one gets a port in the range specified. + * Hence this is a temporary fix. 
we will see if this + * requirement even needs to be met at all. + */ + static unsigned short my_requested_port = 1023; + unsigned short my_local_port = 0; + struct sockaddr_in my_local_sockaddr; + socklen_t len = sizeof(struct sockaddr_in); + memset(&my_local_sockaddr, 0, sizeof(struct sockaddr_in)); + + /* setup for a fast restart to avoid bind addr in use errors */ + if (BMI_sockio_set_sockopt(tcp_addr_data->socket, SO_REUSEADDR, 1) < 0) + { + gossip_lerr("Could not set SO_REUSEADDR on local socket (port %hd)\n", my_local_port); + } + if (BMI_sockio_bind_sock(tcp_addr_data->socket, my_requested_port) < 0) + { + gossip_lerr("Could not bind to local port %hd: %s\n", + my_requested_port, strerror(errno)); + } + else { + my_requested_port--; + } + my_local_sockaddr.sin_family = AF_INET; + if (getsockname(tcp_addr_data->socket, + (struct sockaddr *)&my_local_sockaddr, &len) == 0) + { + my_local_port = ntohs(my_local_sockaddr.sin_port); + } + gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Bound locally to port: %hd\n", my_local_port); + return 0; +} + +#endif + +#if defined(USE_TRUSTED) && defined(__PVFS2_SERVER__) + +static char *bad_errors[] = { + "invalid network address", + "invalid port", + "invalid network address and port" +}; + +/* + * tcp_allow_trusted() + * if trusted ports was enabled make sure + * that we can accept a particular connection from a given + * client + */ +static int tcp_allow_trusted(struct sockaddr_in *peer_sockaddr) +{ + char *peer_hostname = inet_ntoa(peer_sockaddr->sin_addr); + unsigned short peer_port = ntohs(peer_sockaddr->sin_port); + int i, what_failed = -1; + + /* Don't refuse connects if there were any + * parse errors or if it is not enabled in the config file + */ + if (gtcp_allowed_connection->port_enforce == 0 + && gtcp_allowed_connection->network_enforce == 0) + { + return 0; + } + /* make sure that the client is within the allowed network */ + if (gtcp_allowed_connection->network_enforce == 1) + { + /* Always allow localhost to connect 
*/ + if (ntohl(peer_sockaddr->sin_addr.s_addr) == INADDR_LOOPBACK) + { + goto port_check; + } + for (i = 0; i < gtcp_allowed_connection->network_count; i++) + { + /* check with all the masks */ + if ((peer_sockaddr->sin_addr.s_addr & gtcp_allowed_connection->netmask[i].s_addr) + != (gtcp_allowed_connection->network[i].s_addr & gtcp_allowed_connection->netmask[i].s_addr )) + { + continue; + } + else { + goto port_check; + } + } + /* not from a trusted network */ + what_failed = 0; + } +port_check: + /* make sure that the client port numbers are within specified limits */ + if (gtcp_allowed_connection->port_enforce == 1) + { + if (peer_port < gtcp_allowed_connection->ports[0] + || peer_port > gtcp_allowed_connection->ports[1]) + { + what_failed = (what_failed < 0) ? 1 : 2; + } + } + /* okay, we are good to go */ + if (what_failed < 0) + { + return 0; + } + /* no good */ + gossip_err("Rejecting client %s on port %d: %s\n", + peer_hostname, peer_port, bad_errors[what_failed]); + return -1; +} + +#endif + +/* + * tcp_accept_init() + * + * used to establish a connection from the server side. Attempts an + * accept call and provides the socket if it succeeds. + * + * returns 0 on success, -errno on failure. + */ +static int tcp_accept_init(int *socket, char** peer) +{ + + int ret = -1; + int tmp_errno = 0; + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) tcp_method_params.listen_addr->method_data; + int oldfl = 0; + struct sockaddr_in peer_sockaddr; + int peer_sockaddr_size = sizeof(struct sockaddr_in); + char* tmp_peer; + + /* do we have a socket on this end yet? 
*/ + if (tcp_addr_data->socket < 0) + { + ret = tcp_server_init(); + if (ret < 0) + { + return (ret); + } + } + + *socket = accept(tcp_addr_data->socket, (struct sockaddr*)&peer_sockaddr, + (int *)&peer_sockaddr_size); + + if (*socket < 0) + { + tmp_errno = WSAGetLastError(); + if ((tmp_errno == WSATRY_AGAIN) || + (tmp_errno == WSAEWOULDBLOCK) || + (tmp_errno == WSAENETDOWN) || + /* (tmp_errno == EPROTO) || */ + (tmp_errno == WSAENOPROTOOPT) || + /* (tmp_errno == EHOSTDOWN) || */ + /* (tmp_errno == ENONET) || */ + (tmp_errno == WSAEHOSTUNREACH) || + (tmp_errno == WSAEOPNOTSUPP) || + (tmp_errno == WSAENETUNREACH) || + /* (tmp_errno == WSAENFILE) || */ + (tmp_errno == WSAEMFILE)) + { + /* try again later */ + if (tmp_errno == EMFILE) + { + gossip_err("Error: accept: %d (continuing)\n", tmp_errno); + bmi_method_addr_drop_callback(BMI_tcp_method_name); + } + return (0); + } + else + { + gossip_err("Error: accept: %d\n", tmp_errno); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + } + +#if defined(USE_TRUSTED) && defined(__PVFS2_SERVER__) + + /* make sure that we are allowed to accept this connection */ + if (tcp_allow_trusted(&peer_sockaddr) < 0) + { + /* Force closure of the connection */ + close(*socket); + return (bmi_tcp_errno_to_pvfs(-EACCES)); + } + +#endif + + /* we accepted a new connection. turn off Nagle's algorithm. 
*/ + if (BMI_sockio_set_tcpopt(*socket, TCP_NODELAY, 1) < 0) + { + tmp_errno = WSAGetLastError(); + gossip_lerr("Error: failed to set TCP_NODELAY option.\n"); + closesocket(*socket); + return (bmi_tcp_errno_to_pvfs(-tmp_errno)); + } + + /* set it to non-blocking operation */ + /*oldfl = fcntl(*socket, F_GETFL, 0); + if (!(oldfl & O_NONBLOCK)) + { + fcntl(*socket, F_SETFL, oldfl | O_NONBLOCK); + }*/ + SET_NONBLOCK(*socket); + + /* allocate ip address string */ + tmp_peer = inet_ntoa(peer_sockaddr.sin_addr); + *peer = (char*)malloc(strlen(tmp_peer)+1); + if(!(*peer)) + { + closesocket(*socket); + return(bmi_tcp_errno_to_pvfs(-BMI_ENOMEM)); + } + strcpy(*peer, tmp_peer); + + return (0); +} + + +/* alloc_tcp_method_op() + * + * creates a new method op with defaults filled in for tcp. + * + * returns pointer to structure on success, NULL on failure + */ +static method_op_p alloc_tcp_method_op(void) +{ + method_op_p my_method_op = NULL; + + my_method_op = bmi_alloc_method_op(sizeof(struct tcp_op)); + + /* we trust alloc_method_op to zero it out */ + + return (my_method_op); +} + + +/* dealloc_tcp_method_op() + * + * destroys an existing tcp method op, freeing segment lists if + * needed + * + * no return value + */ +static void dealloc_tcp_method_op(method_op_p old_op) +{ + bmi_dealloc_method_op(old_op); + return; +} + +/* tcp_post_send_generic() + * + * Submits send operations (low level). 
+ * + * returns 0 on success that requires later poll, returns 1 on instant + * completion, -errno on failure + */ +static int tcp_post_send_generic(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + enum bmi_buffer_type buffer_type, + struct tcp_msg_header my_header, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) +{ + struct tcp_addr *tcp_addr_data = (struct tcp_addr *) dest->method_data; + method_op_p query_op = NULL; + int ret = -1; + bmi_size_t total_size = 0; + bmi_size_t amt_complete = 0; + bmi_size_t env_amt_complete = 0; + struct op_list_search_key key; + int list_index = 0; + bmi_size_t cur_index_complete = 0; + PINT_event_id eid = 0; + + if(PINT_EVENT_ENABLED) + { + int i = 0; + for(; i < list_count; ++i) + { + total_size += size_list[i]; + } + } + + PINT_EVENT_START( + bmi_tcp_send_event_id, bmi_tcp_pid, NULL, &eid, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + PINT_HINT_GET_HANDLE(hints), + PINT_HINT_GET_OP_ID(hints), + total_size); + + /* Three things can happen here: + * a) another op is already in queue for the address, so we just + * queue up + * b) we can send the whole message and return + * c) we send part of the message and queue the rest + */ + + /* NOTE: on the post_send side of an operation, it doesn't really + * matter whether the op is going to be eager or rendezvous. It is + * handled the same way (except for how the header is filled in). + * The difference is in the recv processing for TCP. + */ + + /* NOTE: we also don't care what the buffer_type says, TCP could care + * less what buffers it is using. + */ + + /* encode the message header */ + BMI_TCP_ENC_HDR(my_header); + + /* the first thing we must do is find out if another send is queued + * up for this address so that we don't mess up our ordering. 
*/ + memset(&key, 0, sizeof(struct op_list_search_key)); + key.method_addr = dest; + key.method_addr_yes = 1; + query_op = op_list_search(op_list_array[IND_SEND], &key); + if (query_op) + { + /* queue up operation */ + ret = enqueue_operation(op_list_array[IND_SEND], BMI_SEND, + dest, (void **) buffer_list, + size_list, list_count, 0, 0, + id, BMI_TCP_INPROGRESS, my_header, user_ptr, + my_header.size, 0, + context_id, + eid); + + /* TODO: is this causing deadlocks? See similar call in recv + * path for another example. This particular one seems to be an + * issue under a heavy bonnie++ load that Neill has been + * debugging. Comment out for now to see if the problem goes + * away. + */ +#if 0 + if (ret >= 0) + { + /* go ahead and try to do some work while we are in this + * function since we appear to be backlogged. Make sure that + * we do not wait in the poll, however. + */ + ret = tcp_do_work(0); + } +#endif + if (ret < 0) + { + gossip_err("Error: enqueue_operation() or tcp_do_work() returned: %d\n", ret); + } + return (ret); + } + + /* make sure the connection is established */ + ret = tcp_sock_init(dest); + if (ret < 0) + { + gossip_debug(GOSSIP_BMI_DEBUG_TCP, "tcp_sock_init() failure.\n"); + /* tcp_sock_init() returns BMI error code */ + tcp_forget_addr(dest, 0, ret); + PINT_EVENT_END(bmi_tcp_send_event_id, bmi_tcp_pid, NULL, 0, ret); + return (ret); + } + + tcp_addr_data = (struct tcp_addr *) dest->method_data; + +#if 0 + /* TODO: this is a hack for testing! */ + /* disables immediate send completion... 
*/ + ret = enqueue_operation(op_list_array[IND_SEND], BMI_SEND, + dest, buffer_list, size_list, list_count, 0, 0, + id, BMI_TCP_INPROGRESS, my_header, user_ptr, + my_header.size, 0, + context_id); + return(ret); +#endif + + if (tcp_addr_data->not_connected) + { + /* if the connection is not completed, queue up for later work */ + ret = enqueue_operation(op_list_array[IND_SEND], BMI_SEND, + dest, (void **) buffer_list, size_list, + list_count, 0, 0, + id, BMI_TCP_INPROGRESS, my_header, user_ptr, + my_header.size, 0, + context_id, + eid); + if(ret < 0) + { + gossip_err("Error: enqueue_operation() returned: %d\n", ret); + } + return (ret); + } + + /* try to send some data */ + env_amt_complete = 0; + ret = payload_progress(tcp_addr_data->socket, + (void **) buffer_list, + size_list, list_count, my_header.size, &list_index, + &cur_index_complete, BMI_SEND, my_header.enc_hdr, &env_amt_complete); + if (ret < 0) + { + PVFS_perror_gossip("Error: payload_progress", ret); + /* payload_progress() returns BMI error codes */ + tcp_forget_addr(dest, 0, ret); + PINT_EVENT_END(bmi_tcp_send_event_id, bmi_tcp_pid, NULL, eid, 0, ret); + return (ret); + } + + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "Sent: %d bytes of data.\n", ret); + amt_complete = ret; + assert(amt_complete <= my_header.size); + if (amt_complete == my_header.size && env_amt_complete == TCP_ENC_HDR_SIZE) + { + /* we are already done */ + PINT_EVENT_END(bmi_tcp_send_event_id, bmi_tcp_pid, + NULL, eid, 0, amt_complete); + return (1); + } + + /* queue up the remainder */ + ret = enqueue_operation(op_list_array[IND_SEND], BMI_SEND, + dest, (void **) buffer_list, + size_list, list_count, + amt_complete, env_amt_complete, id, + BMI_TCP_INPROGRESS, my_header, user_ptr, + my_header.size, 0, context_id, eid); + + if(ret < 0) + { + gossip_err("Error: enqueue_operation() returned: %d\n", ret); + } + return (ret); +} + + +/* payload_progress() + * + * makes progress on sending/recving data payload portion of a message + * + * 
returns amount completed on success, -errno on failure + */ +static int payload_progress(int s, void *const *buffer_list, const bmi_size_t* + size_list, int list_count, bmi_size_t total_size, int* list_index, + bmi_size_t* current_index_complete, enum bmi_op_type send_recv, + char* enc_hdr, bmi_size_t* env_amt_complete) +{ + int i; + int count = 0; + int ret; + int completed; + /* used for finding the stopping point on short receives */ + int final_index = list_count-1; + bmi_size_t final_size = size_list[list_count-1]; + bmi_size_t sum = 0; + int vector_index = 0; + int header_flag = 0; + int tmp_env_done = 0; + + if(send_recv == BMI_RECV) + { + /* find out if we should stop short in list processing */ + for(i=0; i= total_size) + { + final_index = i; + final_size = size_list[i] - (sum-total_size); + break; + } + } + } + + assert(list_count > *list_index); + + /* make sure we don't overrun our preallocated iovec array */ + if((list_count - (*list_index)) > BMI_TCP_IOV_COUNT) + { + list_count = (*list_index) + BMI_TCP_IOV_COUNT; + } + + /* do we need to send any of the header? 
*/ + if(send_recv == BMI_SEND && *env_amt_complete < TCP_ENC_HDR_SIZE) + { + stat_io_vector[vector_index].buf = &enc_hdr[*env_amt_complete]; + stat_io_vector[vector_index].len = TCP_ENC_HDR_SIZE - *env_amt_complete; + count++; + vector_index++; + header_flag = 1; + } + + /* setup vector */ + stat_io_vector[vector_index].buf = + (char*)buffer_list[*list_index] + *current_index_complete; + count++; + if(final_index == 0) + { + stat_io_vector[vector_index].len = final_size - *current_index_complete; + } + else + { + stat_io_vector[vector_index].len = + size_list[*list_index] - *current_index_complete; + for(i = (*list_index + 1); i < list_count; i++) + { + vector_index++; + count++; + stat_io_vector[vector_index].buf = (CHAR *) buffer_list[i]; + if(i == final_index) + { + stat_io_vector[vector_index].len = final_size; + break; + } + else + { + stat_io_vector[vector_index].len = size_list[i]; + } + } + } + + assert(count > 0); + + if(send_recv == BMI_RECV) + { + ret = BMI_sockio_nbvector(s, stat_io_vector, count, 1); + } + else + { + ret = BMI_sockio_nbvector(s, stat_io_vector, count, 0); + } + + /* if error or nothing done, return now */ + if(ret == 0) + return(0); + if(ret <= 0) + return(bmi_tcp_errno_to_pvfs(-WSAGetLastError())); + + completed = ret; + if(header_flag && (completed >= 0)) + { + /* take care of completed header status */ + tmp_env_done = TCP_ENC_HDR_SIZE - *env_amt_complete; + if(tmp_env_done > completed) + tmp_env_done = completed; + completed -= tmp_env_done; + ret -= tmp_env_done; + (*env_amt_complete) += tmp_env_done; + } + + i=header_flag; + while(completed > 0) + { + /* take care of completed data payload */ + if(completed >= stat_io_vector[i].len) + { + completed -= stat_io_vector[i].len; + *current_index_complete = 0; + (*list_index)++; + i++; + } + else + { + *current_index_complete += completed; + completed = 0; + } + } + + return(ret); +} + +static void bmi_set_sock_buffers(int socket){ + //Set socket buffer sizes: + 
gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Default socket buffers send:%d receive:%d\n", + GET_SENDBUFSIZE(socket), GET_RECVBUFSIZE(socket)); + gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Setting socket buffer size for send:%d receive:%d \n", + tcp_buffer_size_send, tcp_buffer_size_receive); + if( tcp_buffer_size_receive != 0) + SET_RECVBUFSIZE(socket,tcp_buffer_size_receive); + if( tcp_buffer_size_send != 0) + SET_SENDBUFSIZE(socket,tcp_buffer_size_send); + gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Reread socket buffers send:%d receive:%d\n", + GET_SENDBUFSIZE(socket), GET_RECVBUFSIZE(socket)); +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/bmi/bmi_wintcp/socket-collection-epoll.c b/src/io/bmi/bmi_wintcp/socket-collection-epoll.c new file mode 100755 index 0000000..3683846 --- /dev/null +++ b/src/io/bmi/bmi_wintcp/socket-collection-epoll.c @@ -0,0 +1,203 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * this is an implementation of a socket collection library. It can be + * used to maintain a dynamic list of sockets and perform polling + * operations. + */ + +/* + * NOTE: I am making read bits implicit in the implementation. A poll + * will always check to see if there is data to be read on a socket. + */ + +#include +#include +#include +#include +#include + +#include "gossip.h" +#include "socket-collection-epoll.h" +#include "bmi-method-support.h" +#include "bmi-tcp-addressing.h" +#include "gen-locks.h" + +/* errors that can occur on a poll socket */ +#define ERRMASK (EPOLLERR|EPOLLHUP) + +/* hint to kernel about how many sockets we expect to poll over */ +#define EPOLL_CREATE_SIZE 128 + +/* socket_collection_init() + * + * creates a new socket collection. It also acquires the server socket + * from the caller if it is available. 
Passing in a negative value + * indicates that this is being used on a client node and there is no + * server socket. + * + * returns a pointer to the collection on success, NULL on failure. + */ +socket_collection_p BMI_socket_collection_init(int new_server_socket) +{ + struct epoll_event event; + socket_collection_p tmp_scp = NULL; + int ret = -1; + + tmp_scp = (struct socket_collection*) malloc(sizeof(struct + socket_collection)); + if(!tmp_scp) + { + return(NULL); + } + + memset(tmp_scp, 0, sizeof(struct socket_collection)); + + tmp_scp->epfd = epoll_create(EPOLL_CREATE_SIZE); + if(tmp_scp->epfd < 0) + { + gossip_err("Error: epoll_create() failure: %s.\n", strerror(errno)); + free(tmp_scp); + return(NULL); + } + + tmp_scp->server_socket = new_server_socket; + + if(new_server_socket > -1) + { + memset(&event, 0, sizeof(event)); + event.events = (EPOLLIN|EPOLLERR|EPOLLHUP); + event.data.ptr = NULL; + ret = epoll_ctl(tmp_scp->epfd, EPOLL_CTL_ADD, new_server_socket, + &event); + if(ret < 0 && errno != EEXIST) + { + gossip_err("Error: epoll_ctl() failure: %s.\n", strerror(errno)); + free(tmp_scp); + return(NULL); + } + } + + return (tmp_scp); +} + +/* socket_collection_finalize() + * + * destroys a socket collection. IMPORTANT: It DOES NOT destroy the + * addresses contained within the collection, nor does it terminate + * connections. This must be handled elsewhere. + * + * no return values. + */ +void BMI_socket_collection_finalize(socket_collection_p scp) +{ + free(scp); + return; +} + + +/* socket_collection_testglobal() + * + * this function is used to poll to see if any of the new sockets are + * available for work. The array of method addresses and array of + * status fields must be passed into the function by the caller. + * incount specifies the size of these arrays. outcount + * specifies the number of ready addresses. + * + * returns 0 on success, -errno on failure. 
+ */ +int BMI_socket_collection_testglobal(socket_collection_p scp, + int incount, + int *outcount, + bmi_method_addr_p * maps, + int * status, + int poll_timeout) +{ + struct tcp_addr* tcp_addr_data = NULL; + int ret = -1; + int old_errno; + int tmp_count; + int i; + + /* init the outgoing arguments for safety */ + *outcount = 0; + memset(maps, 0, (sizeof(bmi_method_addr_p) * incount)); + memset(status, 0, (sizeof(int) * incount)); + + if(incount == 0) + { + return(0); + } + + /* actually do the epoll_wait() here */ + do + { + tmp_count = incount; + if(tmp_count > BMI_EPOLL_MAX_PER_CYCLE) + tmp_count = BMI_EPOLL_MAX_PER_CYCLE; + + ret = epoll_wait(scp->epfd, scp->event_array, tmp_count, + poll_timeout); + + } while(ret < 0 && errno == EINTR); + old_errno = errno; + + if(ret < 0) + { + return(-old_errno); + } + + /* nothing ready, just return */ + if(ret == 0) + { + return(0); + } + + tmp_count = ret; + + for(i=0; ievent_array[i].events); + + if(scp->event_array[i].events & ERRMASK) + status[*outcount] |= SC_ERROR_BIT; + if(scp->event_array[i].events & POLLIN) + status[*outcount] |= SC_READ_BIT; + if(scp->event_array[i].events & POLLOUT) + status[*outcount] |= SC_WRITE_BIT; + + if(scp->event_array[i].data.ptr == NULL) + { + /* server socket */ + maps[*outcount] = alloc_tcp_method_addr(); + /* TODO: handle this */ + assert(maps[*outcount]); + tcp_addr_data = (maps[*outcount])->method_data; + tcp_addr_data->server_port = 1; + tcp_addr_data->socket = scp->server_socket; + tcp_addr_data->port = -1; + } + else + { + /* normal case */ + maps[*outcount] = scp->event_array[i].data.ptr; + } + + *outcount = (*outcount) + 1; + } + + return (0); +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/bmi/bmi_wintcp/socket-collection-epoll.h b/src/io/bmi/bmi_wintcp/socket-collection-epoll.h new file mode 100755 index 0000000..a4c6ac9 --- /dev/null +++ 
b/src/io/bmi/bmi_wintcp/socket-collection-epoll.h @@ -0,0 +1,120 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * This file contains the visible data structures and function interface + * for a socket collection library. This library can maintain lists of + * sockets and perform polling operations on them. + */ + +/* + * NOTE: I am making read bits implicit in the implementation. A poll + * will always check to see if there is data to be read on a socket. + */ + +#ifndef __SOCKET_COLLECTION_EPOLL_H +#define __SOCKET_COLLECTION_EPOLL_H + +#include +#include + +#include "bmi-method-support.h" +#include "bmi-tcp-addressing.h" +#include "quicklist.h" +#include "gen-locks.h" + +#define BMI_EPOLL_MAX_PER_CYCLE 16 + +struct socket_collection +{ + int epfd; + + struct epoll_event event_array[BMI_EPOLL_MAX_PER_CYCLE]; + + int server_socket; +}; +typedef struct socket_collection* socket_collection_p; + +enum +{ + SC_READ_BIT = 1, + SC_WRITE_BIT = 2, + SC_ERROR_BIT = 4 +}; + +socket_collection_p BMI_socket_collection_init(int new_server_socket); + +/* the bmi_tcp code may try to add a socket to the collection before + * it is fully connected, just ignore in this case + */ +#define BMI_socket_collection_add(s, m) \ +do { \ + struct tcp_addr* tcp_data = (m)->method_data; \ + if(tcp_data->socket > -1){ \ + struct epoll_event event;\ + memset(&event, 0, sizeof(event));\ + event.events = EPOLLIN|EPOLLERR|EPOLLHUP;\ + event.data.ptr = tcp_data->map;\ + epoll_ctl(s->epfd, EPOLL_CTL_ADD, tcp_data->socket, &event);\ + } \ +} while(0) + +#define BMI_socket_collection_remove(s, m) \ +do { \ + struct epoll_event event;\ + struct tcp_addr* tcp_data = (m)->method_data; \ + tcp_data->write_ref_count = 0; \ + memset(&event, 0, sizeof(event));\ + event.events = 0;\ + event.data.ptr = tcp_data->map;\ + epoll_ctl(s->epfd, EPOLL_CTL_DEL, tcp_data->socket, &event);\ +} while(0) + +/* we _must_ have a valid socket at 
this point if we want to write data */ +#define BMI_socket_collection_add_write_bit(s, m) \ +do { \ + struct tcp_addr* tcp_data = (m)->method_data; \ + struct epoll_event event;\ + assert(tcp_data->socket > -1); \ + tcp_data->write_ref_count++; \ + memset(&event, 0, sizeof(event));\ + event.events = EPOLLIN|EPOLLERR|EPOLLHUP|EPOLLOUT;\ + event.data.ptr = tcp_data->map;\ + epoll_ctl(s->epfd, EPOLL_CTL_MOD, tcp_data->socket, &event);\ +} while(0) + +#define BMI_socket_collection_remove_write_bit(s, m) \ +do { \ + struct tcp_addr* tcp_data = (m)->method_data; \ + struct epoll_event event;\ + tcp_data->write_ref_count--; \ + assert(tcp_data->write_ref_count > -1); \ + if (tcp_data->write_ref_count == 0) { \ + memset(&event, 0, sizeof(event));\ + event.events = EPOLLIN|EPOLLERR|EPOLLHUP;\ + event.data.ptr = tcp_data->map;\ + epoll_ctl(s->epfd, EPOLL_CTL_MOD, tcp_data->socket, &event);\ + }\ +} while(0) + +void BMI_socket_collection_finalize(socket_collection_p scp); +int BMI_socket_collection_testglobal(socket_collection_p scp, + int incount, + int *outcount, + bmi_method_addr_p * maps, + int * status, + int poll_timeout); + +#endif /* __SOCKET_COLLECTION_EPOLL_H */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/bmi/bmi_wintcp/socket-collection.c b/src/io/bmi/bmi_wintcp/socket-collection.c new file mode 100755 index 0000000..8a6c812 --- /dev/null +++ b/src/io/bmi/bmi_wintcp/socket-collection.c @@ -0,0 +1,477 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * this is an implementation of a socket collection library. It can be + * used to maintain a dynamic list of sockets and perform polling + * operations. + */ + +/* + * NOTE: I am making read bits implicit in the implementation. A poll + * will always check to see if there is data to be read on a socket. 
+ */ + +#include + +#include +#include +#include +#include + +#include "gossip.h" +#include "socket-collection.h" +#include "bmi-method-support.h" +#include "bmi-tcp-addressing.h" +#include "gen-locks.h" + +#include "pvfs2-debug.h" + +/* errors that can occur on a poll socket */ +#define ERRMASK (POLLERR+POLLHUP+POLLNVAL) + +#define POLLFD_ARRAY_START 32 +#define POLLFD_ARRAY_INC 32 + +/* socket_collection_init() + * + * creates a new socket collection. It also acquires the server socket + * from the caller if it is available. Passing in a negative value + * indicates that this is being used on a client node and there is no + * server socket. + * + * returns a pointer to the collection on success, NULL on failure. + */ +socket_collection_p BMI_socket_collection_init(int new_server_socket) +{ + + socket_collection_p tmp_scp = NULL; + + tmp_scp = (struct socket_collection*) malloc(sizeof(struct + socket_collection)); + if(!tmp_scp) + { + return(NULL); + } + + memset(tmp_scp, 0, sizeof(struct socket_collection)); + + gen_mutex_init(&tmp_scp->queue_mutex); + + tmp_scp->pollfd_array = (struct + pollfd*)malloc(POLLFD_ARRAY_START*sizeof(WSAPOLLFD)); + + tmp_scp->addr_array = + (bmi_method_addr_p*)malloc(POLLFD_ARRAY_START*sizeof(bmi_method_addr_p)); + if(!tmp_scp->addr_array) + { + free(tmp_scp->pollfd_array); + free(tmp_scp); + return NULL; + } + /* not used on Windows + if (pipe(tmp_scp->pipe_fd) < 0) + if (!CreatePipe(&(tmp_scp->pipe_fd[0]), + &(tmp_scp->pipe_fd[1]), + NULL, 128*1024)) + { + perror("pipe failed:"); + BMI_socket_collection_finalize(tmp_scp); + return NULL; + } + */ + + tmp_scp->array_max = POLLFD_ARRAY_START; + tmp_scp->array_count = 0; + INIT_QLIST_HEAD(&tmp_scp->remove_queue); + INIT_QLIST_HEAD(&tmp_scp->add_queue); + tmp_scp->server_socket = new_server_socket; + + if(new_server_socket > -1) + { + tmp_scp->pollfd_array[tmp_scp->array_count].fd = new_server_socket; + tmp_scp->pollfd_array[tmp_scp->array_count].events = POLLIN; + 
tmp_scp->addr_array[tmp_scp->array_count] = NULL; + tmp_scp->array_count++; + } + + /* Add the pipe_fd[0] fd to the poll in set always */ + /* -- must be handled separately on Windows + tmp_scp->pollfd_array[tmp_scp->array_count].fd = tmp_scp->pipe_fd[0]; + tmp_scp->pollfd_array[tmp_scp->array_count].events = POLLIN; + tmp_scp->addr_array[tmp_scp->array_count] = NULL; + tmp_scp->array_count++; + */ + + return (tmp_scp); +} + +/* socket_collection_queue() + * + * queues a tcp method_addr for addition or removal from the collection. + * + * returns 0 on success, -errno on failure. + */ +void BMI_socket_collection_queue(socket_collection_p scp, + bmi_method_addr_p map, struct qlist_head* queue) +{ + struct qlist_head* iterator = NULL; + struct qlist_head* scratch = NULL; + struct tcp_addr* tcp_addr_data = NULL; + + /* make sure that this address isn't already slated for addition/removal */ + qlist_for_each_safe(iterator, scratch, &scp->remove_queue) + { + tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); + if(tcp_addr_data->map == map) + { + qlist_del(&tcp_addr_data->sc_link); + break; + } + } + qlist_for_each_safe(iterator, scratch, &scp->add_queue) + { + tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); + if(tcp_addr_data->map == map) + { + qlist_del(&tcp_addr_data->sc_link); + break; + } + } + + /* add it on to the appropriate queue */ + tcp_addr_data = map->method_data; + /* add to head, we are likely to access it again soon */ + qlist_add(&tcp_addr_data->sc_link, queue); + + return; +} + + +/* socket_collection_finalize() + * + * destroys a socket collection. IMPORTANT: It DOES NOT destroy the + * addresses contained within the collection, nor does it terminate + * connections. This must be handled elsewhere. + * + * no return values. 
+ */ +void BMI_socket_collection_finalize(socket_collection_p scp) +{ + free(scp->addr_array); + free(scp->pollfd_array); + free(scp); + return; +} + +/* socket_collection_testglobal() + * + * this function is used to poll to see if any of the new sockets are + * available for work. The array of method addresses and array of + * status fields must be passed into the function by the caller. + * incount specifies the size of these arrays. outcount + * specifies the number of ready addresses. + * + * returns 0 on success, -errno on failure. + */ +int BMI_socket_collection_testglobal(socket_collection_p scp, + int incount, + int *outcount, + bmi_method_addr_p * maps, + int * status, + int poll_timeout) +{ + struct qlist_head* iterator = NULL; + struct qlist_head* scratch = NULL; + struct tcp_addr* tcp_addr_data = NULL; + struct tcp_addr* shifted_tcp_addr_data = NULL; + WSAPOLLFD* tmp_pollfd_array = NULL; + bmi_method_addr_p* tmp_addr_array = NULL; + int ret = -1; + int old_errno; + /* int tmp_count; */ + int i; + int skip_flag; + int out_flag; + /* int pipe_notify = 0; + struct timeval start, end; */ + int allowed_poll_time = poll_timeout; + /* DWORD bytes; */ + +/* + gettimeofday(&start, NULL); +do_again: +*/ + /* pipe_notify = 0; */ + /* init the outgoing arguments for safety */ + *outcount = 0; + memset(maps, 0, (sizeof(bmi_method_addr_p) * incount)); + memset(status, 0, (sizeof(int) * incount)); + + gen_mutex_lock(&scp->queue_mutex); + + /* look for addresses slated for removal */ + qlist_for_each_safe(iterator, scratch, &scp->remove_queue) + { + tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); + qlist_del(&tcp_addr_data->sc_link); + /* take out of poll array, shift last entry into its place */ + if(tcp_addr_data->sc_index > -1) + { + scp->pollfd_array[tcp_addr_data->sc_index] = + scp->pollfd_array[scp->array_count-1]; + scp->addr_array[tcp_addr_data->sc_index] = + scp->addr_array[scp->array_count-1]; + shifted_tcp_addr_data = + 
scp->addr_array[tcp_addr_data->sc_index]->method_data; + shifted_tcp_addr_data->sc_index = tcp_addr_data->sc_index; + scp->array_count--; + tcp_addr_data->sc_index = -1; + tcp_addr_data->write_ref_count = 0; + } + } + + /* look for addresses slated for addition */ + qlist_for_each_safe(iterator, scratch, &scp->add_queue) + { + tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); + qlist_del(&tcp_addr_data->sc_link); + if(tcp_addr_data->sc_index > -1) + { + /* update existing entry */ +#if 0 + gossip_err("HELLO: updating addr: %p, index: %d, ref: %d.\n", + scp->addr_array[tcp_addr_data->sc_index], + tcp_addr_data->sc_index, + tcp_addr_data->write_ref_count); +#endif + scp->pollfd_array[tcp_addr_data->sc_index].events = POLLIN; + if(tcp_addr_data->write_ref_count > 0) + scp->pollfd_array[tcp_addr_data->sc_index].events |= POLLOUT; + } + else + { + /* new entry */ + if(scp->array_count == scp->array_max) + { + /* we must enlarge the poll arrays */ + tmp_pollfd_array = (WSAPOLLFD*)malloc( + (scp->array_max+POLLFD_ARRAY_INC)*sizeof(WSAPOLLFD)); + /* TODO: handle this */ + assert(tmp_pollfd_array); + tmp_addr_array = (bmi_method_addr_p*)malloc( + (scp->array_max+POLLFD_ARRAY_INC)*sizeof(bmi_method_addr_p)); + /* TODO: handle this */ + assert(tmp_addr_array); + memcpy(tmp_pollfd_array, scp->pollfd_array, + scp->array_max*sizeof(WSAPOLLFD)); + free(scp->pollfd_array); + scp->pollfd_array = tmp_pollfd_array; + memcpy(tmp_addr_array, scp->addr_array, + scp->array_max*sizeof(bmi_method_addr_p)); + free(scp->addr_array); + scp->addr_array = tmp_addr_array; + scp->array_max = scp->array_max+POLLFD_ARRAY_INC; + } + /* add into pollfd array */ + tcp_addr_data->sc_index = scp->array_count; + scp->array_count++; + scp->addr_array[tcp_addr_data->sc_index] = tcp_addr_data->map; + scp->pollfd_array[tcp_addr_data->sc_index].fd = + tcp_addr_data->socket; + scp->pollfd_array[tcp_addr_data->sc_index].events = POLLIN; + if(tcp_addr_data->write_ref_count > 0) + 
scp->pollfd_array[tcp_addr_data->sc_index].events |= POLLOUT; + } + } + gen_mutex_unlock(&scp->queue_mutex); + + /* actually do the poll() work */ + /* + do + { + DWORD bytes; + + /* poll for 1ms */ + /* ret = WSAPoll(scp->pollfd_array, scp->array_count, 1); + old_errno = WSAGetLastError(); + allowed_poll_time--; + + } while(ret == 0 && allowed_poll_time > 0); + */ + /* ignore the request if no sockets are available */ + if (scp->array_count > 0) + { + ret = WSAPoll(scp->pollfd_array, scp->array_count, allowed_poll_time); + old_errno = WSAGetLastError(); + } + else + { + ret = old_errno = 0; + } + + if(ret < 0) + { + return(bmi_tcp_errno_to_pvfs(-old_errno)); + } + + /* check our pipe */ + /* + if (PeekNamedPipe(scp->pipe_fd[0], NULL, 0, NULL, &bytes, NULL)) + { + if (bytes) + { + char c; + DWORD count; + + pipe_notify = 1; + /* drain the pipe */ + /* ReadFile(scp->pipe_fd[0], &c, 1, &count, NULL); + + } + } + else + { + return(bmi_tcp_errno_to_pvfs(GetLastError())); + } + */ + + /* nothing ready, just return + -- there may actually be an error: see below */ + /* + if(ret == 0 && !pipe_notify) + { + return(0); + } + */ + + /* tmp_count = ret; */ + + for(i=0; iarray_count; i++) + { + /* short out if we hit count limit */ + if(*outcount == incount /* || *outcount == tmp_count */) + { + break; + } + + skip_flag = out_flag = 0; + + /* make sure that this addr hasn't been removed */ + gen_mutex_lock(&scp->queue_mutex); + qlist_for_each_safe(iterator, scratch, &scp->remove_queue) + { + tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); + if(tcp_addr_data->map == scp->addr_array[i]) + { + skip_flag = 1; + break; + } + } + gen_mutex_unlock(&scp->queue_mutex); + if(skip_flag) + continue; + + /* anything ready on this socket? 
*/ + if (scp->pollfd_array[i].revents) + { + + if(scp->pollfd_array[i].revents & ERRMASK) + status[*outcount] |= SC_ERROR_BIT; + if(scp->pollfd_array[i].revents & POLLIN) + status[*outcount] |= SC_READ_BIT; + if(scp->pollfd_array[i].revents & POLLOUT) + status[*outcount] |= SC_WRITE_BIT; + + /* Special case--POLLHUP has been received but data + is available. A graceful close has been initiated. + Clear the error flag so data is read/sent normally. */ + if ((scp->pollfd_array[i].revents & POLLHUP) && + (!(scp->pollfd_array[i].revents & POLLERR+POLLNVAL)) && + ((scp->pollfd_array[i].revents & POLLIN) || + (scp->pollfd_array[i].revents & POLLOUT))) + { + status[*outcount] &= ~SC_ERROR_BIT; + } + + out_flag = 1; + } + else + { + /* on Windows there may be an error on the socket that WSAPoll + doesn't report--use getsockopt to find */ + int rc, optval, optlen = sizeof(int); + + rc = getsockopt(scp->pollfd_array[i].fd, SOL_SOCKET, SO_ERROR, (char *) &optval, &optlen); + + if (rc != 0) + { + return(bmi_tcp_errno_to_pvfs(-WSAGetLastError())); + } + + if (optval) + { + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, + "Socket %d error: %d\n", + scp->pollfd_array[i].fd, + optval); + status[*outcount] |= SC_ERROR_BIT; + + out_flag = 1; + } + } + + if (out_flag) + { + if(scp->addr_array[i] == NULL) + { + /* server socket */ + maps[*outcount] = alloc_tcp_method_addr(); + /* TODO: handle this */ + assert(maps[*outcount]); + tcp_addr_data = (maps[*outcount])->method_data; + tcp_addr_data->server_port = 1; + tcp_addr_data->socket = scp->server_socket; + tcp_addr_data->port = -1; + } + else + { + /* normal case */ + maps[*outcount] = scp->addr_array[i]; + } + + *outcount = (*outcount) + 1; + } + } + + /* Under the following conditions (i.e. 
all of them must be true) we go back to redoing poll + * a) There were no outstanding sockets/fds that had data + * b) There was a pipe notification that our socket sets have changed + * c) we havent exhausted our allotted time + */ + /* + if (*outcount == 0 && pipe_notify == 1) + { + gettimeofday(&end, NULL); + timersub(&end, &start, &end); + allowed_poll_time -= (end.tv_sec * 1000 + end.tv_usec/1000); + if (allowed_poll_time > 0) + goto do_again; + } + */ + + return (0); +} + + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/bmi/bmi_wintcp/socket-collection.h b/src/io/bmi/bmi_wintcp/socket-collection.h new file mode 100755 index 0000000..d0ec57e --- /dev/null +++ b/src/io/bmi/bmi_wintcp/socket-collection.h @@ -0,0 +1,126 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * This file contains the visible data structures and function interface + * for a socket collection library. This library can maintain lists of + * sockets and perform polling operations on them. + */ + +/* + * NOTE: I am making read bits implicit in the implementation. A poll + * will always check to see if there is data to be read on a socket. 
+ */ + +#ifndef __SOCKET_COLLECTION_H +#define __SOCKET_COLLECTION_H + +#include +#include "bmi-method-support.h" +#include "bmi-tcp-addressing.h" +#include "quicklist.h" +#include "gen-locks.h" + +struct socket_collection +{ + /*struct pollfd* pollfd_array;*/ + WSAPOLLFD *pollfd_array; + bmi_method_addr_p* addr_array; + int array_max; + int array_count; + + gen_mutex_t queue_mutex; + struct qlist_head remove_queue; + struct qlist_head add_queue; + + int server_socket; + HANDLE pipe_fd[2]; +}; +typedef struct socket_collection* socket_collection_p; + +enum +{ + SC_READ_BIT = 1, + SC_WRITE_BIT = 2, + SC_ERROR_BIT = 4 +}; + +socket_collection_p BMI_socket_collection_init(int new_server_socket); +void BMI_socket_collection_queue(socket_collection_p scp, + bmi_method_addr_p map, struct qlist_head* queue); + +/* the bmi_tcp code may try to add a socket to the collection before + * it is fully connected, just ignore in this case + */ +/* write a byte on the pipe_fd[1] so that poll breaks out in case it is idling */ +#define BMI_socket_collection_add(s, m) \ +do { \ + struct tcp_addr* tcp_data = (struct tcp_addr *) (m)->method_data; \ + if(tcp_data->socket > -1){ \ + char c; \ + DWORD count; \ + gen_mutex_lock(&((s)->queue_mutex)); \ + BMI_socket_collection_queue(s, m, &((s)->add_queue)); \ + gen_mutex_unlock(&((s)->queue_mutex)); \ + /*WriteFile(s->pipe_fd[1], &c, 1, &count, NULL);*/\ + } \ +} while(0) + +#define BMI_socket_collection_remove(s, m) \ +do { \ + char c; \ + DWORD count; \ + gen_mutex_lock(&((s)->queue_mutex)); \ + BMI_socket_collection_queue(s, m, &((s)->remove_queue)); \ + gen_mutex_unlock(&((s)->queue_mutex)); \ + /*WriteFile(s->pipe_fd[1], &c, 1, &count, NULL);*/\ +} while(0) + +/* we _must_ have a valid socket at this point if we want to write data */ +#define BMI_socket_collection_add_write_bit(s, m) \ +do { \ + char c;\ + DWORD count; \ + struct tcp_addr* tcp_data = (struct tcp_addr *) (m)->method_data; \ + assert(tcp_data->socket > -1); \ + 
gen_mutex_lock(&((s)->queue_mutex)); \ + tcp_data->write_ref_count++; \ + BMI_socket_collection_queue((s),(m), &((s)->add_queue)); \ + gen_mutex_unlock(&((s)->queue_mutex)); \ + /*WriteFile(s->pipe_fd[1], &c, 1, &count, NULL);*/\ +} while(0) + +#define BMI_socket_collection_remove_write_bit(s, m) \ +do { \ + char c;\ + DWORD count; \ + struct tcp_addr* tcp_data = (struct tcp_addr *) (m)->method_data; \ + gen_mutex_lock(&((s)->queue_mutex)); \ + tcp_data->write_ref_count--; \ + assert(tcp_data->write_ref_count > -1); \ + BMI_socket_collection_queue((s),(m), &((s)->add_queue)); \ + gen_mutex_unlock(&((s)->queue_mutex)); \ + /*WriteFile(s->pipe_fd[1], &c, 1, &count, NULL);*/\ +} while(0) + +void BMI_socket_collection_finalize(socket_collection_p scp); +int BMI_socket_collection_testglobal(socket_collection_p scp, + int incount, + int *outcount, + bmi_method_addr_p * maps, + int * status, + int poll_timeout); + +#endif /* __SOCKET_COLLECTION_H */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/bmi/bmi_wintcp/sockio.c b/src/io/bmi/bmi_wintcp/sockio.c new file mode 100755 index 0000000..a6ae5ca --- /dev/null +++ b/src/io/bmi/bmi_wintcp/sockio.c @@ -0,0 +1,415 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#include "pvfs2-config.h" + +#include +/* #include */ +#include +#include +#include +#include +/* #include */ +/* #include */ +#include +/* #include +#include +*/ +#ifdef HAVE_NETDB_H +#include +#endif +#ifdef HAVE_ARPA_INET_H +#include +#endif +/* #include +#include */ +#include + +#include "sockio.h" +#include "gossip.h" + +typedef unsigned int socklen_t; + +/* if the platform provides a MSG_NOSIGNAL option (which disables the + * generation of signals on broken pipe), then use it + */ +#ifdef MSG_NOSIGNAL +#define DEFAULT_MSG_FLAGS MSG_NOSIGNAL +#else +#define DEFAULT_MSG_FLAGS 0 +#endif + +int BMI_sockio_new_sock() +{ + return(socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)); +} + +int BMI_sockio_bind_sock(int sockd, + int service) +{ + struct sockaddr_in saddr; + + memset((char *) &saddr, 0, sizeof(saddr)); + saddr.sin_family = AF_INET; + saddr.sin_port = htons((u_short) service); + saddr.sin_addr.s_addr = INADDR_ANY; + bind_sock_restart: + if (bind(sockd, (struct sockaddr *) &saddr, sizeof(saddr)) < 0) + { + if (WSAGetLastError() == WSAEINTR) + goto bind_sock_restart; + return (-1); + } + return (sockd); +} + +/* NOTE: this function returns BMI error codes */ +int BMI_sockio_bind_sock_specific(int sockd, + const char *name, + int service) +{ + struct sockaddr saddr; + int ret; + + if ((ret = BMI_sockio_init_sock(&saddr, name, service)) != 0) + return (ret); /* converted to PVFS error code below */ + + bind_sock_restart: + if (bind(sockd, &saddr, sizeof(saddr)) < 0) + { + if (WSAGetLastError() == WSAEINTR) + goto bind_sock_restart; + return(bmi_errno_to_pvfs(-WSAGetLastError())); + } + return (sockd); +} + + +/* NOTE: this function returns BMI error codes */ +int BMI_sockio_connect_sock(int sockd, + const char *name, + int service) +{ + struct sockaddr saddr; + int ret; + + if ((ret = BMI_sockio_init_sock(&saddr, name, service)) != 0) + return (ret); + connect_sock_restart: + if (connect(sockd, (struct sockaddr *) &saddr, sizeof(saddr)) < 0) + { + if 
(WSAGetLastError() == WSAEINTR) + goto connect_sock_restart; + return(bmi_errno_to_pvfs(-WSAGetLastError())); + } + return (sockd); +} + +#ifdef HAVE_GETHOSTBYNAME +static int conv_h_errno(int herr) +{ + switch (herr) + { + case WSAHOST_NOT_FOUND : + return BMI_EHOSTNTFD; + case WSANO_ADDRESS : + return BMI_EADDRNTFD; + case WSANO_RECOVERY : + return BMI_ENORECVR; + case WSATRY_AGAIN : + return BMI_ETRYAGAIN; + default : + return herr; + } +} + +/* gethostbyname version */ +int BMI_sockio_init_sock(struct sockaddr *saddrp, + const char *name, + int service) +{ + struct hostent *hep; + + memset((char *) saddrp, 0, sizeof(struct sockaddr_in)); + if (name == NULL) + { + if ((hep = gethostbyname("localhost")) == NULL) + { + return (-conv_h_errno(WSAGetLastError())); + } + } + else if ((hep = gethostbyname(name)) == NULL) + { + return (-conv_h_errno(WSAGetLastError())); + } + ((struct sockaddr_in *) saddrp)->sin_family = AF_INET; + ((struct sockaddr_in *) saddrp)->sin_port = htons((u_short) service); + memcpy((char *) &(((struct sockaddr_in *) saddrp)->sin_addr), hep->h_addr, + hep->h_length); + return (0); +} +#else +/* inet_aton version */ +int BMI_sockio_init_sock(struct sockaddr *saddrp, + const char *name, + int service) +{ + int ret; + struct in_addr addr; + + bzero((char *) saddrp, sizeof(struct sockaddr_in)); + if (name == NULL) + { + ret = inet_aton("127.0.0.1", &addr); + } + else + { + ret = inet_aton(name, &addr); + } + + if (ret == 0) return -1; + + ((struct sockaddr_in *) saddrp)->sin_family = AF_INET; + ((struct sockaddr_in *) saddrp)->sin_port = htons((u_short) service); + memcpy((char *) &(((struct sockaddr_in *) saddrp)->sin_addr), &addr, + sizeof(addr)); + + return 0; +} +#endif + + +/* nonblocking receive */ +int BMI_sockio_nbrecv(int s, + void *buf, + int len) +{ + int ret, comp = len, err; + + /* We can't read the blocking state on Windows */ + /* assert(fcntl(s, F_GETFL, 0) & O_NONBLOCK); */ + + while (comp) + { + nbrecv_restart: + ret = recv(s, 
(char *) buf, comp, DEFAULT_MSG_FLAGS); + err = WSAGetLastError(); + if (ret == 0) /* socket closed */ + { + errno = EPIPE; + return (-1); + } + if (ret == -1 && err == WSAEINTR) + { + goto nbrecv_restart; + } + else if (ret == -1 && err == WSAEWOULDBLOCK) + { + /* return what we got so far, this is a nonblocking call */ + return(len-comp); + } + else if (ret == -1) + { + return (-1); + } + comp -= ret; + buf = (char *)buf + ret; + } + return (len - comp); +} + +/* BMI_sockio_nbpeek() + * + * performs a nonblocking check to see if the amount of data requested + * is actually available in a socket. Does not actually read the data + * out. + * + * returns number of bytes available on succes, -1 on failure. + */ +int BMI_sockio_nbpeek(int s, void* buf, int len) +{ + int ret, err; + + /* We can't read the blocking state on Windows */ + /* assert(fcntl(s, F_GETFL, 0) & O_NONBLOCK); */ + + nbpeek_restart: + ret = recv(s, (char *) buf, len, (MSG_PEEK|DEFAULT_MSG_FLAGS)); + err = WSAGetLastError(); + if(ret == 0) + { + /* errno = EPIPE; */ + return (-1); + } + else if (ret == -1 && err == WSAEWOULDBLOCK) + { + return(0); + } + else if (ret == -1 && err == WSAEINTR) + { + goto nbpeek_restart; + } + else if (ret == -1) + { + return (-1); + } + + return(ret); +} + + +/* nonblocking send */ +/* should always return 0 when nothing gets done! 
*/ +int BMI_sockio_nbsend(int s, + void *buf, + int len) +{ + int ret, comp = len, err; + + while (comp) + { + nbsend_restart: + ret = send(s, (char *) buf, comp, DEFAULT_MSG_FLAGS); + err = WSAGetLastError(); + if (ret == 0 || (ret == -1 && err == WSAEWOULDBLOCK)) + return (len - comp); /* return amount completed */ + if (ret == -1 && err == WSAEINTR) + { + goto nbsend_restart; + } + else if (ret == -1) + return (-1); + comp -= ret; + buf = (char *)buf + ret; + } + return (len - comp); +} + +/* nonblocking vector send */ +int BMI_sockio_nbvector(int s, + LPWSABUF vector, + int count, + int recv_flag) +{ + int ret, err; + DWORD bytes, flags; + + /* NOTE: this function is different from the others that will + * keep making the I/O system call until EWOULDBLOCK is encountered; we + * give up after one call + */ + + /* loop over if interrupted */ + do + { + if (recv_flag) + { + /* ret = readv(s, vector, count); */ + flags = MSG_PARTIAL; + ret = WSARecv(s, vector, count, &bytes, &flags, NULL, NULL); + err = WSAGetLastError(); + } + else + { + /* ret = writev(s, vector, count); */ + flags = 0; + ret = WSASend(s, vector, count, &bytes, flags, NULL, NULL); + err = WSAGetLastError(); + } + } while ((ret == 0 && flags & MSG_PARTIAL) || (ret == -1 && err == WSAEINTR)); + + /* return zero if can't do any work at all */ + if (ret == -1 && err == WSAEWOULDBLOCK) + return(0); + + /* if data transferred or an error */ + return ret == -1 ? -1 : bytes; +} + +#ifdef __USE_SENDFILE__ +/* NBSENDFILE() - nonblocking (on the socket) send from file + * + * Here we are going to take advantage of the sendfile() call provided + * in the linux 2.2 kernel to send from an open file directly (ie. w/out + * explicitly reading into user space memory or memory mapping). + * + * We are going to set the non-block flag on the socket, but leave the + * file as is. + * + * Boy, that type on the offset for sockfile() sure is lame, isn't it? 
+ * That's going to cause us some headaches when we want to do 64-bit + * I/O... + * + * Returns -1 on error, amount of data written to socket on success. + */ +int BMI_sockio_nbsendfile(int s, + int f, + int off, + int len) +{ + int ret, comp = len, myoff; + + while (comp) + { + nbsendfile_restart: + myoff = off; + ret = sendfile(s, f, &myoff, comp); + if (ret == 0 || (ret == -1 && errno == EWOULDBLOCK)) + return (len - comp); /* return amount completed */ + if (ret == -1 && errno == EINTR) + { + goto nbsendfile_restart; + } + else if (ret == -1) + return (-1); + comp -= ret; + off += ret; + } + return (len - comp); +} +#endif + +/* routines to get and set socket options */ +int BMI_sockio_get_sockopt(int s, + int optname) +{ + int val; + socklen_t len = sizeof(val); + + if (getsockopt(s, SOL_SOCKET, optname, (char *) &val, (int *) &len) == -1) + return (-1); + else + return (val); +} + +int BMI_sockio_set_tcpopt(int s, + int optname, + int val) +{ + if (setsockopt(s, IPPROTO_TCP, optname, (char *) &val, sizeof(val)) == -1) + return (-1); + else + return (val); +} + +int BMI_sockio_set_sockopt(int s, + int optname, + int val) +{ + if (setsockopt(s, SOL_SOCKET, optname, (char *) &val, sizeof(val)) == -1) + return (-1); + else + return (val); +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/bmi/bmi_wintcp/sockio.h b/src/io/bmi/bmi_wintcp/sockio.h new file mode 100755 index 0000000..41d7535 --- /dev/null +++ b/src/io/bmi/bmi_wintcp/sockio.h @@ -0,0 +1,130 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + + +/* + * These are the exported functions from the sockio library. They + * provide a simple intuitive interface to the TCP/IP sockets API. 
+ */ + +/* + * Defines which may be set at compile time to determine functionality: + * + * __USE_SENDFILE__ turns on the use of sendfile() in the library and + * makes the BMI_sockio_nbsendfile function available to the application. + * Older glibc systems do not have this functionality so we leave it to + * be turned on manually. + */ + +#ifndef SOCKIO_H +#define SOCKIO_H + +#include +#include +/* #include */ +/* #include */ +#include + +#include "bmi-types.h" + +int BMI_sockio_new_sock(void); +int BMI_sockio_bind_sock(int, + int); +int BMI_sockio_bind_sock_specific(int sockd, + const char *name, + int service); +int BMI_sockio_connect_sock(int, + const char *, + int); +int BMI_sockio_init_sock(struct sockaddr *, + const char *, + int); +int BMI_sockio_nbrecv(int s, + void *buf, + int len); +int BMI_sockio_nbsend(int s, + void *buf, + int len); +int BMI_sockio_nbvector(int s, + LPWSABUF vector, + int count, + int recv_flag); +int BMI_sockio_get_sockopt(int s, + int optname); +int BMI_sockio_set_tcpopt(int s, + int optname, + int val); +int BMI_sockio_set_sockopt(int s, + int optname, + int size); +int BMI_sockio_nbpeek(int s, + void* buf, + int len); +#ifdef __USE_SENDFILE__ +int BMI_sockio_nbsendfile(int s, + int f, + int off, + int len); +#endif + +#define GET_RECVBUFSIZE(s) BMI_sockio_get_sockopt(s, SO_RCVBUF) +#define GET_SENDBUFSIZE(s) BMI_sockio_get_sockopt(s, SO_SNDBUF) + +/* some OS's (ie. 
Linux 1.3.xx) can't handle buffer sizes of certain + * sizes, and will hang up + */ +#ifdef BRAINDEADSOCKS +/* setting socket buffer sizes can do bad things */ +#define SET_RECVBUFSIZE(s, size) +#define SET_SENDBUFSIZE(s, size) +#else +#define SET_RECVBUFSIZE(s, size) BMI_sockio_set_sockopt(s, SO_RCVBUF, size) +#define SET_SENDBUFSIZE(s, size) BMI_sockio_set_sockopt(s, SO_SNDBUF, size) +#endif + +#define GET_MINSENDSIZE(s) BMI_sockio_get_sockopt(s, SO_SNDLOWAT) +#define GET_MINRECVSIZE(s) BMI_sockio_get_sockopt(s, SO_RCVLOWAT) +#define SET_MINSENDSIZE(s, size) BMI_sockio_set_sockopt(s, SO_SNDLOWAT, size) +#define SET_MINRECVSIZE(s, size) BMI_sockio_set_sockopt(s, SO_RCVLOWAT, size) + +/* BLOCKING / NONBLOCKING MACROS */ + +/* Windows uses ioctlsocket */ +/* #define SET_NONBLOCK(x_fd) fcntl((x_fd), F_SETFL, O_NONBLOCK | \ + fcntl((x_fd), F_GETFL, 0)) */ +#define SET_NONBLOCK(x_fd) \ +do { \ + u_long enable = 1; \ + ioctlsocket((x_fd), FIONBIO, &enable); \ +} while (0) + +/* There is no equivalent for FASYNC on Windows, so just set blocking mode */ +/*#define SET_NONBLOCK_AND_SIGIO(x_fd) \ +do { \ + fcntl((x_fd), F_SETOWN, getpid()); \ + fcntl((x_fd), F_SETFL, FASYNC | O_NONBLOCK | fcntl((x_fd), F_GETFL, 0)); \ +} while (0) */ +#define SET_NONBLOCK_AND_SIGIO(x_fd) SET_NONBLOCK(x_fd) + +/* #define CLR_NONBLOCK(x_fd) fcntl((x_fd), F_SETFL, fcntl((x_fd), F_GETFL, 0) & \ + (~O_NONBLOCK)) */ +#define CLR_NONBLOCK(x_fd) \ +do { \ + u_long enable = 0; \ + ioctlsocket((x_fd), FIONBIO, &enable); \ +} while (0) + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/server/mgmt-get-uid.sm b/src/server/mgmt-get-uid.sm new file mode 100644 index 0000000..5285208 --- /dev/null +++ b/src/server/mgmt-get-uid.sm @@ -0,0 +1,138 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ +#include + +#include "pvfs2-server.h" +#include "pint-uid-mgmt.h" +#include "pint-util.h" + +/* static array used to quickly pull uid stats from the server */ +static PVFS_uid_info_s *static_array = NULL; + +%% + +machine pvfs2_uid_mgmt_sm +{ + state prelude + { + jump pvfs2_prelude_sm; + default => do_work; + } + + state do_work + { + run uid_mgmt_do_work; + default => final_response; + } + + state final_response + { + jump pvfs2_final_response_sm; + default => cleanup; + } + + state cleanup + { + run uid_mgmt_cleanup; + default => terminate; + } +} + +%% + +/** uid_mgmt_cleanup() + * + * cleans up any resources consumed by this state machine and ends + * execution of the machine + */ +static PINT_sm_action uid_mgmt_cleanup( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + if(s_op->resp.u.mgmt_get_uid.uid_info_array) + free(s_op->resp.u.mgmt_get_uid.uid_info_array); + + return(server_state_machine_complete(smcb)); +} + +/** uid_mgmt_do_work() + * + * gathers uid statistics from server and builds response + */ +static PINT_sm_action uid_mgmt_do_work( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int i; + struct timeval oldest; + + /* allocate memory for a static array, used to quickly pull the uid + * statistics from the server without blocking access to the uid lists + */ + if (!static_array) + { + static_array = (PVFS_uid_info_s *) + malloc(UID_MGMT_MAX_HISTORY * sizeof(PVFS_uid_info_s)); + if (!static_array) + { + s_op->resp.u.mgmt_get_uid.uid_info_array = NULL; + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + } + + /* gather all uid statistics and store them in the static array */ + PINT_dump_all_uid_stats(static_array); + + /* get a timestamp for the max history we want to look through */ + PINT_util_get_current_timeval(&oldest); + oldest.tv_sec -= 
s_op->req->u.mgmt_get_uid.history; + + /* scan uid stats to determine how much info we need to send back */ + for (i = 0; i < UID_MGMT_MAX_HISTORY; i++) + { + if((static_array[i].count == 0) || + !(IN_UID_HISTORY(static_array[i].tv, oldest))) + { + break; + } + } + + /* allocate memory for and fill in our response back */ + s_op->resp.u.mgmt_get_uid.uid_info_array_count = i; + s_op->resp.u.mgmt_get_uid.uid_info_array = (PVFS_uid_info_s *) + malloc(i * sizeof(PVFS_uid_info_s)); + if (!(s_op->resp.u.mgmt_get_uid.uid_info_array)) + { + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + + memcpy(s_op->resp.u.mgmt_get_uid.uid_info_array, static_array, + (s_op->resp.u.mgmt_get_uid.uid_info_array_count * sizeof(PVFS_uid_info_s))); + + js_p->error_code = 0; + return SM_ACTION_COMPLETE; +} + +struct PINT_server_req_params pvfs2_uid_mgmt_params = +{ + .string_name = "mgmt_get_uid", + .perm = PINT_SERVER_CHECK_NONE, + .state_machine = &pvfs2_uid_mgmt_sm +}; + + +/* + * Local variables: + * mode: c + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/test/ci/jenkins-build.sh b/test/ci/jenkins-build.sh new file mode 100755 index 0000000..bc760e7 --- /dev/null +++ b/test/ci/jenkins-build.sh @@ -0,0 +1,203 @@ +#!/bin/bash + +# build parameters coming in as arguments +# for jenkins should correspond to appropriate job and matrix parameters +if [ $# -ne 3 ] +then + echo "usage: $0 " + exit +fi + +# NAME is just a friendly, no-white space name that should match the jenkins +# job so the nightly test run can get the right artifact +# OS is just the distro name +# VFS is what interface to use for VFS, valid values are: +# kernel +# kernel_helper +# fuse +NAME=$1 +OS=$2 +VFS=$3 +TEST_NAME="${1}.${2}.${3}" + +JENKINS_BUILD=1 +# use BUILD_NUMBER to guess if this is running under Jenkins or not. 
If not, +# just assume we should build what's in pwd +if [ -z "${BUILD_NUMBER}" ] +then + WORKSPACE=`pwd` + JENKINS_BUILD=0 +fi + +INSTALL_PATH="${WORKSPACE}/install" +BUILD_PATH="${WORKSPACE}/build" + +## jenkins gives us the CVS branch, otherwise it default to main +CVS_BRANCH=${CVS_BRANCH:="main"} + +## setup appropriate configure flags +# common flags +flags=" --prefix=${INSTALL_PATH} --enable-shared" + +# make sure we have our local db4 version available +if [ -d /opt/db4 ] +then + flags="${flags} --with-db=/opt/db4" +else + # see if standard location works (ubuntu with db4.8) + maj=`cat /usr/include/db.h | grep DB_VERSION_MAJOR | awk '{ print $3 }'` + min=`cat /usr/include/db.h | grep DB_VERSION_MINOR | awk '{ print $3 }'` + if [ "${maj}" -eq 4 -a "${min}" -ge 8 ] + then + echo "Using default system db library" + else + echo "No /opt/db4 directory" + exit 1 + fi +fi + +# find kernel sources and set appropriate flags taking 2.4/2.6 into account +# centos 3.9 has a 2.4 kernel, and has the same changes as redhat24 +if [ "${VFS:0:6}" = "kernel" ] +then + kern="" + if [ -f /etc/redhat-release ] && + [ "3.9" = "$(cat /etc/redhat-release | awk '{ print $3 }')" ] + then + kern=`find /usr/src -maxdepth 1 -type d -name "*$(uname -r)*"` + flags="${flags} --enable-redhat24 --with-kernel24=${kern}" + elif [ -f /etc/SuSE-release ] + then + flavor="desktop" + kern=`find /usr/src/ -maxdepth 5 -type d -name ${flavor} | head -n 1` + flags="${flags} --with-kernel=${kern}" + elif [ -f /etc/debian_version ] + then + kconf=`find /usr/src -maxdepth 3 -type f -path "*linux-source*" -name ".config"` + kern=`dirname "${kconf}"` + flags="${flags} --with-kernel=${kern}" + else + kern=`find /usr/src/kernels/ -maxdepth 1 -type d -name "*$(uname -r)*"` + flags="${flags} --with-kernel=${kern}" + fi + + if [ -z "${kern}" ] + then + echo "No kernel source found" + exit 1 + else + echo "Using kernel source at ${kern}" + fi +elif [ "${VFS}" = "fuse" ] +then + flags="${flags} --enable-fuse" 
+else + echo "Unknown interface type!" + exit 1 +fi + +## just set enable kmod_helper if that's what we want +if [ "${VFS}" = "kernel_helper" ] +then + flags="${flags} --enable-threaded-kmod-helper" +fi + +## build exceptions for distros +# centos3 apparently doesn't have epoll +if [ -f /etc/redhat-release ] && + [ "3.9" = "$(cat /etc/redhat-release | awk '{ print $3 }')" ] +then + flags="${flags} --disable-epoll" +fi + + + +## configure and build it! +# run configure, clean out previous builds, and build it +rm -rf ${BUILD_PATH} +rm -rf ${INSTALL_PATH} + +mkdir ${BUILD_PATH} +cd ${BUILD_PATH} + +echo "Configuring with flags \"$flags\"" +${WORKSPACE}/./configure $flags +if [ $? -ne 0 ] +then + echo "configure failed, failing" + exit 1 +fi + +make all +if [ $? -ne 0 ] +then + echo "make all failed, failing" + exit 1 +fi + +if [ "${VFS:0:6}" = "kernel" ] +then + # build kmod, handling 2.4/2.6 cases + if [ -f /etc/redhat-release ] && + [ "3.9" = "$(cat /etc/redhat-release | awk '{ print $3 }')" ] + then + make kmod24 + else + make kmod + fi +fi +if [ $? -ne 0 ] +then + echo "make kmod failed, failing" + exit 1 +fi + +make install +if [ $? -ne 0 ] +then + echo "make install failed, failing" + exit 1 +fi + +if [ "${VFS:0:6}" = "kernel" ] +then + # install kmod, handling 2.4/2.6 cases + if [ -f /etc/redhat-release ] && + [ "3.9" = "$(cat /etc/redhat-release | awk '{ print $3 }')" ] + then + make "DESTDIR=${INSTALL_PATH}" just_kmod24_install + make just_kmod24_apps_install + else + make kmod_prefix=${INSTALL_PATH} kmod_install + fi +fi + +# now make the tests +cd test +../../test/./configure $flags +make all +if [ $? -ne 0 ] +then + echo "configure tests failed, failing" + exit 1 +fi + +make install +if [ $? -ne 0 ] +then + echo "make install tests failed, failing" + exit 1 +fi +# back to build directory +cd ../ + +# back to original workspace +cd ../ +tar -cjf ${TEST_NAME}-bin.tar.bz2 install +if [ $? 
-ne 0 ] +then + echo "Failure creating tar of installed binaries" + exit 1 +fi + +exit 0 diff --git a/test/ci/jenkins-doc.sh b/test/ci/jenkins-doc.sh new file mode 100755 index 0000000..e4d38a1 --- /dev/null +++ b/test/ci/jenkins-doc.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +cd ${WORKSPACE} + +rm -rf build +mkdir build && cd build +if [ $? -ne 0 ] +then + echo "failure creating build directory" +fi + +echo -n "Configuring source ... " +.././configure >/dev/null 2>&1 +if [ $? -ne 0 ] +then + echo "failure configuring source" + exit 1 +fi +echo "okay" + +echo -n "Making docs ... " +make docs >/dev/null 2>&1 +if [ $? -ne 0 ] +then + echo "failure making docs" + exit 1 +fi +echo "okay" + +files=`find doc/ -regextype posix-egrep -regex ".+\.(pdf|html)"` +file_count=`echo ${files} | wc -l` +if [ ${file_count} -ne 60 ] +then + echo "Not enough documents, only ${file_count}" +fi + +echo -n "Creating tar of docs ... " +tar -cjvf ${WORKSPACE}/orange-branch-docs.tar.bz2 ${files} +if [ $? -ne 0 ] +then + echo "failed" +fi +echo "okay" + +cd ${WORKSPACE} +rm -rf build + +exit 0 diff --git a/test/ci/jenkins-test.sh b/test/ci/jenkins-test.sh new file mode 100644 index 0000000..222ecc3 --- /dev/null +++ b/test/ci/jenkins-test.sh @@ -0,0 +1,398 @@ +#!/bin/bash + +export TEST_NAME=${1}.${2}.${3}.${4}.${5} # unique name +export BUILD_TEST_NAME=${1}.${2}.${3} # unique name +export OS=$2 +export VFS=$3 +export IO=$4 +export FS=$5 + +# change the tests portion of the job URL to the build portion so we can find +# the artifact with the binaries from the last successful build +export BIN_NAME="${BUILD_TEST_NAME}-bin.tar.bz2" +export BIN_URL="$(echo ${JOB_URL} | sed s/-tests/-build/ | sed s/io=[^,]*,//)lastSuccessfulBuild/artifact/${BIN_NAME}" + +export PVFS2_LOCATION=${WORKSPACE}/install # install location +export PVFS2_SRC=${WORKSPACE} # source tree +export VERBOSE="yes" + +## server/client related locations ## +export BASE="/tmp/jenkins/nightly/${TEST_NAME}" # base dir. 
of all tests +export PVFS2_MOUNT="${BASE}/mount" # VFS mount +export PVFS2_LOG="${BASE}/logs/" # all logs go here +export PVFS2_STORAGE="${BASE}/storage/" # server backing storage + +## extra files copied in by jenkins +export PVFS2_TESTS_NAME="pvfs2-tests-nextgen.tar.bz2" +export PVFS2_EXTRA_TESTS_NAME="benchmarks-20110616.tar.bz2" +export PVFS2_TESTS="${WORKSPACE}/new_tests/" +export PVFS2_EXTRA_TESTS="${BASE}/extra/" # work dir. of extra tests + +## tests to run ## +export PVFS2_SYSINT_TESTS="${PVFS2_TESTS}/sysint-tests.d" +export PVFS2_VFS_TESTS="${PVFS2_TESTS}/vfs-tests.d/" + +export LD_LIBRARY_PATH="${PVFS2_LOCATION}/lib:/opt/db4/lib:${LD_LIBRARY_PATH}" + +log() { + if [ -n "${VERBOSE}" ] + then + echo $1 $2 | tee -a ${PVFS2_LOG}/all_tests.log + fi +} + +echo_tee() { + echo -e "$1" "$2" | tee -a ${PVFS2_LOG}/all_tests.log +} + +check_return() { + rc=$1 + msg=$2 + if [ ${rc} -ne 0 ] + then + echo_tee "aborting tests due to failed return code of ${msg}" + exit 1 + fi +} + +## create log location right off the bat +mkdir -p ${PVFS2_LOG} +echo_tee "Running tests on $(uname -n -r -m)" + +rm -rf ${BIN_URL}* +echo_tee -n "Retrieving artifact [\"$BIN_URL\"]... " +wget -q ${BIN_URL} &>/dev/null +check_return $? "wget binary tarball artifact failed" +echo_tee "ok" + +# remove previous binaries +rm -rf ${PVFS2_LOCATION} +echo_tee -n "Extracting binaries ... " +tar -xjf ${BIN_NAME} >/dev/null +check_return $? "extract binary tarball failed" +echo_tee "ok" +rm -f ${BIN_NAME} > /dev/null + +if [ ! -d ${PVFS2_LOCATION} -o ! -f ${PVFS2_LOCATION}/sbin/pvfs2-server ] +then + echo_tee "no install directory" + exit 1 +fi + +if [ ! -d ${PVFS_SRC} ] +then + echo_tee "no source directory" + exit 1 +fi + +if [ ! -f ${PVFS2_TESTS_NAME} ] +then + echo_tee "Test tarball ${PVFS2_TESTS_NAME} doesn't exist" + exit 1 +fi + +# remove previous and extract current tests +rm -rf ${PVFS2_TESTS} +mkdir -p ${PVFS2_TESTS} +echo_tee -n "Extracting tests ... 
" +tar -xjvf ${PVFS2_TESTS_NAME} -C ${PVFS2_TESTS} >/dev/null +check_return $? "extract tests failed" +echo_tee "ok" + +pvfs2_client_kernel_cleanup() { + # unmount anything, kill client processes, wait, then rmmod + sudo /bin/umount $PVFS2_MOUNT &>/dev/null + sleep 2 + sudo killall -9 pvfs2-client &>/dev/null + sleep 2 + sudo killall -9 pvfs2-client-core &>/dev/null + sleep 2 + sudo /sbin/rmmod pvfs2 &>/dev/null + return 0 +} + +pvfs2_client_fuse_cleanup() { + sudo /bin/umount $PVFS2_MOUNT &>/dev/null + sleep 2 + sudo /sbin/rmmod fuse &>/dev/null + return 0 +} + +pvfs2_server_cleanup() { + sudo killall -9 pvfs2-server &>/dev/null + return 0 +} + +pvfs2_client_kernel_start() { + + # add kernel module + # 2.4 is .o, 2.6 is .ko + mod_base=`find ${PVFS2_LOCATION}/lib -type d -name kernel` + mod_loc=`find ${mod_base} -type f -name "pvfs2.*o"` + sudo /sbin/insmod ${mod_loc} + check_return $? "insmod failed" + + # start client-core, some older sudo version won't let LD_* pass through + if [ -z "$(sudo sudo -V | grep "Environment variables to preserve")" ] + then + echo "export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}; ${PVFS2_LOCATION}/sbin/pvfs2-client -p ${PVFS2_LOCATION}/sbin/pvfs2-client-core -L ${PVFS2_LOG}/pvfs2-client-core.log" > client_run.sh + chmod +x client_run.sh + sudo ./client_run.sh + rm client_run.sh + else + sudo LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \ + ${PVFS2_LOCATION}/sbin/pvfs2-client \ + -p ${PVFS2_LOCATION}/sbin/pvfs2-client-core \ + -L ${PVFS2_LOG}/pvfs2-client-core.log + fi + check_return $? 
"client core start failed" + + # make sure we can read the log + sudo chmod 777 ${PVFS2_LOG}/pvfs2-client-core.log + sleep 3 + + # mount it up + if [ -f /etc/redhat-release ] && + [ "3.9" = "$(cat /etc/redhat-release | awk '{ print $3 }')" ] + then + echo "export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}; ${PVFS2_LOCATION}/sbin/mount.pvfs2 tcp://`hostname -s`:3396/pvfs2-fs ${PVFS2_MOUNT}" > client_mount.sh + chmod +x client_mount.sh + sudo ./client_mount.sh + rm client_mount.sh + else + sudo /bin/mount -t pvfs2 tcp://`hostname -s`:3396/pvfs2-fs \ + ${PVFS2_MOUNT} + fi + check_return $? "mount failed" + return 0 +} + +pvfs2_client_fuse_start() { + + mod=`/sbin/lsmod | grep fuse` + if [ -z "${mod}" ] + then + sudo /sbin/modprobe fuse + check_return $? "modprobe failed" + fi + + # mount it up + # start pvfs2fuse, some older sudo version won't let LD_* pass through + if [ -z "$(sudo sudo -V | grep "Environment variables to preserve")" ] + then + echo "export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}; ${PVFS2_LOCATION}/bin/pvfs2fuse -o fs_spec=tcp://`hostname -s`:3396/pvfs2-fs,allow_other ${PVFS2_MOUNT}" > client_run.sh + chmod +x client_run.sh + sudo ./client_run.sh + else + sudo LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \ + ${PVFS2_LOCATION}/bin/pvfs2fuse \ + -o fs_spec=tcp://`hostname -s`:3396/pvfs2-fs,allow_other \ + ${PVFS2_MOUNT} + fi +} + +pvfs2_server_start() { + cd $BASE + ${PVFS2_LOCATION}/bin/pvfs2-genconfig ${BASE}/fs.conf \ + --protocol tcp \ + --iospec="`hostname -s`:{3396-3399}" \ + --metaspec="`hostname -s`:{3396-3399}" \ + --storage ${PVFS2_STORAGE} \ + --trove-method=${IO:=alt-aio} \ + --logging "none" \ + --logfile=${PVFS2_LOG}/pvfs2-server-${TEST_NAME}.log --quiet + check_return $? "pvfs2-genconfig failed" + + for alias in `grep 'Alias ' fs.conf | cut -d ' ' -f 2` + do + # create the space + ${PVFS2_LOCATION}/sbin/pvfs2-server \ + -p ${BASE}/pvfs2-server-${alias}.pid \ + -f ${BASE}/fs.conf -a $alias \ + &>${PVFS2_LOG}/pvfs2-server-create-${alias}.log + check_return $? 
"pvfs2-server -f failed for $alias" + + # start the server + ${PVFS2_LOCATION}/sbin/pvfs2-server \ + -p ${BASE}/pvfs2-server-${alias}.pid \ + ${BASE}/fs.conf $server_conf -a $alias \ + &>${PVFS2_LOG}/pvfs2-server-start-${alias}.log + check_return $? "pvfs2-server failed for $alias" + done + + # store out mount path + echo "tcp://`hostname -s`:3396/pvfs2-fs ${PVFS2_MOUNT} pvfs2 defaults 0 0" \ + > ${BASE}/pvfs2tab + export PVFS2TAB_FILE=${BASE}/pvfs2tab + + # up the logging chatter + ${PVFS2_LOCATION}/bin/pvfs2-set-debugmask -m ${PVFS2_MOUNT} "all" \ + &>/dev/null + check_return $? "pvfs2-set-debugmask failed" + return 0 +} + +external_tests_setup() +{ + rm -rf ${PVFS2_EXTRA_TESTS}/* + + tar -xjf ${WORKSPACE}/${PVFS2_EXTRA_TESTS_NAME} -C ${PVFS2_EXTRA_TESTS} \ + >/dev/null + check_return $? "extra tests untar extra failed" + + mv ${PVFS2_EXTRA_TESTS}/benchmarks/* ${PVFS2_EXTRA_TESTS}/ + check_return $? "extra tests mv failed" +} + +run_parts() { + cd $1 + echo_tee "* TEST GROUP START *" + for f in * + do + [ -d $f ] && continue + if [ -x $f ] + then + echo_tee -n "* TEST $f: " + ./$f > ${PVFS2_LOG}/${f}-${TEST_NAME}.log + rc=$? + if [ ${rc} -eq 0 ] + then + nr_passed=$((nr_passed + 1)) + echo_tee "OK" + else + nr_failed=$((nr_failed + 1)) + echo_tee "FAILED (${rc})" + fi + fi + done + echo_tee "* TEST GROUP DONE *" +} + +echo_tee "Running test ${TEST_NAME} in ${BASE}" +# clean up if a nasty failure from the last test +if [ "${VFS:0:6}" = "kernel" ] +then + pvfs2_client_kernel_cleanup +elif [ "${VFS}" = "fuse" ] +then + pvfs2_client_fuse_cleanup +fi + +pvfs2_server_cleanup + +sudo rm -rf $BASE +mkdir -p ${PVFS2_MOUNT}; check_return $? "mkdir on ${PVFS2_MOUNT}" +chmod 777 ${PVFS2_MOUNT} +mkdir -p ${PVFS2_LOG}; check_return $? "mkdir on ${PVFS2_LOG}" +chmod 777 ${PVFS2_MOUNT} +mkdir -p ${PVFS2_STORAGE}; check_return $? "mkdir on ${PVFS2_STORAGE}" +chmod 777 ${PVFS2_MOUNT} +mkdir -p ${PVFS2_EXTRA_TESTS}; check_return $? 
"mkdir on ${PVFS2_EXTRA_TESTS}" +chmod 777 ${PVFS2_MOUNT} + +#exec 6<&1 +#exec 7<&2 +exec 2>&1 + +echo_tee -n "setup external tests ... " +external_tests_setup +if [ $? -eq 0 ] +then + echo_tee "okay" +else + echo_tee "failed" + exit 1 +fi + +echo_tee -n "setup servers... " +pvfs2_server_start +if [ $? -eq 0 ] +then + echo_tee "okay" +else + echo_tee "failed" + exit 1 +fi + +echo_tee -n "starting client... " +if [ "${VFS:0:6}" = "kernel" ] +then + pvfs2_client_kernel_start +elif [ "${VFS}" = "fuse" ] +then + pvfs2_client_fuse_start +else + echo_tee "Unknown VFS test type: ${VFS}, exiting" + exit 1 +fi +if [ $? -eq 0 ] +then + echo_tee "okay" +else + echo_tee "failed" + exit 1 +fi + +nr_passed=0 +nr_failed=0 + +echo_tee "running SYSINT tests" +run_parts ${PVFS2_SYSINT_TESTS} +sleep 3 +echo_tee "running VFS tests" +run_parts ${PVFS2_VFS_TESTS} + +# cleanup client and then server + +echo_tee -n "stopping client... " +if [ "${VFS:0:6}" = "kernel" ] +then + pvfs2_client_kernel_cleanup +elif [ "${VFS}" = "fuse" ] +then + pvfs2_client_fuse_cleanup +else + echo_tee "Unknown test, exiting" + exit 1 +fi + +if [ $? -eq 0 ] +then + echo_tee "okay" +fi + +echo_tee -n "stopping server... " +pvfs2_server_cleanup +if [ $? -eq 0 ] +then + echo_tee "okay" +fi + +# restore file descriptors and close temporary fds +#exec 1<&6 6<&- +#exec 2<&7 7<&- + +echo_tee "Total Failed: ${nr_failed}" + +# remove extracted binaries and artifact +rm -rf ${PVFS2_LOCATION} +rm -rf ${BIN_NAME}* + +# remove extraced tests and tarball +rm -rf ${WORKSPACE}/${PVFS2_TESTS_NAME}* +rm -rf ${PVFS2_TESTS} + +# remove benchmarks tar ball +rm -rf ${WORKSPACE}/${PVFS2_EXTRA_TESTS_NAME} + +echo_tee -n "creating log tarball... 
" +cd ${WORKSPACE} +tar -cjvf ${WORKSPACE}/test-logs.tar.bz2 ${PVFS2_LOG} >/dev/null 2>&1 +echo_tee "done" + +if [ ${nr_failed} -gt 0 ] +then + exit 1 +else + exit 0 +fi diff --git a/test/common/gen-locks/condvar1.c b/test/common/gen-locks/condvar1.c new file mode 100755 index 0000000..2f85cad --- /dev/null +++ b/test/common/gen-locks/condvar1.c @@ -0,0 +1,22 @@ + +#include +#include + +#include "gen-locks.h" + +static gen_cond_t cv = NULL; + +int main() +{ + assert(cv == NULL); + + assert(gen_cond_init(&cv) == 0); + + assert(cv != NULL); + + assert(gen_cond_destroy(&cv) == 0); + + assert(cv == NULL); + + return 0; +} \ No newline at end of file diff --git a/test/common/gen-locks/condvar2_1.c b/test/common/gen-locks/condvar2_1.c new file mode 100755 index 0000000..3aa74c2 --- /dev/null +++ b/test/common/gen-locks/condvar2_1.c @@ -0,0 +1,185 @@ +/* + * File: condvar2_1.c + * + * + * -------------------------------------------------------------------------- + * + * Pthreads-win32 - POSIX Threads Library for Win32 + * Copyright(C) 1998 John E. Bossom + * Copyright(C) 1999,2005 Pthreads-win32 contributors + * + * Contact Email: rpj@callisto.canberra.edu.au + * + * The current list of contributors is contained + * in the file CONTRIBUTORS included with the source + * code distribution. The list can also be seen at the + * following World Wide Web location: + * http://sources.redhat.com/pthreads-win32/contributors.html + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library in the file COPYING.LIB; + * if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * -------------------------------------------------------------------------- + * + * Test Synopsis: + * - Test timeout of multiple waits on a CV with no signal/broadcast. + * + * Test Method (Validation or Falsification): + * - Validation + * + * Requirements Tested: + * - + * + * Features Tested: + * - + * + * Cases Tested: + * - + * + * Description: + * - Because the CV is never signaled, we expect the waits to time out. + * + * Environment: + * - + * + * Input: + * - None. + * + * Output: + * - File name, Line number, and failed expression on failure. + * - No output on success. + * + * Assumptions: + * - + * + * Pass Criteria: + * - pthread_cond_timedwait returns ETIMEDOUT. + * - Process returns zero exit status. + * + * Fail Criteria: + * - pthread_cond_timedwait does not return ETIMEDOUT. + * - Process returns non-zero exit status. + */ + +// #define _WIN32_WINNT 0x400 + +/* #include "test.h" */ +#define _USE_32BIT_TIME_T + +#include +#include +#include +#include +#include + +#include "gen-locks.h" + +static gen_cond_t cv; +static gen_mutex_t mutex; +static struct timespec abstime = { 0, 0 }; + +enum { + NUMTHREADS = 5 +}; + +DWORD WINAPI +mythread(void * arg) +{ + assert(gen_mutex_lock(&mutex) == 0); + fprintf(stderr, "thread %d locked mutex\n", (DWORD) arg); + + assert(gen_cond_timedwait(&cv, &mutex, &abstime) == ETIMEDOUT); + + assert(gen_mutex_unlock(&mutex) == 0); + fprintf(stderr, "thread %d unlocked mutex\n", (DWORD) arg); + + return (DWORD) arg; +} + +int thread_join(gen_thread_t thread, LPDWORD retval) +{ + BOOL rc; + DWORD iretval; + LPDWORD pretval = (retval) ? 
retval : &iretval; + + do + { + rc = GetExitCodeThread(thread, pretval); + if (rc && *pretval == STILL_ACTIVE) + { + Sleep(500); + } + } while (rc && *pretval == STILL_ACTIVE); + + return 0; +} + +int +main() +{ + int i; + gen_thread_t t[NUMTHREADS + 1]; + int result = 0; + struct _timeb currSysTime; + const DWORD NANOSEC_PER_MILLISEC = 1000000; + + assert(gen_cond_init(&cv) == 0); + + assert(gen_mutex_init(&mutex) == 0); + + /* get current system time */ + _ftime_s(&currSysTime); + + abstime.tv_sec = currSysTime.time; + abstime.tv_nsec = NANOSEC_PER_MILLISEC * currSysTime.millitm; + + abstime.tv_sec += 5; + + assert(gen_mutex_lock(&mutex) == 0); + fprintf(stderr, "main thread locked mutex\n"); + + for (i = 1; i <= NUMTHREADS; i++) + { + /* assert(pthread_create(&t[i], NULL, mythread, (void *) i) == 0); */ + assert((t[i] = CreateThread(NULL, 0, mythread, (void *) i, 0, NULL)) != NULL); + } + + assert(gen_mutex_unlock(&mutex) == 0); + fprintf(stderr, "main thread unlocked mutex\n"); + + for (i = 1; i <= NUMTHREADS; i++) + { + assert(thread_join(t[i], (LPDWORD) &result) == 0); + fprintf(stderr, "i = %d result = %ld\n", i, result); + assert(result == i); + } + + { + int result = gen_cond_destroy(&cv); + if (result != 0) + { + fprintf(stderr, "Result = %d\n", result); + fprintf(stderr, "\tWaitersBlocked = %ld\n", cv->nWaitersBlocked); + fprintf(stderr, "\tWaitersGone = %ld\n", cv->nWaitersGone); + fprintf(stderr, "\tWaitersToUnblock = %ld\n", cv->nWaitersToUnblock); + fflush(stderr); + } + assert(result == 0); + } + + getchar(); + + return 0; +} diff --git a/test/common/gen-locks/condvar3.c b/test/common/gen-locks/condvar3.c new file mode 100755 index 0000000..d3012f6 --- /dev/null +++ b/test/common/gen-locks/condvar3.c @@ -0,0 +1,174 @@ +/* + * File: condvar3.c + * + * + * -------------------------------------------------------------------------- + * + * Pthreads-win32 - POSIX Threads Library for Win32 + * Copyright(C) 1998 John E. 
Bossom + * Copyright(C) 1999,2005 Pthreads-win32 contributors + * + * Contact Email: rpj@callisto.canberra.edu.au + * + * The current list of contributors is contained + * in the file CONTRIBUTORS included with the source + * code distribution. The list can also be seen at the + * following World Wide Web location: + * http://sources.redhat.com/pthreads-win32/contributors.html + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library in the file COPYING.LIB; + * if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * -------------------------------------------------------------------------- + * + * Test Synopsis: + * - Test basic function of a CV + * + * Test Method (Validation or Falsification): + * - Validation + * + * Requirements Tested: + * - + * + * Features Tested: + * - + * + * Cases Tested: + * - + * + * Description: + * - The primary thread takes the lock before creating any threads. + * The secondary thread blocks on the lock allowing the primary + * thread to enter the cv wait state which releases the lock. + * The secondary thread then takes the lock and signals the waiting + * primary thread. + * + * Environment: + * - + * + * Input: + * - None. + * + * Output: + * - File name, Line number, and failed expression on failure. + * - No output on success. 
+ * + * Assumptions: + * - + * + * Pass Criteria: + * - pthread_cond_timedwait returns 0. + * - Process returns zero exit status. + * + * Fail Criteria: + * - pthread_cond_timedwait returns ETIMEDOUT. + * - Process returns non-zero exit status. + */ + +#define _USE_32BIT_TIME_T +#include +#include +#include +#include +#include + +#include "gen-locks.h" + +static gen_cond_t cv; +static gen_mutex_t mutex; +static int shared = 0; + +enum { + NUMTHREADS = 2 /* Including the primary thread. */ +}; + +DWORD WINAPI +mythread(void * arg) +{ + int result = 0; + + assert(gen_mutex_lock(&mutex) == 0); + shared++; + assert(gen_mutex_unlock(&mutex) == 0); + + if ((result = gen_cond_signal(&cv)) != 0) + { + printf("Error = %d\n", result); + } + assert(result == 0); + + + return 0; +} + +int thread_join(gen_thread_t thread, LPDWORD retval) +{ + BOOL rc; + DWORD iretval; + LPDWORD pretval = (retval) ? retval : &iretval; + + do + { + rc = GetExitCodeThread(thread, pretval); + if (rc && *pretval == STILL_ACTIVE) + { + Sleep(500); + } + } while (rc && *pretval == STILL_ACTIVE); + + return 0; +} + +int +main() +{ + gen_thread_t t[NUMTHREADS]; + struct timespec abstime = { 0, 0 }; + struct _timeb currSysTime; + const DWORD NANOSEC_PER_MILLISEC = 1000000; + + /* assert((t[0] = pthread_self()).p != NULL); */ + t[0] = gen_thread_self(); + + assert(gen_cond_init(&cv) == 0); + + assert(gen_mutex_init(&mutex) == 0); + + assert(gen_mutex_lock(&mutex) == 0); + + /* get current system time */ + _ftime_s(&currSysTime); + + abstime.tv_sec = currSysTime.time; + abstime.tv_nsec = NANOSEC_PER_MILLISEC * currSysTime.millitm; + + /* assert(pthread_create(&t[1], NULL, mythread, (void *) 1) == 0); */ + assert((t[1] = CreateThread(NULL, 0, mythread, (void *) 1, 0, NULL)) != NULL); + + abstime.tv_sec += 5; + + while (! 
(shared > 0)) + assert(gen_cond_timedwait(&cv, &mutex, &abstime) == 0); + + assert(shared > 0); + + assert(gen_mutex_unlock(&mutex) == 0); + + assert(thread_join(t[1], NULL) == 0); + + assert(gen_cond_destroy(&cv) == 0); + + return 0; +} diff --git a/test/common/gen-locks/condvar3_1.c b/test/common/gen-locks/condvar3_1.c new file mode 100755 index 0000000..6c19854 --- /dev/null +++ b/test/common/gen-locks/condvar3_1.c @@ -0,0 +1,222 @@ +/* + * File: condvar3_1.c + * + * + * -------------------------------------------------------------------------- + * + * Pthreads-win32 - POSIX Threads Library for Win32 + * Copyright(C) 1998 John E. Bossom + * Copyright(C) 1999,2005 Pthreads-win32 contributors + * + * Contact Email: rpj@callisto.canberra.edu.au + * + * The current list of contributors is contained + * in the file CONTRIBUTORS included with the source + * code distribution. The list can also be seen at the + * following World Wide Web location: + * http://sources.redhat.com/pthreads-win32/contributors.html + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library in the file COPYING.LIB; + * if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * -------------------------------------------------------------------------- + * + * Test Synopsis: + * - Test timeout of multiple waits on a CV with some signaled. 
+ * + * Test Method (Validation or Falsification): + * - Validation + * + * Requirements Tested: + * - + * + * Features Tested: + * - + * + * Cases Tested: + * - + * + * Description: + * - Because some CVs are never signaled, we expect their waits to time out. + * Some are signaled, the rest time out. Pthread_cond_destroy() will fail + * unless all are accounted for, either signaled or timedout. + * + * Environment: + * - + * + * Input: + * - None. + * + * Output: + * - File name, Line number, and failed expression on failure. + * - No output on success. + * + * Assumptions: + * - + * + * Pass Criteria: + * - pthread_cond_timedwait returns ETIMEDOUT. + * - Process returns zero exit status. + * + * Fail Criteria: + * - pthread_cond_timedwait does not return ETIMEDOUT. + * - Process returns non-zero exit status. + */ + +//#define _WIN32_WINNT 0x400 +#define _USE_32BIT_TIME_T + +#include +#include +#include +#include +#include + +#include "gen-locks.h" + +static gen_cond_t cv; +static gen_cond_t cv1; +static gen_mutex_t mutex; +static gen_mutex_t mutex1; +static struct timespec abstime = { 0, 0 }; +static int timedout = 0; +static int signaled = 0; +static int awoken = 0; +static int waiting = 0; + +enum { + NUMTHREADS = 30 +}; + +DWORD WINAPI +mythread(void * arg) +{ + int result; + + assert(gen_mutex_lock(&mutex1) == 0); + ++waiting; + assert(gen_mutex_unlock(&mutex1) == 0); + assert(gen_cond_signal(&cv1) == 0); + + assert(gen_mutex_lock(&mutex) == 0); + result = gen_cond_timedwait(&cv, &mutex, &abstime); + if (result == ETIMEDOUT) + { + timedout++; + } + else + { + awoken++; + } + assert(gen_mutex_unlock(&mutex) == 0); + + return (DWORD) arg; +} + +int thread_join(gen_thread_t thread, LPDWORD retval) +{ + BOOL rc; + DWORD iretval; + LPDWORD pretval = (retval) ? 
retval : &iretval; + + do + { + rc = GetExitCodeThread(thread, pretval); + if (rc && *pretval == STILL_ACTIVE) + { + Sleep(500); + } + } while (rc && *pretval == STILL_ACTIVE); + + return 0; +} + +int +main() +{ + int i; + gen_thread_t t[NUMTHREADS + 1]; + int result = 0; + struct _timeb currSysTime; + const DWORD NANOSEC_PER_MILLISEC = 1000000; + + assert(gen_cond_init(&cv) == 0); + assert(gen_cond_init(&cv1) == 0); + + assert(gen_mutex_init(&mutex) == 0); + assert(gen_mutex_init(&mutex1) == 0); + + /* get current system time */ + _ftime_s(&currSysTime); + + abstime.tv_sec = currSysTime.time; + abstime.tv_nsec = NANOSEC_PER_MILLISEC * currSysTime.millitm; + + abstime.tv_sec += 5; + + assert(gen_mutex_lock(&mutex1) == 0); + + for (i = 1; i <= NUMTHREADS; i++) + { + /* assert(pthread_create(&t[i], NULL, mythread, (void *) i) == 0); */ + assert((t[i] = CreateThread(NULL, 0, mythread, (void *) i, 0, NULL)) != NULL); + } + + do { + assert(gen_cond_wait(&cv1, &mutex1) == 0); + } while ( NUMTHREADS > waiting ); + + assert(gen_mutex_unlock(&mutex1) == 0); + + for (i = NUMTHREADS/3; i <= 2*NUMTHREADS/3; i++) + { + assert(gen_cond_signal(&cv) == 0); + + signaled++; + } + + for (i = 1; i <= NUMTHREADS; i++) + { + assert(thread_join(t[i], (LPDWORD) &result) == 0); + assert(result == i); + } + + fprintf(stderr, "awk = %d\n", awoken); + fprintf(stderr, "sig = %d\n", signaled); + fprintf(stderr, "tot = %d\n", timedout); + + assert(signaled == awoken); + assert(timedout == NUMTHREADS - signaled); + + assert(gen_cond_destroy(&cv1) == 0); + + { + int result = gen_cond_destroy(&cv); + if (result != 0) + { + fprintf(stderr, "Result = %d\n", result); + fprintf(stderr, "\tWaitersBlocked = %ld\n", cv->nWaitersBlocked); + fprintf(stderr, "\tWaitersGone = %ld\n", cv->nWaitersGone); + fprintf(stderr, "\tWaitersToUnblock = %ld\n", cv->nWaitersToUnblock); + fflush(stderr); + } + assert(result == 0); + } + + assert(gen_mutex_destroy(&mutex1) == 0); + assert(gen_mutex_destroy(&mutex) == 0); + 
+ return 0; +} diff --git a/test/common/gen-locks/condvar3_2.c b/test/common/gen-locks/condvar3_2.c new file mode 100755 index 0000000..9a673cf --- /dev/null +++ b/test/common/gen-locks/condvar3_2.c @@ -0,0 +1,211 @@ +/* + * File: condvar3_2.c + * + * + * -------------------------------------------------------------------------- + * + * Pthreads-win32 - POSIX Threads Library for Win32 + * Copyright(C) 1998 John E. Bossom + * Copyright(C) 1999,2005 Pthreads-win32 contributors + * + * Contact Email: rpj@callisto.canberra.edu.au + * + * The current list of contributors is contained + * in the file CONTRIBUTORS included with the source + * code distribution. The list can also be seen at the + * following World Wide Web location: + * http://sources.redhat.com/pthreads-win32/contributors.html + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library in the file COPYING.LIB; + * if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * -------------------------------------------------------------------------- + * + * Test Synopsis: + * - Test timeout of multiple waits on a CV with remainder broadcast awoken. 
+ * + * Test Method (Validation or Falsification): + * - Validation + * + * Requirements Tested: + * - + * + * Features Tested: + * - + * + * Cases Tested: + * - + * + * Description: + * - Because some CVs are never signaled, we expect their waits to time out. + * Some time out, the rest are broadcast signaled. Pthread_cond_destroy() will fail + * unless all are accounted for, either signaled or timedout. + * + * Environment: + * - + * + * Input: + * - None. + * + * Output: + * - File name, Line number, and failed expression on failure. + * - No output on success. + * + * Assumptions: + * - + * + * Pass Criteria: + * - pthread_cond_timedwait returns ETIMEDOUT. + * - Process returns zero exit status. + * + * Fail Criteria: + * - pthread_cond_timedwait does not return ETIMEDOUT. + * - Process returns non-zero exit status. + */ + +#define _USE_32BIT_TIME_T + +#include +#include +#include +#include +#include + +#include "gen-locks.h" + +static gen_cond_t cv; +static gen_mutex_t mutex; +static struct timespec abstime = { 0, 0 }; +static struct timespec abstime2 = { 0, 0 }; +static int timedout = 0; +static int awoken = 0; + +enum { + NUMTHREADS = 30 +}; + +DWORD WINAPI +mythread(void * arg) +{ + int result; + + assert(gen_mutex_lock(&mutex) == 0); + + abstime2.tv_sec = abstime.tv_sec; + + if ((int) arg % 3 == 0) + { + abstime2.tv_sec += 2; + } + + result = gen_cond_timedwait(&cv, &mutex, &abstime2); + assert(gen_mutex_unlock(&mutex) == 0); + if (result == ETIMEDOUT) + { + InterlockedIncrement((LPLONG)&timedout); + } + else + { + InterlockedIncrement((LPLONG)&awoken); + } + + + return (DWORD) arg; +} + +int thread_join(gen_thread_t thread, LPDWORD retval) +{ + BOOL rc; + DWORD iretval; + LPDWORD pretval = (retval) ? 
retval : &iretval; + + do + { + rc = GetExitCodeThread(thread, pretval); + if (rc && *pretval == STILL_ACTIVE) + { + Sleep(500); + } + } while (rc && *pretval == STILL_ACTIVE); + + return 0; +} + + +int +main() +{ + int i; + gen_thread_t t[NUMTHREADS + 1]; + int result = 0; + struct _timeb currSysTime; + const DWORD NANOSEC_PER_MILLISEC = 1000000; + + assert(gen_cond_init(&cv) == 0); + + assert(gen_mutex_init(&mutex) == 0); + + /* get current system time */ + _ftime_s(&currSysTime); + + abstime.tv_sec = abstime2.tv_sec = currSysTime.time + 5; + abstime.tv_nsec = abstime2.tv_nsec = NANOSEC_PER_MILLISEC * currSysTime.millitm; + + assert(gen_mutex_lock(&mutex) == 0); + + for (i = 1; i <= NUMTHREADS; i++) + { + assert((t[i] = CreateThread(NULL, 0, mythread, (void *) i, 0, NULL)) != NULL); + } + + assert(gen_mutex_unlock(&mutex) == 0); + + for (i = 1; i <= NUMTHREADS; i++) + { + assert(thread_join(t[i], (LPDWORD) &result) == 0); + assert(result == i); + /* + * Approximately 2/3rds of the threads are expected to time out. + * Signal the remainder after some threads have woken up and exited + * and while some are still waking up after timeout. + * Also tests that redundant broadcasts don't return errors. 
+ */ + + if (InterlockedExchangeAdd((LPLONG)&awoken, 0L) > NUMTHREADS/3) + { + assert(gen_cond_broadcast(&cv) == 0); + } + + } + + assert(awoken == NUMTHREADS - timedout); + + { + int result = gen_cond_destroy(&cv); + if (result != 0) + { + fprintf(stderr, "Result = %d\n", result); + fprintf(stderr, "\tWaitersBlocked = %ld\n", cv->nWaitersBlocked); + fprintf(stderr, "\tWaitersGone = %ld\n", cv->nWaitersGone); + fprintf(stderr, "\tWaitersToUnblock = %ld\n", cv->nWaitersToUnblock); + fflush(stderr); + } + assert(result == 0); + } + + assert(gen_mutex_destroy(&mutex) == 0); + + return 0; +} diff --git a/test/io/job/test-job-client.c b/test/io/job/test-job-client.c new file mode 100755 index 0000000..91e2c85 --- /dev/null +++ b/test/io/job/test-job-client.c @@ -0,0 +1,158 @@ +/* This test is mainly to check that the job library can be linked */ + +#include + +#include "job.h" +#include "gossip.h" + +/* some fake items to send around */ +struct request_foo +{ + int x; +}; +struct ack_foo +{ + int x; +}; + +int main(int argc, char **argv) +{ + + int ret = -1; + struct request_foo* req = NULL; + struct ack_foo* ack = NULL; + PVFS_BMI_addr_t server_addr; + job_status_s status1; + job_id_t tmp_id; + job_context_id context; + + /* set debugging level */ + gossip_enable_stderr(); + gossip_set_debug_mask(0, 0); + + /* start the BMI interface */ + ret = BMI_initialize(NULL, NULL, 0); + if(ret < 0) + { + fprintf(stderr, "BMI_initialize failure.\n"); + return(-1); + } + +/* + ret = trove_initialize( + TROVE_METHOD_DBPF, NULL, "/tmp/pvfs2-test-space", "/tmp/pvfs2-test-space", 0); + if(ret < 0) + { + fprintf(stderr, "trove_initialize failure.\n"); + return(-1); + } +*/ + /* start the job interface */ + ret = job_initialize(0); + if(ret < 0) + { + fprintf(stderr, "job_initialize failure.\n"); + return(-1); + } + + ret = job_open_context(&context); + if(ret < 0) + { + fprintf(stderr, "job_open_context() failure.\n"); + return(-1); + } + + /* lookup the server to get a BMI style 
address for it */ + ret = BMI_addr_lookup(&server_addr, "tcp://localhost:3334"); + if(ret < 0) + { + fprintf(stderr, "BMI_addr_lookup failure.\n"); + return(-1); + } + + /* allocate some buffers for the req and ack */ + req = BMI_memalloc(server_addr, sizeof(struct request_foo), + BMI_SEND); + ack = BMI_memalloc(server_addr, sizeof(struct ack_foo), + BMI_RECV); + if(!ack || ! req) + { + fprintf(stderr, "BMI_memalloc failure.\n"); + return(-1); + } + + /* send a message */ + ret = job_bmi_send(server_addr, req, sizeof(struct request_foo), + 0, BMI_PRE_ALLOC, 1, NULL, 0, &status1, &tmp_id, context, + JOB_TIMEOUT_INF, NULL); + if(ret < 0) + { + fprintf(stderr, "job_bmi_send() failure.\n"); + return(-1); + } + if(ret == 0) + { + int count = 0; + ret = job_test(tmp_id, &count, NULL, &status1, -1, context); + if(ret < 0) + { + fprintf(stderr, "job_test() failure.\n"); + return(-1); + } + } + + /* check status */ + if(status1.error_code != 0) + { + fprintf(stderr, "job failure.\n"); + return(-1); + } + + /* receive a message */ + ret = job_bmi_recv(server_addr, ack, sizeof(struct ack_foo), + 0, BMI_PRE_ALLOC, NULL, 0, &status1, &tmp_id, context, + JOB_TIMEOUT_INF, NULL); + if(ret < 0) + { + fprintf(stderr, "job_bmi_recv() failure.\n"); + return(-1); + } + if(ret == 0) + { + int count = 0; + ret = job_test(tmp_id, &count, NULL, &status1, -1, context); + if(ret < 0) + { + fprintf(stderr, "job_test() failure.\n"); + return(-1); + } + } + + /* check status */ + if(status1.error_code != 0) + { + fprintf(stderr, "job failure.\n"); + return(-1); + } + + /* check the size */ + if(status1.actual_size != sizeof(struct ack_foo)) + { + fprintf(stderr, "short recv.\n"); + return(-1); + } + + /* free memory buffers */ + BMI_memfree(server_addr, req, sizeof(struct request_foo), + BMI_SEND); + BMI_memfree(server_addr, ack, sizeof(struct ack_foo), + BMI_RECV); + + /* shut down the interfaces */ + job_close_context(context); + job_finalize(); + BMI_finalize(); +/* 
trove_finalize(TROVE_METHOD_DBPF); */ + + return(0); +} \ No newline at end of file diff --git a/test/io/job/test-job-server.c b/test/io/job/test-job-server.c new file mode 100755 index 0000000..9442fca --- /dev/null +++ b/test/io/job/test-job-server.c @@ -0,0 +1,182 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* this is an example server application that uses the job interface */ + +#include +#include + +#include "job.h" +#include "gossip.h" + +/* some fake items to send around */ +struct request_foo +{ + int x; +}; +struct ack_foo +{ + int x; +}; + + + +int main(int argc, char **argv) +{ + + int ret = -1; + struct ack_foo* ack = NULL; + job_status_s status1; + struct BMI_unexpected_info req_info; + job_id_t job_id; + int outcount; + job_id_t tmp_id; + job_context_id context; + + /* set debugging level */ + gossip_enable_stderr(); + gossip_set_debug_mask(0, 0); + + + /* start the BMI interface */ + ret = BMI_initialize("bmi_tcp", "tcp://localhost:3334", BMI_INIT_SERVER); + if(ret < 0) + { + fprintf(stderr, "BMI_initialize failure.\n"); + return(-1); + } + + /* + ret = trove_initialize( + TROVE_METHOD_DBPF, NULL, "/tmp/pvfs2-test-space", "/tmp/pvfs2-test-space", 0); + if(ret < 0) + { + fprintf(stderr, "trove_initialize failure.\n"); + return(-1); + } + */ + + /* start the flow interface */ + ret = PINT_flow_initialize("flowproto_multiqueue", 0); + if(ret < 0) + { + fprintf(stderr, "flow_init failure.\n"); + return(-1); + } + + /* start the job interface */ + ret = job_initialize(0); + if(ret < 0) + { + fprintf(stderr, "job_initialize failure.\n"); + return(-1); + } + + ret = job_open_context(&context); + if(ret < 0) + { + fprintf(stderr, "job_open_context() failure.\n"); + return(-1); + } + + + + /* post a job for unexpected receive */ + ret = job_bmi_unexp(&req_info, NULL, 0, &status1, &job_id, 0, context); + if(ret < 0) + { + fprintf(stderr, "job_bmi_unexp() failure.\n"); + return(-1); + 
} + if(ret != 1) + { +#if 0 + /* exercise testworld() interface, block indefinitely */ + outcount = 1; + ret = job_testworld(&job_id, &outcount, NULL, &status1, -1); + if(ret < 0 || outcount == 0) + { + fprintf(stderr, "job_testworld() failure.\n"); + return(-1); + } + + /* alternatively, try out the testsome interface */ + outcount = 1; + ret = job_testsome(&job_id, &outcount, &foo, NULL, &status1, -1); + if(ret < 0 || outcount == 0) + { + fprintf(stderr, "job_testsome() failure.\n"); + return(-1); + } +#else + + /* ... or maybe even give job_test() a whirl */ + ret = job_test(job_id, &outcount, NULL, &status1, -1, context); + if(ret < 0 || outcount == 0) + { + fprintf(stderr, "job_test() failure.\n"); + return(-1); + } + +#endif + } + + /* check status */ + if(status1.error_code != 0) + { + fprintf(stderr, "Bad status in unexp recv.\n"); + return(-1); + } + + /* allocate a buffer for the ack */ + ack = BMI_memalloc(req_info.addr, sizeof(struct ack_foo), + BMI_SEND); + if(!ack) + { + fprintf(stderr, "BMI_memalloc failure.\n"); + return(-1); + } + + /* send a message */ + ret = job_bmi_send(req_info.addr, ack, sizeof(struct ack_foo), + 0, BMI_PRE_ALLOC, 0, NULL, 0, &status1, &tmp_id, context, + JOB_TIMEOUT_INF, NULL); + if(ret < 0) + { + fprintf(stderr, "job_bmi_send() failure.\n"); + return(-1); + } + if(ret == 0) + { + int count = 0; + ret = job_test(tmp_id, &count, NULL, &status1, -1, context); + if(ret < 0) + { + fprintf(stderr, "job_test() failure.\n"); + return(-1); + } + } + + + /* check status */ + if(status1.error_code != 0) + { + fprintf(stderr, "job failure.\n"); + return(-1); + } + + BMI_memfree(req_info.addr, ack, sizeof(struct ack_foo), BMI_RECV); + BMI_unexpected_free(req_info.addr, req_info.buffer); + + /* shut down the interfaces */ + job_close_context(context); + job_finalize(); + PINT_flow_finalize(); + BMI_finalize(); + /* trove_finalize(TROVE_METHOD_DBPF); */ + + return(0); +}