diff --git a/ChangeLog b/ChangeLog index 89683f3..8b164f1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,1586 +1,291 @@ ----------------------- -PVFS2 Release ChangeLog +OrangeFS Release ChangeLog ----------------------- -pvfs-2.7.0 -============== -* ib: free cached registrations when out of space (openib only) - -* HEC posix extensions now supported by PVFS. -* HEC posix extensions patches to the vfs are provided in the patches subdir. -* Support for immutible (read-only) files set by xattr and vfs caching of -read-only files through the vfs. -* ib: optimistic buffer registration to avoid segmented memory registration -* ib: redesign and streamline credit-based flow control scheme -* ib: cache explict BMI_memalloc registrations; huge latency improvement -* bmi: avoid gettimeofday() to determine poll vs block for multi-method - scenario -* ib: rely on bmi to make poll vs block decision -* ib: only check for new connections and async events during blocking periods - -* state machine changes: - - cleanup codegen to use structs instead of unions/arrays - - provide a universal smcb (state machine control block) for - all state machine actions - - add support for concurrent state machines - -* portals: new BMI implementation -* make/configure: support Cray XT3/XT4 compute node build environment - -* Added a RootSquashExceptions config option - -* support for Linux kernels 2.6.22 and 2.6.23 - -* New layout parameter to PVFS_[i]sys_create. - -* Immutable file support, allowing client caching of files. - -* Added a new two-dimensional distribution for incast scenarios. - -* MX: new BMI implementation - -* Fixes to: - - servers running over an extended period of time. - - kernel module for simul workloads - - readdir handling of tokens - - alt aio cleanup path - -pvfs-2.6.3 -============== - -* fix to metadata coalescing that caused file corruption by writing to invalid -file handles. Occurs with large (>70) mpi parallel write workloads. 
-* fix for race condition on metadata completion -* fix for alt-aio with non-contig requests -* fix to kzalloc configure checks for older kernels -* fix to support older (0.9.6x) openssl versions -* fix 2.4 kernel crash in lookup -* fix cleanup of server internal state machines -* fix remove-object to decrement dirent count -* fix a number of error code problems in bmi and elsewhere -* fix to prevent failures on decoding response errors from server -* fix 2.4 mount bugs -* fixes to acl checking, group checking, and root squash - -pvfs-2.6.2 -============== - -* fixes in trove dbpf to support latest Berkeley DB (version 4.5). -* fix kmod compile issues with --enable-mmap-racache -* fix for IO failures caused by invalid sizes used in file.c -* fixes for 2.4 kernels - -pvfs-2.6.1 -============== - -* fix --enable-fast -* fix to build for client-core-threaded (make clean and make kmod before make) -* fix problems on ppc64 (long/int type mismatches, no _syscall, removed -warnings) -* fix overwrites past end of request -* allow kernel buffer size to be tuned from pvfs2-client -* fixups for linux kernel version 2.6.19 - - check return type of kmem_cache_destroy - - combine readv/writev to aio_read/aio_write - -pvfs-2.6.0 -============== -* kernel module cleanups -* appropriate support for 64bit handles in kernel module -* added debugs for kernel to pvfs2-debug.h -* added offsetof macro -* added PVFS_U_SUID (root squash) -* pvfs-utils.h: PVFS2_translate -> PVFS_translate -* removed function: PVFS_mgmt_toggle_admin_mode -* changed all the PVFS_credentials params in system interfaces to const -* added PVFS_(i)sys_readdir_plus -* made PVFS_sysresp_readdir_s align properly for 64bit -* PVFS_sysresp_geteattr struct: added err_array field -* PVFS_sys_attr struct: fix alignment for dist_name and dist_params fields -* +1 of major version: 1.3 -> 2.3 -* removed parent_*time from crdirent/rmdirent request -* added listattr request -* added (optional) params: - - TCPBindSpecific - - 
FlowBufferSizeBytes - - FlowBuffersPerFlow - - TroveAltAIOMode - - RootSquash - - ReadOnly - - AllSquash - - AnonUID - - AnonGID - - SecretKey -* fixed setgid bug -* root squash -* support for getdents[64]_plus_lite -* build w/ 2.6.18 and 2.6.19 kernels -* added pvfs2-lsplus -* configure arguments: - --with/without-openssl[=] -* alt_lio_listio impl (threaded pread/pwrite) -* server SIGHUP handler waits for all ops completion [...] -* migration tool fixes -* fixes for simultaneous create/unlink hangs (reqsched and sync coalesce) -* add connection close to bmi for resetting the socket buffer size for initial -socket -* atime, mtime, and ctime fixes for VFS interface -* verify hash of config files - - added openssl options and checks -* updated faq entry for heartbeat -* config check for linux/mount.h -* config check that statfs callback has struct dentry arg -* LTP fixes to pass tests - - s/EOPNOTSUPP/EACCES/ - - acl fixes -* ib: - - check for ib reregister event - - close connection on cancel - - fix duplicate free bug during message cancel - -pvfs2-1.5.0 -=============== -- trove dbpf keyval changes: The dbpf keyval implementation has been - changed from using individual DB objects for each handle, to one - DB object for all keyval entries. This affects the storage format, - and requires the pvfs2-migrate-collection tool to be run on storage - spaces created previous to this release. -- sync coalescing: A new syncing feature has been added to the trove layer that - provides better performance with metadata in high-load cases. New options - were added to the server config: TroveSyncCoalesce, CoalescingHighWatermark, - CoalescingLowWatermark. -- added stranded bstream checking and handling. This is useful for correctly - reusing handles in failure cases. -- consolidate I/O paths through the VFS for read,write and readv,writev. -- added testsome, wait, and release functions to external interfaces -- fixes to karma and performance monitoring. 
-- better error handling on server and client and kernel module -- fixes to ownership flags on root directory. Allow chmod to work. -- fixes for small io and non-contiguous io. -- fixes to sys-io state machine to prevent deadlock on the server caused by - concurrent writes from the same client. -- replaced keyval strings with shorter versions to save space in the keyval - database. -- cleanups to the kernel module: cleanup cache subsystem structures and - functions. Better error checking on to make module reloading more robust. -- fixes for fsck and fs-dump. -- better xattr support for 2.4 kernels -- added support for berkeley db environments with the pvfs2 bdbs, and using - shared memory for db cache instead of mmapped file. -- better support for 64bit (esp. ppc64) platforms and 32/64 bit mixed env. -- better support for darwin (OS X) -- added configure options: --disable-aio-callbacks, No check for aio callback - support is done any longer. --enable-kernel-sendfile to configure for users - that want sendfile system call support, it is now disabled by default. - Epoll is now the default if its available, otherwise poll is used, - --enable-epoll and --disable-epoll now force without checking. -- Performance improvements for bmi_tcp: allow socket buffer size and tcp - window size to be changed dynamically. Use a pipe file descriptor - for poll to prevent stalls. Reorganize epoll to add and remove epoll - watches. -- pvfs2-genconfig now supports --iospec and --metaspec options. Also fixed - some hostname sorting and range spec bugs. -- pvfs2-client: added --logstamp. nonblocking statfs and mount support. -- handle allocation wasn't resulting in a uniform distribution. Now we - pick a server at random and then select the remaining handles in a - round-robin fashion -- pvfs2-ping does better per-server error reporting and keeps track of errors - better -- pvfs2-viewdist now fails gracefully on old systems without xattrs. 
Also - prints out handle number and filename for each datafile - -pvfs2-1.4.1-pre1 -=============== -- ib: encode data structures to support heterogenous architectures -- deal with update_atime vanishing from kernel: test for touch_atime and use - that if available. -- fix configure-time kernel tests to work better with 2.6.16 kernels -- ib: separate config options for includes and libraries, optionally -- ib: support mellanox ibgd-1.8.2, compile quietly on x86_64 -- ib: avoid more compiler warnings -- ib: list hcas rather than enumerating in code -- ib: fix mosal startup for ancient libraries -- ib: update internals documentation -- ib: fix CTS out-of-order problem -- ib: separate message headers to make math and debugging easier -- ib: avoid trying to actively connect as a server -- ib: move static variables into an allocated struct to avoid namespace - collisions -- ib: generalize memcache to be used by multiple consumers, not just VAPI -- ib: generalize bmi_ib to work with multiple verbs interfaces. Add support - for OpenIB. - -pvfs2-1.4.0 -- add a simple sendfile callback stub -- getattr through pvfs2-client-core now specifies exactly what it needs -- fix case where pvfs2-fs-dump would fall into infinite loop -- added new pvfs2-viewdist and pvfs2-perror utility -- many zero-fill cleanups -- SIO (small-io) optimization: piggyback small I/O requests in initial - communication between server and client (like MPI eager mode, but for I/O) -- fix a truncate bug -- pvfs2-genconfig can create config files for running several servers on one - machine. 
useful for testing -- better pvfs2-client and pvfs2-client-core logging -- better interaction between pvfs2-client and pvfs2-client-core -- improvements to doxygen generation -- rework the acache into generic tcache infrastructure upon which acache is - built -- timeouts for kernel operations can now be adjusted via /proc/sys/pvfs2 -- cleanups for --enable-strict and building on 64-bit systems -- update kernel interface for 2.6.15 -- assorted improvements to tests: more coverage, better reporting -- fix several immediate completion cases: others probably remain -- remove an extraneous call to make_bad_inode -- better logging when msgpairarray fails -- clean up the pvfs2-server shutdown process -- improvements to nightly test error checking -- fix immediate completion error with zero-byte reads and writes -- more robust directory renaming technique - -pvfs2-1.3.2 -=============== -- new configure option for kernels which export internal symbols: - --disable-kernel-aio -- fix for getattr error handling -- fix for trove printing out a resource after (sometimes) invalidating it - -pvfs2-1.3.1 -=============== -- ib: add memory registration caching -- ib: add support for cancel operation -- ib: fix minor protocol problems -- ib: cleanup build -- better pvfs2-client and pvfs2-client-core logging -- --enable-fast was broken -- fix compile error on FC3 and RHEL 2.6.9 kernels -- async i/o support (but needs libaio and libaio-devel at this time) -- "trusted ports" -- --enable-strict now compiles w/ a lot fewer warnings -- fixed timestamp problems for pvfs2 files -- listeattr system interface function -- fixed bugs in timestamps -- fixed bugs in zero-fill handling -- no longer ship generated .c files from .sm. 
instead, ship the sm parser - (allows for easier release patching) -- Phil Carns: server checks fs id on startup -- Phil Carns: --num-dfiles option to pvfs2-genconfig -- Phil Carns: fix setgid suport -- Phil Carns: proper symlink support (created as 777, no way to change them) -- Lots of portability fixes. PVFS2 now builds on Darwin (userspace, no - kernel) -- better tests for location of Berkely DB headers/libraries - -- Trach-Minh Tran reported bug in append mode. fixed -- - - -pvfs2-1.3.0 -=============== -- Murali Vilayannur contributed extened attribute support to the VFS interface -- Murali and Dean Hildebrand also worked on implementing readv and writev in - the VFS interface -- Phil Carns contributed logging improvements to pvfs2-client and pvfs2-server -- Phil also contributed improvements to the request processor which should - help speed up some concurrent workloads. -- Phil further contributed protocol versioning to PVFS2. Major version - changes are incompatible up or down. New clients can't talk to old servers, - but new servers can talk to old clients. 
-- clean up some warnings on 64 bit platforms -- Phil contributed a pvfs2-set-sync tool -- lots of configure tests for various kernel versions -- rework acache -- memory leak in flow descriptors -- bugfixes for noncontig requests -- improvements to the req scheduler: more operations can occur concurently now -- improved handling of "zero-fill" case: sparse files now treated correctly -- further improvements to nightly testsuite - - -pvfs2-1.2.0 -=============== -- make server better able to handle bogus file names (David Metheny) -- better casts for ppc64 -- even better configure tests for various berkely db features -- documentation updates for Fedora 4 changes -- extended attribute support (not available through VFS yet) -- nightly build infrastructure now mostly in place -- --enable-strict now works again -- new utilities: pvfs2-touch, pvfs2-stat, pvfs2-ln -- add additional write() checks to kernel module -- hold st_nlink field at 1: keeping a true count is quite hard to do - correctly in a distributed environment -- new distribution: "varstrip"j -- better pvfs2 kernel module debugging interface (/proc/sys/pvfs2/debug) -- pvfs2 now supports setgid -- build: avoid some warnings from gcc4 -- kernel: understand new backing_dev_info format in 2.6.12 -- Added DefaultNumDFiles as filesystem config option. -- Added num_dfiles tabfile option. i - The number of dfiles is set by searching for the first given hint in this - order: application hints, tabfile, server config. The selected - distribution may modify the hint if neccesary. -- Phil Carns contributed general improvements and bug fixes for the dotconf - parser. -- Added DefaultDistribution section support for server config file. Section - is optional and accepts Name, Param, and Value (Param and Value may be - repeated multiple times in the section). 
-- added 'retry' and 'timeout' config file options -- will consult /etc/mtab as an additional source for pvfs2 tabfile entries - -pvfs2-1.1.0 -=============== -- network protocol: more 64-bit alignment fixes -- build: hide karma details in its module.mk -- build: remove duplicate commands used for QUIET_COMPILE -- build: fix test configure library detection -- build: simplify karma and vis some more -- build: add tests for features of Berkeley DB 4.3.xx -- trove: fix bugs introduced by new features of DB 4.3.xx -- docs: added doxygen configuration file and doxygen-style docs - to many source files -- pvfs2-cp: can pass strip size as a parameter (thanks David Beilloin - ) -- gossip: avoid double-timestamp in debug and error messages -- feature-test for kernel routines i_size_write, i_size_read, and parent_ino -- recover better from server failures -- kernel: export superblock magic number for romio autodetection -- 64-bit: change some protocol encodings to avoid unaligned accesses, fix - server crash -- documentation updates -- Phil Carns and David Beilloin reported various memory leaks and memory - misuse throughout the code -- Phil Carns and David Metheny contributed their improvment to the - permissions-checking routines. Secondary groups work much better now. -- configure-time options to select behavior needed on redhat-like environments -- can disable server support at configure time -- various tweaks to make cross compiling easier -- Phil Carns contributed a new config file option (LogStamp) to select - timestamp format -- By default we sync metadata (TroveSyncMeta yes) and not data (TroveSyncData - no). This configuration is a good compromise between reliability and - performance for most workloads. -- Joachim Worringen's noncontig benchmark turned up several bugs now addressed - in this release -- bugfix where pvfs2-chmod would cause all future operations to fail -- bugfix for 64 bit servers. 
pvfs2 on opteron now passes the bonnie benchmark - -pvfs2-1.0.1 -=========== -- ib: add configure-time checks for mosal library variations -- ib: flush send queue work requests periodically -- ib: search receive queue for both eager and rts (D. Beilloin) -- improved support for some cases of file holes (that require zero fill) -- fixed a crash bug due to incorrect assertion regarding zero fill -- applied annotations and cleanups to the kernel module in response to - using the automated correctness checker called 'sparse' -- fixed a bug that did not properly allow storage space removals when - using ReiserFS as the underlying file system -- added more redhat checks for determining if we need to patch the - kernel module code to work properly with particular releases -- FAQ documentation update - - -pvfs2-1.0.0 -=========== -- disabled mmap readahead cache by default; allow configure option to - optionally enable it -- added support for handling several cases of file hole zero fills - (i.e. sparse files) -- configured acache timeout to be 5 seconds by default through the - vfs; sysint was already 5 seconds -- added karma program installation to make install target -- redhat 9 related changes (in order to work out of the box) - - used some 2.6.x kernel code on redhat 9 2.4.x kernels, if detected - - no longer mark pages as reserved on redhat 9 2.4.x kernels, as it - causes an oops on their kernel (i.e. 
not necessary) - - installed a wrapper script in place of the pvfs2-server that - exports the LD_ASSUME_KERNEL environment variable before running - the server(to avoid using the redhat NPTL library which is - incompatible) - - fixed make kmod{24}_install even if there are multiple UTS_RELEASE - definitions in the version.h file (as there are in redhat 9) - - -pvfs2-0.9.0 -=========== -- documentation updates to HA and FAQ documents -- better balancing (randomization) of data server selection when - creating new files -- clean up a few request-related 8-byte alignment issues for alpha -- fixed an AMD64 kernel oops due to a bad cast -- cleaned up error code types in the exposed sysint and mgmt methods - (using PVFS_error rather than int) -- added a setattr debugging mask and changed most detailed setattr - debugging to use it -- added a mkdir debugging mask and changed most detailed mkdir - debugging to use it -- added some inlined methods in PVFS_util (as they need to be used on - both the server and the client) for getting the current time in - PVFS_time format, encoding a PVFS_time as a version (finer grained - than a 'normal' PVFS_time since we can use the high 32 bits), and - decoding the version as a PVFS_time -- added a compatibility hack that _should_ allow no noticeable - breakage on existing storage space, but will eventually migrate to - the slightly new storage format over time -- modified client side sys-mkdir, sys-create, and sys-symlink to - encode the mtime as a version when passing it to the server (so it's - transparent from the server perspective) -- modified server side get-attr to decode the version read from disk - back into an mtime (so it's transparent from the client perspective) -- modified mkspace method to properly version newly created root and - lost+found directories -- modified the server side mkdir operation to return -PVFS_EINVAL if - the object attr type is not a directory object -- vfs readdir modifications to detect directory 
version changes on - listings; default behaviour is unchanged -- fixed bug in collection remove that didn't remove the collection - information from the collection db (which manifested as having a bug - where a collection that was once created and then removed could - never be created again due to a lookup succeeding before it should) -- added documentation on how to create multiple file systems within a - single pvfs2 storage space -- added documentation on return codes of the sysint methods -- pvfs2-mkspace improvements: - - now requires a collection ID, storage space and a handle range - - fixed types used to represent actual root handle and collection id - to be the proper types, rather than integers - - remove many defaults in favor of requiring cmdline options - - replace atoi calls with strtou{l}l to accomodate larger values if - specified - - cosmetic output improvements - - fixed mkspace method to make sure that we only use handle ranges - that are both non-null, AND are not an empty string - - -pvfs2-0.8.0 -=========== -- adjusted mask values for event logging to skip 0 mask -- moved handle counting in statfs to server side so that it is - done in parallel -- replaced uniq() subroutine in PAV to avoid recursion -- pvfs2-statfs cleanups and cast fixing -- pvfs2-shell-test: replaced a directory test -- add a clientcore_timing debug mask that only logs the operation - timings from the pvfs2-client-core -- added some pts debugging -- fixed bad assumption in getattr-acache that might have looked at the - wrong handle/fs_id during some operations (e.g. 
lookup) -- removed potential latency on system startup by trying to store temp - config files in /tmp/ first rather than the current dir -- make sure that our error routines strip any known class error bits - instead of deciding that they're unrecognized if set -- improved trove error handling -- improved pvfs2-server error handling -- fixed crash bug in our config parser that tried to strdup strings - that might be NULL -- pvfs2-types.h header file simplification and cleanups -- extended buffer size of filename for use with mkstemp as it started - failing on ppc with misc unexpected characters -- addition of a simple vfs test program that creates a file with a - hole in it and tries to read from that hole (this demonstrates a - currently known bug) -- addition of the same test program using only the system interface -- fixed some misc memory/resource leaks -- fixed test program ls.c and admin program pvfs2-ls.c to not issue an - extra call to readdir on every run by fixing a slight logic bug -- added a directory version that is passed back from the server to the - client on each successful readdir call and updated the client and - server response structures and encoder to accomodate this -- ncache related: - - increased ncache timeout from 5 seconds to 30 seconds by default, - but keep it disabled by default (it's still experimental) - - cleaned up the ncache and make it useable by the - lookup/remove/create/mkdir/etc calls (if enabled) - - added a flag to the client sysint lookup sm that records if the - final object resolved was a followed symlink or not (regardless of - if it was followed or not) - - extended the ncache interface (lookup/insert/remove) to have the - ability to have multiple entries with the same name with different - handles based on some other tag (e.g. resolving a symlink object, - rather than what it points to). this is also not in use at the - moment. for now, no symlink objects will be inserted into the - ncache. 
- - added more ncache debugging - - added a fastpath return from client sysint lookup method on ncache - hit - - updated and extended all ncache test programs to work with the - changed API and fixed broken tests/verified that they still work -- kernel related: - - fixed all warnings generated from compiling the pvfs2 kernel - module under 2.6.9 kernels - - removed write_inode method, as it was unused and probably won't be - used - - removed unused -v command option to pvfs2-ls - - added command option -V (and --verbose) which can detect version - differences in the directory being read and will report them if - this switch is enabled - - fixed misc compilation warnings on Opteron/AMD64 machines - - added directory version to readdir downcall (passed from - pvfs2-client-core) - - removed include of modversions.h directly from pvfs2-kernel.h - - -pvfs2-0.7.0 -=========== -- fsck: server handles bogus datafiles better -- fsck: pvfs2-fsck is a little smarter about what it tries to salvage -- ib: avoid unsignaled WQE pileup -- ib: avoid polling cancelled connections (first stab toward true - cancel) -- ib: spin-block in test functions for better performance -- ib: do not "open" HCA, just grab a handle -- ib: fix leaking connection structs (thanks to D. 
Beilloin) -- ib: use faster 1k mtu -- ib: cleanup warning texts -- configure: add option to disable AIO threaded callbacks -- configure: fix GM and IB to enable only when explicitly requested -- add timestamps to gossip debugging to file and stderr (not syslog) -- ib: initial cancel support, works for pvfs2-ls -- add multihome support: servers can listen on multiple addresses (and - BMI types), clients choose the first address from tabfile that works -- bmi: poll adaptively based on recent usage -- added trove-dbpf support for internally limiting the number of - simultaneous aio operations allowed in progress; default value is 16 -- increased mmap-ra-small buf size from 4K to 16K to avoid many small - allocations (causes slightly larger mem footprint of client-core) -- adjusted client sysint setattr operation to work in a write-through - manner with the acache (when enabled) -- fixed client sysint setattr to invalidate the acache entry (if any) -- changed acache lookup to return the status (to avoid another acache - call that will always follow a successful lookup) -- internal acache improvements, such as removal of duplicated code -- updated acache-torture test program to match slightly new acache - interface -- make sure mmap's requesting MAP_SHARED fail (mmap2 returns -EINVAL, - userspace mmap returns standard MMAP_FAILED value) -- added compile time support for measuring roughly how long it takes - to process particular operations issued from the vfs -- modified PINT_sys_testsome to return immediately without calling - testcontext if any ops are completed and ready to be returned right - away -- made statfs return load information from servers -- gm: fixed performance bug, make sure test calls check completion - queue before sleeping -- gm: implement cancel functionality and verified for common cases -- fixed dbpf SYNC macro bugs that caused hangs on systems without aio - callback support -- updated pvfs2-config tool to use LIBS variable from configure -- 
removed -ldb from client side link stage -- added --serverlibs option to pvfs2-config to determine what - libraries have been linked against pvfs2-server -- updated makefile system for tests directory to use pvfs2-config and - deprecate use of .libs file -- added make install target for test harness programs -- introduced mutex locking in trove-handle-mgmt to avoid race - conditions that appear when statfs is called while trove operations - are in progress -- added utility functions for measuring elapsed wall, system, and user - time between specified points in code -- fix glaring alignment failure in PVFS_server_req -- fixed type of readahead_size in upcall for mmap-ra-cache that caused - the mmap-ra-cache to not work properly on PowerPC -- no longer look in the mmap-ra-cache for data on reads unless we're - sure we're doing an mmap or execution (as opposed to always) -- removed kernel module pvfs2_link call, as it was never used -- moved static copy_mntent method to PVFS_util_copy_mntent method -- added error handling to PVFS_util_copy_mntent -- renamed PVFS_sys_free_mntent to PVFS_util_free_mntent -- fixed successful return value of PVFS_util_copy_sys_attr -- fixed initialize-dyn test program that broken due to mntent changes -- changed get_data_sync_mode to explicitly return TROVE_SYNC if called - on an unregistered fs_id - - -pvfs2-0.6.5 -=========== -- broke several server state machines into multiple nested machines -- some PAV cleanups, including support for a larger number of meta - servers than io servers -- update k_size (number of keyvals) in dspace attr cache when the - number of keyvals on disk changes -- replaced all vfs 64 bit operation tags to be unsigned -- replaced pint-dev code to work with 64 bit unsigned tags -- moved all op initialization out of the constructor and into the - op_alloc routine -- fixed tag cancellation upcall/method to use a 64 bit tag, rather - than an unsigned long -- added a PVFS_util method to get the current system and 
return it as - a valid PVFS_time type -- added arguments to the crdirent/chdirent/rmdirent operations that - now can take optional parent atime/mtime/ctime fields -- added support on server *dirent operations that update the parent - time attributes (if specified) on successful completion of the - *dirent operation; null/zero time fields are not modified -- modified sys-create/sys-mkdir/sys-symlink/sys-rename/sys-remove to - work with the new operations properly; in general, - create/remove/rename operations updates the mtime and ctime of the - parent directory, which is now properly supported -- moved some common server side attribute handling code into a macro - to make it re-useable across several server state machines -- updated en{de}coding of modified operations -- remove sync calls on readonly trove operations -- added sanity check to kernel's device poll; makes sure the device is - opened by only one user before returning valid results -- make the device's request list waitqueue non-exclusive, since poll - waits as well -- some formatting changes on kernel error reporting -- added mmap-ra-cache support for partial cache fills on incomplete - requests; misc mmap-ra-cache enhancements -- cast setparam values to uint64_t rather than int64_t in admin utils -- remove trove sync call on server side root handle check -- modify server side remove state machine to check the k_size of the - dirdata object being removed if it's a directory to report early - back to the client that the directory is not empty (if it's not) -- remove the readdir checks in the client remove path now that the - server-side remove can tell us directly if a directory is not empty -- modified PVFS specific error routines to print the value of - unrecognized error codes if encountered -- fixed memory leak on check_fs_id setparam -- fixed 64bit compile warnings (AMD64/Opteron) -- split msgpair debugging into it's own class (no longer part of - 'client') -- break out clientcore debugging into 
its own debugging mask -- fixed trove error codes -- give trove flush and resize operations the ability to create the - bstream they're working with if it doesn't exist instead of - returning an error -- fixed bug in (threaded) trove that did not properly reinitialize - condition variables after the first finalize -- improvements to the storage space and collection removal code -- extensive trove cleanups and cruft removal -- check for error codes when doing server side getattr and reading - metafile information; stop processing if an error is reached -- invalidate attr cache on write_at calls (never used) -- changed pvfs2-showcoll coll_id prints from hex to decimal -- kernel changes: - - enable kernel readahead hints on mmap/execution (i.e. sequential) - - initialize kernel module's request_list_waitq at declaration time - - put large print statement into a macro for convenience - - make all bufmap allocations GFP_KERNEL, regardless of highmem usage - - use add_waitqueue_exclusive for the io_completion_waitq entries - - clear random readahead hint on mmap - - -pvfs2-0.6.4 -=========== -- added a pvfs2-config tool to help when compiling against pvfs2 -- new PAV option (COMPUTENODES_LAST) that controls whether compute - nodes are chosen first or last out of the total set of nodes - available -- fix locking bug introduced in 0.6.3 that shows up when trove is used - without aio callbacks -- explicitly name invalid msg tag value (0) -- modified pvfs2-ls to show the mtime (rather than atime) in long - listings -- updated set-info test program to update mtime (rather than atime) -- fixed possible flow mutex memory leak -- changed PVFS_time from an int64_t to a uint64_t -- fixed a pvfs2-fsck compile time warning -- fixed bug in sys-remove that erroneously tried to back out of the - remove even on error codes that are acceptable to return -- moved to fully 64 bit unsigned gossip masks (for some more possible - debugging levels) -- fixed bug in shared-state-machine getattr 
method that could allow - attributes to come out of the acache with incorrect types/fields -- fixed bug in crdirent msgpair setups for create/mkdir/symlink (used - incorrect handles in mappings) -- modified mgmt setparam call to take uint64_t parameters instead of - int64_t -- kernel changes: - - fixed bug that allowed a file removed on one client to still - appear on another with bad attributes - - fixed dentry revalidation to never return that a dentry is valid - if it's negative - - fixed directory inode's mtime and ctime changes on updates - - fixed directory link counts for some changes - - simplify atime/mtime/ctime conversion to sys_attr (and vice versa) - to and from inode's time fields - - -pvfs2-0.6.3 -=========== -- added ability to negate log categories in - PVFS_debug_eventlog_to_mask -- added pvfs2-fsck -- updated pvfs2-quickstart to better discuss client configuration -- added optional epoll() based socket management component (enable - with configure option --enable-epoll on linux 2.6 systems) -- fixed configure bug that dropped default CFLAGS in some cases -- adjusted trove id generation to make it easier to log trove events -- fixed Makefile.in bug that caused pvfs2-server.c dependency - information to be lost -- rewrote both threaded and nonthreaded versions of all job_testXXX() - functions - - fixed condition variable usage to avoid signaling races that could - lead to unnecessary wait times - - simplified code path - - removed superfluous calls to gettimeofday() -- adjustment of condition variable usage in trove-dbpf to avoid races - similar to those found in job_testXXX() -- applied vim formatting patch to use spaces instead of tabs -- fixed pvfs2-server permission check to allow users of the same group - to have access to other group members' attributes (for listing) -- fixed incorrect/invalid error codes and error handling -- fixed pvfs2-server crash bug due to io state machine potentially - freeing a reqsched id that was never posted -- added 
mgmt methods PVFS_{i}mgmt_get_dirdata_handle for retrieving - the internal dirdata handle of a specified 'parent' object (if any) -- fixed pvfs2-server crash bug (race) in trove that could return an - object on queue addition that has already been serviced -- extended server side mkdir state machine to create the internal - dirdata dspace when creating the directory (rather than waiting for - crdirent to create it if it doesn't already exist) -- added mgmt methods PVFS_{i}mgmt_create_dirent mgmt for creating - directory entries at a low level -- fixed request scheduler's acceptance of the mgmt-remove-* operations - while in admin mode (since these can be used in admin mode) -- extended mkspace (and thus pvfs2-mkspace program) to initalize a - lost+found directory after the root directory is made -- kernel changes: - - fixed credential usage bug that resulted in (seemingly random) - permission problems when using the file system as multiple users - - fixed a bug that appeared as data corruption due to mapped memory - pages in use being swapped out under heavy memory load under 2.4.x - kernels (and containing incorrect data on future accesses) - - remove slab poison flag from debug allocators, as some kernels - complain about how it was used - - do not use kmap/kunmap calls if not configured for HIGHMEM, as - some configurations cannot resolve these symbols - - -pvfs2-0.6.2 -=========== -- updated documentation for dist-simple-stripe.c and modified - the basic distribution to use a single data file object. 
-- fixed error reporting in pvfs2-server that didn't understand pvfs2 - specific error codes -- extended request scheduler to allow operations to pass through - without being scheduled (and allow the noop operations to work like - this) -- disabled the (unused) device interface on server builds, reducing - server run-time threads by one -- bmi optimizations: avoid polling in test calls if operations are - complete, or no operations are complete but there are unexpected - operations ready to be serviced -- extended the pvfs2-mkspace tool to be able to work with data and - meta handle ranges separately (also extended the internal interfaces - as necessary for this functionality) -- fixed bug in mkspace that could create the root directory dirent - object in a non-meta handle range -- added client side attribute type checking that avoids contacting - servers if we've determined that the operation we're about to - perform cannot be performed on an object of the type we were told to - work on. e.g., a readdir operation will not be issued to the server - if the getattr reports that the object we want to readdir on is not - a directory. 
most sysint operations now have this checking in place -- fixed crash bug on the server crdirent operation if passed a - non-directory object type -- fixed race condition triggered when pvfs2-server is getting many - more incoming operations than it has posted unexpected msgs for -- increase the default number of posted unexpected msgs by the server -- properly re-integrate and simplify the usage of the mmap-ra-cache, - which wasn't working properly after the pvfs2-client-core re-write -- fixed crash bug on the server due to an unexpected error code coming - out of pthread_cond_timedwait inside of job test calls -- modify configure script to append date and time information to non - release builds (for more fine grained version information if needed) -- fixed acache usage in sysint create/mkdir/symlink operations -- open-cache remove now syncs (if necessary) after the remove, rather - than just before -- removed duplicate 'open_cache' debugging keyword definition -- allowed client side logging (via PVFS2_DEBUGMASK/PVFS2_DEBUGFILE env - variables) to append to the specified logfile, rather than - truncating it if it already exists -- fixed bug in device test method that didn't properly adjust the - outcount on failure; also added the number of bytes read on a failed - (short) read -- started adding debugging code for printing out server side response - structures (only getattr is currently implemented) -- kernel changes: added some debugging and simplified the - handle_io_error macro. also added translation of non-errno pvfs2 - error codes (should it get one) to avoid returning entirely - incorrect error codes in some cases; avoid allowing -PVFS_ECANCEL to - be passed down to the kernel code from the pvfs2-client-core. use - i_size_read to update file position on opens for append. 
remove old - code that manually updated the inode's file size on writes -- replaced debugging mask of cancelled i/o operations from io to - cancel -- simplified pvfs2-statfs by retrieving all server statistics in one - call, rather than separate ones for meta and data servers; also - fixed reporting of server duties (previously all servers were - reported as serving both meta and i/o data) -- added a start banner to pvfs2-client-core if client debugging is - enabled. this allows easier visual inspection of restarts in logs -- fixed bug when copying credentials to first make sure that they're - valid in all sysint/mgmt calls -- added both server and client side (mgmt) operations to remove - arbitrary pvfs2 objects and directory entries without prejudice -- added a 'pvfs2-remove-object' tool that can remove pvfs2 objects and - directory entries (use is not generally encouraged) -- fixed pvfs2-fs-dump crash bugs when it fails to find some objects it - was expecting to see -- adjusted the time that pav allows for copying binaries out to remote - machines -- fixed bug bmi_tcp potential send deadlock under heavy loads -- added a mgmt method that allows parameter sets on single servers - (rather than the previous one that allowed only all servers) -- extended pvfs2-set-debugmask program to have the ability to set the - debug mask on a single server (rather than all) via cmdline options -- replaced TroveSyncMode configuration option with separate - TroveSyncMeta and TroveSyncData options. the default values are yes - for both -- updated pvfs2-genconfig to reflect TroveSync* config option changes -- implemented understanding of the data sync mode option in the flow - interface via the setinfo method -- modified pvfs2-server to pass data sync mode option to the flow - implementation via setinfo on a per fs basis - - -pvfs2-0.6.1 -=========== -- misc. 
updates to msgpairarray and associated tools to allow for - server side use -- reorganization of server request parameters so that more (but not - all yet) are stored in a centralized table -- removed some deprecated test programs -- new pint-util.[ch] to contain utility functions shared between - client and server -- some refactoring of server state machines (mainly crdirent) to allow - for code reuse in other state machines -- no longer need to fix up generated html by hand -- fixed a bug that allowed an open through the vfs without the O_CREAT - and O_EXCL flags to return -EEXIST (should be success in that case) -- added a file_open.c test program that tests opens in various modes -- error code cleanups (added PVFS_EACCES mapping to EACCES; moved - PVFS_EDETAIL to be a non-errno error code) -- fixed pvfs2-ls to properly convert reported file times in user's - configured system localtime, rather than in UTC -- fixed the kernel module information so that modinfo doesn't report - many duplicate entries -- fixed server pidfile usage (-p cmdline option) -- fixed msgpair retries to properly delay between retry attempts -- added trove hooks to be able to peek at handles that will be - allocated later -- cleaned up the remove state machine and removed an unnecessary - allocation/copy -- fixed existing acache usage in the sysint calls to be more uniform; - removed some overhead in the sysint -- fixed kernel device close to be aware if mounts are still active -- added more tests to pvfs2-shell-test that test deeply nested - subdirectories -- added a method to determine the min handle recycle time across all - configured servers' file systems -- changed code formatting rules to use spaces rather than tabs in vim -- fixed bug that didn't reload the cached config information on file - system removal (i.e. unmounts) -- added a cmdline argument to pvfs2-client to specify the acache - timeout to use at run-time (default is 0 milliseconds, which is - effectively disabled). 
this timeout is upper bounded by the min - handle recycle time across all servers, and it can change - dynamically as new mounts come and go -- fixed a bug that allowed servers to create internal storage objects - outside of the meta handle ranges in some circumstances -- replace single element msgpair/array setup code where possible with - a macro - - -pvfs2-0.6.0 -=========== -- removed pvfs2-import and pvfs2-export in favor of pvfs2-cp -- added compile time option for disabling thread-safety in the client - library (enabled by default; --disable-thread-safety to disable) -- improved configure summary information emitted at configure time -- added missing non-blocking sysint declarations to sysint header -- made sysint test and testsome() calls more useable -- merged dev unexp polling/handling with system interface -- added PINT_sys_dev_unexp call that allows posting unexpected device - messages so that they can be returned from the sysint testsome - method in addition to completed sysint operations -- added a id_gen_fast_unregister macro that is a no op, to make the - api more consistent with the id_gen_safe_* calls -- modified device driver to work properly in non-blocking mode from - userspace by implementing the character device poll method and - modified the pint-dev device interface to make sure it can handle - the pvfs2 device in a non-blocking manner; added a no immediate - completion option to the device interface; made test more efficient - by not polling if no idle time is specified; check for poll errors; - use proper buffer sizes for reads across the device -- added a method to free the mapped memory region on pvfs2-client-core - shutdown (valgrind complained) -- modified all job uses of the id-generator to use the safe, rather - than fast, methods (useful for several reasons including safe - cancellation of already completed operations) -- modified the job_dev_unexp method to have (and honor) the no - immediate completion flag if passed (used in the 
pvfs2-client-core) -- re-wrote pvfs2-client-core to use sysint non-blocking operations -- improved human readable size reporting -- allow human readable size reporting to optionally use si units -- added --si option to pvfs2-ls (similar to ls's --si option) -- added -H option to pvfs2-statfs (similar to du's -H option) -- added a method able to cancel I/O operations in progress -- removed kernel ability to kill device file on cancelled I/O -- added the ability of sys-io.sm to handle run-time cancellation while - still transitioning properly -- added support in the pvfs2-client for ignoring an upcall retry for - an op already in progress (as opposed to servicing it multiple - times) -- added kernel method that allows the cancellation upcall/downcall - cycle to progress, ignoring the fact that a signal is pending (since - the only time a cancellation is issued is when there's a signal - pending) -- fixed kernel oops on setattr (signal) interruption by returning a - valid error code in this case -- full acache cleanup, bugfixes, and thread-safety support -- allow the sysint post() method to complete an op if finished - (i.e. all states are immed. 
completion) by adding it to the - completion list (which test/testsome can retrieve) -- removed 'cached' attribute fields from getattr_sm object -- removed mostly duplicated code from shared-state-methods.c -- added support (testing only) for building a threaded client library -- gracefully terminate thread-mgr testing threads on errors (this is - only really useful for graceful shutdown of the threaded client - library) -- added a set_info option to bmi that tells it to be more aggressive - when cancelling operations (bmi_tcp will now close sockets) -- fixed kernel bugs that didn't release the bufmap indices in use on - some error paths (eventually caused all of them to be permanently - used which caused hangs on I/O) -- extended msgpairarray code to only retry msgpairs that haven't - completed already -- increased kernel module single operation timeout to be 60 seconds - instead of 30 seconds (note: as a side-effect this also extends the - amount of time you'll have to wait on interrupted operations via - signal for now) -- added PVFS_strerror_r; an equivalent to strerror_r (the thread-safe - strerror call) that handles PVFS_error codes -- added a simple file_write test program (tests are not in releases) -- always delay for 1 second before spawning the pvfs2-client-core from - within the pvfs2-client to avoid filling dmesg with harmful looking - errors while restarting (and re-opening the device file) -- new trove component called dbpf-open-cache that caches open file - descriptors and db references; replaces dbpf-bstream-fd-cache, - dbpf-dspace-db-cache, and dbpf-keyval-db-cache; fixes a few long - standing file size and attribute bugs -- added a few missing admin tools to make install target -- more attribute debugging messages or server -- added admin utility program pvfs2-chown -- added make lib/pvfs2-threaded.a target that can be used to build a - multithreaded version of the pvfs2 client library -- thread safety for dynamic activation of bmi methods -- 
fixed makefile LD selection and html docs target -- fixed bmi deadlock that appears in multithreaded library -- fixed responsiveness of pvfs2 kernel driver device poll -- added new option to BMI called BMI_FORCEFUL_CANCEL_MODE to serve as - a hint to modules to be more aggressive when cancelling operations - (i.e., tell tcp to always close sockets when cancelling) -- renamed pint-bucket component to pint-cached-config -- enabled pint-cached-config on server side -- unified how autogenerated .c files are tracked across client and - server builds -- fixed distclean for release tarballs so that it does not destroy .c - files that cannot be regenerated without full cvs source -- moved msgpairarray from client code to common code and made many - adjustments to enable general usage; linked into server -- misc. msgpairarray cleanups -- removed deprecated PINT_client_bmi_cancel() function -- updated state machine compiler to allow multiple machine definitions - per file -- added new null job type that can be used to manually trigger - asynchronous state machine transitions -- refactored server side functions used to start state machines that - are not tied to incoming requests -- refactored server crdirent state machine using nested state machines - and a little bit clearer error handling -- added job functionality to reset timeout timer on pending jobs -- removed old "contig" encoder from source tree -- added new "cancel" debugging mask to look at job timeout behavior -- documentation updates: a new high availability document, faq update, - guide update, manpages -- moved exported pvfs2-util string functions to private str-utils -- added permission checking debugging mask and keyword to debug what - the server is doing on permission granting or failure -- modified cosmetic output of admin tool pvfs2-set-debugmask -- make sure pvfs2-client doesn't cause a hang on ssh logout due to - improperly closed fds; this was fixed in the past but unfixed to - report exec failures. 
we can't do that anymore with this setup, so - we'll have to either require an absolute path to the client - (preferred), or scan the PATH ourself to be sure it can be found; or - log to a file like the server -- added debugging regarding the handle re-use timeout -- increased the default handle re-use timeout from 45 to 360 seconds -- removed redefinition of the default handle timeout constant -- change permissions to reasonable defaults on newly created - directories using pvfs2-mkdir -- extended pvfs2-cp to preserve src to dest permissions where - applicable -- extended pvfs2-cp to allow symlinks to be copied to and from pvfs2 - (as the link target file, not as a link -- similar to normal 'cp') -- extended pvfs2-cp to truncate target files if they exist -- added a threaded vfs test program to perform heavy I/O on a single - file -- fixed pvfs2-server to continue processing after recoverable - BMI_testcontext errors -- added some gossip debugging along the acache code path -- kernel bugfixes: - - don't attempt to shrink the sb's dcache on device close in the - case that the sb has not yet been filled - - translate error codes that occurred while waiting for the - downcall, rather than assuming an error came back from the - downcall (in the case that it didn't); fixes a possible oops on - invalid mounts - - use fully 64-bit op tags instead of relying on unreliable casts - and assumptions moving from kernel space to userspace; changes use - of an atomic_t type to a spinlock protected int64_t; works on ppc - without problems now -- changed pre-posted write_ack recv posting to have an infinite - timeout that is reset on flow completion to avoid timeouts before - operation completion -- fixed crash bugs on (unexpected) job expiration in various places -- fixed crash bugs when an invalid (or unsupported) encoding type is - specified by reporting the error to the user, and the completing the - operation if possible by cycling through the valid encoders -- added pvfs2tab 
support for the line encoding=default - - -pvfs2-0.5.1 -=========== -- added a pidfile option to pvfs2-server for init script integration -- auto-tuned BMI timeout to poll during activity, sleep a bit - otherwise -- pay attention to BMI timeouts in IB layer -- fail quietly on unrecognized BMI method types in pvfs2tab -- disabled TCP method for IB builds -- added pvfs2-cp, which should have all of the functionality of - pvfs2-import and pvfs2-export, but none of the bugs (!) -- bug fixed to idle detection logic in job.c (non-threaded case) -- some tweaks to BMI timeout behavior -- removal of some deprecated BMI code -- bug fixed that caused problem when trying to add an invalid mount - entry -- bug fixed when trying to add general dynamic mount entries -- distribution framework cleanups -- updated test programs and pts test harness to use new distribution - code -- bug fixed that allows i/o attempts on non-datafiles to be handled -- bug fixed that caused breakage on the verbose debugging mask -- implemented non-blocking client library calls for all sysint and - most mgmt operations -- changed all credential arguments to credential references -- export id-generator methods for sysint users -- removed pvfs2-threaded-client-core application from code base -- added pvfs2-mkdir program -- added pvfs2-cp program (soon to replace both pvfs2-import & - pvfs2-export) -- added a GPL license notice in the top level files for the kernel - module -- moved cached configuration information out of the configuration - object -- allow server setattr operations to be performed even if write access - on the object being modified is not granted (fixes utime failures - and misc other permission problems like untarring some files) -- kernel bugfixes: - - fixed an oops on cancelled object operations (file,dir,symlink) - - fixed kernel error codes in several places - - fixed a memory leak that didn't free symlink targets - - replaced new_inode call with iget to avoid the possibility of - 
duplicate inodes with the same number in the vfs - - fixed kernel create mode translation when creating new objects -- kernel 2.4.x support introduced (supports 2.4.19 and up) -- allow pvfs2-client to restart on device open failure -- updated quickstart with the following information: - - gcc 2.96 or above is required (as gcc 2.95 generates buggy code) - - how to use the kernel interface under 2.4.19 or above -- misc cleanups, error handling, and debugging - - -pvfs2-0.5.0 -=========== -- consolidate msgpair and msgpairarray implementations -- bug fixed in "make kmod" path handling -- added ability to build kernel module and test programs to - pvfs2-build.sh -- added scripts (from Justin Luck) for nightly vfs testing -- started nightly vfs testing (see pvfs2-testing mailing list) -- implemented job cancel api -- added job timeouts, and started using in both client & server -- added ability to (manually) reconnect after transient failure in - bmi_tcp -- allow queueing of receives for failed bmi_tcp addresses that may get - reconnected -- removed some deprecated client library code -- started using PVFS_ECANCEL rather than PVFS_EINTR for cancelled - operations -- added bmi address reference counting -- allow server to discard anonymous bmi addresses after connection has - been closed (to conserve resources) -- removed test subdirectory from distributed releases -- cleanup of protocol version mismatch error messages, including a way - to propagate errors detected on server back to client -- fixed db cursor leak in trove -- fix for busy spin condition in client library when retrying - operations -- various cleanups to error handling in msgpairarray code -- added some shell script testing and bonnie++ testing to automated - tests -- fixed pvfs2-ls handling of large usernames -- fixed kernel bug that allowed you to remove the module while in use -- kernel bufmap general I/O path improvements -- increase the default size of the server-side attribute cache - 
(configurable in pvfs2-genconfig) -- randomize the file system IDs generated by pvfs2-genconfig -- internal id-generator code enhancements -- fixed a gcc compilation warning on older versions -- added a new error code system that allows the addition of additional - pvfs2 specific error codes for use within the system -- added logic to automatically retry sends and receives of network - operations on communication failure -- proper unposting and cleanup of posted recvs that are no longer - needed -- added high level retry logic for system operations to help tolerate - transient server failures -- fixed truncate operation to properly return error code and not - operate on anything that's not a regular file -- added a chdirent server operation that replaces an existing dirent - and returns the old (useful for renames where the target exists) -- refactored the client side remove state machine so that it can be - used by both the remove and rename client operations -- general cleanups and improvements to the client side rename - operation - - -pvfs2-0.1.3 -=========== -- added event logging to request encoder, server state machines, and - most job calls -- some changes to the event logging mask system -- some new test programs for event logging -- fixed flow protocol handling of zero byte request processing results -- moved pvfs2-client* to src/apps/kernel/linux-2.6 -- added an independent configure and make setup for test subdirectory -- removed all dependencies on test subdirectory from src subdirectory -- properly report if the pvfs2-client fails to execute the core - program -- reworked the error code handling in the kernel module to better - match the userspace pvfs2 error codes -- handle kernel mount attempts that fail without crashing -- added support for the pvfs2-client-core to request that the kernel - module pass all of the mount information it knows about back to the - client so that a client restart can seamlessly pick up from where it - left off -- added a 
client/kernel umount operation that removes dynamic mount - entries -- properly report symlink types in the pvfs2-ls tool -- fixed karma from hanging on start due to a previous api change -- reworked the client side system interface to be able to handle - dealing with multiple server configuration files at once -- kernel now supports multiple mount points from different servers -- kernel mount option parsing improvements and error handling -- fixed broken configure options: --enable-karma and --disable-karma -- better error handling and synchronization in the kernel module -- fixed a kernel crash if pvfs2-client-core was restarted while - another process was attempting to do I/O -- added a mechanism for ensuring that remount operations are serviced - first by the pvfs2-client-core on restart, rather than failing all - pending operations until the mount tables are built -- reworked the dbpf-attr-cache to use both fs_id and handle as the - lookup key to allow it to work properly when multiple file systems - are mounted -- fixed a kernel crash bug that tried to overwrite pages at invalid - offsets on read error - - -pvfs2-0.1.2 -=========== -- BMI IB module: - - fix bug resulting in mixed-up connections - - clean up incoming message processing - - refactor test and testcontext code - - simplify state machine -- add shared and static libraries build options -- honor PVFS2_DEBUGFILE environment variable in client -- Kernel module builds: - - build the module with everything else, if configured - - clean up properly - - work out-of-tree -- Build system: - - list explicit include directories - - add cflags provided by module makefile stubs - - install kernel module if configured - - simplify dependency generator script, add .po -- BMI: initialize needed methods dynamically, not at startup -- add ability to force use of alternative pvfs2tab file at run time -- general cleanup of API used for system interface initialization, - including tabfile parsing -- added reserved 
value for fsid type to indicate uninitialized value -- BMI and flow interfaces now accept NULL list of modules to - initialize defaults, BMI interprets this by delaying init of modules - until needed, flow interface interprets by initializing all possible - modules -- new api functions for resolving local file system paths -- added ability to dynamically add and remove file systems from the - system interface at run time -- fix request scheduler bug that was leading to odd behavior under - heavy I/O load -- added some scripts started by Wes Emeneker to automated the steps - needed to download and build ROMIO with PVFS2 support -- re-enable compilation of Jiesheng's caching flow protocol -- added new automated nightly test scripts started by Wes Emeneker - into test/automated subdir -- started running automated tests on Adenine cluster at Clemson as a - nightly cron job -- isolated a request processing corner case with a test program -- renamed PVFS_pinode_reference -> PVFS_object_ref to cleanup the - system interface a little further -- added some first stubs for hooks to specify distribution parameters - at runtime -- added more specific server and client side debugging keywords -- enhanced vfs error reporting -- client sysint cleanups and error reporting improvements -- server operation cleanups and error reporting improvements -- bmi error code translation hooks to work properly with pvfs2 error - codes -- fixed mmap-ra-cache bug that didn't flush data at the right times -- made the mmap readahead cache functionality a configure option - (enabled by default) -- fixed vfs bug that caused multiple processes accessing the same file - trigger a SIGBUS on file close -- enhanced vfs test pvfs2-shell-test.sh to test a tricky case of - filling and emptying the page cache repeatedly in a worst case - scenario -- start client sysint thread safety by making accesses to the global - configuration object serialized -- reworked the vfs statfs method to not cause overflows on 
some - kernels -- re-introduced a client sysint lookup optimization to work with the - overhauled client lookup operation -- fixed size reporting of symlinks in pvfs2-ls program -- fixed vfs getattr calls for directories and symlinks -- added an experimental testbed program for testing threaded client - sysint operations based on the pvfs2-client-core (called - pvfs2-threaded-client-core) -- mostly consolidated credential generation to one place -- modified the kernel mount and initialization to use the dynamic - mount system, where a server and file system name is specific on the - mount command line (no more hard coded coll_id and root_handle!) -- fixed a particularly nasty vfs race bug that could crash the kernel - during writes -- fixed a vfs bug that could cause a client to think a file did not - exist when in fact it did -- added full support for dynamic vfs mount (which breaks the old way - of mounting, but is more convenient and makes more sense) -- Better documentation for building MPICH2+ROMIO+PVFS2 - - -pvfs2-0.1.1 -=========== -- can now specify Trove sync behavior when setting up PVFS2 volumes w/ - pav -- added a new mpi-io test to exercise ROMIO's scalable operations -- better detection of dead sockets in BMI TCP/IP module -- fixed pvfs2tab file parsing bug -- completely rewritten TCP/IP socket management -- fixed potential server deadlock on shutdown bugs -- fixed vfs bug that stalled the server on signal cancelled operations -- enhance vfs support to ignore non-fatal signals caught while waiting - for a remote operation to complete -- added support for the "intr" mount option to allow any signal to - interrupt remote operations (similar to the nfs option) -- enhanced test/kernel/linux-2.6/pvfs2-shell-test.sh test script to - test mmap reads and allow more run-time flexibility - - -pvfs2-0.1.0 -=========== -- initial support for logging traces with both MPE and Pablo libraries -- fix some warnings on x86-64 -- work around older Berkeley-DB versions 
w/o support for DB_DIRTY_READ -- implementation of cancel() in BMI, Trove, Flow, and thread mgmt - interfaces -- graceful recovery from I/O errors within flows -- updated perf interface to return metadata access statistics -- various bmi_tcp optimizations (cut down on system calls) -- fixes to bmi_tcp cpu utilization -- dynamic traffic graphs added to Karma -- improved cleanup from messaging errors in client libraries -- added ability to report partial errors with details from mgmt - interface -- revisited locking approach in jobs, flows, and bmi_gm -- new config file option to control default Trove sync behavior - (metadata) -- pvfs2-genconfig update to generate default sync'd metadata sync mode -- added experimental metadata mode (nosync, instead of sync) for - allowing unsync'd operations to occur that reads from dirty memory - (written but non-committed data) to help enhance performance -- graceful shutdown when server gets ctrl-c (SIGINT) -- pvfs2-ls output and argument bugfixes and better error reporting -- handle allocator fix to no longer return invalid handles if no valid - ranges have been registered -- improved server version reporting and exit path (using --version/-v) -- vfs kernel module warning removal (code without proper locks held) -- fixed vfs bug that assigned incorrect link counts on new entries -- fixed vfs bug that used uninitialized data structures when compiled - without kernel debugging support -- fixed vfs bug that didn't properly initialize root inode's - handle/fs_id -- fixed vfs bug that failed to adjust file position on files opened - for append -- fixed vfs bug that didn't properly rebuild attributes of files that - could cause erroneous type changes in the vfs -- fixed vfs bug that could not remove directories with more than 32 - entries in them -- fixed potential vfs race regarding op queueing with a lock - re-ordering -- improved error reporting of error codes from pvfs2 through the vfs -- improved error reporting and detection in 
the configuration parser -- optimized server flush operation to only flush either metadata or - data, depending on the object being worked on -- test/server/showconfig output improvements -- properly clean up meta and data handles during sys_create if the - crdirent step fails -- make pvfs2-import check if the target file exists before attempting - to create it -- pvfs2-fs-dump output improvements (made more readable) -- added test/kernel/linux-2.6/pvfs2-shell-test.sh that is useful for - running some simple vfs tests on a mounted pvfs2 volume -- added configure option to allow the karma gui to not be built -- fixed a bug that would not allow sysint initialization if the user - does not have write permission in the current directory -- properly cache attributes at create and write time instead of - waiting until the first read is done - - -pvfs2-0.0.9 -============ -- added karma administration (new gtk+ gui) for interactive monitoring - of pvfs2 servers (statistics and performance) -- fix support for creating/using/removing multiple collections (file - systems) within a storage space -- improved pvfs2-client backgrounding (better descriptor management) -- pvfs2-ls improvements for handling multiple mount points specified - on the same command line -- capture some more data in the mgmt server statfs object, - pvfs2-statfs program update -- add kernel support for being an nfs exportable file system -- add kernel support for being able to mount more than one pvfs2 volume -- misc code cleanups and removals all over -- test/client/sysint program bugfixes and cleanups -- pvfstab parsing bugfix -- pvfs2-ping and pvfs2-statfs support for multiple file systems -- improved server error reporting if failure occurs before backgrounding -- fix hang on server shutdown via signal -- new maint script (pvfs2smdot.pl) for generating a graphical - representation of the state machines and transitions (dotty format) -- updates to state machine syntax, including support for comments 
and - explicit termination of state machines -- initial work on ability to gracefully cancel pending I/O operations - within the system -- sync and truncate bug fixes -- mpi-io-test reports variance among processes -- fix potential bug in le-bytefield encoder -- use correct compiler flags with Intel cc -- fixed and enhanced the pvfs2-mkspace program for creating/removing - collections and storage spaces -- adjusted all gossip masks to have a standard prefix - - -pvfs2-0.0.8 -=========== -- many alpha platform improvements, including configure check passes, - compilation fixes, and vfs fixes (alignment/overflow bugs) -- pvfs2-genconfig quiet mode improvements -- removed extraneous sample server configuration files from source - tree (can now be generated at compile time) -- kernel mmap/readahead cache improvements and bug fixes -- improved pvfs2-server signal handling, error reporting, and handling -- added a configure/compile time option for allowing function - backtraces on critical errors (see ./configure --help) and backtrace - documentation for analyzing what they mean (see doc/coding/) -- properly implement the rmdir operation through the kernel interface -- improved dbpf-attr-cache error handling and serious bug fixes (race - condition and potential memory corruption fixes) -- race condition fixes on the trove dbpf-thread code, job thread - manage, and job desc queueing code -- a complete re-write of client side system interface lookup operation - that now handles path components such as '.' and '..' 
and allows - better absolute and relative recursive symlink resolution, including - a new client debugging keyword/level specific to the lookup - operation -- added a symlink test to the pts test suite -- added some test shell scripts to the test/client/sysint directory, - mostly useful for stressing the new lookup operation -- kernel and client side i/o path improvements that now gracefully - handle errors such as the case of i/o happening on a file that is - removed by another process -- implemented a previously unimplemented kernel method (inode:getattr) -- pvfs2-client command line parsing bug fix (no longer crashes on - invalid args) -- improvements to several test programs (in test/client/sysint) -- removed several files and function that are no longer needed -- on disk storage format is now versioned -- bound acache size, while it's only holding invalid entries and - growing too large -- powerpc file size listing fixed in pvfs2-ls -- added appendices to quickstart to cover a few more scenarios -- more sanity checks in remove path (in particular for directories) -- reorganized thread management for kernel / userspace communication - used by pvfs2-client -- some pvfs2-ls bug fixes - - -pvfs2-0.0.7 -=========== -- re-worked architecture of metadata storage (generic dspace attrs are - used instead of a separate keyval/data pair) -- added a server-side (trove-dbpf) attribute write-through caching - layer for caching generic attributes and keyval/data pairs that are - user configurable -- added a readahead data cache specifically for vfs mmap-read and - execution, improving their performance greatly (running, editing, - compiling, etc on a pvfs2 volume is now much more responsive) -- fix size reporting using 'du' through the vfs interface -- GNU/Linux PowerPC 32bit support, including the 2.6.x kernel - interface -- updated pvfs2-genconfig to account for new config file format -- fixed an I/O bug that caused data access problems on some multiple - I/O server 
configurations -- removed extra berkeley db error reporting since it's not integrated - with our gossip error reporting system. can now be explicitly - enabled using a new configure option --with-berkdb-debug -- fixes to state machine compiler to workaround hangs on some machines -- new scripts in maint/build for building and testing PVFS2 -- better balancing (randomization) of metadata server selection when - creating new files and directories -- various error msg cleanups (in several components) -- mutex locking improvements, allowing support for NPTL thread - implementations and also improving performance for some operations -- fixed a bug in client side lookup that can cause nested entries - across multiple meta servers to work incorrectly (does not affect - kernel interface) - - -pvfs2-0.0.6 -=========== -- improved kernel truncate support -- fixed kernel ioctl return values (so that 'tar -xv[z|j]f' can work - on pvfs2 volumes) -- minor configuration file format changes -- improved performance monitoring framework -- pvfs2-vis-bw-2d tool for server throughput monitoring -- many symbol name cleanups -- removal of deprecated code -- kernel inode link count fixes -- bucketized (single indirection) 'on-disk' representation of bstream - and keyval files in the host storage space -- a pvfs2_rmspace (opposite of mkspace); can be invoked by server - using the -r command line option -- added kernel module highmem support -- renamed attribute cache 'pcache' to 'acache' -- large file support bug fixes -- improved client sysint rename implementation (state-machine) -- improved signal handling in pvfs2-client -- switch to a new protocol encoder which translates requests and - responses into a machine-independent format, allowing - interoperability between 32- and 64-bit machines and little- and - big-endian machines. 
- - -pvfs2-0.0.5 -=========== -- clean compilation on 64-bit machines -- vfs stability improvements during I/O, including across client - restarts -- statfs reporting improvements (closer to correct) -- sysint remove method fix to properly clean up datafiles -- partial path symlink resolution support -- vfs symlink resolution/following support (including partial path) -- added sysint parameter to sys_lookup that dictates if the final - object on lookup should be resolved if it's a symlink -- runtime options to choose request encoder (see pvfs2tab man page) -- new, much faster, default flow protocol -- streamlined support for threaded and non-threaded server builds - - -pvfs2-0.0.4 -=========== -- improved truncate support (pcache usage/integration) -- kernel truncate support (improved file size reporting) -- removed several run-time verbose (expected) warnings -- fixed a handle rollover bug in extentlist handler -- fixed bit-rotted non-threaded trove/job compilation -- improved client sysint readdir implementation (state-machine) -- improved client sysint mkdir implementation (state-machine) -- improved client sysint setattr implementation (state-machine) -- improved client sysint lookup implementation (state-machine) -- symlink resolution/following support -- multiple metadata server configurations are now supported -- pvfs2-genconfig can now generate multiple metadata server - configurations -- pav and pvfs2-genconfig now run in 'perl -w' mode w/o any warnings -- added event logging framework, instrumented a few code paths -- example tools to manage event log (pvfs2-set-eventmask and - pvfs2-event-mon-example) -- started some example perf monitor visualization tools, src/apps/vis - subdir -- new network encoding for heterogeneous systems (le-bytefield) -- env variable to set client side debugging level - - -pvfs2-0.0.3 -=========== -- more pts tests for pvfs2 datatypes -- ability to specify flowprotocol in pvfs2tab file per fs -- truncate implementation -- new 
flow design document -- statfs implementation -- rewritten mgmt interface, and several new functions -- pvfs2-statfs admin tool -- configurable handle timeout on server -- fixed symbol name collisions with pvfs1 -- better sigpipe handling -- added backtrace feature to gossip_lerr() -- non interactive mode for pvfs2-genconfig -- request scheduler support for timer events -- server performance counters -- mgmt functions and example tool (pvfs2-perf-mon-example) for - gathering real time performance statistics from servers -- added CREDITS file -- addition of PAV (PVFS auto volume) tool for automated test runs -- kernel rename() improvements -- import of mpi-io-test program -- redhat style rc script for server -- addition of administrative mode for servers -- pvfs2-set-mode admin tool to toggle admin mode for file system -- mgmt API hooks necessary for fsck and dumping fs -- beginning of pvfs2-fs-dump tool, that can show file system structure - and potential problems, optional dotty output -- "make dist" build target -- removal of depricated test programs -- man pages for many admin tools etc. 
-- new/updated quickstart, users guide, and status document, README, - and INSTALL -- removal of depricated design documents -- request scheduler support for batch/list operations -- many many many bug fixes and code cleanups -- vfs mmap and execution support (read-only) - - -pvfs2-0.0.2 -=========== -- server side memory leaks almost entirely eliminated -- configuration file options for specifying BMI and Flow modules -- job level thread management optimizations -- integration of a buffer cache into tree (not yet hooked up) -- support for symlinks -- support for vfs symlinks (can exist; can't be followed) -- symlink attributes are now able to be added to the pcache -- BMI infiniband driver merged in -- new pvfstab format and parsing code -- kernel module reports big block size on stat -- job api changes to prevent client side race conditions -- improved I/O path attribute caching - - -pvfs2-0.0.1 -========== -- limited initial public release +orangefs-2.8.6 + +* New Feature: users can now specify at configure time how they want the +* kernel module to handle the file pointer when an error occurs. The +* default behavior is to leave the file pointer alone, i.e., the file +* pointer will always represent the position of the last byte written, +* even though the user's entire request is not totally satisfied. In +* this case, the actual number of bytes written is returned to the user. +* The new feature will change this default behavior such that the file pointer +* is repositioned to the byte just prior to the user's write request, a +* -1 is returned to the user's call, and errno is appropriately set. To +* enable this feature, add --enable-reset-file-pos on the configure +* command line when your OrangeFS system is configured. + +* Bug fix: modified pvfs2_readdir, which is executed by the kernel module +* on behalf of an "ls", to properly retry or quit when pvfs2-client-core +* aborts/restarts or when pvfs2-client terminates.
Prior to these +* changes, pvfs2_readdir could possibly corrupt another process's "ls" +* command by releasing access to a shared memory buffer index that may +* be legitimately in use. + +* Bug fix: modified the kernel code to re-post data to the shared memory +* system (for I/O requests) whenever pvfs2-client-core aborts. Because the +* data was not re-posted prior to these changes, the resulting file contained +* corrupted data. + +* New Feature: added hint support to pvfs_open() to allow user to +* provide various things like the number of servers to use, the +* distribution to use, the layout to use, as well as tags that can be +* used to trace requests. + +* New Feature: added new layout LOCAL which puts a file on the server on +* the same node as the client, if there is one. Otherwise defaults to +* ROUND_ROBIN + +* Bug Fix: repaired a bug in the layout processing + +* Bug Fix: fixed some problems with void pointers and replaced a missing +* function prototype + +* Bug Fix: Corrected a long-standing bug: if the number of unexpected requests +defined in the config file is greater than 64, then the code inadvertently +used invalid job_status_s addresses when calling +server_post_unexpected_recv(). If a failure occurs, the error code in the +status structure is updated with the error value. In this case, the code would +NOT segfault, because the address is actually within valid address space on +the heap; however, the code would be overwriting other global structures, like +the known_method_table, etc. To correct this problem, I use a local +job_status_s structure inside the server_post_unexpected_recv(). Please read +the comments in that function to understand why this works. + SVN: Change #9272 + +* New Feature: added usrint and ucache code that allows programs to link +* directly, bypassing the kernel + +* Bug Fix: rewrote layout algorithms for RANDOM and ROUND_ROBIN which +* did not perform as advertised.
Still a question if metadata +* selection works correctly or stuffed file allocation. + +* Enhancement: pvfs2-ls.c - modified the display of handles from 16 characters +to 20 characters. + +* Bug Fix: uninitialized variable could cause seg fault when file system +* cannot be initialized. + +orangefs-2.8.5 + +SVN changes can be seen at +http://www.orangefs.org/fisheye/orangefs/changelog/orangefs/branches/orangefs-2-8-5 + +* Bug Fix: Problem: server seg faulted when the unstuff state machine tried + to unstuff a file defined as needing more datafiles than I/O servers currently + in the system. A side effect was that this worked when there was more than + one server in the system. The correct behavior is to NEVER define more datahandles than + there are I/O servers in the system. + Correction: Modified function PINT_cached_config_get_num_dfiles to return no + more than the number of I/O servers, regardless of the number requested by the user + or needed by the distribution. This function is called during the sys-create state machine + BEFORE the number of requested datafiles is sent to the server. + SVN: Change #9082 + +* Enhancement: Added optimization. When a file is created and the requested number of dfiles is 1, + then we create the metadata without "stuffed" values, i.e., key="nd". When data + is written to this file, there is no need to execute the "unstuff" server side + state machine. + SVN: Change #9083 + +Text after "CVS: " can be prepended with "http://www.pvfs.org/fisheye/changelog/~br=Orange-Branch/PVFS/?cs=Orange-Branch:" to view related change sets in FishEye.
+ +* Enhancement: change to kernel source path to support SuSE style kernel sources + CVS: mtmoore:20110831005433 + +* Enhancement: add prompt after X removals during a pvfs2-fsck when -y specified + CVS: mtmoore:20110824144127 + +* Enhancement: Add sanity checks to extended attribute operations + CVS: mtmoore:20110811180653 + +* Enhancement addition of counters and application to feed SNMP counters + CVS: walt:20110628184842 + CVS: walt:20110628161840 + CVS: walt:20110617152339 + +* Enhancement: addition of scripts used for OrangeFS CI Jenkins testing + CVS: mtmoore:20110623183144 + +* Enhancement: add backtrace handler to SIGABRT + CVS: mtmoore:20110607235041 + +* Enhancement: add sanity checks during trove cleanup/initialization + CVS: mtmoore:20110607232817 + +* Enhancement: allow server log to be re-opened on SIGHUP to allow log rotation + CVS: mtmoore:20110607023901 + CVS: mtmoore:20110530143543 + +* Enhancement: allow debug mask to be reset during SIGHUP + CVS: bligon:20110504200411 + +* Enhancement: add Berkeley DB configure check for version 4.8.30 + CVS: mtmoore:20110405050554 + Additional Information: https://www.orangefs.org/trac/orangefs/wiki/DBVersionRequired + +* Enhancement add configure checks for binaries required during build + CVS: mtmoore:20110404165443 + +* Enhancement: kernel changes to support newer kernels (up to 2.6.40 tested and 3.X compiles, not tested) + CVS: mtmoore:20110831005433 + CVS: mtmoore:20110823184109 + CVS: mtmoore:20110822232017 + CVS: mtmoore:20110412175323 + CVS: mtmoore:20110525205504 + CVS: mtmoore:20110405142136 + CVS: mtmoore:20110405141506 + +* Bug Fix: man pages are now gzip'd during installation + CVS: mtmoore:20110904010919 + +* Bug Fix: berkeley db gives back unaligned pages (in custom compare/multi-key cases), use local variables to fix ARM failures + CVS: mtmoore:20110902173424 + +* Bug Fix: pointer dereference size issue in dbpf-mgmt on non-x86 platforms, Neal Ferguson + CVS: mtmoore:20110823171834 + +* Bug 
Fix: file corruption using direct backend when writes continuously appending to single file + CVS: elaine:20110819120934 + CVS: mtmoore:20110629130442 + +* Bug Fix: segfault in pvfs2-xattr due to mirror handle string comparison, Nick Mills + CVS: mtmoore:20110809180325 + +* Bug Fix: add backtrace functionality back in for newer distributions + CVS: mtmoore:20110801124552 + CVS: mtmoore:20110607215819 + +* Bug Fix: server-side memory leak related to operation completion + CVS: bligon:20110708194724 + +* Bug Fix: slab cache fix, Bart Taylor + CVS: bligon:20110613200154 + +* Bug Fix: duplicate handle fix in iterate-handles, Phil Carns + CVS: bligon:20110610202309 + +* Bug Fix: correctly parse pvfs2-server alias argument + CVS: mtmoore:20110608010721 + +* Bug Fix: mx_address lookup failure with multiple protocols + CVS: bligon:20110530173949 + +* Bug Fix: admin apps segfault with IB + CVS: bligon:20110512215357 + +* Bug Fix: allow pvfs2-server to re-read configuration file regardless of absolute or relative path + CVS: mtmoore:20110506194814 + +* Bug Fix: root squash functionality, Bart Taylor + CVS: bligon:20110506173620 + +* Bug Fix: various kernel related stability and error-handling fixes + CVS: mtmoore:20110729151843 + CVS: mtmoore:20110729143317 + CVS: mtmoore:20110530154853 + CVS: mtmoore:20110412175444 + +* Bug Fix: pin memory passed in to the kernel to prevent swap pages and kernel panic + CVS: mtmoore:20110725160953 + +------------------------------------------------------------------------------- + +orangefs-2.8.4 +* Change versioning to no longer include dates, just increment the sub version for every release + +orangefs-2.8.3-20110323 +* Bug fix: resolve assert in dbpf_open_cache_remove() when using tree code for file remove + CVS: Orange-Branch:bligon:20110322141529 + +* Bug fix: PINT_dist_encode() segfault in pvfs2-server caused by the client create state machine not sending dist information on create retries.
+ CVS: Orange-Branch:mtmoore:20110322014656 + CVS: Orange-Branch:mtmoore:20110322031703 + CVS: Orange-Branch:mtmoore:20110322040628 + CVS: Orange-Branch:mtmoore:20110322114637 + CVS: Orange-Branch:mtmoore:20110322121208 + +* Bug fix: fix incorrect attribute output using sys attrs + CVS: Orange-Branch:mtmoore:20110301023749 + +* Bug fix: fix segfault in pvfs2-server if client provides too large of buffer in the list-eattr state machine. Includes additional sanity checks on extended attribute operations + CVS: Orange-Branch:mtmoore:20110225222047 + +* Bug fix: merge dir.c fix from main branch + CVS: Orange-Branch:bligon:20110126215228 + +* Bug fix: resolve data corruption caused by ki_pos not being updated during synchronous I/O through asynchronous path (relevant to newer kernels). + CVS: Orange-Branch:mtmoore:20110123143300 + +* Bug fix: fix resolving symbolic links from pvfs2-* utilities + CVS: Orange-Branch:mtmoore:20110114215452 + +* Bug fix: Fix ncache entry update when moving files by David Metheny + CVS: Orange-Branch:mtmoore:20110111154332 + +* Bug fix: PVFS_hint_add fix by Julian Kunkel and refinement of patch + CVS: Orange-Branch:mtmoore:20110105051448 + CVS: Orange-Branch:bligon:20110224203131 + +* Bug fix: change where unlocks occur w.r.t __PVFS_JOB_THREADED__ to prevent deadlock + CVS: Orange-Branch:mtmoore:20101221163004 + +* Bug fix: change incorrect use of union members in tree-communicate + CVS: Orange-Branch:mtmoore:20101214182035 + +* Bug fix: initialize values in trove-migrate to resolve segfault + CVS: Orange-Branch:bligon:20101213175120 + +* Bug fix: pvfs2-mkspace not set collection-only properly + CVS: Orange-Branch:bligon:20101213174458 + +* Bug fix: update iteration in quickhash to use safe variant + CVS: Orange-Branch:mtmoore:20101207004159 + +* Bug fix: null handle/fs_id sent to getattr in client create state machine + CVS: Orange-Branch:bligon:20101201170021 + +* Added support for newer (> 2.6.18) kernels up through 2.6.34. 
+ CVS: Orange-Branch:mtmoore:20101216185529 + CVS: Orange-Branch:mtmoore:20101221153412 + CVS: Orange-Branch:mtmoore:20101221153514 + CVS: Orange-Branch:mtmoore:20101221153709 + CVS: Orange-Branch:mtmoore:20101221153806 + CVS: Orange-Branch:mtmoore:20101221154012 + CVS: Orange-Branch:mtmoore:20101221154240 + CVS: Orange-Branch:mtmoore:20101221160327 + CVS: Orange-Branch:mtmoore:20110123143015 + CVS: Orange-Branch:mtmoore:20110124130323 + CVS: Orange-Branch:mtmoore:20110203182026 + +* Added beginning of new user interface + CVS: Orange-Branch:walt:20110208213934 + +* Cleanup: change PINT_dist_finalize to unregister and free distributions cleanly + CVS: Orange-Branch:mtmoore:20101221163224 + +* Cleanup: delete items from timer queue list when element is freed + CVS: Orange-Branch:mtmoore:20101221163041 + +------------------------------------------------------------------------------- + + +orangefs-2.8.2-20100920 +* Changed unstuff state machine to handle writing to files with read-only +permissions on create + CVS: Orange-Branch:bligon:20100920181650 + +* Bug fix for kernel panics from list corruption + CVS: Orange-Branch:mtmoore:20100915132022 + +* Trove version increase to 0.1.5 to support precreate pools of multiple types + CVS: Orange-Branch:mtmoore:20100908153740 + +* PJMP memory leak fix + CVS: Orange-Branch:bligon:20100906203050 + +* BMI memory leak fix + CVS: Orange-Branch:bligon:20100831190531, + Orange-Branch:bligon:20100831190748, + Orange-Branch:bligon:20100902155433 diff --git a/INSTALL b/INSTALL index 3b13c0c..3ad2cfc 100644 --- a/INSTALL +++ b/INSTALL @@ -1,4 +1,8 @@ -Please see the quickstart guide as found in: +Please see the quickstart guide which can be generated with +`make docs` and found in: doc/pvfs2-quickstart.pdf doc/pvfs2-quickstart.ps + +Also available at: +http://www.orangefs.org/documentation/releases/current/doc/pvfs2-quickstart.pdf diff --git a/Makefile.in b/Makefile.in index abf6c84..bebdd0d 100644 --- a/Makefile.in +++ b/Makefile.in 
@@ -5,12 +5,15 @@ # # all default rule; builds libs, server, and test programs # clean cleans up files +# develtools builds development related tools # distclean _really_ cleans up; returns to pristine tree # docs builds documentation in docs subdirectory # docsclean cleans up documentation files # publish copy over documents to the PVFS.org web pags # admintools builds admin tools -# kernapps builds userland helper programs for kernel driver +# usertools builds user tools +# ucachedtools builds ucached tools +# kernapps builds userland helper programs for kernel driver # cscope generates information for cscope utility # tags generates tags file for use by editors # codecheck checks source code for nonconformance to our std. @@ -49,13 +52,20 @@ ############################################################### # Generic makefile setup -# define a few generic variables that we need to use +# define a few generic variables that we need to use; DESTDIR may +# be overridden on the command line during make install DESTDIR = srcdir = @srcdir@ -prefix = $(DESTDIR)@prefix@ -datarootdir = $(DESTDIR)@datarootdir@ +builddir = @BUILD_ABSOLUTE_TOP@ +prefix = @prefix@ +datarootdir = @datarootdir@ +exec_prefix = @exec_prefix@ +includedir = $(DESTDIR)@includedir@ mandir = $(DESTDIR)@mandir@ -exec_prefix = $(DESTDIR)@exec_prefix@ +sbindir = $(DESTDIR)@sbindir@ +bindir = $(DESTDIR)@bindir@ +libdir = $(DESTDIR)@libdir@ +sysconfdir = $(DESTDIR)@sysconfdir@ VPATH = $(srcdir) SHELL = @SHELL@ @@ -63,15 +73,24 @@ INSTALL = @INSTALL@ # TODO: should probably check for bison and flex in configure BISON = bison FLEX = flex +LN_S = ln -snf BUILD_BMI_TCP = @BUILD_BMI_TCP@ +BUILD_BMI_ONLY = @BUILD_BMI_ONLY@ BUILD_GM = @BUILD_GM@ BUILD_MX = @BUILD_MX@ BUILD_IB = @BUILD_IB@ BUILD_OPENIB = @BUILD_OPENIB@ BUILD_PORTALS = @BUILD_PORTALS@ +BUILD_ZOID = @BUILD_ZOID@ BUILD_VIS = @BUILD_VIS@ BUILD_KARMA = @BUILD_KARMA@ +BUILD_USRINT = @BUILD_USRINT@ +BUILD_UCACHE = @BUILD_UCACHE@ +BUILD_FUSE = @BUILD_FUSE@ 
BUILD_SERVER = @BUILD_SERVER@ +BUILD_TAU = @BUILD_TAU@ +BUILD_KERNEL = @BUILD_KERNEL@ +ENABLE_SECURITY = @ENABLE_SECURITY@ NEEDS_LIBRT = @NEEDS_LIBRT@ TARGET_OS_DARWIN = @TARGET_OS_DARWIN@ TARGET_OS_LINUX = @TARGET_OS_LINUX@ @@ -108,7 +127,7 @@ build_static = @build_static@ # of a file that has the same name as the target. Listing them # here keeps make from accidentally doing too much work (see GNU # make manual). -.PHONY: all clean dist distclean docs docsclean publish cscope tags codecheck admintools kernapps +.PHONY: all clean develtools dist distclean docs docsclean publish cscope tags codecheck admintools kernapps usertools ucachedtools ################################################################ # Find project subdirectories @@ -119,6 +138,22 @@ build_static = @build_static@ MODULES := $(shell find . -name "*.mk" | sed -e 's/^.\///;s/module.mk//') # List of directories to search for headers. +ifdef BUILD_BMI_ONLY +BUILD_SERVER="" +INCLUDES := \ + include \ + src/io/bmi \ + src/common/misc \ + src/common/quickhash \ + src/common/quicklist \ + src/common/id-generator \ + src/common/gossip \ + src/common/gen-locks \ + src/common/events \ + src/client/usrint +GENINCLUDES := \ + include +else INCLUDES := \ src/client/sysint \ src/common/misc \ @@ -127,17 +162,23 @@ INCLUDES := \ src/common/id-generator \ src/common/gossip \ src/common/gen-locks \ + src/common/events \ + src/common/security \ + src/client/usrint \ src/io/trove \ src/io/bmi \ src/io/description \ src/io/buffer \ src/io/job \ src/io/dev \ - src/proto + src/proto \ + src/common/mgmt +GENINCLUDES := \ + include +endif ################################################################# # Setup global flags --include ../Makedefs # These should all be self explanatory; they are standard flags # for compiling and linking unless otherwise noted @@ -154,17 +195,23 @@ LDFLAGS += @LDFLAGS@ SERVER_LDFLAGS = -L@BUILD_ABSOLUTE_TOP@/lib SERVER_LDFLAGS += @SERVER_LDFLAGS@ DB_CFLAGS = @DB_CFLAGS@ -LDSHARED = 
$(CC) -shared +LDSHARED = $(CC) -shared -L@BUILD_ABSOLUTE_TOP@/lib PICFLAGS = -fPIC LIBS += -lpvfs2 @LIBS@ LIBS_THREADED += -lpvfs2-threaded @LIBS@ # need to include external dependency libs when building shared libraries DEPLIBS := @LIBS@ +ULIBDEPLIBS := -lpvfs2 MMAP_RA_CACHE = @MMAP_RA_CACHE@ +RESET_FILE_POS = @RESET_FILE_POS@ TRUSTED_CONNECTIONS = @TRUSTED_CONNECTIONS@ REDHAT_RELEASE = @REDHAT_RELEASE@ NPTL_WORKAROUND = @NPTL_WORKAROUND@ STRICT_CFLAGS = @STRICT_CFLAGS@ +SO_VER = @PVFS2_VERSION_MAJOR@ +SO_MINOR = @PVFS2_VERSION_MINOR@ +SO_RELEASE = @PVFS2_VERSION_SUB@ +SO_FULLVER = $(SO_VER).$(SO_MINOR).$(SO_RELEASE) # for Solaris: # LIBS += -lsocket -lnsl @@ -174,15 +221,22 @@ STRICT_CFLAGS = @STRICT_CFLAGS@ CFLAGS += -D__STATIC_FLOWPROTO_MULTIQUEUE__ # turn on large file support by default CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE + # define _GNU_SOURCE in includes to replace incorrect usage of __USE_GNU +CFLAGS += -D_GNU_SOURCE # include current directory (for pvfs2-config.h) CFLAGS += -I .
# include toplevel source dir CFLAGS += -I $(srcdir) # add selected include directories CFLAGS += $(patsubst %,-I$(srcdir)/%,$(INCLUDES)) +CFLAGS += $(patsubst %,-I$(builddir)/%,$(GENINCLUDES)) + # add package version information CFLAGS += -DPVFS2_VERSION="\"@PVFS2_VERSION@\"" + # make bindir and sysconfdir available to source files +CFLAGS += -DBINDIR='"$(bindir)"' -DSYSCONFDIR='"$(sysconfdir)"' + # always want these gcc flags GCC_CFLAGS := -pipe -Wall -Wstrict-prototypes @@ -197,6 +251,13 @@ ifneq (,$(STRICT_CFLAGS)) #GCC_CFLAGS += -Wcast-qual #GCC_CFLAGS += -Wshadow #GCC_CFLAGS += -Wwrite-strings +else + # these are noisy but come with Wall + # use strict if you want them on + #GCC_CFLAGS += -Wno-unused-value + #GCC_CFLAGS += -Wno-unused-result + #GCC_CFLAGS += -Wno-unused-but-set-variable + #GCC_CFLAGS += -Wno-unused-but-set-parameter endif # Intel cc options, enable all warnings, then disable some @@ -275,12 +336,15 @@ SERVERCFLAGS = @CFLAGS@ -D__GEN_POSIX_LOCKING__ -D__PVFS2_JOB_THREADED__ \ # server side flow protocol connecting BMI with NCAC cache SERVERCFLAGS += -D__STATIC_FLOWPROTO_BMI_CACHE__ -SERVERLIBS = -lpvfs2-server @LIBS@ @DB_LIB@ -lpthread +SERVERLIBS = -lpvfs2-server @LIBS@ @DB_LIB@ ifdef NEEDS_LIBRT SERVERLIBS += -lrt endif +# must be after -lrt because -lrt may use -lpthread +SERVERLIBS += -lpthread + # you can optionally disable thread safety support in the client # though it's not recommended unless *required*. # @@ -362,11 +426,22 @@ ifneq (,@PORTALS_LIBS@) endif endif +ifdef BUILD_ZOID + CFLAGS += -D__STATIC_METHOD_BMI_ZOID__ +endif + # enable mmap-readahead cache (unless disabled by configure) ifdef MMAP_RA_CACHE CFLAGS += @MMAP_RA_CACHE@ endif +# reset the file position pointer when a write call encounters errors (kernel only) +# by default, this feature is disabled. Default behavior is to increment the file +# position pointer as bytes are written. 
+ifdef RESET_FILE_POS +CFLAGS += @RESET_FILE_POS@ +endif + # enable trusted connections (unless disabled by configure) ifdef TRUSTED_CONNECTIONS CFLAGS += @TRUSTED_CONNECTIONS@ @@ -398,10 +473,20 @@ ADMINSRC := # ADMINSRC_SERVER special version of ADMINSRC for tools that need server # library ADMINSRC_SERVER := +# usRSRC is source code for userland programs +USERSRC := # LIBSRC is source code for libpvfs2 LIBSRC := +# ULIBSRC is source code for libofs +ULIBSRC := +# UCACHEDSRC is source code for ucached programs +UCACHEDSRC := # SERVERSRC is souce code for the pvfs2 server SERVERSRC := +ifdef BUILD_BMI_ONLY +# LIBBMISRC is source code for libbmi +LIBBMISRC := +endif # SERVERBINSRC is source files that don't get added to the server library but must be added to the server binary SERVERBINSRC := # DOCSRC is source code for documentation @@ -413,6 +498,8 @@ VISSRC := VISMISCSRC := # KARMASRC is source for the karma gui KARMASRC := +# FUSESRC is source for the FUSE interface daemon +FUSESRC := # userland helper programs for kernel drivers KERNAPPSRC := KERNAPPTHRSRC := @@ -420,6 +507,8 @@ KERNAPPTHRSRC := MISCSRC := # c files generated from state machines SMCGEN := +# DEVELSRC is source for development related tools +DEVELSRC := ################################################################ # Top level (default) targets @@ -434,19 +523,38 @@ SERVER := src/server/pvfs2-server endif # LIBRARIES is a list of the pvfs2 client libraries that will be installed -LIBRARIES := -LIBRARIES_THREADED := -ifeq ($(build_shared),yes) -LIBRARIES_SHARED = lib/libpvfs2.so -LIBRARIES += $(LIBRARIES_SHARED) -LIBRARIES_THREADED += lib/libpvfs2-threaded.so +LIBRARIES_STATIC := lib/libpvfs2.a lib/liborange.a +LIBRARIES_SHARED := lib/libpvfs2.so lib/liborange.so +LIBRARIES_THREADED_STATIC := lib/libpvfs2-threaded.a +LIBRARIES_THREADED_SHARED := lib/libpvfs2-threaded.so + +ifdef BUILD_BMI_ONLY +LIBRARIES_STATIC += lib/libbmi.a +LIBRARIES_SHARED += lib/libbmi.so +BMILIBRARIES := lib/libbmi.a 
lib/libbmi.so endif -ifeq ($(build_static),yes) -LIBRARIES_STATIC = lib/libpvfs2.a -LIBRARIES += $(LIBRARIES_STATIC) -LIBRARIES_THREADED += lib/libpvfs2-threaded.a + +ifdef BUILD_USRINT +LIBRARIES_STATIC += lib/libofs.a lib/liborangeposix.a +LIBRARIES_SHARED += lib/libofs.so lib/liborangeposix.so +LIBRARIES_THREADED_STATIC += lib/libofs-threaded.a +LIBRARIES_THREADED_SHARED += lib/libofs-threaded.so endif +ifneq ($(build_static),yes) +LIBRARIES_STATIC := +LIBRARIES_THREADED_STATIC := +endif + +ifneq ($(build_shared),yes) +LIBRARIES_SHARED := +LIBRARIES_THREADED_SHARED := +endif + +LIBRARIES_THREADED := $(LIBRARIES_THREADED_STATIC) $(LIBRARIES_THREADED_SHARED) +LIBRARIES := $(LIBRARIES_SHARED) $(LIBRARIES_STATIC) $(LIBRARIES_THREADED) + + ################################################################ # Default target forward pointer, to avoid other targets in make stubs all:: @@ -469,23 +577,61 @@ include $(patsubst %, %/module.mk, $(MODULES)) # LIBOBJS is a list of objects to put in the client lib LIBOBJS := $(patsubst %.c,%.o, $(filter %.c,$(LIBSRC))) +# ULIBOBJS is a list of objects to put in the ofs lib +ULIBOBJS := $(patsubst %.c,%.o, $(filter %.c,$(ULIBSRC))) # LIBPICOBJS are the same, but compiled for use in a shared library LIBPICOBJS := $(patsubst %.c,%.po, $(filter %.c,$(LIBSRC))) +# ULIBPICOBJS are the same, but compiled for use in a shared library +ULIBPICOBJS := $(patsubst %.c,%.po, $(filter %.c,$(ULIBSRC))) # LIBDEPENDS is a list of dependency files for the client lib LIBDEPENDS := $(patsubst %.c,%.d, $(filter %.c,$(LIBSRC))) +# ULIBDEPENDS is a list of dependency files for the client lib +ULIBDEPENDS := $(patsubst %.c,%.d, $(filter %.c,$(ULIBSRC))) + +ifdef BUILD_BMI_ONLY +# LIBBMIOBJS is a list of objects to put in the bmi lib +LIBBMIOBJS := $(patsubst %.c,%.o, $(filter %.c,$(LIBBMISRC))) +# LIBBMIPICOBJS are the same, but compiled for use in a shared library +LIBBMIPICOBJS := $(patsubst %.c,%.po, $(filter %.c,$(LIBBMISRC))) +# LIBBMIDEPENDS is a 
list of dependency files for the bmi lib +LIBBMIDEPENDS := $(patsubst %.c,%.d, $(filter %.c,$(LIBBMISRC))) +endif # LIBTHREADEDOBJS is a list of objects to put in the multithreaded client lib LIBTHREADEDOBJS := $(patsubst %.c,%-threaded.o, $(filter %.c,$(LIBSRC))) +# ULIBTHREADEDOBJS is a list of objects to put in the multithreaded ofs lib +ULIBTHREADEDOBJS := $(patsubst %.c,%-threaded.o, $(filter %.c,$(ULIBSRC))) + # LIBTHREADEDPICOBJS are the same, but compiled for use in a shared library LIBTHREADEDPICOBJS := $(patsubst %.c,%-threaded.po, $(filter %.c,$(LIBSRC))) +# ULIBTHREADEDPICOBJS are the same, but compiled for use in a shared ofs library +ULIBTHREADEDPICOBJS := $(patsubst %.c,%-threaded.po, $(filter %.c,$(ULIBSRC))) + # LIBTHREADEDDEPENDS is a list of dependency files for the multithreaded client lib LIBTHREADEDDEPENDS := $(patsubst %.c,%.d, $(filter %.c,$(LIBSRC))) +# ULIBTHREADEDDEPENDS is a list of dependency files for the # multithreaded ofs lib +ULIBTHREADEDDEPENDS := $(patsubst %.c,%.d, $(filter %.c,$(ULIBSRC))) + # ADMINOBJS is a list of admin program objects ADMINOBJS := $(patsubst %.c,%.o, $(filter %.c,$(ADMINSRC))) # ADMINTOOLS is a list of admin program executables ADMINTOOLS := $(patsubst %.c,%, $(filter %.c, $(ADMINSRC))) # ADMINDEPENDS is a list of dependency files for admin programs ADMINDEPENDS := $(patsubst %.c,%.d, $(filter %.c,$(ADMINSRC))) +# +# USEROBJS is a list of user program objects +USEROBJS := $(patsubst %.c,%.o, $(filter %.c,$(USERSRC))) +# USERTOOLS is a list of user program executables +USERTOOLS := $(patsubst %.c,%, $(filter %.c, $(USERSRC))) +# USERDEPENDS is a list of dependency files for user programs +USERDEPENDS := $(patsubst %.c,%.d, $(filter %.c,$(USERSRC))) + +# UCACHEDOBJS is a list of ucached program objects +UCACHEDOBJS := $(patsubst %.c,%.o, $(filter %.c,$(UCACHEDSRC))) +# UCACHEDTOOLS is a list of ucached program executables +UCACHEDTOOLS := $(patsubst %.c,%, $(filter %.c, $(UCACHEDSRC))) +# UCACHEDDEPENDS is a 
list of dependency files for ucached programs +UCACHEDDEPENDS := $(patsubst %.c,%.d, $(filter %.c,$(UCACHEDSRC))) ifdef BUILD_SERVER ADMINOBJS_SERVER := $(patsubst %.c,%.o, $(filter %.c,$(ADMINSRC_SERVER))) @@ -537,6 +683,10 @@ VISMISCDEPENDS := $(patsubst %.c,%.d, $(filter %.c,$(VISMISCSRC))) KARMAOBJS := $(patsubst %.c,%.o, $(filter %.c,$(KARMASRC))) KARMADEPENDS := $(patsubst %.c,%.d, $(filter %.c,$(KARMASRC))) +# FUSEOBJS +FUSEOBJS := $(patsubst %.c,%.o, $(filter %.c,$(FUSESRC))) +FUSEDEPENDS := $(patsubst %.c,%.d, $(filter %.c,$(FUSESRC))) + # state machine generation tool, built for the build machine, not the # host machine, in the case of cross-compilation STATECOMPOBJS := $(patsubst %.c,%.o,$(STATECOMPSRC)) @@ -554,29 +704,54 @@ DOCSCRUFT += $(patsubst %.tex,%.dvi, $(filter %.tex,$(DOCSRC))) DOCSCRUFT += $(patsubst %.tex,%.log, $(filter %.tex,$(DOCSRC))) DOCSCRUFT += $(patsubst %.tex,%.toc, $(filter %.tex,$(DOCSRC))) +# DEVELOBJS is a list of development program objects +DEVELOBJS := $(patsubst %.c,%.o, $(filter %.c,$(DEVELSRC))) +# DEVELTOOLS is a list of development program executables +DEVELTOOLS := $(patsubst %.c,%, $(filter %.c, $(DEVELSRC))) +# DEVELDEPENDS is a list of dependency files for development programs +DEVELDEPENDS := $(patsubst %.c,%.d, $(filter %.c,$(DEVELSRC))) + # DEPENDS is a global list of all of our dependency files. # NOTE: sort is just a trick to remove duplicates; the order # doesn't matter at all. 
-DEPENDS := $(sort $(LIBDEPENDS) $(SERVERDEPENDS) $(SERVERBINDEPENDS) \ - $(MISCDEPENDS) \ - $(ADMINDEPENDS) $(ADMINDEPENDS_SERVER) $(KERNAPPDEPENDS) $(VISDEPENDS) \ - $(VISMISCDEPENDS) $(KARMADEPENDS) $(STATECOMPDEPS)) +ifdef BUILD_BMI_ONLY +DEPENDS := $(sort $(LIBBMIDEPENDS)) +else +DEPENDS := $(sort $(LIBDEPENDS) $(SERVERDEPENDS) \ + $(SERVERBINDEPENDS) $(MISCDEPENDS) $(USERDEPENDS) \ + $(ADMINDEPENDS) $(ADMINDEPENDS_SERVER) $(KERNAPPDEPENDS) \ + $(VISDEPENDS) $(VISMISCDEPENDS) $(KARMADEPENDS) \ + $(STATECOMPDEPS) $(FUSEDEPENDS) $(UCACHEDDEPENDS) ) +endif #################################################################### # Rules and dependencies # default rule builds server, library, and applications -all:: $(SERVER) $(KARMA) $(LIBRARIES) admintools $(VISS) $(KARMA) +ifdef BUILD_BMI_ONLY +all:: $(BMILIBRARIES) +else +all:: $(SERVER) $(KARMA) $(LIBRARIES) admintools usertools ucachedtools $(VISS) $(KARMA) $(FUSE) +endif # target for building admin tools admintools: $(ADMINTOOLS) $(ADMINTOOLS_SERVER) +# target for building user tools +usertools: $(USERTOOLS) + +# target for building ucached tools +ucachedtools: $(UCACHEDTOOLS) + # target for building kernel driver userland programs -kernapps: $(KERNAPPS) +kernapps: $(KERNAPPS) $(KERNAPPSTHR) # this is needed for the make dist statecompgen: $(STATECOMPGEN) +# target for building development tools +develtools: $(DEVELTOOLS) + # Build linux-2.6 kernel module if requested. # Can't use the actual file target since we don't know how to figure out # dependencies---only the kernel source tree can do that. @@ -615,15 +790,6 @@ modldflags = $(MODLDFLAGS_$(call canonname,$(1))) \ # note: this will look better if you use two tabs instead of spaces between # SHORT_NAME and the object -ifeq ($(PVFS_OSD_INTEGRATED),1) -# Need -whole-archive this to get the constructors for the various device -# types. Another way would be to force a ref to an object in the file -# somehow, or just use shared libs.
-SERVERCFLAGS += -DPVFS_OSD_INTEGRATED -SERVERLIBS += -Wl,-whole-archive ../stgt/libstgt.a -Wl,-no-whole-archive \ - ../osd-target/libosdtgt.a ../osd-util/libosdutil.a -lsqlite3 -lm -endif - # rule for building the pvfs2 server $(SERVER): $(SERVERBINOBJS) lib/libpvfs2-server.a $(Q) " LD $@" @@ -639,6 +805,20 @@ $(ADMINTOOLS_SERVER): %: %.o $(Q) " LD $@" $(E)$(LD) $< $(LDFLAGS) $(SERVER_LDFLAGS) $(SERVERLIBS) -o $@ +ifdef BUILD_BMI_ONLY +# rule for building the bmi library +lib/libbmi.a: $(LIBBMIOBJS) + $(Q) " RANLIB $@" + $(E)$(INSTALL) -d lib + $(E)ar rcs $@ $(LIBBMIOBJS) + +# rule for building the shared bmi library +lib/libbmi.so: $(LIBBMIPICOBJS) + $(Q) " LDSO $@" + $(E)$(INSTALL) -d lib + $(E)$(LDSHARED) -Wl,-soname,libbmi.so -o $@ $(LIBBMIPICOBJS) $(DEPLIBS) +endif + # rule for building the pvfs2 library lib/libpvfs2.a: $(LIBOBJS) $(Q) " RANLIB $@" @@ -651,17 +831,58 @@ lib/libpvfs2-threaded.a: $(LIBTHREADEDOBJS) $(E)$(INSTALL) -d lib $(E)ar rcs $@ $(LIBTHREADEDOBJS) -# rule for building the pvfs2 library +# rule for building the shared pvfs2 library lib/libpvfs2.so: $(LIBPICOBJS) $(Q) " LDSO $@" $(E)$(INSTALL) -d lib - $(E)$(LDSHARED) -o $@ $(LIBPICOBJS) $(DEPLIBS) + $(E)$(LDSHARED) -Wl,-soname,libpvfs2.so -o $@ $(LIBPICOBJS) $(DEPLIBS) -# rule for building the pvfs2 _multithreaded_ library +# rule for building the shared pvfs2 _multithreaded_ library lib/libpvfs2-threaded.so: $(LIBTHREADEDPICOBJS) $(Q) " LDSO $@" $(E)$(INSTALL) -d lib - $(E)$(LDSHARED) -o $@ $(LIBTHREADEDPICOBJS) $(DEPLIBS) + $(E)$(LDSHARED) -Wl,-soname,libpvfs2-threaded.so -o $@ $(LIBTHREADEDPICOBJS) $(DEPLIBS) + +# rule for building the ofs library +lib/libofs.a: $(ULIBOBJS) + $(Q) " RANLIB $@" + $(E)$(INSTALL) -d lib + $(E)ar rcs $@ $(ULIBOBJS) + +# rule for building the _multithreaded_ ofs library +lib/libofs-threaded.a: $(ULIBTHREADEDOBJS) + $(Q) " RANLIBTHREADED $@" + $(E)$(INSTALL) -d lib + $(E)ar rcs $@ $(ULIBTHREADEDOBJS) + +# rule for building the shared ofs library 
+lib/libofs.so: $(ULIBPICOBJS) + $(Q) " LDSO $@" + $(E)$(INSTALL) -d lib + $(E)$(LDSHARED) -Wl,-soname,libofs.so -o $@ $(ULIBPICOBJS) $(ULIBDEPLIBS) $(DEPLIBS) + +# rule for building the shared ofs _multithreaded_ library +lib/libofs-threaded.so: $(ULIBTHREADEDPICOBJS) + $(Q) " LDSO $@" + $(E)$(INSTALL) -d lib + $(E)$(LDSHARED) -Wl,-soname,libofs-threaded.so -o $@ $(ULIBTHREADEDPICOBJS) $(ULIBDEPLIBS) $(DEPLIBS) + +# rules for building virtual libraries +lib/liborange.a: lib/libpvfs2.a + $(Q) " VLIB $@" + $(E)printf "GROUP ( $(LIBS) )\n" > lib/liborange.a + +lib/liborange.so: lib/libpvfs2.so + $(Q) " VLIB $@" + $(E)printf "GROUP ( $(LIBS) )\n" > lib/liborange.so + +lib/liborangeposix.a: lib/libpvfs2.a + $(Q) " VLIB $@" + $(E)printf "GROUP ( -lofs $(LIBS) )\n" > lib/liborangeposix.a + +lib/liborangeposix.so: lib/libpvfs2.so + $(Q) " VLIB $@" + $(E)printf "GROUP ( -lofs $(LIBS) )\n" > lib/liborangeposix.so # rule for building the pvfs2 server library lib/libpvfs2-server.a: $(SERVEROBJS) @@ -674,11 +895,22 @@ $(KARMA): $(KARMAOBJS) $(LIBRARIES) $(Q) " LD $@" $(E)$(LD) -o $@ $(LDFLAGS) $(KARMAOBJS) $(LIBS) $(call modldflags,$<) +# rule for building FUSE interface and its objects +$(FUSE): $(FUSEOBJS) $(LIBRARIES) + $(Q) " LD $@" + $(E)$(LD) -o $@ $(LDFLAGS) $(FUSEOBJS) $(LIBS) $(call modldflags,$<) + # rule for building vis executables from object files $(VISS): %: %.o $(VISMISCOBJS) $(LIBRARIES) $(Q) " LD $@" $(E)$(LD) -o $@ $(LDFLAGS) $< $(VISMISCOBJS) $(LIBS) $(call modldflags,$<) +# rule for building development tools and its objects. don't know why db isn't +# already in libs.
+$(DEVELTOOLS): $(DEVELOBJS) $(LIBRARIES) + $(Q) " LD $@" + $(E)$(LD) -o $@ $(LDFLAGS) $(DEVELOBJS) $(LIBS) -ldb $(call modldflags,$<) + # default rule for building executables from object files %: %.o $(LIBRARIES) $(Q) " LD $@" @@ -713,6 +945,31 @@ $(VISS): %: %.o $(VISMISCOBJS) $(LIBRARIES) $(Q) " CCPIC $@" $(E)$(CC) $(LIBCFLAGS) $(CFLAGS) $(PICFLAGS) $(call modcflags,$<) $< -c -o $@ +# c++ rule for building server objects +%-server.o: %.cpp + $(Q) " CC $@" + $(E)$(CC) $(CFLAGS) $(SERVERCFLAGS) $(call modcflags,$<) $< -c -o $@ + +# c++ default rule for building objects for threaded library +%-threaded.o: %.cpp + $(Q) " CC $@" + $(E)$(CC) $(LIBTHREADEDCFLAGS) $(LIBCFLAGS) $(CFLAGS) $(call modcflags,$<) $< -c -o $@ + +# c++ rule for building shared objects for threaded library +%-threaded.po: %.cpp + $(Q) " CCPIC $@" + $(E)$(CC) $(LIBTHREADEDCFLAGS) $(CFLAGS) $(PICFLAGS) $(call modcflags,$<) $< -c -o $@ + +# c++ default rule for building objects +%.o: %.cpp + $(Q) " CC $@" + $(E)$(CC) $(LIBCFLAGS) $(CFLAGS) $(call modcflags,$<) $< -c -o $@ + +# c++ rule for building shared objects +%.po: %.cpp + $(Q) " CCPIC $@" + $(E)$(CC) $(LIBCFLAGS) $(CFLAGS) $(PICFLAGS) $(call modcflags,$<) $< -c -o $@ + # bison and yacc %.c: %.y $(Q) " BISON $@" @@ -740,6 +997,10 @@ $(VISS): %: %.o $(VISMISCOBJS) $(LIBRARIES) $(ADMINTOOLS): %: %.o $(LIBRARIES) $(ADMINTOOLS_SERVER): %: %.o $(LIBRARIES) lib/libpvfs2-server.a +$(USERTOOLS): %: %.o $(LIBRARIES) + +$(UCACHEDTOOLS): %: %.o $(LIBRARIES) + $(KERNAPPS): %: %.o $(LIBRARIES) $(KERNAPPSTHR): %: %.o $(LIBRARIES_THREADED) $(Q) " LD $@" @@ -787,24 +1048,34 @@ publish: docs # rule for cleaning up documentation # latex2html puts all its output in a directory +# don't get rid of generated files in dist releases docsclean: - rm -f $(DOCSPS) $(DOCSPDF) $(DOCSCRUFT) + rm -f $(DOCSCRUFT) +ifndef DIST_RELEASE + rm -f $(DOCSPS) $(DOCSPDF) rm -rf $(basename $(DOCSHTML)) +endif # top rule for cleaning up tree clean:: $(Q) " CLEAN" $(E)rm -f $(LIBOBJS) 
$(LIBTHREADEDOBJS) \ + $(ULIBOBJS) $(ULIBTHREADEDOBJS) \ $(SERVEROBJS) $(SERVERBINOBJS) $(MISCOBJS) \ $(LIBRARIES) $(LIBRARIES_THREADED) $(DEPENDS) $(SERVER) \ $(ADMINOBJS) $(ADMINOBJS_SERVER) $(ADMINTOOLS)\ $(ADMINTOOLS_SERVER) lib/libpvfs2-server.a\ + $(USERTOOLS) $(USEROBJS) \ + $(UCACHEDTOOLS) $(UCACHEDOBJS) \ $(KERNAPPOBJS) $(KERNAPPS) $(KERNAPPSTHR) \ $(VISS) $(VISMISCOBJS) $(VISOBJS) $(VISDEPENDS)\ $(VISMISCDEPENDS) $(KARMAOBJS) $(LIBPICOBJS) \ + $(STATECOMP) $(STATECOMPOBJS) $(LIBBMIOBJS) \ + $(BMILIBRARIES) $(FUSEOBJS) \ + $(VISMISCDEPENDS) $(KARMAOBJS) $(LIBPICOBJS)\ $(STATECOMP) $(STATECOMPOBJS) \ src/server/pvfs2-server-server.o \ - src/apps/karma/karma + src/apps/karma/karma src/apps/fuse/pvfs2fuse ifndef DIST_RELEASE $(E)rm -f $(STATECOMPGEN) endif @@ -823,6 +1094,13 @@ endif dist: $(SMCGEN) cleaner @sh $(srcdir)/maint/make-dist.sh $(srcdir) @PVFS2_VERSION@ +ifdef BUILD_BMI_ONLY +# builds a tarball of the BMI source tree suitable for distribution +bmidist: cleaner + @sh $(srcdir)/maint/make-bmi-dist.sh $(srcdir) $(builddir) @PVFS2_VERSION@ + cp -u $(builddir)/config.save $(builddir)/config.status +endif + # some stuff that is cleaned in both distclean and dist targets cleaner: clean rm -f tags @@ -835,6 +1113,7 @@ cleaner: clean rm -rf autom4te*.cache rm -f pvfs2-config.h.in~ rm -f $(srcdir)/cscope.out $(srcdir)/cscope.files + cp -p config.status config.save rm -f config.log config.status config.cache rm -f pvfs-@PVFS2_VERSION@.tar.gz @@ -842,7 +1121,8 @@ cleaner: clean # except, don't remove generated .c files if this is a distributed release distclean: cleaner docsclean find . 
-name "module.mk" -exec rm \{\} \; - rm -f Makefile pvfs2-config.h + rm -f Makefile pvfs2-config.h aclocal.m4 + rm -rf lib rm -f src/server/simple.conf rm -f src/apps/admin/pvfs2-config ifndef DIST_RELEASE @@ -897,6 +1177,7 @@ src/server/simple.conf: src/apps/admin/pvfs2-genconfig # whether this is a distribution tarball or not, drop some config files # into the "examples" subdir of the build dir +ifndef BUILD_BMI_ONLY all:: examples/fs.conf examples/fs.conf: src/apps/admin/pvfs2-genconfig $(Q) " GENCONFIG $@" @@ -904,53 +1185,116 @@ examples/fs.conf: src/apps/admin/pvfs2-genconfig --ioservers localhost --metaservers localhost --logfile /tmp/pvfs2-server.log \ --storage /tmp/pvfs2-test-space \ --quiet examples/fs.conf +endif install_doc: install -d $(mandir)/man1 install -d $(mandir)/man5 + rm -f ${mandir}/man1/*.gz + rm -f ${mandir}/man5/*.gz install -m 644 $(srcdir)/doc/man/*.1 $(mandir)/man1 install -m 644 $(srcdir)/doc/man/*.5 $(mandir)/man5 + gzip -f ${mandir}/man1/*.1 + gzip -f ${mandir}/man5/*.5 -install:: all install_doc - install -d $(prefix)/include - install -m 644 $(srcdir)/include/pvfs2.h $(prefix)/include - install -m 644 $(srcdir)/include/pvfs2-request.h $(prefix)/include - install -m 644 $(srcdir)/include/pvfs2-debug.h $(prefix)/include - install -m 644 $(srcdir)/include/pvfs2-sysint.h $(prefix)/include - install -m 644 $(srcdir)/include/pvfs2-mgmt.h $(prefix)/include - install -m 644 $(srcdir)/include/pvfs2-types.h $(prefix)/include - install -m 644 $(srcdir)/include/pvfs2-util.h $(prefix)/include - install -m 644 $(srcdir)/include/pvfs2-encode-stubs.h $(prefix)/include - - install -d $(prefix)/lib - install -m 755 $(LIBRARIES) $(prefix)/lib +ifdef BUILD_BMI_ONLY +install:: all + install -d $(includedir) + install -m 644 $(srcdir)/src/io/bmi/bmi.h $(includedir) + install -m 644 $(srcdir)/src/io/bmi/bmi-types.h $(includedir) + install -d $(libdir) + install -m 755 lib/*.* $(libdir) +else +install:: all install_doc + install -d $(includedir) + 
install -m 644 $(builddir)/include/pvfs2.h $(includedir) + install -m 644 $(srcdir)/include/orange.h $(includedir) + install -m 644 $(srcdir)/include/pvfs2-request.h $(includedir) + install -m 644 $(srcdir)/include/pvfs2-debug.h $(includedir) + install -m 644 $(srcdir)/include/pvfs2-sysint.h $(includedir) + install -m 644 $(srcdir)/include/pvfs2-usrint.h $(includedir) + install -m 644 $(srcdir)/include/pvfs2-mgmt.h $(includedir) + install -m 644 $(srcdir)/include/pvfs2-types.h $(includedir) + install -m 644 $(srcdir)/include/pvfs2-util.h $(includedir) + install -m 644 $(srcdir)/include/pvfs2-encode-stubs.h $(includedir) + install -m 644 $(srcdir)/include/pvfs2-hint.h $(includedir) + install -m 644 $(srcdir)/include/pvfs2-compat.h $(includedir) + install -m 644 $(srcdir)/include/pvfs2-mirror.h $(includedir) + + install -d $(libdir) +ifneq (,$(LIBRARIES_STATIC)) + for i in $(notdir $(LIBRARIES_STATIC)) ; do \ + install -m 755 lib/$$i $(libdir) ;\ + done ifdef TARGET_OS_DARWIN # TOC needs to be regenerated in libs after they get moved - ranlib $(patsubst %,$(prefix)/%,$(LIBRARIES)) + ranlib $(patsubst %,$(prefix)/%,$(LIBRARIES_STATIC)) +endif +endif +ifneq (,$(LIBRARIES_THREADED_STATIC)) + for i in $(notdir $(LIBRARIES_THREADED_STATIC)) ; do \ + install -m 755 lib/$$i $(libdir) ;\ + done +ifdef TARGET_OS_DARWIN +# TOC needs to be regenerated in libs after they get moved + ranlib $(patsubst %,$(prefix)/%,$(LIBRARIES_THREADED_STATIC)) +endif +endif +ifneq (,$(LIBRARIES_SHARED)) + for i in $(notdir $(LIBRARIES_SHARED)) ; do \ + install -m 755 lib/$$i $(libdir)/$$i.$(SO_FULLVER) ;\ + $(LN_S) $$i.$(SO_FULLVER) $(libdir)/$$i.$(SO_VER) ;\ + $(LN_S) $$i.$(SO_VER) $(libdir)/$$i ;\ + done +endif +ifneq (,$(LIBRARIES_THREADED_SHARED)) + for i in $(notdir $(LIBRARIES_THREADED_SHARED)) ; do \ + install -m 755 lib/$$i $(libdir)/$$i.$(SO_FULLVER) ;\ + $(LN_S) $$i.$(SO_FULLVER) $(libdir)/$$i.$(SO_VER) ;\ + $(LN_S) $$i.$(SO_VER) $(libdir)/$$i ;\ + done endif - install -d $(prefix)/bin 
- install -m 755 $(ADMINTOOLS) $(prefix)/bin - install -m 755 src/apps/admin/pvfs2-config $(prefix)/bin + install -d $(bindir) + install -m 755 $(ADMINTOOLS) $(bindir) + install -m 755 $(USERTOOLS) $(bindir) +ifdef BUILD_UCACHE + install -m 755 $(UCACHEDTOOLS) $(sbindir) +endif + # for compatibility in case anyone really wants "lsplus" + $(LN_S) pvfs2-ls $(bindir)/pvfs2-lsplus + install -m 755 src/apps/admin/pvfs2-config $(bindir) @# if we ever auto-generate genconfig, remove the $(srcdir) - install -m 755 $(srcdir)/src/apps/admin/pvfs2-genconfig $(prefix)/bin - install -m 755 $(srcdir)/src/apps/admin/pvfs2-config-convert $(prefix)/bin + install -m 755 $(srcdir)/src/apps/admin/pvfs2-genconfig $(bindir) + install -m 755 $(srcdir)/src/apps/admin/pvfs2-config-convert $(bindir) + install -m 755 $(srcdir)/src/apps/admin/pvfs2-getmattr $(bindir) + install -m 755 $(srcdir)/src/apps/admin/pvfs2-setmattr $(bindir) ifdef BUILD_KARMA - install -m 755 $(KARMA) $(prefix)/bin + install -m 755 $(KARMA) $(bindir) endif - install -d $(prefix)/sbin +ifdef BUILD_FUSE + install -m 755 $(FUSE) $(bindir) +endif + + # install any development tools built + for i in $(notdir $(DEVELTOOLS)) ; do \ + if [ -f $(srcdir)/src/apps/devel/$$i ]; then install -m 755 $(srcdir)/src/apps/devel/$$i $(bindir); fi;\ + done + + install -d $(sbindir) ifdef BUILD_SERVER - install -m 755 $(ADMINTOOLS_SERVER) $(prefix)/bin + install -m 755 $(ADMINTOOLS_SERVER) $(bindir) ifeq ($(NPTL_WORKAROUND),) - install -m 755 $(SERVER) $(prefix)/sbin + install -m 755 $(SERVER) $(sbindir) else - install -m 755 $(srcdir)/$(SERVER_STUB) $(prefix)/sbin/pvfs2-server - install -m 755 $(SERVER) $(prefix)/sbin/pvfs2-server.bin + install -m 755 $(srcdir)/$(SERVER_STUB) $(sbindir)/pvfs2-server + install -m 755 $(SERVER) $(sbindir)/pvfs2-server.bin endif endif +endif ifneq (,$(LINUX_KERNEL_SRC)) @@ -958,10 +1302,11 @@ NUM_UTS_LINES := $(shell grep -c UTS_RELEASE $(LINUX_KERNEL_SRC)/include/linux/v ifeq ($(NUM_UTS_LINES),1) 
KERNEL_VERS := $(shell grep UTS_RELEASE $(LINUX_KERNEL_SRC)/include/linux/version.h | cut -d\" -f2) else - KERNEL_VERS := $(shell uname -r) + # multiple locations of utsrelease.h, just find and grep so we don't have to change again + KERNEL_VERS := $(shell find ${LINUX_KERNEL_SRC}/include -name utsrelease.h -exec grep UTS_RELEASE '{}' \; | cut -d \" -f2 ) endif -#KMOD_DIR ?= $(DESTDIR)/${kmod_prefix}/lib/modules/$(KERNEL_VERS)/kernel/fs/pvfs2 -KMOD_DIR ?= $(prefix)/sbin + +KMOD_DIR ?= $(DESTDIR)/${kmod_prefix}/lib/modules/$(KERNEL_VERS)/kernel/fs/pvfs2 .PHONY: just_kmod_install just_kmod_install: just_kmod @@ -970,8 +1315,8 @@ just_kmod_install: just_kmod .PHONY: kmod_install kmod_install: kmod kernapps just_kmod_install - install -d $(prefix)/sbin - install -m 755 $(KERNAPPS) $(KERNAPPSTHR) $(prefix)/sbin + install -d $(sbindir) + install -m 755 $(KERNAPPS) $(KERNAPPSTHR) $(sbindir) endif ifneq (,$(LINUX24_KERNEL_SRC)) @@ -989,13 +1334,16 @@ just_kmod24_install: just_kmod24 install -d $(KMOD_DIR) install -m 755 src/kernel/linux-2.4/pvfs2.o $(KMOD_DIR) +.PHONY: just_kmod24_apps_install +just_kmod24_apps_install: kmod24 kernapps + install -d $(sbindir) + install -m 755 $(KERNAPPS) $(KERNAPPSTHR) $(sbindir) + install -m 755 src/apps/kernel/linux/mount.pvfs2 $(sbindir) + .PHONY: kmod24_install -kmod24_install: kmod24 kernapps just_kmod24_install - install -d $(prefix)/sbin - install -m 755 $(KERNAPPS) $(KERNAPPSTHR) $(prefix)/sbin - install -m 755 src/apps/kernel/linux/mount.pvfs2 $(prefix)/sbin +kmod24_install: kmod24 kernapps just_kmod24_install just_kmod24_apps_install @echo "" @echo "For improved linux-2.4 support," - @echo "install $(prefix)/sbin/mount.pvfs2 to /sbin/mount.pvfs2" + @echo "install $(sbindir)/mount.pvfs2 to /sbin/mount.pvfs2" @echo "" endif diff --git a/README.osd b/README.osd index 03980a2..12ebbcf 100644 --- a/README.osd +++ b/README.osd @@ -19,20 +19,20 @@ Merge from CVS on Wed Dec 5 15:00:48 EST 2007. 
------------------------------------------------------------------------------- To configure: - CFLAGS=-g ./configure --without-openssl --disable-karma --disable-thread-safety --enable-strict --enable-shared --disable-static --prefix=$(pwd)/install + CFLAGS=-g ./configure --without-openssl --disable-karma --disable-thread-safety --enable-strict --enable-shared --disable-static --prefix=/usr/local Or, with optimization: - CFLAGS="-O3 -DNDEBUG" ./configure --without-openssl --disable-karma --disable-thread-safety --enable-strict --enable-shared --disable-static --prefix=$(pwd)/install + CFLAGS="-O3 -DNDEBUG" ./configure --without-openssl --disable-karma --disable-thread-safety --enable-strict --enable-shared --disable-static --prefix=/usr/local Or, with IB, but shared libs will not work: - CFLAGS="-O3 -DNDEBUG" ./configure --without-openssl --disable-karma --disable-thread-safety --enable-strict --disable-static --enable-shared --prefix=$(pwd)/install --with-openib=/usr/local/openib-iser --without-bmi-tcp + CFLAGS="-O3 -DNDEBUG" ./configure --without-openssl --disable-karma --disable-thread-safety --enable-strict --disable-static --enable-shared --prefix=/usr/local --with-openib=/usr/local/openib-iser --without-bmi-tcp To build kernel module, add: --with-kernel=/usr/src/linux On opt, with IB and kernel: - CFLAGS="-O3 -DNDEBUG" ./configure --without-openssl --disable-karma --disable-thread-safety --enable-strict --disable-static --enable-shared --prefix=$(pwd)/install --with-openib=/usr --without-bmi-tcp --with-kernel=/home/pw/src/osd/linux + CFLAGS="-O3 -DNDEBUG" ./configure --without-openssl --disable-karma --disable-thread-safety --enable-strict --disable-static --enable-shared --prefix=/usr/local --with-openib=/usr --without-bmi-tcp --with-kernel=/home/pw/src/osd/linux Then to build and install locally, in ./install: make @@ -172,9 +172,10 @@ directory there. 
Building the PVFS kernel module ------------------------------- -[alin@titan]$ CFLAGS="-O3 -DNDEBUG" ./configure --without-openssl --disable-karma --disable-thread-safety --enable-strict --with-kernel=/usr/src/linux --prefix=$(pwd)/install +[alin@titan]$ CFLAGS="-O3 -DNDEBUG" ./configure --without-openssl --disable-karma --disable-thread-safety --enable-strict --with-kernel=/usr/src/linux --prefix=/usr/local [alin@titan]$ make kmod [alin@titan]$ KMOD_DIR=./install make kmod_install # vim: set tw=72 : + diff --git a/aclocal.m4 b/aclocal.m4 new file mode 100644 index 0000000..5af196f --- /dev/null +++ b/aclocal.m4 @@ -0,0 +1,2303 @@ +# generated automatically by aclocal 1.7.9 -*- Autoconf -*- + +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002 +# Free Software Foundation, Inc. +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + + +AC_DEFUN([AX_OPENSSL], +[ + opensslpath=ifelse([$1], ,,$1) + + if test "x$1" != "xno"; then + + AC_MSG_CHECKING([for openssl library]) + + if test "x${opensslpath}" != "x"; then + CFLAGS="${CFLAGS} -I${opensslpath}/include" + LDFLAGS="$LDFLAGS -L${opensslpath}/lib64 -L${opensslpath}/lib" + SERVER_LDFLAGS="$SERVER_LDFLAGS -L${opensslpath}/lib64 -L${opensslpath}/lib" + fi + LIBS="$LIBS -lcrypto -lssl" + + AC_COMPILE_IFELSE( + [#include "openssl/bio.h"], + [], + [AC_MSG_ERROR(Invalid openssl path specified. 
No openssl/bio.h found.)]) + + AC_TRY_LINK( + [#include "openssl/bio.h"], + [BIO * b;], + [AC_MSG_RESULT(yes)], + [AC_MSG_ERROR(could not find openssl libs)]) + + AC_DEFINE(WITH_OPENSSL, 1, [Define if openssl exists]) + + AC_CHECK_HEADERS(openssl/evp.h) + AC_CHECK_HEADERS(openssl/crypto.h) + fi +]) + +AC_DEFUN([AX_OPENSSL_OPTIONAL], +[ + AC_MSG_CHECKING([for openssl library]) + TMPLIBS=${LIBS} + LIBS="$LIBS -lcrypto -lssl" + + AC_COMPILE_IFELSE( + [#include "openssl/bio.h"], + [], + [AC_MSG_WARN(No openssl headers found.)]) + + AC_TRY_LINK( + [#include "openssl/bio.h"], + [BIO * b;], + [AC_MSG_RESULT(yes) + AC_DEFINE(WITH_OPENSSL, 1, [Define if openssl exists]) + ], + [ + AC_MSG_WARN(No openssl headers found.) + LIBS=${TMPLIBS} + ]) + + AC_CHECK_HEADERS(openssl/evp.h) + AC_CHECK_HEADERS(openssl/crypto.h) + +]) + + +AC_DEFUN([AX_KERNEL_FEATURES], +[ + dnl + dnl kernel feature tests. Set CFLAGS once here and use it for all + dnl kernel features. reset to the old value at the end. + dnl + dnl on some systems, there is a /usr/include/linux/xattr_acl.h , so the + dnl check for xattr_acl.h down below will always pass, even if it + dnl should fail. this hack (-nostdinc -isystem ...) will bring in just + dnl enough system headers for kernel compilation + + dnl -Werror can be overkill, but for these kernel feature tests + dnl 'implicit function declaration' usually ends up in an undefined + dnl symbol somewhere.
+ + NOSTDINCFLAGS="-Werror-implicit-function-declaration -nostdinc -isystem `$CC -print-file-name=include`" + + CFLAGS="$USR_CFLAGS $NOSTDINCFLAGS -I$lk_src/include -I$lk_src/include/asm/mach-default -DKBUILD_STR(s)=#s -DKBUILD_BASENAME=KBUILD_STR(empty) -DKBUILD_MODNAME=KBUILD_STR(empty)" + + dnl kernels > 2.6.32 now use generated/autoconf.h + if test -f $lk_src/include/generated/autoconf.h ; then + CFLAGS="$CFLAGS -imacros $lk_src/include/generated/autoconf.h" + else + CFLAGS="$CFLAGS -imacros $lk_src/include/linux/autoconf.h" + fi + + dnl we probably need additional includes if this build is intended + dnl for a different architecture + if test -n "${ARCH}" ; then + CFLAGS="$CFLAGS -I$lk_src/arch/${ARCH}/include -I$lk_src/arch/${ARCH}/include/asm/mach-default" + else + SUBARCH=`uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ + -e s/arm.*/arm/ -e s/sa110/arm/ \ + -e s/s390x/s390/ -e s/parisc64/parisc/ \ + -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ + -e s/sh.*/sh/` + if test "x$SUBARCH" = "xi386"; then + ARCH=x86 + elif test "x$SUBARCH" = "xx86_64"; then + ARCH=x86 + elif test "x$SUBARCH" = "xsparc64"; then + ARCH=sparc + else + ARCH=$SUBARCH + fi + + CFLAGS="$CFLAGS -I$lk_src/arch/${ARCH}/include -I$lk_src/arch/${ARCH}/include/asm/mach-default" + fi + + AC_MSG_CHECKING(for i_size_write in kernel) + dnl if this test passes, the kernel does not have it + dnl if this test fails, the kernel already defined it + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + void i_size_write(struct inode *inode, + loff_t i_size) + { + return; + } + ], [], + AC_MSG_RESULT(no), + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_I_SIZE_WRITE, 1, Define if kernel has i_size_write), + ) + + AC_MSG_CHECKING(for i_size_read in kernel) + dnl if this test passes, the kernel does not have it + dnl if this test fails, the kernel already defined it + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + loff_t i_size_read(struct inode *inode) + { + return 0; + } + ], [], + AC_MSG_RESULT(no), + 
AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_I_SIZE_READ, 1, Define if kernel has i_size_read), + ) + + AC_MSG_CHECKING(for iget_locked function in kernel) + dnl if this test passes, the kernel does not have it + dnl if this test fails, the kernel already defined it + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + loff_t iget_locked(struct inode *inode) + { + return 0; + } + ], [], + AC_MSG_RESULT(no), + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_IGET_LOCKED, 1, Define if kernel has iget_locked), + ) + + AC_MSG_CHECKING(for iget4_locked function in kernel) + dnl if this test passes, the kernel does not have it + dnl if this test fails, the kernel already defined it + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + loff_t iget4_locked(struct inode *inode) + { + return 0; + } + ], [], + AC_MSG_RESULT(no), + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_IGET4_LOCKED, 1, Define if kernel has iget4_locked), + ) + + AC_MSG_CHECKING(for iget5_locked function in kernel) + dnl if this test passes, the kernel does not have it + dnl if this test fails, the kernel already defined it + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + loff_t iget5_locked(struct inode *inode) + { + return 0; + } + ], [], + AC_MSG_RESULT(no), + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_IGET5_LOCKED, 1, Define if kernel has iget5_locked), + ) + + dnl Check if the kernel defines the xtvec structure. + dnl This is part of a POSIX extension. + AC_MSG_CHECKING(for struct xtvec in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct xtvec xv = { 0, 0 }; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_STRUCT_XTVEC, 1, Define if struct xtvec is defined in the kernel), + AC_MSG_RESULT(no) + ) + + dnl 2.6.20 deprecated kmem_cache_t; some old ones do not have struct + dnl kmem_cache, but may have kmem_cache_s. It's a mess. Just look + dnl for this, and assume _t if not found. + dnl This test relies on gcc complaining about declaring a struct + dnl in a parameter list. 
Fragile, but nothing better is available + dnl to check for the existence of a struct. We cannot see the + dnl definition of the struct in the kernel, it's private to the + dnl slab implementation. And C lets you declare structs freely as + dnl long as you don't try to deal with their contents. + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for struct kmem_cache in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + + int foo(struct kmem_cache *s) + { + return (s == NULL) ? 3 : 4; + } + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_STRUCT_KMEM_CACHE, 1, Define if struct kmem_cache is defined in kernel), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl 2.6.20 removed SLAB_KERNEL. Need to use GFP_KERNEL instead + AC_MSG_CHECKING(for SLAB_KERNEL flag in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static int flags = SLAB_KERNEL; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SLAB_KERNEL, 1, Define if SLAB_KERNEL is defined in kernel), + AC_MSG_RESULT(no) + ) + + dnl The name of this field changed from memory_backed to capabilities + dnl in 2.6.12. 
+ AC_MSG_CHECKING(for memory_backed in struct backing_dev_info in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + static struct backing_dev_info bdi = { + .memory_backed = 0 + }; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BDI_MEMORY_BACKED, 1, Define if struct backing_dev_info in kernel has memory_backed), + AC_MSG_RESULT(no) + ) + + dnl checking if we have a sendfile callback + if test "x$enable_kernel_sendfile" = "xyes"; then + AC_MSG_CHECKING(for sendfile callback in struct file_operations in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct file_operations fop = { + .sendfile = NULL, + }; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SENDFILE_VFS_SUPPORT, 1, Define if struct file_operations in kernel has sendfile callback), + AC_MSG_RESULT(no) + ) + fi + + dnl checking if we have a readv callback in file_operations + AC_MSG_CHECKING(for readv callback in struct file_operations in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct file_operations fop = { + .readv = NULL, + }; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_READV_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has readv callback), + AC_MSG_RESULT(no) + ) + dnl checking if we have a writev callback in file_operations + AC_MSG_CHECKING(for writev callback in struct file_operations in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct file_operations fop = { + .writev = NULL, + }; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_WRITEV_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has writev callback), + AC_MSG_RESULT(no) + ) + + dnl checking if we have a find_inode_handle callback in super_operations + AC_MSG_CHECKING(for find_inode_handle callback in struct super_operations in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct super_operations sop = { + .find_inode_handle = NULL, + }; + ], [], + AC_MSG_RESULT(yes) + 
AC_DEFINE(HAVE_FIND_INODE_HANDLE_SUPER_OPERATIONS, 1, Define if struct super_operations in kernel has find_inode_handle callback), + AC_MSG_RESULT(no) + ) + + dnl 2.6.18.1 removed this member + AC_MSG_CHECKING(for i_blksize in struct inode) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct inode i = { + .i_blksize = 0, + }; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_I_BLKSIZE_IN_STRUCT_INODE, 1, Define if struct inode in kernel has i_blksize member), + AC_MSG_RESULT(no) + ) + + dnl 2.6.16 removed this member + AC_MSG_CHECKING(for i_sem in struct inode) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct inode i = { + .i_sem = {0}, + }; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_I_SEM_IN_STRUCT_INODE, 1, Define if struct inode in kernel has i_sem member), + AC_MSG_RESULT(no) + ) + + dnl checking if we have a statfs_lite callback in super_operations + AC_MSG_CHECKING(for statfs_lite callback in struct super_operations in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct super_operations sop = { + .statfs_lite = NULL, + }; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_STATFS_LITE_SUPER_OPERATIONS, 1, Define if struct super_operations in kernel has statfs_lite callback), + AC_MSG_RESULT(no) + ) + + dnl checking if we have a fill_handle callback in inode_operations + AC_MSG_CHECKING(for fill_handle callback in struct inode_operations in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct inode_operations iop = { + .fill_handle = NULL, + }; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FILL_HANDLE_INODE_OPERATIONS, 1, Define if struct inode_operations in kernel has fill_handle callback), + AC_MSG_RESULT(no) + ) + + dnl checking if we have a getattr_lite callback in inode_operations + AC_MSG_CHECKING(for getattr_lite callback in struct inode_operations in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct inode_operations iop = { + .getattr_lite = NULL, + }; + 
], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GETATTR_LITE_INODE_OPERATIONS, 1, Define if struct inode_operations in kernel has getattr_lite callback), + AC_MSG_RESULT(no) + ) + + dnl checking if we have a get_fs_key callback in super_operations + AC_MSG_CHECKING(for get_fs_key callback in struct super_operations in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct super_operations sop = { + .get_fs_key = NULL, + }; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GET_FS_KEY_SUPER_OPERATIONS, 1, Define if struct super_operations in kernel has get_fs_key callback), + AC_MSG_RESULT(no) + ) + + dnl checking if we have a readdirplus callback in file_operations + AC_MSG_CHECKING(for readdirplus member in file_operations structure) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct file_operations filop = { + .readdirplus = NULL + }; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_READDIRPLUS_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has readdirplus callback), + AC_MSG_RESULT(no) + ) + + dnl checking if we have a readdirplus_lite callback in file_operations + AC_MSG_CHECKING(for readdirplus_lite member in file_operations structure) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct file_operations filop = { + .readdirplus_lite = NULL + }; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_READDIRPLUSLITE_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has readdirplus_lite callback), + AC_MSG_RESULT(no) + ) + + + dnl checking if we have a readx callback in file_operations + AC_MSG_CHECKING(for readx member in file_operations structure) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct file_operations filop = { + .readx = NULL + }; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_READX_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has readx callback), + AC_MSG_RESULT(no) + ) + + dnl checking if we have a writex callback in file_operations + AC_MSG_CHECKING(for writex 
member in file_operations structure) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct file_operations filop = { + .writex = NULL + }; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_WRITEX_FILE_OPERATIONS, 1, Define if struct file_operations in kernel has writex callback), + AC_MSG_RESULT(no) + ) + + AC_MSG_CHECKING(for aio support in kernel) + dnl if this test passes, the kernel has it + dnl if this test fails, the kernel does not have it + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + static struct kiocb iocb; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_AIO, 1, Define if kernel has aio support) + have_aio=yes, + AC_MSG_RESULT(no) + have_aio=no + ) + + if test "x$have_aio" = "xyes" -a "x$enable_kernel_aio" = "xyes"; then + AC_MSG_CHECKING(for ki_dtor in kiocb structure of kernel) + dnl if this test passes, the kernel does have it and we enable + dnl support for AIO. if this test fails, the kernel does not + dnl have this member and we disable support for AIO + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + static struct kiocb io_cb = { + .ki_dtor = NULL, + }; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_AIO_VFS_SUPPORT, 1, Define if we are enabling VFS AIO support in kernel), + AC_MSG_RESULT(no) + ) + + tmp_cflags=$CFLAGS + dnl if this test passes, the signature of aio_read has changed to the new one + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for new prototype of aio_read callback of file_operations structure) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + extern ssize_t my_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); + static struct file_operations fop = { + .aio_read = my_aio_read, + }; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_AIO_NEW_AIO_SIGNATURE, 1, Define if VFS AIO support in kernel has a new prototype), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + fi + + AC_MSG_CHECKING(for dentry argument in kernel super_operations statfs) + dnl Rely on the fact that there is an 
external vfs_statfs that is + dnl of the same type as the .statfs in struct super_operations to + dnl verify the signature of that function pointer. There is a single + dnl commit in the git history where both changed at the same time + dnl from super_block to dentry. + dnl + dnl The alternative approach of trying to define a s_op.statfs is not + dnl as nice because that only throws a warning, requiring -Werror to + dnl catch it. This is a problem if the compiler happens to spit out + dnl other spurious warnings that have nothing to do with the test. + dnl + dnl If this test passes, the kernel uses a struct dentry argument. + dnl If this test fails, the kernel uses something else (old struct + dnl super_block perhaps). + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + int vfs_statfs(struct dentry *de, struct kstatfs *kfs) + { + return 0; + } + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DENTRY_STATFS_SOP, 1, Define if super_operations statfs has dentry argument), + AC_MSG_RESULT(no) + ) + + AC_MSG_CHECKING(for vfsmount argument in kernel file_system_type get_sb) + dnl Same trick as above. A single commit changed many things at once: + dnl type and signature of file_system_type.get_sb, and signature of + dnl get_sb_bdev. This test is a bit more tenuous, as get_sb_bdev + dnl isn't used directly in a file_system_type, but is a popular helper + dnl for many FSes. And it has not exactly the same signature. + dnl + dnl If this test passes, the kernel has the most modern known form, + dnl which includes a struct vfsmount argument. + dnl If this test fails, the kernel uses something else. 
+ AC_TRY_COMPILE([ + #define __KERNEL__ + #include + int get_sb_bdev(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, + int (*fill_super)(struct super_block *, void *, + int), + struct vfsmount *vfsm) + { + return 0; + } + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_VFSMOUNT_GETSB, 1, Define if file_system_type get_sb has vfsmount argument), + AC_MSG_RESULT(no) + ) + + AC_MSG_CHECKING(for xattr support in kernel) + dnl if this test passes, the kernel has it + dnl if this test fails, the kernel does not have it + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct inode_operations in_op = { + .getxattr = NULL + }; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_XATTR, 1, Define if kernel has xattr support) + have_xattr=yes, + AC_MSG_RESULT(no) + have_xattr=no + ) + + if test "x$have_xattr" = "xyes"; then + dnl Test to check if setxattr function has a const void * argument + AC_MSG_CHECKING(for const argument to setxattr function) + dnl if this test passes, there is a const void* argument + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], + [ + struct inode_operations inode_ops; + int ret; + struct dentry * dent = NULL; + const char * name = NULL; + const void * val = NULL; + size_t size = 0; + int flags = 0; + + ret = inode_ops.setxattr(dent, name, val, size, flags); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SETXATTR_CONST_ARG, 1, Define if kernel setxattr has const void* argument), + AC_MSG_RESULT(no) + ) + fi + + dnl the proc handler functions have changed over the years. 
+ dnl pre-2.6.8: proc_handler(ctl_table *ctl, + dnl int write, + dnl struct file *filp, + dnl void *buffer, + dnl size_t *lenp) + dnl + dnl 2.6.8-2.6.31: proc_handler(ctl_table *ctl, + dnl int write, + dnl struct file *filp, + dnl void *buffer, + dnl size_t *lenp, + dnl loff_t *ppos) + dnl > 2.6.31: proc_handler(ctl_table *ctl, + dnl int write, + dnl void *buffer, + dnl size_t *lenp, + dnl loff_t *ppos) + + dnl Test to see if sysctl proc handlers have a file argument + AC_MSG_CHECKING(for file argument to sysctl proc handlers) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + ], [ + struct ctl_table * ctl = NULL; + int write = 0; + struct file * filp = NULL; + void __user * buffer = NULL; + size_t * lenp = NULL; + loff_t * ppos = NULL; + + proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_PROC_HANDLER_FILE_ARG, 1, Define if sysctl proc handlers have 6th argument), + AC_MSG_RESULT(no) + ) + + AC_MSG_CHECKING(for ppos argument to sysctl proc handlers) + dnl if this test passes, there is a ppos argument + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + ], [ + struct ctl_table * ctl = NULL; + int write = 0; + void __user * buffer = NULL; + size_t * lenp = NULL; + loff_t * ppos = NULL; + + proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_PROC_HANDLER_PPOS_ARG, 1, Define if sysctl proc handlers have ppos argument), + AC_MSG_RESULT(no) + ) + + AC_CHECK_HEADERS([linux/posix_acl.h], [], [], + [#define __KERNEL__ + #include + #ifdef HAVE_XATTR + #include + #endif + ] ) + + AC_CHECK_HEADERS([linux/posix_acl_xattr.h], [], [], + [#define __KERNEL__ + #include + #ifdef HAVE_XATTR + #include + #endif + ] ) + + dnl linux-2.6.11 had xattr_acl.h, but 2.6.12 did not! 
+ AC_CHECK_HEADERS([linux/xattr_acl.h], [], [], + [#define __KERNEL__ + #include + #ifdef HAVE_XATTR + #include + #endif + ] ) + + AC_CHECK_HEADERS([linux/mount.h], [], [], + [#define __KERNEL__ + #include + ] ) + AC_CHECK_HEADERS([linux/ioctl32.h], [], [], + [#define __KERNEL__ + #include + ] ) + AC_CHECK_HEADERS([linux/compat.h], [], [], + [#define __KERNEL__ + #include + ] ) + AC_CHECK_HEADERS([linux/syscalls.h], [], [], + [#define __KERNEL__ + #include + ] ) + AC_CHECK_HEADERS([asm/ioctl32.h], [], [], + [#define __KERNEL__ + #include + ] ) + AC_CHECK_HEADERS([linux/exportfs.h], [],[], + [#define __KERNEL__ + #include + ]) + + AC_MSG_CHECKING(for generic_file_readv api in kernel) + dnl if this test passes, the kernel does not have it + dnl if this test fails, the kernel has it defined with a different + dnl signature! deliberately, the signature for this method has been + dnl changed for it to give a compiler error. + + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + int generic_file_readv(struct inode *inode) + { + return 0; + } + ], [], + AC_MSG_RESULT(no), + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GENERIC_FILE_READV, 1, Define if kernel has generic_file_readv), + ) + + AC_MSG_CHECKING(for generic_permission api in kernel) + dnl if this test passes, the kernel does not have it + dnl if this test fails, the kernel has it defined with a different + dnl signature! deliberately, the signature for this method has been + dnl changed for it to give a compiler error. 
+ + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + int generic_permission(struct inode *inode) + { + return 0; + } + ], [], + AC_MSG_RESULT(no), + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GENERIC_PERMISSION, 1, Define if kernel has generic_permission), + ) + + AC_MSG_CHECKING(for generic_getxattr api in kernel) + dnl if this test passes, the kernel does not have it + dnl if this test fails, the kernel has it defined + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + int generic_getxattr(struct inode *inode) + { + return 0; + } + ], [], + AC_MSG_RESULT(no), + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GENERIC_GETXATTR, 1, Define if kernel has generic_getxattr), + ) + + AC_MSG_CHECKING(for arg member in read_descriptor_t in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + read_descriptor_t x; + x.arg.data = NULL; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_ARG_IN_READ_DESCRIPTOR_T, 1, Define if read_descriptor_t has an arg member), + AC_MSG_RESULT(no) + ) + + AC_MSG_CHECKING(for fh_to_dentry member in export_operations in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct export_operations x; + x.fh_to_dentry = NULL; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FHTODENTRY_EXPORT_OPERATIONS, 1, Define if export_operations has an fh_to_dentry member), + AC_MSG_RESULT(no) + ) + + AC_MSG_CHECKING(for encode_fh member in export_operations in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct export_operations x; + x.encode_fh = NULL; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_ENCODEFH_EXPORT_OPERATIONS, 1, Define if export_operations has an encode_fh member), + AC_MSG_RESULT(no) + ) + + dnl Using -Werror is not an option, because some arches throw lots of + dnl warnings that would trigger false negatives. 
We know that the + dnl change to the releasepage() function signature was accompanied by + dnl a similar change to the exported function try_to_release_page(), + dnl and that one we can check without using -Werror. The test fails + dnl unless the previous declaration was identical to the one we suggest + dnl below. New kernels use gfp_t, not int. + AC_MSG_CHECKING(for second arg type int in address_space_operations releasepage) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + extern int try_to_release_page(struct page *page, int gfp_mask); + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_INT_ARG2_ADDRESS_SPACE_OPERATIONS_RELEASEPAGE, 1, Define if sceond argument to releasepage in address_space_operations is type int), + AC_MSG_RESULT(no) + ) + + dnl Similar logic for the follow_link member in inode_operations. New + dnl kernels return a void *, not int. + AC_MSG_CHECKING(for int return in inode_operations follow_link) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + extern int page_follow_link_light(struct dentry *, + struct nameidata *); + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_INT_RETURN_INODE_OPERATIONS_FOLLOW_LINK, 1, Define if return value from follow_link in inode_operations is type int), + AC_MSG_RESULT(no) + ) + + dnl kmem_cache_destroy function may return int only on pre 2.6.19 kernels + dnl else it returns a void. + AC_MSG_CHECKING(for int return in kmem_cache_destroy) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + extern int kmem_cache_destroy(kmem_cache_t *); + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_INT_RETURN_KMEM_CACHE_DESTROY, 1, Define if return value from kmem_cache_destroy is type int), + AC_MSG_RESULT(no) + ) + + dnl more 2.6 api changes. return type for the invalidatepage + dnl address_space_operation is 'void' in new kernels but 'int' in old + dnl I had to turn on -Werror for this test because i'm not sure how + dnl else to make dnl "initialization from incompatible pointer type" + dnl fail. 
+ AC_MSG_CHECKING(for older int return in invalidatepage) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], + [ + struct address_space_operations aso; + + int ret; + struct page * page = NULL; + unsigned long offset; + + ret = aso.invalidatepage(page, offset); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_INT_RETURN_ADDRESS_SPACE_OPERATIONS_INVALIDATEPAGE, 1, Define if return type of invalidatepage should be int), + AC_MSG_RESULT(NO) + ) + + dnl In 2.6.18.1 and newer, including will throw off a + dnl warning + tmp_cflags=${CFLAGS} + CFLAGS="${CFLAGS} -Werror" + AC_MSG_CHECKING(for warnings when including linux/config.h) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [], + AC_MSG_RESULT(no) + AC_DEFINE(HAVE_NOWARNINGS_WHEN_INCLUDING_LINUX_CONFIG_H, 1, Define if including linux/config.h gives no warnings), + AC_MSG_RESULT(yes) + ) + CFLAGS=$tmp_cflags + + AC_MSG_CHECKING(for compat_ioctl member in file_operations structure) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct file_operations filop = { + .compat_ioctl = NULL + }; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_COMPAT_IOCTL_HANDLER, 1, Define if there exists a compat_ioctl member in file_operations), + AC_MSG_RESULT(no) + ) + + dnl Gives wrong answer if header is missing; don't try then. 
+ if test x$ac_cv_header_linux_ioctl32_h = xyes ; then + AC_MSG_CHECKING(for register_ioctl32_conversion kernel exports) + dnl if this test passes, the kernel does not have it + dnl if this test fails, the kernel has it defined + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + int register_ioctl32_conversion(void) + { + return 0; + } + ], [], + AC_MSG_RESULT(no), + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_REGISTER_IOCTL32_CONVERSION, 1, Define if kernel has register_ioctl32_conversion), + ) + fi + + AC_MSG_CHECKING(for int return value of kmem_cache_destroy) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + int i = kmem_cache_destroy(NULL); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KMEM_CACHE_DESTROY_INT_RETURN, 1, Define if kmem_cache_destroy returns int), + AC_MSG_RESULT(no) + ) + + dnl As of 2.6.19, combined readv/writev into aio_read and aio_write + dnl functions. Detect this by not finding a readv member. + AC_MSG_CHECKING(for combined file_operations readv and aio_read) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct file_operations filop = { + .readv = NULL + }; + ], + AC_MSG_RESULT(no), + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_COMBINED_AIO_AND_VECTOR, 1, Define if struct file_operations has combined aio_read and readv functions), + ) + + dnl Check for kzalloc + AC_MSG_CHECKING(for kzalloc) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + void * a; + a = kzalloc(1024, GFP_KERNEL); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KZALLOC, 1, Define if kzalloc exists), + AC_MSG_RESULT(no) + ) + + dnl Check for two arg register_sysctl_table() + AC_MSG_CHECKING(for two arguments to register_sysctl_table) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + ], [ + register_sysctl_table(NULL, 0); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_TWO_ARG_REGISTER_SYSCTL_TABLE, 1, Define if register_sysctl_table takes two arguments), + AC_MSG_RESULT(no) + ) + + dnl FS_IOC_GETFLAGS and FS_IOC_SETFLAGS appeared + dnl 
somewhere around 2.6.20.1 as generic versions of fs-specific flags + AC_MSG_CHECKING(for generic FS_IOC ioctl flags) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + int flags = FS_IOC_GETFLAGS; + ], + AC_MSG_RESULT(yes), + AC_DEFINE(HAVE_NO_FS_IOC_FLAGS, 1, Define if FS_IOC flags missing from fs.h) + AC_MSG_RESULT(no) + ) + + dnl old linux kernels define struct page with a 'count' member, whereas + dnl other kernels (since at least 2.6.20) define struct page with a + dnl '_count' + AC_MSG_CHECKING(for obsolete struct page count without underscore) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct page *p; + int foo; + foo = atomic_read(&(p)->count); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_OBSOLETE_STRUCT_PAGE_COUNT_NO_UNDERSCORE, 1, Define if struct page defines a count member without leading underscore), + AC_MSG_RESULT(no) + ) + + dnl old linux kernels do not have class_create and related functions + dnl + dnl check for class_device_destroy() to weed out RHEL4 kernels that + dnl have some class functions but not others + AC_MSG_CHECKING(if kernel has device classes) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + class_device_destroy(NULL, "pvfs2") + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KERNEL_DEVICE_CLASSES, 1, Define if kernel has device classes), + AC_MSG_RESULT(no) + ) + + dnl 2.6.23 removed the destructor parameter from kmem_cache_create + AC_MSG_CHECKING(for destructor param to kmem_cache_create) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + kmem_cache_create("config-test", 0, 0, 0, NULL, NULL); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KMEM_CACHE_CREATE_DESTRUCTOR_PARAM, 1, [Define if kernel kmem_cache_create has destructor param]), + AC_MSG_RESULT(no) + ) + + dnl 2.6.27 changed the constructor parameter signature of + dnl kmem_cache_create. Check for this newer one-param style + dnl If they don't match, gcc complains about + dnl passing argument ... 
from incompatible pointer type, hence the + dnl need for the -Werror. Note that the next configure test will + dnl determine if we have a two param constructor or not. + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for one-param kmem_cache_create constructor) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + void ctor(void *req) + { + } + ], [ + kmem_cache_create("config-test", 0, 0, 0, ctor); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KMEM_CACHE_CREATE_CTOR_ONE_PARAM, 1, [Define if kernel kmem_cache_create constructor has newer-style one-parameter form]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl 2.6.27 changed the parameter signature of + dnl inode_operations->permission. Check for this newer two-param style + dnl If they don't match, gcc complains about + dnl passing argument ... from incompatible pointer type, hence the + dnl need for the -Werror and -Wall. + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror -Wall" + AC_MSG_CHECKING(for two param permission) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + #include + #include + int ctor(struct inode *i, int a) + { + return 0; + } + struct inode_operations iop = { + .permission = ctor, + }; + ], [ + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_TWO_PARAM_PERMISSION, 1, [Define if kernel's inode_operations has two parameters permission function]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + + dnl 2.6.24 changed the constructor parameter signature of + dnl kmem_cache_create. Check for this newer two-param style and + dnl if not, assume it is old. Note we can get away with just + dnl struct kmem_cache (and not kmem_cache_t) as that change happened + dnl in older kernels. If they don't match, gcc complains about + dnl passing argument ... from incompatible pointer type, hence the + dnl need for the -Werror. 
+ tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for two-param kmem_cache_create constructor) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + void ctor(struct kmem_cache *cachep, void *req) + { + } + ], [ + kmem_cache_create("config-test", 0, 0, 0, ctor); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KMEM_CACHE_CREATE_CTOR_TWO_PARAM, 1, [Define if kernel kmem_cache_create constructor has new-style two-parameter form]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + AC_MSG_CHECKING(if kernel address_space struct has a spin_lock field named page_lock) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct address_space as; + spin_lock(&as.page_lock); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SPIN_LOCK_PAGE_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a spin_lock member named page_lock instead of rw_lock]), + AC_MSG_RESULT(no) + ) + + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(if kernel address_space struct has a rwlock_t field named tree_lock) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct address_space as; + read_lock(&as.tree_lock); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_RW_LOCK_TREE_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a rw_lock_t member named tree_lock]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(if kernel address_space struct has a spinlock_t field named tree_lock) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct address_space as; + spin_lock(&as.tree_lock); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SPIN_LOCK_TREE_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a spin_lock_t member named tree_lock]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + AC_MSG_CHECKING(if kernel address_space struct has a priv_lock field - from RT linux) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct address_space as; + 
spin_lock(&as.priv_lock); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_RT_PRIV_LOCK_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a spin_lock for private data instead of rw_lock -- used by RT linux]), + AC_MSG_RESULT(no) + ) + + AC_MSG_CHECKING(if kernel defines mapping_nrpages macro - from RT linux) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct address_space idata; + int i = mapping_nrpages(&idata); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_MAPPING_NRPAGES_MACRO, 1, [Define if kernel defines mapping_nrpages macro -- defined by RT linux]), + AC_MSG_RESULT(no) + ) + + dnl Starting with 2.6.25-rc1, .read_inode goes away. + AC_MSG_CHECKING(if kernel super_operations contains read_inode field) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct super_operations sops; + sops.read_inode(NULL); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_READ_INODE, 1, [Define if kernel super_operations contains read_inode field]), + AC_MSG_RESULT(no) + ) + + dnl Starting with 2.6.26, drop_inode and put_inode go away + AC_MSG_CHECKING(if kernel super_operations contains drop_inode field) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct super_operations sops; + sops.drop_inode(NULL); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DROP_INODE, 1, [Define if kernel super_operations contains drop_inode field]), + AC_MSG_RESULT(no) + ) + + dnl Starting with 2.6.26, drop_inode and put_inode go away + AC_MSG_CHECKING(if kernel super_operations contains put_inode field) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct super_operations sops; + sops.put_inode(NULL); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_PUT_INODE, 1, [Define if kernel super_operations contains put_inode field]), + AC_MSG_RESULT(no) + ) + + dnl older 2.6 kernels don't have MNT_NOATIME + AC_MSG_CHECKING(if mount.h defines MNT_NOATIME) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + int flag = MNT_NOATIME; + ], + AC_MSG_RESULT(yes) + 
AC_DEFINE(HAVE_MNT_NOATIME, 1, [Define if mount.h contains + MNT_NOATIME flags]), + AC_MSG_RESULT(no) + ) + + dnl older 2.6 kernels don't have MNT_NODIRATIME + AC_MSG_CHECKING(if mount.h defines MNT_NODIRATIME) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + int flag = MNT_NODIRATIME; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_MNT_NODIRATIME, 1, [Define if mount.h contains + MNT_NODIRATIME flags]), + AC_MSG_RESULT(no) + ) + + dnl newer 2.6 kernels (2.6.28) use d_obtain_alias instead of d_alloc_anon + AC_MSG_CHECKING(for d_alloc_anon) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct inode *i; + d_alloc_anon(i); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_D_ALLOC_ANON, 1, [Define if dcache.h contains + d_alloc_annon]), + AC_MSG_RESULT(no) + ) + + AC_MSG_CHECKING(for s_dirty in struct super_block) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct super_block *s; + list_empty(&s->s_dirty); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SB_DIRTY_LIST, 1, [Define if struct super_block has s_dirty list]), + AC_MSG_RESULT(no) + ) + + dnl newer 2.6 kernels (2.6.29-ish) use current_fsuid() macro instead + dnl of accessing task struct fields directly + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for current_fsuid) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + ], [ + int uid = current_fsuid(); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_CURRENT_FSUID, 1, [Define if cred.h contains current_fsuid]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl 2.6.32 added a mandatory name field to the bdi structure + AC_MSG_CHECKING(if kernel backing_dev_info struct has a name field) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + ], [ + struct backing_dev_info foo = + { + .name = "foo" + }; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BACKING_DEV_INFO_NAME, 1, [Define if kernel backing_dev_info struct has a name field]), + AC_MSG_RESULT(no) + ) + + dnl some 2.6 kernels have functions to 
explicitly initialize bdi structs + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for bdi_init) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + ], [ + int ret = bdi_init(NULL); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BDI_INIT, 1, [Define if bdi_init function is present]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + + dnl 2.6.33 API change, + dnl Removed .ctl_name from struct ctl_table. + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING([whether struct ctl_table has ctl_name]) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct ctl_table c = { .ctl_name = 0, }; + ],[ ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_CTL_NAME, 1, Define if struct ctl_table has ctl_name member), + AC_MSG_RESULT(no) + ) + + dnl Removed .strategy from struct ctl_table. + AC_MSG_CHECKING([whether struct ctl_table has strategy]) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct ctl_table c = { .strategy = 0, }; + ], [ ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_STRATEGY_NAME, 1, Define if struct ctl_table has strategy member), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl 2.6.33 changed the parameter signature of xattr_handler get + dnl member functions to have a fifth argument and changed the first + dnl parameter from struct inode to struct dentry. 
if the test fails + dnl assume the old 4 param with struct inode + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for five-param xattr_handler.get) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + static struct xattr_handler x; + static int get_xattr_h( struct dentry *d, const char *n, + void *b, size_t s, int h) + { return 0; } + ], + [ + x.get = get_xattr_h; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_XATTR_HANDLER_GET_FIVE_PARAM, 1, [Define if kernel xattr_handle get function has dentry as first parameter and a fifth parameter]), + AC_MSG_RESULT(no) + ) + + dnl 2.6.33 changed the parameter signature of xattr_handler set + dnl member functions to have a sixth argument and changed the first + dnl parameter from struct inode to struct dentry. if the test fails + dnl assume the old 5 param with struct inode + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for six-param xattr_handler.set) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + static struct xattr_handler x; + static int set_xattr_h( struct dentry *d, const char *n, + const void *b, size_t s, int f, int h) + { return 0; } + ], + [ + x.set = set_xattr_h; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_XATTR_HANDLER_SET_SIX_PARAM, 1, [Define if kernel xattr_handle set function has dentry as first parameter and a sixth parameter]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl xattr_handler is also a const + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for const s_xattr member in super_block struct) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + struct super_block sb; + const struct xattr_handler *x[] = { NULL }; + ], + [ + sb.s_xattr = x; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_CONST_S_XATTR_IN_SUPERBLOCK, 1, [Define if s_xattr member of super_block struct is const]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl early 2.6 kernels do not contain true/false enum in stddef.h + tmp_cflags=$CFLAGS + 
CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(stddef.h true/false enum) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + int f = true; + ], + [ ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_TRUE_FALSE_ENUM, 1, [Define if kernel stddef has true/false enum]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + + dnl fsync no longer has a dentry second parameter + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for dentry argument in fsync) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct file_operations f; + static int local_fsync(struct file *f, struct dentry *d, int i) + { return 0; } + ], + [ + f.fsync = local_fsync; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FSYNC_DENTRY_PARAM, 1, [Define if fsync function in file_operations struct wants a dentry pointer as the second parameter]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + CFLAGS=$oldcflags + +]) + + +AC_DEFUN([AX_BERKELEY_DB], +[ + dbpath=ifelse([$1], ,,$1) + + DB_LDFLAGS= + dnl + dnl if the db is specified, try to link with -ldb + dnl otherwise try -ldb4, then -ldb3, then -ldb + dnl $lib set to notfound on link failure + dnl + AC_MSG_CHECKING([for db library]) + oldlibs=$LIBS + lib=notfound + + if test "x$dbpath" != "x" ; then + oldcflags=$CFLAGS + for dbheader in db4 db3 notfound; do + AC_COMPILE_IFELSE( + [#include "$dbpath/include/$dbheader/db.h"], + [DB_CFLAGS="-I$dbpath/include/$dbheader/" + break]) + done + + if test "x$dbheader" = "xnotfound"; then + AC_COMPILE_IFELSE( + [#include "$dbpath/include/db.h"], + [DB_CFLAGS="-I$dbpath/include/"], + [AC_MSG_FAILURE( + Invalid libdb path specified. 
No db.h found.)]) + fi + + DB_LDFLAGS="-L${dbpath}/lib" + LDFLAGS="$DB_LDFLAGS ${LDFLAGS}" + + LIBS="${oldlibs} -ldb -lpthread" + DB_LIB="-ldb" + CFLAGS="$DB_CFLAGS $oldcflags" + AC_TRY_LINK( + [#include ], + [DB *dbp; db_create(&dbp, NULL, 0);], + lib=db) + CFLAGS=$oldcflags + + else + for lib in db4 db3 db notfound; do + LIBS="${oldlibs} -l$lib -lpthread" + DB_LIB="-l$lib" + AC_TRY_LINK( + [#include ], + [DB *dbp; db_create(&dbp, NULL, 0);], + [break]) + done + fi + + dnl reset LIBS value and just report through DB_LIB + LIBS=$oldlibs + if test "x$lib" = "xnotfound" ; then + AC_MSG_ERROR(could not find DB libraries) + else + AC_MSG_RESULT($lib) + fi + AC_SUBST(DB_CFLAGS) + AC_SUBST(DB_LIB) + + dnl See if we have a new enough version of Berkeley DB; needed for + dnl compilation of trove-dbpf component + dnl AC_MSG_CHECKING(whether version of Berkeley DB is new enough) + dnl AC_TRY_COMPILE([ + dnl #include + dnl ], [ + dnl #if DB_VERSION_MAJOR < 4 + dnl #error "DB_VERSION_MAJOR < 4; need newer Berkeley DB implementation" + dnl #endif + dnl ], AC_MSG_RESULT(yes), + dnl AC_MSG_RESULT(no) + dnl AC_MSG_ERROR(Need newer (4.x.x or later) version of Berkeley DB. + dnl try: http://www.sleepycat.com/download/index.shtml + dnl or: /parl/pcarns/rpms/db4-4.0.14-1mdk.src.rpm (to build rpm)) + dnl ) + + dnl Test to check for DB_ENV variable to error callback fn. Then + dnl test to see if third parameter must be const (related but not + dnl exactly the same). 
+ AC_MSG_CHECKING(for dbenv parameter to DB error callback function) + oldcflags=$CFLAGS + CFLAGS="$USR_CFLAGS $DB_CFLAGS -Werror" + AC_TRY_COMPILE([ + #include + + void error_callback_fn(const DB_ENV *dbenv, + const char *prefix, + const char *message) + { + return; + } + ], [ + DB *db; + + db->set_errcall(db, error_callback_fn); + ], AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DBENV_PARAMETER_TO_DB_ERROR_CALLBACK, 1, + Define if DB error callback function takes dbenv parameter) + have_dbenv_parameter_to_db_error_callback=yes, + AC_MSG_RESULT(no) + have_dbenv_parameter_to_db_error_callback=no) + + if test "x$have_dbenv_parameter_to_db_error_callback" = "xyes" ; then + dnl Test if compilation succeeds without const; we expect that it will + dnl not. + dnl NOTE: still using -Werror! + AC_MSG_CHECKING(if third parameter to error callback function is const) + AC_TRY_COMPILE([ + #include + + void error_callback_fn(const DB_ENV *dbenv, + const char *prefix, + char *message) + { + return; + } + ], [ + DB *db; + + db->set_errcall(db, error_callback_fn); + ], AC_MSG_RESULT(no), + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_CONST_THIRD_PARAMETER_TO_DB_ERROR_CALLBACK, 1, + Define if third param (message) to DB error callback function is const)) + fi + + CFLAGS="$USR_CFLAGS $DB_CFLAGS -Werror" + dnl Test to check for unknown third param to DB stat (four params + dnl total). The unknown parameter is a function ptr so that the + dnl the user can pass in a replcaement for malloc. + dnl Note: this is a holdover from relatively old DB implementations, + dnl while the txnid parameter is new. So we don't test for the old + dnl unknown parameter if we found the new one. 
+ AC_MSG_CHECKING(for DB stat with malloc function ptr) + AC_TRY_COMPILE([ + #include + #include + ], [ + int ret = 0; + DB *db = db; + int dummy = 0; + u_int32_t flags = 0; + + ret = db->stat(db, &dummy, malloc, flags); + ], AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_UNKNOWN_PARAMETER_TO_DB_STAT, 1, + Define if DB stat function takes malloc function ptr) + have_db_stat_malloc=yes, + AC_MSG_RESULT(no) + have_db_stat_malloc=no) + + dnl Test to check for txnid parameter to DB stat (DB 4.3.xx+) + if test "x$have_db_stat_malloc" = "xno" ; then + + AC_MSG_CHECKING(for txnid parameter to DB stat function) + AC_TRY_COMPILE([ + #include + ], [ + int ret = 0; + DB *db = db; + DB_TXN *txnid = txnid; + u_int32_t flags = 0; + + ret = db->stat(db, txnid, NULL, flags); + ], AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_TXNID_PARAMETER_TO_DB_STAT, 1, + Define if DB stat function takes txnid parameter) + have_txnid_param_to_stat=yes, + AC_MSG_RESULT(no) + have_txnid_param_to_stat=no) + + fi + + dnl Test to check for txnid parameter to DB open (DB4.1+) + AC_MSG_CHECKING(for txnid parameter to DB open function) + AC_TRY_COMPILE([ + #include + ], [ + int ret = 0; + DB *db = NULL; + DB_TXN *txnid = NULL; + char *file = NULL; + char *database = NULL; + DBTYPE type = 0; + u_int32_t flags = 0; + int mode = 0; + + ret = db->open(db, txnid, file, database, type, flags, mode); + ], AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_TXNID_PARAMETER_TO_DB_OPEN, 1, + Define if DB open function takes a txnid parameter), + AC_MSG_RESULT(no)) + + dnl check for DB_DIRTY_READ (it is not in db-3.2.9, for example) + AC_MSG_CHECKING(for DB_DIRTY_READ flag) + AC_TRY_COMPILE([ + #include + ], [ + u_int32_t flags = DB_DIRTY_READ; + ], AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DB_DIRTY_READ, 1, [Define if db library has DB_DIRTY_READ flag]), + AC_MSG_RESULT(no)) + + dnl check for DB_BUFFER_SMALL (it is returned by dbp->get in db-4.4 and up) + AC_MSG_CHECKING(for DB_BUFFER_SMALL error) + AC_TRY_COMPILE([ + #include + ], [ + int res = 
DB_BUFFER_SMALL; + res++; + ], AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DB_BUFFER_SMALL, 1, [Define if db library has DB_BUFFER_SMALL error]), + AC_MSG_RESULT(no)) + + dnl Test to check for db->get_pagesize + AC_MSG_CHECKING(for berkeley db get_pagesize function) + AC_TRY_COMPILE([ + #include + ], [ + int ret = 0; + DB *db = NULL; + int pagesize; + + ret = db->get_pagesize(db, &pagesize); + ], AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DB_GET_PAGESIZE, 1, [Define if DB has get_pagesize function]), + AC_MSG_RESULT(no)) + + CFLAGS="$oldcflags" +]) + + +AC_DEFUN([AX_CHECK_NEEDS_LIBRT], +[ + +AC_MSG_CHECKING([if server lib needs -lrt]) +AC_TRY_LINK( + [#include + #include + #include ], + [lio_listio(LIO_NOWAIT, NULL, 0, NULL);], + [AC_MSG_RESULT(no)], + [ + oldlibs=$LIBS + LIBS="$LIBS -lrt" + AC_TRY_LINK( + [#include + #include + #include ], + [lio_listio(LIO_NOWAIT, NULL, 0, NULL);], + [NEEDS_LIBRT=1 + AC_SUBST(NEEDS_LIBRT) + AC_MSG_RESULT(yes)], + [AC_MSG_ERROR(failed attempting to link lio_listio)]) + LIBS=$oldlibs + ]) +]) + +# +# Configure rules for GM +# +# Copyright (C) 2008 Pete Wyckoff +# +# See COPYING in top-level directory. +# +AC_DEFUN([AX_GM], +[ + dnl Configure options for GM install path. 
+ dnl --with-gm= is shorthand for + dnl --with-gm-includes=/include + dnl --with-gm-libs=/lib (or lib64 if that exists) + gm_home= + AC_ARG_WITH(gm, +[ --with-gm= Location of the GM install (default no GM)], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-gm requires the path to your GM tree.]) + elif test "$withval" != no ; then + gm_home="$withval" + fi + ) + AC_ARG_WITH(gm-includes, +[ --with-gm-includes= + Location of the GM includes], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-gm-includes requires path to GM headers.]) + elif test "$withval" != no ; then + GM_INCDIR="$withval" + fi + ) + AC_ARG_WITH(gm-libs, +[ --with-gm-libs= Location of the GM libraries], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-gm-libs requires path to GM libraries.]) + elif test "$withval" != no ; then + GM_LIBDIR="$withval" + fi + ) + dnl If supplied the incls and libs explicitly, use them, else populate them + dnl using guesses from the --with-gm dir. + if test -n "$gm_home" ; then + if test -z "$GM_INCDIR"; then + GM_INCDIR=$gm_home/include + fi + if test -z "$GM_LIBDIR"; then + GM_LIBDIR=$gm_home/lib64 + if test ! -d "$GM_LIBDIR" ; then + GM_LIBDIR=$gm_home/lib + fi + fi + fi + dnl If anything GM-ish was set, go look for header. + if test -n "$GM_INCDIR$GM_LIBDIR" ; then + save_cppflags="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -I$GM_INCDIR -I$GM_INCDIR/gm" + AC_CHECK_HEADER(gm.h,, AC_MSG_ERROR([Header gm.h not found.])) + dnl Run test is not possible on a machine that does not have a GM NIC. + dnl Link test would work, but just check for existence. + if test ! -f $GM_LIBDIR/libgm.so ; then + if test ! 
-f $GM_LIBDIR/libgm.a ; then + AC_MSG_ERROR([Neither GM library libgm.so or libgm.a found.]) + fi + fi + BUILD_GM=1 + CPPFLAGS="$save_cppflags" + fi + AC_SUBST(BUILD_GM) + AC_SUBST(GM_INCDIR) + AC_SUBST(GM_LIBDIR) +]) + +dnl vim: set ft=config : + +# +# Configure rules for MX +# +# Copyright (C) 2008 Pete Wyckoff +# +# See COPYING in top-level directory. +# +AC_DEFUN([AX_MX], +[ + dnl Configure options for MX install path. + dnl --with-mx= is shorthand for + dnl --with-mx-includes=/include + dnl --with-mx-libs=/lib (or lib64 if that exists) + mx_home= + AC_ARG_WITH(mx, +[ --with-mx= Location of the MX install (default no MX)], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-mx requires the path to your MX tree.]) + elif test "$withval" != no ; then + mx_home="$withval" + fi + ) + AC_ARG_WITH(mx-includes, +[ --with-mx-includes= + Location of the MX includes], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-mx-includes requires path to MX headers.]) + elif test "$withval" != no ; then + MX_INCDIR="$withval" + fi + ) + AC_ARG_WITH(mx-libs, +[ --with-mx-libs= Location of the MX libraries], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-mx-libs requires path to MX libraries.]) + elif test "$withval" != no ; then + MX_LIBDIR="$withval" + fi + ) + dnl If supplied the incls and libs explicitly, use them, else populate them + dnl using guesses from the --with-mx dir. + if test -n "$mx_home" ; then + if test -z "$MX_INCDIR"; then + MX_INCDIR=$mx_home/include + fi + if test -z "$MX_LIBDIR"; then + MX_LIBDIR=$mx_home/lib64 + if test ! -d "$MX_LIBDIR" ; then + MX_LIBDIR=$mx_home/lib + fi + fi + fi + dnl If anything MX-ish was set, go look for header. 
+ if test -n "$MX_INCDIR$MX_LIBDIR" ; then + save_cppflags="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -I$MX_INCDIR -I$MX_INCDIR/mx" + AC_CHECK_HEADER(myriexpress.h,, + AC_MSG_ERROR([Header myriexpress.h not found.])) + dnl Run test is not possible on a machine that does not have a MX NIC. + dnl Link test would work, but just check for existence. + if test ! -f $MX_LIBDIR/libmyriexpress.so ; then + if test ! -f $MX_LIBDIR/libmyriexpress.a ; then + AC_MSG_ERROR([Neither MX library libmyriexpress.so or libmyriexpress.a found.]) + fi + fi + BUILD_MX=1 + CPPFLAGS="$save_cppflags" + fi + AC_SUBST(BUILD_MX) + AC_SUBST(MX_INCDIR) + AC_SUBST(MX_LIBDIR) + + if test -n "$BUILD_MX" ; then + dnl Check for existence of mx_decompose_endpoint_addr2 + save_ldflags="$LDFLAGS" + LDFLAGS="-L$MX_LIBDIR $LDFLAGS" + save_libs="$LIBS" + LIBS="-lmyriexpress -lpthread $LIBS" + save_cppflags="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -I$MX_INCDIR" + + AC_MSG_CHECKING(for mx_decompose_endpoint_addr2) + AC_TRY_LINK([ + #include "mx_extensions.h" + #include + ], [ + mx_endpoint_addr_t epa; + mx_decompose_endpoint_addr2(epa, NULL, NULL, NULL); + ], + AC_MSG_RESULT(yes), + AC_MSG_RESULT(no) + AC_MSG_ERROR([Function mx_decompose_endpoint_addr2() not found.]) + ) + + LDFLAGS="$save_ldflags" + CPPFLAGS="$save_cppflags" + LIBS="$save_libs" + fi +]) + +dnl vim: set ft=config : + +AC_DEFUN([AX_IB], +[ + dnl Configure options for IB install path. 
+ dnl --with-ib= is shorthand for + dnl --with-ib-includes=/include + dnl --with-ib-libs=/lib (or lib64 if that exists) + ib_home= + AC_ARG_WITH(ib, + [ --with-ib= Location of the IB installation (default no IB)], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-ib requires the path to your IB tree.]) + elif test "$withval" != no ; then + ib_home="$withval" + fi + ) + AC_ARG_WITH(ib-includes, +[ --with-ib-includes= + Location of the IB includes], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-ib-includes requires path to IB headers.]) + elif test "$withval" != no ; then + IB_INCDIR="$withval" + fi + ) + AC_ARG_WITH(ib-libs, +[ --with-ib-libs= Location of the IB libraries], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-ib-libs requires path to IB libraries.]) + elif test "$withval" != no ; then + IB_LIBDIR="$withval" + fi + ) + dnl If supplied the incls and libs explicitly, use them, else populate them + dnl using guesses from the --with-ib dir. + if test -n "$ib_home" ; then + if test -z "$IB_INCDIR"; then + IB_INCDIR=$ib_home/include + fi + if test -z "$IB_LIBDIR"; then + IB_LIBDIR=$ib_home/lib64 + if test ! -d "$IB_LIBDIR" ; then + IB_LIBDIR=$ib_home/lib + fi + fi + fi + dnl If anything IB-ish was set, go look for header. + if test -n "$IB_INCDIR$IB_LIBDIR" ; then + save_cppflags="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -I$IB_INCDIR" + AC_CHECK_HEADER(vapi.h,, AC_MSG_ERROR([Header vapi.h not found.])) + dnl Run test is not possible on a machine that does not have an IB NIC, + dnl and link test is hard because we need so many little libraries. Bail + dnl and just check for existence; full library list is in Makefile.in. + if test ! -f $IB_LIBDIR/libvapi.so ; then + if test ! 
-f $IB_LIBDIR/libvapi.a ; then + AC_MSG_ERROR([Infiniband library libvapi.so not found.]) + fi + fi + BUILD_IB=1 + AC_CHECK_HEADER(wrap_common.h, + AC_DEFINE(HAVE_IB_WRAP_COMMON_H, 1, Define if IB wrap_common.h exists.), + , + [#include ]) + CPPFLAGS="$save_cppflags" + fi + AC_SUBST(BUILD_IB) + AC_SUBST(IB_INCDIR) + AC_SUBST(IB_LIBDIR) + + dnl Configure options for OpenIB install path. + dnl --with-openib= is shorthand for + dnl --with-openib-includes=/include + dnl --with-openib-libs=/lib (or lib64 if that exists) + openib_home= + AC_ARG_WITH(openib, + [ --with-openib= Location of the OpenIB install (default no OpenIB)], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-openib requires the path to your OpenIB tree.]) + elif test "$withval" != no ; then + openib_home="$withval" + fi + ) + AC_ARG_WITH(openib-includes, +[ --with-openib-includes= + Location of the OpenIB includes], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-openib-includes requires path to OpenIB headers.]) + elif test "$withval" != no ; then + OPENIB_INCDIR="$withval" + fi + ) + AC_ARG_WITH(openib-libs, +[ --with-openib-libs= + Location of the OpenIB libraries], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-openib-libs requires path to OpenIB libraries.]) + elif test "$withval" != no ; then + OPENIB_LIBDIR="$withval" + fi + ) + dnl If supplied the incls and libs explicitly, use them, else populate them + dnl using guesses from the --with-openib dir. + if test -n "$openib_home" ; then + if test -z "$OPENIB_INCDIR"; then + OPENIB_INCDIR=$openib_home/include + fi + if test -z "$OPENIB_LIBDIR"; then + OPENIB_LIBDIR=$openib_home/lib64 + if test ! -d "$OPENIB_LIBDIR" ; then + OPENIB_LIBDIR=$openib_home/lib + fi + fi + fi + dnl If anything OpenIB-ish was set, go look for header. 
+ if test -n "$OPENIB_INCDIR$OPENIB_LIBDIR" ; then + save_cppflags="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -I$OPENIB_INCDIR" + AC_CHECK_HEADER(infiniband/verbs.h,, + AC_MSG_ERROR([Header infiniband/verbs.h not found.])) + dnl Run test is not possible on a machine that does not have an IB NIC. + dnl Link test would work, but just check for existence. + if test ! -f $OPENIB_LIBDIR/libibverbs.so ; then + if test ! -f $OPENIB_LIBDIR/libibverbs.a ; then + AC_MSG_ERROR([OpenIB library libibverbs.so not found.]) + fi + fi + BUILD_OPENIB=1 + CPPFLAGS="$save_cppflags" + fi + AC_SUBST(BUILD_OPENIB) + AC_SUBST(OPENIB_INCDIR) + AC_SUBST(OPENIB_LIBDIR) + + if test -n "$BUILD_OPENIB" ; then + dnl Check for which version of the ibverbs library; device opening is + dnl different. This format is the older one, newer is + dnl ibv_get_device_list. + save_ldflags="$LDFLAGS" + LDFLAGS="-L$OPENIB_LIBDIR -libverbs" + save_cppflags="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -I$OPENIB_INCDIR" + + AC_MSG_CHECKING(for ibv_get_devices) + AC_TRY_LINK([], [ + ibv_get_devices(); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_IBV_GET_DEVICES, 1, + Define if libibverbs has ibv_get_devices), + AC_MSG_RESULT(no) + ) + + dnl Check for existence of reregister event; it's somewhat new. + AC_MSG_CHECKING(for IBV_EVENT_CLIENT_REREGISTER) + AC_TRY_COMPILE([ + #include "infiniband/verbs.h" + ], [ + enum ibv_event_type x = IBV_EVENT_CLIENT_REREGISTER; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_IBV_EVENT_CLIENT_REREGISTER, 1, + Define if libibverbs has reregister event), + AC_MSG_RESULT(no) + ) + + LDFLAGS="$save_ldflags" + CPPFLAGS="$save_cppflags" + fi +]) + +dnl vim: set ft=config : + +AC_DEFUN([AX_PORTALS], +[ + dnl + dnl Configure to build Portals BMI method, if requested and available. + dnl Use + dnl --with-portals To find include files and libraries in standard + dnl system paths. + dnl --with-portals= To specify a location that has include and lib + dnl (or lib64) subdirectories with the goods. 
+ dnl + dnl Or specify the -I an -L and -l flags exactly using, e.g.: + dnl + dnl --with-portals-includes="-I" + dnl --with-portals-libs="-L -l" + dnl + dnl The C file uses #include , so choose your include + dnl path accordingly. If it did not do this, portals/errno.h would sit in + dnl front of the system version. + dnl + use_portals= + home= + incs= + libs= + AC_ARG_WITH(portals, + [ --with-portals[=] Location of the Portals install (default no Portals)], + if test -z "$withval" -o "$withval" = yes ; then + use_portals=yes + elif test "$withval" != no ; then + home="$withval" + fi + ) + AC_ARG_WITH(portals-includes, +[ --with-portals-includes= + Extra CFLAGS to specify Portals includes], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-portals-includes requires an argument.]) + elif test "$withval" != no ; then + incs="$withval" + fi + ) + AC_ARG_WITH(portals-libs, +[ --with-portals-libs= + Extra LIBS to link Portals libraries], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-portals-libs requires an argument.]) + elif test "$withval" != no ; then + libs="$withval" + fi + ) + dnl If supplied the incls and libs explicitly, use them, else populate them + dnl using guesses from the --with-portals dir. + if test -n "$home" ; then + if test -z "$incs"; then + incs=-I$home/include + fi + if test -z "$libs"; then + libs=-L$home/lib64 + if test ! -d "$home/lib64" ; then + libs=-L$home/lib + fi + fi + fi + + dnl + dnl Look for headers and libs. 
+ dnl + BUILD_PORTALS= + PORTALS_INCS= + PORTALS_LIBS= + if test "X$use_portals$home$incs$libs" != X ; then + # Save stuff + save_cppflags="$CPPFLAGS" + save_libs="$LIBS" + + PORTALS_INCS="$incs" + CPPFLAGS="$CPPFLAGS $PORTALS_INCS" + + PORTALS_LIBS="$libs" + LIBS="$save_libs $PORTALS_LIBS" + + AC_MSG_CHECKING([for portals3.h header]) + ok=no + AC_TRY_COMPILE( + [#include ], + [int m, n; m = PtlInit(&n);], + [ok=yes]) + + if test "$ok" = yes ; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + AC_MSG_ERROR([Header portals/portals3.h not found.]) + fi + + dnl try without first, for Cray, then try TCP version + dnl Run test is not always possible, esp when cross-compiling or on + dnl a box that does not have the hardware. + AC_MSG_CHECKING([for portals libraries]) + ok=no + AC_TRY_LINK( + [#include ], + [int m, n; m = PtlInit(&n);], + [ok=yes]) + + if test "$ok" = no ; then + PORTALS_LIBS="$libs -lportals" + LIBS="$save_libs $PORTALS_LIBS" + AC_TRY_LINK( + [#include ], + [int m, n; m = PtlInit(&n);], + [ok=yes]) + fi + + if test "$ok" = no ; then + PORTALS_LIBS="$libs -lp3api -lp3lib -lp3utcp -lp3rt -lpthread" + LIBS="$save_libs $PORTALS_LIBS" + AC_TRY_LINK( + [#include ], + [int m, n; m = PtlInit(&n);], + [ok=yes]) + fi + + if test "$ok" = yes ; then + AC_MSG_RESULT([yes]) + BUILD_PORTALS=1 + else + AC_MSG_RESULT([no]) + AC_MSG_ERROR([Could not link Portals library.]) + fi + + # + # Check for API variations. + # + AC_CHECK_FUNCS(PtlErrorStr) + AC_CHECK_FUNCS(PtlEventKindStr) + + AC_TRY_COMPILE( + [#include ], + [int m; ptl_process_id_t any_pid; + m = PtlACEntry(0, 0, any_pid, (ptl_uid_t) -1, (ptl_jid_t) -1, 0);], + AC_DEFINE(HAVE_PTLACENTRY_JID, 1, + [Define if have PtlACEntry with jid argument.])) + + # Reset + CPPFLAGS="$save_cppflags" + LIBS="$save_libs" + fi + AC_SUBST(BUILD_PORTALS) + AC_SUBST(PORTALS_INCS) + AC_SUBST(PORTALS_LIBS) +]) + +dnl vim: set ft=config : + +# +# Configure rules for ZOID +# +# See COPYING in top-level directory. 
+# +AC_DEFUN([AX_ZOID], +[ + dnl Configure options for ZOID install path. + dnl --with-zoid= + AC_ARG_WITH(zoid, +[ --with-zoid= Location of the ZOID tree (default no ZOID)], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-zoid requires the path to your ZOID source tree.]) + elif test "$withval" != no ; then + ZOID_SRCDIR="$withval" + fi + ) + if test -n "$ZOID_SRCDIR" ; then + save_cppflags="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -Isrc/io/bmi -I$ZOID_SRCDIR/include -I$ZOID_SRCDIR/zbmi -I$ZOID_SRCDIR/zbmi/implementation" + AC_CHECK_HEADER(zbmi.h,, AC_MSG_ERROR([Header zbmi.h not found.])) + AC_CHECK_HEADER(zoid_api.h,, AC_MSG_ERROR([Header zoid_api.h not found.])) + AC_CHECK_HEADER(zbmi_protocol.h,, AC_MSG_ERROR([Header zbmi_protocol.h not found.])) + CPPFLAGS="$save_cppflags" + BUILD_ZOID=1 + fi + AC_SUBST(BUILD_ZOID) + AC_SUBST(ZOID_SRCDIR) +]) + +dnl vim: set ft=config : + diff --git a/config.save b/config.save new file mode 100755 index 0000000..be895d8 --- /dev/null +++ b/config.save @@ -0,0 +1,1177 @@ +#! /bin/bash +# Generated by configure. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false +SHELL=${CONFIG_SHELL-/bin/bash} +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix +fi +DUALCASE=1; export DUALCASE # for MKS sh + +# Support unset when possible. 
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# Work around bugs in pre-3.0 UWIN ksh. +$as_unset ENV MAIL MAILPATH +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)$' \| \ + . : '\(.\)' 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + + +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. 
+ case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done + + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. + if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! -f "$as_myself"; then + { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5 +echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;} + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for as_base in sh bash ksh sh5; do + case $as_dir in + /*) + if ("$as_dir/$as_base" -c ' + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } + $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # second 'sed' script. Blame Lee E. McMahon for sed's syntax. 
:-) + sed '=' <$as_myself | + sed ' + N + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, + t loop + s,-$,, + s,^['$as_cr_digits']*\n,, + ' >$as_me.lineno && + chmod +x $as_me.lineno || + { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5 +echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;} + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno + # Exit status is that of the last command. + exit +} + + +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; +esac + +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links + as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.file + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_executable_p="test -f" + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +# IFS +# We need space, tab and new line, in precisely that order. 
+as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH + +exec 6>&1 + +# Open the log real soon, to keep \$[0] and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. Logging --version etc. is OK. +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX +} >&5 +cat >&5 <<_CSEOF + +This file was extended by $as_me, which was +generated by GNU Autoconf 2.59. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +_CSEOF +echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5 +echo >&5 +config_files=" include/pvfs2.h Makefile module.mk src/apps/admin/module.mk src/apps/admin/pvfs2-config src/apps/devel/module.mk src/apps/karma/module.mk src/apps/vis/module.mk src/apps/fuse/module.mk src/apps/ucache/module.mk src/apps/kernel/linux/module.mk src/apps/user/module.mk src/io/trove/module.mk src/io/trove/trove-handle-mgmt/module.mk src/io/trove/trove-dbpf/module.mk src/common/misc/module.mk src/common/quickhash/module.mk src/common/quicklist/module.mk src/common/dotconf/module.mk src/common/id-generator/module.mk src/common/gossip/module.mk src/common/gen-locks/module.mk src/common/llist/module.mk src/common/statecomp/module.mk src/common/events/module.mk src/common/mgmt/module.mk src/io/bmi/module.mk src/io/bmi/bmi_tcp/module.mk src/io/bmi/bmi_gm/module.mk src/io/bmi/bmi_mx/module.mk src/io/bmi/bmi_ib/module.mk src/io/bmi/bmi_osd/module.mk src/io/bmi/bmi_portals/module.mk src/io/bmi/bmi_zoid/module.mk src/io/description/module.mk src/io/flow/module.mk src/io/flow/flowproto-bmi-trove/module.mk src/io/flow/flowproto-template/module.mk src/io/flow/flowproto-dump-offsets/module.mk src/io/flow/flowproto-bmi-cache/module.mk src/io/buffer/module.mk src/io/job/module.mk src/io/dev/module.mk src/proto/module.mk 
src/server/module.mk src/server/request-scheduler/module.mk src/client/sysint/module.mk src/client/usrint/module.mk src/kernel/linux-2.6/Makefile src/kernel/linux-2.4/Makefile doc/module.mk doc/coding/module.mk doc/design/module.mk doc/random/module.mk examples/pvfs2-server.rc doc/doxygen/pvfs2-doxygen.conf" +config_headers=" pvfs2-config.h" + +ac_cs_usage="\ +\`$as_me' instantiates files from templates according to the +current configuration. + +Usage: $0 [OPTIONS] [FILE]... + + -h, --help print this help, then exit + -V, --version print version number, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Report bugs to ." +ac_cs_version="\ +config.status +configured by ./configure, generated by GNU Autoconf 2.59, + with options \"\" + +Copyright (C) 2003 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." +srcdir=. +INSTALL="/usr/bin/install -c" +# If no file are specified by the user, then we need to provide default +# value. By we need to know if files were specified by the user. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=*) + ac_option=`expr "x$1" : 'x\([^=]*\)='` + ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'` + ac_shift=: + ;; + -*) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + *) # This is not an option, so the user has probably given explicit + # arguments. + ac_option=$1 + ac_need_defaults=false;; + esac + + case $ac_option in + # Handling of the options. 
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --vers* | -V ) + echo "$ac_cs_version"; exit 0 ;; + --he | --h) + # Conflict between --help and --header + { { echo "$as_me:$LINENO: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&2;} + { (exit 1); exit 1; }; };; + --help | --hel | -h ) + echo "$ac_cs_usage"; exit 0 ;; + --debug | --d* | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + CONFIG_FILES="$CONFIG_FILES $ac_optarg" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg" + ac_need_defaults=false;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&2;} + { (exit 1); exit 1; }; } ;; + + *) ac_config_targets="$ac_config_targets $1" ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +if $ac_cs_recheck; then + echo "running /bin/bash ./configure " $ac_configure_extra_args " --no-create --no-recursion" >&6 + exec /bin/bash ./configure $ac_configure_extra_args --no-create --no-recursion +fi + +for ac_config_target in $ac_config_targets +do + case "$ac_config_target" in + # Handling of arguments. 
+ "include/pvfs2.h" ) CONFIG_FILES="$CONFIG_FILES include/pvfs2.h" ;; + "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "module.mk" ) CONFIG_FILES="$CONFIG_FILES module.mk" ;; + "src/apps/admin/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/admin/module.mk" ;; + "src/apps/admin/pvfs2-config" ) CONFIG_FILES="$CONFIG_FILES src/apps/admin/pvfs2-config" ;; + "src/apps/devel/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/devel/module.mk" ;; + "src/apps/karma/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/karma/module.mk" ;; + "src/apps/vis/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/vis/module.mk" ;; + "src/apps/fuse/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/fuse/module.mk" ;; + "src/apps/ucache/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/ucache/module.mk" ;; + "src/apps/kernel/linux/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/kernel/linux/module.mk" ;; + "src/apps/user/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/user/module.mk" ;; + "src/io/trove/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/trove/module.mk" ;; + "src/io/trove/trove-handle-mgmt/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/trove/trove-handle-mgmt/module.mk" ;; + "src/io/trove/trove-dbpf/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/trove/trove-dbpf/module.mk" ;; + "src/common/misc/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/misc/module.mk" ;; + "src/common/quickhash/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/quickhash/module.mk" ;; + "src/common/quicklist/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/quicklist/module.mk" ;; + "src/common/dotconf/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/dotconf/module.mk" ;; + "src/common/id-generator/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/id-generator/module.mk" ;; + "src/common/gossip/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/gossip/module.mk" ;; + "src/common/gen-locks/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/gen-locks/module.mk" ;; + 
"src/common/llist/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/llist/module.mk" ;; + "src/common/statecomp/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/statecomp/module.mk" ;; + "src/common/events/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/events/module.mk" ;; + "src/common/mgmt/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/mgmt/module.mk" ;; + "src/io/bmi/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/bmi/module.mk" ;; + "src/io/bmi/bmi_tcp/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_tcp/module.mk" ;; + "src/io/bmi/bmi_gm/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_gm/module.mk" ;; + "src/io/bmi/bmi_mx/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_mx/module.mk" ;; + "src/io/bmi/bmi_ib/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_ib/module.mk" ;; + "src/io/bmi/bmi_osd/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_osd/module.mk" ;; + "src/io/bmi/bmi_portals/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_portals/module.mk" ;; + "src/io/bmi/bmi_zoid/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_zoid/module.mk" ;; + "src/io/description/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/description/module.mk" ;; + "src/io/flow/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/flow/module.mk" ;; + "src/io/flow/flowproto-bmi-trove/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/flow/flowproto-bmi-trove/module.mk" ;; + "src/io/flow/flowproto-template/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/flow/flowproto-template/module.mk" ;; + "src/io/flow/flowproto-dump-offsets/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/flow/flowproto-dump-offsets/module.mk" ;; + "src/io/flow/flowproto-bmi-cache/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/flow/flowproto-bmi-cache/module.mk" ;; + "src/io/buffer/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/buffer/module.mk" ;; + "src/io/job/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/job/module.mk" ;; + "src/io/dev/module.mk" ) 
CONFIG_FILES="$CONFIG_FILES src/io/dev/module.mk" ;; + "src/proto/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/proto/module.mk" ;; + "src/server/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/server/module.mk" ;; + "src/server/request-scheduler/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/server/request-scheduler/module.mk" ;; + "src/client/sysint/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/client/sysint/module.mk" ;; + "src/client/usrint/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/client/usrint/module.mk" ;; + "src/kernel/linux-2.6/Makefile" ) CONFIG_FILES="$CONFIG_FILES src/kernel/linux-2.6/Makefile" ;; + "src/kernel/linux-2.4/Makefile" ) CONFIG_FILES="$CONFIG_FILES src/kernel/linux-2.4/Makefile" ;; + "doc/module.mk" ) CONFIG_FILES="$CONFIG_FILES doc/module.mk" ;; + "doc/coding/module.mk" ) CONFIG_FILES="$CONFIG_FILES doc/coding/module.mk" ;; + "doc/design/module.mk" ) CONFIG_FILES="$CONFIG_FILES doc/design/module.mk" ;; + "doc/random/module.mk" ) CONFIG_FILES="$CONFIG_FILES doc/random/module.mk" ;; + "examples/pvfs2-server.rc" ) CONFIG_FILES="$CONFIG_FILES examples/pvfs2-server.rc" ;; + "doc/doxygen/pvfs2-doxygen.conf" ) CONFIG_FILES="$CONFIG_FILES doc/doxygen/pvfs2-doxygen.conf" ;; + "pvfs2-config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS pvfs2-config.h" ;; + *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 +echo "$as_me: error: invalid argument: $ac_config_target" >&2;} + { (exit 1); exit 1; }; };; + esac +done + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers +fi + +# Have a temporary directory for convenience. 
Make it in the build tree +# simply because there is no reason to put it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Create a temporary directory, and hook for its removal unless debugging. +$debug || +{ + trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0 + trap '{ (exit 1); exit 1; }' 1 2 13 15 +} + +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=./confstat$$-$RANDOM + (umask 077 && mkdir $tmp) +} || +{ + echo "$me: cannot create a temporary directory in ." >&2 + { (exit 1); exit 1; } +} + + +# +# CONFIG_FILES section. +# + +# No need to generate the scripts if there are no CONFIG_FILES. +# This happens for instance when ./config.status config.h +if test -n "$CONFIG_FILES"; then + # Protect against being on the right side of a sed subst in config.status. + sed 's/,@/@@/; s/@,/@@/; s/,;t t$/@;t t/; /@;t t$/s/[\\&,]/\\&/g; + s/@@/,@/; s/@@/@,/; s/@;t t$/,;t t/' >$tmp/subs.sed <<\CEOF +s,@SHELL@,/bin/bash,;t t +s,@PATH_SEPARATOR@,:,;t t +s,@PACKAGE_NAME@,,;t t +s,@PACKAGE_TARNAME@,,;t t +s,@PACKAGE_VERSION@,,;t t +s,@PACKAGE_STRING@,,;t t +s,@PACKAGE_BUGREPORT@,,;t t +s,@exec_prefix@,${prefix},;t t +s,@prefix@,/usr/local,;t t +s,@program_transform_name@,s,x,x,,;t t +s,@bindir@,${exec_prefix}/bin,;t t +s,@sbindir@,${exec_prefix}/sbin,;t t +s,@libexecdir@,${exec_prefix}/libexec,;t t +s,@datadir@,${prefix}/share,;t t +s,@sysconfdir@,${prefix}/etc,;t t +s,@sharedstatedir@,${prefix}/com,;t t +s,@localstatedir@,${prefix}/var,;t t +s,@libdir@,${exec_prefix}/lib,;t t +s,@includedir@,${prefix}/include,;t t +s,@oldincludedir@,/usr/include,;t t +s,@infodir@,${prefix}/info,;t t +s,@mandir@,${prefix}/man,;t t +s,@build_alias@,,;t t +s,@host_alias@,,;t t +s,@target_alias@,,;t t +s,@DEFS@,-DHAVE_CONFIG_H,;t t +s,@ECHO_C@,,;t t +s,@ECHO_N@,-n,;t t +s,@ECHO_T@,,;t t +s,@LIBS@, -lcrypto -lssl -ldl,;t t 
+s,@PVFS2_VERSION@,2.8.6-orangefs-2012-07-03-162939,;t t +s,@PVFS2_VERSION_MAJOR@,2,;t t +s,@PVFS2_VERSION_MINOR@,8,;t t +s,@PVFS2_VERSION_SUB@,6,;t t +s,@build@,x86_64-unknown-linux-gnu,;t t +s,@build_cpu@,x86_64,;t t +s,@build_vendor@,unknown,;t t +s,@build_os@,linux-gnu,;t t +s,@host@,x86_64-unknown-linux-gnu,;t t +s,@host_cpu@,x86_64,;t t +s,@host_vendor@,unknown,;t t +s,@host_os@,linux-gnu,;t t +s,@CC@,gcc,;t t +s,@CFLAGS@, -g -O2,;t t +s,@LDFLAGS@, -rdynamic,;t t +s,@CPPFLAGS@,,;t t +s,@ac_ct_CC@,gcc,;t t +s,@EXEEXT@,,;t t +s,@OBJEXT@,o,;t t +s,@CPP@,gcc -E,;t t +s,@EGREP@,grep -E,;t t +s,@INSTALL_PROGRAM@,${INSTALL},;t t +s,@INSTALL_SCRIPT@,${INSTALL},;t t +s,@INSTALL_DATA@,${INSTALL} -m 644,;t t +s,@HAVE_PERL@,yes,;t t +s,@HAVE_FIND@,yes,;t t +s,@HAVE_BISON@,yes,;t t +s,@HAVE_FLEX@,yes,;t t +s,@BUILD_CC@,gcc,;t t +s,@BUILD_CFLAGS@,,;t t +s,@BUILD_LDFLAGS@,,;t t +s,@LIBCFLAGS@, -D__GEN_POSIX_LOCKING__,;t t +s,@THREAD_LIB@,-lpthread,;t t +s,@BUILD_SERVER@,1,;t t +s,@BUILD_BMI_ONLY@,,;t t +s,@NEED_BERKELEY_DB@,yes,;t t +s,@MMAP_RA_CACHE@,,;t t +s,@RESET_FILE_POS@,,;t t +s,@TRUSTED_CONNECTIONS@,,;t t +s,@HAVE_PKGCONFIG@,yes,;t t +s,@GTKLIBS@,,;t t +s,@GTKCFLAGS@,,;t t +s,@BUILD_KARMA@,,;t t +s,@build_static@,yes,;t t +s,@REDHAT_RELEASE@,,;t t +s,@NPTL_WORKAROUND@,,;t t +s,@MISC_TROVE_FLAGS@,-D__PVFS2_TROVE_AIO_THREADED__,;t t +s,@TAU_INCS@,,;t t +s,@BUILD_TAU@,,;t t +s,@BUILD_KERNEL@,,;t t +s,@THREADED_KMOD_HELPER@,,;t t +s,@LINUX_KERNEL_SRC@,,;t t +s,@LINUX24_KERNEL_SRC@,,;t t +s,@LINUX24_KERNEL_MINOR_VER@,,;t t +s,@BUILD_ABSOLUTE_TOP@,/usr/src/orangefsosd-2.8.6,;t t +s,@SRC_RELATIVE_TOP@,./,;t t +s,@SRC_ABSOLUTE_TOP@,/usr/src/orangefsosd-2.8.6,;t t +s,@FUSE_LDFLAGS@,,;t t +s,@FUSE_CFLAGS@,,;t t +s,@BUILD_FUSE@,,;t t +s,@ENABLE_COVERAGE@,,;t t +s,@STRICT_CFLAGS@,,;t t +s,@QUIET_COMPILE@,1,;t t +s,@BUILD_EPOLL@,1,;t t +s,@PVFS2_SEGV_BACKTRACE@,1,;t t +s,@build_shared@,no,;t t +s,@BUILD_USRINT@,1,;t t +s,@BUILDUSRINT@,,;t t +s,@BUILD_UCACHE@,,;t t 
+s,@BUILD_ACL_INTERFACE@,0,;t t +s,@INTELC@,,;t t +s,@GNUC@,1,;t t +s,@DB_CFLAGS@,,;t t +s,@DB_LIB@,-ldb,;t t +s,@NEEDS_LIBRT@,1,;t t +s,@TARGET_OS_DARWIN@,,;t t +s,@TARGET_OS_LINUX@,1,;t t +s,@BUILD_BMI_TCP@,1,;t t +s,@BUILD_GM@,,;t t +s,@GM_INCDIR@,,;t t +s,@GM_LIBDIR@,,;t t +s,@BUILD_MX@,,;t t +s,@MX_INCDIR@,,;t t +s,@MX_LIBDIR@,,;t t +s,@BUILD_IB@,,;t t +s,@IB_INCDIR@,,;t t +s,@IB_LIBDIR@,,;t t +s,@BUILD_OPENIB@,,;t t +s,@OPENIB_INCDIR@,,;t t +s,@OPENIB_LIBDIR@,,;t t +s,@BUILD_PORTALS@,,;t t +s,@PORTALS_INCS@,,;t t +s,@PORTALS_LIBS@,,;t t +s,@BUILD_ZOID@,,;t t +s,@ZOID_SRCDIR@,,;t t +s,@SERVER_LDFLAGS@, -rdynamic,;t t +s,@GOSSIP_ENABLE_BACKTRACE@,1,;t t +s,@BUILD_VIS@,,;t t +s,@VISCFLAGS@,,;t t +s,@VISLIBS@,,;t t +s,@LIBOBJS@,,;t t +s,@LTLIBOBJS@,,;t t +CEOF + + # Split the substitutions into bite-sized pieces for seds with + # small command number limits, like on Digital OSF/1 and HP-UX. + ac_max_sed_lines=48 + ac_sed_frag=1 # Number of current file. + ac_beg=1 # First line for current file. + ac_end=$ac_max_sed_lines # Line after last line for current file. + ac_more_lines=: + ac_sed_cmds= + while $ac_more_lines; do + if test $ac_beg -gt 1; then + sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + else + sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + fi + if test ! -s $tmp/subs.frag; then + ac_more_lines=false + else + # The purpose of the label and of the branching condition is to + # speed up the sed processing (if there are no `@' at all, there + # is no need to browse any of the substitutions). + # These are the two extra sed commands mentioned above. 
+ (echo ':t + /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed + if test -z "$ac_sed_cmds"; then + ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed" + else + ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed" + fi + ac_sed_frag=`expr $ac_sed_frag + 1` + ac_beg=$ac_end + ac_end=`expr $ac_end + $ac_max_sed_lines` + fi + done + if test -z "$ac_sed_cmds"; then + ac_sed_cmds=cat + fi +fi # test -n "$CONFIG_FILES" + +for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case $ac_file in + - | *:- | *:-:* ) # input from stdin + cat >$tmp/stdin + ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + * ) ac_file_in=$ac_file.in ;; + esac + + # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories. + ac_dir=`(dirname "$ac_file") 2>/dev/null || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { if $as_mkdir_p; then + mkdir -p "$ac_dir" + else + as_dir="$ac_dir" + as_dirs= + while test ! -d "$as_dir"; do + as_dirs="$as_dir $as_dirs" + as_dir=`(dirname "$as_dir") 2>/dev/null || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + done + test ! 
-n "$as_dirs" || mkdir $as_dirs + fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 +echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} + { (exit 1); exit 1; }; }; } + + ac_builddir=. + +if test "$ac_dir" != .; then + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A "../" for each directory in $ac_dir_suffix. + ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi + +case $srcdir in + .) # No --srcdir option. We are building in place. + ac_srcdir=. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. + else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; +esac + +# Do not use `cd foo && pwd` to compute absolute paths, because +# the directories may not exist. +case `pwd` in +.) ac_abs_builddir="$ac_dir";; +*) + case "$ac_dir" in + .) ac_abs_builddir=`pwd`;; + [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; + *) ac_abs_builddir=`pwd`/"$ac_dir";; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_builddir=${ac_top_builddir}.;; +*) + case ${ac_top_builddir}. in + .) ac_abs_top_builddir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; + *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_srcdir=$ac_srcdir;; +*) + case $ac_srcdir in + .) ac_abs_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; + *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_srcdir=$ac_top_srcdir;; +*) + case $ac_top_srcdir in + .) 
ac_abs_top_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; + *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; + esac;; +esac + + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_builddir$INSTALL ;; + esac + + if test x"$ac_file" != x-; then + { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + rm -f "$ac_file" + fi + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + if test x"$ac_file" = x-; then + configure_input= + else + configure_input="$ac_file. " + fi + configure_input=$configure_input"Generated from `echo $ac_file_in | + sed 's,.*/,,'` by configure." + + # First look for the input files in the build tree, otherwise in the + # src tree. + ac_file_inputs=`IFS=: + for f in $ac_file_in; do + case $f in + -) echo $tmp/stdin ;; + [\\/$]*) + # Absolute (can't be DOS-style, as IFS=:) + test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + echo "$f";; + *) # Relative + if test -f "$f"; then + # Build tree + echo "$f" + elif test -f "$srcdir/$f"; then + # Source tree + echo "$srcdir/$f" + else + # /dev/null tree + { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + fi;; + esac + done` || { (exit 1); exit 1; } + sed "/^[ ]*VPATH[ ]*=/{ +s/:*\$(srcdir):*/:/; +s/:*\${srcdir}:*/:/; +s/:*@srcdir@:*/:/; +s/^\([^=]*=[ ]*\):*/\1/; +s/:*$//; +s/^[^=]*=[ ]*$//; +} + +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s,@configure_input@,$configure_input,;t t +s,@srcdir@,$ac_srcdir,;t t +s,@abs_srcdir@,$ac_abs_srcdir,;t t +s,@top_srcdir@,$ac_top_srcdir,;t t +s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t +s,@builddir@,$ac_builddir,;t t 
+s,@abs_builddir@,$ac_abs_builddir,;t t +s,@top_builddir@,$ac_top_builddir,;t t +s,@abs_top_builddir@,$ac_abs_top_builddir,;t t +s,@INSTALL@,$ac_INSTALL,;t t +" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out + rm -f $tmp/stdin + if test x"$ac_file" != x-; then + mv $tmp/out $ac_file + else + cat $tmp/out + rm -f $tmp/out + fi + +done + +# +# CONFIG_HEADER section. +# + +# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where +# NAME is the cpp macro being defined and VALUE is the value it is being given. +# +# ac_d sets the value in "#define NAME VALUE" lines. +ac_dA='s,^\([ ]*\)#\([ ]*define[ ][ ]*\)' +ac_dB='[ ].*$,\1#\2' +ac_dC=' ' +ac_dD=',;t' +# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE". +ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' +ac_uB='$,\1#\2define\3' +ac_uC=' ' +ac_uD=',;t' + +for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case $ac_file in + - | *:- | *:-:* ) # input from stdin + cat >$tmp/stdin + ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + * ) ac_file_in=$ac_file.in ;; + esac + + test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + + # First look for the input files in the build tree, otherwise in the + # src tree. + ac_file_inputs=`IFS=: + for f in $ac_file_in; do + case $f in + -) echo $tmp/stdin ;; + [\\/$]*) + # Absolute (can't be DOS-style, as IFS=:) + test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + # Do quote $f, to prevent DOS paths from being IFS'd. 
+ echo "$f";; + *) # Relative + if test -f "$f"; then + # Build tree + echo "$f" + elif test -f "$srcdir/$f"; then + # Source tree + echo "$srcdir/$f" + else + # /dev/null tree + { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + fi;; + esac + done` || { (exit 1); exit 1; } + # Remove the trailing spaces. + sed 's/[ ]*$//' $ac_file_inputs >$tmp/in + + # Handle all the #define templates only if necessary. + if grep "^[ ]*#[ ]*define" $tmp/in >/dev/null; then + # If there are no defines, we may have an empty if/fi + : + cat >$tmp/defines.sed <$tmp/out + rm -f $tmp/in + mv $tmp/out $tmp/in + + cat >$tmp/defines.sed <$tmp/out + rm -f $tmp/in + mv $tmp/out $tmp/in + + fi # grep + + # Handle all the #undef templates + cat >$tmp/undefs.sed <$tmp/out + rm -f $tmp/in + mv $tmp/out $tmp/in + + cat >$tmp/undefs.sed <$tmp/out + rm -f $tmp/in + mv $tmp/out $tmp/in + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + if test x"$ac_file" = x-; then + echo "/* Generated by configure. */" >$tmp/config.h + else + echo "/* $ac_file. Generated by configure. */" >$tmp/config.h + fi + cat $tmp/in >>$tmp/config.h + rm -f $tmp/in + if test x"$ac_file" != x-; then + if diff $ac_file $tmp/config.h >/dev/null 2>&1; then + { echo "$as_me:$LINENO: $ac_file is unchanged" >&5 +echo "$as_me: $ac_file is unchanged" >&6;} + else + ac_dir=`(dirname "$ac_file") 2>/dev/null || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| \ + . 
: '\(.\)' 2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { if $as_mkdir_p; then + mkdir -p "$ac_dir" + else + as_dir="$ac_dir" + as_dirs= + while test ! -d "$as_dir"; do + as_dirs="$as_dir $as_dirs" + as_dir=`(dirname "$as_dir") 2>/dev/null || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + done + test ! -n "$as_dirs" || mkdir $as_dirs + fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 +echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} + { (exit 1); exit 1; }; }; } + + rm -f $ac_file + mv $tmp/config.h $ac_file + fi + else + cat $tmp/config.h + rm -f $tmp/config.h + fi +done + +{ (exit 0); exit 0; } diff --git a/configure b/configure index b7200e3..e0ba23e 100755 --- a/configure +++ b/configure @@ -1,54 +1,25 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.61. +# Generated by GNU Autoconf 2.59. # -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, -# 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +# Copyright (C) 2003 Free Software Foundation, Inc. # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. ## --------------------- ## ## M4sh Initialization. 
## ## --------------------- ## -# Be more Bourne compatible -DUALCASE=1; export DUALCASE # for MKS sh +# Be Bourne compatible if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in - *posix*) set -o posix ;; -esac - -fi - - - - -# PATH needs CR -# Avoid depending upon Character Ranges. -as_cr_letters='abcdefghijklmnopqrstuvwxyz' -as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' -as_cr_Letters=$as_cr_letters$as_cr_LETTERS -as_cr_digits='0123456789' -as_cr_alnum=$as_cr_Letters$as_cr_digits - -# The user is always right. -if test "${PATH_SEPARATOR+set}" != set; then - echo "#! /bin/sh" >conf$$.sh - echo "exit 0" >>conf$$.sh - chmod +x conf$$.sh - if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then - PATH_SEPARATOR=';' - else - PATH_SEPARATOR=: - fi - rm -f conf$$.sh +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix fi +DUALCASE=1; export DUALCASE # for MKS sh # Support unset when possible. if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then @@ -58,43 +29,8 @@ else fi -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -as_nl=' -' -IFS=" "" $as_nl" - -# Find who we are. Look in the path if we contain no directory separator. -case $0 in - *[\\/]* ) as_myself=$0 ;; - *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break -done -IFS=$as_save_IFS - - ;; -esac -# We did not find ourselves, most probably we were run as `sh COMMAND' -# in which case we are not to be found in the path. -if test "x$as_myself" = x; then - as_myself=$0 -fi -if test ! -f "$as_myself"; then - echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 - { (exit 1); exit 1; } -fi - # Work around bugs in pre-3.0 UWIN ksh. -for as_var in ENV MAIL MAILPATH -do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var -done +$as_unset ENV MAIL MAILPATH PS1='$ ' PS2='> ' PS4='+ ' @@ -108,19 +44,18 @@ do if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then eval $as_var=C; export $as_var else - ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var + $as_unset $as_var fi done # Required to use basename. -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then +if expr a : '\(a\)' >/dev/null 2>&1; then as_expr=expr else as_expr=false fi -if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then +if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then as_basename=basename else as_basename=false @@ -128,388 +63,157 @@ fi # Name of the executable. -as_me=`$as_basename -- "$0" || +as_me=`$as_basename "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ - X"$0" : 'X\(/\)' \| . 2>/dev/null || + X"$0" : 'X\(/\)$' \| \ + . : '\(.\)' 2>/dev/null || echo X/"$0" | - sed '/^.*\/\([^/][^/]*\)\/*$/{ - s//\1/ - q - } - /^X\/\(\/\/\)$/{ - s//\1/ - q - } - /^X\/\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - -# CDPATH. 
-$as_unset CDPATH + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` -if test "x$CONFIG_SHELL" = x; then - if (eval ":") 2>/dev/null; then - as_have_required=yes -else - as_have_required=no -fi - - if test $as_have_required = yes && (eval ": -(as_func_return () { - (exit \$1) -} -as_func_success () { - as_func_return 0 -} -as_func_failure () { - as_func_return 1 -} -as_func_ret_success () { - return 0 -} -as_func_ret_failure () { - return 1 -} - -exitcode=0 -if as_func_success; then - : -else - exitcode=1 - echo as_func_success failed. -fi - -if as_func_failure; then - exitcode=1 - echo as_func_failure succeeded. -fi - -if as_func_ret_success; then - : -else - exitcode=1 - echo as_func_ret_success failed. -fi +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits -if as_func_ret_failure; then - exitcode=1 - echo as_func_ret_failure succeeded. +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh fi -if ( set x; as_func_ret_success y && test x = \"\$1\" ); then - : -else - exitcode=1 - echo positional parameters were not saved. -fi -test \$exitcode = 0) || { (exit 1); exit 1; } + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. 
+ case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done -( - as_lineno_1=\$LINENO - as_lineno_2=\$LINENO - test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" && - test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; } -") 2> /dev/null; then - : -else - as_candidate_shells= + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. + if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! -f "$as_myself"; then + { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2 + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. - case $as_dir in + for as_base in sh bash ksh sh5; do + case $as_dir in /*) - for as_base in sh bash ksh sh5; do - as_candidate_shells="$as_candidate_shells $as_dir/$as_base" - done;; - esac -done -IFS=$as_save_IFS - - - for as_shell in $as_candidate_shells $SHELL; do - # Try only shells that exist, to save several forks. - if { test -f "$as_shell" || test -f "$as_shell.exe"; } && - { ("$as_shell") 2> /dev/null <<\_ASEOF -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then - emulate sh - NULLCMD=: - # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. 
- alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in - *posix*) set -o posix ;; -esac - -fi - - -: -_ASEOF -}; then - CONFIG_SHELL=$as_shell - as_have_required=yes - if { "$as_shell" 2> /dev/null <<\_ASEOF -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then - emulate sh - NULLCMD=: - # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. - alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in - *posix*) set -o posix ;; -esac - -fi - - -: -(as_func_return () { - (exit $1) -} -as_func_success () { - as_func_return 0 -} -as_func_failure () { - as_func_return 1 -} -as_func_ret_success () { - return 0 -} -as_func_ret_failure () { - return 1 -} - -exitcode=0 -if as_func_success; then - : -else - exitcode=1 - echo as_func_success failed. -fi - -if as_func_failure; then - exitcode=1 - echo as_func_failure succeeded. -fi - -if as_func_ret_success; then - : -else - exitcode=1 - echo as_func_ret_success failed. -fi - -if as_func_ret_failure; then - exitcode=1 - echo as_func_ret_failure succeeded. -fi - -if ( set x; as_func_ret_success y && test x = "$1" ); then - : -else - exitcode=1 - echo positional parameters were not saved. -fi - -test $exitcode = 0) || { (exit 1); exit 1; } - -( - as_lineno_1=$LINENO - as_lineno_2=$LINENO - test "x$as_lineno_1" != "x$as_lineno_2" && - test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; } - -_ASEOF -}; then - break -fi - -fi - - done - - if test "x$CONFIG_SHELL" != x; then - for as_var in BASH_ENV ENV - do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var - done - export CONFIG_SHELL - exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"} -fi - - - if test $as_have_required = no; then - echo This script requires a shell more modern than all the - echo shells that I found on your system. 
Please install a - echo modern shell, or manually run the script under such a - echo shell if you do have one. - { (exit 1); exit 1; } -fi - - -fi - -fi - - - -(eval "as_func_return () { - (exit \$1) -} -as_func_success () { - as_func_return 0 -} -as_func_failure () { - as_func_return 1 -} -as_func_ret_success () { - return 0 -} -as_func_ret_failure () { - return 1 -} - -exitcode=0 -if as_func_success; then - : -else - exitcode=1 - echo as_func_success failed. -fi - -if as_func_failure; then - exitcode=1 - echo as_func_failure succeeded. -fi - -if as_func_ret_success; then - : -else - exitcode=1 - echo as_func_ret_success failed. -fi - -if as_func_ret_failure; then - exitcode=1 - echo as_func_ret_failure succeeded. -fi - -if ( set x; as_func_ret_success y && test x = \"\$1\" ); then - : -else - exitcode=1 - echo positional parameters were not saved. -fi - -test \$exitcode = 0") || { - echo No shell found that supports shell functions. - echo Please tell autoconf@gnu.org about your system, - echo including any error possibly output before this - echo message -} - - - + if ("$as_dir/$as_base" -c ' as_lineno_1=$LINENO as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` test "x$as_lineno_1" != "x$as_lineno_2" && - test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } + $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac # Create $as_me.lineno as a copy of $as_myself, but with $LINENO # uniformly replaced by the line number. The first 'sed' inserts a - # line-number line after each line using $LINENO; the second 'sed' - # does the real work. 
The second script uses 'N' to pair each - # line-number line with the line containing $LINENO, and appends - # trailing '-' during substitution so that $LINENO is not a special - # case at line end. + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the - # scripts with optimization help from Paolo Bonzini. Blame Lee - # E. McMahon (1931-1989) for sed's syntax. :-) - sed -n ' - p - /[$]LINENO/= - ' <$as_myself | + # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-) + sed '=' <$as_myself | sed ' - s/[$]LINENO.*/&-/ - t lineno - b - :lineno N - :loop - s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, t loop - s/-\n.*// + s,-$,, + s,^['$as_cr_digits']*\n,, ' >$as_me.lineno && - chmod +x "$as_me.lineno" || + chmod +x $as_me.lineno || { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 { (exit 1); exit 1; }; } # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the - # original and so on. Autoconf is especially sensitive to this). - . "./$as_me.lineno" + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno # Exit status is that of the last command. exit } -if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then - as_dirname=dirname -else - as_dirname=false -fi - -ECHO_C= ECHO_N= ECHO_T= -case `echo -n x` in --n*) - case `echo 'x\c'` in - *c*) ECHO_T=' ';; # ECHO_T is single tab character. 
- *) ECHO_C='\c';; - esac;; -*) - ECHO_N='-n';; +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; esac -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then +if expr a : '\(a\)' >/dev/null 2>&1; then as_expr=expr else as_expr=false fi rm -f conf$$ conf$$.exe conf$$.file -if test -d conf$$.dir; then - rm -f conf$$.dir/conf$$.file -else - rm -f conf$$.dir - mkdir conf$$.dir -fi echo >conf$$.file if ln -s conf$$.file conf$$ 2>/dev/null; then - as_ln_s='ln -s' - # ... but there are two gotchas: - # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. - # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. - # In both cases, we have to default to `cp -p'. - ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -p' fi -rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file -rmdir conf$$.dir 2>/dev/null +rm -f conf$$ conf$$.exe conf$$.file if mkdir -p . 2>/dev/null; then as_mkdir_p=: @@ -518,28 +222,7 @@ else as_mkdir_p=false fi -if test -x / >/dev/null 2>&1; then - as_test_x='test -x' -else - if ls -dL / >/dev/null 2>&1; then - as_ls_L_option=L - else - as_ls_L_option= - fi - as_test_x=' - eval sh -c '\'' - if test -d "$1"; then - test -d "$1/."; - else - case $1 in - -*)set "./$1";; - esac; - case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in - ???[sx]*):;;*)false;;esac;fi - '\'' sh - ' -fi -as_executable_p=$as_test_x +as_executable_p="test -f" # Sed expression to map a string onto a valid CPP name. 
as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" @@ -548,27 +231,39 @@ as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" +# IFS +# We need space, tab and new line, in precisely that order. +as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH -exec 7<&0 &1 # Name of the host. # hostname on some systems (SVR3.2, Linux) returns a bogus exit status, # so uname gets run too. ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` +exec 6>&1 + # # Initializations. # ac_default_prefix=/usr/local -ac_clean_files= ac_config_libobj_dir=. -LIBOBJS= cross_compiling=no subdirs= MFLAGS= MAKEFLAGS= SHELL=${CONFIG_SHELL-/bin/sh} +# Maximum number of lines to put in a shell here document. +# This variable seems obsolete. It should probably be removed, and +# only ac_max_sed_lines should be used. +: ${ac_max_here_lines=38} + # Identity of this package. PACKAGE_NAME= PACKAGE_TARNAME= @@ -580,174 +275,42 @@ ac_unique_file="include/pvfs2-types.h" # Factoring default headers for most tests. 
ac_includes_default="\ #include -#ifdef HAVE_SYS_TYPES_H +#if HAVE_SYS_TYPES_H # include #endif -#ifdef HAVE_SYS_STAT_H +#if HAVE_SYS_STAT_H # include #endif -#ifdef STDC_HEADERS +#if STDC_HEADERS # include # include #else -# ifdef HAVE_STDLIB_H +# if HAVE_STDLIB_H # include # endif #endif -#ifdef HAVE_STRING_H -# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +#if HAVE_STRING_H +# if !STDC_HEADERS && HAVE_MEMORY_H # include # endif # include #endif -#ifdef HAVE_STRINGS_H +#if HAVE_STRINGS_H # include #endif -#ifdef HAVE_INTTYPES_H +#if HAVE_INTTYPES_H # include +#else +# if HAVE_STDINT_H +# include +# endif #endif -#ifdef HAVE_STDINT_H -# include -#endif -#ifdef HAVE_UNISTD_H +#if HAVE_UNISTD_H # include #endif" -ac_subst_vars='SHELL -PATH_SEPARATOR -PACKAGE_NAME -PACKAGE_TARNAME -PACKAGE_VERSION -PACKAGE_STRING -PACKAGE_BUGREPORT -exec_prefix -prefix -program_transform_name -bindir -sbindir -libexecdir -datarootdir -datadir -sysconfdir -sharedstatedir -localstatedir -includedir -oldincludedir -docdir -infodir -htmldir -dvidir -pdfdir -psdir -libdir -localedir -mandir -DEFS -ECHO_C -ECHO_N -ECHO_T -LIBS -build_alias -host_alias -target_alias -PVFS2_VERSION -build -build_cpu -build_vendor -build_os -host -host_cpu -host_vendor -host_os -CC -CFLAGS -LDFLAGS -CPPFLAGS -ac_ct_CC -EXEEXT -OBJEXT -CPP -GREP -EGREP -INSTALL_PROGRAM -INSTALL_SCRIPT -INSTALL_DATA -HAVE_PERL -HAVE_FIND -HAVE_BISON -HAVE_FLEX -BUILD_CC -BUILD_CFLAGS -BUILD_LDFLAGS -LIBCFLAGS -THREAD_LIB -BUILD_SERVER -MMAP_RA_CACHE -TRUSTED_CONNECTIONS -HAVE_PKGCONFIG -GTKLIBS -GTKCFLAGS -BUILD_KARMA -build_static -REDHAT_RELEASE -NPTL_WORKAROUND -MISC_TROVE_FLAGS -THREADED_KMOD_HELPER -LINUX_KERNEL_SRC -LINUX24_KERNEL_SRC -LINUX24_KERNEL_MINOR_VER -BUILD_ABSOLUTE_TOP -SRC_RELATIVE_TOP -SRC_ABSOLUTE_TOP -ENABLE_COVERAGE -STRICT_CFLAGS -QUIET_COMPILE -BUILD_EPOLL -PVFS2_SEGV_BACKTRACE -build_shared -INTELC -GNUC -DB_CFLAGS -DB_LIB -NEEDS_LIBRT -TARGET_OS_DARWIN -TARGET_OS_LINUX -BUILD_BMI_TCP -BUILD_GM 
-GM_INCDIR -GM_LIBDIR -BUILD_MX -MX_INCDIR -MX_LIBDIR -BUILD_IB -IB_INCDIR -IB_LIBDIR -BUILD_OPENIB -OPENIB_INCDIR -OPENIB_LIBDIR -BUILD_PORTALS -PORTALS_INCS -PORTALS_LIBS -BUILD_OSD -OSD_LIBS -SERVER_LDFLAGS -GOSSIP_ENABLE_BACKTRACE -BUILD_VIS -VISCFLAGS -VISLIBS -LIBOBJS -LTLIBOBJS' +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS PVFS2_VERSION PVFS2_VERSION_MAJOR PVFS2_VERSION_MINOR PVFS2_VERSION_SUB build build_cpu build_vendor build_os host host_cpu host_vendor host_os CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP EGREP INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA HAVE_PERL HAVE_FIND HAVE_BISON HAVE_FLEX BUILD_CC BUILD_CFLAGS BUILD_LDFLAGS LIBCFLAGS THREAD_LIB BUILD_SERVER BUILD_BMI_ONLY NEED_BERKELEY_DB MMAP_RA_CACHE RESET_FILE_POS TRUSTED_CONNECTIONS HAVE_PKGCONFIG GTKLIBS GTKCFLAGS BUILD_KARMA build_static REDHAT_RELEASE NPTL_WORKAROUND MISC_TROVE_FLAGS TAU_INCS BUILD_TAU BUILD_KERNEL THREADED_KMOD_HELPER LINUX_KERNEL_SRC LINUX24_KERNEL_SRC LINUX24_KERNEL_MINOR_VER BUILD_ABSOLUTE_TOP SRC_RELATIVE_TOP SRC_ABSOLUTE_TOP FUSE_LDFLAGS FUSE_CFLAGS BUILD_FUSE ENABLE_COVERAGE STRICT_CFLAGS QUIET_COMPILE BUILD_EPOLL PVFS2_SEGV_BACKTRACE build_shared BUILD_USRINT BUILDUSRINT BUILD_UCACHE BUILD_ACL_INTERFACE INTELC GNUC DB_CFLAGS DB_LIB NEEDS_LIBRT TARGET_OS_DARWIN TARGET_OS_LINUX BUILD_BMI_TCP BUILD_GM GM_INCDIR GM_LIBDIR BUILD_MX MX_INCDIR MX_LIBDIR BUILD_IB IB_INCDIR IB_LIBDIR BUILD_OPENIB OPENIB_INCDIR OPENIB_LIBDIR BUILD_PORTALS PORTALS_INCS PORTALS_LIBS BUILD_ZOID ZOID_SRCDIR SERVER_LDFLAGS GOSSIP_ENABLE_BACKTRACE BUILD_VIS VISCFLAGS VISLIBS LIBOBJS LTLIBOBJS' ac_subst_files='' - ac_precious_vars='build_alias -host_alias -target_alias -CC -CFLAGS -LDFLAGS -LIBS 
-CPPFLAGS -CPP' - # Initialize some variables set by options. ac_init_help= @@ -774,48 +337,34 @@ x_libraries=NONE # and all the variables that are supposed to be based on exec_prefix # by default will actually change. # Use braces instead of parens because sh, perl, etc. also accept them. -# (The list follows the same order as the GNU Coding Standards.) bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' -datarootdir='${prefix}/share' -datadir='${datarootdir}' +datadir='${prefix}/share' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' +libdir='${exec_prefix}/lib' includedir='${prefix}/include' oldincludedir='/usr/include' -docdir='${datarootdir}/doc/${PACKAGE}' -infodir='${datarootdir}/info' -htmldir='${docdir}' -dvidir='${docdir}' -pdfdir='${docdir}' -psdir='${docdir}' -libdir='${exec_prefix}/lib' -localedir='${datarootdir}/locale' -mandir='${datarootdir}/man' +infodir='${prefix}/info' +mandir='${prefix}/man' ac_prev= -ac_dashdash= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then - eval $ac_prev=\$ac_option + eval "$ac_prev=\$ac_option" ac_prev= continue fi - case $ac_option in - *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; - *) ac_optarg=yes ;; - esac + ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'` # Accept the important Cygnus configure options, so we can diagnose typos. 
- case $ac_dashdash$ac_option in - --) - ac_dashdash=yes ;; + case $ac_option in -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; @@ -837,45 +386,33 @@ do --config-cache | -C) cache_file=config.cache ;; - -datadir | --datadir | --datadi | --datad) + -datadir | --datadir | --datadi | --datad | --data | --dat | --da) ac_prev=datadir ;; - -datadir=* | --datadir=* | --datadi=* | --datad=*) + -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ + | --da=*) datadir=$ac_optarg ;; - -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ - | --dataroo | --dataro | --datar) - ac_prev=datarootdir ;; - -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ - | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) - datarootdir=$ac_optarg ;; - -disable-* | --disable-*) ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. - expr "x$ac_feature" : ".*[^-._$as_cr_alnum]" >/dev/null && + expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid feature name: $ac_feature" >&2 { (exit 1); exit 1; }; } - ac_feature=`echo $ac_feature | sed 's/[-.]/_/g'` - eval enable_$ac_feature=no ;; - - -docdir | --docdir | --docdi | --doc | --do) - ac_prev=docdir ;; - -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) - docdir=$ac_optarg ;; - - -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) - ac_prev=dvidir ;; - -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) - dvidir=$ac_optarg ;; + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + eval "enable_$ac_feature=no" ;; -enable-* | --enable-*) ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. 
- expr "x$ac_feature" : ".*[^-._$as_cr_alnum]" >/dev/null && + expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid feature name: $ac_feature" >&2 { (exit 1); exit 1; }; } - ac_feature=`echo $ac_feature | sed 's/[-.]/_/g'` - eval enable_$ac_feature=\$ac_optarg ;; + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + case $ac_option in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "enable_$ac_feature='$ac_optarg'" ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ @@ -902,12 +439,6 @@ do -host=* | --host=* | --hos=* | --ho=*) host_alias=$ac_optarg ;; - -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) - ac_prev=htmldir ;; - -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ - | --ht=*) - htmldir=$ac_optarg ;; - -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; @@ -932,16 +463,13 @@ do | --libexe=* | --libex=* | --libe=*) libexecdir=$ac_optarg ;; - -localedir | --localedir | --localedi | --localed | --locale) - ac_prev=localedir ;; - -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) - localedir=$ac_optarg ;; - -localstatedir | --localstatedir | --localstatedi | --localstated \ - | --localstate | --localstat | --localsta | --localst | --locals) + | --localstate | --localstat | --localsta | --localst \ + | --locals | --local | --loca | --loc | --lo) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ - | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + | --localstate=* | --localstat=* | --localsta=* | --localst=* \ + | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) localstatedir=$ac_optarg ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) @@ -1006,16 +534,6 @@ do | 
--progr-tra=* | --program-tr=* | --program-t=*) program_transform_name=$ac_optarg ;; - -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) - ac_prev=pdfdir ;; - -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) - pdfdir=$ac_optarg ;; - - -psdir | --psdir | --psdi | --psd | --ps) - ac_prev=psdir ;; - -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) - psdir=$ac_optarg ;; - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; @@ -1068,20 +586,24 @@ do -with-* | --with-*) ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. - expr "x$ac_package" : ".*[^-._$as_cr_alnum]" >/dev/null && + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid package name: $ac_package" >&2 { (exit 1); exit 1; }; } - ac_package=`echo $ac_package | sed 's/[-.]/_/g'` - eval with_$ac_package=\$ac_optarg ;; + ac_package=`echo $ac_package| sed 's/-/_/g'` + case $ac_option in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "with_$ac_package='$ac_optarg'" ;; -without-* | --without-*) ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. - expr "x$ac_package" : ".*[^-._$as_cr_alnum]" >/dev/null && + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid package name: $ac_package" >&2 { (exit 1); exit 1; }; } - ac_package=`echo $ac_package | sed 's/[-.]/_/g'` - eval with_$ac_package=no ;; + ac_package=`echo $ac_package | sed 's/-/_/g'` + eval "with_$ac_package=no" ;; --x) # Obsolete; use --with-x. @@ -1112,7 +634,8 @@ Try \`$0 --help' for more information." 
>&2 expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid variable name: $ac_envvar" >&2 { (exit 1); exit 1; }; } - eval $ac_envvar=\$ac_optarg + ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` + eval "$ac_envvar='$ac_optarg'" export $ac_envvar ;; *) @@ -1132,19 +655,27 @@ if test -n "$ac_prev"; then { (exit 1); exit 1; }; } fi -# Be sure to have absolute directory names. -for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ - datadir sysconfdir sharedstatedir localstatedir includedir \ - oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ - libdir localedir mandir +# Be sure to have absolute paths. +for ac_var in exec_prefix prefix do - eval ac_val=\$$ac_var + eval ac_val=$`echo $ac_var` case $ac_val in - [\\/$]* | ?:[\\/]* ) continue;; - NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + [\\/$]* | ?:[\\/]* | NONE | '' ) ;; + *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; };; + esac +done + +# Be sure to have absolute paths. 
+for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \ + localstatedir libdir includedir oldincludedir infodir mandir +do + eval ac_val=$`echo $ac_var` + case $ac_val in + [\\/$]* | ?:[\\/]* ) ;; + *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; };; esac - { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 - { (exit 1); exit 1; }; } done # There might be people who depend on the old broken behavior: `$host' @@ -1171,76 +702,74 @@ test -n "$host_alias" && ac_tool_prefix=$host_alias- test "$silent" = yes && exec 6>/dev/null -ac_pwd=`pwd` && test -n "$ac_pwd" && -ac_ls_di=`ls -di .` && -ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || - { echo "$as_me: error: Working directory cannot be determined" >&2 - { (exit 1); exit 1; }; } -test "X$ac_ls_di" = "X$ac_pwd_ls_di" || - { echo "$as_me: error: pwd does not report name of working directory" >&2 - { (exit 1); exit 1; }; } - - # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes - # Try the directory containing this script, then the parent directory. - ac_confdir=`$as_dirname -- "$0" || + # Try the directory containing this script, then its parent. + ac_confdir=`(dirname "$0") 2>/dev/null || $as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$0" : 'X\(//\)[^/]' \| \ X"$0" : 'X\(//\)$' \| \ - X"$0" : 'X\(/\)' \| . 2>/dev/null || + X"$0" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || echo X"$0" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` srcdir=$ac_confdir - if test ! -r "$srcdir/$ac_unique_file"; then + if test ! -r $srcdir/$ac_unique_file; then srcdir=.. 
fi else ac_srcdir_defaulted=no fi -if test ! -r "$srcdir/$ac_unique_file"; then - test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." - { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 +if test ! -r $srcdir/$ac_unique_file; then + if test "$ac_srcdir_defaulted" = yes; then + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." >&2 + { (exit 1); exit 1; }; } + else + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 { (exit 1); exit 1; }; } + fi fi -ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" -ac_abs_confdir=`( - cd "$srcdir" && test -r "./$ac_unique_file" || { echo "$as_me: error: $ac_msg" >&2 +(cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null || + { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2 { (exit 1); exit 1; }; } - pwd)` -# When building in place, set srcdir=. -if test "$ac_abs_confdir" = "$ac_pwd"; then - srcdir=. -fi -# Remove unnecessary trailing slashes from srcdir. -# Double slashes in file names in object file debugging info -# mess up M-x gdb in Emacs. 
-case $srcdir in -*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; -esac -for ac_var in $ac_precious_vars; do - eval ac_env_${ac_var}_set=\${${ac_var}+set} - eval ac_env_${ac_var}_value=\$${ac_var} - eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} - eval ac_cv_env_${ac_var}_value=\$${ac_var} -done +srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'` +ac_env_build_alias_set=${build_alias+set} +ac_env_build_alias_value=$build_alias +ac_cv_env_build_alias_set=${build_alias+set} +ac_cv_env_build_alias_value=$build_alias +ac_env_host_alias_set=${host_alias+set} +ac_env_host_alias_value=$host_alias +ac_cv_env_host_alias_set=${host_alias+set} +ac_cv_env_host_alias_value=$host_alias +ac_env_target_alias_set=${target_alias+set} +ac_env_target_alias_value=$target_alias +ac_cv_env_target_alias_set=${target_alias+set} +ac_cv_env_target_alias_value=$target_alias +ac_env_CC_set=${CC+set} +ac_env_CC_value=$CC +ac_cv_env_CC_set=${CC+set} +ac_cv_env_CC_value=$CC +ac_env_CFLAGS_set=${CFLAGS+set} +ac_env_CFLAGS_value=$CFLAGS +ac_cv_env_CFLAGS_set=${CFLAGS+set} +ac_cv_env_CFLAGS_value=$CFLAGS +ac_env_LDFLAGS_set=${LDFLAGS+set} +ac_env_LDFLAGS_value=$LDFLAGS +ac_cv_env_LDFLAGS_set=${LDFLAGS+set} +ac_cv_env_LDFLAGS_value=$LDFLAGS +ac_env_CPPFLAGS_set=${CPPFLAGS+set} +ac_env_CPPFLAGS_value=$CPPFLAGS +ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set} +ac_cv_env_CPPFLAGS_value=$CPPFLAGS +ac_env_CPP_set=${CPP+set} +ac_env_CPP_value=$CPP +ac_cv_env_CPP_set=${CPP+set} +ac_cv_env_CPP_value=$CPP # # Report the --help message. 
@@ -1269,6 +798,9 @@ Configuration: -n, --no-create do not create output files --srcdir=DIR find the sources in DIR [configure dir or \`..'] +_ACEOF + + cat <<_ACEOF Installation directories: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] @@ -1286,22 +818,15 @@ Fine tuning of the installation directories: --bindir=DIR user executables [EPREFIX/bin] --sbindir=DIR system admin executables [EPREFIX/sbin] --libexecdir=DIR program executables [EPREFIX/libexec] + --datadir=DIR read-only architecture-independent data [PREFIX/share] --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] - --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] - --datadir=DIR read-only architecture-independent data [DATAROOTDIR] - --infodir=DIR info documentation [DATAROOTDIR/info] - --localedir=DIR locale-dependent data [DATAROOTDIR/locale] - --mandir=DIR man documentation [DATAROOTDIR/man] - --docdir=DIR documentation root [DATAROOTDIR/doc/PACKAGE] - --htmldir=DIR html documentation [DOCDIR] - --dvidir=DIR dvi documentation [DOCDIR] - --pdfdir=DIR pdf documentation [DOCDIR] - --psdir=DIR ps documentation [DOCDIR] + --infodir=DIR info documentation [PREFIX/info] + --mandir=DIR man documentation [PREFIX/man] _ACEOF cat <<\_ACEOF @@ -1320,157 +845,212 @@ Optional Features: --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --disable-thread-safety Disables thread safety in the client library - --disable-server Disables building of PVFS2 server - --disable-karma-mem-usage-stats Disables memory usage stats in karma + --disable-server Disables building of PVFS2 server + 
--disable-karma-mem-usage-stats + Disables memory usage stats in karma + --enable-bmi-only Build only the BMI library --disable-perf-counters Disables pvfs2-server performance counters - --disable-disk-io Disables pvfs2-server disk I/O (for testing) - --enable-mmap-racache Enables mmap-readahead in kernel interface - --enable-trusted-connections Enable connects only from trusted hosts/ports - (experimental, off by default) + --enable-mmap-racache **EXPERIMENTAL** Enables mmap-readahead in kernel + interface + --enable-reset-file-pos Resets file position pointer in kernel interface upon error + --enable-trusted-connections + **EXPERIMENTAL** Enable connects only + from trusted hosts/ports --disable-karma Disables optional gui. (Enabled by default) --disable-static Do not build static client library --enable-redhat24 Enable workaround for RedHat 2.4 kernel - --enable-nptl-workaround Enable workaround for buggy NPTL/Pthread libraries - --disable-aio-threaded-callbacks Disable use of AIO threaded callbacks + --enable-nptl-workaround + Enable workaround for buggy NPTL/Pthread libraries + --disable-aio-threaded-callbacks + **EXPERIMENTAL** Disable use of AIO + threaded callbacks --disable-kernel-aio Forcibly disable kernel aio - --enable-kernel-sendfile Forcibly enable kernel sendfile - --enable-threaded-kmod-helper Use threads in the kernel helper application + --enable-kernel-sendfile + **EXPERIMENTAL** Forcibly enable kernel sendfile + --enable-threaded-kmod-helper + **EXPERIMENTAL** Use threads in the kernel + helper application --enable-fast Disable optional debugging, enable optimizations. 
+ --enable-fuse **EXPERIMENTAL** Enable FUSE component --enable-strict Turn on strict compiler warnings --enable-verbose-build Enables full output during build process --enable-epoll Enable use of epoll for TCP/IP polling (epoll is enabled by default, this option ensures no checking is done for x-compiling) --disable-epoll Disable epoll, use poll instead - --disable-segv-backtrace Disables back traces in segfault signal handler + --disable-segv-backtrace + Disables back traces in segfault signal handler --enable-shared Build shared client library + --disable-usrint Do not build posix and stdio user libraries + --enable-usrint-kmount Assume FS is mounted via kernel + --disable-usrint-cwd Do not enable user interface CWD support + --enable-ucache Do not enable user user interface data cache. --enable-static-server Builds the server static + --enable-visual Enable visualisation tools. (Disabled by default, requires SDL) Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-openssl= Build with openssl (default=/usr) - --without-openssl Don't build with openssl. + --without-openssl Don't build with openssl. --with-mtrace Use mtrace (must set MALLOC_TRACE to output file) --with-berkdb-debug Use berkeley db error reporting (if detected). - --with-pablo=path Use Pablo trace library installed in "path" - --with-mpe=path Use MPE profiling library installed in "path" + --with-tau=path Use TAU trace library installed in "path" --with-kernel=srcpath Build pvfs2 kernel module against 2.6.x src --with-kernel24=srcpath Build pvfs2 kernel module against 2.4.x src --with-efence= Use electric fence for malloc debugging. --with-valgrind= Use valgrind annotations for debugging. 
--with-db= Location of installed DB package (default=/usr) --without-bmi-tcp Disable BMI TCP method - --with-gm= Location of the GM install (default no GM) - --with-gm-includes= Location of the GM includes - --with-gm-libs= Location of the GM libraries - --with-mx= Location of the MX install (default no MX) - --with-mx-includes= Location of the MX includes - --with-mx-libs= Location of the MX libraries + --with-gm= Location of the GM install (default no GM) + --with-gm-includes= + Location of the GM includes + --with-gm-libs= Location of the GM libraries + --with-mx= Location of the MX install (default no MX) + --with-mx-includes= + Location of the MX includes + --with-mx-libs= Location of the MX libraries --with-ib= Location of the IB installation (default no IB) - --with-ib-includes= Location of the IB includes - --with-ib-libs= Location of the IB libraries + --with-ib-includes= + Location of the IB includes + --with-ib-libs= Location of the IB libraries --with-openib= Location of the OpenIB install (default no OpenIB) - --with-openib-includes= Location of the OpenIB includes - --with-openib-libs= Location of the OpenIB libraries + --with-openib-includes= + Location of the OpenIB includes + --with-openib-libs= + Location of the OpenIB libraries --with-portals= Location of the Portals install (default no Portals) - --with-portals-includes= Extra CFLAGS to specify Portals includes - --with-portals-libs= Extra LIBS to link Portals libraries + --with-portals-includes= + Extra CFLAGS to specify Portals includes + --with-portals-libs= + Extra LIBS to link Portals libraries + --with-zoid= Location of the ZOID tree (default no ZOID) Some influential environment variables: CC C compiler command CFLAGS C compiler flags LDFLAGS linker flags, e.g. -L if you have libraries in a nonstandard directory - LIBS libraries to pass to the linker, e.g. -l - CPPFLAGS C/C++/Objective C preprocessor flags, e.g. 
-I if - you have headers in a nonstandard directory + CPPFLAGS C/C++ preprocessor flags, e.g. -I if you have + headers in a nonstandard directory CPP C preprocessor Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. _ACEOF -ac_status=$? fi if test "$ac_init_help" = "recursive"; then # If there are subdirs, report their specific --help. + ac_popdir=`pwd` for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue - test -d "$ac_dir" || continue + test -d $ac_dir || continue ac_builddir=. -case "$ac_dir" in -.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; -*) +if test "$ac_dir" != .; then ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` - # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'` - case $ac_top_builddir_sub in - "") ac_top_builddir_sub=. ac_top_build_prefix= ;; - *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; - esac ;; -esac -ac_abs_top_builddir=$ac_pwd -ac_abs_builddir=$ac_pwd$ac_dir_suffix -# for backward compatibility: -ac_top_builddir=$ac_top_build_prefix + # A "../" for each directory in $ac_dir_suffix. + ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi case $srcdir in - .) # We are building in place. + .) # No --srcdir option. We are building in place. ac_srcdir=. - ac_top_srcdir=$ac_top_builddir_sub - ac_abs_top_srcdir=$ac_pwd ;; - [\\/]* | ?:[\\/]* ) # Absolute name. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. + else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. ac_srcdir=$srcdir$ac_dir_suffix; - ac_top_srcdir=$srcdir - ac_abs_top_srcdir=$srcdir ;; - *) # Relative name. 
- ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix - ac_top_srcdir=$ac_top_build_prefix$srcdir - ac_abs_top_srcdir=$ac_pwd/$srcdir ;; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; +esac + +# Do not use `cd foo && pwd` to compute absolute paths, because +# the directories may not exist. +case `pwd` in +.) ac_abs_builddir="$ac_dir";; +*) + case "$ac_dir" in + .) ac_abs_builddir=`pwd`;; + [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; + *) ac_abs_builddir=`pwd`/"$ac_dir";; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_builddir=${ac_top_builddir}.;; +*) + case ${ac_top_builddir}. in + .) ac_abs_top_builddir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; + *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_srcdir=$ac_srcdir;; +*) + case $ac_srcdir in + .) ac_abs_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; + *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_srcdir=$ac_top_srcdir;; +*) + case $ac_top_srcdir in + .) ac_abs_top_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; + *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; + esac;; esac -ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix - - cd "$ac_dir" || { ac_status=$?; continue; } - # Check for guested configure. - if test -f "$ac_srcdir/configure.gnu"; then - echo && - $SHELL "$ac_srcdir/configure.gnu" --help=recursive - elif test -f "$ac_srcdir/configure"; then - echo && - $SHELL "$ac_srcdir/configure" --help=recursive + + cd $ac_dir + # Check for guested configure; otherwise get Cygnus style configure. 
+ if test -f $ac_srcdir/configure.gnu; then + echo + $SHELL $ac_srcdir/configure.gnu --help=recursive + elif test -f $ac_srcdir/configure; then + echo + $SHELL $ac_srcdir/configure --help=recursive + elif test -f $ac_srcdir/configure.ac || + test -f $ac_srcdir/configure.in; then + echo + $ac_configure --help else echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 - fi || ac_status=$? - cd "$ac_pwd" || { ac_status=$?; break; } + fi + cd $ac_popdir done fi -test -n "$ac_init_help" && exit $ac_status +test -n "$ac_init_help" && exit 0 if $ac_init_version; then cat <<\_ACEOF -configure -generated by GNU Autoconf 2.61 -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, -2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +Copyright (C) 2003 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF - exit + exit 0 fi -cat >config.log <<_ACEOF +exec 5>config.log +cat >&5 <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. It was created by $as_me, which was -generated by GNU Autoconf 2.61. Invocation command line was +generated by GNU Autoconf 2.59. 
Invocation command line was $ $0 $@ _ACEOF -exec 5>>config.log { cat <<_ASUNAME ## --------- ## @@ -1489,7 +1069,7 @@ uname -v = `(uname -v) 2>/dev/null || echo unknown` /bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` /usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` -/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +hostinfo = `(hostinfo) 2>/dev/null || echo unknown` /bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` /bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` @@ -1503,7 +1083,6 @@ do test -z "$as_dir" && as_dir=. echo "PATH: $as_dir" done -IFS=$as_save_IFS } >&5 @@ -1525,6 +1104,7 @@ _ACEOF ac_configure_args= ac_configure_args0= ac_configure_args1= +ac_sep= ac_must_keep_next=false for ac_pass in 1 2 do @@ -1535,7 +1115,7 @@ do -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) continue ;; - *\'*) + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in @@ -1557,7 +1137,9 @@ do -* ) ac_must_keep_next=true ;; esac fi - ac_configure_args="$ac_configure_args '$ac_arg'" + ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'" + # Get rid of the leading space. + ac_sep=" " ;; esac done @@ -1568,8 +1150,8 @@ $as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_ # When interrupted or exit'd, cleanup temporary files, and complete # config.log. We remove comments because anyway the quotes in there # would cause problems or look ugly. -# WARNING: Use '\'' to represent an apostrophe within the trap. -# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +# WARNING: Be sure not to use single quotes in there, as some shells, +# such as our DU 5.0 friend, will then `close' the trap. 
trap 'exit_status=$? # Save into config.log some information that might help in debugging. { @@ -1582,34 +1164,20 @@ trap 'exit_status=$? _ASBOX echo # The following way of writing the cache mishandles newlines in values, -( - for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do - eval ac_val=\$$ac_var - case $ac_val in #( - *${as_nl}*) - case $ac_var in #( - *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5 -echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;; - esac - case $ac_var in #( - _ | IFS | as_nl) ;; #( - *) $as_unset $ac_var ;; - esac ;; - esac - done +{ (set) 2>&1 | - case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( - *${as_nl}ac_space=\ *) + case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in + *ac_space=\ *) sed -n \ - "s/'\''/'\''\\\\'\'''\''/g; - s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" - ;; #( + "s/'"'"'/'"'"'\\\\'"'"''"'"'/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p" + ;; *) - sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + sed -n \ + "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" ;; - esac | - sort -) + esac; +} echo cat <<\_ASBOX @@ -1620,28 +1188,22 @@ _ASBOX echo for ac_var in $ac_subst_vars do - eval ac_val=\$$ac_var - case $ac_val in - *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; - esac - echo "$ac_var='\''$ac_val'\''" + eval ac_val=$`echo $ac_var` + echo "$ac_var='"'"'$ac_val'"'"'" done | sort echo if test -n "$ac_subst_files"; then cat <<\_ASBOX -## ------------------- ## -## File substitutions. ## -## ------------------- ## +## ------------- ## +## Output files. 
## +## ------------- ## _ASBOX echo for ac_var in $ac_subst_files do - eval ac_val=\$$ac_var - case $ac_val in - *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; - esac - echo "$ac_var='\''$ac_val'\''" + eval ac_val=$`echo $ac_var` + echo "$ac_var='"'"'$ac_val'"'"'" done | sort echo fi @@ -1653,24 +1215,26 @@ _ASBOX ## ----------- ## _ASBOX echo - cat confdefs.h + sed "/^$/d" confdefs.h | sort echo fi test "$ac_signal" != 0 && echo "$as_me: caught signal $ac_signal" echo "$as_me: exit $exit_status" } >&5 - rm -f core *.core core.conftest.* && - rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + rm -f core *.core && + rm -rf conftest* confdefs* conf$$* $ac_clean_files && exit $exit_status -' 0 + ' 0 for ac_signal in 1 2 13 15; do trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal done ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. -rm -f -r conftest* confdefs.h +rm -rf conftest* confdefs.h +# AIX cpp loses on an empty file, so make sure it contains at least a newline. +echo >confdefs.h # Predefined preprocessor variables. @@ -1701,17 +1265,14 @@ _ACEOF # Let the site file select an alternate cache file if it wants to. # Prefer explicitly selected file to automatically selected ones. 
-if test -n "$CONFIG_SITE"; then - set x "$CONFIG_SITE" -elif test "x$prefix" != xNONE; then - set x "$prefix/share/config.site" "$prefix/etc/config.site" -else - set x "$ac_default_prefix/share/config.site" \ - "$ac_default_prefix/etc/config.site" +if test -z "$CONFIG_SITE"; then + if test "x$prefix" != xNONE; then + CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" + else + CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" + fi fi -shift -for ac_site_file -do +for ac_site_file in $CONFIG_SITE; do if test -r "$ac_site_file"; then { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5 echo "$as_me: loading site script $ac_site_file" >&6;} @@ -1727,8 +1288,8 @@ if test -r "$cache_file"; then { echo "$as_me:$LINENO: loading cache $cache_file" >&5 echo "$as_me: loading cache $cache_file" >&6;} case $cache_file in - [\\/]* | ?:[\\/]* ) . "$cache_file";; - *) . "./$cache_file";; + [\\/]* | ?:[\\/]* ) . $cache_file;; + *) . ./$cache_file;; esac fi else @@ -1740,11 +1301,12 @@ fi # Check that the precious variables saved in the cache have kept the same # value. ac_cache_corrupted=false -for ac_var in $ac_precious_vars; do +for ac_var in `(set) 2>&1 | + sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do eval ac_old_set=\$ac_cv_env_${ac_var}_set eval ac_new_set=\$ac_env_${ac_var}_set - eval ac_old_val=\$ac_cv_env_${ac_var}_value - eval ac_new_val=\$ac_env_${ac_var}_value + eval ac_old_val="\$ac_cv_env_${ac_var}_value" + eval ac_new_val="\$ac_env_${ac_var}_value" case $ac_old_set,$ac_new_set in set,) { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 @@ -1769,7 +1331,8 @@ echo "$as_me: current value: $ac_new_val" >&2;} # Pass precious variables to config.status. 
if test "$ac_new_set" = set; then case $ac_new_val in - *\'*) ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) + ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; *) ac_arg=$ac_var=$ac_new_val ;; esac case " $ac_configure_args " in @@ -1786,6 +1349,12 @@ echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start ov { (exit 1); exit 1; }; } fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + @@ -1802,152 +1371,119 @@ fi -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_aux_dir= -for ac_dir in maint/config "$srcdir"/maint/config; do - if test -f "$ac_dir/install-sh"; then +for ac_dir in maint/config $srcdir/maint/config; do + if test -f $ac_dir/install-sh; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install-sh -c" break - elif test -f "$ac_dir/install.sh"; then + elif test -f $ac_dir/install.sh; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install.sh -c" break - elif test -f "$ac_dir/shtool"; then + elif test -f $ac_dir/shtool; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/shtool install -c" break fi done if test -z "$ac_aux_dir"; then - { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in maint/config \"$srcdir\"/maint/config" >&5 -echo "$as_me: error: cannot find install-sh or install.sh in maint/config \"$srcdir\"/maint/config" >&2;} + { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in maint/config $srcdir/maint/config" >&5 +echo "$as_me: error: cannot find install-sh or install.sh in maint/config $srcdir/maint/config" >&2;} { (exit 1); exit 1; }; 
} fi - -# These three variables are undocumented and unsupported, -# and are intended to be withdrawn in a future Autoconf release. -# They can cause serious problems if a builder's source tree is in a directory -# whose full name contains unusual characters. -ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. -ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. -ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. - +ac_config_guess="$SHELL $ac_aux_dir/config.guess" +ac_config_sub="$SHELL $ac_aux_dir/config.sub" +ac_configure="$SHELL $ac_aux_dir/configure" # This should be Cygnus configure. CONFIGURE_TIME=`date -u +"%Y-%m-%d-%H%M%S"` -PVFS2_MAJOR=2 -PVFS2_MINOR=7 -PVFS2_SUB=1 +PVFS2_VERSION_MAJOR=2 +PVFS2_VERSION_MINOR=8 +PVFS2_VERSION_SUB=6 +#PVFS2_VERSION_RELEASE="orangefs" #PVFS2_PRE="" -PVFS2_PRE="pre1-$CONFIGURE_TIME" +PVFS2_VERSION_PRE="orangefs-$CONFIGURE_TIME" -PVFS2_VERSION=$PVFS2_MAJOR.$PVFS2_MINOR.$PVFS2_SUB$PVFS2_PRE +PVFS2_VERSION=$PVFS2_VERSION_MAJOR.$PVFS2_VERSION_MINOR.$PVFS2_VERSION_SUB-$PVFS2_VERSION_RELEASE$PVFS2_VERSION_PRE cat >>confdefs.h <<_ACEOF -#define PVFS2_VERSION_MAJOR $PVFS2_MAJOR +#define PVFS2_VERSION_MAJOR $PVFS2_VERSION_MAJOR _ACEOF cat >>confdefs.h <<_ACEOF -#define PVFS2_VERSION_MINOR $PVFS2_MINOR +#define PVFS2_VERSION_MINOR $PVFS2_VERSION_MINOR _ACEOF cat >>confdefs.h <<_ACEOF -#define PVFS2_VERSION_SUB $PVFS2_SUB +#define PVFS2_VERSION_SUB $PVFS2_VERSION_SUB _ACEOF +#AC_DEFINE_UNQUOTED(PVFS2_VERSION_RELEASE, $PVFS2_VERSION_RELEASE, release version number) + + + +#AC_SUBST(PVFS2_VERSION_RELEASE) + # Make sure we can run config.sub. 
-$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || - { { echo "$as_me:$LINENO: error: cannot run $SHELL $ac_aux_dir/config.sub" >&5 -echo "$as_me: error: cannot run $SHELL $ac_aux_dir/config.sub" >&2;} +$ac_config_sub sun4 >/dev/null 2>&1 || + { { echo "$as_me:$LINENO: error: cannot run $ac_config_sub" >&5 +echo "$as_me: error: cannot run $ac_config_sub" >&2;} { (exit 1); exit 1; }; } -{ echo "$as_me:$LINENO: checking build system type" >&5 -echo $ECHO_N "checking build system type... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking build system type" >&5 +echo $ECHO_N "checking build system type... $ECHO_C" >&6 if test "${ac_cv_build+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_build_alias=$build_alias -test "x$ac_build_alias" = x && - ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` -test "x$ac_build_alias" = x && + ac_cv_build_alias=$build_alias +test -z "$ac_cv_build_alias" && + ac_cv_build_alias=`$ac_config_guess` +test -z "$ac_cv_build_alias" && { { echo "$as_me:$LINENO: error: cannot guess build type; you must specify one" >&5 echo "$as_me: error: cannot guess build type; you must specify one" >&2;} { (exit 1); exit 1; }; } -ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || - { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $ac_build_alias failed" >&5 -echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $ac_build_alias failed" >&2;} +ac_cv_build=`$ac_config_sub $ac_cv_build_alias` || + { { echo "$as_me:$LINENO: error: $ac_config_sub $ac_cv_build_alias failed" >&5 +echo "$as_me: error: $ac_config_sub $ac_cv_build_alias failed" >&2;} { (exit 1); exit 1; }; } fi -{ echo "$as_me:$LINENO: result: $ac_cv_build" >&5 -echo "${ECHO_T}$ac_cv_build" >&6; } -case $ac_cv_build in -*-*-*) ;; -*) { { echo "$as_me:$LINENO: error: invalid value of canonical build" >&5 -echo "$as_me: error: invalid value of canonical build" >&2;} - { (exit 1); exit 1; }; };; -esac +echo "$as_me:$LINENO: result: $ac_cv_build" >&5 +echo 
"${ECHO_T}$ac_cv_build" >&6 build=$ac_cv_build -ac_save_IFS=$IFS; IFS='-' -set x $ac_cv_build -shift -build_cpu=$1 -build_vendor=$2 -shift; shift -# Remember, the first character of IFS is used to create $*, -# except with old shells: -build_os=$* -IFS=$ac_save_IFS -case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac - - -{ echo "$as_me:$LINENO: checking host system type" >&5 -echo $ECHO_N "checking host system type... $ECHO_C" >&6; } +build_cpu=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` +build_vendor=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` +build_os=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` + + +echo "$as_me:$LINENO: checking host system type" >&5 +echo $ECHO_N "checking host system type... $ECHO_C" >&6 if test "${ac_cv_host+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - if test "x$host_alias" = x; then - ac_cv_host=$ac_cv_build -else - ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || - { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&5 -echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&2;} + ac_cv_host_alias=$host_alias +test -z "$ac_cv_host_alias" && + ac_cv_host_alias=$ac_cv_build_alias +ac_cv_host=`$ac_config_sub $ac_cv_host_alias` || + { { echo "$as_me:$LINENO: error: $ac_config_sub $ac_cv_host_alias failed" >&5 +echo "$as_me: error: $ac_config_sub $ac_cv_host_alias failed" >&2;} { (exit 1); exit 1; }; } -fi fi -{ echo "$as_me:$LINENO: result: $ac_cv_host" >&5 -echo "${ECHO_T}$ac_cv_host" >&6; } -case $ac_cv_host in -*-*-*) ;; -*) { { echo "$as_me:$LINENO: error: invalid value of canonical host" >&5 -echo "$as_me: error: invalid value of canonical host" >&2;} - { (exit 1); exit 1; }; };; -esac +echo "$as_me:$LINENO: result: $ac_cv_host" >&5 +echo "${ECHO_T}$ac_cv_host" >&6 host=$ac_cv_host -ac_save_IFS=$IFS; IFS='-' -set x $ac_cv_host -shift -host_cpu=$1 -host_vendor=$2 -shift; shift -# 
Remember, the first character of IFS is used to create $*, -# except with old shells: -host_os=$* -IFS=$ac_save_IFS -case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac +host_cpu=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` +host_vendor=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` +host_os=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` @@ -1956,7 +1492,7 @@ if test "x$USR_CFLAGS" = "x"; then USR_CFLAGS_SET=no fi -ac_config_headers="$ac_config_headers pvfs2-config.h" + ac_config_headers="$ac_config_headers pvfs2-config.h" ac_ext=c @@ -1967,8 +1503,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. set dummy ${ac_tool_prefix}gcc; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 if test "${ac_cv_prog_CC+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else @@ -1981,34 +1517,32 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}gcc" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done -IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { echo "$as_me:$LINENO: result: $CC" >&5 -echo "${ECHO_T}$CC" >&6; } + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi - fi if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 if test "${ac_cv_prog_ac_ct_CC+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else @@ -2021,51 +1555,36 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="gcc" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done -IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then - { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 -echo "${ECHO_T}$ac_ct_CC" >&6; } + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." >&5 -echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi + CC=$ac_ct_CC else CC="$ac_cv_prog_CC" fi if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. set dummy ${ac_tool_prefix}cc; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6 if test "${ac_cv_prog_CC+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else @@ -2078,34 +1597,74 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}cc" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done -IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { echo "$as_me:$LINENO: result: $CC" >&5 -echo "${ECHO_T}$CC" >&6; } + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi - +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 if test "${ac_cv_prog_CC+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else @@ -2119,7 +1678,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue @@ -2130,7 +1689,6 @@ do fi done done -IFS=$as_save_IFS if test $ac_prog_rejected = yes; then # We found a bogon in the path, so make sure we never use it. 
@@ -2148,23 +1706,22 @@ fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { echo "$as_me:$LINENO: result: $CC" >&5 -echo "${ECHO_T}$CC" >&6; } + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi - fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then - for ac_prog in cl.exe + for ac_prog in cl do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 if test "${ac_cv_prog_CC+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else @@ -2177,38 +1734,36 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done -IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { echo "$as_me:$LINENO: result: $CC" >&5 -echo "${ECHO_T}$CC" >&6; } + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi - test -n "$CC" && break done fi if test -z "$CC"; then ac_ct_CC=$CC - for ac_prog in cl.exe + for ac_prog in cl do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 if test "${ac_cv_prog_ac_ct_CC+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else @@ -2221,45 +1776,29 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="$ac_prog" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done -IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then - { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 -echo "${ECHO_T}$ac_ct_CC" >&6; } + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi - test -n "$ac_ct_CC" && break done - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." >&5 -echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi + CC=$ac_ct_CC fi fi @@ -2272,35 +1811,21 @@ See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; } # Provide some information about the compiler. 
-echo "$as_me:$LINENO: checking for C compiler version" >&5 +echo "$as_me:$LINENO:" \ + "checking for C compiler version" >&5 ac_compiler=`set X $ac_compile; echo $2` -{ (ac_try="$ac_compiler --version >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compiler --version >&5") 2>&5 +{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version &5\"") >&5 + (eval $ac_compiler --version &5) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } -{ (ac_try="$ac_compiler -v >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compiler -v >&5") 2>&5 +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v &5\"") >&5 + (eval $ac_compiler -v &5) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } -{ (ac_try="$ac_compiler -V >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compiler -V >&5") 2>&5 +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V &5\"") >&5 + (eval $ac_compiler -V &5) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } @@ -2325,77 +1850,47 @@ ac_clean_files="$ac_clean_files a.out a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. -{ echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 -echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 +echo $ECHO_N "checking for C compiler default output file name... 
$ECHO_C" >&6 ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` -# -# List of possible output files, starting from the most likely. -# The algorithm is not robust to junk in `.', hence go to wildcards (a.*) -# only as a last resort. b.out is created by i960 compilers. -ac_files='a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out' -# -# The IRIX 6 linker writes into existing files which may not be -# executable, retaining their permissions. Remove them first so a -# subsequent execution test works. -ac_rmfiles= -for ac_file in $ac_files -do - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;; - * ) ac_rmfiles="$ac_rmfiles $ac_file";; - esac -done -rm -f $ac_rmfiles - -if { (ac_try="$ac_link_default" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link_default") 2>&5 +if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5 + (eval $ac_link_default) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then - # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. -# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' -# in a Makefile. We should not override ac_cv_exeext if it was cached, -# so that the user can short-circuit this test for compilers unknown to -# Autoconf. -for ac_file in $ac_files '' + # Find the output, starting from the most likely. This scheme is +# not robust to junk in `.', hence go to wildcards (a.*) only as a last +# resort. + +# Be careful to initialize this variable, since it used to be cached. +# Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile. +ac_cv_exeext= +# b.out is created by i960 compilers. 
+for ac_file in a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out do test -f "$ac_file" || continue case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) + ;; + conftest.$ac_ext ) + # This is the source file. ;; [ab].out ) # We found the default executable, but exeext='' is most # certainly right. break;; *.* ) - if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; - then :; else - ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` - fi - # We set ac_cv_exeext here because the later test for it is not - # safe: cross compilers may not add the suffix if given an `-o' - # argument, so we may need to know it at that point already. - # Even if this section looks crufty: it has the advantage of - # actually working. + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + # FIXME: I believe we export ac_cv_exeext for Libtool, + # but it would be cool to find out if it's true. Does anybody + # maintain Libtool? --akim. + export ac_cv_exeext break;; * ) break;; esac done -test "$ac_cv_exeext" = no && ac_cv_exeext= - else - ac_file='' -fi - -{ echo "$as_me:$LINENO: result: $ac_file" >&5 -echo "${ECHO_T}$ac_file" >&6; } -if test -z "$ac_file"; then echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 @@ -2407,21 +1902,19 @@ See \`config.log' for more details." >&2;} fi ac_exeext=$ac_cv_exeext +echo "$as_me:$LINENO: result: $ac_file" >&5 +echo "${ECHO_T}$ac_file" >&6 -# Check that the compiler produces executables we can run. If not, either +# Check the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. -{ echo "$as_me:$LINENO: checking whether the C compiler works" >&5 -echo $ECHO_N "checking whether the C compiler works... 
$ECHO_C" >&6; } +echo "$as_me:$LINENO: checking whether the C compiler works" >&5 +echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6 # FIXME: These cross compiler hacks should be removed for Autoconf 3.0 # If not cross compiling, check that we can run a simple program. if test "$cross_compiling" != yes; then if { ac_try='./$ac_file' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then @@ -2440,27 +1933,22 @@ See \`config.log' for more details." >&2;} fi fi fi -{ echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 rm -f a.out a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save -# Check that the compiler produces executables we can run. If not, either +# Check the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. -{ echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 -echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6; } -{ echo "$as_me:$LINENO: result: $cross_compiling" >&5 -echo "${ECHO_T}$cross_compiling" >&6; } - -{ echo "$as_me:$LINENO: checking for suffix of executables" >&5 -echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6; } -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>&5 +echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 +echo $ECHO_N "checking whether we are cross compiling... 
$ECHO_C" >&6 +echo "$as_me:$LINENO: result: $cross_compiling" >&5 +echo "${ECHO_T}$cross_compiling" >&6 + +echo "$as_me:$LINENO: checking for suffix of executables" >&5 +echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then @@ -2471,8 +1959,9 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 for ac_file in conftest.exe conftest conftest.*; do test -f "$ac_file" || continue case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;; + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) ;; *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + export ac_cv_exeext break;; * ) break;; esac @@ -2486,14 +1975,14 @@ See \`config.log' for more details." >&2;} fi rm -f conftest$ac_cv_exeext -{ echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 -echo "${ECHO_T}$ac_cv_exeext" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 +echo "${ECHO_T}$ac_cv_exeext" >&6 rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext ac_exeext=$EXEEXT -{ echo "$as_me:$LINENO: checking for suffix of object files" >&5 -echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for suffix of object files" >&5 +echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6 if test "${ac_cv_objext+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else @@ -2513,20 +2002,14 @@ main () } _ACEOF rm -f conftest.o conftest.obj -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>&5 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; then - for ac_file in conftest.o conftest.obj conftest.*; do - test -f "$ac_file" || continue; + for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf ) ;; + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg ) ;; *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` break;; esac @@ -2544,12 +2027,12 @@ fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi -{ echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 -echo "${ECHO_T}$ac_cv_objext" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 +echo "${ECHO_T}$ac_cv_objext" >&6 OBJEXT=$ac_cv_objext ac_objext=$OBJEXT -{ echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 -echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6 if test "${ac_cv_c_compiler_gnu+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else @@ -2572,49 +2055,50 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_compiler_gnu=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_compiler_gnu=no +ac_compiler_gnu=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi -{ echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 -echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6 GCC=`test $ac_compiler_gnu = yes && echo yes` ac_test_CFLAGS=${CFLAGS+set} ac_save_CFLAGS=$CFLAGS -{ echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 -echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6; } +CFLAGS="-g" +echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6 if test "${ac_cv_prog_cc_g+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_save_c_werror_flag=$ac_c_werror_flag - ac_c_werror_flag=yes - ac_cv_prog_cc_g=no - CFLAGS="-g" - cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -2630,118 +2114,38 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_prog_cc_g=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - CFLAGS="" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - : -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_c_werror_flag=$ac_save_c_werror_flag - CFLAGS="-g" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 ac_status=$? 
- grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); }; }; then ac_cv_prog_cc_g=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_prog_cc_g=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_c_werror_flag=$ac_save_c_werror_flag +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext fi -{ echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 -echo "${ECHO_T}$ac_cv_prog_cc_g" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_g" >&6 if test "$ac_test_CFLAGS" = set; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then @@ -2757,12 +2161,12 @@ else CFLAGS= fi fi -{ echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 -echo $ECHO_N "checking for $CC option to accept ISO C89... $ECHO_C" >&6; } -if test "${ac_cv_prog_cc_c89+set}" = set; then +echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5 +echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6 +if test "${ac_cv_prog_cc_stdc+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_cv_prog_cc_c89=no + ac_cv_prog_cc_stdc=no ac_save_CC=$CC cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ @@ -2796,17 +2200,12 @@ static char *f (char * (*g) (char **, int), char **p, ...) /* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has function prototypes and stuff, but not '\xHH' hex character constants. These don't provoke an error unfortunately, instead are silently treated - as 'x'. The following induces an error, until -std is added to get + as 'x'. 
The following induces an error, until -std1 is added to get proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an array size at least. It's necessary to write '\x00'==0 to get something - that's true only with -std. */ + that's true only with -std1. */ int osf4_cc_array ['\x00' == 0 ? 1 : -1]; -/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters - inside strings and character constants. */ -#define FOO(x) 'x' -int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; - int test (int i, double x); struct s1 {int (*f) (int a);}; struct s2 {int (*f) (double a);}; @@ -2821,120 +2220,270 @@ return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; return 0; } _ACEOF -for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ - -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +# Don't try gcc -ansi; that turns off useful extensions and +# breaks some systems' header files. +# AIX -qlanglvl=ansi +# Ultrix and OSF/1 -std1 +# HP-UX 10.20 and later -Ae +# HP-UX older versions -Aa -D_HPUX_SOURCE +# SVR4 -Xc -D__EXTENSIONS__ +for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_prog_cc_c89=$ac_arg + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_stdc=$ac_arg +break else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - fi - -rm -f core conftest.err conftest.$ac_objext - test "x$ac_cv_prog_cc_c89" != "xno" && break +rm -f conftest.err conftest.$ac_objext done -rm -f conftest.$ac_ext +rm -f conftest.$ac_ext conftest.$ac_objext CC=$ac_save_CC fi -# AC_CACHE_VAL -case "x$ac_cv_prog_cc_c89" in - x) - { echo "$as_me:$LINENO: result: none needed" >&5 -echo "${ECHO_T}none needed" >&6; } ;; - xno) - { echo "$as_me:$LINENO: result: unsupported" >&5 -echo "${ECHO_T}unsupported" >&6; } ;; + +case "x$ac_cv_prog_cc_stdc" in + x|xno) + echo "$as_me:$LINENO: result: none needed" >&5 +echo "${ECHO_T}none needed" >&6 ;; *) - CC="$CC $ac_cv_prog_cc_c89" - { echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5 -echo "${ECHO_T}$ac_cv_prog_cc_c89" >&6; } ;; + echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6 + CC="$CC $ac_cv_prog_cc_stdc" ;; esac - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu -{ echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 -echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6; } -# On Suns, sometimes $CPP names a directory. 
-if test -n "$CPP" && test -d "$CPP"; then - CPP= -fi -if test -z "$CPP"; then - if test "${ac_cv_prog_CPP+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - # Double quotes because CPP needs to be expanded - for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" - do - ac_preproc_ok=false -for ac_c_preproc_warn_flag in '' yes -do - # Use a header file that comes with gcc, so configuring glibc - # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. - # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. "Syntax error" is here to catch this case. - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ +# Some people use a C++ compiler to compile C. Since we use `exit', +# in C++ we need to declare it. In case someone uses the same compiler +# for both compiling C and C++ we need to have the C++ compiler decide +# the declaration of exit, since it's the most demanding environment. +cat >conftest.$ac_ext <<_ACEOF +#ifndef __cplusplus + choke me +#endif _ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#ifdef __STDC__ -# include +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + for ac_declaration in \ + '' \ + 'extern "C" void std::exit (int) throw (); using std::exit;' \ + 'extern "C" void std::exit (int); using std::exit;' \ + 'extern "C" void exit (int) throw ();' \ + 'extern "C" void exit (int);' \ + 'void exit (int);' +do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +#include +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +continue +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +done +rm -f conftest* +if test -n "$ac_declaration"; then + echo '#ifdef __cplusplus' >>confdefs.h + echo $ac_declaration >>confdefs.h + echo '#endif' >>confdefs.h +fi + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 +echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6 +# On Suns, sometimes $CPP names a directory. 
+if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if test "${ac_cv_prog_CPP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include #else # include #endif Syntax error _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then : else echo "$as_me: failed program was:" >&5 @@ -2943,10 +2492,9 @@ sed 's/^/| /' conftest.$ac_ext >&5 # Broken: fails on valid input. continue fi - rm -f conftest.err conftest.$ac_ext - # OK, works on sane cases. 
Now check whether nonexistent headers + # OK, works on sane cases. Now check whether non-existent headers # can be detected and how. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ @@ -2956,22 +2504,24 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then # Broken: success on invalid input. 
continue else @@ -2982,7 +2532,6 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_preproc_ok=: break fi - rm -f conftest.err conftest.$ac_ext done @@ -3000,8 +2549,8 @@ fi else ac_cv_prog_CPP=$CPP fi -{ echo "$as_me:$LINENO: result: $CPP" >&5 -echo "${ECHO_T}$CPP" >&6; } +echo "$as_me:$LINENO: result: $CPP" >&5 +echo "${ECHO_T}$CPP" >&6 ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do @@ -3024,22 +2573,24 @@ cat >>conftest.$ac_ext <<_ACEOF #endif Syntax error _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then : else echo "$as_me: failed program was:" >&5 @@ -3048,10 +2599,9 @@ sed 's/^/| /' conftest.$ac_ext >&5 # Broken: fails on valid input. continue fi - rm -f conftest.err conftest.$ac_ext - # OK, works on sane cases. Now check whether nonexistent headers + # OK, works on sane cases. Now check whether non-existent headers # can be detected and how. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ @@ -3061,22 +2611,24 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then # Broken: success on invalid input. continue else @@ -3087,7 +2639,6 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_preproc_ok=: break fi - rm -f conftest.err conftest.$ac_ext done @@ -3110,170 +2661,23 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $ ac_compiler_gnu=$ac_cv_c_compiler_gnu -{ echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5 -echo $ECHO_N "checking for grep that handles long lines and -e... $ECHO_C" >&6; } -if test "${ac_cv_path_GREP+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - # Extract the first word of "grep ggrep" to use in msg output -if test -z "$GREP"; then -set dummy grep ggrep; ac_prog_name=$2 -if test "${ac_cv_path_GREP+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_path_GREP_found=false -# Loop through the user's path and test for each of PROGNAME-LIST -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_prog in grep ggrep; do - for ac_exec_ext in '' $ac_executable_extensions; do - ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" - { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue - # Check for GNU ac_path_GREP and select it if it is found. - # Check for GNU $ac_path_GREP -case `"$ac_path_GREP" --version 2>&1` in -*GNU*) - ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; -*) - ac_count=0 - echo $ECHO_N "0123456789$ECHO_C" >"conftest.in" - while : - do - cat "conftest.in" "conftest.in" >"conftest.tmp" - mv "conftest.tmp" "conftest.in" - cp "conftest.in" "conftest.nl" - echo 'GREP' >> "conftest.nl" - "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break - diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break - ac_count=`expr $ac_count + 1` - if test $ac_count -gt ${ac_path_GREP_max-0}; then - # Best one so far, save it but keep looking for a better one - ac_cv_path_GREP="$ac_path_GREP" - ac_path_GREP_max=$ac_count - fi - # 10*(2^10) chars as input seems more than enough - test $ac_count -gt 10 && break - done - rm -f conftest.in conftest.tmp conftest.nl conftest.out;; -esac - - - $ac_path_GREP_found && break 3 - done -done - -done -IFS=$as_save_IFS - - -fi - -GREP="$ac_cv_path_GREP" -if test -z "$GREP"; then - { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 -echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} - { (exit 1); exit 1; }; } -fi - -else - ac_cv_path_GREP=$GREP -fi - - -fi -{ echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5 -echo "${ECHO_T}$ac_cv_path_GREP" >&6; } - GREP="$ac_cv_path_GREP" - - -{ echo "$as_me:$LINENO: checking for egrep" >&5 -echo $ECHO_N "checking for egrep... 
$ECHO_C" >&6; } -if test "${ac_cv_path_EGREP+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 - then ac_cv_path_EGREP="$GREP -E" - else - # Extract the first word of "egrep" to use in msg output -if test -z "$EGREP"; then -set dummy egrep; ac_prog_name=$2 -if test "${ac_cv_path_EGREP+set}" = set; then +echo "$as_me:$LINENO: checking for egrep" >&5 +echo $ECHO_N "checking for egrep... $ECHO_C" >&6 +if test "${ac_cv_prog_egrep+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_path_EGREP_found=false -# Loop through the user's path and test for each of PROGNAME-LIST -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_prog in egrep; do - for ac_exec_ext in '' $ac_executable_extensions; do - ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" - { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue - # Check for GNU ac_path_EGREP and select it if it is found. 
- # Check for GNU $ac_path_EGREP -case `"$ac_path_EGREP" --version 2>&1` in -*GNU*) - ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; -*) - ac_count=0 - echo $ECHO_N "0123456789$ECHO_C" >"conftest.in" - while : - do - cat "conftest.in" "conftest.in" >"conftest.tmp" - mv "conftest.tmp" "conftest.in" - cp "conftest.in" "conftest.nl" - echo 'EGREP' >> "conftest.nl" - "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break - diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break - ac_count=`expr $ac_count + 1` - if test $ac_count -gt ${ac_path_EGREP_max-0}; then - # Best one so far, save it but keep looking for a better one - ac_cv_path_EGREP="$ac_path_EGREP" - ac_path_EGREP_max=$ac_count + if echo a | (grep -E '(a|b)') >/dev/null 2>&1 + then ac_cv_prog_egrep='grep -E' + else ac_cv_prog_egrep='egrep' fi - # 10*(2^10) chars as input seems more than enough - test $ac_count -gt 10 && break - done - rm -f conftest.in conftest.tmp conftest.nl conftest.out;; -esac - - - $ac_path_EGREP_found && break 3 - done -done - -done -IFS=$as_save_IFS - - -fi - -EGREP="$ac_cv_path_EGREP" -if test -z "$EGREP"; then - { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 -echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} - { (exit 1); exit 1; }; } -fi - -else - ac_cv_path_EGREP=$EGREP -fi - - - fi fi -{ echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5 -echo "${ECHO_T}$ac_cv_path_EGREP" >&6; } - EGREP="$ac_cv_path_EGREP" +echo "$as_me:$LINENO: result: $ac_cv_prog_egrep" >&5 +echo "${ECHO_T}$ac_cv_prog_egrep" >&6 + EGREP=$ac_cv_prog_egrep -{ echo "$as_me:$LINENO: checking for ANSI C header files" >&5 -echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for ANSI C header files" >&5 +echo $ECHO_N "checking for ANSI C header files... 
$ECHO_C" >&6 if test "${ac_cv_header_stdc+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else @@ -3297,31 +2701,35 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_cv_header_stdc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_cv_header_stdc=no +ac_cv_header_stdc=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. @@ -3377,7 +2785,6 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include -#include #if ((' ' & 0x0FF) == 0x020) # define ISLOWER(c) ('a' <= (c) && (c) <= 'z') # define TOUPPER(c) (ISLOWER(c) ? 
'A' + ((c) - 'a') : (c)) @@ -3397,27 +2804,18 @@ main () for (i = 0; i < 256; i++) if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) - return 2; - return 0; + exit(2); + exit (0); } _ACEOF rm -f conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>&5 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then @@ -3430,14 +2828,12 @@ sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ac_cv_header_stdc=no fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +rm -f core *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi - - fi fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 -echo "${ECHO_T}$ac_cv_header_stdc" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 +echo "${ECHO_T}$ac_cv_header_stdc" >&6 if test $ac_cv_header_stdc = yes; then cat >>confdefs.h <<\_ACEOF @@ -3460,9 +2856,9 @@ for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ inttypes.h stdint.h unistd.h do as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF @@ -3476,35 +2872,38 @@ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then eval "$as_ac_Header=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_Header=no" +eval "$as_ac_Header=no" fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 if test `eval echo '${'$as_ac_Header'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 @@ -3515,18 +2914,18 @@ fi done -if test "${ac_cv_header_sys_vfs_h+set}" = set; then - { echo "$as_me:$LINENO: checking for sys/vfs.h" >&5 -echo $ECHO_N "checking for sys/vfs.h... $ECHO_C" >&6; } -if test "${ac_cv_header_sys_vfs_h+set}" = set; then +if test "${ac_cv_header_pwd_h+set}" = set; then + echo "$as_me:$LINENO: checking for pwd.h" >&5 +echo $ECHO_N "checking for pwd.h... $ECHO_C" >&6 +if test "${ac_cv_header_pwd_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_sys_vfs_h" >&5 -echo "${ECHO_T}$ac_cv_header_sys_vfs_h" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_pwd_h" >&5 +echo "${ECHO_T}$ac_cv_header_pwd_h" >&6 else # Is the header compilable? -{ echo "$as_me:$LINENO: checking sys/vfs.h usability" >&5 -echo $ECHO_N "checking sys/vfs.h usability... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking pwd.h usability" >&5 +echo $ECHO_N "checking pwd.h usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -3534,64 +2933,70 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ $ac_includes_default -#include +#include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +ac_header_compiler=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? -{ echo "$as_me:$LINENO: checking sys/vfs.h presence" >&5 -echo $ECHO_N "checking sys/vfs.h presence... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking pwd.h presence" >&5 +echo $ECHO_N "checking pwd.h presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -#include +#include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 @@ -3599,68 +3004,74 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi - rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) - { echo "$as_me:$LINENO: WARNING: sys/vfs.h: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: sys/vfs.h: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: sys/vfs.h: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: sys/vfs.h: proceeding with the compiler's result" >&2;} + { echo "$as_me:$LINENO: WARNING: pwd.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: pwd.h: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: pwd.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: pwd.h: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) - { echo "$as_me:$LINENO: WARNING: sys/vfs.h: present but cannot be compiled" >&5 -echo "$as_me: WARNING: sys/vfs.h: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: sys/vfs.h: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: sys/vfs.h: check for missing prerequisite headers?" >&2;} - { echo "$as_me:$LINENO: WARNING: sys/vfs.h: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: sys/vfs.h: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: sys/vfs.h: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: sys/vfs.h: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: sys/vfs.h: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: sys/vfs.h: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: sys/vfs.h: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: sys/vfs.h: in the future, the compiler will take precedence" >&2;} - + { echo "$as_me:$LINENO: WARNING: pwd.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: pwd.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: pwd.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: pwd.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: pwd.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: pwd.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: pwd.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: pwd.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: pwd.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: pwd.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: pwd.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: pwd.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 ;; esac -{ echo "$as_me:$LINENO: checking for sys/vfs.h" >&5 -echo $ECHO_N "checking for sys/vfs.h... $ECHO_C" >&6; } -if test "${ac_cv_header_sys_vfs_h+set}" = set; then +echo "$as_me:$LINENO: checking for pwd.h" >&5 +echo $ECHO_N "checking for pwd.h... $ECHO_C" >&6 +if test "${ac_cv_header_pwd_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_cv_header_sys_vfs_h=$ac_header_preproc + ac_cv_header_pwd_h=$ac_header_preproc fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_sys_vfs_h" >&5 -echo "${ECHO_T}$ac_cv_header_sys_vfs_h" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_pwd_h" >&5 +echo "${ECHO_T}$ac_cv_header_pwd_h" >&6 fi -if test $ac_cv_header_sys_vfs_h = yes; then +if test $ac_cv_header_pwd_h = yes; then cat >>confdefs.h <<\_ACEOF -#define HAVE_SYS_VFS_H 1 +#define HAVE_PWD_H 1 _ACEOF fi -if test "${ac_cv_header_sys_mount_h+set}" = set; then - { echo "$as_me:$LINENO: checking for sys/mount.h" >&5 -echo $ECHO_N "checking for sys/mount.h... 
$ECHO_C" >&6; } -if test "${ac_cv_header_sys_mount_h+set}" = set; then +if test "${ac_cv_header_features_h+set}" = set; then + echo "$as_me:$LINENO: checking for features.h" >&5 +echo $ECHO_N "checking for features.h... $ECHO_C" >&6 +if test "${ac_cv_header_features_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_sys_mount_h" >&5 -echo "${ECHO_T}$ac_cv_header_sys_mount_h" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_features_h" >&5 +echo "${ECHO_T}$ac_cv_header_features_h" >&6 else # Is the header compilable? -{ echo "$as_me:$LINENO: checking sys/mount.h usability" >&5 -echo $ECHO_N "checking sys/mount.h usability... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking features.h usability" >&5 +echo $ECHO_N "checking features.h usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -3668,64 +3079,70 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default -#include +#include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +ac_header_compiler=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? -{ echo "$as_me:$LINENO: checking sys/mount.h presence" >&5 -echo $ECHO_N "checking sys/mount.h presence... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking features.h presence" >&5 +echo $ECHO_N "checking features.h presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include +#include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! 
-s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 @@ -3733,75 +3150,74 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi - rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) - { echo "$as_me:$LINENO: WARNING: sys/mount.h: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: sys/mount.h: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: sys/mount.h: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: sys/mount.h: proceeding with the compiler's result" >&2;} + { echo "$as_me:$LINENO: WARNING: features.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: features.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: features.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: features.h: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) - { echo "$as_me:$LINENO: WARNING: sys/mount.h: present but cannot be compiled" >&5 -echo "$as_me: WARNING: sys/mount.h: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: sys/mount.h: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: sys/mount.h: check for missing prerequisite headers?" 
>&2;} - { echo "$as_me:$LINENO: WARNING: sys/mount.h: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: sys/mount.h: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: sys/mount.h: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: sys/mount.h: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: sys/mount.h: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: sys/mount.h: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: sys/mount.h: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: sys/mount.h: in the future, the compiler will take precedence" >&2;} - + { echo "$as_me:$LINENO: WARNING: features.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: features.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: features.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: features.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: features.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: features.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: features.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: features.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: features.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: features.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: features.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: features.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. 
## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 ;; esac -{ echo "$as_me:$LINENO: checking for sys/mount.h" >&5 -echo $ECHO_N "checking for sys/mount.h... $ECHO_C" >&6; } -if test "${ac_cv_header_sys_mount_h+set}" = set; then +echo "$as_me:$LINENO: checking for features.h" >&5 +echo $ECHO_N "checking for features.h... $ECHO_C" >&6 +if test "${ac_cv_header_features_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_cv_header_sys_mount_h=$ac_header_preproc + ac_cv_header_features_h=$ac_header_preproc fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_sys_mount_h" >&5 -echo "${ECHO_T}$ac_cv_header_sys_mount_h" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_features_h" >&5 +echo "${ECHO_T}$ac_cv_header_features_h" >&6 fi -if test $ac_cv_header_sys_mount_h = yes; then +if test $ac_cv_header_features_h = yes; then cat >>confdefs.h <<\_ACEOF -#define HAVE_SYS_MOUNT_H 1 +#define HAVE_FEATURES_H 1 _ACEOF fi - - - -for ac_header in mntent.h, fstab.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +if test "${ac_cv_header_fstab_h+set}" = set; then + echo "$as_me:$LINENO: checking for fstab.h" >&5 +echo $ECHO_N "checking for fstab.h... $ECHO_C" >&6 +if test "${ac_cv_header_fstab_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_fstab_h" >&5 +echo "${ECHO_T}$ac_cv_header_fstab_h" >&6 else # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... 
$ECHO_C" >&6; } +echo "$as_me:$LINENO: checking fstab.h usability" >&5 +echo $ECHO_N "checking fstab.h usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -3809,64 +3225,70 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default -#include <$ac_header> +#include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +ac_header_compiler=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? 
-{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking fstab.h presence" >&5 +echo $ECHO_N "checking fstab.h presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include <$ac_header> +#include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 @@ -3874,75 +3296,74 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi - rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" 
>&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + { echo "$as_me:$LINENO: WARNING: fstab.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: fstab.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: fstab.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: fstab.h: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} - + { echo "$as_me:$LINENO: WARNING: fstab.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: fstab.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: fstab.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: fstab.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: fstab.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: fstab.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: fstab.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: fstab.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: fstab.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: fstab.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: fstab.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: fstab.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 ;; esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +echo "$as_me:$LINENO: checking for fstab.h" >&5 +echo $ECHO_N "checking for fstab.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_fstab_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - eval "$as_ac_Header=\$ac_header_preproc" + ac_cv_header_fstab_h=$ac_header_preproc fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_fstab_h" >&5 +echo "${ECHO_T}$ac_cv_header_fstab_h" >&6 fi -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +if test $ac_cv_header_fstab_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FSTAB_H 1 _ACEOF fi -done - - -for ac_header in stdarg.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +if test "${ac_cv_header_malloc_h+set}" = set; then + echo "$as_me:$LINENO: checking for malloc.h" >&5 +echo $ECHO_N "checking for malloc.h... $ECHO_C" >&6 +if test "${ac_cv_header_malloc_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_malloc_h" >&5 +echo "${ECHO_T}$ac_cv_header_malloc_h" >&6 else # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking malloc.h usability" >&5 +echo $ECHO_N "checking malloc.h usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -3950,64 +3371,70 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ $ac_includes_default -#include <$ac_header> +#include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +ac_header_compiler=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking malloc.h presence" >&5 +echo $ECHO_N "checking malloc.h presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include <$ac_header> +#include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 @@ -4015,75 +3442,74 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi - rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + { echo "$as_me:$LINENO: WARNING: malloc.h: accepted by the compiler, rejected by the preprocessor!" 
>&5 +echo "$as_me: WARNING: malloc.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: malloc.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: malloc.h: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} - + { echo "$as_me:$LINENO: WARNING: malloc.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: malloc.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: malloc.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: malloc.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: malloc.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: malloc.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: malloc.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: malloc.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: malloc.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: malloc.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: malloc.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: malloc.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 ;; esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +echo "$as_me:$LINENO: checking for malloc.h" >&5 +echo $ECHO_N "checking for malloc.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_malloc_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - eval "$as_ac_Header=\$ac_header_preproc" + ac_cv_header_malloc_h=$ac_header_preproc fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_malloc_h" >&5 +echo "${ECHO_T}$ac_cv_header_malloc_h" >&6 fi -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +if test $ac_cv_header_malloc_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_MALLOC_H 1 _ACEOF fi -done - - -for ac_header in attr/xattr.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +if test "${ac_cv_header_memory_h+set}" = set; then + echo "$as_me:$LINENO: checking for memory.h" >&5 +echo $ECHO_N "checking for memory.h... $ECHO_C" >&6 +if test "${ac_cv_header_memory_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_memory_h" >&5 +echo "${ECHO_T}$ac_cv_header_memory_h" >&6 else # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking memory.h usability" >&5 +echo $ECHO_N "checking memory.h usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -4091,64 +3517,70 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ $ac_includes_default -#include <$ac_header> +#include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +ac_header_compiler=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking memory.h presence" >&5 +echo $ECHO_N "checking memory.h presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include <$ac_header> +#include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 @@ -4156,74 +3588,74 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi - rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + { echo "$as_me:$LINENO: WARNING: memory.h: accepted by the compiler, rejected by the preprocessor!" 
>&5 +echo "$as_me: WARNING: memory.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: memory.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: memory.h: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} - + { echo "$as_me:$LINENO: WARNING: memory.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: memory.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: memory.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: memory.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: memory.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: memory.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: memory.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: memory.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: memory.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: memory.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: memory.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: memory.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 ;; esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +echo "$as_me:$LINENO: checking for memory.h" >&5 +echo $ECHO_N "checking for memory.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_memory_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - eval "$as_ac_Header=\$ac_header_preproc" + ac_cv_header_memory_h=$ac_header_preproc fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_memory_h" >&5 +echo "${ECHO_T}$ac_cv_header_memory_h" >&6 fi -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +if test $ac_cv_header_memory_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_MEMORY_H 1 _ACEOF fi -done - -for ac_header in sys/xattr.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +if test "${ac_cv_header_mntent_h+set}" = set; then + echo "$as_me:$LINENO: checking for mntent.h" >&5 +echo $ECHO_N "checking for mntent.h... $ECHO_C" >&6 +if test "${ac_cv_header_mntent_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_mntent_h" >&5 +echo "${ECHO_T}$ac_cv_header_mntent_h" >&6 else # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking mntent.h usability" >&5 +echo $ECHO_N "checking mntent.h usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -4231,64 +3663,70 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ $ac_includes_default -#include <$ac_header> +#include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +ac_header_compiler=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking mntent.h presence" >&5 +echo $ECHO_N "checking mntent.h presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include <$ac_header> +#include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 @@ -4296,123 +3734,220 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi - rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + { echo "$as_me:$LINENO: WARNING: mntent.h: accepted by the compiler, rejected by the preprocessor!" 
>&5 +echo "$as_me: WARNING: mntent.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: mntent.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: mntent.h: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} - + { echo "$as_me:$LINENO: WARNING: mntent.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: mntent.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: mntent.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: mntent.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: mntent.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: mntent.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: mntent.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: mntent.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: mntent.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: mntent.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: mntent.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: mntent.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 ;; esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +echo "$as_me:$LINENO: checking for mntent.h" >&5 +echo $ECHO_N "checking for mntent.h... $ECHO_C" >&6 +if test "${ac_cv_header_mntent_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - eval "$as_ac_Header=\$ac_header_preproc" + ac_cv_header_mntent_h=$ac_header_preproc fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_mntent_h" >&5 +echo "${ECHO_T}$ac_cv_header_mntent_h" >&6 fi -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +if test $ac_cv_header_mntent_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_MNTENT_H 1 _ACEOF fi -done - -{ echo "$as_me:$LINENO: checking for long int" >&5 -echo $ECHO_N "checking for long int... 
$ECHO_C" >&6; } -if test "${ac_cv_type_long_int+set}" = set; then +if test "${ac_cv_header_netdb_h+set}" = set; then + echo "$as_me:$LINENO: checking for netdb.h" >&5 +echo $ECHO_N "checking for netdb.h... $ECHO_C" >&6 +if test "${ac_cv_header_netdb_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_netdb_h" >&5 +echo "${ECHO_T}$ac_cv_header_netdb_h" >&6 else - cat >conftest.$ac_ext <<_ACEOF + # Is the header compilable? +echo "$as_me:$LINENO: checking netdb.h usability" >&5 +echo $ECHO_N "checking netdb.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default -typedef long int ac__type_new_; -int -main () -{ -if ((ac__type_new_ *) 0) - return 0; -if (sizeof (ac__type_new_)) - return 0; - ; - return 0; -} +#include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_type_long_int=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 - ac_cv_type_long_int=no +ac_header_compiler=no fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +# Is the header present? +echo "$as_me:$LINENO: checking netdb.h presence" >&5 +echo $ECHO_N "checking netdb.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes fi -{ echo "$as_me:$LINENO: result: $ac_cv_type_long_int" >&5 -echo "${ECHO_T}$ac_cv_type_long_int" >&6; } +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -# The cast to long int works around a bug in the HP C Compiler -# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects -# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. -# This bug is HP SR number 8606223364. -{ echo "$as_me:$LINENO: checking size of long int" >&5 -echo $ECHO_N "checking size of long int... 
$ECHO_C" >&6; } -if test "${ac_cv_sizeof_long_int+set}" = set; then + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: netdb.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: netdb.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: netdb.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: netdb.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: netdb.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: netdb.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: netdb.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: netdb.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: netdb.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: netdb.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: netdb.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: netdb.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: netdb.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: netdb.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: netdb.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: netdb.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. 
## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for netdb.h" >&5 +echo $ECHO_N "checking for netdb.h... $ECHO_C" >&6 +if test "${ac_cv_header_netdb_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - if test "$cross_compiling" = yes; then - # Depending upon the size, compute the lo and hi bounds. + ac_cv_header_netdb_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_netdb_h" >&5 +echo "${ECHO_T}$ac_cv_header_netdb_h" >&6 + +fi +if test $ac_cv_header_netdb_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_NETDB_H 1 +_ACEOF + +fi + + +if test "${ac_cv_header_stdarg_h+set}" = set; then + echo "$as_me:$LINENO: checking for stdarg.h" >&5 +echo $ECHO_N "checking for stdarg.h... $ECHO_C" >&6 +if test "${ac_cv_header_stdarg_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_stdarg_h" >&5 +echo "${ECHO_T}$ac_cv_header_stdarg_h" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking stdarg.h usability" >&5 +echo $ECHO_N "checking stdarg.h usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -4420,2928 +3955,9376 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default - typedef long int ac__type_sizeof_; -int -main () -{ -static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) >= 0)]; -test_array [0] = 0 - - ; - return 0; -} +#include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_lo=0 ac_mid=0 - while :; do - cat >conftest.$ac_ext <<_ACEOF + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking stdarg.h presence" >&5 +echo $ECHO_N "checking stdarg.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -$ac_includes_default - typedef long int ac__type_sizeof_; -int -main () -{ -static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) <= $ac_mid)]; -test_array [0] = 0 - - ; - return 0; -} +#include _ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_hi=$ac_mid; break + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_lo=`expr $ac_mid + 1` - if test $ac_lo -le $ac_mid; then - ac_lo= ac_hi= - break - fi - ac_mid=`expr 2 '*' $ac_mid + 1` + ac_header_preproc=no fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - done +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: stdarg.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: stdarg.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: stdarg.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: stdarg.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: stdarg.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: stdarg.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: stdarg.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: stdarg.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: stdarg.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: stdarg.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: stdarg.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: stdarg.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: stdarg.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: stdarg.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: stdarg.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: stdarg.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for stdarg.h" >&5 +echo $ECHO_N "checking for stdarg.h... $ECHO_C" >&6 +if test "${ac_cv_header_stdarg_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 + ac_cv_header_stdarg_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_stdarg_h" >&5 +echo "${ECHO_T}$ac_cv_header_stdarg_h" >&6 - cat >conftest.$ac_ext <<_ACEOF +fi +if test $ac_cv_header_stdarg_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_STDARG_H 1 +_ACEOF + +fi + + +if test "${ac_cv_header_stdint_h+set}" = set; then + echo "$as_me:$LINENO: checking for stdint.h" >&5 +echo $ECHO_N "checking for stdint.h... $ECHO_C" >&6 +if test "${ac_cv_header_stdint_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_stdint_h" >&5 +echo "${ECHO_T}$ac_cv_header_stdint_h" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking stdint.h usability" >&5 +echo $ECHO_N "checking stdint.h usability... 
$ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default - typedef long int ac__type_sizeof_; -int -main () -{ -static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) < 0)]; -test_array [0] = 0 - - ; - return 0; -} +#include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_hi=-1 ac_mid=-1 - while :; do - cat >conftest.$ac_ext <<_ACEOF + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking stdint.h presence" >&5 +echo $ECHO_N "checking stdint.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -$ac_includes_default - typedef long int ac__type_sizeof_; -int -main () -{ -static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) >= $ac_mid)]; -test_array [0] = 0 - - ; - return 0; -} +#include _ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_lo=$ac_mid; break + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_hi=`expr '(' $ac_mid ')' - 1` - if test $ac_mid -le $ac_hi; then - ac_lo= ac_hi= - break - fi - ac_mid=`expr 2 '*' $ac_mid` + ac_header_preproc=no fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - done +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: stdint.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: stdint.h: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: stdint.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: stdint.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: stdint.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: stdint.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: stdint.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: stdint.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: stdint.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: stdint.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: stdint.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: stdint.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: stdint.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: stdint.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: stdint.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: stdint.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for stdint.h" >&5 +echo $ECHO_N "checking for stdint.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_stdint_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 + ac_cv_header_stdint_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_stdint_h" >&5 +echo "${ECHO_T}$ac_cv_header_stdint_h" >&6 - ac_lo= ac_hi= fi +if test $ac_cv_header_stdint_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_STDINT_H 1 +_ACEOF -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -# Binary search between lo and hi bounds. -while test "x$ac_lo" != "x$ac_hi"; do - ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo` - cat >conftest.$ac_ext <<_ACEOF + +if test "${ac_cv_header_stdlib_h+set}" = set; then + echo "$as_me:$LINENO: checking for stdlib.h" >&5 +echo $ECHO_N "checking for stdlib.h... $ECHO_C" >&6 +if test "${ac_cv_header_stdlib_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_stdlib_h" >&5 +echo "${ECHO_T}$ac_cv_header_stdlib_h" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking stdlib.h usability" >&5 +echo $ECHO_N "checking stdlib.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default - typedef long int ac__type_sizeof_; -int -main () -{ -static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) <= $ac_mid)]; -test_array [0] = 0 - - ; - return 0; -} +#include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_hi=$ac_mid + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_lo=`expr '(' $ac_mid ')' + 1` +ac_header_compiler=no fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -done -case $ac_lo in -?*) ac_cv_sizeof_long_int=$ac_lo;; -'') if test "$ac_cv_type_long_int" = yes; then - { { echo "$as_me:$LINENO: error: cannot compute sizeof (long int) -See \`config.log' for more details." >&5 -echo "$as_me: error: cannot compute sizeof (long int) -See \`config.log' for more details." >&2;} - { (exit 77); exit 77; }; } - else - ac_cv_sizeof_long_int=0 - fi ;; -esac -else - cat >conftest.$ac_ext <<_ACEOF +# Is the header present? +echo "$as_me:$LINENO: checking stdlib.h presence" >&5 +echo $ECHO_N "checking stdlib.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -$ac_includes_default - typedef long int ac__type_sizeof_; -static long int longval () { return (long int) (sizeof (ac__type_sizeof_)); } -static unsigned long int ulongval () { return (long int) (sizeof (ac__type_sizeof_)); } -#include #include -int -main () -{ - - FILE *f = fopen ("conftest.val", "w"); - if (! f) - return 1; - if (((long int) (sizeof (ac__type_sizeof_))) < 0) - { - long int i = longval (); - if (i != ((long int) (sizeof (ac__type_sizeof_)))) - return 1; - fprintf (f, "%ld\n", i); - } +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else - { - unsigned long int i = ulongval (); - if (i != ((long int) (sizeof (ac__type_sizeof_)))) - return 1; - fprintf (f, "%lu\n", i); - } - return ferror (f) || fclose (f) != 0; - - ; - return 0; -} -_ACEOF -rm -f conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { ac_try='./conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); }; }; then - ac_cv_sizeof_long_int=`cat conftest.val` + ac_cpp_err= + fi else - echo "$as_me: program exited with status $ac_status" >&5 -echo "$as_me: failed program was:" >&5 + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -( exit $ac_status ) -if test "$ac_cv_type_long_int" = yes; then - { { echo "$as_me:$LINENO: error: cannot compute sizeof (long int) -See \`config.log' for more details." >&5 -echo "$as_me: error: cannot compute sizeof (long int) -See \`config.log' for more details." >&2;} - { (exit 77); exit 77; }; } - else - ac_cv_sizeof_long_int=0 - fi -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext -fi -rm -f conftest.val + ac_header_preproc=no fi -{ echo "$as_me:$LINENO: result: $ac_cv_sizeof_long_int" >&5 -echo "${ECHO_T}$ac_cv_sizeof_long_int" >&6; } +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: stdlib.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: stdlib.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: stdlib.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: stdlib.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: stdlib.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: stdlib.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: stdlib.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: stdlib.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: stdlib.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: stdlib.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: stdlib.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: stdlib.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: stdlib.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: stdlib.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: stdlib.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: stdlib.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for stdlib.h" >&5 +echo $ECHO_N "checking for stdlib.h... $ECHO_C" >&6 +if test "${ac_cv_header_stdlib_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_stdlib_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_stdlib_h" >&5 +echo "${ECHO_T}$ac_cv_header_stdlib_h" >&6 +fi +if test $ac_cv_header_stdlib_h = yes; then -cat >>confdefs.h <<_ACEOF -#define SIZEOF_LONG_INT $ac_cv_sizeof_long_int +cat >>confdefs.h <<\_ACEOF +#define HAVE_STDLIB_H 1 _ACEOF +fi -# Find a good install program. We prefer a C program (faster), -# so one script is as good as another. 
But avoid the broken or -# incompatible versions: -# SysV /etc/install, /usr/sbin/install -# SunOS /usr/etc/install -# IRIX /sbin/install -# AIX /bin/install -# AmigaOS /C/install, which installs bootblocks on floppy discs -# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag -# AFS /usr/afsws/bin/install, which mishandles nonexistent args -# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" -# OS/2's system install, which has a completely different semantic -# ./install, which can be erroneously created by make from ./install.sh. -{ echo "$as_me:$LINENO: checking for a BSD-compatible install" >&5 -echo $ECHO_N "checking for a BSD-compatible install... $ECHO_C" >&6; } -if test -z "$INSTALL"; then -if test "${ac_cv_path_install+set}" = set; then +if test "${ac_cv_header_strings_h+set}" = set; then + echo "$as_me:$LINENO: checking for strings.h" >&5 +echo $ECHO_N "checking for strings.h... $ECHO_C" >&6 +if test "${ac_cv_header_strings_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_strings_h" >&5 +echo "${ECHO_T}$ac_cv_header_strings_h" >&6 else - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - # Account for people who put trailing slashes in PATH elements. -case $as_dir/ in - ./ | .// | /cC/* | \ - /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ - ?:\\/os2\\/install\\/* | ?:\\/OS2\\/INSTALL\\/* | \ - /usr/ucb/* ) ;; - *) - # OSF1 and SCO ODT 3.0 have their own names for install. - # Don't use installbsd from OSF since it installs stuff as root - # by default. - for ac_prog in ginstall scoinst install; do - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; }; then - if test $ac_prog = install && - grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then - # AIX install. 
It has an incompatible calling convention. - : - elif test $ac_prog = install && - grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then - # program-specific install script used by HP pwplus--don't use. - : - else - ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" - break 3 - fi - fi - done - done - ;; -esac -done -IFS=$as_save_IFS - + # Is the header compilable? +echo "$as_me:$LINENO: checking strings.h usability" >&5 +echo $ECHO_N "checking strings.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 +ac_header_compiler=no fi - if test "${ac_cv_path_install+set}" = set; then - INSTALL=$ac_cv_path_install +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking strings.h presence" >&5 +echo $ECHO_N "checking strings.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else - # As a last resort, use the slow shell script. Don't cache a - # value for INSTALL within a source directory, because that will - # break other packages using the cache if that directory is - # removed, or if the value is a relative name. - INSTALL=$ac_install_sh + ac_cpp_err= fi +else + ac_cpp_err=yes fi -{ echo "$as_me:$LINENO: result: $INSTALL" >&5 -echo "${ECHO_T}$INSTALL" >&6; } - -# Use test -z because SunOS4 sh mishandles braces in ${var-val}. -# It thinks the first close brace ends the variable substitution. -test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' - -test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu -if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. 
-set dummy ${ac_tool_prefix}gcc; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_CC+set}" = set; then +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: strings.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: strings.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: strings.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: strings.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: strings.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: strings.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: strings.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: strings.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: strings.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: strings.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: strings.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: strings.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: strings.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: strings.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: strings.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: strings.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. 
## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for strings.h" >&5 +echo $ECHO_N "checking for strings.h... $ECHO_C" >&6 +if test "${ac_cv_header_strings_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_CC="${ac_tool_prefix}gcc" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - + ac_cv_header_strings_h=$ac_header_preproc fi +echo "$as_me:$LINENO: result: $ac_cv_header_strings_h" >&5 +echo "${ECHO_T}$ac_cv_header_strings_h" >&6 + fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { echo "$as_me:$LINENO: result: $CC" >&5 -echo "${ECHO_T}$CC" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi +if test $ac_cv_header_strings_h = yes; then +cat >>confdefs.h <<\_ACEOF +#define HAVE_STRINGS_H 1 +_ACEOF fi -if test -z "$ac_cv_prog_CC"; then - ac_ct_CC=$CC - # Extract the first word of "gcc", so it can be a program name with args. -set dummy gcc; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + + +if test "${ac_cv_header_stdlib_h+set}" = set; then + echo "$as_me:$LINENO: checking for stdlib.h" >&5 +echo $ECHO_N "checking for stdlib.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_stdlib_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_stdlib_h" >&5 +echo "${ECHO_T}$ac_cv_header_stdlib_h" >&6 else - if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. + # Is the header compilable? +echo "$as_me:$LINENO: checking stdlib.h usability" >&5 +echo $ECHO_N "checking stdlib.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_ac_ct_CC="gcc" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 +ac_header_compiler=no fi -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 -echo "${ECHO_T}$ac_ct_CC" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 - if test "x$ac_ct_CC" = x; then - CC="" +# Is the header present? +echo "$as_me:$LINENO: checking stdlib.h presence" >&5 +echo $ECHO_N "checking stdlib.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else - case $cross_compiling:$ac_tool_warned in -yes:) -{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." >&5 -echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. 
If you think this -configuration is useful to you, please write to autoconf@gnu.org." >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -else - CC="$ac_cv_prog_CC" -fi - -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. -set dummy ${ac_tool_prefix}cc; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_CC+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_CC="${ac_tool_prefix}cc" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 + ac_cpp_err= fi -done -done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { echo "$as_me:$LINENO: result: $CC" >&5 -echo "${ECHO_T}$CC" >&6; } else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - fi + ac_cpp_err=yes fi -if test -z "$CC"; then - # Extract the first word of "cc", so it can be a program name with args. -set dummy cc; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_CC+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes else - ac_prog_rejected=no -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then - ac_prog_rejected=yes - continue - fi - ac_cv_prog_CC="cc" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -if test $ac_prog_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $ac_cv_prog_CC - shift - if test $# != 0; then - # We chose a different compiler from the bogus one. - # However, it has the same basename, so the bogon will be chosen - # first if we set CC to just the basename; use the full file name. - shift - ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" - fi -fi -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { echo "$as_me:$LINENO: result: $CC" >&5 -echo "${ECHO_T}$CC" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + ac_header_preproc=no fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 - -fi -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - for ac_prog in cl.exe - do - # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. -set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_CC+set}" = set; then +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: stdlib.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: stdlib.h: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: stdlib.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: stdlib.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: stdlib.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: stdlib.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: stdlib.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: stdlib.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: stdlib.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: stdlib.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: stdlib.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: stdlib.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: stdlib.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: stdlib.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: stdlib.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: stdlib.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for stdlib.h" >&5 +echo $ECHO_N "checking for stdlib.h... $ECHO_C" >&6 +if test "${ac_cv_header_stdlib_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_CC="$ac_tool_prefix$ac_prog" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { echo "$as_me:$LINENO: result: $CC" >&5 -echo "${ECHO_T}$CC" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - test -n "$CC" && break - done + ac_cv_header_stdlib_h=$ac_header_preproc fi -if test -z "$CC"; then - ac_ct_CC=$CC - for ac_prog in cl.exe -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_ac_ct_CC+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_ac_ct_CC="$ac_prog" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS +echo "$as_me:$LINENO: result: $ac_cv_header_stdlib_h" >&5 +echo "${ECHO_T}$ac_cv_header_stdlib_h" >&6 fi -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 -echo "${ECHO_T}$ac_ct_CC" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - +if test $ac_cv_header_stdlib_h = yes; then - test -n "$ac_ct_CC" && break -done - - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." >&5 -echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -fi +cat >>confdefs.h <<\_ACEOF +#define HAVE_STDLIB_H 1 +_ACEOF fi -test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH -See \`config.log' for more details." >&5 -echo "$as_me: error: no acceptable C compiler found in \$PATH -See \`config.log' for more details." >&2;} - { (exit 1); exit 1; }; } - -# Provide some information about the compiler. 
-echo "$as_me:$LINENO: checking for C compiler version" >&5 -ac_compiler=`set X $ac_compile; echo $2` -{ (ac_try="$ac_compiler --version >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compiler --version >&5") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } -{ (ac_try="$ac_compiler -v >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compiler -v >&5") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } -{ (ac_try="$ac_compiler -V >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compiler -V >&5") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } -{ echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 -echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6; } -if test "${ac_cv_c_compiler_gnu+set}" = set; then +if test "${ac_cv_header_linux_types_h+set}" = set; then + echo "$as_me:$LINENO: checking for linux/types.h" >&5 +echo $ECHO_N "checking for linux/types.h... $ECHO_C" >&6 +if test "${ac_cv_header_linux_types_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_linux_types_h" >&5 +echo "${ECHO_T}$ac_cv_header_linux_types_h" >&6 else - cat >conftest.$ac_ext <<_ACEOF + # Is the header compilable? +echo "$as_me:$LINENO: checking linux/types.h usability" >&5 +echo $ECHO_N "checking linux/types.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - -int -main () -{ -#ifndef __GNUC__ - choke me -#endif - - ; - return 0; -} +$ac_includes_default +#include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_compiler_gnu=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_compiler_gnu=no +ac_header_compiler=no fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -ac_cv_c_compiler_gnu=$ac_compiler_gnu - -fi -{ echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 -echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6; } -GCC=`test $ac_compiler_gnu = yes && echo yes` -ac_test_CFLAGS=${CFLAGS+set} -ac_save_CFLAGS=$CFLAGS -{ echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 -echo $ECHO_N "checking whether $CC accepts -g... 
$ECHO_C" >&6; } -if test "${ac_cv_prog_cc_g+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_save_c_werror_flag=$ac_c_werror_flag - ac_c_werror_flag=yes - ac_cv_prog_cc_g=no - CFLAGS="-g" - cat >conftest.$ac_ext <<_ACEOF +# Is the header present? +echo "$as_me:$LINENO: checking linux/types.h presence" >&5 +echo $ECHO_N "checking linux/types.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} +#include _ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_prog_cc_g=yes + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - CFLAGS="" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. 
*/ - -int -main () -{ + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: linux/types.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: linux/types.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: linux/types.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: linux/types.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: linux/types.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: linux/types.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: linux/types.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: linux/types.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: linux/types.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: linux/types.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: linux/types.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: linux/types.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: linux/types.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: linux/types.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: linux/types.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: linux/types.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - : +echo "$as_me:$LINENO: checking for linux/types.h" >&5 +echo $ECHO_N "checking for linux/types.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_linux_types_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 + ac_cv_header_linux_types_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_linux_types_h" >&5 +echo "${ECHO_T}$ac_cv_header_linux_types_h" >&6 + +fi +if test $ac_cv_header_linux_types_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_LINUX_TYPES_H 1 +_ACEOF + +fi - ac_c_werror_flag=$ac_save_c_werror_flag - CFLAGS="-g" - cat >conftest.$ac_ext <<_ACEOF + +if test "${ac_cv_header_linux_malloc_h+set}" = set; then + echo "$as_me:$LINENO: checking for linux/malloc.h" >&5 +echo $ECHO_N "checking for linux/malloc.h... $ECHO_C" >&6 +if test "${ac_cv_header_linux_malloc_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_linux_malloc_h" >&5 +echo "${ECHO_T}$ac_cv_header_linux_malloc_h" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking linux/malloc.h usability" >&5 +echo $ECHO_N "checking linux/malloc.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} +$ac_includes_default +#include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_prog_cc_g=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_header_compiler=no fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_c_werror_flag=$ac_save_c_werror_flag -fi -{ echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 -echo "${ECHO_T}$ac_cv_prog_cc_g" >&6; } -if test "$ac_test_CFLAGS" = set; then - CFLAGS=$ac_save_CFLAGS -elif test $ac_cv_prog_cc_g = yes; then - if test "$GCC" = yes; then - CFLAGS="-g -O2" - else - CFLAGS="-g" - fi -else - if test "$GCC" = yes; then - CFLAGS="-O2" - else - CFLAGS= - fi -fi -{ echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 -echo $ECHO_N "checking for $CC option to accept ISO C89... $ECHO_C" >&6; } -if test "${ac_cv_prog_cc_c89+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_cv_prog_cc_c89=no -ac_save_CC=$CC +# Is the header present? +echo "$as_me:$LINENO: checking linux/malloc.h presence" >&5 +echo $ECHO_N "checking linux/malloc.h presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -#include -#include -#include -#include -/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ -struct buf { int x; }; -FILE * (*rcsopen) (struct buf *, struct stat *, int); -static char *e (p, i) - char **p; - int i; -{ - return p[i]; -} -static char *f (char * (*g) (char **, int), char **p, ...) -{ - char *s; - va_list v; - va_start (v,p); - s = g (p, va_arg (v,int)); - va_end (v); - return s; -} - -/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has - function prototypes and stuff, but not '\xHH' hex character constants. - These don't provoke an error unfortunately, instead are silently treated - as 'x'. The following induces an error, until -std is added to get - proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an - array size at least. It's necessary to write '\x00'==0 to get something - that's true only with -std. */ -int osf4_cc_array ['\x00' == 0 ? 1 : -1]; - -/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters - inside strings and character constants. */ -#define FOO(x) 'x' -int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; - -int test (int i, double x); -struct s1 {int (*f) (int a);}; -struct s2 {int (*f) (double a);}; -int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); -int argc; -char **argv; -int -main () -{ -return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; - ; - return 0; -} +#include _ACEOF -for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ - -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" -do - CC="$ac_save_CC $ac_arg" - rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_prog_cc_c89=$ac_arg + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - + ac_header_preproc=no fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 -rm -f core conftest.err conftest.$ac_objext - test "x$ac_cv_prog_cc_c89" != "xno" && break -done -rm -f conftest.$ac_ext -CC=$ac_save_CC +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: linux/malloc.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: linux/malloc.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: linux/malloc.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: linux/malloc.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: linux/malloc.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: linux/malloc.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: linux/malloc.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: linux/malloc.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: linux/malloc.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: linux/malloc.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: linux/malloc.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: linux/malloc.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: linux/malloc.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: linux/malloc.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: linux/malloc.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: linux/malloc.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for linux/malloc.h" >&5 +echo $ECHO_N "checking for linux/malloc.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_linux_malloc_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_linux_malloc_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_linux_malloc_h" >&5 +echo "${ECHO_T}$ac_cv_header_linux_malloc_h" >&6 fi -# AC_CACHE_VAL -case "x$ac_cv_prog_cc_c89" in - x) - { echo "$as_me:$LINENO: result: none needed" >&5 -echo "${ECHO_T}none needed" >&6; } ;; - xno) - { echo "$as_me:$LINENO: result: unsupported" >&5 -echo "${ECHO_T}unsupported" >&6; } ;; - *) - CC="$CC $ac_cv_prog_cc_c89" - { echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5 -echo "${ECHO_T}$ac_cv_prog_cc_c89" >&6; } ;; -esac +if test $ac_cv_header_linux_malloc_h = yes; then +cat >>confdefs.h <<\_ACEOF +#define HAVE_LINUX_MALLOC_H 1 +_ACEOF -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu +fi -{ echo "$as_me:$LINENO: checking for required gcc" >&5 -echo $ECHO_N "checking for required gcc... $ECHO_C" >&6; } -if test "x$GCC" = "x"; then - { { echo "$as_me:$LINENO: error: no" >&5 -echo "$as_me: error: no" >&2;} - { (exit 1); exit 1; }; } -fi -CFLAGS=$USR_CFLAGS -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu -{ echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 -echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6; } -# On Suns, sometimes $CPP names a directory. -if test -n "$CPP" && test -d "$CPP"; then - CPP= -fi -if test -z "$CPP"; then - if test "${ac_cv_prog_CPP+set}" = set; then +if test "${ac_cv_header_sys_vfs_h+set}" = set; then + echo "$as_me:$LINENO: checking for sys/vfs.h" >&5 +echo $ECHO_N "checking for sys/vfs.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_sys_vfs_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_sys_vfs_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_vfs_h" >&6 else - # Double quotes because CPP needs to be expanded - for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" - do - ac_preproc_ok=false -for ac_c_preproc_warn_flag in '' yes -do - # Use a header file that comes with gcc, so configuring glibc - # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. - # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. "Syntax error" is here to catch this case. - cat >conftest.$ac_ext <<_ACEOF + # Is the header compilable? +echo "$as_me:$LINENO: checking sys/vfs.h usability" >&5 +echo $ECHO_N "checking sys/vfs.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#ifdef __STDC__ -# include -#else -# include -#endif - Syntax error +$ac_includes_default +#include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then - : + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - # Broken: fails on valid input. -continue +ac_header_compiler=no fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 -rm -f conftest.err conftest.$ac_ext - - # OK, works on sane cases. Now check whether nonexistent headers - # can be detected and how. - cat >conftest.$ac_ext <<_ACEOF +# Is the header present? +echo "$as_me:$LINENO: checking sys/vfs.h presence" >&5 +echo $ECHO_N "checking sys/vfs.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include +#include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then - # Broken: success on invalid input. 
-continue + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - # Passes both tests. -ac_preproc_ok=: -break + ac_header_preproc=no fi - -rm -f conftest.err conftest.$ac_ext - -done -# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.err conftest.$ac_ext -if $ac_preproc_ok; then - break -fi +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 - done - ac_cv_prog_CPP=$CPP - -fi - CPP=$ac_cv_prog_CPP -else - ac_cv_prog_CPP=$CPP -fi -{ echo "$as_me:$LINENO: result: $CPP" >&5 -echo "${ECHO_T}$CPP" >&6; } -ac_preproc_ok=false -for ac_c_preproc_warn_flag in '' yes -do - # Use a header file that comes with gcc, so configuring glibc - # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. - # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. "Syntax error" is here to catch this case. - cat >conftest.$ac_ext <<_ACEOF +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: sys/vfs.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: sys/vfs.h: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: sys/vfs.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: sys/vfs.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: sys/vfs.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: sys/vfs.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/vfs.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: sys/vfs.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/vfs.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: sys/vfs.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/vfs.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: sys/vfs.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/vfs.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: sys/vfs.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/vfs.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: sys/vfs.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for sys/vfs.h" >&5 +echo $ECHO_N "checking for sys/vfs.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_sys_vfs_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_sys_vfs_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_sys_vfs_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_vfs_h" >&6 + +fi +if test $ac_cv_header_sys_vfs_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SYS_VFS_H 1 +_ACEOF + +fi + + +if test "${ac_cv_header_sys_mount_h+set}" = set; then + echo "$as_me:$LINENO: checking for sys/mount.h" >&5 +echo $ECHO_N "checking for sys/mount.h... $ECHO_C" >&6 +if test "${ac_cv_header_sys_mount_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_sys_mount_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_mount_h" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking sys/mount.h usability" >&5 +echo $ECHO_N "checking sys/mount.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#ifdef __STDC__ -# include -#else -# include -#endif - Syntax error +$ac_includes_default +#include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then - : + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - # Broken: fails on valid input. -continue +ac_header_compiler=no fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 -rm -f conftest.err conftest.$ac_ext - - # OK, works on sane cases. Now check whether nonexistent headers - # can be detected and how. - cat >conftest.$ac_ext <<_ACEOF +# Is the header present? +echo "$as_me:$LINENO: checking sys/mount.h presence" >&5 +echo $ECHO_N "checking sys/mount.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include +#include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then - # Broken: success on invalid input. 
-continue + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - # Passes both tests. -ac_preproc_ok=: -break + ac_header_preproc=no fi - rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 -done -# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. -rm -f conftest.err conftest.$ac_ext -if $ac_preproc_ok; then - : +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: sys/mount.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: sys/mount.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/mount.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: sys/mount.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: sys/mount.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: sys/mount.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/mount.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: sys/mount.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: sys/mount.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: sys/mount.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/mount.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: sys/mount.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/mount.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: sys/mount.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/mount.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: sys/mount.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for sys/mount.h" >&5 +echo $ECHO_N "checking for sys/mount.h... $ECHO_C" >&6 +if test "${ac_cv_header_sys_mount_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 else - { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check -See \`config.log' for more details." >&5 -echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check -See \`config.log' for more details." >&2;} - { (exit 1); exit 1; }; } + ac_cv_header_sys_mount_h=$ac_header_preproc fi +echo "$as_me:$LINENO: result: $ac_cv_header_sys_mount_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_mount_h" >&6 -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu +fi +if test $ac_cv_header_sys_mount_h = yes; then -# Extract the first word of "perl", so it can be a program name with args. 
-set dummy perl; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_HAVE_PERL+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$HAVE_PERL"; then - ac_cv_prog_HAVE_PERL="$HAVE_PERL" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_HAVE_PERL="yes" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS +cat >>confdefs.h <<\_ACEOF +#define HAVE_SYS_MOUNT_H 1 +_ACEOF - test -z "$ac_cv_prog_HAVE_PERL" && ac_cv_prog_HAVE_PERL="no" -fi -fi -HAVE_PERL=$ac_cv_prog_HAVE_PERL -if test -n "$HAVE_PERL"; then - { echo "$as_me:$LINENO: result: $HAVE_PERL" >&5 -echo "${ECHO_T}$HAVE_PERL" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } fi -# Extract the first word of "find", so it can be a program name with args. -set dummy find; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_HAVE_FIND+set}" = set; then +if test "${ac_cv_header_sys_stat_h+set}" = set; then + echo "$as_me:$LINENO: checking for sys/stat.h" >&5 +echo $ECHO_N "checking for sys/stat.h... $ECHO_C" >&6 +if test "${ac_cv_header_sys_stat_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_sys_stat_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_stat_h" >&6 else - if test -n "$HAVE_FIND"; then - ac_cv_prog_HAVE_FIND="$HAVE_FIND" # Let the user override the test. + # Is the header compilable? +echo "$as_me:$LINENO: checking sys/stat.h usability" >&5 +echo $ECHO_N "checking sys/stat.h usability... 
$ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_HAVE_FIND="yes" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 - test -z "$ac_cv_prog_HAVE_FIND" && ac_cv_prog_HAVE_FIND="no" +ac_header_compiler=no fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking sys/stat.h presence" >&5 +echo $ECHO_N "checking sys/stat.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes fi -HAVE_FIND=$ac_cv_prog_HAVE_FIND -if test -n "$HAVE_FIND"; then - { echo "$as_me:$LINENO: result: $HAVE_FIND" >&5 -echo "${ECHO_T}$HAVE_FIND" >&6; } +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: sys/stat.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: sys/stat.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/stat.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: sys/stat.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: sys/stat.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: sys/stat.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/stat.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: sys/stat.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: sys/stat.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: sys/stat.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/stat.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: sys/stat.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/stat.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: sys/stat.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/stat.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: sys/stat.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for sys/stat.h" >&5 +echo $ECHO_N "checking for sys/stat.h... $ECHO_C" >&6 +if test "${ac_cv_header_sys_stat_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_sys_stat_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_sys_stat_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_stat_h" >&6 -# Extract the first word of "bison", so it can be a program name with args. -set dummy bison; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_HAVE_BISON+set}" = set; then +fi +if test $ac_cv_header_sys_stat_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SYS_STAT_H 1 +_ACEOF + +fi + + +if test "${ac_cv_header_sys_types_h+set}" = set; then + echo "$as_me:$LINENO: checking for sys/types.h" >&5 +echo $ECHO_N "checking for sys/types.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_sys_types_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_sys_types_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_types_h" >&6 else - if test -n "$HAVE_BISON"; then - ac_cv_prog_HAVE_BISON="$HAVE_BISON" # Let the user override the test. + # Is the header compilable? +echo "$as_me:$LINENO: checking sys/types.h usability" >&5 +echo $ECHO_N "checking sys/types.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_HAVE_BISON="yes" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 - test -z "$ac_cv_prog_HAVE_BISON" && ac_cv_prog_HAVE_BISON="no" +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking sys/types.h presence" >&5 +echo $ECHO_N "checking sys/types.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no fi -HAVE_BISON=$ac_cv_prog_HAVE_BISON -if test -n "$HAVE_BISON"; then - { echo "$as_me:$LINENO: result: $HAVE_BISON" >&5 -echo "${ECHO_T}$HAVE_BISON" >&6; } +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? 
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: sys/types.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: sys/types.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/types.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: sys/types.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: sys/types.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: sys/types.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/types.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: sys/types.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/types.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: sys/types.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/types.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: sys/types.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/types.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: sys/types.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/types.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: sys/types.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for sys/types.h" >&5 +echo $ECHO_N "checking for sys/types.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_sys_types_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + ac_cv_header_sys_types_h=$ac_header_preproc fi +echo "$as_me:$LINENO: result: $ac_cv_header_sys_types_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_types_h" >&6 +fi +if test $ac_cv_header_sys_types_h = yes; then -# Extract the first word of "flex", so it can be a program name with args. -set dummy flex; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_HAVE_FLEX+set}" = set; then +cat >>confdefs.h <<\_ACEOF +#define HAVE_SYS_TYPES_H 1 +_ACEOF + +fi + + +if test "${ac_cv_header_sys_socket_h+set}" = set; then + echo "$as_me:$LINENO: checking for sys/socket.h" >&5 +echo $ECHO_N "checking for sys/socket.h... $ECHO_C" >&6 +if test "${ac_cv_header_sys_socket_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_sys_socket_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_socket_h" >&6 else - if test -n "$HAVE_FLEX"; then - ac_cv_prog_HAVE_FLEX="$HAVE_FLEX" # Let the user override the test. + # Is the header compilable? +echo "$as_me:$LINENO: checking sys/socket.h usability" >&5 +echo $ECHO_N "checking sys/socket.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_HAVE_FLEX="yes" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 - test -z "$ac_cv_prog_HAVE_FLEX" && ac_cv_prog_HAVE_FLEX="no" +ac_header_compiler=no fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking sys/socket.h presence" >&5 +echo $ECHO_N "checking sys/socket.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes fi -HAVE_FLEX=$ac_cv_prog_HAVE_FLEX -if test -n "$HAVE_FLEX"; then - { echo "$as_me:$LINENO: result: $HAVE_FLEX" >&5 -echo "${ECHO_T}$HAVE_FLEX" >&6; } +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: sys/socket.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: sys/socket.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/socket.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: sys/socket.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: sys/socket.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: sys/socket.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/socket.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: sys/socket.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: sys/socket.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: sys/socket.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/socket.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: sys/socket.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/socket.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: sys/socket.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/socket.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: sys/socket.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for sys/socket.h" >&5 +echo $ECHO_N "checking for sys/socket.h... $ECHO_C" >&6 +if test "${ac_cv_header_sys_socket_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_sys_socket_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_sys_socket_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_socket_h" >&6 +fi +if test $ac_cv_header_sys_socket_h = yes; then +cat >>confdefs.h <<\_ACEOF +#define HAVE_SYS_SOCKET_H 1 +_ACEOF -{ echo "$as_me:$LINENO: checking for required Math::BigInt perl module" >&5 -echo $ECHO_N "checking for required Math::BigInt perl module... $ECHO_C" >&6; } -perl -e "use Math::BigInt" 2>&1 > /dev/null -if test $? != 0; then - { { echo "$as_me:$LINENO: error: no" >&5 -echo "$as_me: error: no" >&2;} - { (exit 1); exit 1; }; } -else - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } fi -if test $host != $build; then - for ac_prog in gcc cc -do - # Extract the first word of "$ac_prog", so it can be a program name with args. 
-set dummy $ac_prog; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_BUILD_CC+set}" = set; then + +if test "${ac_cv_header_sys_sendfile_h+set}" = set; then + echo "$as_me:$LINENO: checking for sys/sendfile.h" >&5 +echo $ECHO_N "checking for sys/sendfile.h... $ECHO_C" >&6 +if test "${ac_cv_header_sys_sendfile_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_sys_sendfile_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_sendfile_h" >&6 else - if test -n "$BUILD_CC"; then - ac_cv_prog_BUILD_CC="$BUILD_CC" # Let the user override the test. + # Is the header compilable? +echo "$as_me:$LINENO: checking sys/sendfile.h usability" >&5 +echo $ECHO_N "checking sys/sendfile.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_BUILD_CC="$ac_prog" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 +ac_header_compiler=no fi -fi -BUILD_CC=$ac_cv_prog_BUILD_CC -if test -n "$BUILD_CC"; then - { echo "$as_me:$LINENO: result: $BUILD_CC" >&5 -echo "${ECHO_T}$BUILD_CC" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking sys/sendfile.h presence" >&5 +echo $ECHO_N "checking sys/sendfile.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + ac_cpp_err=yes fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? 
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: sys/sendfile.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: sys/sendfile.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/sendfile.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: sys/sendfile.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: sys/sendfile.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: sys/sendfile.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/sendfile.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: sys/sendfile.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/sendfile.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: sys/sendfile.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/sendfile.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: sys/sendfile.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/sendfile.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: sys/sendfile.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/sendfile.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: sys/sendfile.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for sys/sendfile.h" >&5 +echo $ECHO_N "checking for sys/sendfile.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_sys_sendfile_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_sys_sendfile_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_sys_sendfile_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_sendfile_h" >&6 + +fi +if test $ac_cv_header_sys_sendfile_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SYS_SENDFILE_H 1 +_ACEOF + +fi + + +if test "${ac_cv_header_sys_xattr_h+set}" = set; then + echo "$as_me:$LINENO: checking for sys/xattr.h" >&5 +echo $ECHO_N "checking for sys/xattr.h... $ECHO_C" >&6 +if test "${ac_cv_header_sys_xattr_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_sys_xattr_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_xattr_h" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking sys/xattr.h usability" >&5 +echo $ECHO_N "checking sys/xattr.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking sys/xattr.h presence" >&5 +echo $ECHO_N "checking sys/xattr.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: sys/xattr.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: sys/xattr.h: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: sys/xattr.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: sys/xattr.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: sys/xattr.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: sys/xattr.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/xattr.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: sys/xattr.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/xattr.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: sys/xattr.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/xattr.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: sys/xattr.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/xattr.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: sys/xattr.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: sys/xattr.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: sys/xattr.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for sys/xattr.h" >&5 +echo $ECHO_N "checking for sys/xattr.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_sys_xattr_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_sys_xattr_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_sys_xattr_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_xattr_h" >&6 + +fi +if test $ac_cv_header_sys_xattr_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SYS_XATTR_H 1 +_ACEOF + +fi + + + +if test "${ac_cv_header_arpa_inet_h+set}" = set; then + echo "$as_me:$LINENO: checking for arpa/inet.h" >&5 +echo $ECHO_N "checking for arpa/inet.h... $ECHO_C" >&6 +if test "${ac_cv_header_arpa_inet_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_arpa_inet_h" >&5 +echo "${ECHO_T}$ac_cv_header_arpa_inet_h" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking arpa/inet.h usability" >&5 +echo $ECHO_N "checking arpa/inet.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking arpa/inet.h presence" >&5 +echo $ECHO_N "checking arpa/inet.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: arpa/inet.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: arpa/inet.h: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: arpa/inet.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: arpa/inet.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: arpa/inet.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: arpa/inet.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: arpa/inet.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: arpa/inet.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: arpa/inet.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: arpa/inet.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: arpa/inet.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: arpa/inet.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: arpa/inet.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: arpa/inet.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: arpa/inet.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: arpa/inet.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for arpa/inet.h" >&5 +echo $ECHO_N "checking for arpa/inet.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_arpa_inet_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_arpa_inet_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_arpa_inet_h" >&5 +echo "${ECHO_T}$ac_cv_header_arpa_inet_h" >&6 + +fi +if test $ac_cv_header_arpa_inet_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_ARPA_INET_H 1 +_ACEOF + +fi + + +if test "${ac_cv_header_attr_xattr_h+set}" = set; then + echo "$as_me:$LINENO: checking for attr/xattr.h" >&5 +echo $ECHO_N "checking for attr/xattr.h... $ECHO_C" >&6 +if test "${ac_cv_header_attr_xattr_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_attr_xattr_h" >&5 +echo "${ECHO_T}$ac_cv_header_attr_xattr_h" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking attr/xattr.h usability" >&5 +echo $ECHO_N "checking attr/xattr.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking attr/xattr.h presence" >&5 +echo $ECHO_N "checking attr/xattr.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: attr/xattr.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: attr/xattr.h: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: attr/xattr.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: attr/xattr.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: attr/xattr.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: attr/xattr.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: attr/xattr.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: attr/xattr.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: attr/xattr.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: attr/xattr.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: attr/xattr.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: attr/xattr.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: attr/xattr.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: attr/xattr.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: attr/xattr.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: attr/xattr.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for attr/xattr.h" >&5 +echo $ECHO_N "checking for attr/xattr.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_attr_xattr_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_attr_xattr_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_attr_xattr_h" >&5 +echo "${ECHO_T}$ac_cv_header_attr_xattr_h" >&6 + +fi +if test $ac_cv_header_attr_xattr_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_ATTR_XATTR_H 1 +_ACEOF + +fi + + + +echo "$as_me:$LINENO: checking for long int" >&5 +echo $ECHO_N "checking for long int... $ECHO_C" >&6 +if test "${ac_cv_type_long_int+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +if ((long int *) 0) + return 0; +if (sizeof (long int)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_long_int=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_type_long_int=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_type_long_int" >&5 +echo "${ECHO_T}$ac_cv_type_long_int" >&6 + +echo "$as_me:$LINENO: checking size of long int" >&5 +echo $ECHO_N "checking size of long int... $ECHO_C" >&6 +if test "${ac_cv_sizeof_long_int+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "$ac_cv_type_long_int" = yes; then + # The cast to unsigned long works around a bug in the HP C Compiler + # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects + # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. + # This bug is HP SR number 8606223364. + if test "$cross_compiling" = yes; then + # Depending upon the size, compute the lo and hi bounds. +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (long int))) >= 0)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_lo=0 ac_mid=0 + while :; do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (long int))) <= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=$ac_mid; break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_lo=`expr $ac_mid + 1` + if test $ac_lo -le $ac_mid; then + ac_lo= ac_hi= + break + fi + ac_mid=`expr 2 '*' $ac_mid + 1` +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (long int))) < 0)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=-1 ac_mid=-1 + while :; do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (long int))) >= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_lo=$ac_mid; break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_hi=`expr '(' $ac_mid ')' - 1` + if test $ac_mid -le $ac_hi; then + ac_lo= ac_hi= + break + fi + ac_mid=`expr 2 '*' $ac_mid` +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_lo= ac_hi= +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +# Binary search between lo and hi bounds. +while test "x$ac_lo" != "x$ac_hi"; do + ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo` + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (long int))) <= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=$ac_mid +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_lo=`expr '(' $ac_mid ')' + 1` +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +done +case $ac_lo in +?*) ac_cv_sizeof_long_int=$ac_lo;; +'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (long int), 77 +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute sizeof (long int), 77 +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } ;; +esac +else + if test "$cross_compiling" = yes; then + { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot run test program while cross compiling +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +long longval () { return (long) (sizeof (long int)); } +unsigned long ulongval () { return (long) (sizeof (long int)); } +#include +#include +int +main () +{ + + FILE *f = fopen ("conftest.val", "w"); + if (! f) + exit (1); + if (((long) (sizeof (long int))) < 0) + { + long i = longval (); + if (i != ((long) (sizeof (long int)))) + exit (1); + fprintf (f, "%ld\n", i); + } + else + { + unsigned long i = ulongval (); + if (i != ((long) (sizeof (long int)))) + exit (1); + fprintf (f, "%lu\n", i); + } + exit (ferror (f) || fclose (f) != 0); + + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_sizeof_long_int=`cat conftest.val` +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +{ { echo "$as_me:$LINENO: error: cannot compute sizeof (long int), 77 +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute sizeof (long int), 77 +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi +rm -f core *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi +fi +rm -f conftest.val +else + ac_cv_sizeof_long_int=0 +fi +fi +echo "$as_me:$LINENO: result: $ac_cv_sizeof_long_int" >&5 +echo "${ECHO_T}$ac_cv_sizeof_long_int" >&6 +cat >>confdefs.h <<_ACEOF +#define SIZEOF_LONG_INT $ac_cv_sizeof_long_int +_ACEOF + + + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +echo "$as_me:$LINENO: checking for a BSD-compatible install" >&5 +echo $ECHO_N "checking for a BSD-compatible install... $ECHO_C" >&6 +if test -z "$INSTALL"; then +if test "${ac_cv_path_install+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. 
+case $as_dir/ in + ./ | .// | /cC/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:\\/os2\\/install\\/* | ?:\\/OS2\\/INSTALL\\/* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. + : + else + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + done + done + ;; +esac +done + + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. We don't cache a + # path for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the path is relative. + INSTALL=$ac_install_sh + fi +fi +echo "$as_me:$LINENO: result: $INSTALL" >&5 +echo "${ECHO_T}$INSTALL" >&6 + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. 
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$ac_ct_CC" && break +done + + CC=$ac_ct_CC +fi + +fi + + +test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&5 +echo "$as_me: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + +# Provide some information about the compiler. +echo "$as_me:$LINENO:" \ + "checking for C compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version &5\"") >&5 + (eval $ac_compiler --version &5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v &5\"") >&5 + (eval $ac_compiler -v &5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V &5\"") >&5 + (eval $ac_compiler -V &5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6 +if test "${ac_cv_c_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_compiler_gnu=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6 +GCC=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +CFLAGS="-g" +echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6 +if test "${ac_cv_prog_cc_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_prog_cc_g=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_g" >&6 +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5 +echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6 +if test "${ac_cv_prog_cc_stdc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_prog_cc_stdc=no +ac_save_CC=$CC +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#include +#include +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. 
It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std1 is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std1. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +# Don't try gcc -ansi; that turns off useful extensions and +# breaks some systems' header files. +# AIX -qlanglvl=ansi +# Ultrix and OSF/1 -std1 +# HP-UX 10.20 and later -Ae +# HP-UX older versions -Aa -D_HPUX_SOURCE +# SVR4 -Xc -D__EXTENSIONS__ +for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_stdc=$ac_arg +break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext +done +rm -f conftest.$ac_ext conftest.$ac_objext +CC=$ac_save_CC + +fi + +case "x$ac_cv_prog_cc_stdc" in + x|xno) + echo "$as_me:$LINENO: result: none needed" >&5 +echo "${ECHO_T}none needed" >&6 ;; + *) + echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6 + CC="$CC $ac_cv_prog_cc_stdc" ;; +esac + +# Some people use a C++ compiler to compile C. Since we use `exit', +# in C++ we need to declare it. In case someone uses the same compiler +# for both compiling C and C++ we need to have the C++ compiler decide +# the declaration of exit, since it's the most demanding environment. +cat >conftest.$ac_ext <<_ACEOF +#ifndef __cplusplus + choke me +#endif +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + for ac_declaration in \ + '' \ + 'extern "C" void std::exit (int) throw (); using std::exit;' \ + 'extern "C" void std::exit (int); using std::exit;' \ + 'extern "C" void exit (int) throw ();' \ + 'extern "C" void exit (int);' \ + 'void exit (int);' +do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +#include +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +continue +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +done +rm -f conftest* +if test -n "$ac_declaration"; then + echo '#ifdef __cplusplus' >>confdefs.h + echo $ac_declaration >>confdefs.h + echo '#endif' >>confdefs.h +fi + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +echo "$as_me:$LINENO: checking for required gcc" >&5 +echo $ECHO_N "checking for required gcc... $ECHO_C" >&6 +if test "x$GCC" = "x"; then + { { echo "$as_me:$LINENO: error: no" >&5 +echo "$as_me: error: no" >&2;} + { (exit 1); exit 1; }; } +fi + +CFLAGS=$USR_CFLAGS + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 +echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6 +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if test "${ac_cv_prog_CPP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. 
+ # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether non-existent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. 
+ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +echo "$as_me:$LINENO: result: $CPP" >&5 +echo "${ECHO_T}$CPP" >&6 +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether non-existent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + : +else + { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." >&5 +echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +# Extract the first word of "perl", so it can be a program name with args. +set dummy perl; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_HAVE_PERL+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$HAVE_PERL"; then + ac_cv_prog_HAVE_PERL="$HAVE_PERL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_HAVE_PERL="yes" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + + test -z "$ac_cv_prog_HAVE_PERL" && ac_cv_prog_HAVE_PERL="no" +fi +fi +HAVE_PERL=$ac_cv_prog_HAVE_PERL +if test -n "$HAVE_PERL"; then + echo "$as_me:$LINENO: result: $HAVE_PERL" >&5 +echo "${ECHO_T}$HAVE_PERL" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +if test $HAVE_PERL = "no"; then + { { echo "$as_me:$LINENO: error: \"perl required in PATH to complete build\"" >&5 +echo "$as_me: error: \"perl required in PATH to complete build\"" >&2;} + { (exit 1); exit 1; }; } +fi + +# Extract the first word of "find", so it can be a program name with args. +set dummy find; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_HAVE_FIND+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$HAVE_FIND"; then + ac_cv_prog_HAVE_FIND="$HAVE_FIND" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_HAVE_FIND="yes" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + + test -z "$ac_cv_prog_HAVE_FIND" && ac_cv_prog_HAVE_FIND="no" +fi +fi +HAVE_FIND=$ac_cv_prog_HAVE_FIND +if test -n "$HAVE_FIND"; then + echo "$as_me:$LINENO: result: $HAVE_FIND" >&5 +echo "${ECHO_T}$HAVE_FIND" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +if test $HAVE_FIND = "no"; then + { { echo "$as_me:$LINENO: error: \"find required in PATH complete build\"" >&5 +echo "$as_me: error: \"find required in PATH complete build\"" >&2;} + { (exit 1); exit 1; }; } +fi + +# Extract the first word of "bison", so it can be a program name with args. +set dummy bison; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_HAVE_BISON+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$HAVE_BISON"; then + ac_cv_prog_HAVE_BISON="$HAVE_BISON" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_HAVE_BISON="yes" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + + test -z "$ac_cv_prog_HAVE_BISON" && ac_cv_prog_HAVE_BISON="no" +fi +fi +HAVE_BISON=$ac_cv_prog_HAVE_BISON +if test -n "$HAVE_BISON"; then + echo "$as_me:$LINENO: result: $HAVE_BISON" >&5 +echo "${ECHO_T}$HAVE_BISON" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +if test $HAVE_BISON = "no"; then + { { echo "$as_me:$LINENO: error: \"bison required in PATH to complete build\"" >&5 +echo "$as_me: error: \"bison required in PATH to complete build\"" >&2;} + { (exit 1); exit 1; }; } +fi + +# Extract the first word of "flex", so it can be a program name with args. +set dummy flex; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_HAVE_FLEX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$HAVE_FLEX"; then + ac_cv_prog_HAVE_FLEX="$HAVE_FLEX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_HAVE_FLEX="yes" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + + test -z "$ac_cv_prog_HAVE_FLEX" && ac_cv_prog_HAVE_FLEX="no" +fi +fi +HAVE_FLEX=$ac_cv_prog_HAVE_FLEX +if test -n "$HAVE_FLEX"; then + echo "$as_me:$LINENO: result: $HAVE_FLEX" >&5 +echo "${ECHO_T}$HAVE_FLEX" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +if test $HAVE_FLEX = "no"; then + { { echo "$as_me:$LINENO: error: \"flex required in PATH to complete build\"" >&5 +echo "$as_me: error: \"flex required in PATH to complete build\"" >&2;} + { (exit 1); exit 1; }; } +fi + +echo "$as_me:$LINENO: checking for required Math::BigInt perl module" >&5 +echo $ECHO_N "checking for required Math::BigInt perl module... $ECHO_C" >&6 +perl -e "use Math::BigInt" 2>&1 > /dev/null +if test $? != 0; then + { { echo "$as_me:$LINENO: error: no" >&5 +echo "$as_me: error: no" >&2;} + { (exit 1); exit 1; }; } +else + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 +fi + +if test $host != $build; then + for ac_prog in gcc cc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_BUILD_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$BUILD_CC"; then + ac_cv_prog_BUILD_CC="$BUILD_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_BUILD_CC="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +BUILD_CC=$ac_cv_prog_BUILD_CC +if test -n "$BUILD_CC"; then + echo "$as_me:$LINENO: result: $BUILD_CC" >&5 +echo "${ECHO_T}$BUILD_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$BUILD_CC" && break +done + +else + BUILD_CC=$CC + BUILD_CFLAGS=$BUILD_CFLAGS + BUILD_LDFLAGS=$BUILD_LDFLAGS +fi + + + + + +# +# Threading options. +# +# Client - thread-safe or not +# --disable-thread-safety : chooses null or posix locking mechanism +# For applications that know they are not multi-threaded, or choose +# to handle locking themselves, this configure option can be used to +# slightly decrease library-induced latency by removing the locking. +# +# Client - job threading +# libpvfs2-threaded.{so,a} with __PVFS2_JOB_THREADED__ +# This option causes the client library to spawn an extra thread +# to handle network communications. In this way, it is possible +# that the library can handle new and completed requests from +# the application while also interacting with the network. +# +# These separate libraries can not be built using the existing +# configure mechanism. They are only built if required by the +# kernel module helper. +# +# Server - three options: +# trove +# (hack Makefile.in) +# job +# (hack Makefile.in) +# aio +# --disable-aio-threaded-callbacks +# +# Trove (storage) and job threading in the server are implemented +# using #defines, but not exported through the configure mechanism. +# AIO threaded callbacks are a separate thread used in conjunction +# with trove, and can be disabled through a configure option as +# AIO has been found to be buggy on many systems. 
+#
+#
+# Kernel module helper - job threading
+#   --enable-threaded-kmod-helper
+#
+#   This is a special client that "helps" applications that access PVFS
+#   through the kernel using normal VFS calls (e.g. "ls"). This
+#   configure option builds it using __PVFS2_JOB_THREADED__ as described
+#   above. That enables a thread for the network layer and a thread
+#   for the kernel interface. This has the potential to increase the
+#   rate at which concurrent operations are processed, but has the potential
+#   drawback of somewhat higher overhead for a single operation and lack
+#   of testing.
+#
+#   Enabling this option causes one of libpvfs2-threaded.{so,a} to be
+#   built, depending on --enable-shared and --enable-static. If both are
+#   enabled, the shared library takes precedence.
+#
+
+echo "$as_me:$LINENO: checking for client library thread safety support" >&5
+echo $ECHO_N "checking for client library thread safety support... $ECHO_C" >&6
+# Check whether --enable-thread-safety or --disable-thread-safety was given.
+# Only the exact value "no" selects null locking. An explicit
+# --enable-thread-safety (or any other value) is handled like the default,
+# so exactly one locking define is always added to LIBCFLAGS, THREAD_LIB is
+# always assigned, and the "checking" message above always gets a result.
+if test "${enable_thread_safety+set}" = set; then
+  enableval="$enable_thread_safety"
+fi
+# ${enable_thread_safety-yes} expands to "yes" only when the option was not
+# given at all, without touching enableval in that case.
+if test "x${enable_thread_safety-yes}" = "xno" ; then
+  # Null locking: no thread library is added to the link line.
+  LIBCFLAGS="$LIBCFLAGS -D__GEN_NULL_LOCKING__"
+  THREAD_LIB=""
+  echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+else
+  # POSIX locking: link against libpthread.
+  LIBCFLAGS="$LIBCFLAGS -D__GEN_POSIX_LOCKING__"
+  THREAD_LIB="-lpthread"
+  echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+fi;
+
+
+
+
+# Check whether --enable-server or --disable-server was given.
+# BUILD_SERVER=1 and NEED_BERKELEY_DB=yes are set when the option is absent
+# or its value is exactly "yes"; any other value sets BUILD_SERVER to the
+# empty string and leaves NEED_BERKELEY_DB untouched.
+if test "${enable_server+set}" = set; then
+  enableval="$enable_server"
+  if test "x$enableval" = "xyes" ; then
+    BUILD_SERVER=1
+    NEED_BERKELEY_DB=yes
+else
+    BUILD_SERVER=""
+fi
+else
+  BUILD_SERVER=1
+  NEED_BERKELEY_DB=yes
+fi;
+
+
+
+# Check whether --with-openssl or --without-openssl was given.
+if test "${with_openssl+set}" = set; then + withval="$with_openssl" + + opensslpath=${withval} + + if test "x${withval}" != "xno"; then + + echo "$as_me:$LINENO: checking for openssl library" >&5 +echo $ECHO_N "checking for openssl library... $ECHO_C" >&6 + + if test "x${opensslpath}" != "x"; then + CFLAGS="${CFLAGS} -I${opensslpath}/include" + LDFLAGS="$LDFLAGS -L${opensslpath}/lib64 -L${opensslpath}/lib" + SERVER_LDFLAGS="$SERVER_LDFLAGS -L${opensslpath}/lib64 -L${opensslpath}/lib" + fi + LIBS="$LIBS -lcrypto -lssl" + + cat >conftest.$ac_ext <<_ACEOF +#include "openssl/bio.h" +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: Invalid openssl path specified. No openssl/bio.h found." >&5 +echo "$as_me: error: Invalid openssl path specified. No openssl/bio.h found." >&2;} + { (exit 1); exit 1; }; } +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include "openssl/bio.h" +int +main () +{ +BIO * b; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: could not find openssl libs" >&5 +echo "$as_me: error: could not find openssl libs" >&2;} + { (exit 1); exit 1; }; } +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + +cat >>confdefs.h <<\_ACEOF +#define WITH_OPENSSL 1 +_ACEOF + + + +for ac_header in openssl/evp.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in openssl/crypto.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +else + # Is the header compilable? 
+echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in openssl/sha.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +else + # Is the header compilable? 
+echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + fi + +else + + echo "$as_me:$LINENO: checking for openssl library" >&5 +echo $ECHO_N "checking for openssl library... $ECHO_C" >&6 + TMPLIBS=${LIBS} + LIBS="$LIBS -lcrypto -lssl" + + cat >conftest.$ac_ext <<_ACEOF +#include "openssl/bio.h" +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ echo "$as_me:$LINENO: WARNING: No openssl headers found." >&5 +echo "$as_me: WARNING: No openssl headers found." >&2;} +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include "openssl/bio.h" +int +main () +{ +BIO * b; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define WITH_OPENSSL 1 +_ACEOF + + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + + { echo "$as_me:$LINENO: WARNING: No openssl headers found." >&5 +echo "$as_me: WARNING: No openssl headers found." >&2;} + LIBS=${TMPLIBS} + +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + +for ac_header in openssl/evp.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in openssl/crypto.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in openssl/sha.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + +fi; + + +# Check whether --enable-karma-mem-usage-stats or --disable-karma-mem-usage-stats was given. +if test "${enable_karma_mem_usage_stats+set}" = set; then + enableval="$enable_karma_mem_usage_stats" + if test "x$enableval" = "xno" ; then + CFLAGS="$CFLAGS -D__KARMA_DISABLE_MEM_USAGE__" +fi + +fi; + +# Check whether --enable-bmi-only or --disable-bmi-only was given. +if test "${enable_bmi_only+set}" = set; then + enableval="$enable_bmi_only" + if test "x$enableval" = "xyes" ; then + BUILD_BMI_ONLY=1 +fi + +fi; + + +if test "x$BUILD_BMI_ONLY" = "x1"; then + BUILD_SERVER="" + NEED_BERKELEY_DB=no + + +fi + +# Check whether --enable-perf-counters or --disable-perf-counters was given. +if test "${enable_perf_counters+set}" = set; then + enableval="$enable_perf_counters" + if test "x$enableval" = "xno" ; then + CFLAGS="$CFLAGS -D__PVFS2_DISABLE_PERF_COUNTERS__" +fi + +fi; + +MMAP_RA_CACHE="" +# Check whether --enable-mmap-racache or --disable-mmap-racache was given. +if test "${enable_mmap_racache+set}" = set; then + enableval="$enable_mmap_racache" + if test "x$enableval" = "xyes" ; then +MMAP_RA_CACHE="-DUSE_MMAP_RA_CACHE" +fi + +fi; + + +RESET_FILE_POS="" +# Check whether --enable-reset-file-pos or --disable-reset-file-pos was given. +if test "${enable_reset_file_pos+set}" = set; then + enableval="$enable_reset_file_pos" + if test "x$enableval" = "xyes" ; then +RESET_FILE_POS="-DRESET_FILE_POS" +fi + +fi; + + + +# Check whether --enable-trusted-connections or --disable-trusted-connections was given. 
+# Only an explicit "yes" sets TRUSTED_CONNECTIONS; any other value leaves the
+# variable exactly as it was before this check.
+if test "${enable_trusted_connections+set}" = set; then
+  enableval="$enable_trusted_connections"
+  if test "x$enableval" = "xyes"; then
+    TRUSTED_CONNECTIONS="-DUSE_TRUSTED"
+  fi
+fi;
+
+
+
+# test_for_karma: probe for the pieces the karma GUI build needs.
+#
+# Step 1 looks for a pkg-config executable on $PATH.  The answer is cached in
+# ac_cv_prog_HAVE_PKGCONFIG, and a non-empty HAVE_PKGCONFIG set beforehand
+# overrides the search.  Step 2 runs only when HAVE_PKGCONFIG is "yes" and
+# asks pkg-config whether gtk+-2.0 exists.
+#
+# Takes no arguments.  On success it sets GTKLIBS, GTKCFLAGS and
+# BUILD_KARMA="1"; on any failure those three variables are left untouched.
+# Progress messages are written to file descriptors 5 and 6.
+#
+# NOTE(review): this script looks autoconf-generated; presumably the same
+# change belongs in the macro source it is generated from -- confirm.
+test_for_karma()
+{
+  # Extract the first word of "pkg-config", so it can be a program name with args.
+  set dummy pkg-config; ac_word=$2
+  echo "$as_me:$LINENO: checking for $ac_word" >&5
+  echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+  if test "${ac_cv_prog_HAVE_PKGCONFIG+set}" = set; then
+    echo $ECHO_N "(cached) $ECHO_C" >&6
+  else
+    if test -n "$HAVE_PKGCONFIG"; then
+      ac_cv_prog_HAVE_PKGCONFIG="$HAVE_PKGCONFIG" # Let the user override the test.
+    else
+      # Walk $PATH one directory at a time; the first executable match wins
+      # and "break 2" leaves both loops at once.
+      as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+      for as_dir in $PATH
+      do
+        IFS=$as_save_IFS
+        test -z "$as_dir" && as_dir=.
+        for ac_exec_ext in '' $ac_executable_extensions; do
+          if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+            ac_cv_prog_HAVE_PKGCONFIG="yes"
+            echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+            break 2
+          fi
+        done
+      done
+
+      # Nothing found on $PATH: record an explicit "no" in the cache variable.
+      test -z "$ac_cv_prog_HAVE_PKGCONFIG" && ac_cv_prog_HAVE_PKGCONFIG="no"
+    fi
+  fi
+  HAVE_PKGCONFIG=$ac_cv_prog_HAVE_PKGCONFIG
+  if test -n "$HAVE_PKGCONFIG"; then
+    echo "$as_me:$LINENO: result: $HAVE_PKGCONFIG" >&5
+    echo "${ECHO_T}$HAVE_PKGCONFIG" >&6
+  else
+    echo "$as_me:$LINENO: result: no" >&5
+    echo "${ECHO_T}no" >&6
+  fi
+
+  if test "x$HAVE_PKGCONFIG" = "xyes" ; then
+    echo "$as_me:$LINENO: checking for gtk2.0 (for karma gui)" >&5
+    echo $ECHO_N "checking for gtk2.0 (for karma gui)... $ECHO_C" >&6
+    # Run pkg-config directly and branch on its exit status.  Wrapping the
+    # call in backticks would also execute anything it printed on stdout as
+    # a command of its own.
+    if pkg-config --exists gtk+-2.0 ; then
+      echo "$as_me:$LINENO: result: yes" >&5
+      echo "${ECHO_T}yes" >&6
+      GTKLIBS=`pkg-config --libs gtk+-2.0`
+      GTKCFLAGS=`pkg-config --cflags gtk+-2.0`
+
+      BUILD_KARMA="1"
+
+    else
+      echo "$as_me:$LINENO: result: no" >&5
+      echo "${ECHO_T}no" >&6
+    fi
+  fi
+}
+
+# Check whether --enable-karma or --disable-karma was given.
+if test "${enable_karma+set}" = set; then + enableval="$enable_karma" + if test "x$enableval" = "xyes" ; then + test_for_karma + fi + +else + test_for_karma + +fi; + +# Check whether --enable-static or --disable-static was given. +if test "${enable_static+set}" = set; then + enableval="$enable_static" + build_static=$enableval +else + build_static=yes +fi; + + + + +set_redhat24=0 +use_redhat24=0 + +# Check whether --enable-redhat24 or --disable-redhat24 was given. +if test "${enable_redhat24+set}" = set; then + enableval="$enable_redhat24" + set_redhat24=1 + if test "$enableval" = no ; then + use_redhat24=0 + else + use_redhat24=1 + fi + +fi; + + +set_nptl_workaround=0 +use_nptl_workaround=0 + +# Check whether --enable-nptl-workaround or --disable-nptl-workaround was given. +if test "${enable_nptl_workaround+set}" = set; then + enableval="$enable_nptl_workaround" + set_nptl_workaround=1 + if test "$enableval" = no ; then + use_nptl_workaround=0 + else + use_nptl_workaround=1 + fi + +fi; + +RHTAG1="Red Hat Linux release 9" +RHTAG2="Red Hat Enterprise Linux WS release 3" +RHTAG3="Red Hat Enterprise Linux AS release 3" +RHTAG4="Red Hat Enterprise Linux ES release 3" +RHTAG5="Red Hat Linux Advanced Server release 2.1AS" +RHTAG6="Fedora Core release 1 (Yarrow)" + + + +REDHAT_RELEASE="" +if test -f /etc/redhat-release; then + echo "$as_me:$LINENO: checking for tagged Redhat releases (must patch)" >&5 +echo $ECHO_N "checking for tagged Redhat releases (must patch)... 
$ECHO_C" >&6 + REDHAT_REL=`cat /etc/redhat-release` + if test "x`echo $REDHAT_REL | cut -b 1-23`" = "x$RHTAG1" || + test "x`echo $REDHAT_REL | cut -b 1-37`" = "x$RHTAG2" || + test "x`echo $REDHAT_REL | cut -b 1-37`" = "x$RHTAG3" || + test "x`echo $REDHAT_REL | cut -b 1-37`" = "x$RHTAG4" || + test "x`echo $REDHAT_REL | cut -b 1-43`" = "x$RHTAG5" || + test "x`echo $REDHAT_REL | cut -b 1-30`" = "x$RHTAG6" ; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + + if test "$set_redhat24" = 0 || test "$set_nptl_workaround" = 0 ; then + { { echo "$as_me:$LINENO: error: + You appear to be configuring PVFS2 on a RedHat distribution that + likely ships with a heavily modified kernel and c library. You must + specify two configure arguments to provide necessary information before + proceeding. First pick one of these two options: + ============================= + --enable-redhat24 (if you are using a redhat provided 2.4 kernel) + --disable-redhat24 (if you are using a stock kernel.org kernel) + ============================= + Also pick one of the next two options. You should probably enable the + workaround if you are using RedHat EL 3 prior to update 2. Otherwise it + is probably safer (and higher performance) to disable it: + ============================= + --enable-nptl-workaround (to work around buggy glibc pthread library) + --disable-nptl-workaround (if you trust your glibc pthread library) + ============================= " >&5 +echo "$as_me: error: + You appear to be configuring PVFS2 on a RedHat distribution that + likely ships with a heavily modified kernel and c library. You must + specify two configure arguments to provide necessary information before + proceeding. First pick one of these two options: + ============================= + --enable-redhat24 (if you are using a redhat provided 2.4 kernel) + --disable-redhat24 (if you are using a stock kernel.org kernel) + ============================= + Also pick one of the next two options. 
You should probably enable the + workaround if you are using RedHat EL 3 prior to update 2. Otherwise it + is probably safer (and higher performance) to disable it: + ============================= + --enable-nptl-workaround (to work around buggy glibc pthread library) + --disable-nptl-workaround (if you trust your glibc pthread library) + ============================= " >&2;} + { (exit 1); exit 1; }; } + fi + else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + fi +fi + + +if test "$use_redhat24" = 1 ; then + REDHAT_RELEASE="-DREDHAT_RELEASE_9" +fi + + +if test "$use_nptl_workaround" = 1 ; then + NPTL_WORKAROUND="1" +fi + + +use_aio_thcb=1 +# Check whether --enable-aio-threaded-callbacks or --disable-aio-threaded-callbacks was given. +if test "${enable_aio_threaded_callbacks+set}" = set; then + enableval="$enable_aio_threaded_callbacks" + if test "$enableval" = no ; then use_aio_thcb=0 ; fi +fi; + + +MISC_TROVE_FLAGS="" +if test $use_aio_thcb = 1 ; then + MISC_TROVE_FLAGS="-D__PVFS2_TROVE_AIO_THREADED__" +fi + + +echo "$as_me:$LINENO: checking for __error_code field in aiocb struct" >&5 +echo $ECHO_N "checking for __error_code field in aiocb struct... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #include + +int +main () +{ + + struct aiocb aiocb; + aiocb.__error_code = 0; + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_AIOCB_ERROR_CODE 1 +_ACEOF + + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + +echo "$as_me:$LINENO: checking for __return_value field in aiocb struct" >&5 +echo $ECHO_N "checking for __return_value field in aiocb struct... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #include + +int +main () +{ + + struct aiocb aiocb; + aiocb.__return_value = 0; + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_AIOCB_RETURN_VALUE 1 +_ACEOF + + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: checking whether byte ordering is bigendian" >&5 +echo $ECHO_N "checking whether byte ordering is bigendian... $ECHO_C" >&6 +if test "${ac_cv_c_bigendian+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # See if sys/param.h defines the BYTE_ORDER macro. +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include + +int +main () +{ +#if !BYTE_ORDER || !BIG_ENDIAN || !LITTLE_ENDIAN + bogus endian macros +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + # It does; now see whether it defined to BIG_ENDIAN or not. +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +#include + +int +main () +{ +#if BYTE_ORDER != BIG_ENDIAN + not big endian +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_c_bigendian=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_c_bigendian=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +# It does not; compile a test program. +if test "$cross_compiling" = yes; then + # try to guess the endianness by grepping values into an object file + ac_cv_c_bigendian=unknown + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +short ascii_mm[] = { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 }; +short ascii_ii[] = { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 }; +void _ascii () { char *s = (char *) ascii_mm; s = (char *) ascii_ii; } +short ebcdic_ii[] = { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 }; +short ebcdic_mm[] = { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 }; +void _ebcdic () { char *s = (char *) ebcdic_mm; s = (char *) ebcdic_ii; } +int +main () +{ + _ascii (); _ebcdic (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + if grep BIGenDianSyS conftest.$ac_objext >/dev/null ; then + ac_cv_c_bigendian=yes +fi +if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then + if test "$ac_cv_c_bigendian" = unknown; then + ac_cv_c_bigendian=no + else + # finding both strings is unlikely to happen, but who knows? + ac_cv_c_bigendian=unknown + fi +fi +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +int +main () +{ + /* Are we little or big endian? From Harbison&Steele. 
*/ + union + { + long l; + char c[sizeof (long)]; + } u; + u.l = 1; + exit (u.c[sizeof (long) - 1] == 1); +} +_ACEOF +rm -f conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_c_bigendian=no +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ac_cv_c_bigendian=yes +fi +rm -f core *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_c_bigendian" >&5 +echo "${ECHO_T}$ac_cv_c_bigendian" >&6 +case $ac_cv_c_bigendian in + yes) + +cat >>confdefs.h <<\_ACEOF +#define WORDS_BIGENDIAN 1 +_ACEOF + ;; + no) + ;; + *) + { { echo "$as_me:$LINENO: error: unknown endianness +presetting ac_cv_c_bigendian=no (or yes) will help" >&5 +echo "$as_me: error: unknown endianness +presetting ac_cv_c_bigendian=no (or yes) will help" >&2;} + { (exit 1); exit 1; }; } ;; +esac + + +echo "$as_me:$LINENO: checking for void *" >&5 +echo $ECHO_N "checking for void *... $ECHO_C" >&6 +if test "${ac_cv_type_void_p+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +if ((void * *) 0) + return 0; +if (sizeof (void *)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_void_p=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_type_void_p=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_type_void_p" >&5 +echo "${ECHO_T}$ac_cv_type_void_p" >&6 + +echo "$as_me:$LINENO: checking size of void *" >&5 +echo $ECHO_N "checking size of void *... $ECHO_C" >&6 +if test "${ac_cv_sizeof_void_p+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "$ac_cv_type_void_p" = yes; then + # The cast to unsigned long works around a bug in the HP C Compiler + # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects + # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. + # This bug is HP SR number 8606223364. + if test "$cross_compiling" = yes; then + # Depending upon the size, compute the lo and hi bounds. +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (void *))) >= 0)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_lo=0 ac_mid=0 + while :; do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (void *))) <= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=$ac_mid; break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_lo=`expr $ac_mid + 1` + if test $ac_lo -le $ac_mid; then + ac_lo= ac_hi= + break + fi + ac_mid=`expr 2 '*' $ac_mid + 1` +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (void *))) < 0)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=-1 ac_mid=-1 + while :; do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (void *))) >= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_lo=$ac_mid; break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_hi=`expr '(' $ac_mid ')' - 1` + if test $ac_mid -le $ac_hi; then + ac_lo= ac_hi= + break + fi + ac_mid=`expr 2 '*' $ac_mid` +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_lo= ac_hi= +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +# Binary search between lo and hi bounds. +while test "x$ac_lo" != "x$ac_hi"; do + ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo` + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (void *))) <= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=$ac_mid +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_lo=`expr '(' $ac_mid ')' + 1` +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +done +case $ac_lo in +?*) ac_cv_sizeof_void_p=$ac_lo;; +'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (void *), 77 +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute sizeof (void *), 77 +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } ;; +esac +else + if test "$cross_compiling" = yes; then + { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot run test program while cross compiling +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +long longval () { return (long) (sizeof (void *)); } +unsigned long ulongval () { return (long) (sizeof (void *)); } +#include +#include +int +main () +{ + + FILE *f = fopen ("conftest.val", "w"); + if (! 
f) + exit (1); + if (((long) (sizeof (void *))) < 0) + { + long i = longval (); + if (i != ((long) (sizeof (void *)))) + exit (1); + fprintf (f, "%ld\n", i); + } + else + { + unsigned long i = ulongval (); + if (i != ((long) (sizeof (void *)))) + exit (1); + fprintf (f, "%lu\n", i); + } + exit (ferror (f) || fclose (f) != 0); + + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_sizeof_void_p=`cat conftest.val` +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +{ { echo "$as_me:$LINENO: error: cannot compute sizeof (void *), 77 +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute sizeof (void *), 77 +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi +rm -f core *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi +fi +rm -f conftest.val +else + ac_cv_sizeof_void_p=0 +fi +fi +echo "$as_me:$LINENO: result: $ac_cv_sizeof_void_p" >&5 +echo "${ECHO_T}$ac_cv_sizeof_void_p" >&6 +cat >>confdefs.h <<_ACEOF +#define SIZEOF_VOID_P $ac_cv_sizeof_void_p +_ACEOF + + + + +# Check whether --with-mtrace or --without-mtrace was given. +if test "${with_mtrace+set}" = set; then + withval="$with_mtrace" + CFLAGS="$CFLAGS -include mcheck.h" + +cat >>confdefs.h <<\_ACEOF +#define WITH_MTRACE 1 +_ACEOF + + +fi; + + +# Check whether --with-berkdb-debug or --without-berkdb-debug was given. 
+if test "${with_berkdb_debug+set}" = set; then + withval="$with_berkdb_debug" + +cat >>confdefs.h <<\_ACEOF +#define BERKDB_ERROR_REPORTING 1 +_ACEOF + + +fi; + +TAU_INCS= +BUILD_TAU= + + +# Check whether --with-tau or --without-tau was given. +if test "${with_tau+set}" = set; then + withval="$with_tau" + if test "x$withval" = "xyes" ; then + { { echo "$as_me:$LINENO: error: --with-tau must be given a pathname" >&5 +echo "$as_me: error: --with-tau must be given a pathname" >&2;} + { (exit 1); exit 1; }; } + else + TAU_INCS="-I$withval/include" + CFLAGS="$CFLAGS $TAU_INCS -D__PVFS2_ENABLE_EVENT__" + LDFLAGS="$LDFLAGS -L$withval/$(uname -m)/lib" + tau_mkfile=$withval/$(uname -m)/lib/Makefile.*-profile-trace + if test ! -f $tau_mkfile; then + { { echo "$as_me:$LINENO: error: TAU not compiled with profiling and tracing support" >&5 +echo "$as_me: error: TAU not compiled with profiling and tracing support" >&2;} + { (exit 1); exit 1; }; } + fi + tau_config=$(echo $tau_mkfile | sed -e "s|.*Makefile.tau-||") + LIBS="$LIBS -lTAU_tf -lTAU_traceinput-$tau_config -ltau-$tau_config -lpthread -lstdc++" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_TAU 1 +_ACEOF + + BUILD_TAU=1 + fi + +fi; + + + + +BUILD_KERNEL= + + +# Check whether --with-kernel or --without-kernel was given. +if test "${with_kernel+set}" = set; then + withval="$with_kernel" + if test "x$withval" = "xyes" -o "x$withval" = "x" ; then + { { echo "$as_me:$LINENO: error: --with-kernel must be given the path to your kernel source." >&5 +echo "$as_me: error: --with-kernel must be given the path to your kernel source." >&2;} + { (exit 1); exit 1; }; } + fi + if ! test -d $withval ; then + { { echo "$as_me:$LINENO: error: The --with-kernel path $withval is not a directory." >&5 +echo "$as_me: error: The --with-kernel path $withval is not a directory." >&2;} + { (exit 1); exit 1; }; } + fi + if ! 
test -r $withval/include/linux/version.h ; then + { { echo "$as_me:$LINENO: error: The kernel source tree must have been configured." >&5 +echo "$as_me: error: The kernel source tree must have been configured." >&2;} + { (exit 1); exit 1; }; } + fi + + if test -r $withval/include/linux/version.h && grep -q UTS_RELEASE..2\\.6\\. $withval/include/linux/version.h ; then + vers=`sed -n '/UTS_RELEASE/{; s/.*"\(0-9\.0-9\).*".*/\1/; p; }' $withval/include/linux/version.h` + elif test -r $withval/include/linux/utsrelease.h && grep -q UTS_RELEASE..2\\.6\\. $withval/include/linux/utsrelease.h; then + vers=`sed -n '/UTS_RELEASE/{; s/.*"\(0-9\.0-9\).*".*/\1/; p; }' $withval/include/linux/utsrelease.h` + elif test -r $withval/include/generated/utsrelease.h && grep -q UTS_RELEASE..2\\.6\\. $withval/include/generated/utsrelease.h; then + vers=`sed -n '/UTS_RELEASE/{; s/.*"\(0-9\.0-9\).*".*/\1/; p; }' $withval/include/generated/utsrelease.h` + elif test -r $withval/include/generated/utsrelease.h && grep -qE UTS_RELEASE..3\\. $withval/include/generated/utsrelease.h; then + vers=`sed -n '/UTS_RELEASE/{; s/.*"\(0-9\.0-9\).*".*/\1/; p; }' $withval/include/generated/utsrelease.h` + else + { { echo "$as_me:$LINENO: error: The kernel source tree does not appear to be 2.6 or 3.X" >&5 +echo "$as_me: error: The kernel source tree does not appear to be 2.6 or 3.X" >&2;} + { (exit 1); exit 1; }; } + + fi + + # At least up through 2.6.3 needed to write .__modpost.cmd; this changed + # sometime between then and 2.6.10. Now anybody can compile out-of-tree + # modules against a configured kernel tree + LINUX_KERNEL_SRC="$withval" + BUILD_KERNEL=1 + +else + LINUX_KERNEL_SRC="" + +fi; + + +# Check whether --with-kernel24 or --without-kernel24 was given. +if test "${with_kernel24+set}" = set; then + withval="$with_kernel24" + if test "x$withval" = "xyes" -o "x$withval" = "x" ; then + { { echo "$as_me:$LINENO: error: --with-kernel24 must be given the path to your kernel 2.4.x source." 
>&5 +echo "$as_me: error: --with-kernel24 must be given the path to your kernel 2.4.x source." >&2;} + { (exit 1); exit 1; }; } + fi + if ! test -d $withval ; then + { { echo "$as_me:$LINENO: error: The --with-kernel path $withval is not a directory." >&5 +echo "$as_me: error: The --with-kernel path $withval is not a directory." >&2;} + { (exit 1); exit 1; }; } + fi + if ! test -r $withval/include/linux/version.h ; then + { { echo "$as_me:$LINENO: error: The kernel source tree must have been configured." >&5 +echo "$as_me: error: The kernel source tree must have been configured." >&2;} + { (exit 1); exit 1; }; } + fi + vers=`sed -n '/UTS_RELEASE/{; s/.*"\(0-9\.0-9\).*".*/\1/; p; }' $withval/include/linux/version.h` + if ! grep -q UTS_RELEASE..2\\.4\\. $withval/include/linux/version.h ; then + { { echo "$as_me:$LINENO: error: The kernel source tree does not appear to be 2.4" >&5 +echo "$as_me: error: The kernel source tree does not appear to be 2.4" >&2;} + { (exit 1); exit 1; }; } + fi + k24_minor_ver=`echo $vers | cut -b 26- | sed -n 's/\"//p' | head -n 1` + tmp_k24_minor_ver=`echo $k24_minor_ver | grep \-` + if ! test "x$tmp_k24_minor_ver" = "x"; then + tmp_k24_minor_ver=`echo $k24_minor_ver | cut -d- -f1` + k24_minor_ver=$tmp_k24_minor_ver + fi + LINUX24_KERNEL_SRC="$withval" LINUX24_KERNEL_MINOR_VER="`echo $k24_minor_ver| cut -d'.' -f 1`" + BUILD_KERNEL=1 + +else + LINUX24_KERNEL_SRC="" LINUX24_KERNEL_MINOR_VER="" +fi; + + + + +oldcflags=$CFLAGS +if test -z "${LINUX_KERNEL_SRC}" ; then + lk_src=${LINUX24_KERNEL_SRC} +elif test -z "${LINUX24_KERNEL_SRC}" ; then + lk_src=${LINUX_KERNEL_SRC} +fi + + +# Check whether --enable-kernel-aio or --disable-kernel-aio was given. +if test "${enable_kernel_aio+set}" = set; then + enableval="$enable_kernel_aio" + enable_kernel_aio=$enableval +else + enable_kernel_aio=yes + +fi; + +# Check whether --enable-kernel-sendfile or --disable-kernel-sendfile was given. 
+if test "${enable_kernel_sendfile+set}" = set; then + enableval="$enable_kernel_sendfile" + enable_kernel_sendfile=$enableval +else + enable_kernel_sendfile=no + +fi; + +if test -n "$lk_src" ; then + + + + + NOSTDINCFLAGS="-Werror-implicit-function-declaration -nostdinc -isystem `$CC -print-file-name=include`" + + if test -d $lk_src/source; then + lk_src_source="$lk_src/source" + else + lk_src_source=$lk_src + fi + + CFLAGS="$USR_CFLAGS $NOSTDINCFLAGS -I$lk_src_source/include -I$lk_src_source/include/asm/mach-default -DKBUILD_STR(s)=#s -DKBUILD_BASENAME=KBUILD_STR(empty) -DKBUILD_MODNAME=KBUILD_STR(empty)" + + if test -f $lk_src/include/generated/autoconf.h ; then + CFLAGS="$CFLAGS -imacros $lk_src/include/generated/autoconf.h" + else + CFLAGS="$CFLAGS -imacros $lk_src/include/linux/autoconf.h" + fi + + if test -n "${ARCH}" ; then + CFLAGS="$CFLAGS -I$lk_src_source/arch/${ARCH}/include -I$lk_src_source/arch/${ARCH}/include/asm/mach-default" + else + SUBARCH=`uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ + -e s/arm.*/arm/ -e s/sa110/arm/ \ + -e s/s390x/s390/ -e s/parisc64/parisc/ \ + -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ + -e s/sh.*/sh/` + if test "x$SUBARCH" = "xi386"; then + ARCH=x86 + elif test "x$SUBARCH" = "xx86_64"; then + ARCH=x86 + elif test "x$SUBARCH" = "xsparc64"; then + ARCH=sparc + else + ARCH=$SUBARCH + fi + + CFLAGS="$CFLAGS -I$lk_src_source/arch/${ARCH}/include -I$lk_src_source/arch/${ARCH}/include/asm/mach-default" + + fi + + if test "$lk_src" != "$lk_src_source"; then + CFLAGS="$CFLAGS -I$lk_src/include" + fi + + need_optimize_flag=0 + echo "$as_me:$LINENO: checking for sanity of linux/fs.h include" >&5 +echo $ECHO_N "checking for sanity of linux/fs.h include... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ + + #define __KERNEL__ + #include + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + need_optimize_flag=1 +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + if test $need_optimize_flag -eq 1; then + CFLAGS="-Os $CFLAGS" + fi + + echo "$as_me:$LINENO: checking for i_size_write in kernel" >&5 +echo $ECHO_N "checking for i_size_write in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + void i_size_write(struct inode *inode, + loff_t i_size) + { + return; + } + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_I_SIZE_WRITE 1 +_ACEOF + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for i_size_read in kernel" >&5 +echo $ECHO_N "checking for i_size_read in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + loff_t i_size_read(struct inode *inode) + { + return 0; + } + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_I_SIZE_READ 1 +_ACEOF + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for iget_locked function in kernel" >&5 +echo $ECHO_N "checking for iget_locked function in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + loff_t iget_locked(struct inode *inode) + { + return 0; + } + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_IGET_LOCKED 1 +_ACEOF + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for iget4_locked function in kernel" >&5 +echo $ECHO_N "checking for iget4_locked function in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + loff_t iget4_locked(struct inode *inode) + { + return 0; + } + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_IGET4_LOCKED 1 +_ACEOF + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for iget5_locked function in kernel" >&5 +echo $ECHO_N "checking for iget5_locked function in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + loff_t iget5_locked(struct inode *inode) + { + return 0; + } + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_IGET5_LOCKED 1 +_ACEOF + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for struct xtvec in kernel" >&5 +echo $ECHO_N "checking for struct xtvec in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + static struct xtvec xv = { 0, 0 }; + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_STRUCT_XTVEC 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for struct kmem_cache in kernel" >&5 +echo $ECHO_N "checking for struct kmem_cache in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + #include + + int foo(struct kmem_cache *s) + { + return (s == NULL) ? 3 : 4; + } + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_STRUCT_KMEM_CACHE 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags + + echo "$as_me:$LINENO: checking for SLAB_KERNEL flag in kernel" >&5 +echo $ECHO_N "checking for SLAB_KERNEL flag in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + static int flags = SLAB_KERNEL; + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SLAB_KERNEL 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for memory_backed in struct backing_dev_info in kernel" >&5 +echo $ECHO_N "checking for memory_backed in struct backing_dev_info in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + #include + static struct backing_dev_info bdi = { + .memory_backed = 0 + }; + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_BDI_MEMORY_BACKED 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + if test "x$enable_kernel_sendfile" = "xyes"; then + echo "$as_me:$LINENO: checking for sendfile callback in struct file_operations in kernel" >&5 +echo $ECHO_N "checking for sendfile callback in struct file_operations in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + static struct file_operations fop = { + .sendfile = NULL, + }; + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SENDFILE_VFS_SUPPORT 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + fi + + echo "$as_me:$LINENO: checking for readv callback in struct file_operations in kernel" >&5 +echo $ECHO_N "checking for readv callback in struct file_operations in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + static struct file_operations fop = { + .readv = NULL, + }; + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_READV_FILE_OPERATIONS 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + echo "$as_me:$LINENO: checking for writev callback in struct file_operations in kernel" >&5 +echo $ECHO_N "checking for writev callback in struct file_operations in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + static struct file_operations fop = { + .writev = NULL, + }; + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_WRITEV_FILE_OPERATIONS 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for find_inode_handle callback in struct super_operations in kernel" >&5 +echo $ECHO_N "checking for find_inode_handle callback in struct super_operations in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + static struct super_operations sop = { + .find_inode_handle = NULL, + }; + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FIND_INODE_HANDLE_SUPER_OPERATIONS 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for i_blksize in struct inode" >&5 +echo $ECHO_N "checking for i_blksize in struct inode... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + static struct inode i = { + .i_blksize = 0, + }; + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_I_BLKSIZE_IN_STRUCT_INODE 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for i_sem in struct inode" >&5 +echo $ECHO_N "checking for i_sem in struct inode... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + static struct inode i = { + .i_sem = {0}, + }; + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_I_SEM_IN_STRUCT_INODE 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for statfs_lite callback in struct super_operations in kernel" >&5 +echo $ECHO_N "checking for statfs_lite callback in struct super_operations in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + static struct super_operations sop = { + .statfs_lite = NULL, + }; + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_STATFS_LITE_SUPER_OPERATIONS 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for fill_handle callback in struct inode_operations in kernel" >&5 +echo $ECHO_N "checking for fill_handle callback in struct inode_operations in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + static struct inode_operations iop = { + .fill_handle = NULL, + }; + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FILL_HANDLE_INODE_OPERATIONS 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for getattr_lite callback in struct inode_operations in kernel" >&5 +echo $ECHO_N "checking for getattr_lite callback in struct inode_operations in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + static struct inode_operations iop = { + .getattr_lite = NULL, + }; + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_GETATTR_LITE_INODE_OPERATIONS 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for get_fs_key callback in struct super_operations in kernel" >&5 +echo $ECHO_N "checking for get_fs_key callback in struct super_operations in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + static struct super_operations sop = { + .get_fs_key = NULL, + }; + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 - test -n "$BUILD_CC" && break -done +cat >>confdefs.h <<\_ACEOF +#define HAVE_GET_FS_KEY_SUPER_OPERATIONS 1 +_ACEOF else - BUILD_CC=$CC - BUILD_CFLAGS=$BUILD_CFLAGS - BUILD_LDFLAGS=$BUILD_LDFLAGS -fi - - + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -# -# Threading options. -# -# Client - thread-safe or not -# --disable-thread-safety : chooses null or posix locking mechanism -# For applications that know they are not multi-threaded, or choose -# to handle locking themselves, this configure option can be used to -# slightly decrease library-induced latency by removing the locking. -# -# Client - job threading -# libpvfs2-threaded.{so,a} with __PVFS2_JOB_THREADED__ -# This option causes the client library to spawn an extra thread -# to handle network communications. In this way, it is possible -# that the library can handle new and completed requests from -# the application while also interacting with the network. -# -# These separate libraries can not be built using the existing -# configure mechanism. They are only built if required by the -# kernel module helper. -# -# Server - three options: -# trove -# (hack Makefile.in) -# job -# (hack Makefile.in) -# aio -# --disable-aio-threaded-callbacks -# -# Trove (storage) and job threading in the server are implemented -# using #defines, but not exported through the configure mechanism. -# AIO threaded callbacks are a separate thread used in conjunction -# with trove, and can be disabled through a configure option as -# AIO has been found to be buggy on many systems. 
-# -# -# Kernel module helper - job threading -# --enable-threaded-kmod-helper -# -# This is a special client that "helps" applications that access PVFS -# through the kernel using normal VFS calls (e.g. "ls"). This -# configure options builds it using __PVFS2_JOB_THREADED__ as described -# above. That enables a thread for the network layer and a thread -# for the kernel interface. This has the potential to increase the -# rate at which concurrent operations are processed, but has the potential -# drawback of somewhat higher overhead for a single operation and lack -# of testing. -# -# Enabling this options causes one of libpvfs2-threaded.{so,a} to be -# built, depending on --enable-shared and --enable-static. If both are -# enabled, the shared library takes precedence. -# + echo "$as_me:$LINENO: checking for readdirplus member in file_operations structure" >&5 +echo $ECHO_N "checking for readdirplus member in file_operations structure... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ -{ echo "$as_me:$LINENO: checking for client library thread safety support" >&5 -echo $ECHO_N "checking for client library thread safety support... $ECHO_C" >&6; } -# Check whether --enable-thread-safety was given. 
-if test "${enable_thread_safety+set}" = set; then - enableval=$enable_thread_safety; if test "x$enableval" = "xno" ; then - LIBCFLAGS="$LIBCFLAGS -D__GEN_NULL_LOCKING__" - THREAD_LIB="" - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi -else - LIBCFLAGS="$LIBCFLAGS -D__GEN_POSIX_LOCKING__" - THREAD_LIB="-lpthread" - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } -fi + #define __KERNEL__ + #include +int +main () +{ + struct file_operations filop = { + .readdirplus = NULL + }; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 +cat >>confdefs.h <<\_ACEOF +#define HAVE_READDIRPLUS_FILE_OPERATIONS 1 +_ACEOF -# Check whether --enable-server was given. -if test "${enable_server+set}" = set; then - enableval=$enable_server; if test "x$enableval" = "xyes" ; then - BUILD_SERVER=1 - NEED_BERKELEY_DB=yes -else - BUILD_SERVER="" -fi else - BUILD_SERVER=1 - NEED_BERKELEY_DB=yes -fi - + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -# Check whether --with-openssl was given. 
-if test "${with_openssl+set}" = set; then - withval=$with_openssl; - opensslpath=${withval} + echo "$as_me:$LINENO: checking for readdirplus_lite member in file_operations structure" >&5 +echo $ECHO_N "checking for readdirplus_lite member in file_operations structure... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ - if test "x${withval}" != "xno"; then + #define __KERNEL__ + #include - { echo "$as_me:$LINENO: checking for openssl library" >&5 -echo $ECHO_N "checking for openssl library... $ECHO_C" >&6; } +int +main () +{ - if test "x${opensslpath}" != "x"; then - CFLAGS="${CFLAGS} -I${opensslpath}/include" - LDFLAGS="$LDFLAGS -L${opensslpath}/lib64 -L${opensslpath}/lib" - SERVER_LDFLAGS="$SERVER_LDFLAGS -L${opensslpath}/lib64 -L${opensslpath}/lib" - fi - LIBS="$LIBS -lcrypt -lssl" + struct file_operations filop = { + .readdirplus_lite = NULL + }; - cat >conftest.$ac_ext <<_ACEOF -#include "openssl/bio.h" + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - : + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_READDIRPLUSLITE_FILE_OPERATIONS 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { { echo "$as_me:$LINENO: error: Invalid openssl path specified. No openssl/bio.h found." >&5 -echo "$as_me: error: Invalid openssl path specified. No openssl/bio.h found." >&2;} - { (exit 1); exit 1; }; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for readx member in file_operations structure" >&5 +echo $ECHO_N "checking for readx member in file_operations structure... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include "openssl/bio.h" + + #define __KERNEL__ + #include + int main () { -BIO * b; + + struct file_operations filop = { + .readx = NULL + }; + ; return 0; } _ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_READX_FILE_OPERATIONS 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { { echo "$as_me:$LINENO: error: could not find openssl libs" >&5 -echo "$as_me: error: could not find openssl libs" >&2;} - { (exit 1); exit 1; }; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext + echo "$as_me:$LINENO: checking for writex member in file_operations structure" >&5 +echo $ECHO_N "checking for writex member in file_operations structure... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #define __KERNEL__ + #include + +int +main () +{ + + struct file_operations filop = { + .writex = NULL + }; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define WITH_OPENSSL 1 +#define HAVE_WRITEX_FILE_OPERATIONS 1 _ACEOF +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 -for ac_header in openssl/evp.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for aio support in kernel" >&5 +echo $ECHO_N "checking for aio support in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -$ac_includes_default -#include <$ac_header> + + #define __KERNEL__ + #include + #include + static struct kiocb iocb; + +int +main () +{ + + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_header_compiler=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_AIO 1 +_ACEOF + + have_aio=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no -fi +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + have_aio=no -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -# Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... 
$ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF + if test "x$have_aio" = "xyes" -a "x$enable_kernel_aio" = "xyes"; then + echo "$as_me:$LINENO: checking for ki_dtor in kiocb structure of kernel" >&5 +echo $ECHO_N "checking for ki_dtor in kiocb structure of kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include <$ac_header> + + #define __KERNEL__ + #include + #include + static struct kiocb io_cb = { + .ki_dtor = NULL, + }; + +int +main () +{ + + ; + return 0; +} _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then - ac_header_preproc=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_header_preproc=no -fi - -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} +cat >>confdefs.h <<\_ACEOF +#define HAVE_AIO_VFS_SUPPORT 1 +_ACEOF - ;; -esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 else - eval "$as_ac_Header=\$ac_header_preproc" -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -fi -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -done - - -for ac_header in openssl/crypto.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for new prototype of aio_read callback of file_operations structure" >&5 +echo $ECHO_N "checking for new prototype of aio_read callback of file_operations structure... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -$ac_includes_default -#include <$ac_header> + + #define __KERNEL__ + #include + extern ssize_t my_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); + static struct file_operations fop = { + .aio_read = my_aio_read, + }; + +int +main () +{ + + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_header_compiler=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_AIO_NEW_AIO_SIGNATURE 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } + fi -# Is the header present? 
-{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for dentry argument in kernel super_operations statfs" >&5 +echo $ECHO_N "checking for dentry argument in kernel super_operations statfs... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include <$ac_header> + + #define __KERNEL__ + #include + struct super_operations sop; + int s(struct dentry *de, struct kstatfs *kfs) + { + return 0; + } + +int +main () +{ + + sop.statfs = s; + + ; + return 0; +} _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then - ac_header_preproc=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_DENTRY_STATFS_SOP 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_preproc=no -fi - -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } - -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 - ;; -esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - eval "$as_ac_Header=\$ac_header_preproc" fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -fi -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 + echo "$as_me:$LINENO: checking for vfsmount argument in kernel file_system_type get_sb" >&5 +echo $ECHO_N "checking for vfsmount argument in kernel file_system_type get_sb... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ _ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ -fi - -done - - fi - -else + #define __KERNEL__ + #include + int get_sb_bdev(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, + int (*fill_super)(struct super_block *, void *, + int), + struct vfsmount *vfsm) + { + return 0; + } - { echo "$as_me:$LINENO: checking for openssl library" >&5 -echo $ECHO_N "checking for openssl library... $ECHO_C" >&6; } - TMPLIBS=${LIBS} - LIBS="$LIBS -lcrypt -lssl" +int +main () +{ - cat >conftest.$ac_ext <<_ACEOF -#include "openssl/bio.h" + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - : + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_VFSMOUNT_GETSB 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: WARNING: No openssl headers found." >&5 -echo "$as_me: WARNING: No openssl headers found." 
>&2;} -fi +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for get_sb_nodev" >&5 +echo $ECHO_N "checking for get_sb_nodev... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include "openssl/bio.h" + + #define __KERNEL__ + #include + int v_fill_sb(struct super_block *sb, void *data, int s) + { + return 0; + } + int main () { -BIO * b; + + int ret = 0; + struct super_block *sb = NULL; +#ifdef HAVE_VFSMOUNT_GETSB + ret = get_sb_nodev(NULL, 0, NULL, v_fill_sb, NULL ); +#else + sb = get_sb_nodev(NULL, 0, NULL, v_fill_sb); +#endif + ; return 0; } _ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define WITH_OPENSSL 1 +#define HAVE_GETSB_NODEV 1 _ACEOF - else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - - { echo "$as_me:$LINENO: WARNING: No openssl headers found." >&5 -echo "$as_me: WARNING: No openssl headers found." >&2;} - LIBS=${TMPLIBS} +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext - - -for ac_header in openssl/evp.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for file_system_type get_sb" >&5 +echo $ECHO_N "checking for file_system_type get_sb... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -$ac_includes_default -#include <$ac_header> -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_header_compiler=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no -fi + #define __KERNEL__ + #include -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +int +main () +{ -# Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include <$ac_header> + struct file_system_type f; + f.get_sb = NULL; + + ; + return 0; +} _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! 
-s conftest.err - }; then - ac_header_preproc=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_header_preproc=no -fi - -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} +cat >>confdefs.h <<\_ACEOF +#define HAVE_FSTYPE_GET_SB 1 +_ACEOF - ;; -esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 else - eval "$as_ac_Header=\$ac_header_preproc" -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -fi -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -done -for ac_header in openssl/crypto.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for file_system_type mount exclusively" >&5 +echo $ECHO_N "checking for file_system_type mount exclusively... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -$ac_includes_default -#include <$ac_header> + + #define __KERNEL__ + #include + +int +main () +{ + +#ifdef HAVE_FSTYPE_GET_SB + assert(0); +#else + struct file_system_type f; + f.mount = NULL; +#endif + + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_header_compiler=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FSTYPE_MOUNT_ONLY 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no -fi +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -# Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for xattr support in kernel" >&5 +echo $ECHO_N "checking for xattr support in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include <$ac_header> + + #define __KERNEL__ + #include + static struct inode_operations in_op = { + .getxattr = NULL + }; + +int +main () +{ + + ; + return 0; +} _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then - ac_header_preproc=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_XATTR 1 +_ACEOF + + have_xattr=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_preproc=no -fi - -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } - -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + have_xattr=no - ;; -esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - eval "$as_ac_Header=\$ac_header_preproc" fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -fi -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 + if test "x$have_xattr" = "xyes"; then + echo "$as_me:$LINENO: checking for const argument to setxattr function" >&5 +echo $ECHO_N "checking for const argument to setxattr function... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ _ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ -fi - -done - - - -fi - - -# Check whether --enable-karma-mem-usage-stats was given. 
-if test "${enable_karma_mem_usage_stats+set}" = set; then - enableval=$enable_karma_mem_usage_stats; if test "x$enableval" = "xno" ; then - CFLAGS="$CFLAGS -D__KARMA_DISABLE_MEM_USAGE__" -fi - -fi - - -# Check whether --enable-perf-counters was given. -if test "${enable_perf_counters+set}" = set; then - enableval=$enable_perf_counters; if test "x$enableval" = "xno" ; then - CFLAGS="$CFLAGS -D__PVFS2_DISABLE_PERF_COUNTERS__" -fi - -fi - + #define __KERNEL__ + #include -# Check whether --enable-disk-io was given. -if test "${enable_disk_io+set}" = set; then - enableval=$enable_disk_io; if test "x$enableval" = "xno" ; then - CFLAGS="$CFLAGS -D__PVFS2_DISABLE_DISK_IO__" -fi +int +main () +{ -fi + struct inode_operations inode_ops; + int ret; + struct dentry * dent = NULL; + const char * name = NULL; + const void * val = NULL; + size_t size = 0; + int flags = 0; + ret = inode_ops.setxattr(dent, name, val, size, flags); -MMAP_RA_CACHE="" -# Check whether --enable-mmap-racache was given. -if test "${enable_mmap_racache+set}" = set; then - enableval=$enable_mmap_racache; if test "x$enableval" = "xyes" ; then -MMAP_RA_CACHE="-DUSE_MMAP_RA_CACHE" -fi + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 -fi +cat >>confdefs.h <<\_ACEOF +#define HAVE_SETXATTR_CONST_ARG 1 +_ACEOF +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 -# Check whether --enable-trusted-connections was given. -if test "${enable_trusted_connections+set}" = set; then - enableval=$enable_trusted_connections; if test "x$enableval" = "xyes"; then -TRUSTED_CONNECTIONS="-DUSE_TRUSTED" -fi fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + fi + echo "$as_me:$LINENO: checking for file argument to sysctl proc handlers" >&5 +echo $ECHO_N "checking for file argument to sysctl proc handlers... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + #define __KERNEL__ + #include + #include -test_for_karma() +int +main () { - # Extract the first word of "pkg-config", so it can be a program name with args. -set dummy pkg-config; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_HAVE_PKGCONFIG+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$HAVE_PKGCONFIG"; then - ac_cv_prog_HAVE_PKGCONFIG="$HAVE_PKGCONFIG" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_HAVE_PKGCONFIG="yes" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - - test -z "$ac_cv_prog_HAVE_PKGCONFIG" && ac_cv_prog_HAVE_PKGCONFIG="no" -fi -fi -HAVE_PKGCONFIG=$ac_cv_prog_HAVE_PKGCONFIG -if test -n "$HAVE_PKGCONFIG"; then - { echo "$as_me:$LINENO: result: $HAVE_PKGCONFIG" >&5 -echo "${ECHO_T}$HAVE_PKGCONFIG" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - if test "x$HAVE_PKGCONFIG" = "xyes" ; then - { echo "$as_me:$LINENO: checking for gtk2.0 (for karma gui)" >&5 -echo $ECHO_N "checking for gtk2.0 (for karma gui)... $ECHO_C" >&6; } - if `pkg-config --exists gtk+-2.0` ; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - GTKLIBS=`pkg-config --libs gtk+-2.0` - GTKCFLAGS=`pkg-config --cflags gtk+-2.0` + struct ctl_table * ctl = NULL; + int write = 0; + struct file * filp = NULL; + void __user * buffer = NULL; + size_t * lenp = NULL; + loff_t * ppos = NULL; + proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); - - BUILD_KARMA="1" - - else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - fi - fi + ; + return 0; } +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 -# Check whether --enable-karma was given. -if test "${enable_karma+set}" = set; then - enableval=$enable_karma; if test "x$enableval" = "xyes" ; then - test_for_karma - fi +cat >>confdefs.h <<\_ACEOF +#define HAVE_PROC_HANDLER_FILE_ARG 1 +_ACEOF else - test_for_karma - -fi + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 -# Check whether --enable-static was given. -if test "${enable_static+set}" = set; then - enableval=$enable_static; build_static=$enableval -else - build_static=yes fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + echo "$as_me:$LINENO: checking for ppos argument to sysctl proc handlers" >&5 +echo $ECHO_N "checking for ppos argument to sysctl proc handlers... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + #define __KERNEL__ + #include + #include +int +main () +{ + struct ctl_table * ctl = NULL; + int write = 0; + void __user * buffer = NULL; + size_t * lenp = NULL; + loff_t * ppos = NULL; -set_redhat24=0 -use_redhat24=0 - -# Check whether --enable-redhat24 was given. -if test "${enable_redhat24+set}" = set; then - enableval=$enable_redhat24; set_redhat24=1 - if test "$enableval" = no ; then - use_redhat24=0 - else - use_redhat24=1 - fi - -fi + proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 +cat >>confdefs.h <<\_ACEOF +#define HAVE_PROC_HANDLER_PPOS_ARG 1 +_ACEOF -set_nptl_workaround=0 -use_nptl_workaround=0 +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -# Check whether --enable-nptl-workaround was given. -if test "${enable_nptl_workaround+set}" = set; then - enableval=$enable_nptl_workaround; set_nptl_workaround=1 - if test "$enableval" = no ; then - use_nptl_workaround=0 - else - use_nptl_workaround=1 - fi +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -RHTAG1="Red Hat Linux release 9" -RHTAG2="Red Hat Enterprise Linux WS release 3" -RHTAG3="Red Hat Enterprise Linux AS release 3" -RHTAG4="Red Hat Enterprise Linux ES release 3" -RHTAG5="Red Hat Linux Advanced Server release 2.1AS" -RHTAG6="Fedora Core release 1 (Yarrow)" +for ac_header in linux/posix_acl.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#define __KERNEL__ + #include + #ifdef HAVE_XATTR + #include + #endif +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -REDHAT_RELEASE="" -if test -f /etc/redhat-release; then - { echo "$as_me:$LINENO: checking for tagged Redhat releases (must patch)" >&5 -echo $ECHO_N "checking for tagged Redhat releases (must patch)... 
$ECHO_C" >&6; } - REDHAT_REL=`cat /etc/redhat-release` - if test "x`echo $REDHAT_REL | cut -b 1-23`" = "x$RHTAG1" || - test "x`echo $REDHAT_REL | cut -b 1-37`" = "x$RHTAG2" || - test "x`echo $REDHAT_REL | cut -b 1-37`" = "x$RHTAG3" || - test "x`echo $REDHAT_REL | cut -b 1-37`" = "x$RHTAG4" || - test "x`echo $REDHAT_REL | cut -b 1-43`" = "x$RHTAG5" || - test "x`echo $REDHAT_REL | cut -b 1-30`" = "x$RHTAG6" ; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } +eval "$as_ac_Header=no" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF - if test "$set_redhat24" = 0 || test "$set_nptl_workaround" = 0 ; then - { { echo "$as_me:$LINENO: error: - You appear to be configuring PVFS2 on a RedHat distribution that - likely ships with a heavily modified kernel and c library. You must - specify two configure arguments to provide necessary information before - proceeding. First pick one of these two options: - ============================= - --enable-redhat24 (if you are using a redhat provided 2.4 kernel) - --disable-redhat24 (if you are using a stock kernel.org kernel) - ============================= - Also pick one of the next two options. You should probably enable the - workaround if you are using RedHat EL 3 prior to update 2. Otherwise it - is probably safer (and higher performance) to disable it: - ============================= - --enable-nptl-workaround (to work around buggy glibc pthread library) - --disable-nptl-workaround (if you trust your glibc pthread library) - ============================= " >&5 -echo "$as_me: error: - You appear to be configuring PVFS2 on a RedHat distribution that - likely ships with a heavily modified kernel and c library. 
You must - specify two configure arguments to provide necessary information before - proceeding. First pick one of these two options: - ============================= - --enable-redhat24 (if you are using a redhat provided 2.4 kernel) - --disable-redhat24 (if you are using a stock kernel.org kernel) - ============================= - Also pick one of the next two options. You should probably enable the - workaround if you are using RedHat EL 3 prior to update 2. Otherwise it - is probably safer (and higher performance) to disable it: - ============================= - --enable-nptl-workaround (to work around buggy glibc pthread library) - --disable-nptl-workaround (if you trust your glibc pthread library) - ============================= " >&2;} - { (exit 1); exit 1; }; } - fi - else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - fi fi +done + -if test "$use_redhat24" = 1 ; then - REDHAT_RELEASE="-DREDHAT_RELEASE_9" -fi +for ac_header in linux/posix_acl_xattr.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#define __KERNEL__ + #include + #ifdef HAVE_XATTR + #include + #endif -if test "$use_nptl_workaround" = 1 ; then - NPTL_WORKAROUND="1" -fi +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -use_aio_thcb=1 -# Check whether --enable-aio-threaded-callbacks was given. -if test "${enable_aio_threaded_callbacks+set}" = set; then - enableval=$enable_aio_threaded_callbacks; if test "$enableval" = no ; then use_aio_thcb=0 ; fi +eval "$as_ac_Header=no" fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF +fi +done -MISC_TROVE_FLAGS="" -if test $use_aio_thcb = 1 ; then - MISC_TROVE_FLAGS="-D__PVFS2_TROVE_AIO_THREADED__" -fi -{ echo "$as_me:$LINENO: checking for __error_code field in aiocb struct" >&5 -echo $ECHO_N "checking for __error_code field in aiocb struct... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF +for ac_header in linux/xattr_acl.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ +#define __KERNEL__ + #include + #ifdef HAVE_XATTR + #include + #endif - #include - -int -main () -{ - - struct aiocb aiocb; - aiocb.__error_code = 0; - ; - return 0; -} +#include <$ac_header> _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -cat >>confdefs.h <<\_ACEOF -#define HAVE_AIOCB_ERROR_CODE 1 +eval "$as_ac_Header=no" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + +for ac_header in linux/mount.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ _ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#define __KERNEL__ + #include +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +eval "$as_ac_Header=no" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +done -{ echo "$as_me:$LINENO: checking for __return_value field in aiocb struct" >&5 -echo $ECHO_N "checking for __return_value field in aiocb struct... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF + +for ac_header in linux/ioctl32.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +#define __KERNEL__ + #include - #include - -int -main () -{ - - struct aiocb aiocb; - aiocb.__return_value = 0; - ; - return 0; -} +#include <$ac_header> _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -cat >>confdefs.h <<\_ACEOF -#define HAVE_AIOCB_RETURN_VALUE 1 +eval "$as_ac_Header=no" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in linux/compat.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ _ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#define __KERNEL__ + #include +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +eval "$as_ac_Header=no" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: checking whether byte ordering is bigendian" >&5 -echo $ECHO_N "checking whether byte ordering is bigendian... $ECHO_C" >&6; } -if test "${ac_cv_c_bigendian+set}" = set; then +done + + +for ac_header in linux/syscalls.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - # See if sys/param.h defines the BYTE_ORDER macro. -cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include -#include +#define __KERNEL__ + #include -int -main () -{ -#if ! 
(defined BYTE_ORDER && defined BIG_ENDIAN && defined LITTLE_ENDIAN \ - && BYTE_ORDER && BIG_ENDIAN && LITTLE_ENDIAN) - bogus endian macros -#endif - ; - return 0; -} +#include <$ac_header> _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - # It does; now see whether it defined to BIG_ENDIAN or not. -cat >conftest.$ac_ext <<_ACEOF + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +eval "$as_ac_Header=no" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in asm/ioctl32.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include -#include +#define __KERNEL__ + #include -int -main () -{ -#if BYTE_ORDER != BIG_ENDIAN - not big endian -#endif - ; - return 0; -} +#include <$ac_header> _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_c_bigendian=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_cv_c_bigendian=no +eval "$as_ac_Header=no" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 +done - # It does not; compile a test program. -if test "$cross_compiling" = yes; then - # try to guess the endianness by grepping values into an object file - ac_cv_c_bigendian=unknown + +for ac_header in linux/exportfs.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -short int ascii_mm[] = { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 }; -short int ascii_ii[] = { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 }; -void _ascii () { char *s = (char *) ascii_mm; s = (char *) ascii_ii; } -short int ebcdic_ii[] = { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 }; -short int ebcdic_mm[] = { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 }; -void _ebcdic () { char *s = (char *) ebcdic_mm; s = (char *) ebcdic_ii; } -int -main () -{ - _ascii (); _ebcdic (); - ; - return 0; -} +#define __KERNEL__ + #include + + +#include <$ac_header> _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - if grep BIGenDianSyS conftest.$ac_objext >/dev/null ; then - ac_cv_c_bigendian=yes -fi -if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then - if test "$ac_cv_c_bigendian" = unknown; then - ac_cv_c_bigendian=no - else - # finding both strings is unlikely to happen, but who knows? - ac_cv_c_bigendian=unknown - fi -fi + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 +eval "$as_ac_Header=no" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +done + + + echo "$as_me:$LINENO: checking for linux/smp_lock.h" >&5 +echo $ECHO_N "checking for linux/smp_lock.h... $ECHO_C" >&6 +if test "${ac_cv_header_linux_smp_lock_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ @@ -7349,727 +13332,627 @@ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -$ac_includes_default -int -main () -{ +#define __KERNEL__ + #include - /* Are we little or big endian? From Harbison&Steele. */ - union - { - long int l; - char c[sizeof (long int)]; - } u; - u.l = 1; - return u.c[sizeof (long int) - 1] == 1; - ; - return 0; -} +#include _ACEOF -rm -f conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>&5 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); } && { ac_try='./conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then - ac_cv_c_bigendian=no + ac_cv_header_linux_smp_lock_h=yes else - echo "$as_me: program exited with status $ac_status" >&5 -echo "$as_me: failed program was:" >&5 + echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -( exit $ac_status ) -ac_cv_c_bigendian=yes +ac_cv_header_linux_smp_lock_h=no fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext fi +echo "$as_me:$LINENO: result: $ac_cv_header_linux_smp_lock_h" >&5 +echo "${ECHO_T}$ac_cv_header_linux_smp_lock_h" >&6 -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -{ echo "$as_me:$LINENO: result: $ac_cv_c_bigendian" >&5 -echo "${ECHO_T}$ac_cv_c_bigendian" >&6; } -case $ac_cv_c_bigendian in - yes) - -cat >>confdefs.h <<\_ACEOF -#define WORDS_BIGENDIAN 1 -_ACEOF - ;; - no) - ;; - *) - { { echo "$as_me:$LINENO: error: unknown endianness -presetting ac_cv_c_bigendian=no (or yes) will help" >&5 -echo "$as_me: error: unknown endianness -presetting ac_cv_c_bigendian=no (or yes) will help" >&2;} - { (exit 1); exit 1; }; } ;; -esac + echo "$as_me:$LINENO: checking for generic_file_readv api in kernel" >&5 +echo $ECHO_N "checking for generic_file_readv api in kernel... 
$ECHO_C" >&6 -{ echo "$as_me:$LINENO: checking for void *" >&5 -echo $ECHO_N "checking for void *... $ECHO_C" >&6; } -if test "${ac_cv_type_void_p+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -$ac_includes_default -typedef void * ac__type_new_; + + #define __KERNEL__ + #include + int generic_file_readv(struct inode *inode) + { + return 0; + } + int main () { -if ((ac__type_new_ *) 0) - return 0; -if (sizeof (ac__type_new_)) - return 0; + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_type_void_p=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_cv_type_void_p=no -fi +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_GENERIC_FILE_READV 1 +_ACEOF -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi -{ echo "$as_me:$LINENO: result: $ac_cv_type_void_p" >&5 -echo "${ECHO_T}$ac_cv_type_void_p" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -# The cast to long int works around a bug in the HP C Compiler -# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects -# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. -# This bug is HP SR number 8606223364. -{ echo "$as_me:$LINENO: checking size of void *" >&5 -echo $ECHO_N "checking size of void *... $ECHO_C" >&6; } -if test "${ac_cv_sizeof_void_p+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test "$cross_compiling" = yes; then - # Depending upon the size, compute the lo and hi bounds. -cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for generic_permission api in kernel" >&5 +echo $ECHO_N "checking for generic_permission api in kernel... $ECHO_C" >&6 + + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -$ac_includes_default - typedef void * ac__type_sizeof_; + + #define __KERNEL__ + #include + int generic_permission(struct inode *inode) + { + return 0; + } + int main () { -static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) >= 0)]; -test_array [0] = 0 ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_lo=0 ac_mid=0 - while :; do - cat >conftest.$ac_ext <<_ACEOF + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_GENERIC_PERMISSION 1 +_ACEOF + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for three-param generic_permission" >&5 +echo $ECHO_N "checking for three-param generic_permission... 
$ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -$ac_includes_default - typedef void * ac__type_sizeof_; + + #define __KERNEL__ + #include + struct inode *f; + int main () { -static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) <= $ac_mid)]; -test_array [0] = 0 + + generic_permission(f, 0, NULL); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_hi=$ac_mid; break -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 - ac_lo=`expr $ac_mid + 1` - if test $ac_lo -le $ac_mid; then - ac_lo= ac_hi= - break - fi - ac_mid=`expr 2 '*' $ac_mid + 1` -fi +cat >>confdefs.h <<\_ACEOF +#define HAVE_THREE_PARAM_GENERIC_PERMISSION 1 +_ACEOF -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - done else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for four-param generic_permission" >&5 +echo $ECHO_N "checking for four-param generic_permission... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -$ac_includes_default - typedef void * ac__type_sizeof_; + + #define __KERNEL__ + #include + struct inode *f; + int main () { -static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) < 0)]; -test_array [0] = 0 + + generic_permission(f, 0, 0, NULL); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_hi=-1 ac_mid=-1 - while :; do - cat >conftest.$ac_ext <<_ACEOF + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FOUR_PARAM_GENERIC_PERMISSION 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for two-param generic_permission" >&5 +echo $ECHO_N "checking for two-param generic_permission... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -$ac_includes_default - typedef void * ac__type_sizeof_; + + #define __KERNEL__ + #include + struct inode *f; + int main () { -static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) >= $ac_mid)]; -test_array [0] = 0 + + generic_permission(f, 0); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - ac_lo=$ac_mid; break -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 - ac_hi=`expr '(' $ac_mid ')' - 1` - if test $ac_mid -le $ac_hi; then - ac_lo= ac_hi= - break - fi - ac_mid=`expr 2 '*' $ac_mid` -fi +cat >>confdefs.h <<\_ACEOF +#define HAVE_TWO_PARAM_GENERIC_PERMISSION 1 +_ACEOF -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - done else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_lo= ac_hi= -fi +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -# Binary search between lo and hi bounds. -while test "x$ac_lo" != "x$ac_hi"; do - ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo` - cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for set_nlink" >&5 +echo $ECHO_N "checking for set_nlink... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -$ac_includes_default - typedef void * ac__type_sizeof_; + + #define __KERNEL__ + #include + struct inode *i; + int main () { -static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) <= $ac_mid)]; -test_array [0] = 0 + + set_nlink(i, 0); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_hi=$ac_mid + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_I_SET_NLINK 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_lo=`expr '(' $ac_mid ')' + 1` +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -done -case $ac_lo in -?*) ac_cv_sizeof_void_p=$ac_lo;; -'') if test "$ac_cv_type_void_p" = yes; then - { { echo "$as_me:$LINENO: error: cannot compute sizeof (void *) -See \`config.log' for more details." >&5 -echo "$as_me: error: cannot compute sizeof (void *) -See \`config.log' for more details." >&2;} - { (exit 77); exit 77; }; } - else - ac_cv_sizeof_void_p=0 - fi ;; -esac -else - cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for inc_nlink" >&5 +echo $ECHO_N "checking for inc_nlink... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -$ac_includes_default - typedef void * ac__type_sizeof_; -static long int longval () { return (long int) (sizeof (ac__type_sizeof_)); } -static unsigned long int ulongval () { return (long int) (sizeof (ac__type_sizeof_)); } -#include -#include + + #define __KERNEL__ + #include + struct inode *i; + int main () { - FILE *f = fopen ("conftest.val", "w"); - if (! 
f) - return 1; - if (((long int) (sizeof (ac__type_sizeof_))) < 0) - { - long int i = longval (); - if (i != ((long int) (sizeof (ac__type_sizeof_)))) - return 1; - fprintf (f, "%ld\n", i); - } - else - { - unsigned long int i = ulongval (); - if (i != ((long int) (sizeof (ac__type_sizeof_)))) - return 1; - fprintf (f, "%lu\n", i); - } - return ferror (f) || fclose (f) != 0; + inc_nlink(i); ; return 0; } _ACEOF -rm -f conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>&5 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { ac_try='./conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then - ac_cv_sizeof_void_p=`cat conftest.val` -else - echo "$as_me: program exited with status $ac_status" >&5 -echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 -( exit $ac_status ) -if test "$ac_cv_type_void_p" = yes; then - { { echo "$as_me:$LINENO: error: cannot compute sizeof (void *) -See \`config.log' for more details." >&5 -echo "$as_me: error: cannot compute sizeof (void *) -See \`config.log' for more details." >&2;} - { (exit 77); exit 77; }; } - else - ac_cv_sizeof_void_p=0 - fi -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext -fi -rm -f conftest.val -fi -{ echo "$as_me:$LINENO: result: $ac_cv_sizeof_void_p" >&5 -echo "${ECHO_T}$ac_cv_sizeof_void_p" >&6; } - - - -cat >>confdefs.h <<_ACEOF -#define SIZEOF_VOID_P $ac_cv_sizeof_void_p -_ACEOF - - - - -# Check whether --with-mtrace was given. -if test "${with_mtrace+set}" = set; then - withval=$with_mtrace; CFLAGS="$CFLAGS -include mcheck.h" - -cat >>confdefs.h <<\_ACEOF -#define WITH_MTRACE 1 -_ACEOF - - -fi - - - -# Check whether --with-berkdb-debug was given. -if test "${with_berkdb_debug+set}" = set; then - withval=$with_berkdb_debug; cat >>confdefs.h <<\_ACEOF -#define BERKDB_ERROR_REPORTING 1 +#define HAVE_I_INC_NLINK 1 _ACEOF +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -fi - - - -# Check whether --with-pablo was given. 
-if test "${with_pablo+set}" = set; then - withval=$with_pablo; if test "x$withval" = "xyes" ; then - { { echo "$as_me:$LINENO: error: --with-pablo must be given a pathname" >&5 -echo "$as_me: error: --with-pablo must be given a pathname" >&2;} - { (exit 1); exit 1; }; } - else - CFLAGS="$CFLAGS -I$withval/include" - LDFLAGS="$LDFLAGS -L$withval/lib" - LIBS="$LIBS -lPabloTrace" - -cat >>confdefs.h <<\_ACEOF -#define HAVE_PABLO 1 -_ACEOF - - fi +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext - - -# Check whether --with-mpe was given. -if test "${with_mpe+set}" = set; then - withval=$with_mpe; if test "x$withval" = "xyes" ; then - { { echo "$as_me:$LINENO: error: --with-mpe must be given a pathname" >&5 -echo "$as_me: error: --with-mpe must be given a pathname" >&2;} - { (exit 1); exit 1; }; } - else - CFLAGS="$CFLAGS -I$withval/include" - LDFLAGS="$LDFLAGS -L$withval/lib" - LIBS="$LIBS -lmpe -lmpich" - { echo "$as_me:$LINENO: checking for valid mpe install" >&5 -echo $ECHO_N "checking for valid mpe install... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for drop_nlink" >&5 +echo $ECHO_N "checking for drop_nlink... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -#include + + #define __KERNEL__ + #include + struct inode *i; + int main () { - int eventID_begin, eventID_end; - eventID_begin = MPE_Log_get_event_number(); - eventID_end = MPE_Log_get_event_number(); - MPE_Describe_state( eventID_begin, eventID_end, "Amult", "bluegreen" ); - MPE_Log_event( eventID_begin, 0, (char *)0 ); - MPE_Log_event( eventID_end, 0, (char *)0 ); + drop_nlink(i); ; return 0; } _ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_MPE 1 +#define HAVE_I_DROP_NLINK 1 _ACEOF - { echo "$as_me:$LINENO: result: ok" >&5 -echo "${ECHO_T}ok" >&6; } -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - { { echo "$as_me:$LINENO: error: Invalid path to mpe install" >&5 -echo "$as_me: error: Invalid path to mpe install" >&2;} - { (exit 1); exit 1; }; } -fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext - fi - -fi - - - -# Check whether --with-kernel was given. -if test "${with_kernel+set}" = set; then - withval=$with_kernel; if test "x$withval" = "xyes" -o "x$withval" = "x" ; then - { { echo "$as_me:$LINENO: error: --with-kernel must be given the path to your kernel source." >&5 -echo "$as_me: error: --with-kernel must be given the path to your kernel source." >&2;} - { (exit 1); exit 1; }; } - fi - if ! test -d $withval ; then - { { echo "$as_me:$LINENO: error: The --with-kernel path $withval is not a directory." >&5 -echo "$as_me: error: The --with-kernel path $withval is not a directory." >&2;} - { (exit 1); exit 1; }; } - fi - if ! test -r $withval/include/linux/version.h ; then - { { echo "$as_me:$LINENO: error: The kernel source tree must have been configured." >&5 -echo "$as_me: error: The kernel source tree must have been configured." >&2;} - { (exit 1); exit 1; }; } - fi - vers=`sed -n '/UTS_RELEASE/{; s/.*"\(0-9\.0-9\).*".*/\1/; p; }' $withval/include/linux/version.h` - if ! grep -q UTS_RELEASE..2\\.6\\. $withval/include/linux/version.h ; then - # 2.6.18 moves UTS_RELEASE into its own header - if ! grep -q UTS_RELEASE..2\\.6\\. 
$withval/include/linux/utsrelease.h ; then - { { echo "$as_me:$LINENO: error: The kernel source tree does not appear to be 2.6" >&5 -echo "$as_me: error: The kernel source tree does not appear to be 2.6" >&2;} - { (exit 1); exit 1; }; } - fi - fi - # At least up through 2.6.3 needed to write .__modpost.cmd; this changed - # sometime between then and 2.6.10. Now anybody can compile out-of-tree - # modules against a configured kernel tree - LINUX_KERNEL_SRC="$withval" - -else - LINUX_KERNEL_SRC="" -fi - - - -# Check whether --with-kernel24 was given. -if test "${with_kernel24+set}" = set; then - withval=$with_kernel24; if test "x$withval" = "xyes" -o "x$withval" = "x" ; then - { { echo "$as_me:$LINENO: error: --with-kernel24 must be given the path to your kernel 2.4.x source." >&5 -echo "$as_me: error: --with-kernel24 must be given the path to your kernel 2.4.x source." >&2;} - { (exit 1); exit 1; }; } - fi - if ! test -d $withval ; then - { { echo "$as_me:$LINENO: error: The --with-kernel path $withval is not a directory." >&5 -echo "$as_me: error: The --with-kernel path $withval is not a directory." >&2;} - { (exit 1); exit 1; }; } - fi - if ! test -r $withval/include/linux/version.h ; then - { { echo "$as_me:$LINENO: error: The kernel source tree must have been configured." >&5 -echo "$as_me: error: The kernel source tree must have been configured." >&2;} - { (exit 1); exit 1; }; } - fi - vers=`sed -n '/UTS_RELEASE/{; s/.*"\(0-9\.0-9\).*".*/\1/; p; }' $withval/include/linux/version.h` - if ! grep -q UTS_RELEASE..2\\.4\\. $withval/include/linux/version.h ; then - { { echo "$as_me:$LINENO: error: The kernel source tree does not appear to be 2.4" >&5 -echo "$as_me: error: The kernel source tree does not appear to be 2.4" >&2;} - { (exit 1); exit 1; }; } - fi - k24_minor_ver=`echo $vers | cut -b 26- | sed -n 's/\"//p' | head -n 1` - tmp_k24_minor_ver=`echo $k24_minor_ver | grep \-` - if ! 
test "x$tmp_k24_minor_ver" = "x"; then - tmp_k24_minor_ver=`echo $k24_minor_ver | cut -d- -f1` - k24_minor_ver=$tmp_k24_minor_ver - fi - LINUX24_KERNEL_SRC="$withval" LINUX24_KERNEL_MINOR_VER="`echo $k24_minor_ver| cut -d'.' -f 1`" - -else - LINUX24_KERNEL_SRC="" LINUX24_KERNEL_MINOR_VER="" -fi - - - - -oldcflags=$CFLAGS -if test -z "${LINUX_KERNEL_SRC}" ; then - lk_src=${LINUX24_KERNEL_SRC} -elif test -z "${LINUX24_KERNEL_SRC}" ; then - lk_src=${LINUX_KERNEL_SRC} -fi - - -# Check whether --enable-kernel-aio was given. -if test "${enable_kernel_aio+set}" = set; then - enableval=$enable_kernel_aio; enable_kernel_aio=$enableval -else - enable_kernel_aio=yes - -fi - - -# Check whether --enable-kernel-sendfile was given. -if test "${enable_kernel_sendfile+set}" = set; then - enableval=$enable_kernel_sendfile; enable_kernel_sendfile=$enableval else - enable_kernel_sendfile=no + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for clear_nlink" >&5 +echo $ECHO_N "checking for clear_nlink... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ -if test -n "$lk_src" ; then + #define __KERNEL__ + #include + struct inode *i; +int +main () +{ + clear_nlink(i); - NOSTDINCFLAGS="-Werror-implicit-function-declaration -nostdinc -isystem `$CC -print-file-name=include`" + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 - CFLAGS="$USR_CFLAGS $NOSTDINCFLAGS -I$lk_src/include -I$lk_src/include/asm/mach-default -DKBUILD_STR(s)=#s -DKBUILD_BASENAME=KBUILD_STR(empty) -DKBUILD_MODNAME=KBUILD_STR(empty) -imacros $lk_src/include/linux/autoconf.h" +cat >>confdefs.h <<\_ACEOF +#define HAVE_I_CLEAR_NLINK 1 +_ACEOF - if test -n "${ARCH}" ; then - CFLAGS="$CFLAGS -I$lk_src/arch/${ARCH}/include" - fi +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: checking for i_size_write in kernel" >&5 -echo $ECHO_N "checking for i_size_write in kernel... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for posix_acl_equiv_mode umode_t" >&5 +echo $ECHO_N "checking for posix_acl_equiv_mode umode_t... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -8078,57 +13961,64 @@ cat >>conftest.$ac_ext <<_ACEOF #define __KERNEL__ #include - void i_size_write(struct inode *inode, - loff_t i_size) - { - return; - } + #include + struct posix_acl *acl; + umode_t mode = 0; int main () { + posix_acl_equiv_mode(acl, &mode); + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_POSIX_ACL_EQUIV_MODE_UMODE_T 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_I_SIZE_WRITE 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for i_size_read in kernel" >&5 -echo $ECHO_N "checking for i_size_read in kernel... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for posix_acl_create" >&5 +echo $ECHO_N "checking for posix_acl_create... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -8137,56 +14027,64 @@ cat >>conftest.$ac_ext <<_ACEOF #define __KERNEL__ #include - loff_t i_size_read(struct inode *inode) - { - return 0; - } + #include + struct posix_acl *acl; + umode_t mode = 0; int main () { + posix_acl_create(&acl, GFP_KERNEL, &mode); + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_POSIX_ACL_CREATE 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_I_SIZE_READ 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for iget_locked function in kernel" >&5 -echo $ECHO_N "checking for iget_locked function in kernel... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for posix_acl_chmod" >&5 +echo $ECHO_N "checking for posix_acl_chmod... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -8195,56 +14093,66 @@ cat >>conftest.$ac_ext <<_ACEOF #define __KERNEL__ #include - loff_t iget_locked(struct inode *inode) - { - return 0; - } + #include + struct posix_acl *acl; + struct inode *inode; + umode_t mode = 0; int main () { + posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode ); + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_POSIX_ACL_CHMOD 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_IGET_LOCKED 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - { echo "$as_me:$LINENO: checking for iget4_locked function in kernel" >&5 -echo $ECHO_N "checking for iget4_locked function in kernel... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for posix_acl_clone" >&5 +echo $ECHO_N "checking for posix_acl_clone... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -8253,56 +14161,63 @@ cat >>conftest.$ac_ext <<_ACEOF #define __KERNEL__ #include - loff_t iget4_locked(struct inode *inode) - { - return 0; - } + #include + struct posix_acl *acl; int main () { + posix_acl_clone(acl, GFP_KERNEL); + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_POSIX_ACL_CLONE 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_IGET4_LOCKED 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for iget5_locked function in kernel" >&5 -echo $ECHO_N "checking for iget5_locked function in kernel... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for fsync with loff_t" >&5 +echo $ECHO_N "checking for fsync with loff_t... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -8311,56 +14226,68 @@ cat >>conftest.$ac_ext <<_ACEOF #define __KERNEL__ #include - loff_t iget5_locked(struct inode *inode) + + int my_fsync(struct file *, loff_t, loff_t, int); + + int my_fsync(struct file *f, loff_t start, loff_t end, int datasync) { - return 0; } int main () { + struct file_operations fop; + + fop.fsync = my_fsync; + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FSYNC_LOFF_T_PARAMS 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_IGET5_LOCKED 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - { echo "$as_me:$LINENO: checking for struct xtvec in kernel" >&5 -echo $ECHO_N "checking for struct xtvec in kernel... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for generic_getxattr api in kernel" >&5 +echo $ECHO_N "checking for generic_getxattr api in kernel... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -8368,8 +14295,12 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #define __KERNEL__ - #include - static struct xtvec xv = { 0, 0 }; + #include + #include + int generic_getxattr(struct inode *inode) + { + return 0; + } int main () @@ -8380,44 +14311,45 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_STRUCT_XTVEC 1 -_ACEOF - + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 -fi +cat >>confdefs.h <<\_ACEOF +#define HAVE_GENERIC_GETXATTR 1 +_ACEOF -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - { echo "$as_me:$LINENO: checking for struct kmem_cache in kernel" >&5 -echo $ECHO_N "checking for struct kmem_cache in kernel... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for arg member in read_descriptor_t in kernel" >&5 +echo $ECHO_N "checking for arg member in read_descriptor_t in kernel... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -8425,61 +14357,61 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include - #include - - int foo(struct kmem_cache *s) - { - return (s == NULL) ? 
3 : 4; - } + #define __KERNEL__ + #include int main () { + read_descriptor_t x; + x.arg.data = NULL; + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_STRUCT_KMEM_CACHE 1 +#define HAVE_ARG_IN_READ_DESCRIPTOR_T 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CFLAGS=$tmp_cflags - - { echo "$as_me:$LINENO: checking for SLAB_KERNEL flag in kernel" >&5 -echo $ECHO_N "checking for SLAB_KERNEL flag in kernel... 
$ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for fh_to_dentry member in export_operations in kernel" >&5 +echo $ECHO_N "checking for fh_to_dentry member in export_operations in kernel... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -8487,55 +14419,61 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include - static int flags = SLAB_KERNEL; + #define __KERNEL__ + #include int main () { + struct export_operations x; + x.fh_to_dentry = NULL; + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_SLAB_KERNEL 1 +#define HAVE_FHTODENTRY_EXPORT_OPERATIONS 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for memory_backed in struct backing_dev_info in kernel" >&5 -echo $ECHO_N "checking for memory_backed in struct backing_dev_info in kernel... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for encode_fh member in export_operations in kernel" >&5 +echo $ECHO_N "checking for encode_fh member in export_operations in kernel... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -8543,71 +14481,71 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include - #include - static struct backing_dev_info bdi = { - .memory_backed = 0 - }; + #define __KERNEL__ + #include int main () { + struct export_operations x; + x.encode_fh = NULL; + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_BDI_MEMORY_BACKED 1 +#define HAVE_ENCODEFH_EXPORT_OPERATIONS 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - if test "x$enable_kernel_sendfile" = "xyes"; then - { echo "$as_me:$LINENO: checking for sendfile callback in struct file_operations in kernel" >&5 -echo $ECHO_N "checking for sendfile callback in struct file_operations in kernel... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for second arg type int in address_space_operations releasepage" >&5 +echo $ECHO_N "checking for second arg type int in address_space_operations releasepage... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - #define __KERNEL__ - #include - static struct file_operations fop = { - .sendfile = NULL, - }; + #define __KERNEL__ + #include + extern int try_to_release_page(struct page *page, int gfp_mask); int main () @@ -8618,43 +14556,46 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_SENDFILE_VFS_SUPPORT 1 +#define HAVE_INT_ARG2_ADDRESS_SPACE_OPERATIONS_RELEASEPAGE 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - fi - - { echo "$as_me:$LINENO: checking for readv callback in struct file_operations in kernel" >&5 -echo $ECHO_N "checking for readv callback in struct file_operations in kernel... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for int return in inode_operations follow_link" >&5 +echo $ECHO_N "checking for int return in inode_operations follow_link... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -8662,11 +14603,10 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include - static struct file_operations fop = { - .readv = NULL, - }; + #define __KERNEL__ + #include + extern int page_follow_link_light(struct dentry *, + struct nameidata *); int main () @@ -8677,41 +14617,46 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_READV_FILE_OPERATIONS 1 +#define HAVE_INT_RETURN_INODE_OPERATIONS_FOLLOW_LINK 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - { echo "$as_me:$LINENO: checking for writev callback in struct file_operations in kernel" >&5 -echo $ECHO_N "checking for writev callback in struct file_operations in kernel... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for int return in kmem_cache_destroy" >&5 +echo $ECHO_N "checking for int return in kmem_cache_destroy... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -8719,11 +14664,9 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - #define __KERNEL__ - #include - static struct file_operations fop = { - .writev = NULL, - }; + #define __KERNEL__ + #include + extern int kmem_cache_destroy(kmem_cache_t *); int main () @@ -8734,42 +14677,46 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_WRITEV_FILE_OPERATIONS 1 +#define HAVE_INT_RETURN_KMEM_CACHE_DESTROY 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for find_inode_handle callback in struct super_operations in kernel" >&5 -echo $ECHO_N "checking for find_inode_handle callback in struct super_operations in kernel... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for older int return in invalidatepage" >&5 +echo $ECHO_N "checking for older int return in invalidatepage... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -8779,55 +14726,66 @@ cat >>conftest.$ac_ext <<_ACEOF #define __KERNEL__ #include - static struct super_operations sop = { - .find_inode_handle = NULL, - }; int main () { + struct address_space_operations aso; + + int ret; + struct page * page = NULL; + unsigned long offset; + + ret = aso.invalidatepage(page, offset); + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_FIND_INODE_HANDLE_SUPER_OPERATIONS 1 +#define HAVE_INT_RETURN_ADDRESS_SPACE_OPERATIONS_INVALIDATEPAGE 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: NO" >&5 +echo "${ECHO_T}NO" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for i_blksize in struct inode" >&5 -echo $ECHO_N "checking for i_blksize in struct inode... $ECHO_C" >&6; } + tmp_cflags=${CFLAGS} + CFLAGS="${CFLAGS} -Werror" + echo "$as_me:$LINENO: checking for warnings when including linux/config.h" >&5 +echo $ECHO_N "checking for warnings when including linux/config.h... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -8836,10 +14794,7 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #define __KERNEL__ - #include - static struct inode i = { - .i_blksize = 0, - }; + #include int main () @@ -8850,42 +14805,47 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_I_BLKSIZE_IN_STRUCT_INODE 1 +#define HAVE_NOWARNINGS_WHEN_INCLUDING_LINUX_CONFIG_H 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for statfs_lite callback in struct super_operations in kernel" >&5 -echo $ECHO_N "checking for statfs_lite callback in struct super_operations in kernel... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for compat_ioctl member in file_operations structure" >&5 +echo $ECHO_N "checking for compat_ioctl member in file_operations structure... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -8893,58 +14853,64 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include - static struct super_operations sop = { - .statfs_lite = NULL, - }; + #define __KERNEL__ + #include int main () { + struct file_operations filop = { + .compat_ioctl = NULL + }; + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_STATFS_LITE_SUPER_OPERATIONS 1 +#define HAVE_COMPAT_IOCTL_HANDLER 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for fill_handle callback in struct inode_operations in kernel" >&5 -echo $ECHO_N "checking for fill_handle callback in struct inode_operations in kernel... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + if test x$ac_cv_header_linux_ioctl32_h = xyes ; then + echo "$as_me:$LINENO: checking for register_ioctl32_conversion kernel exports" >&5 +echo $ECHO_N "checking for register_ioctl32_conversion kernel exports... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -8952,10 +14918,12 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #define __KERNEL__ - #include - static struct inode_operations iop = { - .fill_handle = NULL, - }; + #include + #include + int register_ioctl32_conversion(void) + { + return 0; + } int main () @@ -8966,42 +14934,46 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_FILL_HANDLE_INODE_OPERATIONS 1 -_ACEOF - + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 -fi +cat >>confdefs.h <<\_ACEOF +#define HAVE_REGISTER_IOCTL32_CONVERSION 1 +_ACEOF -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + fi - { echo "$as_me:$LINENO: checking for getattr_lite callback in struct inode_operations in kernel" >&5 -echo $ECHO_N "checking for getattr_lite callback in struct inode_operations in kernel... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for int return value of kmem_cache_destroy" >&5 +echo $ECHO_N "checking for int return value of kmem_cache_destroy... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -9010,56 +14982,59 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #define __KERNEL__ - #include - static struct inode_operations iop = { - .getattr_lite = NULL, - }; + #include int main () { + int i = kmem_cache_destroy(NULL); + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_GETATTR_LITE_INODE_OPERATIONS 1 +#define HAVE_KMEM_CACHE_DESTROY_INT_RETURN 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for get_fs_key callback in struct super_operations in kernel" >&5 -echo $ECHO_N "checking for get_fs_key callback in struct super_operations in kernel... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for combined file_operations readv and aio_read" >&5 +echo $ECHO_N "checking for combined file_operations readv and aio_read... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -9067,57 +15042,61 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - #define __KERNEL__ - #include - static struct super_operations sop = { - .get_fs_key = NULL, - }; + #define __KERNEL__ + #include int main () { + struct file_operations filop = { + .readv = NULL + }; + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_GET_FS_KEY_SUPER_OPERATIONS 1 -_ACEOF - + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 -fi +cat >>confdefs.h <<\_ACEOF +#define HAVE_COMBINED_AIO_AND_VECTOR 1 +_ACEOF -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext - { echo "$as_me:$LINENO: checking for readdirplus member in file_operations structure" >&5 -echo $ECHO_N "checking for readdirplus member in file_operations structure... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for kzalloc" >&5 +echo $ECHO_N "checking for kzalloc... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -9125,58 +15104,61 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include + #define __KERNEL__ + #include int main () { - struct file_operations filop = { - .readdirplus = NULL - }; + void * a; + a = kzalloc(1024, GFP_KERNEL); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_READDIRPLUS_FILE_OPERATIONS 1 +#define HAVE_KZALLOC 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for readdirplus_lite member in file_operations structure" >&5 -echo $ECHO_N "checking for readdirplus_lite member in file_operations structure... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for two arguments to register_sysctl_table" >&5 +echo $ECHO_N "checking for two arguments to register_sysctl_table... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -9184,59 +15166,61 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include + #define __KERNEL__ + #include + #include int main () { - struct file_operations filop = { - .readdirplus_lite = NULL - }; + register_sysctl_table(NULL, 0); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_READDIRPLUSLITE_FILE_OPERATIONS 1 +#define HAVE_TWO_ARG_REGISTER_SYSCTL_TABLE 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - - { echo "$as_me:$LINENO: checking for readx member in file_operations structure" >&5 -echo $ECHO_N "checking for readx member in file_operations structure... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for generic FS_IOC ioctl flags" >&5 +echo $ECHO_N "checking for generic FS_IOC ioctl flags... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF @@ -9251,51 +15235,53 @@ int main () { - struct file_operations filop = { - .readx = NULL - }; + int flags = FS_IOC_GETFLAGS; ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_READX_FILE_OPERATIONS 1 -_ACEOF - + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi +cat >>confdefs.h <<\_ACEOF +#define HAVE_NO_FS_IOC_FLAGS 1 +_ACEOF + + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext - { echo "$as_me:$LINENO: checking for writex member in file_operations structure" >&5 -echo $ECHO_N "checking for writex member in file_operations structure... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for obsolete struct page count without underscore" >&5 +echo $ECHO_N "checking for obsolete struct page count without underscore... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -9304,252 +15290,254 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #define __KERNEL__ - #include + #include int main () { - struct file_operations filop = { - .writex = NULL - }; + struct page *p; + int foo; + foo = atomic_read(&(p)->count); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_WRITEX_FILE_OPERATIONS 1 +#define HAVE_OBSOLETE_STRUCT_PAGE_COUNT_NO_UNDERSCORE 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for aio support in kernel" >&5 -echo $ECHO_N "checking for aio support in kernel... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking if kernel has device classes" >&5 +echo $ECHO_N "checking if kernel has device classes... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include - #include - static struct kiocb iocb; + #define __KERNEL__ + #include int main () { + class_device_destroy(NULL, "pvfs2") + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_AIO 1 +#define HAVE_KERNEL_DEVICE_CLASSES 1 _ACEOF - have_aio=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - have_aio=no +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - if test "x$have_aio" = "xyes" -a "x$enable_kernel_aio" = "xyes"; then - { echo "$as_me:$LINENO: checking for ki_dtor in kiocb structure of kernel" >&5 -echo $ECHO_N "checking for ki_dtor in kiocb structure of kernel... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for destructor param to kmem_cache_create" >&5 +echo $ECHO_N "checking for destructor param to kmem_cache_create... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - #define __KERNEL__ - #include - #include - static struct kiocb io_cb = { - .ki_dtor = NULL, - }; + #define __KERNEL__ + #include int main () { + kmem_cache_create("config-test", 0, 0, 0, NULL, NULL); + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_AIO_VFS_SUPPORT 1 +#define HAVE_KMEM_CACHE_CREATE_DESTRUCTOR_PARAM 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - tmp_cflags=$CFLAGS - CFLAGS="$CFLAGS -Werror" - { echo "$as_me:$LINENO: checking for new prototype of aio_read callback of file_operations structure" >&5 -echo $ECHO_N "checking for new prototype of aio_read callback of file_operations structure... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for one-param kmem_cache_create constructor" >&5 +echo $ECHO_N "checking for one-param kmem_cache_create constructor... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include - extern ssize_t my_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); - static struct file_operations fop = { - .aio_read = my_aio_read, - }; + #define __KERNEL__ + #include + #include + void ctor(void *req) + { + } int main () { + kmem_cache_create("config-test", 0, 0, 0, ctor); + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_AIO_NEW_AIO_SIGNATURE 1 +#define HAVE_KMEM_CACHE_CREATE_CTOR_ONE_PARAM 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CFLAGS=$tmp_cflags - - fi - - if test "x$GCC" = "xyes" ; then - { echo "$as_me:$LINENO: checking for gcc major version" >&5 -echo $ECHO_N "checking for gcc major version... $ECHO_C" >&6; } - gcc_version=`$CC --version| head -1 | tr . ' ' | cut -d ' ' -f 3` - { echo "$as_me:$LINENO: result: $gcc_version" >&5 -echo "${ECHO_T}$gcc_version" >&6; } - if test $gcc_version -gt 3 ; then - extra_gcc_flags="-Wno-pointer-sign -Wno-strict-aliasing -Wno-strict-aliasing=2" - fi - fi - - { echo "$as_me:$LINENO: checking for dentry argument in kernel super_operations statfs" >&5 -echo $ECHO_N "checking for dentry argument in kernel super_operations statfs... 
$ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror -Wall" + echo "$as_me:$LINENO: checking for two param permission" >&5 +echo $ECHO_N "checking for two param permission... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -9557,58 +15545,73 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #define __KERNEL__ + #include + #include #include - int vfs_statfs(struct dentry *de, struct kstatfs *kfs) + #include + int ctor(struct inode *i, int a) { return 0; } + struct inode_operations iop = { + .permission = ctor, + }; int main () { + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_DENTRY_STATFS_SOP 1 +#define HAVE_TWO_PARAM_PERMISSION 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - { echo "$as_me:$LINENO: checking for vfsmount argument in kernel file_system_type get_sb" >&5 -echo $ECHO_N "checking for vfsmount argument in kernel file_system_type get_sb... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for two-param kmem_cache_create constructor" >&5 +echo $ECHO_N "checking for two-param kmem_cache_create constructor... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -9616,62 +15619,65 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #define __KERNEL__ - #include - int get_sb_bdev(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, - int), - struct vfsmount *vfsm) + #include + #include + void ctor(struct kmem_cache *cachep, void *req) { - return 0; } int main () { + kmem_cache_create("config-test", 0, 0, 0, ctor); + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_VFSMOUNT_GETSB 1 +#define HAVE_KMEM_CACHE_CREATE_CTOR_TWO_PARAM 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for xattr support in kernel" >&5 -echo $ECHO_N "checking for xattr support in kernel... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking if kernel address_space struct has a spin_lock field named page_lock" >&5 +echo $ECHO_N "checking if kernel address_space struct has a spin_lock field named page_lock... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -9680,59 +15686,62 @@ cat >>conftest.$ac_ext <<_ACEOF #define __KERNEL__ #include - static struct inode_operations in_op = { - .getxattr = NULL - }; int main () { + struct address_space as; + spin_lock(&as.page_lock); + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_XATTR 1 +#define HAVE_SPIN_LOCK_PAGE_ADDR_SPACE_STRUCT 1 _ACEOF - have_xattr=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - have_xattr=no +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - if test "x$have_xattr" = "xyes"; then - { echo "$as_me:$LINENO: checking for const argument to setxattr function" >&5 -echo $ECHO_N "checking for const argument to setxattr function... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking if kernel address_space struct has a rwlock_t field named tree_lock" >&5 +echo $ECHO_N "checking if kernel address_space struct has a rwlock_t field named tree_lock... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -9746,729 +15755,741 @@ int main () { - struct inode_operations inode_ops; - int ret; - struct dentry * dent = NULL; - const char * name = NULL; - const void * val = NULL; - size_t size = 0; - int flags = 0; - - ret = inode_ops.setxattr(dent, name, val, size, flags); + struct address_space as; + read_lock(&as.tree_lock); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_SETXATTR_CONST_ARG 1 +#define HAVE_RW_LOCK_TREE_ADDR_SPACE_STRUCT 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - fi - - { echo "$as_me:$LINENO: checking for 6th argument to sysctl proc handlers" >&5 -echo $ECHO_N "checking for 6th argument to sysctl proc handlers... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking if kernel address_space struct has a spinlock_t field named tree_lock" >&5 +echo $ECHO_N "checking if kernel address_space struct has a spinlock_t field named tree_lock... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - #define __KERNEL__ - #include - #include + #define __KERNEL__ + #include int main () { - proc_dointvec_minmax(NULL, 0, NULL, NULL, NULL, NULL); + struct address_space as; + spin_lock(&as.tree_lock); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_PROC_HANDLER_SIX_ARG 1 +#define HAVE_SPIN_LOCK_TREE_ADDR_SPACE_STRUCT 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - -for ac_header in linux/posix_acl.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking if kernel address_space struct has a priv_lock field - from RT linux" >&5 +echo $ECHO_N "checking if kernel address_space struct has a priv_lock field - from RT linux... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#define __KERNEL__ - #include - #ifdef HAVE_XATTR - #include - #endif + #define __KERNEL__ + #include + +int +main () +{ + + struct address_space as; + spin_lock(&as.priv_lock); -#include <$ac_header> + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - eval "$as_ac_Header=yes" + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_RT_PRIV_LOCK_ADDR_SPACE_STRUCT 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_Header=no" -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -done - - - -for ac_header in linux/posix_acl_xattr.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking if kernel defines mapping_nrpages macro - from RT linux" >&5 +echo $ECHO_N "checking if kernel defines mapping_nrpages macro - from RT linux... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#define __KERNEL__ - #include - #ifdef HAVE_XATTR - #include - #endif + #define __KERNEL__ + #include + +int +main () +{ + + struct address_space idata; + int i = mapping_nrpages(&idata); -#include <$ac_header> + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - eval "$as_ac_Header=yes" -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - eval "$as_ac_Header=no" -fi + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +cat >>confdefs.h <<\_ACEOF +#define HAVE_MAPPING_NRPAGES_MACRO 1 _ACEOF -fi - -done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -for ac_header in linux/xattr_acl.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking if kernel super_operations contains read_inode field" >&5 +echo $ECHO_N "checking if kernel super_operations contains read_inode field... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#define __KERNEL__ - #include - #ifdef HAVE_XATTR - #include - #endif + #define __KERNEL__ + #include -#include <$ac_header> +int +main () +{ + + struct super_operations sops; + sops.read_inode(NULL); + + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - eval "$as_ac_Header=yes" + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_READ_INODE 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_Header=no" -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -done - - - -for ac_header in linux/mount.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking if kernel super_operations contains drop_inode field" >&5 +echo $ECHO_N "checking if kernel super_operations contains drop_inode field... 
$ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#define __KERNEL__ - #include + #define __KERNEL__ + #include -#include <$ac_header> +int +main () +{ + + struct super_operations sops; + sops.drop_inode(NULL); + + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - eval "$as_ac_Header=yes" + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_DROP_INODE 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_Header=no" -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -done - - -for ac_header in linux/ioctl32.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking if kernel super_operations contains put_inode field" >&5 +echo $ECHO_N "checking if kernel super_operations contains put_inode field... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#define __KERNEL__ - #include + #define __KERNEL__ + #include + +int +main () +{ + + struct super_operations sops; + sops.put_inode(NULL); -#include <$ac_header> + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - eval "$as_ac_Header=yes" + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_PUT_INODE 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_Header=no" -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -done - - -for ac_header in linux/compat.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking if mount.h defines MNT_NOATIME" >&5 +echo $ECHO_N "checking if mount.h defines MNT_NOATIME... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#define __KERNEL__ - #include + #define __KERNEL__ + #include -#include <$ac_header> +int +main () +{ + + int flag = MNT_NOATIME; + + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - eval "$as_ac_Header=yes" + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_MNT_NOATIME 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_Header=no" -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -done - - -for ac_header in linux/syscalls.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking if mount.h defines MNT_NODIRATIME" >&5 +echo $ECHO_N "checking if mount.h defines MNT_NODIRATIME... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#define __KERNEL__ - #include + #define __KERNEL__ + #include -#include <$ac_header> +int +main () +{ + + int flag = MNT_NODIRATIME; + + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - eval "$as_ac_Header=yes" + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_MNT_NODIRATIME 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_Header=no" -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -done - - -for ac_header in asm/ioctl32.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for d_alloc_anon" >&5 +echo $ECHO_N "checking for d_alloc_anon... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#define __KERNEL__ - #include + #define __KERNEL__ + #include -#include <$ac_header> +int +main () +{ + + struct inode *i; + d_alloc_anon(i); + + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - eval "$as_ac_Header=yes" -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - eval "$as_ac_Header=no" -fi + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +cat >>confdefs.h <<\_ACEOF +#define HAVE_D_ALLOC_ANON 1 _ACEOF -fi +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -done +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -for ac_header in linux/exportfs.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for s_dirty in struct super_block" >&5 +echo $ECHO_N "checking for s_dirty in struct super_block... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#define __KERNEL__ - #include + #define __KERNEL__ + #include -#include <$ac_header> +int +main () +{ + + struct super_block *s; + list_empty(&s->s_dirty); + + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - eval "$as_ac_Header=yes" + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SB_DIRTY_LIST 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_Header=no" -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -done - - - { echo "$as_me:$LINENO: checking for generic_file_readv api in kernel" >&5 -echo $ECHO_N "checking for generic_file_readv api in kernel... $ECHO_C" >&6; } - - cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for current_fsuid" >&5 +echo $ECHO_N "checking for current_fsuid... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - #define __KERNEL__ - #include - int generic_file_readv(struct inode *inode) - { - return 0; - } + #define __KERNEL__ + #include + #include int main () { + int uid = current_fsuid(); + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_GENERIC_FILE_READV 1 +#define HAVE_CURRENT_FSUID 1 _ACEOF -fi +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 - { echo "$as_me:$LINENO: checking for generic_permission api in kernel" >&5 -echo $ECHO_N "checking for generic_permission api in kernel... $ECHO_C" >&6; } +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags + echo "$as_me:$LINENO: checking if kernel backing_dev_info struct has a name field" >&5 +echo $ECHO_N "checking if kernel backing_dev_info struct has a name field... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -10478,114 +16499,130 @@ cat >>conftest.$ac_ext <<_ACEOF #define __KERNEL__ #include - int generic_permission(struct inode *inode) - { - return 0; - } + #include int main () { + struct backing_dev_info foo = + { + .name = "foo" + }; + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_BACKING_DEV_INFO_NAME 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_GENERIC_PERMISSION 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for generic_getxattr api in kernel" >&5 -echo $ECHO_N "checking for generic_getxattr api in kernel... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for bdi_init" >&5 +echo $ECHO_N "checking for bdi_init... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ + #define __KERNEL__ #include - #include - int generic_getxattr(struct inode *inode) - { - return 0; - } + #include int main () { + int ret = bdi_init(NULL); + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_BDI_INIT 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_GENERIC_GETXATTR 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - { echo "$as_me:$LINENO: checking for arg member in read_descriptor_t in kernel" >&5 -echo $ECHO_N "checking for arg member in read_descriptor_t in kernel... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking whether struct ctl_table has ctl_name" >&5 +echo $ECHO_N "checking whether struct ctl_table has ctl_name... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -10593,57 +16630,59 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - #define __KERNEL__ - #include + #define __KERNEL__ + #include + static struct ctl_table c = { .ctl_name = 0, }; int main () { - read_descriptor_t x; - x.arg.data = NULL; - ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_ARG_IN_READ_DESCRIPTOR_T 1 +#define HAVE_CTL_NAME 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for second arg type int in address_space_operations releasepage" >&5 -echo $ECHO_N "checking for second arg type int in address_space_operations releasepage... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking whether struct ctl_table has strategy" >&5 +echo $ECHO_N "checking whether struct ctl_table has strategy... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -10651,9 +16690,9 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include - extern int try_to_release_page(struct page *page, int gfp_mask); + #define __KERNEL__ + #include + static struct ctl_table c = { .strategy = 0, }; int main () @@ -10664,42 +16703,49 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_INT_ARG2_ADDRESS_SPACE_OPERATIONS_RELEASEPAGE 1 +#define HAVE_STRATEGY_NAME 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for int return in inode_operations follow_link" >&5 -echo $ECHO_N "checking for int return in inode_operations follow_link... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for five-param xattr_handler.get" >&5 +echo $ECHO_N "checking for five-param xattr_handler.get... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -10707,56 +16753,67 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - #define __KERNEL__ - #include - extern int page_follow_link_light(struct dentry *, - struct nameidata *); + #define __KERNEL__ + #include + #include + static struct xattr_handler x; + static int get_xattr_h( struct dentry *d, const char *n, + void *b, size_t s, int h) + { return 0; } int main () { + x.get = get_xattr_h; + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_INT_RETURN_INODE_OPERATIONS_FOLLOW_LINK 1 +#define HAVE_XATTR_HANDLER_GET_FIVE_PARAM 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for int return in kmem_cache_destroy" >&5 -echo $ECHO_N "checking for int return in kmem_cache_destroy... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for six-param xattr_handler.set" >&5 +echo $ECHO_N "checking for six-param xattr_handler.set... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -10764,55 +16821,68 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include - extern int kmem_cache_destroy(kmem_cache_t *); + #define __KERNEL__ + #include + #include + static struct xattr_handler x; + static int set_xattr_h( struct dentry *d, const char *n, + const void *b, size_t s, int f, int h) + { return 0; } int main () { + x.set = set_xattr_h; + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_INT_RETURN_KMEM_CACHE_DESTROY 1 +#define HAVE_XATTR_HANDLER_SET_SIX_PARAM 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for older int return in invalidatepage" >&5 -echo $ECHO_N "checking for older int return in invalidatepage... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for const s_xattr member in super_block struct" >&5 +echo $ECHO_N "checking for const s_xattr member in super_block struct... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF @@ -10822,62 +16892,64 @@ cat >>conftest.$ac_ext <<_ACEOF #define __KERNEL__ #include + #include + struct super_block sb; + const struct xattr_handler *x[] = { NULL }; int main () { - struct address_space_operations aso; - - int ret; - struct page * page = NULL; - unsigned long offset; - - ret = aso.invalidatepage(page, offset); + sb.s_xattr = x; ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_INT_RETURN_ADDRESS_SPACE_OPERATIONS_INVALIDATEPAGE 1 +#define HAVE_CONST_S_XATTR_IN_SUPERBLOCK 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: NO" >&5 -echo "${ECHO_T}NO" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - tmp_cflags=${CFLAGS} - CFLAGS="${CFLAGS} -Werror" - { echo "$as_me:$LINENO: checking for warnings when including linux/config.h" >&5 -echo $ECHO_N "checking for warnings when including linux/config.h... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking stddef.h true/false enum" >&5 +echo $ECHO_N "checking stddef.h true/false enum... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -10886,7 +16958,8 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #define __KERNEL__ - #include + #include + int f = true; int main () @@ -10897,43 +16970,50 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_NOWARNINGS_WHEN_INCLUDING_LINUX_CONFIG_H 1 +#define HAVE_TRUE_FALSE_ENUM 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CFLAGS=$tmp_cflags - { echo "$as_me:$LINENO: checking for compat_ioctl member in file_operations structure" >&5 -echo $ECHO_N "checking for compat_ioctl member in file_operations structure... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for dentry argument in fsync" >&5 +echo $ECHO_N "checking for dentry argument in fsync... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -10941,60 +17021,67 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - #define __KERNEL__ - #include + #define __KERNEL__ + #include + static struct file_operations f; + static int local_fsync(struct file *f, struct dentry *d, int i) + { return 0; } int main () { - struct file_operations filop = { - .compat_ioctl = NULL - }; + f.fsync = local_fsync; ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_COMPAT_IOCTL_HANDLER 1 +#define HAVE_FSYNC_DENTRY_PARAM 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - if test x$ac_cv_header_linux_ioctl32_h = xyes ; then - { echo "$as_me:$LINENO: checking for register_ioctl32_conversion kernel exports" >&5 -echo $ECHO_N "checking for register_ioctl32_conversion kernel exports... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for unlocked_ioctl in file_operations" >&5 +echo $ECHO_N "checking for unlocked_ioctl in file_operations... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -11002,58 +17089,63 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #define __KERNEL__ - #include - #include - int register_ioctl32_conversion(void) - { - return 0; - } + #include + static struct file_operations f; int main () { + f.unlocked_ioctl = NULL; + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_UNLOCKED_IOCTL_HANDLER 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_REGISTER_IOCTL32_CONVERSION 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - fi - - { echo "$as_me:$LINENO: checking for int return value of kmem_cache_destroy" >&5 -echo $ECHO_N "checking for int return value of kmem_cache_destroy... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for inode_setattr" >&5 +echo $ECHO_N "checking for inode_setattr... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -11062,55 +17154,65 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #define __KERNEL__ - #include + #include + struct iattr *iattr; + struct inode *inode; + int ret; int main () { - int i = kmem_cache_destroy(NULL); + ret = inode_setattr(inode, iattr); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_KMEM_CACHE_DESTROY_INT_RETURN 1 +#define HAVE_INODE_SETATTR 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for combined file_operations readv and aio_read" >&5 -echo $ECHO_N "checking for combined file_operations readv and aio_read... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for three-param dentry_operations.d_hash" >&5 +echo $ECHO_N "checking for three-param dentry_operations.d_hash... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -11118,57 +17220,70 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include + #define __KERNEL__ + #include + #include + static struct dentry_operations d; + static int d_hash_t(const struct dentry *d, + const struct inode *i, + struct qstr * q) + { return 0; } int main () { - struct file_operations filop = { - .readv = NULL - }; + d.d_hash = d_hash_t; ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_THREE_PARAM_D_HASH 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_COMBINED_AIO_AND_VECTOR 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - { echo "$as_me:$LINENO: checking for kzalloc" >&5 -echo $ECHO_N "checking for kzalloc... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for seven-param dentry_operations.d_compare" >&5 +echo $ECHO_N "checking for seven-param dentry_operations.d_compare... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -11177,56 +17292,73 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #define __KERNEL__ - #include + #include + #include + static struct dentry_operations d; + static int d_compare_t(const struct dentry *d1, + const struct inode *i1, + const struct dentry *d2, + const struct inode *i2, + unsigned int len, + const char *str, + const struct qstr *qstr) + { return 0; } int main () { - void * a; - a = kzalloc(1024, GFP_KERNEL); + d.d_compare = d_compare_t; ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_KZALLOC 1 +#define HAVE_SEVEN_PARAM_D_COMPARE 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - { echo "$as_me:$LINENO: checking for two arguments to register_sysctl_table" >&5 -echo $ECHO_N "checking for two arguments to register_sysctl_table... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for constified dentry_operations.d_delete" >&5 +echo $ECHO_N "checking for constified dentry_operations.d_delete... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -11235,56 +17367,66 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #define __KERNEL__ - #include - #include + #include + #include + static struct dentry_operations d; + static int d_delete_t(const struct dentry *d) + { return 0; } int main () { - register_sysctl_table(NULL, 0); + d.d_delete = d_delete_t; ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_TWO_ARG_REGISTER_SYSCTL_TABLE 1 +#define HAVE_D_DELETE_CONST 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for generic FS_IOC ioctl flags" >&5 -echo $ECHO_N "checking for generic FS_IOC ioctl flags... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for dentry.d_count atomic_t type" >&5 +echo $ECHO_N "checking for dentry.d_count atomic_t type... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -11292,56 +17434,66 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - #define __KERNEL__ - #include + #define __KERNEL__ + #include + #include + struct dentry d; + atomic_t x; int main () { - int flags = FS_IOC_GETFLAGS; + x = d.d_count; ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_NO_FS_IOC_FLAGS 1 +#define HAVE_DENTRY_D_COUNT_ATOMIC 1 _ACEOF - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for obsolete struct page count without underscore" >&5 -echo $ECHO_N "checking for obsolete struct page count without underscore... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for three-param inode_operations permission" >&5 +echo $ECHO_N "checking for three-param inode_operations permission... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -11349,58 +17501,66 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include + #define __KERNEL__ + #include + struct inode_operations i; + int p(struct inode *i, int mode, unsigned int flags) + { return 0; } int main () { - struct page *p; - int foo; - foo = atomic_read(&(p)->count); + i.permission = p; ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_OBSOLETE_STRUCT_PAGE_COUNT_NO_UNDERSCORE 1 +#define HAVE_THREE_PARAM_PERMISSION_WITH_FLAG 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking if kernel has device classes" >&5 -echo $ECHO_N "checking if kernel has device classes... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for three-param acl_check of generic_permission" >&5 +echo $ECHO_N "checking for three-param acl_check of generic_permission... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -11408,56 +17568,66 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - #define __KERNEL__ - #include + #define __KERNEL__ + #include + struct inode *i; + int p(struct inode *i, int mode, unsigned int flags) + { return 0; } int main () { - class_create(NULL, "pvfs2") + generic_permission(i, 0, 0, p); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_KERNEL_DEVICE_CLASSES 1 +#define HAVE_THREE_PARAM_ACL_CHECK 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for destructor param to kmem_cache_create" >&5 -echo $ECHO_N "checking for destructor param to kmem_cache_create... $ECHO_C" >&6; } + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + echo "$as_me:$LINENO: checking for SPIN_LOCK_UNLOCKED " >&5 +echo $ECHO_N "checking for SPIN_LOCK_UNLOCKED ... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -11465,58 +17635,63 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include + #define __KERNEL__ + #include + spinlock_t test_lock = SPIN_LOCK_UNLOCKED; + struct inode *i; int main () { - kmem_cache_create("config-test", 0, 0, 0, NULL, NULL); - ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_KMEM_CACHE_CREATE_DESTRUCTOR_PARAM 1 +#define HAVE_SPIN_LOCK_UNLOCKED 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - tmp_cflags=$CFLAGS + tmp_cflags=$CFLAGS CFLAGS="$CFLAGS -Werror" - { echo "$as_me:$LINENO: checking for two-param kmem_cache_create constructor" >&5 -echo $ECHO_N "checking for two-param kmem_cache_create constructor... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for get_sb " >&5 +echo $ECHO_N "checking for get_sb ... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -11525,60 +17700,60 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #define __KERNEL__ - #include - #include - void ctor(struct kmem_cache *cachep, void *req) - { - } + #include + struct file_system_type f; int main () { - kmem_cache_create("config-test", 0, 0, 0, ctor); + f.get_sb = NULL; ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_KMEM_CACHE_CREATE_CTOR_TWO_PARAM 1 +#define HAVE_GET_SB_MEMBER_FILE_SYSTEM_TYPE 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CFLAGS=$tmp_cflags - - { echo "$as_me:$LINENO: checking if kernel address_space struct has a spin_lock field named page_lock" >&5 -echo $ECHO_N "checking if kernel address_space struct has a spin_lock field named page_lock... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for dirty_inode flag" >&5 +echo $ECHO_N "checking for dirty_inode flag... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -11586,585 +17761,767 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include + #define __KERNEL__ + #include + void di(struct inode *i, int f) + { + return; + } int main () { - struct address_space as; - spin_lock(&as.page_lock); + struct super_operations s; + s.dirty_inode = di; ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_SPIN_LOCK_PAGE_ADDR_SPACE_STRUCT 1 +#define HAVE_DIRTY_INODE_FLAGS 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$tmp_cflags + + CFLAGS=$oldcflags + + + +cat >>confdefs.h <<\_ACEOF +#define WITH_LINUX_KMOD 1 +_ACEOF + +fi + +# Check whether --enable-threaded-kmod-helper or --disable-threaded-kmod-helper was given. +if test "${enable_threaded_kmod_helper+set}" = set; then + enableval="$enable_threaded_kmod_helper" + if test "x$enableval" = "xyes" ; then + THREADED_KMOD_HELPER=yes + fi + +fi; + + +BUILD_ABSOLUTE_TOP=`pwd` +SRC_RELATIVE_TOP=`echo $0 | sed -e "s|configure$||"` +SRC_ABSOLUTE_TOP=`cd $SRC_RELATIVE_TOP ; pwd` + + + + + + + + +# Check whether --enable-fast or --disable-fast was given. +if test "${enable_fast+set}" = set; then + enableval="$enable_fast" + +if test "x$USR_CFLAGS_SET" = "xno"; then + CFLAGS="$CFLAGS -DNDEBUG -O3 -DGOSSIP_DISABLE_DEBUG" +fi + +fi; + +test_for_fuse() +{ + # Extract the first word of "pkg-config", so it can be a program name with args. 
+set dummy pkg-config; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_HAVE_PKGCONFIG+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$HAVE_PKGCONFIG"; then + ac_cv_prog_HAVE_PKGCONFIG="$HAVE_PKGCONFIG" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_HAVE_PKGCONFIG="yes" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + + test -z "$ac_cv_prog_HAVE_PKGCONFIG" && ac_cv_prog_HAVE_PKGCONFIG="no" +fi +fi +HAVE_PKGCONFIG=$ac_cv_prog_HAVE_PKGCONFIG +if test -n "$HAVE_PKGCONFIG"; then + echo "$as_me:$LINENO: result: $HAVE_PKGCONFIG" >&5 +echo "${ECHO_T}$HAVE_PKGCONFIG" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + if test "x$HAVE_PKGCONFIG" = "xyes" ; then + echo "$as_me:$LINENO: checking for FUSE library" >&5 +echo $ECHO_N "checking for FUSE library... $ECHO_C" >&6 + if `pkg-config --exists fuse` ; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + FUSE_LDFLAGS=`pkg-config --libs fuse` + FUSE_CFLAGS=`pkg-config --cflags fuse` + + + + BUILD_FUSE="1" + + else + { { echo "$as_me:$LINENO: error: FUSE: FUSE library not found. Check LD_LIBRARY_PATH." >&5 +echo "$as_me: error: FUSE: FUSE library not found. Check LD_LIBRARY_PATH." >&2;} + { (exit 1); exit 1; }; } + fi + else + { { echo "$as_me:$LINENO: error: FUSE: pkg-config not available. Please install pkg-config." >&5 +echo "$as_me: error: FUSE: pkg-config not available. Please install pkg-config." >&2;} + { (exit 1); exit 1; }; } + fi +} + +# Check whether --enable-fuse or --disable-fuse was given. 
+if test "${enable_fuse+set}" = set; then + enableval="$enable_fuse" + +if test "x$enableval" = "xyes" ; then + test_for_fuse + CFLAGS="$CFLAGS -D__PVFS2_ENABLE_FUSE__" +fi + +fi; + +# default CFLAGS is -g -O2, unless user set CFLAGS or asked for --enable-fast +if test "x$USR_CFLAGS_SET" = "xno" && test "x$enable_fast" != "xyes"; then + CFLAGS="$CFLAGS -g -O2" +fi + + + + +STRICT_CFLAGS= +# Check whether --enable-strict or --disable-strict was given. +if test "${enable_strict+set}" = set; then + enableval="$enable_strict" + STRICT_CFLAGS=1 +fi; + + +# Check whether --enable-verbose-build or --disable-verbose-build was given. +if test "${enable_verbose_build+set}" = set; then + enableval="$enable_verbose_build" + QUIET_COMPILE=0 +else + QUIET_COMPILE=1 +fi; + -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +test_for_epoll() +{ - { echo "$as_me:$LINENO: checking if kernel address_space struct has a rwlock_t field named tree_lock" >&5 -echo $ECHO_N "checking if kernel address_space struct has a rwlock_t field named tree_lock... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF +for ac_header in sys/epoll.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - - #define __KERNEL__ - #include - -int -main () -{ - - struct address_space as; - read_lock(&as.tree_lock); - - ; - return 0; -} +$ac_includes_default +#include <$ac_header> _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_SPIN_LOCK_TREE_ADDR_SPACE_STRUCT 1 -_ACEOF - + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - +ac_header_compiler=no fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking if kernel address_space struct has a priv_lock field - from RT linux" >&5 -echo $ECHO_N "checking if kernel address_space struct has a priv_lock field - from RT linux... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF +# Is the header present? +echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - - #define __KERNEL__ - #include - -int -main () -{ - - struct address_space as; - spin_lock(&as.priv_lock); - - ; - return 0; -} +#include <$ac_header> _ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_RT_PRIV_LOCK_ADDR_SPACE_STRUCT 1 -_ACEOF - + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +done - { echo "$as_me:$LINENO: checking if kernel defines mapping_nrpages macro - from RT linux" >&5 -echo $ECHO_N "checking if kernel defines mapping_nrpages macro - from RT linux... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for epoll functions" >&5 +echo $ECHO_N "checking for epoll functions... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - - #define __KERNEL__ - #include - +#include int main () { - struct address_space idata; - int i = mapping_nrpages(&idata); + int fd; + fd = epoll_create(1); + close(fd); ; return 0; } _ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_MAPPING_NRPAGES_MACRO 1 -_ACEOF + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + BUILD_EPOLL=1 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +} -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +# Check whether --enable-epoll or --disable-epoll was given. +if test "${enable_epoll+set}" = set; then + enableval="$enable_epoll" - { echo "$as_me:$LINENO: checking if kernel super_operations contains read_inode field" >&5 -echo $ECHO_N "checking if kernel super_operations contains read_inode field... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + if test "x$enableval" = "xyes"; then + BUILD_EPOLL=1 + + fi + +else + + test_for_epoll + +fi; + +# Check whether --enable-segv-backtrace or --disable-segv-backtrace was given. +if test "${enable_segv_backtrace+set}" = set; then + enableval="$enable_segv_backtrace" + +else + echo "$as_me:$LINENO: checking if segv backtrace capable" >&5 +echo $ECHO_N "checking if segv backtrace capable... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #define __KERNEL__ - #include +#include +#define __USE_GNU +#include +#if !defined(REG_EIP) && !defined(REG_RIP) + choke me +#endif int main () { - struct super_operations sops; - sops.read_inode(NULL); - ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_READ_INODE 1 -_ACEOF - + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + PVFS2_SEGV_BACKTRACE=1 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi; - CFLAGS=$oldcflags - - -cat >>confdefs.h <<\_ACEOF -#define WITH_LINUX_KMOD 1 -_ACEOF -fi +# Check whether --enable-shared or --disable-shared was given. +if test "${enable_shared+set}" = set; then + enableval="$enable_shared" + build_shared=$enableval +else + build_shared=no +fi; -# Check whether --enable-threaded-kmod-helper was given. -if test "${enable_threaded_kmod_helper+set}" = set; then - enableval=$enable_threaded_kmod_helper; if test "x$enableval" = "xyes" ; then - THREADED_KMOD_HELPER=yes - fi +if test "x$build_shared" = "xno" -a "x$build_static" = "xno" ; then + { { echo "$as_me:$LINENO: error: Must do --enable-shared or --enable-static or both." 
>&5 +echo "$as_me: error: Must do --enable-shared or --enable-static or both." >&2;} + { (exit 1); exit 1; }; } fi +BUILD_USRINT= +# Check whether --enable-usrint or --disable-usrint was given. +if test "${enable_usrint+set}" = set; then + enableval="$enable_usrint" + if test "x$enableval" = "xyes" ; then +cat >>confdefs.h <<\_ACEOF +#define PVFS_USRINT_BUILD 1 +_ACEOF -BUILD_ABSOLUTE_TOP=`pwd` -SRC_RELATIVE_TOP=$srcdir -SRC_ABSOLUTE_TOP=`cd $srcdir ; pwd` - - - - - - - + BUILD_USRINT=1 +else -# Check whether --enable-fast was given. -if test "${enable_fast+set}" = set; then - enableval=$enable_fast; -if test "x$USR_CFLAGS_SET" = "xno"; then - CFLAGS="$CFLAGS -DNDEBUG -O3 -DGOSSIP_DISABLE_DEBUG" -fi +cat >>confdefs.h <<\_ACEOF +#define PVFS_USRINT_BUILD 0 +_ACEOF fi +else -# default CFLAGS is -g -O2, unless user set CFLAGS or asked for --enable-fast -if test "x$USR_CFLAGS_SET" = "xno" && test "x$enable_fast" != "xyes"; then - CFLAGS="$CFLAGS -g -O2" -fi +cat >>confdefs.h <<\_ACEOF +#define PVFS_USRINT_BUILD 1 +_ACEOF +BUILD_USRINT=1 +fi; +USRINT_KMOUNT= +# Check whether --enable-usrint-kmount or --disable-usrint-kmount was given. +if test "${enable_usrint_kmount+set}" = set; then + enableval="$enable_usrint_kmount" + if test "x$enableval" = "xyes" ; then + if test "x$BUILD_KERNEL" = "x1" -o "x$BUILD_FUSE" = "x1" ; then -STRICT_CFLAGS= -# Check whether --enable-strict was given. -if test "${enable_strict+set}" = set; then - enableval=$enable_strict; STRICT_CFLAGS=1 -fi +cat >>confdefs.h <<\_ACEOF +#define PVFS_USRINT_KMOUNT 1 +_ACEOF + USRINT_KMOUNT=1 + else + { { echo "$as_me:$LINENO: error: Assume FS mounted but neither kernel nor FUSE build " >&5 +echo "$as_me: error: Assume FS mounted but neither kernel nor FUSE build " >&2;} + { (exit 1); exit 1; }; } + fi +else +cat >>confdefs.h <<\_ACEOF +#define PVFS_USRINT_KMOUNT 0 +_ACEOF -# Check whether --enable-verbose-build was given. 
-if test "${enable_verbose_build+set}" = set; then - enableval=$enable_verbose_build; QUIET_COMPILE=0 -else - QUIET_COMPILE=1 fi +else +cat >>confdefs.h <<\_ACEOF +#define PVFS_USRINT_KMOUNT 0 +_ACEOF +fi; -test_for_epoll() -{ +# Check whether --enable-usrint-cwd or --disable-usrint-cwd was given. +if test "${enable_usrint_cwd+set}" = set; then + enableval="$enable_usrint_cwd" + if test "x$enableval" = "xyes" ; then -for ac_header in sys/epoll.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -$ac_includes_default -#include <$ac_header> +cat >>confdefs.h <<\_ACEOF +#define PVFS_USRINT_CWD 1 _ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - ac_header_compiler=yes + else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +cat >>confdefs.h <<\_ACEOF +#define PVFS_USRINT_CWD 0 +_ACEOF + fi +else + if test "x$USRINT_KMOUNT" = "x1" ; then + +cat >>confdefs.h <<\_ACEOF +#define PVFS_USRINT_CWD 0 +_ACEOF -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +else -# Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ +cat >>confdefs.h <<\_ACEOF +#define PVFS_USRINT_CWD 1 _ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include <$ac_header> + +fi +fi; + +echo "$as_me:$LINENO: checking if dlfns needs explicit library request" >&5 +echo $ECHO_N "checking if dlfns needs explicit library request... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF + +#define _GNU_SOURCE 1 +#include +main() {void *p = dlsym(RTLD_DEFAULT,"sym");} + _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then - ac_header_preproc=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_preproc=no + + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + LIBS="$LIBS -ldl" + fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +BUILD_UCACHE= +# Check whether --enable-ucache or --disable-ucache was given. +if test "${enable_ucache+set}" = set; then + enableval="$enable_ucache" + if test "x$enableval" = "xyes" ; then -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} +cat >>confdefs.h <<\_ACEOF +#define PVFS_UCACHE_ENABLE 1 +_ACEOF - ;; -esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 + BUILD_UCACHE=1 else - eval "$as_ac_Header=\$ac_header_preproc" -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -fi -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +cat >>confdefs.h <<\_ACEOF +#define PVFS_UCACHE_ENABLE 0 _ACEOF fi -done +else - { echo "$as_me:$LINENO: checking for epoll functions" >&5 -echo $ECHO_N "checking for epoll functions... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF +cat >>confdefs.h <<\_ACEOF +#define PVFS_UCACHE_ENABLE 0 +_ACEOF + + +fi; + +BUILD_ACL_INTERFACE= +echo "$as_me:$LINENO: checking for user acl includes sys/acl.h acl/libacl.h" >&5 +echo $ECHO_N "checking for user acl includes sys/acl.h acl/libacl.h... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -#include + +#include +#include + int main () { - int fd; - fd = epoll_create(1); - close(fd); - ; return 0; } _ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - BUILD_EPOLL=1 - -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - -fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext -} + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + BUILD_ACL_INTERFACE=1 -# Check whether --enable-epoll was given. 
-if test "${enable_epoll+set}" = set; then - enableval=$enable_epoll; - if test "x$enableval" = "xyes"; then - BUILD_EPOLL=1 +cat >>confdefs.h <<\_ACEOF +#define PVFS_HAVE_ACL_INCLUDES 1 +_ACEOF - fi else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 - test_for_epoll +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + BUILD_ACL_INTERFACE=0 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -# Check whether --enable-segv-backtrace was given. -if test "${enable_segv_backtrace+set}" = set; then - enableval=$enable_segv_backtrace; -else - { echo "$as_me:$LINENO: checking if segv backtrace capable" >&5 -echo $ECHO_N "checking if segv backtrace capable... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF +echo "$as_me:$LINENO: checking for scandir compare arg using void pointers" >&5 +echo $ECHO_N "checking for scandir compare arg using void pointers... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include -#define __USE_GNU -#include -#if !defined(REG_EIP) && !defined(REG_RIP) - choke me -#endif +#define _LARGEFILE64_SOURCE 1 +#define _GNU_SOURCE 1 +#include +int scandir (const char *dir, struct dirent ***list, + int (*sel)(const struct dirent *), + int (*cmp)(const void *, const void *)) +{ return 0; } +int scandir64 (const char *dir, struct dirent64 ***list, + int (*sel)(const struct dirent64 *), + int (*cmp)(const void *, const void *)) +{ return 0; } int main () @@ -12175,59 +18532,50 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - PVFS2_SEGV_BACKTRACE=1 -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - -fi + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 +cat >>confdefs.h <<\_ACEOF +#define PVFS_SCANDIR_VOID 1 +_ACEOF -# Check whether --enable-shared was given. -if test "${enable_shared+set}" = set; then - enableval=$enable_shared; build_shared=$enableval else - build_shared=no -fi - + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 -if test "x$build_shared" = "xno" -a "x$build_static" = "xno" ; then - { { echo "$as_me:$LINENO: error: Must do --enable-shared or --enable-static or both." >&5 -echo "$as_me: error: Must do --enable-shared or --enable-static or both." >&2;} - { (exit 1); exit 1; }; } fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + INTELC= GNUC= -{ echo "$as_me:$LINENO: checking whether cc is an Intel compiler" >&5 -echo $ECHO_N "checking whether cc is an Intel compiler... 
$ECHO_C" >&6; } +echo "$as_me:$LINENO: checking whether cc is an Intel compiler" >&5 +echo $ECHO_N "checking whether cc is an Intel compiler... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -12248,36 +18596,40 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 INTELC=1 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext if test "x$INTELC" = "x" ; then if test "x$GCC" = "xyes" ; then GNUC=1 @@ -12287,14 +18639,15 @@ fi -# Check whether --with-efence was given. 
+# Check whether --with-efence or --without-efence was given. if test "${with_efence+set}" = set; then - withval=$with_efence; if test "x$withval" != "xyes" ; then + withval="$with_efence" + if test "x$withval" != "xyes" ; then LDFLAGS="${LDFLAGS} -L$withval" fi -{ echo "$as_me:$LINENO: checking for malloc in -lefence" >&5 -echo $ECHO_N "checking for malloc in -lefence... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for malloc in -lefence" >&5 +echo $ECHO_N "checking for malloc in -lefence... $ECHO_C" >&6 if test "${ac_cv_lib_efence_malloc+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else @@ -12307,53 +18660,56 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ +/* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" #endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ char malloc (); int main () { -return malloc (); +malloc (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_cv_lib_efence_malloc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_cv_lib_efence_malloc=no +ac_cv_lib_efence_malloc=no fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ +rm -f conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ echo "$as_me:$LINENO: result: $ac_cv_lib_efence_malloc" >&5 -echo "${ECHO_T}$ac_cv_lib_efence_malloc" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_lib_efence_malloc" >&5 +echo "${ECHO_T}$ac_cv_lib_efence_malloc" >&6 if test $ac_cv_lib_efence_malloc = yes; then cat >>confdefs.h <<_ACEOF #define HAVE_LIBEFENCE 1 @@ -12364,78 +18720,85 @@ _ACEOF fi -fi - +fi; -# Check whether --with-valgrind was given. +# Check whether --with-valgrind or --without-valgrind was given. if test "${with_valgrind+set}" = set; then - withval=$with_valgrind; + withval="$with_valgrind" + found=no save_cppflags="$CPPFLAGS" - { echo "$as_me:$LINENO: checking for valgrind.h usability" >&5 -echo $ECHO_N "checking for valgrind.h usability... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for valgrind.h usability" >&5 +echo $ECHO_N "checking for valgrind.h usability... 
$ECHO_C" >&6 if test "x$withval" = xyes ; then cat >conftest.$ac_ext <<_ACEOF #include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then found=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext if test x$found = xno ; then CPPFLAGS="$CPPFLAGS -I/usr/include/valgrind" cat >conftest.$ac_ext <<_ACEOF #include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then found=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - CPPFLAGS="$save_cppflags" +CPPFLAGS="$save_cppflags" fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext fi else for d in $withval $withval/include \ @@ -12446,38 +18809,42 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext #include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then found=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - CPPFLAGS="$save_cppflags" +CPPFLAGS="$save_cppflags" fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext if test x$found = xyes ; then break fi done fi - { echo "$as_me:$LINENO: result: $found" >&5 -echo "${ECHO_T}$found" >&6; } + echo "$as_me:$LINENO: result: $found" >&5 +echo "${ECHO_T}$found" >&6 if test x$found = xyes ; then cat >>confdefs.h <<\_ACEOF @@ -12486,19 +18853,19 @@ _ACEOF fi -fi - +fi; if test "x$NEED_BERKELEY_DB" = "xyes" ; then -# Check whether --with-db was given. +# Check whether --with-db or --without-db was given. if test "${with_db+set}" = set; then - withval=$with_db; + withval="$with_db" + dbpath=${withval} DB_LDFLAGS= - { echo "$as_me:$LINENO: checking for db library" >&5 -echo $ECHO_N "checking for db library... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for db library" >&5 +echo $ECHO_N "checking for db library... $ECHO_C" >&6 oldlibs=$LIBS lib=notfound @@ -12509,32 +18876,35 @@ echo $ECHO_N "checking for db library... $ECHO_C" >&6; } #include "$dbpath/include/$dbheader/db.h" _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then DB_CFLAGS="-I$dbpath/include/$dbheader/" break else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext done if test "x$dbheader" = "xnotfound"; then @@ -12542,35 +18912,39 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext #include "$dbpath/include/db.h" _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then DB_CFLAGS="-I$dbpath/include/" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { { echo "$as_me:$LINENO: error: Invalid libdb path specified. No db.h found. +{ { echo "$as_me:$LINENO: error: Invalid libdb path specified. No db.h found. See \`config.log' for more details." >&5 echo "$as_me: error: Invalid libdb path specified. No db.h found. See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; } fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext fi DB_LDFLAGS="-L${dbpath}/lib" @@ -12595,32 +18969,34 @@ DB *dbp; db_create(&dbp, NULL, 0); } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then lib=db else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ +rm -f conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext CFLAGS=$oldcflags @@ -12644,32 +19020,34 @@ DB *dbp; db_create(&dbp, NULL, 0); } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then break else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ +rm -f conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext done fi @@ -12680,15 +19058,15 @@ rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ echo "$as_me: error: could not find DB libraries" >&2;} { (exit 1); exit 1; }; } else - { echo "$as_me:$LINENO: result: $lib" >&5 -echo "${ECHO_T}$lib" >&6; } + echo "$as_me:$LINENO: result: $lib" >&5 +echo "${ECHO_T}$lib" >&6 fi - { echo "$as_me:$LINENO: checking for dbenv parameter to DB error callback function" >&5 -echo $ECHO_N "checking for dbenv parameter to DB error callback function... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for dbenv parameter to DB error callback function" >&5 +echo $ECHO_N "checking for dbenv parameter to DB error callback function... $ECHO_C" >&6 oldcflags=$CFLAGS CFLAGS="$USR_CFLAGS $DB_CFLAGS -Werror" cat >conftest.$ac_ext <<_ACEOF @@ -12720,24 +19098,29 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_DBENV_PARAMETER_TO_DB_ERROR_CALLBACK 1 @@ -12748,16 +19131,15 @@ else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 have_dbenv_parameter_to_db_error_callback=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext if test "x$have_dbenv_parameter_to_db_error_callback" = "xyes" ; then - { echo "$as_me:$LINENO: checking if third parameter to error callback function is const" >&5 -echo $ECHO_N "checking if third parameter to error callback function is const... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking if third parameter to error callback function is const" >&5 +echo $ECHO_N "checking if third parameter to error callback function is const... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -12787,43 +19169,47 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_CONST_THIRD_PARAMETER_TO_DB_ERROR_CALLBACK 1 _ACEOF fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext fi CFLAGS="$USR_CFLAGS $DB_CFLAGS -Werror" - { echo "$as_me:$LINENO: checking for DB stat with malloc function ptr" >&5 -echo $ECHO_N "checking for DB stat with malloc function ptr... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for DB stat with malloc function ptr" >&5 +echo $ECHO_N "checking for DB stat with malloc function ptr... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -12850,24 +19236,29 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_UNKNOWN_PARAMETER_TO_DB_STAT 1 @@ -12878,17 +19269,16 @@ else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 have_db_stat_malloc=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext if test "x$have_db_stat_malloc" = "xno" ; then - { echo "$as_me:$LINENO: checking for txnid parameter to DB stat function" >&5 -echo $ECHO_N "checking for txnid parameter to DB stat function... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for txnid parameter to DB stat function" >&5 +echo $ECHO_N "checking for txnid parameter to DB stat function... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF @@ -12914,24 +19304,29 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_TXNID_PARAMETER_TO_DB_STAT 1 @@ -12942,17 +19337,16 @@ else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 have_txnid_param_to_stat=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext fi - { echo "$as_me:$LINENO: checking for txnid parameter to DB open function" >&5 -echo $ECHO_N "checking for txnid parameter to DB open function... 
$ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for txnid parameter to DB open function" >&5 +echo $ECHO_N "checking for txnid parameter to DB open function... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -12982,24 +19376,29 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_TXNID_PARAMETER_TO_DB_OPEN 1 @@ -13009,14 +19408,13 @@ else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for DB_DIRTY_READ flag" >&5 -echo $ECHO_N "checking for DB_DIRTY_READ flag... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for DB_DIRTY_READ flag" >&5 +echo $ECHO_N "checking for DB_DIRTY_READ flag... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -13037,24 +19435,29 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_DB_DIRTY_READ 1 @@ -13064,14 +19467,13 @@ else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for DB_BUFFER_SMALL error" >&5 -echo $ECHO_N "checking for DB_BUFFER_SMALL error... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for DB_BUFFER_SMALL error" >&5 +echo $ECHO_N "checking for DB_BUFFER_SMALL error... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -13093,24 +19495,29 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_DB_BUFFER_SMALL 1 @@ -13120,14 +19527,13 @@ else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for berkeley db get_pagesize function" >&5 -echo $ECHO_N "checking for berkeley db get_pagesize function... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for berkeley db get_pagesize function" >&5 +echo $ECHO_N "checking for berkeley db get_pagesize function... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -13152,24 +19558,29 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_DB_GET_PAGESIZE 1 @@ -13179,12 +19590,72 @@ else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking Berkeley DB version" >&5 +echo $ECHO_N "checking Berkeley DB version... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #include + +int +main () +{ + + #if DB_VERSION_MAJOR < 4 || \ + (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 8) || \ + (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR == 8 && \ + DB_VERSION_PATCH < 30) + #error "Recommend version of Berkeley DB at least 4.8.30" + #endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + HAVE_DB_OLD=0 +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + HAVE_DB_OLD=1 +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS="$oldcflags" else @@ -13192,8 +19663,8 @@ else dbpath="" DB_LDFLAGS= - { echo "$as_me:$LINENO: checking for db library" >&5 -echo $ECHO_N "checking for db library... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for db library" >&5 +echo $ECHO_N "checking for db library... $ECHO_C" >&6 oldlibs=$LIBS lib=notfound @@ -13204,32 +19675,35 @@ echo $ECHO_N "checking for db library... $ECHO_C" >&6; } #include "$dbpath/include/$dbheader/db.h" _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then DB_CFLAGS="-I$dbpath/include/$dbheader/" break else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext done if test "x$dbheader" = "xnotfound"; then @@ -13237,35 +19711,39 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext #include "$dbpath/include/db.h" _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then DB_CFLAGS="-I$dbpath/include/" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { { echo "$as_me:$LINENO: error: Invalid libdb path specified. No db.h found. +{ { echo "$as_me:$LINENO: error: Invalid libdb path specified. No db.h found. See \`config.log' for more details." >&5 echo "$as_me: error: Invalid libdb path specified. No db.h found. See \`config.log' for more details." 
>&2;} { (exit 1); exit 1; }; } fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext fi DB_LDFLAGS="-L${dbpath}/lib" @@ -13290,32 +19768,34 @@ DB *dbp; db_create(&dbp, NULL, 0); } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then lib=db else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ +rm -f conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext CFLAGS=$oldcflags @@ -13339,32 +19819,34 @@ DB *dbp; db_create(&dbp, NULL, 0); } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then break else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ +rm -f conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext done fi @@ -13375,15 +19857,15 @@ rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ echo "$as_me: error: could not find DB libraries" >&2;} { (exit 1); exit 1; }; } else - { echo "$as_me:$LINENO: result: $lib" >&5 -echo "${ECHO_T}$lib" >&6; } + echo "$as_me:$LINENO: result: $lib" >&5 +echo "${ECHO_T}$lib" >&6 fi - { echo "$as_me:$LINENO: checking for dbenv parameter to DB error callback function" >&5 -echo $ECHO_N "checking for dbenv parameter to DB error callback function... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for dbenv parameter to DB error callback function" >&5 +echo $ECHO_N "checking for dbenv parameter to DB error callback function... $ECHO_C" >&6 oldcflags=$CFLAGS CFLAGS="$USR_CFLAGS $DB_CFLAGS -Werror" cat >conftest.$ac_ext <<_ACEOF @@ -13415,24 +19897,29 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_DBENV_PARAMETER_TO_DB_ERROR_CALLBACK 1 @@ -13443,16 +19930,15 @@ else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 have_dbenv_parameter_to_db_error_callback=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext if test "x$have_dbenv_parameter_to_db_error_callback" = "xyes" ; then - { echo "$as_me:$LINENO: checking if third parameter to error callback function is const" >&5 -echo $ECHO_N "checking if third parameter to error callback function is const... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking if third parameter to error callback function is const" >&5 +echo $ECHO_N "checking if third parameter to error callback function is const... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -13482,236 +19968,47 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_CONST_THIRD_PARAMETER_TO_DB_ERROR_CALLBACK 1 -_ACEOF - -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - fi - - CFLAGS="$USR_CFLAGS $DB_CFLAGS -Werror" - { echo "$as_me:$LINENO: checking for DB stat with malloc function ptr" >&5 -echo $ECHO_N "checking for DB stat with malloc function ptr... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - - #include - #include - -int -main () -{ - - int ret = 0; - DB *db = db; - int dummy = 0; - u_int32_t flags = 0; - - ret = db->stat(db, &dummy, malloc, flags); - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_UNKNOWN_PARAMETER_TO_DB_STAT 1 -_ACEOF - - have_db_stat_malloc=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - have_db_stat_malloc=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - if test "x$have_db_stat_malloc" = "xno" ; then - - { echo "$as_me:$LINENO: checking for txnid parameter to DB stat function" >&5 -echo $ECHO_N "checking for txnid parameter to DB stat function... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - - #include - -int -main () -{ - - int ret = 0; - DB *db = db; - DB_TXN *txnid = txnid; - u_int32_t flags = 0; - - ret = db->stat(db, txnid, NULL, flags); - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_TXNID_PARAMETER_TO_DB_STAT 1 -_ACEOF - - have_txnid_param_to_stat=yes + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - have_txnid_param_to_stat=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - fi - - { echo "$as_me:$LINENO: checking for txnid parameter to DB open function" >&5 -echo $ECHO_N "checking for txnid parameter to DB open function... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - - #include - -int -main () -{ - - int ret = 0; - DB *db = NULL; - DB_TXN *txnid = NULL; - char *file = NULL; - char *database = NULL; - DBTYPE type = 0; - u_int32_t flags = 0; - int mode = 0; - - ret = db->open(db, txnid, file, database, type, flags, mode); - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_TXNID_PARAMETER_TO_DB_OPEN 1 +#define HAVE_CONST_THIRD_PARAMETER_TO_DB_ERROR_CALLBACK 1 _ACEOF -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { echo "$as_me:$LINENO: checking for DB_DIRTY_READ flag" >&5 -echo $ECHO_N "checking for DB_DIRTY_READ flag... $ECHO_C" >&6; } + CFLAGS="$USR_CFLAGS $DB_CFLAGS -Werror" + echo "$as_me:$LINENO: checking for DB stat with malloc function ptr" >&5 +echo $ECHO_N "checking for DB stat with malloc function ptr... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -13719,110 +20016,136 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ - #include + #include + #include int main () { - u_int32_t flags = DB_DIRTY_READ; + int ret = 0; + DB *db = db; + int dummy = 0; + u_int32_t flags = 0; + + ret = db->stat(db, &dummy, malloc, flags); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_DB_DIRTY_READ 1 +#define HAVE_UNKNOWN_PARAMETER_TO_DB_STAT 1 _ACEOF + have_db_stat_malloc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + have_db_stat_malloc=no fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + if test "x$have_db_stat_malloc" = "xno" ; then - { echo "$as_me:$LINENO: checking for DB_BUFFER_SMALL error" >&5 -echo $ECHO_N "checking for DB_BUFFER_SMALL error... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for txnid parameter to DB stat function" >&5 +echo $ECHO_N "checking for txnid parameter to DB stat function... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - #include + #include int main () { - int res = DB_BUFFER_SMALL; - res++; + int ret = 0; + DB *db = db; + DB_TXN *txnid = txnid; + u_int32_t flags = 0; + + ret = db->stat(db, txnid, NULL, flags); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_DB_BUFFER_SMALL 1 +#define HAVE_TXNID_PARAMETER_TO_DB_STAT 1 _ACEOF + have_txnid_param_to_stat=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + have_txnid_param_to_stat=no fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + fi - { echo "$as_me:$LINENO: checking for berkeley db get_pagesize function" >&5 -echo $ECHO_N "checking for berkeley db get_pagesize function... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for txnid parameter to DB open function" >&5 +echo $ECHO_N "checking for txnid parameter to DB open function... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -13838,608 +20161,443 @@ main () int ret = 0; DB *db = NULL; - int pagesize; + DB_TXN *txnid = NULL; + char *file = NULL; + char *database = NULL; + DBTYPE type = 0; + u_int32_t flags = 0; + int mode = 0; - ret = db->get_pagesize(db, &pagesize); + ret = db->open(db, txnid, file, database, type, flags, mode); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_DB_GET_PAGESIZE 1 +#define HAVE_TXNID_PARAMETER_TO_DB_OPEN 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - CFLAGS="$oldcflags" - -fi - -fi - -if test "x$BUILD_SERVER" = "x1"; then - - -{ echo "$as_me:$LINENO: checking if server lib needs -lrt" >&5 -echo $ECHO_N "checking if server lib needs -lrt... $ECHO_C" >&6; } -oldldflags=$LDFLAGS - -cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for DB_DIRTY_READ flag" >&5 +echo $ECHO_N "checking for DB_DIRTY_READ flag... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -#include - #include - #include + + #include + int main () { -lio_listio(LIO_NOWAIT, NULL, 0, NULL); + + u_int32_t flags = DB_DIRTY_READ; + ; return 0; } _ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_DB_DIRTY_READ 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext - LDFLAGS="$LDFLAGS -lrt" - cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking for DB_BUFFER_SMALL error" >&5 +echo $ECHO_N "checking for DB_BUFFER_SMALL error... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include - #include - #include + + #include + int main () { -lio_listio(LIO_NOWAIT, NULL, 0, NULL); + + int res = DB_BUFFER_SMALL; + res++; + ; return 0; } _ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - NEEDS_LIBRT=1 - - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - { { echo "$as_me:$LINENO: error: failed attempting to link lio_listio" >&5 -echo "$as_me: error: failed attempting to link lio_listio" >&2;} - { (exit 1); exit 1; }; } -fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext - -fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext - -LDFLAGS=$oldldflags - -fi - -case "$host_os" in - - *darwin*) - -cat >>confdefs.h <<\_ACEOF -#define TARGET_OS_DARWIN 1 -_ACEOF - - TARGET_OS_DARWIN=1 - - ;; - *linux*) - -cat >>confdefs.h <<\_ACEOF -#define TARGET_OS_LINUX 1 -_ACEOF - - TARGET_OS_LINUX=1 - - ;; -esac - - -for ac_header in netdb.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for 
$ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -$ac_includes_default -#include <$ac_header> -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_header_compiler=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_header_compiler=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } - -# Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. 
*/ -#include <$ac_header> -_ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then - ac_header_preproc=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_header_preproc=no -fi - -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } - -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} - - ;; -esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - eval "$as_ac_Header=\$ac_header_preproc" -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 -fi -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +cat >>confdefs.h <<\_ACEOF +#define HAVE_DB_BUFFER_SMALL 1 _ACEOF +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi - -done - - -for ac_header in arpa/inet.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for berkeley db get_pagesize function" >&5 +echo $ECHO_N "checking for berkeley db get_pagesize function... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -$ac_includes_default -#include <$ac_header> + + #include + +int +main () +{ + + int ret = 0; + DB *db = NULL; + int pagesize; + + ret = db->get_pagesize(db, &pagesize); + + ; + return 0; +} _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_header_compiler=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_DB_GET_PAGESIZE 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } - -# Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF + echo "$as_me:$LINENO: checking Berkeley DB version" >&5 +echo $ECHO_N "checking Berkeley DB version... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include <$ac_header> + + #include + +int +main () +{ + + #if DB_VERSION_MAJOR < 4 || \ + (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 8) || \ + (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR == 8 && \ + DB_VERSION_PATCH < 30) + #error "Recommend version of Berkeley DB at least 4.8.30" + #endif + + ; + return 0; +} _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then - ac_header_preproc=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + HAVE_DB_OLD=0 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_preproc=no -fi - -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } - -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} - - ;; -esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - eval "$as_ac_Header=\$ac_header_preproc" -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + HAVE_DB_OLD=1 fi -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS="$oldcflags" +fi; fi -done +if test "x$BUILD_SERVER" = "x1"; then -for ac_header in sys/socket.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking if server lib needs -lrt" >&5 +echo $ECHO_N "checking if server lib needs -lrt... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -$ac_includes_default -#include <$ac_header> +#include + #include + #include +int +main () +{ +lio_listio(LIO_NOWAIT, NULL, 0, NULL); + ; + return 0; +} _ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_header_compiler=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } -# Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF + oldlibs=$LIBS + LIBS="$LIBS -lrt" + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include <$ac_header> +#include + #include + #include +int +main () +{ +lio_listio(LIO_NOWAIT, NULL, 0, NULL); + ; + return 0; +} _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then - ac_header_preproc=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + NEEDS_LIBRT=1 + + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_preproc=no +{ { echo "$as_me:$LINENO: error: failed attempting to link lio_listio" >&5 +echo "$as_me: error: failed attempting to link lio_listio" >&2;} + { (exit 1); exit 1; }; } fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LIBS=$oldlibs -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} +fi + +case "$host_os" in + + *darwin*) + +cat >>confdefs.h <<\_ACEOF +#define TARGET_OS_DARWIN 1 +_ACEOF + + TARGET_OS_DARWIN=1 ;; -esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - eval "$as_ac_Header=\$ac_header_preproc" -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } + *linux*) -fi -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +cat >>confdefs.h <<\_ACEOF +#define TARGET_OS_LINUX 1 _ACEOF -fi + TARGET_OS_LINUX=1 -done + ;; +esac -{ echo "$as_me:$LINENO: checking for gethostbyname" >&5 -echo $ECHO_N "checking for gethostbyname... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for gethostbyname" >&5 +echo $ECHO_N "checking for gethostbyname... 
$ECHO_C" >&6 oldcflags="$CFLAGS" CFLAGS="$USR_CFLAGS" cat >conftest.$ac_ext <<_ACEOF @@ -14462,24 +20620,29 @@ gethostbyname("localhost"); } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_GETHOSTBYNAME 1 @@ -14489,15 +20652,14 @@ else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 WARN_ABOUT_HOSTNAMES="yes" fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - -{ echo "$as_me:$LINENO: checking for gethostbyaddr" >&5 -echo $ECHO_N "checking for gethostbyaddr... 
$ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for gethostbyaddr" >&5 +echo $ECHO_N "checking for gethostbyaddr... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -14519,24 +20681,29 @@ gethostbyaddr((void *)&peer.sin_addr.s_addr, sizeof(struct in_addr), AF_INET); } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_GETHOSTBYADDR 1 @@ -14546,19 +20713,19 @@ else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS="$oldcflags" BUILD_BMI_TCP=1 -# Check whether --with-bmi-tcp was given. 
+# Check whether --with-bmi-tcp or --without-bmi-tcp was given. if test "${with_bmi_tcp+set}" = set; then - withval=$with_bmi_tcp; if test -z "$withval" -o "$withval" = yes ; then + withval="$with_bmi_tcp" + if test -z "$withval" -o "$withval" = yes ; then : elif test "$withval" = no ; then BUILD_BMI_TCP= @@ -14568,16 +20735,16 @@ echo "$as_me: error: Option --with-tcp requires yes/no argument." >&2;} { (exit 1); exit 1; }; } fi -fi - +fi; gm_home= -# Check whether --with-gm was given. +# Check whether --with-gm or --without-gm was given. if test "${with_gm+set}" = set; then - withval=$with_gm; if test -z "$withval" -o "$withval" = yes ; then + withval="$with_gm" + if test -z "$withval" -o "$withval" = yes ; then { { echo "$as_me:$LINENO: error: Option --with-gm requires the path to your GM tree." >&5 echo "$as_me: error: Option --with-gm requires the path to your GM tree." >&2;} { (exit 1); exit 1; }; } @@ -14585,12 +20752,12 @@ echo "$as_me: error: Option --with-gm requires the path to your GM tree." >&2;} gm_home="$withval" fi -fi - +fi; -# Check whether --with-gm-includes was given. +# Check whether --with-gm-includes or --without-gm-includes was given. if test "${with_gm_includes+set}" = set; then - withval=$with_gm_includes; if test -z "$withval" -o "$withval" = yes ; then + withval="$with_gm_includes" + if test -z "$withval" -o "$withval" = yes ; then { { echo "$as_me:$LINENO: error: Option --with-gm-includes requires path to GM headers." >&5 echo "$as_me: error: Option --with-gm-includes requires path to GM headers." >&2;} { (exit 1); exit 1; }; } @@ -14598,12 +20765,12 @@ echo "$as_me: error: Option --with-gm-includes requires path to GM headers." >&2 GM_INCDIR="$withval" fi -fi - +fi; -# Check whether --with-gm-libs was given. +# Check whether --with-gm-libs or --without-gm-libs was given. 
if test "${with_gm_libs+set}" = set; then - withval=$with_gm_libs; if test -z "$withval" -o "$withval" = yes ; then + withval="$with_gm_libs" + if test -z "$withval" -o "$withval" = yes ; then { { echo "$as_me:$LINENO: error: Option --with-gm-libs requires path to GM libraries." >&5 echo "$as_me: error: Option --with-gm-libs requires path to GM libraries." >&2;} { (exit 1); exit 1; }; } @@ -14611,8 +20778,7 @@ echo "$as_me: error: Option --with-gm-libs requires path to GM libraries." >&2;} GM_LIBDIR="$withval" fi -fi - +fi; if test -n "$gm_home" ; then if test -z "$GM_INCDIR"; then GM_INCDIR=$gm_home/include @@ -14628,17 +20794,17 @@ fi save_cppflags="$CPPFLAGS" CPPFLAGS="$CPPFLAGS -I$GM_INCDIR -I$GM_INCDIR/gm" if test "${ac_cv_header_gm_h+set}" = set; then - { echo "$as_me:$LINENO: checking for gm.h" >&5 -echo $ECHO_N "checking for gm.h... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for gm.h" >&5 +echo $ECHO_N "checking for gm.h... $ECHO_C" >&6 if test "${ac_cv_header_gm_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_gm_h" >&5 -echo "${ECHO_T}$ac_cv_header_gm_h" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_gm_h" >&5 +echo "${ECHO_T}$ac_cv_header_gm_h" >&6 else # Is the header compilable? -{ echo "$as_me:$LINENO: checking gm.h usability" >&5 -echo $ECHO_N "checking gm.h usability... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking gm.h usability" >&5 +echo $ECHO_N "checking gm.h usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -14649,37 +20815,41 @@ $ac_includes_default #include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +ac_header_compiler=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? -{ echo "$as_me:$LINENO: checking gm.h presence" >&5 -echo $ECHO_N "checking gm.h presence... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking gm.h presence" >&5 +echo $ECHO_N "checking gm.h presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -14688,22 +20858,24 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 @@ -14711,10 +20883,9 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi - rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in @@ -14738,18 +20909,25 @@ echo "$as_me: WARNING: gm.h: section \"Present But Cannot Be Compiled\"" >&2 echo "$as_me: WARNING: gm.h: proceeding with the preprocessor's result" >&2;} { echo "$as_me:$LINENO: WARNING: gm.h: in the future, the compiler will take precedence" >&5 echo "$as_me: WARNING: gm.h: in the future, the compiler will take precedence" >&2;} - + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 ;; esac -{ echo "$as_me:$LINENO: checking for gm.h" >&5 -echo $ECHO_N "checking for gm.h... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for gm.h" >&5 +echo $ECHO_N "checking for gm.h... 
$ECHO_C" >&6 if test "${ac_cv_header_gm_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_cv_header_gm_h=$ac_header_preproc fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_gm_h" >&5 -echo "${ECHO_T}$ac_cv_header_gm_h" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_gm_h" >&5 +echo "${ECHO_T}$ac_cv_header_gm_h" >&6 fi if test $ac_cv_header_gm_h = yes; then @@ -14779,9 +20957,10 @@ echo "$as_me: error: Neither GM library libgm.so or libgm.a found." >&2;} mx_home= -# Check whether --with-mx was given. +# Check whether --with-mx or --without-mx was given. if test "${with_mx+set}" = set; then - withval=$with_mx; if test -z "$withval" -o "$withval" = yes ; then + withval="$with_mx" + if test -z "$withval" -o "$withval" = yes ; then { { echo "$as_me:$LINENO: error: Option --with-mx requires the path to your MX tree." >&5 echo "$as_me: error: Option --with-mx requires the path to your MX tree." >&2;} { (exit 1); exit 1; }; } @@ -14789,12 +20968,12 @@ echo "$as_me: error: Option --with-mx requires the path to your MX tree." >&2;} mx_home="$withval" fi -fi - +fi; -# Check whether --with-mx-includes was given. +# Check whether --with-mx-includes or --without-mx-includes was given. if test "${with_mx_includes+set}" = set; then - withval=$with_mx_includes; if test -z "$withval" -o "$withval" = yes ; then + withval="$with_mx_includes" + if test -z "$withval" -o "$withval" = yes ; then { { echo "$as_me:$LINENO: error: Option --with-mx-includes requires path to MX headers." >&5 echo "$as_me: error: Option --with-mx-includes requires path to MX headers." >&2;} { (exit 1); exit 1; }; } @@ -14802,12 +20981,12 @@ echo "$as_me: error: Option --with-mx-includes requires path to MX headers." >&2 MX_INCDIR="$withval" fi -fi - +fi; -# Check whether --with-mx-libs was given. +# Check whether --with-mx-libs or --without-mx-libs was given. 
if test "${with_mx_libs+set}" = set; then - withval=$with_mx_libs; if test -z "$withval" -o "$withval" = yes ; then + withval="$with_mx_libs" + if test -z "$withval" -o "$withval" = yes ; then { { echo "$as_me:$LINENO: error: Option --with-mx-libs requires path to MX libraries." >&5 echo "$as_me: error: Option --with-mx-libs requires path to MX libraries." >&2;} { (exit 1); exit 1; }; } @@ -14815,8 +20994,7 @@ echo "$as_me: error: Option --with-mx-libs requires path to MX libraries." >&2;} MX_LIBDIR="$withval" fi -fi - +fi; if test -n "$mx_home" ; then if test -z "$MX_INCDIR"; then MX_INCDIR=$mx_home/include @@ -14832,17 +21010,17 @@ fi save_cppflags="$CPPFLAGS" CPPFLAGS="$CPPFLAGS -I$MX_INCDIR -I$MX_INCDIR/mx" if test "${ac_cv_header_myriexpress_h+set}" = set; then - { echo "$as_me:$LINENO: checking for myriexpress.h" >&5 -echo $ECHO_N "checking for myriexpress.h... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for myriexpress.h" >&5 +echo $ECHO_N "checking for myriexpress.h... $ECHO_C" >&6 if test "${ac_cv_header_myriexpress_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_myriexpress_h" >&5 -echo "${ECHO_T}$ac_cv_header_myriexpress_h" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_myriexpress_h" >&5 +echo "${ECHO_T}$ac_cv_header_myriexpress_h" >&6 else # Is the header compilable? -{ echo "$as_me:$LINENO: checking myriexpress.h usability" >&5 -echo $ECHO_N "checking myriexpress.h usability... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking myriexpress.h usability" >&5 +echo $ECHO_N "checking myriexpress.h usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF @@ -14853,37 +21031,41 @@ $ac_includes_default #include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +ac_header_compiler=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? -{ echo "$as_me:$LINENO: checking myriexpress.h presence" >&5 -echo $ECHO_N "checking myriexpress.h presence... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking myriexpress.h presence" >&5 +echo $ECHO_N "checking myriexpress.h presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF @@ -14892,22 +21074,24 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 @@ -14915,10 +21099,9 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi - rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in @@ -14942,18 +21125,25 @@ echo "$as_me: WARNING: myriexpress.h: section \"Present But Cannot Be Compil echo "$as_me: WARNING: myriexpress.h: proceeding with the preprocessor's result" >&2;} { echo "$as_me:$LINENO: WARNING: myriexpress.h: in the future, the compiler will take precedence" >&5 echo "$as_me: WARNING: myriexpress.h: in the future, the compiler will take precedence" >&2;} - + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. 
## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 ;; esac -{ echo "$as_me:$LINENO: checking for myriexpress.h" >&5 -echo $ECHO_N "checking for myriexpress.h... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for myriexpress.h" >&5 +echo $ECHO_N "checking for myriexpress.h... $ECHO_C" >&6 if test "${ac_cv_header_myriexpress_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_cv_header_myriexpress_h=$ac_header_preproc fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_myriexpress_h" >&5 -echo "${ECHO_T}$ac_cv_header_myriexpress_h" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_myriexpress_h" >&5 +echo "${ECHO_T}$ac_cv_header_myriexpress_h" >&6 fi if test $ac_cv_header_myriexpress_h = yes; then @@ -14979,13 +21169,88 @@ echo "$as_me: error: Neither MX library libmyriexpress.so or libmyriexpress.a fo + if test -n "$BUILD_MX" ; then + save_ldflags="$LDFLAGS" + LDFLAGS="-L$MX_LIBDIR $LDFLAGS" + save_libs="$LIBS" + LIBS="-lmyriexpress -lpthread $LIBS" + save_cppflags="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -I$MX_INCDIR" + + echo "$as_me:$LINENO: checking for mx_decompose_endpoint_addr2" >&5 +echo $ECHO_N "checking for mx_decompose_endpoint_addr2... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #include "mx_extensions.h" + #include + +int +main () +{ + + mx_endpoint_addr_t epa; + mx_decompose_endpoint_addr2(epa, NULL, NULL, NULL); + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + { { echo "$as_me:$LINENO: error: Function mx_decompose_endpoint_addr2() not found." >&5 +echo "$as_me: error: Function mx_decompose_endpoint_addr2() not found." >&2;} + { (exit 1); exit 1; }; } + +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + LDFLAGS="$save_ldflags" + CPPFLAGS="$save_cppflags" + LIBS="$save_libs" + fi + ib_home= -# Check whether --with-ib was given. +# Check whether --with-ib or --without-ib was given. if test "${with_ib+set}" = set; then - withval=$with_ib; if test -z "$withval" -o "$withval" = yes ; then + withval="$with_ib" + if test -z "$withval" -o "$withval" = yes ; then { { echo "$as_me:$LINENO: error: Option --with-ib requires the path to your IB tree." >&5 echo "$as_me: error: Option --with-ib requires the path to your IB tree." >&2;} { (exit 1); exit 1; }; } @@ -14993,12 +21258,12 @@ echo "$as_me: error: Option --with-ib requires the path to your IB tree." >&2;} ib_home="$withval" fi -fi - +fi; -# Check whether --with-ib-includes was given. +# Check whether --with-ib-includes or --without-ib-includes was given. if test "${with_ib_includes+set}" = set; then - withval=$with_ib_includes; if test -z "$withval" -o "$withval" = yes ; then + withval="$with_ib_includes" + if test -z "$withval" -o "$withval" = yes ; then { { echo "$as_me:$LINENO: error: Option --with-ib-includes requires path to IB headers." 
>&5 echo "$as_me: error: Option --with-ib-includes requires path to IB headers." >&2;} { (exit 1); exit 1; }; } @@ -15006,12 +21271,12 @@ echo "$as_me: error: Option --with-ib-includes requires path to IB headers." >&2 IB_INCDIR="$withval" fi -fi - +fi; -# Check whether --with-ib-libs was given. +# Check whether --with-ib-libs or --without-ib-libs was given. if test "${with_ib_libs+set}" = set; then - withval=$with_ib_libs; if test -z "$withval" -o "$withval" = yes ; then + withval="$with_ib_libs" + if test -z "$withval" -o "$withval" = yes ; then { { echo "$as_me:$LINENO: error: Option --with-ib-libs requires path to IB libraries." >&5 echo "$as_me: error: Option --with-ib-libs requires path to IB libraries." >&2;} { (exit 1); exit 1; }; } @@ -15019,8 +21284,7 @@ echo "$as_me: error: Option --with-ib-libs requires path to IB libraries." >&2;} IB_LIBDIR="$withval" fi -fi - +fi; if test -n "$ib_home" ; then if test -z "$IB_INCDIR"; then IB_INCDIR=$ib_home/include @@ -15036,17 +21300,17 @@ fi save_cppflags="$CPPFLAGS" CPPFLAGS="$CPPFLAGS -I$IB_INCDIR" if test "${ac_cv_header_vapi_h+set}" = set; then - { echo "$as_me:$LINENO: checking for vapi.h" >&5 -echo $ECHO_N "checking for vapi.h... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for vapi.h" >&5 +echo $ECHO_N "checking for vapi.h... $ECHO_C" >&6 if test "${ac_cv_header_vapi_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_vapi_h" >&5 -echo "${ECHO_T}$ac_cv_header_vapi_h" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_vapi_h" >&5 +echo "${ECHO_T}$ac_cv_header_vapi_h" >&6 else # Is the header compilable? -{ echo "$as_me:$LINENO: checking vapi.h usability" >&5 -echo $ECHO_N "checking vapi.h usability... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking vapi.h usability" >&5 +echo $ECHO_N "checking vapi.h usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF @@ -15057,37 +21321,41 @@ $ac_includes_default #include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +ac_header_compiler=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? -{ echo "$as_me:$LINENO: checking vapi.h presence" >&5 -echo $ECHO_N "checking vapi.h presence... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking vapi.h presence" >&5 +echo $ECHO_N "checking vapi.h presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -15096,22 +21364,24 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 @@ -15119,10 +21389,9 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi - rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in @@ -15146,955 +21415,1709 @@ echo "$as_me: WARNING: vapi.h: section \"Present But Cannot Be Compiled\"" > echo "$as_me: WARNING: vapi.h: proceeding with the preprocessor's result" >&2;} { echo "$as_me:$LINENO: WARNING: vapi.h: in the future, the compiler will take precedence" >&5 echo "$as_me: WARNING: vapi.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. 
## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for vapi.h" >&5 +echo $ECHO_N "checking for vapi.h... $ECHO_C" >&6 +if test "${ac_cv_header_vapi_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_vapi_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_vapi_h" >&5 +echo "${ECHO_T}$ac_cv_header_vapi_h" >&6 + +fi +if test $ac_cv_header_vapi_h = yes; then + : +else + { { echo "$as_me:$LINENO: error: Header vapi.h not found." >&5 +echo "$as_me: error: Header vapi.h not found." >&2;} + { (exit 1); exit 1; }; } +fi + + + if test ! -f $IB_LIBDIR/libvapi.so ; then + if test ! -f $IB_LIBDIR/libvapi.a ; then + { { echo "$as_me:$LINENO: error: Infiniband library libvapi.so not found." >&5 +echo "$as_me: error: Infiniband library libvapi.so not found." >&2;} + { (exit 1); exit 1; }; } + fi + fi + BUILD_IB=1 + echo "$as_me:$LINENO: checking for wrap_common.h" >&5 +echo $ECHO_N "checking for wrap_common.h... $ECHO_C" >&6 +if test "${ac_cv_header_wrap_common_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_header_wrap_common_h=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_header_wrap_common_h=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_header_wrap_common_h" >&5 +echo "${ECHO_T}$ac_cv_header_wrap_common_h" >&6 +if test $ac_cv_header_wrap_common_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_IB_WRAP_COMMON_H 1 +_ACEOF + +fi + + + CPPFLAGS="$save_cppflags" + fi + + + + + openib_home= + +# Check whether --with-openib or --without-openib was given. +if test "${with_openib+set}" = set; then + withval="$with_openib" + if test -z "$withval" -o "$withval" = yes ; then + { { echo "$as_me:$LINENO: error: Option --with-openib requires the path to your OpenIB tree." >&5 +echo "$as_me: error: Option --with-openib requires the path to your OpenIB tree." >&2;} + { (exit 1); exit 1; }; } + elif test "$withval" != no ; then + openib_home="$withval" + fi + +fi; + +# Check whether --with-openib-includes or --without-openib-includes was given. +if test "${with_openib_includes+set}" = set; then + withval="$with_openib_includes" + if test -z "$withval" -o "$withval" = yes ; then + { { echo "$as_me:$LINENO: error: Option --with-openib-includes requires path to OpenIB headers." >&5 +echo "$as_me: error: Option --with-openib-includes requires path to OpenIB headers." >&2;} + { (exit 1); exit 1; }; } + elif test "$withval" != no ; then + OPENIB_INCDIR="$withval" + fi + +fi; + +# Check whether --with-openib-libs or --without-openib-libs was given. +if test "${with_openib_libs+set}" = set; then + withval="$with_openib_libs" + if test -z "$withval" -o "$withval" = yes ; then + { { echo "$as_me:$LINENO: error: Option --with-openib-libs requires path to OpenIB libraries." >&5 +echo "$as_me: error: Option --with-openib-libs requires path to OpenIB libraries." 
>&2;} + { (exit 1); exit 1; }; } + elif test "$withval" != no ; then + OPENIB_LIBDIR="$withval" + fi + +fi; + if test -n "$openib_home" ; then + if test -z "$OPENIB_INCDIR"; then + OPENIB_INCDIR=$openib_home/include + fi + if test -z "$OPENIB_LIBDIR"; then + OPENIB_LIBDIR=$openib_home/lib64 + if test ! -d "$OPENIB_LIBDIR" ; then + OPENIB_LIBDIR=$openib_home/lib + fi + fi + fi + if test -n "$OPENIB_INCDIR$OPENIB_LIBDIR" ; then + save_cppflags="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -I$OPENIB_INCDIR" + if test "${ac_cv_header_infiniband_verbs_h+set}" = set; then + echo "$as_me:$LINENO: checking for infiniband/verbs.h" >&5 +echo $ECHO_N "checking for infiniband/verbs.h... $ECHO_C" >&6 +if test "${ac_cv_header_infiniband_verbs_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_infiniband_verbs_h" >&5 +echo "${ECHO_T}$ac_cv_header_infiniband_verbs_h" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking infiniband/verbs.h usability" >&5 +echo $ECHO_N "checking infiniband/verbs.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking infiniband/verbs.h presence" >&5 +echo $ECHO_N "checking infiniband/verbs.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: infiniband/verbs.h: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: infiniband/verbs.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: infiniband/verbs.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: infiniband/verbs.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: infiniband/verbs.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: infiniband/verbs.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: infiniband/verbs.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: infiniband/verbs.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 ;; esac -{ echo "$as_me:$LINENO: checking for vapi.h" >&5 -echo $ECHO_N "checking for vapi.h... $ECHO_C" >&6; } -if test "${ac_cv_header_vapi_h+set}" = set; then +echo "$as_me:$LINENO: checking for infiniband/verbs.h" >&5 +echo $ECHO_N "checking for infiniband/verbs.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_infiniband_verbs_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_cv_header_vapi_h=$ac_header_preproc + ac_cv_header_infiniband_verbs_h=$ac_header_preproc fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_vapi_h" >&5 -echo "${ECHO_T}$ac_cv_header_vapi_h" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_infiniband_verbs_h" >&5 +echo "${ECHO_T}$ac_cv_header_infiniband_verbs_h" >&6 fi -if test $ac_cv_header_vapi_h = yes; then +if test $ac_cv_header_infiniband_verbs_h = yes; then : else - { { echo "$as_me:$LINENO: error: Header vapi.h not found." >&5 -echo "$as_me: error: Header vapi.h not found." >&2;} + { { echo "$as_me:$LINENO: error: Header infiniband/verbs.h not found." >&5 +echo "$as_me: error: Header infiniband/verbs.h not found." >&2;} { (exit 1); exit 1; }; } fi - if test ! -f $IB_LIBDIR/libvapi.so ; then - if test ! -f $IB_LIBDIR/libvapi.a ; then - { { echo "$as_me:$LINENO: error: Infiniband library libvapi.so not found." >&5 -echo "$as_me: error: Infiniband library libvapi.so not found." >&2;} + if test ! -f $OPENIB_LIBDIR/libibverbs.so ; then + if test ! -f $OPENIB_LIBDIR/libibverbs.a ; then + { { echo "$as_me:$LINENO: error: OpenIB library libibverbs.so not found." >&5 +echo "$as_me: error: OpenIB library libibverbs.so not found." >&2;} { (exit 1); exit 1; }; } fi fi - BUILD_IB=1 - { echo "$as_me:$LINENO: checking for wrap_common.h" >&5 -echo $ECHO_N "checking for wrap_common.h... $ECHO_C" >&6; } -if test "${ac_cv_header_wrap_common_h+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF + BUILD_OPENIB=1 + CPPFLAGS="$save_cppflags" + fi + + + + + if test -n "$BUILD_OPENIB" ; then + save_ldflags="$LDFLAGS" + LDFLAGS="-L$OPENIB_LIBDIR -libverbs" + save_cppflags="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -I$OPENIB_INCDIR" + + echo "$as_me:$LINENO: checking for ibv_get_devices" >&5 +echo $ECHO_N "checking for ibv_get_devices... 
$ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include -#include +int +main () +{ + + ibv_get_devices(); + + ; + return 0; +} _ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_header_wrap_common_h=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_IBV_GET_DEVICES 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_cv_header_wrap_common_h=no -fi +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_wrap_common_h" >&5 -echo "${ECHO_T}$ac_cv_header_wrap_common_h" >&6; } -if test $ac_cv_header_wrap_common_h = yes; then +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + echo "$as_me:$LINENO: checking for IBV_EVENT_CLIENT_REREGISTER" >&5 +echo $ECHO_N "checking for IBV_EVENT_CLIENT_REREGISTER... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #include "infiniband/verbs.h" + +int +main () +{ + + enum ibv_event_type x = IBV_EVENT_CLIENT_REREGISTER; + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_IB_WRAP_COMMON_H 1 +#define HAVE_IBV_EVENT_CLIENT_REREGISTER 1 _ACEOF -fi +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + LDFLAGS="$save_ldflags" CPPFLAGS="$save_cppflags" fi + use_portals= + home= + incs= + libs= - openib_home= - -# Check whether --with-openib was given. -if test "${with_openib+set}" = set; then - withval=$with_openib; if test -z "$withval" -o "$withval" = yes ; then - { { echo "$as_me:$LINENO: error: Option --with-openib requires the path to your OpenIB tree." >&5 -echo "$as_me: error: Option --with-openib requires the path to your OpenIB tree." >&2;} - { (exit 1); exit 1; }; } +# Check whether --with-portals or --without-portals was given. +if test "${with_portals+set}" = set; then + withval="$with_portals" + if test -z "$withval" -o "$withval" = yes ; then + use_portals=yes elif test "$withval" != no ; then - openib_home="$withval" + home="$withval" fi -fi - +fi; -# Check whether --with-openib-includes was given. -if test "${with_openib_includes+set}" = set; then - withval=$with_openib_includes; if test -z "$withval" -o "$withval" = yes ; then - { { echo "$as_me:$LINENO: error: Option --with-openib-includes requires path to OpenIB headers." >&5 -echo "$as_me: error: Option --with-openib-includes requires path to OpenIB headers." >&2;} +# Check whether --with-portals-includes or --without-portals-includes was given. +if test "${with_portals_includes+set}" = set; then + withval="$with_portals_includes" + if test -z "$withval" -o "$withval" = yes ; then + { { echo "$as_me:$LINENO: error: Option --with-portals-includes requires an argument." >&5 +echo "$as_me: error: Option --with-portals-includes requires an argument." 
>&2;} { (exit 1); exit 1; }; } elif test "$withval" != no ; then - OPENIB_INCDIR="$withval" + incs="$withval" fi -fi - +fi; -# Check whether --with-openib-libs was given. -if test "${with_openib_libs+set}" = set; then - withval=$with_openib_libs; if test -z "$withval" -o "$withval" = yes ; then - { { echo "$as_me:$LINENO: error: Option --with-openib-libs requires path to OpenIB libraries." >&5 -echo "$as_me: error: Option --with-openib-libs requires path to OpenIB libraries." >&2;} +# Check whether --with-portals-libs or --without-portals-libs was given. +if test "${with_portals_libs+set}" = set; then + withval="$with_portals_libs" + if test -z "$withval" -o "$withval" = yes ; then + { { echo "$as_me:$LINENO: error: Option --with-portals-libs requires an argument." >&5 +echo "$as_me: error: Option --with-portals-libs requires an argument." >&2;} { (exit 1); exit 1; }; } elif test "$withval" != no ; then - OPENIB_LIBDIR="$withval" + libs="$withval" fi -fi - - if test -n "$openib_home" ; then - if test -z "$OPENIB_INCDIR"; then - OPENIB_INCDIR=$openib_home/include +fi; + if test -n "$home" ; then + if test -z "$incs"; then + incs=-I$home/include fi - if test -z "$OPENIB_LIBDIR"; then - OPENIB_LIBDIR=$openib_home/lib64 - if test ! -d "$OPENIB_LIBDIR" ; then - OPENIB_LIBDIR=$openib_home/lib + if test -z "$libs"; then + libs=-L$home/lib64 + if test ! -d "$home/lib64" ; then + libs=-L$home/lib fi fi fi - if test -n "$OPENIB_INCDIR$OPENIB_LIBDIR" ; then + + BUILD_PORTALS= + PORTALS_INCS= + PORTALS_LIBS= + if test "X$use_portals$home$incs$libs" != X ; then + # Save stuff save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$OPENIB_INCDIR" - if test "${ac_cv_header_infiniband_verbs_h+set}" = set; then - { echo "$as_me:$LINENO: checking for infiniband/verbs.h" >&5 -echo $ECHO_N "checking for infiniband/verbs.h... 
$ECHO_C" >&6; } -if test "${ac_cv_header_infiniband_verbs_h+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 + save_libs="$LIBS" + + PORTALS_INCS="$incs" + CPPFLAGS="$CPPFLAGS $PORTALS_INCS" + + PORTALS_LIBS="$libs" + LIBS="$save_libs $PORTALS_LIBS" + + echo "$as_me:$LINENO: checking for portals3.h header" >&5 +echo $ECHO_N "checking for portals3.h header... $ECHO_C" >&6 + ok=no + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +int +main () +{ +int m, n; m = PtlInit(&n); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ok=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_infiniband_verbs_h" >&5 -echo "${ECHO_T}$ac_cv_header_infiniband_verbs_h" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + + if test "$ok" = yes ; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + { { echo "$as_me:$LINENO: error: Header portals/portals3.h not found." >&5 +echo "$as_me: error: Header portals/portals3.h not found." 
>&2;} + { (exit 1); exit 1; }; } + fi + + echo "$as_me:$LINENO: checking for portals libraries" >&5 +echo $ECHO_N "checking for portals libraries... $ECHO_C" >&6 + ok=no + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +int +main () +{ +int m, n; m = PtlInit(&n); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ok=yes else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking infiniband/verbs.h usability" >&5 -echo $ECHO_N "checking infiniband/verbs.h usability... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + if test "$ok" = no ; then + PORTALS_LIBS="$libs -lportals" + LIBS="$save_libs $PORTALS_LIBS" + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -$ac_includes_default -#include +#include +int +main () +{ +int m, n; m = PtlInit(&n); + ; + return 0; +} _ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_header_compiler=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ok=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } - -# Is the header present? -{ echo "$as_me:$LINENO: checking infiniband/verbs.h presence" >&5 -echo $ECHO_N "checking infiniband/verbs.h presence... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF + if test "$ok" = no ; then + PORTALS_LIBS="$libs -lp3api -lp3lib -lp3utcp -lp3rt -lpthread" + LIBS="$save_libs $PORTALS_LIBS" + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include +#include +int +main () +{ +int m, n; m = PtlInit(&n); + ; + return 0; +} _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then - ac_header_preproc=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ok=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_preproc=no fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + fi -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } + if test "$ok" = yes ; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + BUILD_PORTALS=1 + else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + { { echo "$as_me:$LINENO: error: Could not link Portals library." >&5 +echo "$as_me: error: Could not link Portals library." 
>&2;} + { (exit 1); exit 1; }; } + fi -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: infiniband/verbs.h: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: infiniband/verbs.h: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: present but cannot be compiled" >&5 -echo "$as_me: WARNING: infiniband/verbs.h: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: infiniband/verbs.h: check for missing prerequisite headers?" >&2;} - { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: infiniband/verbs.h: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: infiniband/verbs.h: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: infiniband/verbs.h: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: infiniband/verbs.h: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: infiniband/verbs.h: in the future, the compiler will take precedence" >&2;} + # + # Check for API variations. + # - ;; -esac -{ echo "$as_me:$LINENO: checking for infiniband/verbs.h" >&5 -echo $ECHO_N "checking for infiniband/verbs.h... 
$ECHO_C" >&6; } -if test "${ac_cv_header_infiniband_verbs_h+set}" = set; then +for ac_func in PtlErrorStr +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6 +if eval "test \"\${$as_ac_var+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_cv_header_infiniband_verbs_h=$ac_header_preproc -fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_infiniband_verbs_h" >&5 -echo "${ECHO_T}$ac_cv_header_infiniband_verbs_h" >&6; } + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func -fi -if test $ac_cv_header_infiniband_verbs_h = yes; then - : -else - { { echo "$as_me:$LINENO: error: Header infiniband/verbs.h not found." >&5 -echo "$as_me: error: Header infiniband/verbs.h not found." >&2;} - { (exit 1); exit 1; }; } -fi +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ +#ifdef __STDC__ +# include +#else +# include +#endif - if test ! -f $OPENIB_LIBDIR/libibverbs.so ; then - if test ! -f $OPENIB_LIBDIR/libibverbs.a ; then - { { echo "$as_me:$LINENO: error: OpenIB library libibverbs.so not found." >&5 -echo "$as_me: error: OpenIB library libibverbs.so not found." >&2;} - { (exit 1); exit 1; }; } - fi - fi - BUILD_OPENIB=1 - CPPFLAGS="$save_cppflags" - fi +#undef $ac_func +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +{ +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. 
*/ +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined (__stub_$ac_func) || defined (__stub___$ac_func) +choke me +#else +char (*f) () = $ac_func; +#endif +#ifdef __cplusplus +} +#endif +int +main () +{ +return f != $ac_func; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 +eval "$as_ac_var=no" +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6 +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF - if test -n "$BUILD_OPENIB" ; then - save_ldflags="$LDFLAGS" - LDFLAGS="-L$OPENIB_LIBDIR -libverbs" - save_cppflags="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$OPENIB_INCDIR" +fi +done - { echo "$as_me:$LINENO: checking for ibv_get_devices" >&5 -echo $ECHO_N "checking for ibv_get_devices... 
$ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + +for ac_func in PtlEventKindStr +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6 +if eval "test \"\${$as_ac_var+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +{ +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined (__stub_$ac_func) || defined (__stub___$ac_func) +choke me +#else +char (*f) () = $ac_func; +#endif +#ifdef __cplusplus +} +#endif int main () { - - ibv_get_devices(); - +return f != $ac_func; ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 -cat >>confdefs.h <<\_ACEOF -#define HAVE_IBV_GET_DEVICES 1 +eval "$as_ac_var=no" +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6 +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 _ACEOF -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - fi +done -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext - { echo "$as_me:$LINENO: checking for IBV_EVENT_CLIENT_REREGISTER" >&5 -echo $ECHO_N "checking for IBV_EVENT_CLIENT_REREGISTER... $ECHO_C" >&6; } cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ - - #include "infiniband/verbs.h" - +#include int main () { - - enum ibv_event_type x = IBV_EVENT_CLIENT_REREGISTER; - +int m; ptl_process_id_t any_pid; + m = PtlACEntry(0, 0, any_pid, (ptl_uid_t) -1, (ptl_jid_t) -1, 0); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then cat >>confdefs.h <<\_ACEOF -#define HAVE_IBV_EVENT_CLIENT_REREGISTER 1 +#define HAVE_PTLACENTRY_JID 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - LDFLAGS="$save_ldflags" + # Reset CPPFLAGS="$save_cppflags" + LIBS="$save_libs" fi - use_portals= - home= - incs= - libs= -# Check whether --with-portals was given. 
-if test "${with_portals+set}" = set; then - withval=$with_portals; if test -z "$withval" -o "$withval" = yes ; then - use_portals=yes - elif test "$withval" != no ; then - home="$withval" - fi -fi -# Check whether --with-portals-includes was given. -if test "${with_portals_includes+set}" = set; then - withval=$with_portals_includes; if test -z "$withval" -o "$withval" = yes ; then - { { echo "$as_me:$LINENO: error: Option --with-portals-includes requires an argument." >&5 -echo "$as_me: error: Option --with-portals-includes requires an argument." >&2;} +# Check whether --with-zoid or --without-zoid was given. +if test "${with_zoid+set}" = set; then + withval="$with_zoid" + if test -z "$withval" -o "$withval" = yes ; then + { { echo "$as_me:$LINENO: error: Option --with-zoid requires the path to your ZOID source tree." >&5 +echo "$as_me: error: Option --with-zoid requires the path to your ZOID source tree." >&2;} { (exit 1); exit 1; }; } elif test "$withval" != no ; then - incs="$withval" + ZOID_SRCDIR="$withval" fi +fi; + if test -n "$ZOID_SRCDIR" ; then + save_cppflags="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -Isrc/io/bmi -I$ZOID_SRCDIR/include -I$ZOID_SRCDIR/zbmi -I$ZOID_SRCDIR/zbmi/implementation" + if test "${ac_cv_header_zbmi_h+set}" = set; then + echo "$as_me:$LINENO: checking for zbmi.h" >&5 +echo $ECHO_N "checking for zbmi.h... $ECHO_C" >&6 +if test "${ac_cv_header_zbmi_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 fi +echo "$as_me:$LINENO: result: $ac_cv_header_zbmi_h" >&5 +echo "${ECHO_T}$ac_cv_header_zbmi_h" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking zbmi.h usability" >&5 +echo $ECHO_N "checking zbmi.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 -# Check whether --with-portals-libs was given. -if test "${with_portals_libs+set}" = set; then - withval=$with_portals_libs; if test -z "$withval" -o "$withval" = yes ; then - { { echo "$as_me:$LINENO: error: Option --with-portals-libs requires an argument." >&5 -echo "$as_me: error: Option --with-portals-libs requires an argument." >&2;} - { (exit 1); exit 1; }; } - elif test "$withval" != no ; then - libs="$withval" - fi - +# Is the header present? +echo "$as_me:$LINENO: checking zbmi.h presence" >&5 +echo $ECHO_N "checking zbmi.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 - if test -n "$home" ; then - if test -z "$incs"; then - incs=-I$home/include - fi - if test -z "$libs"; then - libs=-L$home/lib64 - if test ! -d "$home/lib64" ; then - libs=-L$home/lib - fi - fi - fi + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 - BUILD_PORTALS= - PORTALS_INCS= - PORTALS_LIBS= - if test "X$use_portals$home$incs$libs" != X ; then - # Save stuff - save_cppflags="$CPPFLAGS" - save_libs="$LIBS" +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: zbmi.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: zbmi.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: zbmi.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: zbmi.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: zbmi.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: zbmi.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: zbmi.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: zbmi.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: zbmi.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: zbmi.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: zbmi.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: zbmi.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: zbmi.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: zbmi.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: zbmi.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: zbmi.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for zbmi.h" >&5 +echo $ECHO_N "checking for zbmi.h... $ECHO_C" >&6 +if test "${ac_cv_header_zbmi_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_zbmi_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_zbmi_h" >&5 +echo "${ECHO_T}$ac_cv_header_zbmi_h" >&6 - PORTALS_INCS="$incs" - CPPFLAGS="$CPPFLAGS $PORTALS_INCS" +fi +if test $ac_cv_header_zbmi_h = yes; then + : +else + { { echo "$as_me:$LINENO: error: Header zbmi.h not found." >&5 +echo "$as_me: error: Header zbmi.h not found." >&2;} + { (exit 1); exit 1; }; } +fi - PORTALS_LIBS="$libs" - LIBS="$save_libs $PORTALS_LIBS" - { echo "$as_me:$LINENO: checking for portals3.h header" >&5 -echo $ECHO_N "checking for portals3.h header... $ECHO_C" >&6; } - ok=no - cat >conftest.$ac_ext <<_ACEOF + if test "${ac_cv_header_zoid_api_h+set}" = set; then + echo "$as_me:$LINENO: checking for zoid_api.h" >&5 +echo $ECHO_N "checking for zoid_api.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_zoid_api_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_zoid_api_h" >&5 +echo "${ECHO_T}$ac_cv_header_zoid_api_h" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking zoid_api.h usability" >&5 +echo $ECHO_N "checking zoid_api.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include -int -main () -{ -int m, n; m = PtlInit(&n); - ; - return 0; -} +$ac_includes_default +#include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ok=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - +ac_header_compiler=no fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - if test "$ok" = yes ; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - { { echo "$as_me:$LINENO: error: Header portals/portals3.h not found." >&5 -echo "$as_me: error: Header portals/portals3.h not found." >&2;} - { (exit 1); exit 1; }; } - fi - - { echo "$as_me:$LINENO: checking for portals libraries" >&5 -echo $ECHO_N "checking for portals libraries... $ECHO_C" >&6; } - ok=no - cat >conftest.$ac_ext <<_ACEOF +# Is the header present? +echo "$as_me:$LINENO: checking zoid_api.h presence" >&5 +echo $ECHO_N "checking zoid_api.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include -int -main () -{ -int m, n; m = PtlInit(&n); - ; - return 0; -} +#include _ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - ok=yes + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: zoid_api.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: zoid_api.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: zoid_api.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: zoid_api.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: zoid_api.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: zoid_api.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: zoid_api.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: zoid_api.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: zoid_api.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: zoid_api.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: zoid_api.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: zoid_api.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: zoid_api.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: zoid_api.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: zoid_api.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: zoid_api.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for zoid_api.h" >&5 +echo $ECHO_N "checking for zoid_api.h... $ECHO_C" >&6 +if test "${ac_cv_header_zoid_api_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_zoid_api_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_zoid_api_h" >&5 +echo "${ECHO_T}$ac_cv_header_zoid_api_h" >&6 +fi +if test $ac_cv_header_zoid_api_h = yes; then + : +else + { { echo "$as_me:$LINENO: error: Header zoid_api.h not found." >&5 +echo "$as_me: error: Header zoid_api.h not found." >&2;} + { (exit 1); exit 1; }; } fi -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext - if test "$ok" = no ; then - PORTALS_LIBS="$libs -lportals" - LIBS="$save_libs $PORTALS_LIBS" - cat >conftest.$ac_ext <<_ACEOF + if test "${ac_cv_header_zbmi_protocol_h+set}" = set; then + echo "$as_me:$LINENO: checking for zbmi_protocol.h" >&5 +echo $ECHO_N "checking for zbmi_protocol.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_zbmi_protocol_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_zbmi_protocol_h" >&5 +echo "${ECHO_T}$ac_cv_header_zbmi_protocol_h" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking zbmi_protocol.h usability" >&5 +echo $ECHO_N "checking zbmi_protocol.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include -int -main () -{ -int m, n; m = PtlInit(&n); - ; - return 0; -} +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking zbmi_protocol.h presence" >&5 +echo $ECHO_N "checking zbmi_protocol.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ _ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - ok=yes + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: zbmi_protocol.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: zbmi_protocol.h: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: zbmi_protocol.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: zbmi_protocol.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: zbmi_protocol.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: zbmi_protocol.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: zbmi_protocol.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: zbmi_protocol.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: zbmi_protocol.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: zbmi_protocol.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: zbmi_protocol.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: zbmi_protocol.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: zbmi_protocol.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: zbmi_protocol.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: zbmi_protocol.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: zbmi_protocol.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for zbmi_protocol.h" >&5 +echo $ECHO_N "checking for zbmi_protocol.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_zbmi_protocol_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_zbmi_protocol_h=$ac_header_preproc fi +echo "$as_me:$LINENO: result: $ac_cv_header_zbmi_protocol_h" >&5 +echo "${ECHO_T}$ac_cv_header_zbmi_protocol_h" >&6 -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext - fi +fi +if test $ac_cv_header_zbmi_protocol_h = yes; then + : +else + { { echo "$as_me:$LINENO: error: Header zbmi_protocol.h not found." >&5 +echo "$as_me: error: Header zbmi_protocol.h not found." >&2;} + { (exit 1); exit 1; }; } +fi - if test "$ok" = no ; then - PORTALS_LIBS="$libs -lp3api -lp3lib -lp3utcp -lp3rt -lpthread" - LIBS="$save_libs $PORTALS_LIBS" - cat >conftest.$ac_ext <<_ACEOF + + CPPFLAGS="$save_cppflags" + BUILD_ZOID=1 + fi + + + + +echo "$as_me:$LINENO: checking for F_NOCACHE" >&5 +echo $ECHO_N "checking for F_NOCACHE... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include + + #include + int fd; + fcntl(fd, F_NOCACHE, 1); + int main () { -int m, n; m = PtlInit(&n); + ; return 0; } _ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - ok=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext - fi - - if test "$ok" = yes ; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - BUILD_PORTALS=1 - else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - { { echo "$as_me:$LINENO: error: Could not link Portals library." >&5 -echo "$as_me: error: Could not link Portals library." >&2;} - { (exit 1); exit 1; }; } - fi - - # - # Check for API variations. - # - -for ac_func in PtlErrorStr -do -as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_func" >&5 -echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } -if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF +echo "$as_me:$LINENO: checking for open O_DIRECT" >&5 +echo $ECHO_N "checking for open O_DIRECT... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -/* Define $ac_func to an innocuous variant, in case declares $ac_func. - For example, HP-UX 11i declares gettimeofday. 
*/ -#define $ac_func innocuous_$ac_func - -/* System header to define __stub macros and hopefully few prototypes, - which can conflict with char $ac_func (); below. - Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. */ - -#ifdef __STDC__ -# include -#else -# include -#endif - -#undef $ac_func -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char $ac_func (); -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined __stub_$ac_func || defined __stub___$ac_func -choke me -#endif + #include int main () { -return $ac_func (); + + open("somefile", O_DIRECT); + ; return 0; } _ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - eval "$as_ac_var=yes" + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_var=no" -fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext -fi -ac_res=`eval echo '${'$as_ac_var'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -if test `eval echo '${'$as_ac_var'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 -_ACEOF - +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi -done +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext - -for ac_func in PtlEventKindStr -do -as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_func" >&5 -echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } -if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF +echo "$as_me:$LINENO: checking for fgetxattr extra arguments" >&5 +echo $ECHO_N "checking for fgetxattr extra arguments... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -/* Define $ac_func to an innocuous variant, in case declares $ac_func. - For example, HP-UX 11i declares gettimeofday. */ -#define $ac_func innocuous_$ac_func - -/* System header to define __stub macros and hopefully few prototypes, - which can conflict with char $ac_func (); below. - Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. */ - -#ifdef __STDC__ -# include -#else -# include -#endif - -#undef $ac_func -/* Override any GCC internal prototype to avoid an error. 
- Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char $ac_func (); -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined __stub_$ac_func || defined __stub___$ac_func -choke me -#endif + #include + #ifdef HAVE_ATTR_XATTR_H + #include + #endif + #ifdef HAVE_SYS_XATTR_H + #include + #endif int main () { -return $ac_func (); + + fgetxattr(0, 0, 0, 0, 0, 0); + ; return 0; } _ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - eval "$as_ac_var=yes" + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FGETXATTR_EXTRA_ARGS 1 +_ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_var=no" -fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext -fi -ac_res=`eval echo '${'$as_ac_var'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -if test `eval echo '${'$as_ac_var'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 -_ACEOF +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi -done - +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext - cat >conftest.$ac_ext <<_ACEOF +echo "$as_me:$LINENO: checking for setxattr extra arguments" >&5 +echo $ECHO_N "checking for setxattr extra arguments... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include + + #include + #ifdef HAVE_ATTR_XATTR_H + #include + #endif + #ifdef HAVE_SYS_XATTR_H + #include + #endif + int main () { -int m; ptl_process_id_t any_pid; - m = PtlACEntry(0, 0, any_pid, (ptl_uid_t) -1, (ptl_jid_t) -1, 0); + + setxattr(0, 0, 0, 0, 0, 0); + ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_PTLACENTRY_JID 1 +#define HAVE_SETXATTR_EXTRA_ARGS 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - # Reset - CPPFLAGS="$save_cppflags" - LIBS="$save_libs" - fi - - - - - -BUILD_OSD=1 -OSD_LIBS="$SRC_ABSOLUTE_TOP/../osd-initiator/libosdinit.a $SRC_ABSOLUTE_TOP/../osd-util/libosdutil.a -lm" - - - -{ echo "$as_me:$LINENO: checking for fgetxattr extra arguments" >&5 -echo $ECHO_N "checking for fgetxattr extra arguments... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for getxattr extra arguments" >&5 +echo $ECHO_N "checking for getxattr extra arguments... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -16114,59 +23137,63 @@ int main () { - fgetxattr(0, 0, 0, 0, 0, 0); + getxattr(0, 0, 0, 0, 0, 0); ; return 0; } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF -#define HAVE_FGETXATTR_EXTRA_ARGS 1 +#define HAVE_GETXATTR_EXTRA_ARGS 1 _ACEOF else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext if test "${ac_cv_header_sys_sysinfo_h+set}" = set; then - { echo "$as_me:$LINENO: checking for sys/sysinfo.h" >&5 -echo $ECHO_N "checking for sys/sysinfo.h... $ECHO_C" >&6; } + echo "$as_me:$LINENO: checking for sys/sysinfo.h" >&5 +echo $ECHO_N "checking for sys/sysinfo.h... $ECHO_C" >&6 if test "${ac_cv_header_sys_sysinfo_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_sys_sysinfo_h" >&5 -echo "${ECHO_T}$ac_cv_header_sys_sysinfo_h" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_sys_sysinfo_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_sysinfo_h" >&6 else # Is the header compilable? 
-{ echo "$as_me:$LINENO: checking sys/sysinfo.h usability" >&5 -echo $ECHO_N "checking sys/sysinfo.h usability... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking sys/sysinfo.h usability" >&5 +echo $ECHO_N "checking sys/sysinfo.h usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -16177,37 +23204,41 @@ $ac_includes_default #include _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +ac_header_compiler=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? 
-{ echo "$as_me:$LINENO: checking sys/sysinfo.h presence" >&5 -echo $ECHO_N "checking sys/sysinfo.h presence... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking sys/sysinfo.h presence" >&5 +echo $ECHO_N "checking sys/sysinfo.h presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -16216,22 +23247,24 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 @@ -16239,10 +23272,9 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi - rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? 
case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in @@ -16266,18 +23298,25 @@ echo "$as_me: WARNING: sys/sysinfo.h: section \"Present But Cannot Be Compil echo "$as_me: WARNING: sys/sysinfo.h: proceeding with the preprocessor's result" >&2;} { echo "$as_me:$LINENO: WARNING: sys/sysinfo.h: in the future, the compiler will take precedence" >&5 echo "$as_me: WARNING: sys/sysinfo.h: in the future, the compiler will take precedence" >&2;} - + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 ;; esac -{ echo "$as_me:$LINENO: checking for sys/sysinfo.h" >&5 -echo $ECHO_N "checking for sys/sysinfo.h... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for sys/sysinfo.h" >&5 +echo $ECHO_N "checking for sys/sysinfo.h... $ECHO_C" >&6 if test "${ac_cv_header_sys_sysinfo_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_cv_header_sys_sysinfo_h=$ac_header_preproc fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_sys_sysinfo_h" >&5 -echo "${ECHO_T}$ac_cv_header_sys_sysinfo_h" >&6; } +echo "$as_me:$LINENO: result: $ac_cv_header_sys_sysinfo_h" >&5 +echo "${ECHO_T}$ac_cv_header_sys_sysinfo_h" >&6 fi if test $ac_cv_header_sys_sysinfo_h = yes; then @@ -16295,9 +23334,9 @@ fi for ac_func in strnlen do as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_func" >&5 -echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } -if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then +echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6 +if eval "test \"\${$as_ac_var+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF @@ -16323,60 +23362,68 @@ cat >>conftest.$ac_ext <<_ACEOF #undef $ac_func -/* Override any GCC internal prototype to avoid an error. 
- Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ +/* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" +{ #endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ char $ac_func (); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. */ -#if defined __stub_$ac_func || defined __stub___$ac_func +#if defined (__stub_$ac_func) || defined (__stub___$ac_func) choke me +#else +char (*f) () = $ac_func; +#endif +#ifdef __cplusplus +} #endif int main () { -return $ac_func (); +return f != $ac_func; ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then eval "$as_ac_var=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_var=no" +eval "$as_ac_var=no" fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ +rm -f conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi -ac_res=`eval echo '${'$as_ac_var'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6 if test `eval echo '${'$as_ac_var'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 @@ -16389,9 +23436,9 @@ done for ac_func in strtoull do as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_func" >&5 -echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } -if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then +echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6 +if eval "test \"\${$as_ac_var+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF @@ -16417,60 +23464,68 @@ cat >>conftest.$ac_ext <<_ACEOF #undef $ac_func -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ +/* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" +{ #endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ char $ac_func (); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. 
*/ -#if defined __stub_$ac_func || defined __stub___$ac_func +#if defined (__stub_$ac_func) || defined (__stub___$ac_func) choke me +#else +char (*f) () = $ac_func; +#endif +#ifdef __cplusplus +} #endif int main () { -return $ac_func (); +return f != $ac_func; ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then eval "$as_ac_var=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_var=no" +eval "$as_ac_var=no" fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ +rm -f conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi -ac_res=`eval echo '${'$as_ac_var'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6 if test `eval echo '${'$as_ac_var'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 @@ -16483,9 +23538,9 @@ done for ac_func in strstr do as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_func" >&5 -echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } -if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then +echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6 +if eval "test \"\${$as_ac_var+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF @@ -16511,60 +23566,68 @@ cat >>conftest.$ac_ext <<_ACEOF #undef $ac_func -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ +/* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" +{ #endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ char $ac_func (); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. 
*/ -#if defined __stub_$ac_func || defined __stub___$ac_func +#if defined (__stub_$ac_func) || defined (__stub___$ac_func) choke me +#else +char (*f) () = $ac_func; +#endif +#ifdef __cplusplus +} #endif int main () { -return $ac_func (); +return f != $ac_func; ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then eval "$as_ac_var=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_var=no" +eval "$as_ac_var=no" fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ +rm -f conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi -ac_res=`eval echo '${'$as_ac_var'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6 if test `eval echo '${'$as_ac_var'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 @@ -16577,9 +23640,9 @@ done for ac_func in fgetxattr do as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_func" >&5 -echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } -if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then +echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6 +if eval "test \"\${$as_ac_var+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF @@ -16605,60 +23668,68 @@ cat >>conftest.$ac_ext <<_ACEOF #undef $ac_func -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ +/* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" +{ #endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ char $ac_func (); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. 
*/ -#if defined __stub_$ac_func || defined __stub___$ac_func +#if defined (__stub_$ac_func) || defined (__stub___$ac_func) choke me +#else +char (*f) () = $ac_func; +#endif +#ifdef __cplusplus +} #endif int main () { -return $ac_func (); +return f != $ac_func; ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then eval "$as_ac_var=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_var=no" +eval "$as_ac_var=no" fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ +rm -f conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi -ac_res=`eval echo '${'$as_ac_var'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6 if test `eval echo '${'$as_ac_var'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 @@ -16671,9 +23742,9 @@ done for ac_func in fsetxattr do as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_func" >&5 -echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } -if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then +echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6 +if eval "test \"\${$as_ac_var+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF @@ -16699,60 +23770,68 @@ cat >>conftest.$ac_ext <<_ACEOF #undef $ac_func -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ +/* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" +{ #endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ char $ac_func (); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. 
*/ -#if defined __stub_$ac_func || defined __stub___$ac_func +#if defined (__stub_$ac_func) || defined (__stub___$ac_func) choke me +#else +char (*f) () = $ac_func; +#endif +#ifdef __cplusplus +} #endif int main () { -return $ac_func (); +return f != $ac_func; ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then eval "$as_ac_var=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - eval "$as_ac_var=no" +eval "$as_ac_var=no" fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ +rm -f conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi -ac_res=`eval echo '${'$as_ac_var'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6 if test `eval echo '${'$as_ac_var'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 @@ -16762,8 +23841,8 @@ fi done -{ echo "$as_me:$LINENO: checking for fgetxattr prototype" >&5 -echo $ECHO_N "checking for fgetxattr prototype... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for fgetxattr prototype" >&5 +echo $ECHO_N "checking for fgetxattr prototype... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -16789,30 +23868,35 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_FGETXATTR_PROTOTYPE 1 @@ -16820,12 +23904,11 @@ _ACEOF fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: checking for fgetxattr extra arguments" >&5 -echo $ECHO_N "checking for fgetxattr extra arguments... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for fgetxattr extra arguments" >&5 +echo $ECHO_N "checking for fgetxattr extra arguments... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -16856,24 +23939,29 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_FGETXATTR_EXTRA_ARGS 1 @@ -16883,15 +23971,14 @@ else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - -{ echo "$as_me:$LINENO: checking for fsetxattr extra arguments" >&5 -echo $ECHO_N "checking for fsetxattr extra arguments... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking for fsetxattr extra arguments" >&5 +echo $ECHO_N "checking for fsetxattr extra arguments... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -16918,24 +24005,29 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_FSETXATTR_EXTRA_ARGS 1 @@ -16945,22 +24037,21 @@ else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -# getmntent is in the standard C library on UNICOS, in -lsun on Irix 4, -# -lseq on Dynix/PTX, -lgen on Unixware. -{ echo "$as_me:$LINENO: checking for library containing getmntent" >&5 -echo $ECHO_N "checking for library containing getmntent... $ECHO_C" >&6; } -if test "${ac_cv_search_getmntent+set}" = set; then +# getmntent is in -lsun on Irix 4, -lseq on Dynix/PTX, -lgen on Unixware. +echo "$as_me:$LINENO: checking for getmntent in -lsun" >&5 +echo $ECHO_N "checking for getmntent in -lsun... $ECHO_C" >&6 +if test "${ac_cv_lib_sun_getmntent+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_func_search_save_LIBS=$LIBS + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsun $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -16968,397 +24059,335 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ +/* Override any gcc2 internal prototype to avoid an error. 
*/ #ifdef __cplusplus extern "C" #endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ char getmntent (); int main () { -return getmntent (); +getmntent (); ; return 0; } _ACEOF -for ac_lib in '' sun seq gen; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - ac_cv_search_getmntent=$ac_res + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_sun_getmntent=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - -fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext - if test "${ac_cv_search_getmntent+set}" = set; then - break +ac_cv_lib_sun_getmntent=no fi -done -if test "${ac_cv_search_getmntent+set}" = set; then - : -else - ac_cv_search_getmntent=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS fi -{ echo "$as_me:$LINENO: result: $ac_cv_search_getmntent" >&5 -echo "${ECHO_T}$ac_cv_search_getmntent" >&6; } -ac_res=$ac_cv_search_getmntent -if test "$ac_res" != no; then - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - ac_cv_func_getmntent=yes - -cat >>confdefs.h <<\_ACEOF -#define HAVE_GETMNTENT -_ACEOF - +echo "$as_me:$LINENO: result: $ac_cv_lib_sun_getmntent" >&5 +echo "${ECHO_T}$ac_cv_lib_sun_getmntent" >&6 +if test $ac_cv_lib_sun_getmntent = yes; then + LIBS="-lsun $LIBS" else - ac_cv_func_getmntent=no -fi - - - - -for ac_header in malloc.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo "$as_me:$LINENO: checking for getmntent in -lseq" >&5 +echo $ECHO_N "checking for getmntent in -lseq... $ECHO_C" >&6 +if test "${ac_cv_lib_seq_getmntent+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... 
$ECHO_C" >&6; } + ac_check_lib_save_LIBS=$LIBS +LIBS="-lseq $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -$ac_includes_default -#include <$ac_header> + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char getmntent (); +int +main () +{ +getmntent (); + ; + return 0; +} _ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_header_compiler=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_seq_getmntent=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +ac_cv_lib_seq_getmntent=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } - -# Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: $ac_cv_lib_seq_getmntent" >&5 +echo "${ECHO_T}$ac_cv_lib_seq_getmntent" >&6 +if test $ac_cv_lib_seq_getmntent = yes; then + LIBS="-lseq $LIBS" +else + echo "$as_me:$LINENO: checking for getmntent in -lgen" >&5 +echo $ECHO_N "checking for getmntent in -lgen... $ECHO_C" >&6 +if test "${ac_cv_lib_gen_getmntent+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgen $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include <$ac_header> + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char getmntent (); +int +main () +{ +getmntent (); + ; + return 0; +} _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then - ac_header_preproc=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gen_getmntent=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_preproc=no +ac_cv_lib_gen_getmntent=no fi - -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } - -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} - - ;; -esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - eval "$as_ac_Header=\$ac_header_preproc" +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } - +echo "$as_me:$LINENO: result: $ac_cv_lib_gen_getmntent" >&5 +echo "${ECHO_T}$ac_cv_lib_gen_getmntent" >&6 +if test $ac_cv_lib_gen_getmntent = yes; then + LIBS="-lgen $LIBS" fi -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF fi -done +fi -for ac_header in mntent.h +for ac_func in getmntent do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6 +if eval "test \"\${$as_ac_var+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -$ac_includes_default -#include <$ac_header> -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_header_compiler=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. 
*/ + +#ifdef __STDC__ +# include +#else +# include +#endif - ac_header_compiler=no -fi +#undef $ac_func -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +{ +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined (__stub_$ac_func) || defined (__stub___$ac_func) +choke me +#else +char (*f) () = $ac_func; +#endif +#ifdef __cplusplus +} +#endif -# Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include <$ac_header> +int +main () +{ +return f != $ac_func; + ; + return 0; +} _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then - ac_header_preproc=yes + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_preproc=no -fi - -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } - -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} - - ;; -esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - eval "$as_ac_Header=\$ac_header_preproc" +eval "$as_ac_var=no" fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } - +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext fi -if test `eval echo '${'$as_ac_Header'}'` = yes; then +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6 +if test `eval echo '${'$as_ac_var'}'` = yes; then cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 _ACEOF fi - done -# Check whether --enable-static-server was given. -if test "${enable_static_server+set}" = set; then - enableval=$enable_static_server; - SERVER_LDFLAGS="$LDFLAGS -static" + +# Check whether --enable-static-server or --disable-static-server was given. 
+if test "${enable_static_server+set}" = set; then + enableval="$enable_static_server" + staticserver=$enableval else + staticserver="no" +fi; +if test "$staticserver" = "yes"; then + SERVER_LDFLAGS="$LDFLAGS -static" +else SERVER_LDFLAGS="$LDFLAGS -rdynamic" - fi - for ac_header in execinfo.h do as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 else # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -17369,37 +24398,41 @@ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +ac_header_compiler=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -17408,22 +24441,24 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include <$ac_header> _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 @@ -17431,10 +24466,9 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi - rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in @@ -17458,19 +24492,25 @@ echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} - + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 ;; esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else eval "$as_ac_Header=\$ac_header_preproc" fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 fi if test `eval echo '${'$as_ac_Header'}'` = yes; then @@ -17486,33 +24526,34 @@ done -{ echo "$as_me:$LINENO: checking for SDL" >&5 -echo $ECHO_N "checking for SDL... $ECHO_C" >&6; } -SDL_CONFIG=`which sdl-config 2> /dev/null` -if ! test -z "$SDL_CONFIG" && test -x "$SDL_CONFIG"; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - SDL_VERSION=`sdl-config --version` - VISCFLAGS=`sdl-config --cflags` - VISCLIBS=`sdl-config --libs` +test_for_sdl() +{ + echo "$as_me:$LINENO: checking for SDL" >&5 +echo $ECHO_N "checking for SDL... $ECHO_C" >&6 + SDL_CONFIG=`which sdl-config 2> /dev/null` + if ! test -z "$SDL_CONFIG" && test -x "$SDL_CONFIG"; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + SDL_VERSION=`sdl-config --version` + VISCFLAGS=`sdl-config --cflags` + VISCLIBS=`sdl-config --libs` for ac_header in SDL_ttf.h do as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 else # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -17523,37 +24564,41 @@ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +ac_header_compiler=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -17562,22 +24607,24 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include <$ac_header> _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! 
-s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 @@ -17585,10 +24632,9 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi - rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in @@ -17612,19 +24658,25 @@ echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} - + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 ;; esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else eval "$as_ac_Header=\$ac_header_preproc" fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 fi if test `eval echo '${'$as_ac_Header'}'` = yes; then @@ -17632,25 +24684,24 @@ if test `eval echo '${'$as_ac_Header'}'` = yes; then #define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF BUILD_VIS="1" - CPPFLAGS="$CPPFLAGS $VISCFLAGS" + CPPFLAGS="$CPPFLAGS $VISCFLAGS" else for ac_header in SDL/SDL_ttf.h do as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 else # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF @@ -17661,37 +24712,41 @@ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_header_compiler=no +ac_header_compiler=no fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF @@ -17700,22 +24755,24 @@ cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include <$ac_header> _ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 @@ -17723,10 +24780,9 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi - rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in @@ -17750,19 +24806,25 @@ echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} - + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. 
## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 ;; esac -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else eval "$as_ac_Header=\$ac_header_preproc" fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 fi if test `eval echo '${'$as_ac_Header'}'` = yes; then @@ -17770,7 +24832,7 @@ if test `eval echo '${'$as_ac_Header'}'` = yes; then #define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF BUILD_VIS="1" - CPPFLAGS="$CPPFLAGS $VISCFLAGS" + CPPFLAGS="$CPPFLAGS $VISCFLAGS" fi done @@ -17783,13 +24845,24 @@ done -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } + else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + fi +} + +# Check whether --enable-visual or --disable-visual was given. +if test "${enable_visual+set}" = set; then + enableval="$enable_visual" + +if test "x$enableval" = "xyes" ; then + test_for_sdl fi -{ echo "$as_me:$LINENO: checking for GNU-style strerror_r" >&5 -echo $ECHO_N "checking for GNU-style strerror_r... $ECHO_C" >&6; } +fi; + +echo "$as_me:$LINENO: checking for GNU-style strerror_r" >&5 +echo $ECHO_N "checking for GNU-style strerror_r... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF @@ -17809,24 +24882,29 @@ main () } _ACEOF rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 cat >>confdefs.h <<\_ACEOF #define HAVE_GNU_STRERROR_R 1 @@ -17837,20 +24915,18 @@ else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } +echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext for d in src/apps src/io src/common src/client src/kernel \ doc src/apps/kernel test; do install -d $d; done -ac_config_files="$ac_config_files Makefile module.mk src/apps/admin/module.mk src/apps/admin/pvfs2-config src/apps/karma/module.mk src/apps/vis/module.mk src/apps/kernel/linux/module.mk src/io/trove/module.mk src/io/trove/trove-handle-mgmt/module.mk src/io/trove/trove-dbpf/module.mk src/common/misc/module.mk src/common/quickhash/module.mk src/common/quicklist/module.mk src/common/dotconf/module.mk src/common/id-generator/module.mk src/common/gossip/module.mk src/common/gen-locks/module.mk src/common/llist/module.mk src/common/statecomp/module.mk src/io/bmi/module.mk src/io/bmi/bmi_tcp/module.mk src/io/bmi/bmi_gm/module.mk src/io/bmi/bmi_mx/module.mk src/io/bmi/bmi_ib/module.mk src/io/bmi/bmi_osd/module.mk src/io/bmi/bmi_portals/module.mk src/io/description/module.mk src/io/flow/module.mk src/io/flow/flowproto-bmi-trove/module.mk src/io/flow/flowproto-template/module.mk src/io/flow/flowproto-dump-offsets/module.mk src/io/flow/flowproto-bmi-cache/module.mk src/io/buffer/module.mk src/io/job/module.mk src/io/dev/module.mk src/proto/module.mk src/server/module.mk src/server/request-scheduler/module.mk src/client/sysint/module.mk src/kernel/linux-2.6/Makefile src/kernel/linux-2.4/Makefile doc/module.mk doc/coding/module.mk doc/design/module.mk doc/random/module.mk examples/pvfs2-server.rc doc/doxygen/pvfs2-doxygen.conf" - + 
ac_config_files="$ac_config_files include/pvfs2.h Makefile module.mk src/apps/admin/module.mk src/apps/admin/pvfs2-config src/apps/devel/module.mk src/apps/karma/module.mk src/apps/vis/module.mk src/apps/fuse/module.mk src/apps/ucache/module.mk src/apps/kernel/linux/module.mk src/apps/user/module.mk src/io/trove/module.mk src/io/trove/trove-handle-mgmt/module.mk src/io/trove/trove-dbpf/module.mk src/common/misc/module.mk src/common/quickhash/module.mk src/common/quicklist/module.mk src/common/dotconf/module.mk src/common/id-generator/module.mk src/common/gossip/module.mk src/common/gen-locks/module.mk src/common/llist/module.mk src/common/statecomp/module.mk src/common/events/module.mk src/common/mgmt/module.mk src/io/bmi/module.mk src/io/bmi/bmi_tcp/module.mk src/io/bmi/bmi_gm/module.mk src/io/bmi/bmi_mx/module.mk src/io/bmi/bmi_ib/module.mk src/io/bmi/bmi_osd/module.mk src/io/bmi/bmi_portals/module.mk src/io/bmi/bmi_zoid/module.mk src/io/description/module.mk src/io/flow/module.mk src/io/flow/flowproto-bmi-trove/module.mk src/io/flow/flowproto-template/module.mk src/io/flow/flowproto-dump-offsets/module.mk src/io/flow/flowproto-bmi-cache/module.mk src/io/buffer/module.mk src/io/job/module.mk src/io/dev/module.mk src/proto/module.mk src/server/module.mk src/server/request-scheduler/module.mk src/client/sysint/module.mk src/client/usrint/module.mk src/kernel/linux-2.6/Makefile src/kernel/linux-2.4/Makefile doc/module.mk doc/coding/module.mk doc/design/module.mk doc/random/module.mk examples/pvfs2-server.rc doc/doxygen/pvfs2-doxygen.conf" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure @@ -17869,58 +24945,39 @@ _ACEOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. -# So, we kill variables containing newlines. +# So, don't put newlines in cache variables' values. 
# Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. -( - for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do - eval ac_val=\$$ac_var - case $ac_val in #( - *${as_nl}*) - case $ac_var in #( - *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5 -echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;; - esac - case $ac_var in #( - _ | IFS | as_nl) ;; #( - *) $as_unset $ac_var ;; - esac ;; - esac - done - +{ (set) 2>&1 | - case $as_nl`(ac_space=' '; set) 2>&1` in #( - *${as_nl}ac_space=\ *) + case `(ac_space=' '; set | grep ac_space) 2>&1` in + *ac_space=\ *) # `set' does not quote correctly, so add quotes (double-quote # substitution turns \\\\ into \\, and sed turns \\ into \). sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" - ;; #( + ;; *) # `set' quotes correctly as required by POSIX, so do not add quotes. 
- sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + sed -n \ + "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" ;; - esac | - sort -) | + esac; +} | sed ' - /^ac_cv_env_/b end t clear - :clear + : clear s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ t end - s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ - :end' >>confcache -if diff "$cache_file" confcache >/dev/null 2>&1; then :; else - if test -w "$cache_file"; then - test "x$cache_file" != "x/dev/null" && - { echo "$as_me:$LINENO: updating cache $cache_file" >&5 -echo "$as_me: updating cache $cache_file" >&6;} + /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + : end' >>confcache +if diff $cache_file confcache >/dev/null 2>&1; then :; else + if test -w $cache_file; then + test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file" cat confcache >$cache_file else - { echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5 -echo "$as_me: not updating unwritable cache $cache_file" >&6;} + echo "not updating unwritable cache $cache_file" fi fi rm -f confcache @@ -17929,18 +24986,32 @@ test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' +# VPATH may cause trouble with some makes, so we remove $(srcdir), +# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=/{ +s/:*\$(srcdir):*/:/; +s/:*\${srcdir}:*/:/; +s/:*@srcdir@:*/:/; +s/^\([^=]*=[ ]*\):*/\1/; +s/:*$//; +s/^[^=]*=[ ]*$//; +}' +fi + DEFS=-DHAVE_CONFIG_H ac_libobjs= ac_ltlibobjs= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. - ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' - ac_i=`echo "$ac_i" | sed "$ac_script"` - # 2. Prepend LIBOBJDIR. 
When used with automake>=1.10 LIBOBJDIR - # will be set to the directory where LIBOBJS objects are built. - ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext" - ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo' + ac_i=`echo "$ac_i" | + sed 's/\$U\././;s/\.o$//;s/\.obj$//'` + # 2. Add them. + ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext" + ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo' done LIBOBJS=$ac_libobjs @@ -17971,45 +25042,17 @@ cat >>$CONFIG_STATUS <<\_ACEOF ## M4sh Initialization. ## ## --------------------- ## -# Be more Bourne compatible -DUALCASE=1; export DUALCASE # for MKS sh +# Be Bourne compatible if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in - *posix*) set -o posix ;; -esac - -fi - - - - -# PATH needs CR -# Avoid depending upon Character Ranges. -as_cr_letters='abcdefghijklmnopqrstuvwxyz' -as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' -as_cr_Letters=$as_cr_letters$as_cr_LETTERS -as_cr_digits='0123456789' -as_cr_alnum=$as_cr_Letters$as_cr_digits - -# The user is always right. -if test "${PATH_SEPARATOR+set}" != set; then - echo "#! /bin/sh" >conf$$.sh - echo "exit 0" >>conf$$.sh - chmod +x conf$$.sh - if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then - PATH_SEPARATOR=';' - else - PATH_SEPARATOR=: - fi - rm -f conf$$.sh +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix fi +DUALCASE=1; export DUALCASE # for MKS sh # Support unset when possible. if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then @@ -18019,43 +25062,8 @@ else fi -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. 
-# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -as_nl=' -' -IFS=" "" $as_nl" - -# Find who we are. Look in the path if we contain no directory separator. -case $0 in - *[\\/]* ) as_myself=$0 ;; - *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break -done -IFS=$as_save_IFS - - ;; -esac -# We did not find ourselves, most probably we were run as `sh COMMAND' -# in which case we are not to be found in the path. -if test "x$as_myself" = x; then - as_myself=$0 -fi -if test ! -f "$as_myself"; then - echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 - { (exit 1); exit 1; } -fi - # Work around bugs in pre-3.0 UWIN ksh. -for as_var in ENV MAIL MAILPATH -do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var -done +$as_unset ENV MAIL MAILPATH PS1='$ ' PS2='> ' PS4='+ ' @@ -18069,19 +25077,18 @@ do if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then eval $as_var=C; export $as_var else - ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var + $as_unset $as_var fi done # Required to use basename. -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then +if expr a : '\(a\)' >/dev/null 2>&1; then as_expr=expr else as_expr=false fi -if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then +if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then as_basename=basename else as_basename=false @@ -18089,120 +25096,159 @@ fi # Name of the executable. -as_me=`$as_basename -- "$0" || +as_me=`$as_basename "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ - X"$0" : 'X\(/\)' \| . 2>/dev/null || + X"$0" : 'X\(/\)$' \| \ + . 
: '\(.\)' 2>/dev/null || echo X/"$0" | - sed '/^.*\/\([^/][^/]*\)\/*$/{ - s//\1/ - q - } - /^X\/\(\/\/\)$/{ - s//\1/ - q - } - /^X\/\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` -# CDPATH. -$as_unset CDPATH + +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. + case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. + if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! 
-f "$as_myself"; then + { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5 +echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;} + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for as_base in sh bash ksh sh5; do + case $as_dir in + /*) + if ("$as_dir/$as_base" -c ' as_lineno_1=$LINENO as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` test "x$as_lineno_1" != "x$as_lineno_2" && - test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } + $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac # Create $as_me.lineno as a copy of $as_myself, but with $LINENO # uniformly replaced by the line number. The first 'sed' inserts a - # line-number line after each line using $LINENO; the second 'sed' - # does the real work. The second script uses 'N' to pair each - # line-number line with the line containing $LINENO, and appends - # trailing '-' during substitution so that $LINENO is not a special - # case at line end. + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the - # scripts with optimization help from Paolo Bonzini. Blame Lee - # E. McMahon (1931-1989) for sed's syntax. :-) - sed -n ' - p - /[$]LINENO/= - ' <$as_myself | + # second 'sed' script. Blame Lee E. 
McMahon for sed's syntax. :-) + sed '=' <$as_myself | sed ' - s/[$]LINENO.*/&-/ - t lineno - b - :lineno N - :loop - s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, t loop - s/-\n.*// + s,-$,, + s,^['$as_cr_digits']*\n,, ' >$as_me.lineno && - chmod +x "$as_me.lineno" || - { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + chmod +x $as_me.lineno || + { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5 +echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;} { (exit 1); exit 1; }; } # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the - # original and so on. Autoconf is especially sensitive to this). - . "./$as_me.lineno" + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno # Exit status is that of the last command. exit } -if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then - as_dirname=dirname -else - as_dirname=false -fi - -ECHO_C= ECHO_N= ECHO_T= -case `echo -n x` in --n*) - case `echo 'x\c'` in - *c*) ECHO_T=' ';; # ECHO_T is single tab character. - *) ECHO_C='\c';; - esac;; -*) - ECHO_N='-n';; +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; esac -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then +if expr a : '\(a\)' >/dev/null 2>&1; then as_expr=expr else as_expr=false fi rm -f conf$$ conf$$.exe conf$$.file -if test -d conf$$.dir; then - rm -f conf$$.dir/conf$$.file -else - rm -f conf$$.dir - mkdir conf$$.dir -fi echo >conf$$.file if ln -s conf$$.file conf$$ 2>/dev/null; then - as_ln_s='ln -s' - # ... 
but there are two gotchas: - # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. - # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. - # In both cases, we have to default to `cp -p'. - ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -p' fi -rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file -rmdir conf$$.dir 2>/dev/null +rm -f conf$$ conf$$.exe conf$$.file if mkdir -p . 2>/dev/null; then as_mkdir_p=: @@ -18211,28 +25257,7 @@ else as_mkdir_p=false fi -if test -x / >/dev/null 2>&1; then - as_test_x='test -x' -else - if ls -dL / >/dev/null 2>&1; then - as_ls_L_option=L - else - as_ls_L_option= - fi - as_test_x=' - eval sh -c '\'' - if test -d "$1"; then - test -d "$1/."; - else - case $1 in - -*)set "./$1";; - esac; - case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in - ???[sx]*):;;*)false;;esac;fi - '\'' sh - ' -fi -as_executable_p=$as_test_x +as_executable_p="test -f" # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" @@ -18241,14 +25266,31 @@ as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" +# IFS +# We need space, tab and new line, in precisely that order. +as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH + exec 6>&1 -# Save the log message, to keep $[0] and so on meaningful, and to +# Open the log real soon, to keep \$[0] and so on meaningful, and to # report actual input values of CONFIG_FILES etc. instead of their -# values after options handling. 
-ac_log=" +# values after options handling. Logging --version etc. is OK. +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX +} >&5 +cat >&5 <<_CSEOF + This file was extended by $as_me, which was -generated by GNU Autoconf 2.61. Invocation command line was +generated by GNU Autoconf 2.59. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS @@ -18256,19 +25298,30 @@ generated by GNU Autoconf 2.61. Invocation command line was CONFIG_COMMANDS = $CONFIG_COMMANDS $ $0 $@ -on `(hostname || uname -n) 2>/dev/null | sed 1q` -" - +_CSEOF +echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5 +echo >&5 _ACEOF -cat >>$CONFIG_STATUS <<_ACEOF # Files that config.status was made for. -config_files="$ac_config_files" -config_headers="$ac_config_headers" +if test -n "$ac_config_files"; then + echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS +fi -_ACEOF +if test -n "$ac_config_headers"; then + echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_links"; then + echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_commands"; then + echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS +fi cat >>$CONFIG_STATUS <<\_ACEOF + ac_cs_usage="\ \`$as_me' instantiates files from templates according to the current configuration. @@ -18276,7 +25329,7 @@ current configuration. Usage: $0 [OPTIONS] [FILE]... -h, --help print this help, then exit - -V, --version print version number and configuration settings, then exit + -V, --version print version number, then exit -q, --quiet do not print progress messages -d, --debug don't remove temporary files --recheck update $as_me by reconfiguring in the same conditions @@ -18292,21 +25345,19 @@ Configuration headers: $config_headers Report bugs to ." 
- _ACEOF + cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ config.status -configured by $0, generated by GNU Autoconf 2.61, - with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" +configured by $0, generated by GNU Autoconf 2.59, + with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" -Copyright (C) 2006 Free Software Foundation, Inc. +Copyright (C) 2003 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." - -ac_pwd='$ac_pwd' -srcdir='$srcdir' -INSTALL='$INSTALL' +srcdir=$srcdir +INSTALL="$INSTALL" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF @@ -18317,24 +25368,39 @@ while test $# != 0 do case $1 in --*=*) - ac_option=`expr "X$1" : 'X\([^=]*\)='` - ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_option=`expr "x$1" : 'x\([^=]*\)='` + ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'` ac_shift=: ;; - *) + -*) ac_option=$1 ac_optarg=$2 ac_shift=shift ;; + *) # This is not an option, so the user has probably given explicit + # arguments. + ac_option=$1 + ac_need_defaults=false;; esac case $ac_option in # Handling of the options. +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; - --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) - echo "$ac_cs_version"; exit ;; - --debug | --debu | --deb | --de | --d | -d ) + --version | --vers* | -V ) + echo "$ac_cs_version"; exit 0 ;; + --he | --h) + # Conflict between --help and --header + { { echo "$as_me:$LINENO: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: ambiguous option: $1 +Try \`$0 --help' for more information." 
>&2;} + { (exit 1); exit 1; }; };; + --help | --hel | -h ) + echo "$ac_cs_usage"; exit 0 ;; + --debug | --d* | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift @@ -18344,24 +25410,18 @@ do $ac_shift CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg" ac_need_defaults=false;; - --he | --h) - # Conflict between --help and --header - { echo "$as_me: error: ambiguous option: $1 -Try \`$0 --help' for more information." >&2 - { (exit 1); exit 1; }; };; - --help | --hel | -h ) - echo "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; # This is an error. - -*) { echo "$as_me: error: unrecognized option: $1 -Try \`$0 --help' for more information." >&2 + -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&2;} { (exit 1); exit 1; }; } ;; - *) ac_config_targets="$ac_config_targets $1" - ac_need_defaults=false ;; + *) ac_config_targets="$ac_config_targets $1" ;; esac shift @@ -18377,89 +25437,84 @@ fi _ACEOF cat >>$CONFIG_STATUS <<_ACEOF if \$ac_cs_recheck; then - echo "running CONFIG_SHELL=$SHELL $SHELL $0 "$ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6 - CONFIG_SHELL=$SHELL - export CONFIG_SHELL - exec $SHELL "$0"$ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6 + exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion fi _ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF -exec 5>>config.log -{ - echo - sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX -## Running $as_me. ## -_ASBOX - echo "$ac_log" -} >&5 -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF -# Handling of arguments. 
+ + +cat >>$CONFIG_STATUS <<\_ACEOF for ac_config_target in $ac_config_targets do - case $ac_config_target in - "pvfs2-config.h") CONFIG_HEADERS="$CONFIG_HEADERS pvfs2-config.h" ;; - "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; - "module.mk") CONFIG_FILES="$CONFIG_FILES module.mk" ;; - "src/apps/admin/module.mk") CONFIG_FILES="$CONFIG_FILES src/apps/admin/module.mk" ;; - "src/apps/admin/pvfs2-config") CONFIG_FILES="$CONFIG_FILES src/apps/admin/pvfs2-config" ;; - "src/apps/karma/module.mk") CONFIG_FILES="$CONFIG_FILES src/apps/karma/module.mk" ;; - "src/apps/vis/module.mk") CONFIG_FILES="$CONFIG_FILES src/apps/vis/module.mk" ;; - "src/apps/kernel/linux/module.mk") CONFIG_FILES="$CONFIG_FILES src/apps/kernel/linux/module.mk" ;; - "src/io/trove/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/trove/module.mk" ;; - "src/io/trove/trove-handle-mgmt/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/trove/trove-handle-mgmt/module.mk" ;; - "src/io/trove/trove-dbpf/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/trove/trove-dbpf/module.mk" ;; - "src/common/misc/module.mk") CONFIG_FILES="$CONFIG_FILES src/common/misc/module.mk" ;; - "src/common/quickhash/module.mk") CONFIG_FILES="$CONFIG_FILES src/common/quickhash/module.mk" ;; - "src/common/quicklist/module.mk") CONFIG_FILES="$CONFIG_FILES src/common/quicklist/module.mk" ;; - "src/common/dotconf/module.mk") CONFIG_FILES="$CONFIG_FILES src/common/dotconf/module.mk" ;; - "src/common/id-generator/module.mk") CONFIG_FILES="$CONFIG_FILES src/common/id-generator/module.mk" ;; - "src/common/gossip/module.mk") CONFIG_FILES="$CONFIG_FILES src/common/gossip/module.mk" ;; - "src/common/gen-locks/module.mk") CONFIG_FILES="$CONFIG_FILES src/common/gen-locks/module.mk" ;; - "src/common/llist/module.mk") CONFIG_FILES="$CONFIG_FILES src/common/llist/module.mk" ;; - "src/common/statecomp/module.mk") CONFIG_FILES="$CONFIG_FILES src/common/statecomp/module.mk" ;; - "src/io/bmi/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/bmi/module.mk" ;; 
- "src/io/bmi/bmi_tcp/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_tcp/module.mk" ;; - "src/io/bmi/bmi_gm/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_gm/module.mk" ;; - "src/io/bmi/bmi_mx/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_mx/module.mk" ;; - "src/io/bmi/bmi_ib/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_ib/module.mk" ;; - "src/io/bmi/bmi_osd/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_osd/module.mk" ;; - "src/io/bmi/bmi_portals/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_portals/module.mk" ;; - "src/io/description/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/description/module.mk" ;; - "src/io/flow/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/flow/module.mk" ;; - "src/io/flow/flowproto-bmi-trove/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/flow/flowproto-bmi-trove/module.mk" ;; - "src/io/flow/flowproto-template/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/flow/flowproto-template/module.mk" ;; - "src/io/flow/flowproto-dump-offsets/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/flow/flowproto-dump-offsets/module.mk" ;; - "src/io/flow/flowproto-bmi-cache/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/flow/flowproto-bmi-cache/module.mk" ;; - "src/io/buffer/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/buffer/module.mk" ;; - "src/io/job/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/job/module.mk" ;; - "src/io/dev/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/dev/module.mk" ;; - "src/proto/module.mk") CONFIG_FILES="$CONFIG_FILES src/proto/module.mk" ;; - "src/server/module.mk") CONFIG_FILES="$CONFIG_FILES src/server/module.mk" ;; - "src/server/request-scheduler/module.mk") CONFIG_FILES="$CONFIG_FILES src/server/request-scheduler/module.mk" ;; - "src/client/sysint/module.mk") CONFIG_FILES="$CONFIG_FILES src/client/sysint/module.mk" ;; - "src/kernel/linux-2.6/Makefile") CONFIG_FILES="$CONFIG_FILES src/kernel/linux-2.6/Makefile" ;; - "src/kernel/linux-2.4/Makefile") 
CONFIG_FILES="$CONFIG_FILES src/kernel/linux-2.4/Makefile" ;; - "doc/module.mk") CONFIG_FILES="$CONFIG_FILES doc/module.mk" ;; - "doc/coding/module.mk") CONFIG_FILES="$CONFIG_FILES doc/coding/module.mk" ;; - "doc/design/module.mk") CONFIG_FILES="$CONFIG_FILES doc/design/module.mk" ;; - "doc/random/module.mk") CONFIG_FILES="$CONFIG_FILES doc/random/module.mk" ;; - "examples/pvfs2-server.rc") CONFIG_FILES="$CONFIG_FILES examples/pvfs2-server.rc" ;; - "doc/doxygen/pvfs2-doxygen.conf") CONFIG_FILES="$CONFIG_FILES doc/doxygen/pvfs2-doxygen.conf" ;; - + case "$ac_config_target" in + # Handling of arguments. + "include/pvfs2.h" ) CONFIG_FILES="$CONFIG_FILES include/pvfs2.h" ;; + "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "module.mk" ) CONFIG_FILES="$CONFIG_FILES module.mk" ;; + "src/apps/admin/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/admin/module.mk" ;; + "src/apps/admin/pvfs2-config" ) CONFIG_FILES="$CONFIG_FILES src/apps/admin/pvfs2-config" ;; + "src/apps/devel/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/devel/module.mk" ;; + "src/apps/karma/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/karma/module.mk" ;; + "src/apps/vis/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/vis/module.mk" ;; + "src/apps/fuse/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/fuse/module.mk" ;; + "src/apps/ucache/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/ucache/module.mk" ;; + "src/apps/kernel/linux/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/kernel/linux/module.mk" ;; + "src/apps/user/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/apps/user/module.mk" ;; + "src/io/trove/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/trove/module.mk" ;; + "src/io/trove/trove-handle-mgmt/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/trove/trove-handle-mgmt/module.mk" ;; + "src/io/trove/trove-dbpf/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/trove/trove-dbpf/module.mk" ;; + "src/common/misc/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/misc/module.mk" ;; 
+ "src/common/quickhash/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/quickhash/module.mk" ;; + "src/common/quicklist/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/quicklist/module.mk" ;; + "src/common/dotconf/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/dotconf/module.mk" ;; + "src/common/id-generator/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/id-generator/module.mk" ;; + "src/common/gossip/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/gossip/module.mk" ;; + "src/common/gen-locks/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/gen-locks/module.mk" ;; + "src/common/llist/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/llist/module.mk" ;; + "src/common/statecomp/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/statecomp/module.mk" ;; + "src/common/events/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/events/module.mk" ;; + "src/common/mgmt/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/common/mgmt/module.mk" ;; + "src/io/bmi/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/bmi/module.mk" ;; + "src/io/bmi/bmi_tcp/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_tcp/module.mk" ;; + "src/io/bmi/bmi_gm/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_gm/module.mk" ;; + "src/io/bmi/bmi_mx/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_mx/module.mk" ;; + "src/io/bmi/bmi_ib/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_ib/module.mk" ;; + "src/io/bmi/bmi_osd/module.mk") CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_osd/module.mk" ;; + "src/io/bmi/bmi_portals/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_portals/module.mk" ;; + "src/io/bmi/bmi_zoid/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/bmi/bmi_zoid/module.mk" ;; + "src/io/description/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/description/module.mk" ;; + "src/io/flow/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/flow/module.mk" ;; + "src/io/flow/flowproto-bmi-trove/module.mk" ) CONFIG_FILES="$CONFIG_FILES 
src/io/flow/flowproto-bmi-trove/module.mk" ;; + "src/io/flow/flowproto-template/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/flow/flowproto-template/module.mk" ;; + "src/io/flow/flowproto-dump-offsets/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/flow/flowproto-dump-offsets/module.mk" ;; + "src/io/flow/flowproto-bmi-cache/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/flow/flowproto-bmi-cache/module.mk" ;; + "src/io/buffer/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/buffer/module.mk" ;; + "src/io/job/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/job/module.mk" ;; + "src/io/dev/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/io/dev/module.mk" ;; + "src/proto/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/proto/module.mk" ;; + "src/server/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/server/module.mk" ;; + "src/server/request-scheduler/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/server/request-scheduler/module.mk" ;; + "src/client/sysint/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/client/sysint/module.mk" ;; + "src/client/usrint/module.mk" ) CONFIG_FILES="$CONFIG_FILES src/client/usrint/module.mk" ;; + "src/kernel/linux-2.6/Makefile" ) CONFIG_FILES="$CONFIG_FILES src/kernel/linux-2.6/Makefile" ;; + "src/kernel/linux-2.4/Makefile" ) CONFIG_FILES="$CONFIG_FILES src/kernel/linux-2.4/Makefile" ;; + "doc/module.mk" ) CONFIG_FILES="$CONFIG_FILES doc/module.mk" ;; + "doc/coding/module.mk" ) CONFIG_FILES="$CONFIG_FILES doc/coding/module.mk" ;; + "doc/design/module.mk" ) CONFIG_FILES="$CONFIG_FILES doc/design/module.mk" ;; + "doc/random/module.mk" ) CONFIG_FILES="$CONFIG_FILES doc/random/module.mk" ;; + "examples/pvfs2-server.rc" ) CONFIG_FILES="$CONFIG_FILES examples/pvfs2-server.rc" ;; + "doc/doxygen/pvfs2-doxygen.conf" ) CONFIG_FILES="$CONFIG_FILES doc/doxygen/pvfs2-doxygen.conf" ;; + "pvfs2-config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS pvfs2-config.h" ;; *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 echo "$as_me: error: invalid 
argument: $ac_config_target" >&2;} { (exit 1); exit 1; }; };; esac done - # If the user did not use the arguments to specify the items to instantiate, # then the envvar interface is used. Set only those that are not. # We use the long form for the default assignment because of an extremely @@ -18470,479 +25525,386 @@ if $ac_need_defaults; then fi # Have a temporary directory for convenience. Make it in the build tree -# simply because there is no reason against having it here, and in addition, +# simply because there is no reason to put it here, and in addition, # creating and moving files from /tmp can sometimes cause problems. -# Hook for its removal unless debugging. -# Note that there is a small window in which the directory will not be cleaned: -# after its creation but before its name has been assigned to `$tmp'. +# Create a temporary directory, and hook for its removal unless debugging. $debug || { - tmp= - trap 'exit_status=$? - { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status -' 0 + trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0 trap '{ (exit 1); exit 1; }' 1 2 13 15 } + # Create a (secure) tmp directory for tmp files. { - tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" } || { - tmp=./conf$$-$RANDOM - (umask 077 && mkdir "$tmp") + tmp=./confstat$$-$RANDOM + (umask 077 && mkdir $tmp) } || { echo "$me: cannot create a temporary directory in ." >&2 { (exit 1); exit 1; } } +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF + # -# Set up the sed scripts for CONFIG_FILES section. +# CONFIG_FILES section. # # No need to generate the scripts if there are no CONFIG_FILES. 
# This happens for instance when ./config.status config.h -if test -n "$CONFIG_FILES"; then - -_ACEOF - - - -ac_delim='%!_!# ' -for ac_last_try in false false false false false :; do - cat >conf$$subs.sed <<_ACEOF -SHELL!$SHELL$ac_delim -PATH_SEPARATOR!$PATH_SEPARATOR$ac_delim -PACKAGE_NAME!$PACKAGE_NAME$ac_delim -PACKAGE_TARNAME!$PACKAGE_TARNAME$ac_delim -PACKAGE_VERSION!$PACKAGE_VERSION$ac_delim -PACKAGE_STRING!$PACKAGE_STRING$ac_delim -PACKAGE_BUGREPORT!$PACKAGE_BUGREPORT$ac_delim -exec_prefix!$exec_prefix$ac_delim -prefix!$prefix$ac_delim -program_transform_name!$program_transform_name$ac_delim -bindir!$bindir$ac_delim -sbindir!$sbindir$ac_delim -libexecdir!$libexecdir$ac_delim -datarootdir!$datarootdir$ac_delim -datadir!$datadir$ac_delim -sysconfdir!$sysconfdir$ac_delim -sharedstatedir!$sharedstatedir$ac_delim -localstatedir!$localstatedir$ac_delim -includedir!$includedir$ac_delim -oldincludedir!$oldincludedir$ac_delim -docdir!$docdir$ac_delim -infodir!$infodir$ac_delim -htmldir!$htmldir$ac_delim -dvidir!$dvidir$ac_delim -pdfdir!$pdfdir$ac_delim -psdir!$psdir$ac_delim -libdir!$libdir$ac_delim -localedir!$localedir$ac_delim -mandir!$mandir$ac_delim -DEFS!$DEFS$ac_delim -ECHO_C!$ECHO_C$ac_delim -ECHO_N!$ECHO_N$ac_delim -ECHO_T!$ECHO_T$ac_delim -LIBS!$LIBS$ac_delim -build_alias!$build_alias$ac_delim -host_alias!$host_alias$ac_delim -target_alias!$target_alias$ac_delim -PVFS2_VERSION!$PVFS2_VERSION$ac_delim -build!$build$ac_delim -build_cpu!$build_cpu$ac_delim -build_vendor!$build_vendor$ac_delim -build_os!$build_os$ac_delim -host!$host$ac_delim -host_cpu!$host_cpu$ac_delim -host_vendor!$host_vendor$ac_delim -host_os!$host_os$ac_delim -CC!$CC$ac_delim -CFLAGS!$CFLAGS$ac_delim -LDFLAGS!$LDFLAGS$ac_delim -CPPFLAGS!$CPPFLAGS$ac_delim -ac_ct_CC!$ac_ct_CC$ac_delim -EXEEXT!$EXEEXT$ac_delim -OBJEXT!$OBJEXT$ac_delim -CPP!$CPP$ac_delim -GREP!$GREP$ac_delim -EGREP!$EGREP$ac_delim -INSTALL_PROGRAM!$INSTALL_PROGRAM$ac_delim -INSTALL_SCRIPT!$INSTALL_SCRIPT$ac_delim 
-INSTALL_DATA!$INSTALL_DATA$ac_delim -HAVE_PERL!$HAVE_PERL$ac_delim -HAVE_FIND!$HAVE_FIND$ac_delim -HAVE_BISON!$HAVE_BISON$ac_delim -HAVE_FLEX!$HAVE_FLEX$ac_delim -BUILD_CC!$BUILD_CC$ac_delim -BUILD_CFLAGS!$BUILD_CFLAGS$ac_delim -BUILD_LDFLAGS!$BUILD_LDFLAGS$ac_delim -LIBCFLAGS!$LIBCFLAGS$ac_delim -THREAD_LIB!$THREAD_LIB$ac_delim -BUILD_SERVER!$BUILD_SERVER$ac_delim -MMAP_RA_CACHE!$MMAP_RA_CACHE$ac_delim -TRUSTED_CONNECTIONS!$TRUSTED_CONNECTIONS$ac_delim -HAVE_PKGCONFIG!$HAVE_PKGCONFIG$ac_delim -GTKLIBS!$GTKLIBS$ac_delim -GTKCFLAGS!$GTKCFLAGS$ac_delim -BUILD_KARMA!$BUILD_KARMA$ac_delim -build_static!$build_static$ac_delim -REDHAT_RELEASE!$REDHAT_RELEASE$ac_delim -NPTL_WORKAROUND!$NPTL_WORKAROUND$ac_delim -MISC_TROVE_FLAGS!$MISC_TROVE_FLAGS$ac_delim -THREADED_KMOD_HELPER!$THREADED_KMOD_HELPER$ac_delim -LINUX_KERNEL_SRC!$LINUX_KERNEL_SRC$ac_delim -LINUX24_KERNEL_SRC!$LINUX24_KERNEL_SRC$ac_delim -LINUX24_KERNEL_MINOR_VER!$LINUX24_KERNEL_MINOR_VER$ac_delim -BUILD_ABSOLUTE_TOP!$BUILD_ABSOLUTE_TOP$ac_delim -SRC_RELATIVE_TOP!$SRC_RELATIVE_TOP$ac_delim -SRC_ABSOLUTE_TOP!$SRC_ABSOLUTE_TOP$ac_delim -ENABLE_COVERAGE!$ENABLE_COVERAGE$ac_delim -STRICT_CFLAGS!$STRICT_CFLAGS$ac_delim -QUIET_COMPILE!$QUIET_COMPILE$ac_delim -BUILD_EPOLL!$BUILD_EPOLL$ac_delim -PVFS2_SEGV_BACKTRACE!$PVFS2_SEGV_BACKTRACE$ac_delim -build_shared!$build_shared$ac_delim -INTELC!$INTELC$ac_delim -GNUC!$GNUC$ac_delim -DB_CFLAGS!$DB_CFLAGS$ac_delim -DB_LIB!$DB_LIB$ac_delim -NEEDS_LIBRT!$NEEDS_LIBRT$ac_delim -_ACEOF - - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then - break - elif $ac_last_try; then - { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 -echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} - { (exit 1); exit 1; }; } - else - ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" - fi -done - -ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed` -if test -n "$ac_eof"; then - ac_eof=`echo "$ac_eof" | sort -nru | sed 1q` - ac_eof=`expr $ac_eof + 1` -fi - -cat >>$CONFIG_STATUS <<_ACEOF -cat >"\$tmp/subs-1.sed" <<\CEOF$ac_eof -/@[a-zA-Z_][a-zA-Z_0-9]*@/!b -_ACEOF -sed ' -s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g -s/^/s,@/; s/!/@,|#_!!_#|/ -:n -t n -s/'"$ac_delim"'$/,g/; t -s/$/\\/; p -N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n -' >>$CONFIG_STATUS >$CONFIG_STATUS <<_ACEOF -CEOF$ac_eof -_ACEOF - - -ac_delim='%!_!# ' -for ac_last_try in false false false false false :; do - cat >conf$$subs.sed <<_ACEOF -TARGET_OS_DARWIN!$TARGET_OS_DARWIN$ac_delim -TARGET_OS_LINUX!$TARGET_OS_LINUX$ac_delim -BUILD_BMI_TCP!$BUILD_BMI_TCP$ac_delim -BUILD_GM!$BUILD_GM$ac_delim -GM_INCDIR!$GM_INCDIR$ac_delim -GM_LIBDIR!$GM_LIBDIR$ac_delim -BUILD_MX!$BUILD_MX$ac_delim -MX_INCDIR!$MX_INCDIR$ac_delim -MX_LIBDIR!$MX_LIBDIR$ac_delim -BUILD_IB!$BUILD_IB$ac_delim -IB_INCDIR!$IB_INCDIR$ac_delim -IB_LIBDIR!$IB_LIBDIR$ac_delim -BUILD_OPENIB!$BUILD_OPENIB$ac_delim -OPENIB_INCDIR!$OPENIB_INCDIR$ac_delim -OPENIB_LIBDIR!$OPENIB_LIBDIR$ac_delim -BUILD_PORTALS!$BUILD_PORTALS$ac_delim -PORTALS_INCS!$PORTALS_INCS$ac_delim -PORTALS_LIBS!$PORTALS_LIBS$ac_delim -BUILD_OSD!$BUILD_OSD$ac_delim -OSD_LIBS!$OSD_LIBS$ac_delim -SERVER_LDFLAGS!$SERVER_LDFLAGS$ac_delim -GOSSIP_ENABLE_BACKTRACE!$GOSSIP_ENABLE_BACKTRACE$ac_delim -BUILD_VIS!$BUILD_VIS$ac_delim -VISCFLAGS!$VISCFLAGS$ac_delim -VISLIBS!$VISLIBS$ac_delim -LIBOBJS!$LIBOBJS$ac_delim -LTLIBOBJS!$LTLIBOBJS$ac_delim -_ACEOF - - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 27; then - break - elif $ac_last_try; then - { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 -echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} - { (exit 1); exit 1; }; } - else - ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" +if test -n "\$CONFIG_FILES"; then + # Protect against being on the right side of a sed subst in config.status. + sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g; + s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF +s,@SHELL@,$SHELL,;t t +s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t +s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t +s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t +s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t +s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t +s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t +s,@exec_prefix@,$exec_prefix,;t t +s,@prefix@,$prefix,;t t +s,@program_transform_name@,$program_transform_name,;t t +s,@bindir@,$bindir,;t t +s,@sbindir@,$sbindir,;t t +s,@libexecdir@,$libexecdir,;t t +s,@datadir@,$datadir,;t t +s,@sysconfdir@,$sysconfdir,;t t +s,@sharedstatedir@,$sharedstatedir,;t t +s,@localstatedir@,$localstatedir,;t t +s,@libdir@,$libdir,;t t +s,@includedir@,$includedir,;t t +s,@oldincludedir@,$oldincludedir,;t t +s,@infodir@,$infodir,;t t +s,@mandir@,$mandir,;t t +s,@build_alias@,$build_alias,;t t +s,@host_alias@,$host_alias,;t t +s,@target_alias@,$target_alias,;t t +s,@DEFS@,$DEFS,;t t +s,@ECHO_C@,$ECHO_C,;t t +s,@ECHO_N@,$ECHO_N,;t t +s,@ECHO_T@,$ECHO_T,;t t +s,@LIBS@,$LIBS,;t t +s,@PVFS2_VERSION@,$PVFS2_VERSION,;t t +s,@PVFS2_VERSION_MAJOR@,$PVFS2_VERSION_MAJOR,;t t +s,@PVFS2_VERSION_MINOR@,$PVFS2_VERSION_MINOR,;t t +s,@PVFS2_VERSION_SUB@,$PVFS2_VERSION_SUB,;t t +s,@build@,$build,;t t +s,@build_cpu@,$build_cpu,;t t +s,@build_vendor@,$build_vendor,;t t +s,@build_os@,$build_os,;t t +s,@host@,$host,;t t +s,@host_cpu@,$host_cpu,;t t +s,@host_vendor@,$host_vendor,;t t +s,@host_os@,$host_os,;t t +s,@CC@,$CC,;t t +s,@CFLAGS@,$CFLAGS,;t t +s,@LDFLAGS@,$LDFLAGS,;t t +s,@CPPFLAGS@,$CPPFLAGS,;t t +s,@ac_ct_CC@,$ac_ct_CC,;t t +s,@EXEEXT@,$EXEEXT,;t t +s,@OBJEXT@,$OBJEXT,;t t +s,@CPP@,$CPP,;t t +s,@EGREP@,$EGREP,;t t +s,@INSTALL_PROGRAM@,$INSTALL_PROGRAM,;t t +s,@INSTALL_SCRIPT@,$INSTALL_SCRIPT,;t t +s,@INSTALL_DATA@,$INSTALL_DATA,;t 
t +s,@HAVE_PERL@,$HAVE_PERL,;t t +s,@HAVE_FIND@,$HAVE_FIND,;t t +s,@HAVE_BISON@,$HAVE_BISON,;t t +s,@HAVE_FLEX@,$HAVE_FLEX,;t t +s,@BUILD_CC@,$BUILD_CC,;t t +s,@BUILD_CFLAGS@,$BUILD_CFLAGS,;t t +s,@BUILD_LDFLAGS@,$BUILD_LDFLAGS,;t t +s,@LIBCFLAGS@,$LIBCFLAGS,;t t +s,@THREAD_LIB@,$THREAD_LIB,;t t +s,@BUILD_SERVER@,$BUILD_SERVER,;t t +s,@BUILD_BMI_ONLY@,$BUILD_BMI_ONLY,;t t +s,@NEED_BERKELEY_DB@,$NEED_BERKELEY_DB,;t t +s,@MMAP_RA_CACHE@,$MMAP_RA_CACHE,;t t +s,@RESET_FILE_POS@,$RESET_FILE_POS,;t t +s,@TRUSTED_CONNECTIONS@,$TRUSTED_CONNECTIONS,;t t +s,@HAVE_PKGCONFIG@,$HAVE_PKGCONFIG,;t t +s,@GTKLIBS@,$GTKLIBS,;t t +s,@GTKCFLAGS@,$GTKCFLAGS,;t t +s,@BUILD_KARMA@,$BUILD_KARMA,;t t +s,@build_static@,$build_static,;t t +s,@REDHAT_RELEASE@,$REDHAT_RELEASE,;t t +s,@NPTL_WORKAROUND@,$NPTL_WORKAROUND,;t t +s,@MISC_TROVE_FLAGS@,$MISC_TROVE_FLAGS,;t t +s,@TAU_INCS@,$TAU_INCS,;t t +s,@BUILD_TAU@,$BUILD_TAU,;t t +s,@BUILD_KERNEL@,$BUILD_KERNEL,;t t +s,@THREADED_KMOD_HELPER@,$THREADED_KMOD_HELPER,;t t +s,@LINUX_KERNEL_SRC@,$LINUX_KERNEL_SRC,;t t +s,@LINUX24_KERNEL_SRC@,$LINUX24_KERNEL_SRC,;t t +s,@LINUX24_KERNEL_MINOR_VER@,$LINUX24_KERNEL_MINOR_VER,;t t +s,@BUILD_ABSOLUTE_TOP@,$BUILD_ABSOLUTE_TOP,;t t +s,@SRC_RELATIVE_TOP@,$SRC_RELATIVE_TOP,;t t +s,@SRC_ABSOLUTE_TOP@,$SRC_ABSOLUTE_TOP,;t t +s,@FUSE_LDFLAGS@,$FUSE_LDFLAGS,;t t +s,@FUSE_CFLAGS@,$FUSE_CFLAGS,;t t +s,@BUILD_FUSE@,$BUILD_FUSE,;t t +s,@ENABLE_COVERAGE@,$ENABLE_COVERAGE,;t t +s,@STRICT_CFLAGS@,$STRICT_CFLAGS,;t t +s,@QUIET_COMPILE@,$QUIET_COMPILE,;t t +s,@BUILD_EPOLL@,$BUILD_EPOLL,;t t +s,@PVFS2_SEGV_BACKTRACE@,$PVFS2_SEGV_BACKTRACE,;t t +s,@build_shared@,$build_shared,;t t +s,@BUILD_USRINT@,$BUILD_USRINT,;t t +s,@BUILDUSRINT@,$BUILDUSRINT,;t t +s,@BUILD_UCACHE@,$BUILD_UCACHE,;t t +s,@BUILD_ACL_INTERFACE@,$BUILD_ACL_INTERFACE,;t t +s,@INTELC@,$INTELC,;t t +s,@GNUC@,$GNUC,;t t +s,@DB_CFLAGS@,$DB_CFLAGS,;t t +s,@DB_LIB@,$DB_LIB,;t t +s,@NEEDS_LIBRT@,$NEEDS_LIBRT,;t t +s,@TARGET_OS_DARWIN@,$TARGET_OS_DARWIN,;t t 
+s,@TARGET_OS_LINUX@,$TARGET_OS_LINUX,;t t +s,@BUILD_BMI_TCP@,$BUILD_BMI_TCP,;t t +s,@BUILD_GM@,$BUILD_GM,;t t +s,@GM_INCDIR@,$GM_INCDIR,;t t +s,@GM_LIBDIR@,$GM_LIBDIR,;t t +s,@BUILD_MX@,$BUILD_MX,;t t +s,@MX_INCDIR@,$MX_INCDIR,;t t +s,@MX_LIBDIR@,$MX_LIBDIR,;t t +s,@BUILD_IB@,$BUILD_IB,;t t +s,@IB_INCDIR@,$IB_INCDIR,;t t +s,@IB_LIBDIR@,$IB_LIBDIR,;t t +s,@BUILD_OPENIB@,$BUILD_OPENIB,;t t +s,@OPENIB_INCDIR@,$OPENIB_INCDIR,;t t +s,@OPENIB_LIBDIR@,$OPENIB_LIBDIR,;t t +s,@BUILD_PORTALS@,$BUILD_PORTALS,;t t +s,@PORTALS_INCS@,$PORTALS_INCS,;t t +s,@PORTALS_LIBS@,$PORTALS_LIBS,;t t +s,@BUILD_ZOID@,$BUILD_ZOID,;t t +s,@ZOID_SRCDIR@,$ZOID_SRCDIR,;t t +s,@SERVER_LDFLAGS@,$SERVER_LDFLAGS,;t t +s,@GOSSIP_ENABLE_BACKTRACE@,$GOSSIP_ENABLE_BACKTRACE,;t t +s,@BUILD_VIS@,$BUILD_VIS,;t t +s,@VISCFLAGS@,$VISCFLAGS,;t t +s,@VISLIBS@,$VISLIBS,;t t +s,@LIBOBJS@,$LIBOBJS,;t t +s,@LTLIBOBJS@,$LTLIBOBJS,;t t +CEOF + +_ACEOF + + cat >>$CONFIG_STATUS <<\_ACEOF + # Split the substitutions into bite-sized pieces for seds with + # small command number limits, like on Digital OSF/1 and HP-UX. + ac_max_sed_lines=48 + ac_sed_frag=1 # Number of current file. + ac_beg=1 # First line for current file. + ac_end=$ac_max_sed_lines # Line after last line for current file. + ac_more_lines=: + ac_sed_cmds= + while $ac_more_lines; do + if test $ac_beg -gt 1; then + sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + else + sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + fi + if test ! -s $tmp/subs.frag; then + ac_more_lines=false + else + # The purpose of the label and of the branching condition is to + # speed up the sed processing (if there are no `@' at all, there + # is no need to browse any of the substitutions). + # These are the two extra sed commands mentioned above. 
+ (echo ':t + /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed + if test -z "$ac_sed_cmds"; then + ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed" + else + ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed" + fi + ac_sed_frag=`expr $ac_sed_frag + 1` + ac_beg=$ac_end + ac_end=`expr $ac_end + $ac_max_sed_lines` + fi + done + if test -z "$ac_sed_cmds"; then + ac_sed_cmds=cat fi -done - -ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed` -if test -n "$ac_eof"; then - ac_eof=`echo "$ac_eof" | sort -nru | sed 1q` - ac_eof=`expr $ac_eof + 1` -fi +fi # test -n "$CONFIG_FILES" -cat >>$CONFIG_STATUS <<_ACEOF -cat >"\$tmp/subs-2.sed" <<\CEOF$ac_eof -/@[a-zA-Z_][a-zA-Z_0-9]*@/!b end -_ACEOF -sed ' -s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g -s/^/s,@/; s/!/@,|#_!!_#|/ -:n -t n -s/'"$ac_delim"'$/,g/; t -s/$/\\/; p -N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n -' >>$CONFIG_STATUS >$CONFIG_STATUS <<_ACEOF -:end -s/|#_!!_#|//g -CEOF$ac_eof _ACEOF - - -# VPATH may cause trouble with some makes, so we remove $(srcdir), -# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and -# trailing colons and then remove the whole line if VPATH becomes empty -# (actually we leave an empty line to preserve line numbers). -if test "x$srcdir" = x.; then - ac_vpsub='/^[ ]*VPATH[ ]*=/{ -s/:*\$(srcdir):*/:/ -s/:*\${srcdir}:*/:/ -s/:*@srcdir@:*/:/ -s/^\([^=]*=[ ]*\):*/\1/ -s/:*$// -s/^[^=]*=[ ]*$// -}' -fi - cat >>$CONFIG_STATUS <<\_ACEOF -fi # test -n "$CONFIG_FILES" - - -for ac_tag in :F $CONFIG_FILES :H $CONFIG_HEADERS -do - case $ac_tag in - :[FHLC]) ac_mode=$ac_tag; continue;; - esac - case $ac_mode$ac_tag in - :[FHL]*:*);; - :L* | :C*:*) { { echo "$as_me:$LINENO: error: Invalid tag $ac_tag." >&5 -echo "$as_me: error: Invalid tag $ac_tag." 
>&2;} - { (exit 1); exit 1; }; };; - :[FH]-) ac_tag=-:-;; - :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; - esac - ac_save_IFS=$IFS - IFS=: - set x $ac_tag - IFS=$ac_save_IFS - shift - ac_file=$1 - shift - - case $ac_mode in - :L) ac_source=$1;; - :[FH]) - ac_file_inputs= - for ac_f - do - case $ac_f in - -) ac_f="$tmp/stdin";; - *) # Look for the file first in the build tree, then in the source tree - # (if the path is not absolute). The absolute path cannot be DOS-style, - # because $ac_f cannot contain `:'. - test -f "$ac_f" || - case $ac_f in - [\\/$]*) false;; - *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; - esac || - { { echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5 -echo "$as_me: error: cannot find input file: $ac_f" >&2;} - { (exit 1); exit 1; }; };; - esac - ac_file_inputs="$ac_file_inputs $ac_f" - done - - # Let's still pretend it is `configure' which instantiates (i.e., don't - # use $as_me), people would be surprised to read: - # /* config.h. Generated by config.status. */ - configure_input="Generated from "`IFS=: - echo $* | sed 's|^[^:]*/||;s|:[^:]*/|, |g'`" by configure." - if test x"$ac_file" != x-; then - configure_input="$ac_file. $configure_input" - { echo "$as_me:$LINENO: creating $ac_file" >&5 -echo "$as_me: creating $ac_file" >&6;} - fi - - case $ac_tag in - *:-:* | *:-) cat >"$tmp/stdin";; - esac - ;; +for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case $ac_file in + - | *:- | *:-:* ) # input from stdin + cat >$tmp/stdin + ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + * ) ac_file_in=$ac_file.in ;; esac - ac_dir=`$as_dirname -- "$ac_file" || + # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories. 
+ ac_dir=`(dirname "$ac_file") 2>/dev/null || $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ - X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || + X"$ac_file" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || echo X"$ac_file" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - { as_dir="$ac_dir" - case $as_dir in #( - -*) as_dir=./$as_dir;; - esac - test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || { + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { if $as_mkdir_p; then + mkdir -p "$ac_dir" + else + as_dir="$ac_dir" as_dirs= - while :; do - case $as_dir in #( - *\'*) as_qdir=`echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #( - *) as_qdir=$as_dir;; - esac - as_dirs="'$as_qdir' $as_dirs" - as_dir=`$as_dirname -- "$as_dir" || + while test ! -d "$as_dir"; do + as_dirs="$as_dir $as_dirs" + as_dir=`(dirname "$as_dir") 2>/dev/null || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ - X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || + X"$as_dir" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || echo X"$as_dir" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - test -d "$as_dir" && break + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` done - test -z "$as_dirs" || eval "mkdir $as_dirs" - } || test -d "$as_dir" || { { echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 -echo "$as_me: error: cannot create directory $as_dir" >&2;} + test ! 
-n "$as_dirs" || mkdir $as_dirs + fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 +echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} { (exit 1); exit 1; }; }; } + ac_builddir=. -case "$ac_dir" in -.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; -*) +if test "$ac_dir" != .; then ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` - # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'` - case $ac_top_builddir_sub in - "") ac_top_builddir_sub=. ac_top_build_prefix= ;; - *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; - esac ;; -esac -ac_abs_top_builddir=$ac_pwd -ac_abs_builddir=$ac_pwd$ac_dir_suffix -# for backward compatibility: -ac_top_builddir=$ac_top_build_prefix + # A "../" for each directory in $ac_dir_suffix. + ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi case $srcdir in - .) # We are building in place. + .) # No --srcdir option. We are building in place. ac_srcdir=. - ac_top_srcdir=$ac_top_builddir_sub - ac_abs_top_srcdir=$ac_pwd ;; - [\\/]* | ?:[\\/]* ) # Absolute name. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. + else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. ac_srcdir=$srcdir$ac_dir_suffix; - ac_top_srcdir=$srcdir - ac_abs_top_srcdir=$srcdir ;; - *) # Relative name. - ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix - ac_top_srcdir=$ac_top_build_prefix$srcdir - ac_abs_top_srcdir=$ac_pwd/$srcdir ;; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; esac -ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix +# Do not use `cd foo && pwd` to compute absolute paths, because +# the directories may not exist. +case `pwd` in +.) ac_abs_builddir="$ac_dir";; +*) + case "$ac_dir" in + .) 
ac_abs_builddir=`pwd`;; + [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; + *) ac_abs_builddir=`pwd`/"$ac_dir";; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_builddir=${ac_top_builddir}.;; +*) + case ${ac_top_builddir}. in + .) ac_abs_top_builddir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; + *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_srcdir=$ac_srcdir;; +*) + case $ac_srcdir in + .) ac_abs_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; + *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_srcdir=$ac_top_srcdir;; +*) + case $ac_top_srcdir in + .) ac_abs_top_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; + *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; + esac;; +esac - case $ac_mode in - :F) - # - # CONFIG_FILE - # case $INSTALL in [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; - *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + *) ac_INSTALL=$ac_top_builddir$INSTALL ;; esac -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF -# If the template does not know about datarootdir, expand it. -# FIXME: This hack should be removed a few years after 2.60. 
-ac_datarootdir_hack=; ac_datarootdir_seen= -case `sed -n '/datarootdir/ { - p - q -} -/@datadir@/p -/@docdir@/p -/@infodir@/p -/@localedir@/p -/@mandir@/p -' $ac_file_inputs` in -*datarootdir*) ac_datarootdir_seen=yes;; -*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) - { echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 -echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF - ac_datarootdir_hack=' - s&@datadir@&$datadir&g - s&@docdir@&$docdir&g - s&@infodir@&$infodir&g - s&@localedir@&$localedir&g - s&@mandir@&$mandir&g - s&\\\${datarootdir}&$datarootdir&g' ;; -esac + if test x"$ac_file" != x-; then + { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + rm -f "$ac_file" + fi + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + if test x"$ac_file" = x-; then + configure_input= + else + configure_input="$ac_file. " + fi + configure_input=$configure_input"Generated from `echo $ac_file_in | + sed 's,.*/,,'` by configure." + + # First look for the input files in the build tree, otherwise in the + # src tree. 
+ ac_file_inputs=`IFS=: + for f in $ac_file_in; do + case $f in + -) echo $tmp/stdin ;; + [\\/$]*) + # Absolute (can't be DOS-style, as IFS=:) + test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + echo "$f";; + *) # Relative + if test -f "$f"; then + # Build tree + echo "$f" + elif test -f "$srcdir/$f"; then + # Source tree + echo "$srcdir/$f" + else + # /dev/null tree + { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + fi;; + esac + done` || { (exit 1); exit 1; } _ACEOF - -# Neutralize VPATH when `$srcdir' = `.'. -# Shell code in configure.ac might set extrasub. -# FIXME: do we really want to maintain this feature? cat >>$CONFIG_STATUS <<_ACEOF sed "$ac_vpsub $extrasub @@ -18950,137 +25912,252 @@ _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF :t /@[a-zA-Z_][a-zA-Z_0-9]*@/!b -s&@configure_input@&$configure_input&;t t -s&@top_builddir@&$ac_top_builddir_sub&;t t -s&@srcdir@&$ac_srcdir&;t t -s&@abs_srcdir@&$ac_abs_srcdir&;t t -s&@top_srcdir@&$ac_top_srcdir&;t t -s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t -s&@builddir@&$ac_builddir&;t t -s&@abs_builddir@&$ac_abs_builddir&;t t -s&@abs_top_builddir@&$ac_abs_top_builddir&;t t -s&@INSTALL@&$ac_INSTALL&;t t -$ac_datarootdir_hack -" $ac_file_inputs | sed -f "$tmp/subs-1.sed" | sed -f "$tmp/subs-2.sed" >$tmp/out - -test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && - { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } && - { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } && - { echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir' -which seems to be undefined. Please make sure it is defined." >&5 -echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' -which seems to be undefined. 
Please make sure it is defined." >&2;} - - rm -f "$tmp/stdin" +s,@configure_input@,$configure_input,;t t +s,@srcdir@,$ac_srcdir,;t t +s,@abs_srcdir@,$ac_abs_srcdir,;t t +s,@top_srcdir@,$ac_top_srcdir,;t t +s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t +s,@builddir@,$ac_builddir,;t t +s,@abs_builddir@,$ac_abs_builddir,;t t +s,@top_builddir@,$ac_top_builddir,;t t +s,@abs_top_builddir@,$ac_abs_top_builddir,;t t +s,@INSTALL@,$ac_INSTALL,;t t +" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out + rm -f $tmp/stdin + if test x"$ac_file" != x-; then + mv $tmp/out $ac_file + else + cat $tmp/out + rm -f $tmp/out + fi + +done +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF + +# +# CONFIG_HEADER section. +# + +# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where +# NAME is the cpp macro being defined and VALUE is the value it is being given. +# +# ac_d sets the value in "#define NAME VALUE" lines. +ac_dA='s,^\([ ]*\)#\([ ]*define[ ][ ]*\)' +ac_dB='[ ].*$,\1#\2' +ac_dC=' ' +ac_dD=',;t' +# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE". +ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' +ac_uB='$,\1#\2define\3' +ac_uC=' ' +ac_uD=',;t' + +for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". case $ac_file in - -) cat "$tmp/out"; rm -f "$tmp/out";; - *) rm -f "$ac_file"; mv "$tmp/out" $ac_file;; + - | *:- | *:-:* ) # input from stdin + cat >$tmp/stdin + ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + * ) ac_file_in=$ac_file.in ;; esac - ;; - :H) - # - # CONFIG_HEADER - # -_ACEOF - -# Transform confdefs.h into a sed script `conftest.defines', that -# substitutes the proper values into config.h.in to produce config.h. 
-rm -f conftest.defines conftest.tail -# First, append a space to every undef/define line, to ease matching. -echo 's/$/ /' >conftest.defines -# Then, protect against being on the right side of a sed subst, or in -# an unquoted here document, in config.status. If some macros were -# called several times there might be several #defines for the same -# symbol, which is useless. But do not sort them, since the last -# AC_DEFINE must be honored. -ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* -# These sed commands are passed to sed as "A NAME B PARAMS C VALUE D", where -# NAME is the cpp macro being defined, VALUE is the value it is being given. -# PARAMS is the parameter list in the macro definition--in most cases, it's -# just an empty string. -ac_dA='s,^\\([ #]*\\)[^ ]*\\([ ]*' -ac_dB='\\)[ (].*,\\1define\\2' -ac_dC=' ' -ac_dD=' ,' - -uniq confdefs.h | - sed -n ' - t rset - :rset - s/^[ ]*#[ ]*define[ ][ ]*// - t ok - d - :ok - s/[\\&,]/\\&/g - s/^\('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/ '"$ac_dA"'\1'"$ac_dB"'\2'"${ac_dC}"'\3'"$ac_dD"'/p - s/^\('"$ac_word_re"'\)[ ]*\(.*\)/'"$ac_dA"'\1'"$ac_dB$ac_dC"'\2'"$ac_dD"'/p - ' >>conftest.defines - -# Remove the space that was appended to ease matching. -# Then replace #undef with comments. This is necessary, for + + test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + + # First look for the input files in the build tree, otherwise in the + # src tree. + ac_file_inputs=`IFS=: + for f in $ac_file_in; do + case $f in + -) echo $tmp/stdin ;; + [\\/$]*) + # Absolute (can't be DOS-style, as IFS=:) + test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + # Do quote $f, to prevent DOS paths from being IFS'd. 
+ echo "$f";; + *) # Relative + if test -f "$f"; then + # Build tree + echo "$f" + elif test -f "$srcdir/$f"; then + # Source tree + echo "$srcdir/$f" + else + # /dev/null tree + { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + fi;; + esac + done` || { (exit 1); exit 1; } + # Remove the trailing spaces. + sed 's/[ ]*$//' $ac_file_inputs >$tmp/in + +_ACEOF + +# Transform confdefs.h into two sed scripts, `conftest.defines' and +# `conftest.undefs', that substitutes the proper values into +# config.h.in to produce config.h. The first handles `#define' +# templates, and the second `#undef' templates. +# And first: Protect against being on the right side of a sed subst in +# config.status. Protect against being in an unquoted here document +# in config.status. +rm -f conftest.defines conftest.undefs +# Using a here document instead of a string reduces the quoting nightmare. +# Putting comments in sed scripts is not portable. +# +# `end' is used to avoid that the second main sed command (meant for +# 0-ary CPP macros) applies to n-ary macro definitions. +# See the Autoconf documentation for `clear'. +cat >confdef2sed.sed <<\_ACEOF +s/[\\&,]/\\&/g +s,[\\$`],\\&,g +t clear +: clear +s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*\)\(([^)]*)\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1\2${ac_dC}\3${ac_dD},gp +t end +s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD},gp +: end +_ACEOF +# If some macros were called several times there might be several times +# the same #defines, which is useless. Nevertheless, we may not want to +# sort them, since we want the *last* AC-DEFINE to be honored. +uniq confdefs.h | sed -n -f confdef2sed.sed >conftest.defines +sed 's/ac_d/ac_u/g' conftest.defines >conftest.undefs +rm -f confdef2sed.sed + +# This sed command replaces #undef with comments. 
This is necessary, for # example, in the case of _POSIX_SOURCE, which is predefined and required # on some systems where configure will not decide to define it. -# (The regexp can be short, since the line contains either #define or #undef.) -echo 's/ $// -s,^[ #]*u.*,/* & */,' >>conftest.defines - -# Break up conftest.defines: -ac_max_sed_lines=50 - -# First sed command is: sed -f defines.sed $ac_file_inputs >"$tmp/out1" -# Second one is: sed -f defines.sed "$tmp/out1" >"$tmp/out2" -# Third one will be: sed -f defines.sed "$tmp/out2" >"$tmp/out1" -# et cetera. -ac_in='$ac_file_inputs' -ac_out='"$tmp/out1"' -ac_nxt='"$tmp/out2"' - -while : +cat >>conftest.undefs <<\_ACEOF +s,^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */, +_ACEOF + +# Break up conftest.defines because some shells have a limit on the size +# of here documents, and old seds have small limits too (100 cmds). +echo ' # Handle all the #define templates only if necessary.' >>$CONFIG_STATUS +echo ' if grep "^[ ]*#[ ]*define" $tmp/in >/dev/null; then' >>$CONFIG_STATUS +echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS +echo ' :' >>$CONFIG_STATUS +rm -f conftest.tail +while grep . conftest.defines >/dev/null do - # Write a here document: - cat >>$CONFIG_STATUS <<_ACEOF - # First, check the format of the line: - cat >"\$tmp/defines.sed" <<\\CEOF -/^[ ]*#[ ]*undef[ ][ ]*$ac_word_re[ ]*\$/b def -/^[ ]*#[ ]*define[ ][ ]*$ac_word_re[( ]/b def -b -:def -_ACEOF - sed ${ac_max_sed_lines}q conftest.defines >>$CONFIG_STATUS + # Write a limited-size here document to $tmp/defines.sed. + echo ' cat >$tmp/defines.sed <>$CONFIG_STATUS + # Speed up: don't consider the non `#define' lines. + echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS + # Work around the forget-to-reset-the-flag bug. 
+ echo 't clr' >>$CONFIG_STATUS + echo ': clr' >>$CONFIG_STATUS + sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS echo 'CEOF - sed -f "$tmp/defines.sed"' "$ac_in >$ac_out" >>$CONFIG_STATUS - ac_in=$ac_out; ac_out=$ac_nxt; ac_nxt=$ac_in - sed 1,${ac_max_sed_lines}d conftest.defines >conftest.tail - grep . conftest.tail >/dev/null || break + sed -f $tmp/defines.sed $tmp/in >$tmp/out + rm -f $tmp/in + mv $tmp/out $tmp/in +' >>$CONFIG_STATUS + sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail rm -f conftest.defines mv conftest.tail conftest.defines done -rm -f conftest.defines conftest.tail +rm -f conftest.defines +echo ' fi # grep' >>$CONFIG_STATUS +echo >>$CONFIG_STATUS + +# Break up conftest.undefs because some shells have a limit on the size +# of here documents, and old seds have small limits too (100 cmds). +echo ' # Handle all the #undef templates' >>$CONFIG_STATUS +rm -f conftest.tail +while grep . conftest.undefs >/dev/null +do + # Write a limited-size here document to $tmp/undefs.sed. + echo ' cat >$tmp/undefs.sed <>$CONFIG_STATUS + # Speed up: don't consider the non `#undef' + echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS + # Work around the forget-to-reset-the-flag bug. + echo 't clr' >>$CONFIG_STATUS + echo ': clr' >>$CONFIG_STATUS + sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS + echo 'CEOF + sed -f $tmp/undefs.sed $tmp/in >$tmp/out + rm -f $tmp/in + mv $tmp/out $tmp/in +' >>$CONFIG_STATUS + sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail + rm -f conftest.undefs + mv conftest.tail conftest.undefs +done +rm -f conftest.undefs -echo "ac_result=$ac_in" >>$CONFIG_STATUS cat >>$CONFIG_STATUS <<\_ACEOF + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + if test x"$ac_file" = x-; then + echo "/* Generated by configure. */" >$tmp/config.h + else + echo "/* $ac_file. Generated by configure. 
*/" >$tmp/config.h + fi + cat $tmp/in >>$tmp/config.h + rm -f $tmp/in if test x"$ac_file" != x-; then - echo "/* $configure_input */" >"$tmp/config.h" - cat "$ac_result" >>"$tmp/config.h" - if diff $ac_file "$tmp/config.h" >/dev/null 2>&1; then + if diff $ac_file $tmp/config.h >/dev/null 2>&1; then { echo "$as_me:$LINENO: $ac_file is unchanged" >&5 echo "$as_me: $ac_file is unchanged" >&6;} else + ac_dir=`(dirname "$ac_file") 2>/dev/null || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { if $as_mkdir_p; then + mkdir -p "$ac_dir" + else + as_dir="$ac_dir" + as_dirs= + while test ! -d "$as_dir"; do + as_dirs="$as_dir $as_dirs" + as_dir=`(dirname "$as_dir") 2>/dev/null || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + done + test ! 
-n "$as_dirs" || mkdir $as_dirs + fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 +echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} + { (exit 1); exit 1; }; }; } + rm -f $ac_file - mv "$tmp/config.h" $ac_file + mv $tmp/config.h $ac_file fi else - echo "/* $configure_input */" - cat "$ac_result" + cat $tmp/config.h + rm -f $tmp/config.h fi - rm -f "$tmp/out12" - ;; - - - esac - -done # for ac_tag +done +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF { (exit 0); exit 0; } _ACEOF @@ -19113,125 +26190,186 @@ fi chmod +x $BUILD_ABSOLUTE_TOP/src/apps/admin/pvfs2-config # print a summary of the configuration information -{ echo "$as_me:$LINENO: result: " >&5 -echo "${ECHO_T}" >&6; } -{ echo "$as_me:$LINENO: result: ***** Displaying PVFS2 Configuration Information *****" >&5 -echo "${ECHO_T}***** Displaying PVFS2 Configuration Information *****" >&6; } -{ echo "$as_me:$LINENO: result: ------------------------------------------------------" >&5 -echo "${ECHO_T}------------------------------------------------------" >&6; } +if test "x$BUILD_BMI_ONLY" = "x1" ; then + echo "$as_me:$LINENO: result: " >&5 +echo "${ECHO_T}" >&6 + echo "$as_me:$LINENO: result: ***** Displaying BMI configuration information *****" >&5 +echo "${ECHO_T}***** Displaying BMI configuration information *****" >&6 + echo "$as_me:$LINENO: result: ----------------------------------------------------" >&5 +echo "${ECHO_T}----------------------------------------------------" >&6 + PKGSTR="BMI" +else + echo "$as_me:$LINENO: result: " >&5 +echo "${ECHO_T}" >&6 + echo "$as_me:$LINENO: result: ***** Displaying PVFS Configuration Information *****" >&5 +echo "${ECHO_T}***** Displaying PVFS Configuration Information *****" >&6 + echo "$as_me:$LINENO: result: ------------------------------------------------------" >&5 +echo "${ECHO_T}------------------------------------------------------" >&6 + PKGSTR="PVFS2" +fi + +# print PVFS configs (not used by BMI) +if test 
"x$BUILD_BMI_ONLY" != "x1" ; then if test "x$BUILD_KARMA" = "x1" ; then - { echo "$as_me:$LINENO: result: PVFS2 configured to build karma gui : yes" >&5 -echo "${ECHO_T}PVFS2 configured to build karma gui : yes" >&6; } + echo "$as_me:$LINENO: result: PVFS2 configured to build karma gui : yes" >&5 +echo "${ECHO_T}PVFS2 configured to build karma gui : yes" >&6 else - { echo "$as_me:$LINENO: result: PVFS2 configured to build karma gui : no" >&5 -echo "${ECHO_T}PVFS2 configured to build karma gui : no" >&6; } + echo "$as_me:$LINENO: result: PVFS2 configured to build karma gui : no" >&5 +echo "${ECHO_T}PVFS2 configured to build karma gui : no" >&6 fi -if test "x$BUILD_EPOLL" = "x1" ; then - { echo "$as_me:$LINENO: result: PVFS2 configured to use epoll : yes" >&5 -echo "${ECHO_T}PVFS2 configured to use epoll : yes" >&6; } +if test "x$BUILD_VIS" = "x1" ; then + echo "$as_me:$LINENO: result: PVFS2 configured to build visualization tools : yes" >&5 +echo "${ECHO_T}PVFS2 configured to build visualization tools : yes" >&6 else - { echo "$as_me:$LINENO: result: PVFS2 configured to use epoll : no" >&5 -echo "${ECHO_T}PVFS2 configured to use epoll : no" >&6; } + echo "$as_me:$LINENO: result: PVFS2 configured to build visualization tools : no" >&5 +echo "${ECHO_T}PVFS2 configured to build visualization tools : no" >&6 fi -if test "x$ENABLE_COVERAGE" = "x1" ; then - { echo "$as_me:$LINENO: result: PVFS2 configured to perform coverage analysis : yes" >&5 -echo "${ECHO_T}PVFS2 configured to perform coverage analysis : yes" >&6; } +if test "x$ENABLE_COVERAGE" = "x1" -a "x$BUILD_BMI_ONLY" != "x1"; then + echo "$as_me:$LINENO: result: PVFS2 configured to perform coverage analysis : yes" >&5 +echo "${ECHO_T}PVFS2 configured to perform coverage analysis : yes" >&6 else - { echo "$as_me:$LINENO: result: PVFS2 configured to perform coverage analysis : no" >&5 -echo "${ECHO_T}PVFS2 configured to perform coverage analysis : no" >&6; } + echo "$as_me:$LINENO: result: PVFS2 configured to 
perform coverage analysis : no" >&5 +echo "${ECHO_T}PVFS2 configured to perform coverage analysis : no" >&6 fi -if test "x$MISC_TROVE_FLAGS" = "x" ; then - { echo "$as_me:$LINENO: result: PVFS2 configured for aio threaded callbacks : no" >&5 -echo "${ECHO_T}PVFS2 configured for aio threaded callbacks : no" >&6; } +if test "x$MISC_TROVE_FLAGS" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then + echo "$as_me:$LINENO: result: PVFS2 configured for aio threaded callbacks : no" >&5 +echo "${ECHO_T}PVFS2 configured for aio threaded callbacks : no" >&6 else - { echo "$as_me:$LINENO: result: PVFS2 configured for aio threaded callbacks : yes" >&5 -echo "${ECHO_T}PVFS2 configured for aio threaded callbacks : yes" >&6; } + echo "$as_me:$LINENO: result: PVFS2 configured for aio threaded callbacks : yes" >&5 +echo "${ECHO_T}PVFS2 configured for aio threaded callbacks : yes" >&6 fi -if test "x$LINUX_KERNEL_SRC" = "x" ; then - { echo "$as_me:$LINENO: result: PVFS2 configured for the 2.6.x kernel module : no" >&5 -echo "${ECHO_T}PVFS2 configured for the 2.6.x kernel module : no" >&6; } +if test "x$BUILD_FUSE" = "x1" -a "x$BUILD_BMI_ONLY" != "x1"; then + echo "$as_me:$LINENO: result: PVFS2 configured to use FUSE : yes" >&5 +echo "${ECHO_T}PVFS2 configured to use FUSE : yes" >&6 else - { echo "$as_me:$LINENO: result: PVFS2 configured for the 2.6.x kernel module : yes" >&5 -echo "${ECHO_T}PVFS2 configured for the 2.6.x kernel module : yes" >&6; } + echo "$as_me:$LINENO: result: PVFS2 configured to use FUSE : no" >&5 +echo "${ECHO_T}PVFS2 configured to use FUSE : no" >&6 fi -if test "x$LINUX24_KERNEL_SRC" = "x" ; then - { echo "$as_me:$LINENO: result: PVFS2 configured for the 2.4.x kernel module : no" >&5 -echo "${ECHO_T}PVFS2 configured for the 2.4.x kernel module : no" >&6; } +if test "x$LINUX_KERNEL_SRC" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then + echo "$as_me:$LINENO: result: PVFS2 configured for the 2.6/3 kernel module : no" >&5 +echo "${ECHO_T}PVFS2 configured for the 2.6/3 kernel module 
: no" >&6 else - { echo "$as_me:$LINENO: result: PVFS2 configured for the 2.4.x kernel module : yes" >&5 -echo "${ECHO_T}PVFS2 configured for the 2.4.x kernel module : yes" >&6; } + echo "$as_me:$LINENO: result: PVFS2 configured for the 2.6/3 kernel module : yes" >&5 +echo "${ECHO_T}PVFS2 configured for the 2.6/3 kernel module : yes" >&6 fi -if test "x$MMAP_RA_CACHE" = "x" ; then - { echo "$as_me:$LINENO: result: PVFS2 configured for using the mmap-ra-cache : no" >&5 -echo "${ECHO_T}PVFS2 configured for using the mmap-ra-cache : no" >&6; } +if test "x$LINUX24_KERNEL_SRC" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then + echo "$as_me:$LINENO: result: PVFS2 configured for the 2.4.x kernel module : no" >&5 +echo "${ECHO_T}PVFS2 configured for the 2.4.x kernel module : no" >&6 else - { echo "$as_me:$LINENO: result: PVFS2 configured for using the mmap-ra-cache : yes" >&5 -echo "${ECHO_T}PVFS2 configured for using the mmap-ra-cache : yes" >&6; } + echo "$as_me:$LINENO: result: PVFS2 configured for the 2.4.x kernel module : yes" >&5 +echo "${ECHO_T}PVFS2 configured for the 2.4.x kernel module : yes" >&6 fi -if test "x$TRUSTED_CONNECTIONS" = "x" ; then - { echo "$as_me:$LINENO: result: PVFS2 configured for using trusted connections : no" >&5 -echo "${ECHO_T}PVFS2 configured for using trusted connections : no" >&6; } +if test "x$MMAP_RA_CACHE" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then + echo "$as_me:$LINENO: result: PVFS2 configured for using the mmap-ra-cache : no" >&5 +echo "${ECHO_T}PVFS2 configured for using the mmap-ra-cache : no" >&6 else - { echo "$as_me:$LINENO: result: PVFS2 configured for using trusted connections : yes" >&5 -echo "${ECHO_T}PVFS2 configured for using trusted connections : yes" >&6; } + echo "$as_me:$LINENO: result: PVFS2 configured for using the mmap-ra-cache : yes" >&5 +echo "${ECHO_T}PVFS2 configured for using the mmap-ra-cache : yes" >&6 fi -if test "x$THREAD_LIB" = "x" ; then - { echo "$as_me:$LINENO: result: PVFS2 configured for a thread-safe 
client library : no" >&5 -echo "${ECHO_T}PVFS2 configured for a thread-safe client library : no" >&6; } +if test "x$RESET_FILE_POS" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then + echo "$as_me:$LINENO: result: PVFS2 configured for resetting file position : no" >&5 +echo "${ECHO_T}PVFS2 configured for resetting file position : no" >&6 else - { echo "$as_me:$LINENO: result: PVFS2 configured for a thread-safe client library : yes" >&5 -echo "${ECHO_T}PVFS2 configured for a thread-safe client library : yes" >&6; } + echo "$as_me:$LINENO: result: PVFS2 configured for resetting file position : yes" >&5 +echo "${ECHO_T}PVFS2 configured for resetting file position : yes" >&6 fi -if test "x$REDHAT_RELEASE" = "x"; then - { echo "$as_me:$LINENO: result: PVFS2 will use workaround for redhat 2.4 kernels : no" >&5 -echo "${ECHO_T}PVFS2 will use workaround for redhat 2.4 kernels : no" >&6; } +if test "x$REDHAT_RELEASE" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then + echo "$as_me:$LINENO: result: PVFS2 will use workaround for redhat 2.4 kernels : no" >&5 +echo "${ECHO_T}PVFS2 will use workaround for redhat 2.4 kernels : no" >&6 else - { echo "$as_me:$LINENO: result: PVFS2 will use workaround for redhat 2.4 kernels : yes" >&5 -echo "${ECHO_T}PVFS2 will use workaround for redhat 2.4 kernels : yes" >&6; } + echo "$as_me:$LINENO: result: PVFS2 will use workaround for redhat 2.4 kernels : yes" >&5 +echo "${ECHO_T}PVFS2 will use workaround for redhat 2.4 kernels : yes" >&6 fi -if test "x$NPTL_WORKAROUND" = "x"; then - { echo "$as_me:$LINENO: result: PVFS2 will use workaround for buggy NPTL : no" >&5 -echo "${ECHO_T}PVFS2 will use workaround for buggy NPTL : no" >&6; } +if test "x$NPTL_WORKAROUND" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then + echo "$as_me:$LINENO: result: PVFS2 will use workaround for buggy NPTL : no" >&5 +echo "${ECHO_T}PVFS2 will use workaround for buggy NPTL : no" >&6 else - { echo "$as_me:$LINENO: result: PVFS2 will use workaround for buggy NPTL : yes" >&5 -echo 
"${ECHO_T}PVFS2 will use workaround for buggy NPTL : yes" >&6; } + echo "$as_me:$LINENO: result: PVFS2 will use workaround for buggy NPTL : yes" >&5 +echo "${ECHO_T}PVFS2 will use workaround for buggy NPTL : yes" >&6 fi -if test "x$BUILD_SERVER" = "x1"; then - { echo "$as_me:$LINENO: result: PVFS2 server will be built : yes" >&5 -echo "${ECHO_T}PVFS2 server will be built : yes" >&6; } +if test "x$BUILD_SERVER" = "x1" -a "x$BUILD_BMI_ONLY" != "x1"; then + echo "$as_me:$LINENO: result: PVFS2 server will be built : yes" >&5 +echo "${ECHO_T}PVFS2 server will be built : yes" >&6 +else + echo "$as_me:$LINENO: result: PVFS2 server will be built : no" >&5 +echo "${ECHO_T}PVFS2 server will be built : no" >&6 + +fi + +if test "x$BUILD_USRINT" = "x1" ; then + echo "$as_me:$LINENO: result: PVFS2 user interface libraries will be built : yes" >&5 +echo "${ECHO_T}PVFS2 user interface libraries will be built : yes" >&6 +else + echo "$as_me:$LINENO: result: PVFS2 user interface libraries will be built : no" >&5 +echo "${ECHO_T}PVFS2 user interface libraries will be built : no" >&6 + +fi + +if test "x$BUILD_UCACHE" = "x1" ; then + echo "$as_me:$LINENO: result: PVFS2 user interface library cache enabled : yes" >&5 +echo "${ECHO_T}PVFS2 user interface library cache enabled : yes" >&6 else - { echo "$as_me:$LINENO: result: PVFS2 server will be built : no" >&5 -echo "${ECHO_T}PVFS2 server will be built : no" >&6; } + echo "$as_me:$LINENO: result: PVFS2 user interface library cache enabled : no" >&5 +echo "${ECHO_T}PVFS2 user interface library cache enabled : no" >&6 fi +else + +# print BMI and PVFS configs +if test "x$THREAD_LIB" = "x"; then + echo "$as_me:$LINENO: result: $PKGSTR configured for a thread-safe client library : no" >&5 +echo "${ECHO_T}$PKGSTR configured for a thread-safe client library : no" >&6 +else + echo "$as_me:$LINENO: result: $PKGSTR configured for a thread-safe client library : yes" >&5 +echo "${ECHO_T}$PKGSTR configured for a thread-safe client library : yes" >&6 
+fi + +if test "x$BUILD_EPOLL" = "x1" ; then + echo "$as_me:$LINENO: result: $PKGSTR configured to use epoll : yes" >&5 +echo "${ECHO_T}$PKGSTR configured to use epoll : yes" >&6 +else + echo "$as_me:$LINENO: result: $PKGSTR configured to use epoll : no" >&5 +echo "${ECHO_T}$PKGSTR configured to use epoll : no" >&6 +fi + +if test "x$TRUSTED_CONNECTIONS" = "x" ; then + echo "$as_me:$LINENO: result: $PKGSTR configured for using trusted connections : no" >&5 +echo "${ECHO_T}$PKGSTR configured for using trusted connections : no" >&6 +else + echo "$as_me:$LINENO: result: $PKGSTR configured for using trusted connections : yes" >&5 +echo "${ECHO_T}$PKGSTR configured for using trusted connections : yes" >&6 +fi + +fi # end of BMI/PVFS config display + if test "x$WARN_ABOUT_HOSTNAMES" = "xyes" ; then - { echo "$as_me:$LINENO: result: WARNING: gethostbyname is not supported on this machine: ALL ADDRESSES MUST BE IN DOT NOTATION." >&5 -echo "${ECHO_T}WARNING: gethostbyname is not supported on this machine: ALL ADDRESSES MUST BE IN DOT NOTATION." >&6; } + echo "$as_me:$LINENO: result: WARNING: gethostbyname is not supported on this machine: ALL ADDRESSES MUST BE IN DOT NOTATION." >&5 +echo "${ECHO_T}WARNING: gethostbyname is not supported on this machine: ALL ADDRESSES MUST BE IN DOT NOTATION." >&6 fi if test x$BUILD_GM = x1 -o x$BUILD_MX = x1 -o x$BUILD_IB = x1 -o \ - x$BUILD_OPENIB = x1 -o x$BUILD_PORTALS = x1 ; then + x$BUILD_OPENIB = x1 -o x$BUILD_PORTALS = x1 -o x$BUILD_ZOID = x1 ; then if test x$BUILD_BMI_TCP = x1 ; then - { echo "$as_me:$LINENO: WARNING: You have selected to build PVFS to use a \"fast\" network + { echo "$as_me:$LINENO: WARNING: You have selected to build $PKGSTR to use a \"fast\" network interface, but have not disabled TCP. The way this is currently implemented will lead to rather slow response times on the fast interface. Suggest you configure with \"--without-bmi-tcp\" for the best performance." 
>&5 -echo "$as_me: WARNING: You have selected to build PVFS to use a \"fast\" network +echo "$as_me: WARNING: You have selected to build $PKGSTR to use a \"fast\" network interface, but have not disabled TCP. The way this is currently implemented will lead to rather slow response times on the fast interface. Suggest you configure with @@ -19239,10 +26377,21 @@ echo "$as_me: WARNING: You have selected to build PVFS to use a \"fast\" network fi fi -{ echo "$as_me:$LINENO: result: " >&5 -echo "${ECHO_T}" >&6; } -{ echo "$as_me:$LINENO: result: PVFS2 version string: $PVFS2_VERSION" >&5 -echo "${ECHO_T}PVFS2 version string: $PVFS2_VERSION" >&6; } +if test x$HAVE_DB_OLD = x1; then + { echo "$as_me:$LINENO: WARNING: The detected version of Berkeley DB is not at least 4.8.30. + Metadata corruption has been documented in versions prior + to 4.8.30. It is highly recommended that you update the + version of Berkeley DB you are building against." >&5 +echo "$as_me: WARNING: The detected version of Berkeley DB is not at least 4.8.30. + Metadata corruption has been documented in versions prior + to 4.8.30. It is highly recommended that you update the + version of Berkeley DB you are building against." >&2;} +fi + +echo "$as_me:$LINENO: result: " >&5 +echo "${ECHO_T}" >&6 +echo "$as_me:$LINENO: result: $PKGSTR version string: $PVFS2_VERSION" >&5 +echo "${ECHO_T}$PKGSTR version string: $PVFS2_VERSION" >&6 -{ echo "$as_me:$LINENO: result: " >&5 -echo "${ECHO_T}" >&6; } +echo "$as_me:$LINENO: result: " >&5 +echo "${ECHO_T}" >&6 diff --git a/configure.in b/configure.in index a63b999..62bf3b4 100644 --- a/configure.in +++ b/configure.in @@ -6,7 +6,7 @@ dnl You may need to use autoheader as well if changing any DEFINEs dnl sanity checks, output header, location of scripts used here AC_INIT(include/pvfs2-types.h) -AC_PREREQ(2.59) +AC_PREREQ(2.57) AC_CONFIG_AUX_DIR(maint/config) dnl @@ -16,17 +16,25 @@ dnl dnl PVFS2 versioning information. dnl An @ in the date string can confuse e.g. 
scp and 'make kmod' CONFIGURE_TIME=`date -u +"%Y-%m-%d-%H%M%S"` -PVFS2_MAJOR=2 -PVFS2_MINOR=7 -PVFS2_SUB=1 +PVFS2_VERSION_MAJOR=2 +PVFS2_VERSION_MINOR=8 +PVFS2_VERSION_SUB=6 +#PVFS2_VERSION_RELEASE="orangefs" #PVFS2_PRE="" -PVFS2_PRE="pre1-$CONFIGURE_TIME" +PVFS2_VERSION_PRE="orangefs-$CONFIGURE_TIME" -PVFS2_VERSION=$PVFS2_MAJOR.$PVFS2_MINOR.$PVFS2_SUB$PVFS2_PRE +PVFS2_VERSION=$PVFS2_VERSION_MAJOR.$PVFS2_VERSION_MINOR.$PVFS2_VERSION_SUB-$PVFS2_VERSION_RELEASE$PVFS2_VERSION_PRE AC_SUBST(PVFS2_VERSION) -AC_DEFINE_UNQUOTED(PVFS2_VERSION_MAJOR, $PVFS2_MAJOR, major version number) -AC_DEFINE_UNQUOTED(PVFS2_VERSION_MINOR, $PVFS2_MINOR, minor version number) -AC_DEFINE_UNQUOTED(PVFS2_VERSION_SUB, $PVFS2_SUB, sub version number) +AC_DEFINE_UNQUOTED(PVFS2_VERSION_MAJOR, $PVFS2_VERSION_MAJOR, major version number) +AC_DEFINE_UNQUOTED(PVFS2_VERSION_MINOR, $PVFS2_VERSION_MINOR, minor version number) +AC_DEFINE_UNQUOTED(PVFS2_VERSION_SUB, $PVFS2_VERSION_SUB, sub version number) +#AC_DEFINE_UNQUOTED(PVFS2_VERSION_RELEASE, $PVFS2_VERSION_RELEASE, release version number) +AC_SUBST(PVFS2_VERSION_MAJOR) +AC_SUBST(PVFS2_VERSION_MINOR) +AC_SUBST(PVFS2_VERSION_SUB) +#AC_SUBST(PVFS2_VERSION_RELEASE) + +dnl BMI_ONLY_TAG dnl Checks for host type dnl Ensures we can compile on this OS @@ -37,19 +45,68 @@ if test "x$USR_CFLAGS" = "x"; then USR_CFLAGS_SET=no fi +dnl if we use headers that might not be on every platform, add them here AC_CONFIG_HEADER(pvfs2-config.h) +AC_CHECK_HEADER([pwd.h], + [AC_DEFINE(HAVE_PWD_H, 1, Define if pwd.h exists)]) +AC_CHECK_HEADER([features.h], + [AC_DEFINE(HAVE_FEATURES_H, 1, Define if features.h exists)]) +AC_CHECK_HEADER([fstab.h], + [AC_DEFINE(HAVE_FSTAB_H, 1, Define if fstab.h exists)]) +AC_CHECK_HEADER([malloc.h], + [AC_DEFINE(HAVE_MALLOC_H, 1, Define if malloc.h exists)]) +AC_CHECK_HEADER([memory.h], + [AC_DEFINE(HAVE_MEMORY_H, 1, Define if memory.h exists)]) +AC_CHECK_HEADER([mntent.h], + [AC_DEFINE(HAVE_MNTENT_H, 1, Define if mntent.h exists)]) 
+AC_CHECK_HEADER([netdb.h], + [AC_DEFINE(HAVE_NETDB_H, 1, Define if netdb.h exists)]) +AC_CHECK_HEADER([stdarg.h], + [AC_DEFINE(HAVE_STDARG_H, 1, Define if stdarg.h exists)]) +AC_CHECK_HEADER([stdint.h], + [AC_DEFINE(HAVE_STDINT_H, 1, Define if stdint.h exists)]) +AC_CHECK_HEADER([stdlib.h], + [AC_DEFINE(HAVE_STDLIB_H, 1, Define if stdlib.h exists)]) +AC_CHECK_HEADER([strings.h], + [AC_DEFINE(HAVE_STRINGS_H, 1, Define if strings.h exists)]) +AC_CHECK_HEADER([stdlib.h], + [AC_DEFINE(HAVE_STDLIB_H, 1, Define if stdlib.h exists)]) + +AC_CHECK_HEADER([linux/types.h], + [AC_DEFINE(HAVE_LINUX_TYPES_H, 1, Define if linux/types.h exists)]) +AC_CHECK_HEADER([linux/malloc.h], + [AC_DEFINE(HAVE_LINUX_MALLOC_H, 1, Define if linux/malloc.h exists)]) + +dnl These are kernel headers checked in maint/config/kernel.m4 +dnl AC_CHECK_HEADERS([linux/compat.h]) +dnl AC_CHECK_HEADERS([linux/exportfs.h]) +dnl AC_CHECK_HEADERS([linux/ioctl32.h]) +dnl AC_CHECK_HEADERS([linux/mount.h]) +dnl AC_CHECK_HEADERS([linux/posix_acl.h]) +dnl AC_CHECK_HEADERS([linux/posix_acl_xattr.h]) +dnl AC_CHECK_HEADERS([linux/syscalls.h]) +dnl AC_CHECK_HEADERS([linux/xattr_acl.h]) + AC_CHECK_HEADER([sys/vfs.h], [AC_DEFINE(HAVE_SYS_VFS_H, 1, Define if sys/vfs.h exists)]) AC_CHECK_HEADER([sys/mount.h], [AC_DEFINE(HAVE_SYS_MOUNT_H, 1, Define if sys/mount.h exists)]) - -AC_CHECK_HEADERS([mntent.h, fstab.h]) - -AC_CHECK_HEADERS(stdarg.h) - -AC_CHECK_HEADERS(attr/xattr.h) -AC_CHECK_HEADERS(sys/xattr.h) +AC_CHECK_HEADER([sys/stat.h], + [AC_DEFINE(HAVE_SYS_STAT_H, 1, Define if sys/stat.h exists)]) +AC_CHECK_HEADER([sys/types.h], + [AC_DEFINE(HAVE_SYS_TYPES_H, 1, Define if sys/types.h exists)]) +AC_CHECK_HEADER([sys/socket.h], + [AC_DEFINE(HAVE_SYS_SOCKET_H, 1, Define if sys/socket.h exists)]) +AC_CHECK_HEADER([sys/sendfile.h], + [AC_DEFINE(HAVE_SYS_SENDFILE_H, 1, Define if sys/sendfile.h exists)]) +AC_CHECK_HEADER([sys/xattr.h], + [AC_DEFINE(HAVE_SYS_XATTR_H, 1, Define if sys/xattr.h exists)]) + 
+AC_CHECK_HEADER([arpa/inet.h], + [AC_DEFINE(HAVE_ARPA_INET_H, 1, Define if arpa/inet.h exists)]) +AC_CHECK_HEADER([attr/xattr.h], + [AC_DEFINE(HAVE_ATTR_XATTR_H, 1, Define if attr/xattr.h exists)]) AC_CHECK_SIZEOF([long int]) @@ -66,10 +123,24 @@ CFLAGS=$USR_CFLAGS AC_PROG_CPP AC_CHECK_PROG(HAVE_PERL, perl, yes, no) +if test $HAVE_PERL = "no"; then + AC_MSG_ERROR("perl required in PATH to complete build") +fi + AC_CHECK_PROG(HAVE_FIND, find, yes, no) +if test $HAVE_FIND = "no"; then + AC_MSG_ERROR("find required in PATH complete build") +fi + AC_CHECK_PROG(HAVE_BISON, bison, yes, no) -AC_CHECK_PROG(HAVE_FLEX, flex, yes, no) +if test $HAVE_BISON = "no"; then + AC_MSG_ERROR("bison required in PATH to complete build") +fi +AC_CHECK_PROG(HAVE_FLEX, flex, yes, no) +if test $HAVE_FLEX = "no"; then + AC_MSG_ERROR("flex required in PATH to complete build") +fi AC_MSG_CHECKING([for required Math::BigInt perl module]) perl -e "use Math::BigInt" 2>&1 > /dev/null @@ -161,7 +232,7 @@ AC_SUBST(THREAD_LIB) dnl a mechanism to disable building the PVFS2 server AC_ARG_ENABLE(server, -[ --disable-server Disables building of PVFS2 server], +[ --disable-server Disables building of PVFS2 server], [if test "x$enableval" = "xyes" ; then BUILD_SERVER=1 NEED_BERKELEY_DB=yes @@ -173,20 +244,41 @@ fi], AC_SUBST(BUILD_SERVER) AC_ARG_WITH(openssl, - [ --with-openssl= Build with openssl (default=/usr) - --without-openssl Don't build with openssl.], - [AX_OPENSSL(${withval})], - [AX_OPENSSL_OPTIONAL]) + [ --with-openssl= Build with openssl (default=/usr) + --without-openssl Don't build with openssl.], + [AX_OPENSSL(${withval})], + [AX_OPENSSL_OPTIONAL]) + +dnl check to see if the "fast" getgrouplist function is available +dnl AX_GETGROUPLIST dnl a mechanism to turn off memory usage statistics in karma (may be dnl confusing for some classes of users) AC_ARG_ENABLE(karma-mem-usage-stats, -[ --disable-karma-mem-usage-stats Disables memory usage stats in karma], +[ --disable-karma-mem-usage-stats + 
Disables memory usage stats in karma], [if test "x$enableval" = "xno" ; then CFLAGS="$CFLAGS -D__KARMA_DISABLE_MEM_USAGE__" fi] ,) +dnl build only the bmi library +AC_ARG_ENABLE(bmi-only, +[ --enable-bmi-only Build only the BMI library], +[if test "x$enableval" = "xyes" ; then + BUILD_BMI_ONLY=1 +fi] +,) +AC_SUBST(BUILD_BMI_ONLY) + +dnl if we're only building the BMI lib, disable checks for server and DB. +if test "x$BUILD_BMI_ONLY" = "x1"; then + BUILD_SERVER="" + NEED_BERKELEY_DB=no + AC_SUBST(BUILD_SERVER) + AC_SUBST(NEED_BERKELEY_DB) +fi + dnl a mechanism to turn off perf counters AC_ARG_ENABLE(perf-counters, [ --disable-perf-counters Disables pvfs2-server performance counters], @@ -195,28 +287,34 @@ AC_ARG_ENABLE(perf-counters, fi] ,) -dnl a mechanism to turn off disk I/O for testing/debugging -AC_ARG_ENABLE(disk-io, -[ --disable-disk-io Disables pvfs2-server disk I/O (for testing)], -[if test "x$enableval" = "xno" ; then - CFLAGS="$CFLAGS -D__PVFS2_DISABLE_DISK_IO__" -fi] -,) - dnl a mechanism to turn on mmap-readahead caching (for kernel interface) MMAP_RA_CACHE="" AC_ARG_ENABLE(mmap-racache, -[ --enable-mmap-racache Enables mmap-readahead in kernel interface], +[ --enable-mmap-racache **EXPERIMENTAL** Enables mmap-readahead in kernel + interface], [if test "x$enableval" = "xyes" ; then MMAP_RA_CACHE="-DUSE_MMAP_RA_CACHE" fi] ,) AC_SUBST(MMAP_RA_CACHE) +dnl a mechanism that resets the file position pointer when an error occurs whether +dnl or not any bytes were written (kernel interface only). 
+RESET_FILE_POS="" +AC_ARG_ENABLE(reset-file-pos, +[ --enable-reset-file-pos Resets file position pointer in kernel interface upon error], +[if test "x$enableval" = "xyes" ; then +RESET_FILE_POS="-DRESET_FILE_POS" +fi] +,) +AC_SUBST(RESET_FILE_POS) + + dnl See if the --enable-trusted-connections option was given to configure AC_ARG_ENABLE(trusted-connections, -[ --enable-trusted-connections Enable connects only from trusted hosts/ports - (experimental, off by default) ], +[ --enable-trusted-connections + **EXPERIMENTAL** Enable connects only + from trusted hosts/ports ], [if test "x$enableval" = "xyes"; then TRUSTED_CONNECTIONS="-DUSE_TRUSTED" fi], @@ -300,7 +398,8 @@ dnl what was the configure option set to? use_nptl_workaround=0 AC_ARG_ENABLE([nptl-workaround], -[ --enable-nptl-workaround Enable workaround for buggy NPTL/Pthread libraries], +[ --enable-nptl-workaround + Enable workaround for buggy NPTL/Pthread libraries], set_nptl_workaround=1 if test "$enableval" = no ; then use_nptl_workaround=0 @@ -376,7 +475,9 @@ AC_SUBST(NPTL_WORKAROUND) use_aio_thcb=1 AC_ARG_ENABLE([aio-threaded-callbacks], -[ --disable-aio-threaded-callbacks Disable use of AIO threaded callbacks], +[ --disable-aio-threaded-callbacks + **EXPERIMENTAL** Disable use of AIO + threaded callbacks], if test "$enableval" = no ; then use_aio_thcb=0 ; fi) dnl there used to be a big hairy test in here, back when glibc-2.3.0 and @@ -443,46 +544,34 @@ AC_ARG_WITH(berkdb-debug, AC_DEFINE(BERKDB_ERROR_REPORTING, 1, Define if berkeley db error reporting was enabled) ) -dnl use the Pablo trace library -AC_ARG_WITH(pablo, -[ --with-pablo=path Use Pablo trace library installed in "path"], - if test "x$withval" = "xyes" ; then - AC_MSG_ERROR(--with-pablo must be given a pathname) - else - CFLAGS="$CFLAGS -I$withval/include" - LDFLAGS="$LDFLAGS -L$withval/lib" - LIBS="$LIBS -lPabloTrace" - AC_DEFINE(HAVE_PABLO, 1, [Define if Pablo library is used]) - fi -) +TAU_INCS= +BUILD_TAU= -dnl use MPE profiling library 
-AC_ARG_WITH(mpe, -[ --with-mpe=path Use MPE profiling library installed in "path"], +dnl use the tau trace library +AC_ARG_WITH(tau, +[ --with-tau=path Use TAU trace library installed in "path"], if test "x$withval" = "xyes" ; then - AC_MSG_ERROR(--with-mpe must be given a pathname) + AC_MSG_ERROR(--with-tau must be given a pathname) else - CFLAGS="$CFLAGS -I$withval/include" - LDFLAGS="$LDFLAGS -L$withval/lib" - LIBS="$LIBS -lmpe -lmpich" - AC_MSG_CHECKING(for valid mpe install) - AC_TRY_LINK( - [#include ], - [ - int eventID_begin, eventID_end; - eventID_begin = MPE_Log_get_event_number(); - eventID_end = MPE_Log_get_event_number(); - MPE_Describe_state( eventID_begin, eventID_end, "Amult", "bluegreen" ); - MPE_Log_event( eventID_begin, 0, (char *)0 ); - MPE_Log_event( eventID_end, 0, (char *)0 ); - ], - [ - AC_DEFINE(HAVE_MPE, 1, [Use the MPE profiling library]) - AC_MSG_RESULT(ok)], - AC_MSG_ERROR(Invalid path to mpe install)) + TAU_INCS="-I$withval/include" + CFLAGS="$CFLAGS $TAU_INCS -D__PVFS2_ENABLE_EVENT__" + LDFLAGS="$LDFLAGS -L$withval/$(uname -m)/lib" + tau_mkfile=$withval/$(uname -m)/lib/Makefile.*-profile-trace + if test ! -f $tau_mkfile; then + AC_MSG_ERROR(TAU not compiled with profiling and tracing support) + fi + tau_config=$(echo $tau_mkfile | sed -e "s|.*Makefile.tau-||") + LIBS="$LIBS -lTAU_tf -lTAU_traceinput-$tau_config -ltau-$tau_config -lpthread -lstdc++" + AC_DEFINE(HAVE_TAU, 1, [Define if TAU library is used]) + BUILD_TAU=1 fi ) +AC_SUBST(TAU_INCS) +AC_SUBST(BUILD_TAU) + +BUILD_KERNEL= + dnl dnl Enables the kernel module to build if the appropriate dnl linux-2.6.x path is specified @@ -499,18 +588,27 @@ AC_ARG_WITH(kernel, if ! test -r $withval/include/linux/version.h ; then AC_MSG_ERROR(The kernel source tree must have been configured.) fi - vers=`sed -n '/UTS_RELEASE/{; s/.*"\([0-9]\.[0-9]\).*".*/\1/; p; }' $withval/include/linux/version.h` - if ! grep -q UTS_RELEASE..2\\.6\\. 
$withval/include/linux/version.h ; then - # 2.6.18 moves UTS_RELEASE into its own header - if ! grep -q UTS_RELEASE..2\\.6\\. $withval/include/linux/utsrelease.h ; then - AC_MSG_ERROR(The kernel source tree does not appear to be 2.6) - fi + + if test -r $withval/include/linux/version.h && grep -q UTS_RELEASE..2\\.6\\. $withval/include/linux/version.h ; then + vers=`sed -n '/UTS_RELEASE/{; s/.*"\([0-9]\.[0-9]\).*".*/\1/; p; }' $withval/include/linux/version.h` + elif test -r $withval/include/linux/utsrelease.h && grep -q UTS_RELEASE..2\\.6\\. $withval/include/linux/utsrelease.h; then + vers=`sed -n '/UTS_RELEASE/{; s/.*"\([0-9]\.[0-9]\).*".*/\1/; p; }' $withval/include/linux/utsrelease.h` + elif test -r $withval/include/generated/utsrelease.h && grep -q UTS_RELEASE..2\\.6\\. $withval/include/generated/utsrelease.h; then + vers=`sed -n '/UTS_RELEASE/{; s/.*"\([0-9]\.[0-9]\).*".*/\1/; p; }' $withval/include/generated/utsrelease.h` + elif test -r $withval/include/generated/utsrelease.h && grep -qE UTS_RELEASE..3\\. $withval/include/generated/utsrelease.h; then + vers=`sed -n '/UTS_RELEASE/{; s/.*"\([0-9]\.[0-9]\).*".*/\1/; p; }' $withval/include/generated/utsrelease.h` + else + AC_MSG_ERROR(The kernel source tree does not appear to be 2.6 or 3.X) + fi + # At least up through 2.6.3 needed to write .__modpost.cmd; this changed # sometime between then and 2.6.10. Now anybody can compile out-of-tree # modules against a configured kernel tree LINUX_KERNEL_SRC="$withval" - , LINUX_KERNEL_SRC="") + BUILD_KERNEL=1 + , LINUX_KERNEL_SRC="" +) dnl dnl Enables the kernel module to build if the appropriate @@ -539,8 +637,10 @@ AC_ARG_WITH(kernel24, k24_minor_ver=$tmp_k24_minor_ver fi LINUX24_KERNEL_SRC="$withval" LINUX24_KERNEL_MINOR_VER="`echo $k24_minor_ver| cut -d'.' -f 1`" + BUILD_KERNEL=1 , LINUX24_KERNEL_SRC="" LINUX24_KERNEL_MINOR_VER="") +AC_SUBST(BUILD_KERNEL) dnl now that we have the path to kernel source we can feature-test kernels. 
@@ -565,7 +665,8 @@ AC_ARG_ENABLE(kernel-aio, ) AC_ARG_ENABLE(kernel-sendfile, - [ --enable-kernel-sendfile Forcibly enable kernel sendfile], + [ --enable-kernel-sendfile + **EXPERIMENTAL** Forcibly enable kernel sendfile], [ enable_kernel_sendfile=$enableval ], [ enable_kernel_sendfile=no ] ) @@ -581,7 +682,9 @@ dnl Note that even without this option, pvfs2-client-core always requires dnl pthreads to run its remount thread. dnl AC_ARG_ENABLE([threaded-kmod-helper], -[ --enable-threaded-kmod-helper Use threads in the kernel helper application], +[ --enable-threaded-kmod-helper + **EXPERIMENTAL** Use threads in the kernel + helper application], [ if test "x$enableval" = "xyes" ; then THREADED_KMOD_HELPER=yes fi @@ -592,8 +695,8 @@ dnl PAV configuration needs absolute location of source and build. dnl Linux-2.6 module needs absolute location of source, and uses the dnl relative location for soft links for out-of-tree builds. BUILD_ABSOLUTE_TOP=`pwd` -SRC_RELATIVE_TOP=$srcdir -SRC_ABSOLUTE_TOP=`cd $srcdir ; pwd` +SRC_RELATIVE_TOP=`echo $0 | sed -e "s|configure$||"` +SRC_ABSOLUTE_TOP=`cd $SRC_RELATIVE_TOP ; pwd` AC_SUBST(LINUX_KERNEL_SRC) AC_SUBST(LINUX24_KERNEL_SRC) @@ -611,6 +714,42 @@ if test "x$USR_CFLAGS_SET" = "xno"; then fi ]) +dnl +dnl a function to check for FUSE +dnl +test_for_fuse() +{ + AC_CHECK_PROG(HAVE_PKGCONFIG, pkg-config, yes, no) + if test "x$HAVE_PKGCONFIG" = "xyes" ; then + AC_MSG_CHECKING([for FUSE library]) + if `pkg-config --exists fuse` ; then + AC_MSG_RESULT(yes) + FUSE_LDFLAGS=`pkg-config --libs fuse` + FUSE_CFLAGS=`pkg-config --cflags fuse` + + AC_SUBST(FUSE_LDFLAGS) + AC_SUBST(FUSE_CFLAGS) + BUILD_FUSE="1" + AC_SUBST(BUILD_FUSE) + else + AC_MSG_ERROR([FUSE: FUSE library not found. Check LD_LIBRARY_PATH.]) + fi + else + AC_MSG_ERROR(FUSE: pkg-config not available. Please install pkg-config.) 
+ fi +} + +dnl FUSE component +AC_ARG_ENABLE(fuse, +[ --enable-fuse **EXPERIMENTAL** Enable FUSE component], +[ +if test "x$enableval" = "xyes" ; then + test_for_fuse + CFLAGS="$CFLAGS -D__PVFS2_ENABLE_FUSE__" +fi +], +) + # default CFLAGS is -g -O2, unless user set CFLAGS or asked for --enable-fast if test "x$USR_CFLAGS_SET" = "xno" && test "x$enable_fast" != "xyes"; then CFLAGS="$CFLAGS -g -O2" @@ -683,7 +822,8 @@ AC_ARG_ENABLE(epoll, dnl enables a hack to print back traces out of segfault signal handler AC_ARG_ENABLE(segv-backtrace, -[ --disable-segv-backtrace Disables back traces in segfault signal handler],, +[ --disable-segv-backtrace + Disables back traces in segfault signal handler],, AC_MSG_CHECKING(if segv backtrace capable) AC_TRY_COMPILE([ #include @@ -712,6 +852,133 @@ if test "x$build_shared" = "xno" -a "x$build_static" = "xno" ; then AC_MSG_ERROR([Must do --enable-shared or --enable-static or both.]) fi +dnl Method to disable build of user interface libraries +BUILD_USRINT= +AC_ARG_ENABLE(usrint, +[ --disable-usrint Do not build posix and stdio user libraries], +[if test "x$enableval" = "xyes" ; then + AC_DEFINE(PVFS_USRINT_BUILD, 1, + [Should we build user interface libraries.]) + BUILD_USRINT=1 +else + AC_DEFINE(PVFS_USRINT_BUILD, 0, + [Should we build user interface libraries.]) +fi +AC_SUBST(BUILD_USRINT)], +[AC_DEFINE(PVFS_USRINT_BUILD, 1, + [Should we build user interface libraries.]) +BUILD_USRINT=1 +AC_SUBST(BUILDUSRINT)]) + +USRINT_KMOUNT= +dnl Method to enable user interface kmod support +AC_ARG_ENABLE(usrint-kmount, +[ --enable-usrint-kmount Assume FS is mounted via kernel], +[if test "x$enableval" = "xyes" ; then + if test "x$BUILD_KERNEL" = "x1" -o "x$BUILD_FUSE" = "x1" ; then + AC_DEFINE(PVFS_USRINT_KMOUNT, 1, + [Should user interface assume FS is mounted.]) + USRINT_KMOUNT=1 + else + AC_MSG_ERROR([Assume FS mounted but neither kernel nor FUSE build ]) + fi +else + AC_DEFINE(PVFS_USRINT_KMOUNT, 0, + [Should user interface assume FS is 
mounted.]) +fi], +[AC_DEFINE(PVFS_USRINT_KMOUNT, 0, + [Should user interface assume FS is mounted.]) +]) + +dnl Method to disable user interface CWD support +AC_ARG_ENABLE(usrint-cwd, +[ --disable-usrint-cwd Do not enable user interface CWD support], +[if test "x$enableval" = "xyes" ; then + AC_DEFINE(PVFS_USRINT_CWD, 1, + [Should we enable user interface CWD support.]) +else + AC_DEFINE(PVFS_USRINT_CWD, 0, + [Should we enable user interface CWD support.]) +fi], +[if test "x$USRINT_KMOUNT" = "x1" ; then + AC_DEFINE(PVFS_USRINT_CWD, 0, + [Should we enable user interface CWD support.]) +else + AC_DEFINE(PVFS_USRINT_CWD, 1, + [Should we enable user interface CWD support.]) +fi]) + +dnl Method to check if dl calls need an explicit link flag +AC_MSG_CHECKING(if dlfns needs explicit library request) +AC_LINK_IFELSE( +[ +#define _GNU_SOURCE 1 +#include +main() {void *p = dlsym(RTLD_DEFAULT,"sym");} +],[ + AC_MSG_RESULT(no) +],[ + AC_MSG_RESULT(yes) + LIBS="$LIBS -ldl" +]) + +dnl Method to enable user interface data cache +BUILD_UCACHE= +AC_ARG_ENABLE(ucache, +[ --enable-ucache Do not enable user user interface data cache.], +[if test "x$enableval" = "xyes" ; then + AC_DEFINE(PVFS_UCACHE_ENABLE, 1, + [Should we enable user interface data cache.]) + BUILD_UCACHE=1 +else + AC_DEFINE(PVFS_UCACHE_ENABLE, 0, + [Should we enable user interface data cache.]) +fi +AC_SUBST(BUILD_UCACHE)], +[AC_DEFINE(PVFS_UCACHE_ENABLE, 0, + [Should we enable user interface data cache.]) +AC_SUBST(BUILD_UCACHE)]) + +dnl See if we have includes needed for acls +BUILD_ACL_INTERFACE= +AC_MSG_CHECKING(for user acl includes sys/acl.h acl/libacl.h) +AC_TRY_COMPILE([ +#include +#include +], [], + AC_MSG_RESULT(yes) + BUILD_ACL_INTERFACE=1 + AC_DEFINE(PVFS_HAVE_ACL_INCLUDES, 1, + [Should we build user interface acl routines.]) +, + AC_MSG_RESULT(no) + BUILD_ACL_INTERFACE=0 +) +AC_SUBST(BUILD_ACL_INTERFACE) + +dnl See if scandir compare arg takes void pointers +AC_MSG_CHECKING(for scandir compare arg using 
void pointers) +AC_TRY_COMPILE([ +#define _LARGEFILE64_SOURCE 1 +#define _GNU_SOURCE 1 +#include +int scandir (const char *dir, struct dirent ***list, + int (*sel)(const struct dirent *), + int (*cmp)(const void *, const void *)) +{ return 0; } +int scandir64 (const char *dir, struct dirent64 ***list, + int (*sel)(const struct dirent64 *), + int (*cmp)(const void *, const void *)) +{ return 0; } +], [], + AC_MSG_RESULT(yes) + AC_DEFINE(PVFS_SCANDIR_VOID, 1, + [scandir compare arg takes void pointers.]) +, + AC_MSG_RESULT(no) +) + + dnl See if CC is a GNU compiler. This may require a real test in future dnl versions of autoconf. In 2.13 it is a side-effect of AC_PROG_CC. First dnl check if it is an Intel compiler; those lie and claim to be gcc but are @@ -810,9 +1077,6 @@ case "$host_os" in ;; esac -AC_CHECK_HEADERS(netdb.h) -AC_CHECK_HEADERS(arpa/inet.h) -AC_CHECK_HEADERS(sys/socket.h) dnl check for gethostbyname function AC_MSG_CHECKING(for gethostbyname) oldcflags="$CFLAGS" @@ -875,13 +1139,35 @@ dnl AX_PORTALS dnl -dnl Build OSD? For now, yes. This hack just so pvfs2-config spits -dnl out the right libraries. Great hack. +dnl Configure bmi_zoid, if --with-zoid or a variant given. 
+dnl +AX_ZOID + +dnl +dnl check for F_NOCACHE (MACOSX) +AC_MSG_CHECKING([for F_NOCACHE]) +AC_TRY_COMPILE([ + #include + int fd; + fcntl(fd, F_NOCACHE, 1); + ], [], + AC_MSG_RESULT(no), + AC_MSG_RESULT(yes), + AC_DEFINE(HAVE_FCNTL_F_NOCACHE, 1, Define if system defines F_NOCACHE fcntl) +) + dnl -BUILD_OSD=1 -OSD_LIBS="$SRC_ABSOLUTE_TOP/../osd-initiator/libosdinit.a $SRC_ABSOLUTE_TOP/../osd-util/libosdutil.a -lm" -AC_SUBST(BUILD_OSD) -AC_SUBST(OSD_LIBS) +dnl check for open O_DIRECT +AC_MSG_CHECKING([for open O_DIRECT]) +AC_TRY_COMPILE([ + #include + ], [ + open("somefile", O_DIRECT); + ], [], + AC_MSG_RESULT(no), + AC_MSG_RESULT(yes), + AC_DEFINE(HAVE_OPEN_O_DIRECT, 1, Define if system defines O_DIRECT fcntl) +) dnl dnl check if fgetxattr takes extra arguments @@ -903,6 +1189,46 @@ AC_TRY_COMPILE([ AC_MSG_RESULT(no) ) +dnl +dnl check if setxattr takes extra arguments +AC_MSG_CHECKING([for setxattr extra arguments]) +AC_TRY_COMPILE([ + #include + #ifdef HAVE_ATTR_XATTR_H + #include + #endif + #ifdef HAVE_SYS_XATTR_H + #include + #endif + ], + [ + setxattr(0, 0, 0, 0, 0, 0); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SETXATTR_EXTRA_ARGS, 1, Define if setxattr takes position and option arguments), + AC_MSG_RESULT(no) +) + +dnl +dnl check if getxattr takes extra arguments +AC_MSG_CHECKING([for getxattr extra arguments]) +AC_TRY_COMPILE([ + #include + #ifdef HAVE_ATTR_XATTR_H + #include + #endif + #ifdef HAVE_SYS_XATTR_H + #include + #endif + ], + [ + getxattr(0, 0, 0, 0, 0, 0); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GETXATTR_EXTRA_ARGS, 1, Define if getxattr takes position and option arguments), + AC_MSG_RESULT(no) +) + dnl dnl look for sysinfo header AC_CHECK_HEADER(sys/sysinfo.h, @@ -981,18 +1307,17 @@ AC_TRY_COMPILE([ dnl check for getmntent(), special test AC_FUNC_GETMNTENT -dnl if we use headers that might not be on every platform, add them here -AC_CHECK_HEADERS(malloc.h) -AC_CHECK_HEADERS(mntent.h) AC_ARG_ENABLE(static-server, [ --enable-static-server Builds 
the server static], -[ +[ staticserver=$enableval ], +[ staticserver="no" ] ) + +if test "$staticserver" = "yes"; then SERVER_LDFLAGS="$LDFLAGS -static" -], -[ +else SERVER_LDFLAGS="$LDFLAGS -rdynamic" -]) +fi AC_SUBST(SERVER_LDFLAGS) @@ -1006,30 +1331,43 @@ AC_SUBST(GOSSIP_ENABLE_BACKTRACE) dnl -- dnl Check for SDL -AC_MSG_CHECKING([for SDL]) -SDL_CONFIG=`which sdl-config 2> /dev/null` -if ! test -z "$SDL_CONFIG" && test -x "$SDL_CONFIG"; then - AC_MSG_RESULT(yes) - SDL_VERSION=`sdl-config --version` - VISCFLAGS=`sdl-config --cflags` - VISCLIBS=`sdl-config --libs` - - dnl We also need SDL ttf support - dnl only define BUILD_VIS if we have all required parts - AC_CHECK_HEADERS(SDL_ttf.h, - [BUILD_VIS="1" - CPPFLAGS="$CPPFLAGS $VISCFLAGS"], - [AC_CHECK_HEADERS(SDL/SDL_ttf.h, - [BUILD_VIS="1" - CPPFLAGS="$CPPFLAGS $VISCFLAGS"],)], -) +test_for_sdl() +{ + AC_MSG_CHECKING([for SDL]) + SDL_CONFIG=`which sdl-config 2> /dev/null` + if ! test -z "$SDL_CONFIG" && test -x "$SDL_CONFIG"; then + AC_MSG_RESULT(yes) + SDL_VERSION=`sdl-config --version` + VISCFLAGS=`sdl-config --cflags` + VISCLIBS=`sdl-config --libs` + + dnl We also need SDL ttf support + dnl only define BUILD_VIS if we have all required parts + AC_CHECK_HEADERS(SDL_ttf.h, + [BUILD_VIS="1" + CPPFLAGS="$CPPFLAGS $VISCFLAGS"], + [AC_CHECK_HEADERS(SDL/SDL_ttf.h, + [BUILD_VIS="1" + CPPFLAGS="$CPPFLAGS $VISCFLAGS"],)], + ) - AC_SUBST(BUILD_VIS) - AC_SUBST(VISCFLAGS) - AC_SUBST(VISLIBS) -else - AC_MSG_RESULT(no) + AC_SUBST(BUILD_VIS) + AC_SUBST(VISCFLAGS) + AC_SUBST(VISLIBS) + else + AC_MSG_RESULT(no) + fi +} + +dnl optionally disable building visualisation tools +AC_ARG_ENABLE(visual, +[ --enable-visual Enable visualisation tools. (Disabled by default, requires SDL)], +[ +if test "x$enableval" = "xyes" ; then + test_for_sdl fi +], +) dnl Which form of strerror? 
AC_MSG_CHECKING([for GNU-style strerror_r]) @@ -1053,13 +1391,18 @@ done dnl output final version of top level makefile and subdirectory dnl makefile includes -AC_OUTPUT(Makefile +AC_OUTPUT(include/pvfs2.h +Makefile module.mk src/apps/admin/module.mk src/apps/admin/pvfs2-config +src/apps/devel/module.mk src/apps/karma/module.mk src/apps/vis/module.mk +src/apps/fuse/module.mk +src/apps/ucache/module.mk src/apps/kernel/linux/module.mk +src/apps/user/module.mk src/io/trove/module.mk src/io/trove/trove-handle-mgmt/module.mk src/io/trove/trove-dbpf/module.mk @@ -1072,6 +1415,8 @@ src/common/gossip/module.mk src/common/gen-locks/module.mk src/common/llist/module.mk src/common/statecomp/module.mk +src/common/events/module.mk +src/common/mgmt/module.mk src/io/bmi/module.mk src/io/bmi/bmi_tcp/module.mk src/io/bmi/bmi_gm/module.mk @@ -1079,6 +1424,7 @@ src/io/bmi/bmi_mx/module.mk src/io/bmi/bmi_ib/module.mk src/io/bmi/bmi_osd/module.mk src/io/bmi/bmi_portals/module.mk +src/io/bmi/bmi_zoid/module.mk src/io/description/module.mk src/io/flow/module.mk src/io/flow/flowproto-bmi-trove/module.mk @@ -1092,6 +1438,7 @@ src/proto/module.mk src/server/module.mk src/server/request-scheduler/module.mk src/client/sysint/module.mk +src/client/usrint/module.mk src/kernel/linux-2.6/Makefile src/kernel/linux-2.4/Makefile doc/module.mk @@ -1105,9 +1452,20 @@ doc/doxygen/pvfs2-doxygen.conf chmod +x $BUILD_ABSOLUTE_TOP/src/apps/admin/pvfs2-config # print a summary of the configuration information -AC_MSG_RESULT() -AC_MSG_RESULT([***** Displaying PVFS2 Configuration Information *****]) -AC_MSG_RESULT([------------------------------------------------------]) +if test "x$BUILD_BMI_ONLY" = "x1" ; then + AC_MSG_RESULT() + AC_MSG_RESULT([***** Displaying BMI configuration information *****]) + AC_MSG_RESULT([----------------------------------------------------]) + PKGSTR="BMI" +else + AC_MSG_RESULT() + AC_MSG_RESULT([***** Displaying PVFS Configuration Information *****]) + 
AC_MSG_RESULT([------------------------------------------------------]) + PKGSTR="PVFS2" +fi + +# print PVFS configs (not used by BMI) +if test "x$BUILD_BMI_ONLY" != "x1" ; then if test "x$BUILD_KARMA" = "x1" ; then AC_MSG_RESULT([PVFS2 configured to build karma gui : yes]) @@ -1115,73 +1473,110 @@ else AC_MSG_RESULT([PVFS2 configured to build karma gui : no]) fi -if test "x$BUILD_EPOLL" = "x1" ; then - AC_MSG_RESULT([PVFS2 configured to use epoll : yes]) +if test "x$BUILD_VIS" = "x1" ; then + AC_MSG_RESULT([PVFS2 configured to build visualization tools : yes]) else - AC_MSG_RESULT([PVFS2 configured to use epoll : no]) + AC_MSG_RESULT([PVFS2 configured to build visualization tools : no]) fi -if test "x$ENABLE_COVERAGE" = "x1" ; then +if test "x$ENABLE_COVERAGE" = "x1" -a "x$BUILD_BMI_ONLY" != "x1"; then AC_MSG_RESULT([PVFS2 configured to perform coverage analysis : yes]) else AC_MSG_RESULT([PVFS2 configured to perform coverage analysis : no]) fi -if test "x$MISC_TROVE_FLAGS" = "x" ; then +if test "x$MISC_TROVE_FLAGS" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then AC_MSG_RESULT([PVFS2 configured for aio threaded callbacks : no]) else AC_MSG_RESULT([PVFS2 configured for aio threaded callbacks : yes]) fi -if test "x$LINUX_KERNEL_SRC" = "x" ; then - AC_MSG_RESULT([PVFS2 configured for the 2.6.x kernel module : no]) +if test "x$BUILD_FUSE" = "x1" -a "x$BUILD_BMI_ONLY" != "x1"; then + AC_MSG_RESULT([PVFS2 configured to use FUSE : yes]) +else + AC_MSG_RESULT([PVFS2 configured to use FUSE : no]) +fi + +if test "x$LINUX_KERNEL_SRC" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then + AC_MSG_RESULT([PVFS2 configured for the 2.6/3 kernel module : no]) else - AC_MSG_RESULT([PVFS2 configured for the 2.6.x kernel module : yes]) + AC_MSG_RESULT([PVFS2 configured for the 2.6/3 kernel module : yes]) fi -if test "x$LINUX24_KERNEL_SRC" = "x" ; then +if test "x$LINUX24_KERNEL_SRC" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then AC_MSG_RESULT([PVFS2 configured for the 2.4.x kernel module : no]) else 
AC_MSG_RESULT([PVFS2 configured for the 2.4.x kernel module : yes]) fi -if test "x$MMAP_RA_CACHE" = "x" ; then +if test "x$MMAP_RA_CACHE" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then AC_MSG_RESULT([PVFS2 configured for using the mmap-ra-cache : no]) else AC_MSG_RESULT([PVFS2 configured for using the mmap-ra-cache : yes]) fi -if test "x$TRUSTED_CONNECTIONS" = "x" ; then - AC_MSG_RESULT([PVFS2 configured for using trusted connections : no]) -else - AC_MSG_RESULT([PVFS2 configured for using trusted connections : yes]) -fi - -if test "x$THREAD_LIB" = "x" ; then - AC_MSG_RESULT([PVFS2 configured for a thread-safe client library : no]) +if test "x$RESET_FILE_POS" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then + AC_MSG_RESULT([PVFS2 configured for resetting file position : no]) else - AC_MSG_RESULT([PVFS2 configured for a thread-safe client library : yes]) + AC_MSG_RESULT([PVFS2 configured for resetting file position : yes]) fi -if test "x$REDHAT_RELEASE" = "x"; then +if test "x$REDHAT_RELEASE" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then AC_MSG_RESULT([PVFS2 will use workaround for redhat 2.4 kernels : no]) else AC_MSG_RESULT([PVFS2 will use workaround for redhat 2.4 kernels : yes]) fi -if test "x$NPTL_WORKAROUND" = "x"; then +if test "x$NPTL_WORKAROUND" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then AC_MSG_RESULT([PVFS2 will use workaround for buggy NPTL : no]) else AC_MSG_RESULT([PVFS2 will use workaround for buggy NPTL : yes]) fi -if test "x$BUILD_SERVER" = "x1"; then +if test "x$BUILD_SERVER" = "x1" -a "x$BUILD_BMI_ONLY" != "x1"; then AC_MSG_RESULT([PVFS2 server will be built : yes]) else AC_MSG_RESULT([PVFS2 server will be built : no]) fi +if test "x$BUILD_USRINT" = "x1" ; then + AC_MSG_RESULT([PVFS2 user interface libraries will be built : yes]) +else + AC_MSG_RESULT([PVFS2 user interface libraries will be built : no]) + +fi + +if test "x$BUILD_UCACHE" = "x1" ; then + AC_MSG_RESULT([PVFS2 user interface library cache enabled : yes]) +else + AC_MSG_RESULT([PVFS2 user interface 
library cache enabled : no]) + +fi + +else + +# print BMI and PVFS configs +if test "x$THREAD_LIB" = "x"; then + AC_MSG_RESULT([$PKGSTR configured for a thread-safe client library : no]) +else + AC_MSG_RESULT([$PKGSTR configured for a thread-safe client library : yes]) +fi + +if test "x$BUILD_EPOLL" = "x1" ; then + AC_MSG_RESULT([$PKGSTR configured to use epoll : yes]) +else + AC_MSG_RESULT([$PKGSTR configured to use epoll : no]) +fi + +if test "x$TRUSTED_CONNECTIONS" = "x" ; then + AC_MSG_RESULT([$PKGSTR configured for using trusted connections : no]) +else + AC_MSG_RESULT([$PKGSTR configured for using trusted connections : yes]) +fi + +fi # end of BMI/PVFS config display + if test "x$WARN_ABOUT_HOSTNAMES" = "xyes" ; then AC_MSG_RESULT(WARNING: gethostbyname is not supported on this machine: ALL ADDRESSES MUST BE IN DOT NOTATION.) fi @@ -1196,9 +1591,9 @@ dnl When traffic will only appear on one device, suggest that TCP be disabled. dnl But leave TCP as default on, certainly, as that is what most people want. dnl if test x$BUILD_GM = x1 -o x$BUILD_MX = x1 -o x$BUILD_IB = x1 -o \ - x$BUILD_OPENIB = x1 -o x$BUILD_PORTALS = x1 ; then + x$BUILD_OPENIB = x1 -o x$BUILD_PORTALS = x1 -o x$BUILD_ZOID = x1 ; then if test x$BUILD_BMI_TCP = x1 ; then - AC_MSG_WARN([You have selected to build PVFS to use a "fast" network + AC_MSG_WARN([You have selected to build $PKGSTR to use a "fast" network interface, but have not disabled TCP. The way this is currently implemented will lead to rather slow response times on the fast interface. Suggest you configure with @@ -1206,7 +1601,14 @@ if test x$BUILD_GM = x1 -o x$BUILD_MX = x1 -o x$BUILD_IB = x1 -o \ fi fi +if test x$HAVE_DB_OLD = x1; then + AC_MSG_WARN([The detected version of Berkeley DB is not at least 4.8.30. + Metadata corruption has been documented in versions prior + to 4.8.30. 
It is highly recommended that you update the + version of Berkeley DB you are building against.]) +fi + AC_MSG_RESULT() -AC_MSG_RESULT([PVFS2 version string: $PVFS2_VERSION]) +AC_MSG_RESULT([$PKGSTR version string: $PVFS2_VERSION]) AC_MSG_RESULT() diff --git a/doc/REFERENCES.bib b/doc/REFERENCES.bib index 037be30..6c70c71 100644 --- a/doc/REFERENCES.bib +++ b/doc/REFERENCES.bib @@ -1,3 +1,203 @@ +@PhdThesis{bradles-diss, + author = {Bradley W. Settlemyer}, + title = {A Study of Client-based Caching for Parallel {I/O}}, + school = {Clemson University}, + address = {Clemson, SC}, + year = {2009}, + month = {August}, +} + +@conference{hadoop-pvfs, + title={In Search of an {API} for Scalable File Systems: Under the table or + above it?}, + author={Swapnil Patil and Garth A. Gibson and Gregory R. Ganger and + Julio Lopez and Milo Polte and Wittawat Tantisiriroj and Lin Xiao}, + booktitle={USENIX HotCloud Workshop 2009}, + month={June}, + year={2009} +} + +@conference{syscall-diag, + title={System-Call Based Problem Diagnosis for {PVFS}}, + author={Michael P. Kasick and Keith A. Bare and Eugene E. 
Marinelli III + and Jiaqi Tan and Rajeev Gandhi and Priya Narasimhan}, + booktitle={Proceedings of the 5th Workshop on Hot Topics in System + Dependability (HotDep '09)}, + month={June}, + year={2009} +} + +@conference{coord-access-toappear-iasds09, + title={Interfaces for Coordinated Access in the File System}, + author={Sam Lang and Robert Latham and Dries Kimpe and Robert Ross}, + booktitle={Proceedings of 2009 Workshop on Interfaces and + Architectures for Scientific Data Storage}, + month={September}, + year={2009}, + note={(\textbf{To Appear})} +} + +@article{dyn-fs-semantics, + author = {Michael Kuhn and Julian Martin Kunkel and Thomas Ludwig}, + title = {Dynamic file system semantics to enable metadata optimizations + in {PVFS}}, + journal ={Concurrency and Computation: Practice and Experience}, + year = {2009}, + publisher = {John Wiley and Sons, Ltd}, +} + +@conference{pvfs-bgp-toappear-sc09, + title={{I/O} Performance Challenges at Leadership Scale}, + author={Samuel Lang and Philip Carns and Robert Latham and Robert Ross and Kevin Harms and William Allcock}, + booktitle={Proceedings of Supercomputing}, + month={November}, + year={2009}, + note={(\textbf{To Appear})} +} + +@article{IPDPS.2009.5161070, + author = {Xuechen Zhang and Song Jiang and Kei Davis}, + title = {Making resonance a common case: A high-performance implementation + of collective I/O on parallel file systems}, + journal ={Parallel and Distributed Processing Symposium, International}, + volume = {0}, + year = {2009}, + pages = {1-12}, + doi = {http://doi.ieeecomputersociety.org/10.1109/IPDPS.2009.5161070}, + publisher = {IEEE Computer Society}, + address = {Los Alamitos, CA, USA}, +} + +@inproceedings{SmallFilesIPDPS09, + author = {Philip Carns and Sam Lang and Robert Ross and Murali Vilayannur and Julian Kunkel and Thomas Ludwig}, + title = {Small File Accesses in Parallel File Systems}, + booktitle = {IPDPS}, + year = {2009}, + month = {March} +} + +@inproceedings{PosixExtTReport, + author = 
{M. Vilayannur and S. Lang and R. Ross and R. Klundt and L. Ward}, + title = {Extending the {POSIX I/O} Interface: A Parallel File System Perspective}, + booktitle = {Technical Memorandum ANL/MCS-TM-302}, + year = {2008}, + month = {October}, + url = {http://www.mcs.anl.gov/uploads/cels/papers/TM-302-FINAL.pdf} +} + +@inproceedings{LogBasedWritesPDSI08, + title = {Fast Log-based Concurrent Writing of Checkpoints}, + author = {Milo Polte and Jiri Simsa and Wittawat Tantisiriroj and Garth Gibson}, + booktitle = {Third Petascale Data Storage Workshop, Supercomputing}, + year = {2008}, + month = {November}, + url = {http://www.pdsi-scidac.org/events/PDSW08/resources/papers/polte.pdf} +} + +@inproceedings{OSDPDSI08, + title = {Revisiting the Metadata Architecture of Parallel File Systems}, + author = {N. Ali and A. Devulapalli and D. Dalessandro and P. Wyckoff and P. Sadayappan}, + booktitle = {Third Petascale Data Storage Workshop, Supercomputing}, + year = {2008}, + month = {November}, + url = {http://www.cse.ohio-state.edu/~alin/papers/pdsw2008.pdf} +} + +@inproceedings{OSDCluster08, + author = {N. Ali and A. Devulapalli and D. Dalessandro and P. Wyckoff and P. Sadayappan}, + title = {An {OSD-based} Approach to Managing Directory Operations in Parallel File Systems}, + booktitle = {IEEE International Conference on Cluster Computing}, + year = {2008}, + month = {September}, + url = {http://www.cse.ohio-state.edu/~alin/papers/cluster2008.pdf} +} + +@inproceedings{Resilience08, + author = {Bradley W. Settlemyer and Walter B. Ligon III}, + title = {A Technique for Lock-Less Mirroring in Parallel File Systems}, + booktitle = {The Workshop On Resiliency in High-Performance Computing at The Eighth IEEE International Symposium on Cluster Computing and the Grid}, + year = {2008}, + url = {http://www.parl.clemson.edu/~bradles/downloads/settlemyer-MirroringInPFS.pdf} +} + +@inproceedings{ServerToServerSC08, + author = {Philip H. Carns and Bradley W. Settlemyer and Walter B. 
Ligon III}, + title = {Using Server to Server Communication in Parallel File Systems to Simplify Consistency and Improve Performance}, + booktitle = {The International Conference for High Performance Computing, Networking, Storage, and Analysis}, + month = {November}, + year = {2008}, + url = {http://www.parl.clemson.edu/~bradles/downloads/settlemyer-PFSCollectiveIO.pdf} +} + +@inproceedings{A1429636, + author = {Kuhn, Michael and Kunkel, Julian and Ludwig, Thomas}, + title = {Directory-Based Metadata Optimizations for Small Files in {PVFS}}, + booktitle = {Euro-Par '08: Proceedings of the 14th international Euro-Par conference on Parallel Processing}, + year = {2008}, + pages = {90--99}, + location = {Las Palmas de Gran Canaria, Spain}, + doi = {http://dx.doi.org/10.1007/978-3-540-85451-7_11}, + publisher = {Springer-Verlag}, + address = {Berlin, Heidelberg}, +} + +@inproceedings{kunkel:bottlenecks, + author={Julian Kunkel and Thomas Ludwig}, + title={Bottleneck Detection in Parallel File Systems with Trace-Based Performance Monitoring}, + booktitle={Euro-Par 2008: Proceedings of the 14th International Euro-Par Conference on Parallel Processing}, + year = {2008}, + pages={212--221} +} + +@inproceedings{ludwig:pioviz, + author={Thomas Ludwig and Stephan Krempel and Michael Kuhn and Julian Kunkel and Christian Lohse}, + title={Analysis of the {MPI-IO} Optimization Levels with the {PIOViz} Jumpshot Enhancement}, + booktitle={Proceedings of EuroPVM/MPI 2007}, + year={2007}, + month={September}, + pages={213--222} +} + +@article{BradThesis, + author = {Bradley W. Settlemyer}, + title = {A Mechanism for Scalable Redundancy in Parallel File Systems}, + journal = {Clemson University Master's Thesis}, + month = {May}, + year = {2006} +} + +@inproceedings{DBLP:conf/sc/PatilGLP07, + author = {Swapnil V. Patil and + Garth A. 
Gibson and + Sam Lang and + Milo Polte}, + title = {GIGA+: scalable directories for shared file systems}, + booktitle = {PDSW}, + year = {2007}, + pages = {26-29}, + ee = {http://doi.acm.org/10.1145/1374596.1374604}, + bibsource = {DBLP, http://dblp.uni-trier.de}, + url = {http://www.pdl.cmu.edu/PDL-FTP/HECStorage/sc07-patil.pdf} +} + +@inproceedings{pNFSPDSW07, + author = {L. Chai and X. Ouyang and R. Noronha and D.K. Panda}, + title = {pNFS/PVFS2 over InfiniBand: Early Experiences}, + booktitle = {Petascale Data Storage Workshop}, + year = {2007}, + month = {November}, + url = {http://nowlab.cse.ohio-state.edu/publications/conf-papers/2007/pdsi07.pdf} +} + +@article{LinuxMag07, + AUTHOR = "Jeffrey Layton", + TITLE = "Parallel Platters: File Systems for HPC Clusters", + JOURNAL = "Linux Magazine", + MONTH = "November", + YEAR = "2007", + ANNOTE = "magazine", + URL = "http://www.linux-mag.com/id/4358" +} @article{HPCForum07, AUTHOR = "Jeffrey Layton", @@ -58,6 +258,14 @@ @article{ITIS07 URL = "http://ietisy.oxfordjournals.org/cgi/content/abstract/E90-D/1/22" } +@inproceedings{HAMDSJC08, + author = {Christian Engelmann and Stephen L. Scott and Chokchai (Box) Leangsuksun and Xubin (Ben) He}, + title = {Symmetric Active/Active High Availability for High-Performance Computing System Services}, + booktitle = {Journal of Computers (JCP)}, + year = 2006, + url = {http://www.csm.ornl.gov/~engelman/publications/engelmann06symmetric.pdf} +} + @article{JPDC06, AUTHOR = "Yifeng Zhu and Hong Jiang", TITLE = "CEFT: A cost-effective, fault-tolerant parallel virtual file system", @@ -140,13 +348,12 @@ @inproceedings{EuroMPI04 year = "2004" } -@inproceedings{ching:Cluster03, - AUTHOR = "A. Ching and A. Choudhary and W. Liao, R. Ross and W. 
Gropp", - TITLE = "Efficient Structured Access in Parallel File Systems", - BOOKTITLE = "Proceedings of the 2003 IEEE - International Conference on Cluster Computing", - month = "December", - year = "2003" +@inproceedings{CSAR05, + TITLE = {CSAR-2: A Case Study of Parallel File System Dependability Analysis }, + AUTHOR = {D. Cotroneo and G. Paolillo and S. Russo and M. Lauria}, + BOOKTITLE = {HPCC}, + YEAR = 2005, + URL = {ftp://ftp.cse.ohio-state.edu/pub/tech-report/2005/TR75.pdf} } @inproceedings{BMI-IPDPS05, @@ -167,27 +374,6 @@ @phdthesis{PHILPHD year = "2005" } -@mastersthesis{HARISH, - AUTHOR = "Harish Ramachandran", - TITLE = "Design and Implementation of the System Interface for PVFS2", - school = "Clemson University", - number = "TR PARL-2002-008", - month = "December", - year = "2002" -} - -@inproceedings{ICALT05, - AUTHOR = "Chao-Tung Yang and Hsin-Chuan Ho and Chien-Tung Pan", - TITLE = "Using Grid Computing and PVFS2 Technologies - for Construction of an e-Learning Environment", - BOOKTITLE = "Proceedings of the Fifth IEEE International Conference - on Advanced Learning Technologies (ICALT'05)", - volume = "00", - month = "July", - year = "2005", - location = "Kaohsiung, Taiwan" -} - @inproceedings{AIAA05, AUTHOR = "Parimala D. Pakalapati and Thomas Hauser", TITLE = "Benchmarking Parallel I/O Performance for @@ -222,6 +408,44 @@ @inproceedings{EuroMPI05 PUBLISHER = "Springer" } +@inproceedings{Pp03, + AUTHOR = {M. Pillai and M. Lauria}, + TITLE = {CSAR: cluster storage with adaptive redundancy}, + BOOKTITLE = {Parallel Processing}, + YEAR = 2003, + URL = {ftp://ftp.cse.ohio-state.edu/pub/tech-report/2003/TR15.pdf} +} + +@inproceedings{ching:Cluster03, + AUTHOR = "A. Ching and A. Choudhary and W. Liao, R. Ross and W. 
Gropp", + TITLE = "Efficient Structured Access in Parallel File Systems", + BOOKTITLE = "Proceedings of the 2003 IEEE + International Conference on Cluster Computing", + month = "December", + year = "2003" +} + +@mastersthesis{HARISH, + AUTHOR = "Harish Ramachandran", + TITLE = "Design and Implementation of the System Interface for PVFS2", + school = "Clemson University", + number = "TR PARL-2002-008", + month = "December", + year = "2002" +} + +@inproceedings{ICALT05, + AUTHOR = "Chao-Tung Yang and Hsin-Chuan Ho and Chien-Tung Pan", + TITLE = "Using Grid Computing and PVFS2 Technologies + for Construction of an e-Learning Environment", + BOOKTITLE = "Proceedings of the Fifth IEEE International Conference + on Advanced Learning Technologies (ICALT'05)", + volume = "00", + month = "July", + year = "2005", + location = "Kaohsiung, Taiwan" +} + @inproceedings{ISPCP04, AUTHOR = "Thomas Ludwig", TITLE = "Research Trends in High Performance Parallel Input/Output for @@ -436,6 +660,16 @@ @inproceedings{LSC00 note = "Best Paper Award" } +@article{LinuxMag02, + AUTHOR = {Forest Hoffman}, + TITLE = {Scalable I/O on Clusters}, + JOURNAL = {Linux Magazine}, + month = {July}, + year = {2002}, + ANNOTE = "magazine", + URL = "http://www.linux-mag.com/id/1107" +} + @article{LinuxJournal00, AUTHOR = "Ibrahim F. 
Haddad", TITLE = "PVFS: A Parallel Virtual File System for Linux Clusters", @@ -447,4 +681,113 @@ @article{LinuxJournal00 URL = "http://www.linuxjournal.com/article/4354" } +@inproceedings{A1442461, + author = {Peng Gu and Jun Wang and Robert Ross}, + title = {Bridging the Gap Between Parallel File Systems and Local File Systems: A Case Study with {PVFS}}, + booktitle = {{ICPP} '08: Proceedings of the 2008 37th International Conference on Parallel Processing}, + year = {2008}, + pages = {554--561}, + doi = {http://dx.doi.org/10.1109/ICPP.2008.43}, + publisher = {IEEE Computer Society}, + address = {Washington, DC, USA}, +} + +@inproceedings{homecache, + author = {In-Chul Hwang and Hanjo Jung and Seung-Ryoul Maeng and Jung-Wan Cho}, + title = {Design and Implementation of the Home-Based Cooperative Cache for {PVFS}}, + booktitle = {Workshop on Autonomic Distributed Data and Storage Systems Management - {ADSM2005}}, + year = {2005}, + pages = {407-414}, +} + +@inproceedings{A1019765, + author = {Tsujita, Yuichi}, + title = {Implementation of an {MPI-I/O} Mechanism Using {PVFS} in Remote I/O to a PC Cluster}, + booktitle = {HPCASIA '04: Proceedings of the High Performance Computing and Grid in Asia Pacific Region, Seventh International Conference}, + year = {2004}, + pages = {136--139}, + publisher = {IEEE Computer Society}, + address = {Washington, DC, USA}, +} + +@INPROCEEDINGS{Hildebr07pnfsand, + author = {Dean Hildebrand and Peter Honeyman and Wm. A. 
Adamson}, + title = {{pNFS and Linux}: Working Towards a Heterogeneous Future}, + booktitle = {In 8th {LCI International Conference on High-Performance Cluster Computing (Lake Tahoe)}}, + year = {2007}, + url = {http://www.citi.umich.edu/techreports/reports/citi-tr-07-1.pdf}, +} + +@INPROCEEDINGS{Hildebrand07direct-pnfsscalable, + author = {Dean Hildebrand and Peter Honeyman}, + title = {{Direct-pNFS}: scalable, transparent, and versatile access to parallel file systems}, + booktitle = {In HPDC}, + year = {2007}, + url = {http://www.citi.umich.edu/techreports/reports/citi-tr-07-2.pdf}, +} + +@phdthesis{Hildebrand07doctoralcommittee, + author = {Dean Hildebrand}, + title = {Distributed Access to Parallel File Systems}, + school = {University of Michigan}, + year = {2007}, + url = {http://www.citi.umich.edu/u/dean/docs/dissertation.pdf}, +} + +@INPROCEEDINGS{Honeyman06largefiles, + author = {Peter Honeyman and Dean Hildebrand and Lee Ward}, + title = {Large Files, Small Writes, and {pNFS}}, + booktitle = {in Proceedings of the 20th ACM International Conference on Supercomputing}, + year = {2006}, + pages = {116--124}, + url = {http://www.citi.umich.edu/techreports/reports/citi-tr-06-4.pdf}, +} + +@INPROCEEDINGS{Ching06evaluatingio, + author = {Avery Ching and Alok Choudhary and Wei-keng Liao and Neil Pundit}, + title = {Evaluating {I/O} characteristics and methods for storing structured scientific data}, + booktitle = {In Proceedings of the International Parallel and Distributed Processing Symposium}, + year = {2006}, + url = {http://cucis.ece.northwestern.edu/publications/pdf/ChiCho06A.pdf}, +} + +@INPROCEEDINGS{Vaidyanathan05workload-drivenanalysis, + author = {K. Vaidyanathan and P. Balaji and H. -w. Jin and D. K. 
Panda}, + title = {Workload-driven Analysis of File Systems in Multi-Tier Data-Centers over {InfiniBand}}, + booktitle = {In Computer Architecture Evaluation with Commercial Workloads ({CAECW8}), in conjunction with the International Symposium on High Performance Computer Architecture (HPCA)}, + year = {2005}, + url = {http://nowlab.cse.ohio-state.edu/publications/tech-reports/2005/vaidyana-caecw05-tr.pdf}, +} + +@inproceedings{PVFS1SC01, + author = {R. Ross and D. Nurmi and A. Cheng and M. Zingale}, + title = {A Case Study in Application I/O on Linux Clusters}, + booktitle = {Supercomputing}, + year = {2001}, + month = {November} +} + +@inproceedings{PVFS1ELW01, + author = {W. Ligon III and R. Ross}, + title = {An Overview of the Parallel Virtual File System}, + booktitle = {Extreme Linux Workshop}, + year = 1999, + month = {June} +} + +@inproceedings{PVFS1NASA98, + author = {M. Cettei and W. Ligon III and R. Ross}, + title = {Support for Parallel Out of Core Applications on Beowulf Workstations}, + booktitle = {IEEE Aerospace Conference}, + month = {March}, + year = {1998} +} + +@inproceedings{PVFS1HPDC96, + author = {W. Ligon III and R. 
Ross}, + title = {Implementation and Performance of a Parallel File System for High Performance Distributed Applications}, + booktitle = {HPDC}, + month = {August}, + year = {1996} +} diff --git a/doc/add-server-req b/doc/add-server-req index ccb6384..90da344 100644 --- a/doc/add-server-req +++ b/doc/add-server-req @@ -37,24 +37,14 @@ Steps in adding a request to the server: src/server/pvfs2-server.h PINT_server_op -- only if required -5) write state machine -- se details below +5) write state machine -- see details below src/server/.sm each state machine added must have a delcaration in src/server/pvfs2-server.h -6) update request scheduler - src/server/request-scheduler/request-scheduler.c - PINT_req_sched_target_handle() - -7) add entry to server operation parameters table - src/server/pvfs2-server.c - init_req_table -- see src/server/prelude.sm - prelude_perm_check() - -8) add entry in final response state machine - src/server/final-response.sm - s_req_resp_type_map - +6) update src/server/pvfs2-server-req.c + add new entry to PINT_server_req_table[] + reference the params structure from the state machine Writing State Machines diff --git a/doc/basics.tex b/doc/basics.tex index 2f301a8..4b6417f 100644 --- a/doc/basics.tex +++ b/doc/basics.tex @@ -189,8 +189,8 @@ \subsection{File system consistency} where a directory entry exists for a file that is not really ready to be accessed. 
If we carefully order the operations: \begin{enumerate} -\item create a metadata object for the new file \item create the data objects to hold data for the new file +\item create a metadata object for the new file \item point the metadata at the data objects \item create a directory entry for the new file pointing to the metadata object diff --git a/doc/db-recovery.txt b/doc/db-recovery.txt new file mode 100644 index 0000000..c8fa9b9 --- /dev/null +++ b/doc/db-recovery.txt @@ -0,0 +1,126 @@ +# 2009-11-5 +# Notes on recovering corrupted Berkeley DB files in PVFS +==================================================================== + +The pvfs2-server daemon uses Berkeley DB as the mechanism for storing file +system metadata. There are 5 database files in total that can be found in +the following locations in the storage space: + +./storage_attributes.db +./50a6d673/collection_attributes.db +./50a6d673/dataspace_attributes.db +./50a6d673/keyval.db +./collections.db + +The dataspace_attributes.db and keyval.db are most frequently used by +the file system. If one of these database files is corrupted for some +reason, then it may prevent the file system from operating correctly. + +One common technique for repairing a Berkeley DB database is to dump its +contents using db_dump (possibly found in the db4-utils package) and then +reload it into a new .db file with db_load. However, both the keyval.db and +dataspace_attributes.db use a custom function for sorting entries in order +to improve PVFS performance. db_load must therefore be modified to use the +correct key order. 
+ +Here are the steps needed to build a db_load utility that will work on the +keyval.db or dataspace_attributes.db file: + +- download the source code for Berkeley DB +- edit db_load/db_load.c +- find the section marked by #if 0 that indicates where to insert + application specific btree comparison or hash functions +- insert the code listed at the end of this file (NOTE: there is different + code depending on which .db file you are trying to recover) +- build Berkeley DB +- rename db_load binary to db_load_pvfs_keyval to avoid confusion + +For keyval.db: +==================================================================== +#include <stdint.h> + +typedef uint64_t PVFS_handle; +typedef PVFS_handle TROVE_handle; + +#define PVFS_NAME_MAX 256 +#define DBPF_MAX_KEY_LENGTH PVFS_NAME_MAX + +struct dbpf_keyval_db_entry +{ + TROVE_handle handle; + char key[DBPF_MAX_KEY_LENGTH]; +}; + +#define DBPF_KEYVAL_DB_ENTRY_TOTAL_SIZE(_size) \ + (sizeof(TROVE_handle) + _size) + +#define DBPF_KEYVAL_DB_ENTRY_KEY_SIZE(_size) \ + (_size - sizeof(TROVE_handle)) + +int PINT_trove_dbpf_keyval_compare( + DB * dbp, const DBT * a, const DBT * b) +{ + const struct dbpf_keyval_db_entry * db_entry_a; + const struct dbpf_keyval_db_entry * db_entry_b; + + db_entry_a = (const struct dbpf_keyval_db_entry *) a->data; + db_entry_b = (const struct dbpf_keyval_db_entry *) b->data; + + if(db_entry_a->handle != db_entry_b->handle) + { + return (db_entry_a->handle < db_entry_b->handle) ?
-1 : 1; + } + + if(a->size > b->size) + { + return 1; + } + + if(a->size < b->size) + { + return -1; + } + + /* must be equal */ + return (memcmp(db_entry_a->key, db_entry_b->key, + DBPF_KEYVAL_DB_ENTRY_KEY_SIZE(a->size))); +} + +if ((ret = dbp->set_bt_compare(dbp, PINT_trove_dbpf_keyval_compare)) != 0) +{ + dbp->err(dbp, ret, "DB->set_bt_compare"); + goto err; +} + +==================================================================== + +For dataspace_attributes.db: +==================================================================== + +#include <stdint.h> + +typedef uint64_t PVFS_handle; +typedef PVFS_handle TROVE_handle; + +int PINT_trove_dbpf_ds_attr_compare(DB * dbp, const DBT * a, const DBT * b) +{ + const TROVE_handle * handle_a; + const TROVE_handle * handle_b; + + handle_a = (const TROVE_handle *) a->data; + handle_b = (const TROVE_handle *) b->data; + + if(*handle_a == *handle_b) + { + return 0; + } + + return (*handle_a > *handle_b) ? -1 : 1; +} + +if ((ret = dbp->set_bt_compare(dbp, PINT_trove_dbpf_ds_attr_compare)) != 0) { + dbp->err(dbp, ret, "DB->set_bt_compare"); + goto err; +} + +==================================================================== diff --git a/doc/design/concepts.tex b/doc/design/concepts.tex index 6991fd1..c4dc70e 100644 --- a/doc/design/concepts.tex +++ b/doc/design/concepts.tex @@ -23,7 +23,7 @@ \begin{document} \maketitle -\begin{verbatim}$Id: concepts.tex,v 1.3 2006/09/13 20:22:43 vilayann Exp $\end{verbatim} +\begin{verbatim}$Id: concepts.tex,v 1.3 2006-09-13 20:22:43 vilayann Exp $\end{verbatim} \section{Introduction} PVFS2 represents a complete redesign and reimplementation of the diff --git a/doc/design/figs/getconfiginit.eps b/doc/design/figs/getconfiginit.eps index 88b5728..47b160b 100644 --- a/doc/design/figs/getconfiginit.eps +++ b/doc/design/figs/getconfiginit.eps @@ -7,7 +7,7 @@ gsave 27 104 translate 0.910714 -0.910714 scale -% $Id: getconfiginit.eps,v 1.1 2003/01/24 23:29:21 pcarns Exp $ +% $Id: getconfiginit.eps,v 1.1
2003-01-24 23:29:21 pcarns Exp $ % Header for DDD PostScript Box output % Copyright (C) 1995-1998 Technische Universitaet Braunschweig, Germany. @@ -406,7 +406,7 @@ setfont 300 75 576 75 576 76 300 76 box* %(300, 75)(276, 1) %%Trailer -% $Id: getconfiginit.eps,v 1.1 2003/01/24 23:29:21 pcarns Exp $ +% $Id: getconfiginit.eps,v 1.1 2003-01-24 23:29:21 pcarns Exp $ % Trailer for DDD PostScript Box output grestore diff --git a/doc/design/figs/getconfigservop.eps b/doc/design/figs/getconfigservop.eps index 238f315..d895677 100644 --- a/doc/design/figs/getconfigservop.eps +++ b/doc/design/figs/getconfigservop.eps @@ -7,7 +7,7 @@ gsave 39 239 translate 0.382309 -0.382309 scale -% $Id: getconfigservop.eps,v 1.1 2003/01/24 23:29:21 pcarns Exp $ +% $Id: getconfigservop.eps,v 1.1 2003-01-24 23:29:21 pcarns Exp $ % Header for DDD PostScript Box output % Copyright (C) 1995-1998 Technische Universitaet Braunschweig, Germany. @@ -649,7 +649,7 @@ setfont 441 530 1341 530 1341 531 441 531 box* %(441, 530)(900, 1) %%Trailer -% $Id: getconfigservop.eps,v 1.1 2003/01/24 23:29:21 pcarns Exp $ +% $Id: getconfigservop.eps,v 1.1 2003-01-24 23:29:21 pcarns Exp $ % Trailer for DDD PostScript Box output grestore diff --git a/doc/design/handle-allocator.tex b/doc/design/handle-allocator.tex index a1a4f95..af9f281 100644 --- a/doc/design/handle-allocator.tex +++ b/doc/design/handle-allocator.tex @@ -21,7 +21,7 @@ \begin{document} \maketitle -\begin{verbatim}$Id: handle-allocator.tex,v 1.1 2003/01/24 23:29:18 pcarns Exp $\end{verbatim} +\begin{verbatim}$Id: handle-allocator.tex,v 1.1 2003-01-24 23:29:18 pcarns Exp $\end{verbatim} \section{Introduction} diff --git a/doc/design/state-machine.tex b/doc/design/state-machine.tex index e2dc5bd..aa00ec3 100644 --- a/doc/design/state-machine.tex +++ b/doc/design/state-machine.tex @@ -91,18 +91,130 @@ \section{State Machine Code} static PINT_sm_action state_action_1 ( struct PINT_smcb *smcb, job_status_s *js_p) { + PINT_server_op *sop = (PINT_server_op 
*)PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + retrn SM_ACTION_COMPLETE; } static PINT_sm_action state_action_3 ( struct PINT_smcb *smcb, job_status_s *js_p) { + PINT_server_op *sop = (PINT_server_op *)PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + /* This state action sets up a pjmp. It needs to push one frame + onto the frame stack for each parallel task. Each time it pushes a + frame it needs to specify a tag that matches one of the tags in the + state machine code above (4, 3, or RETVAL). + */ + for (i = 0; i < sop->req.u.foo.num_servers; i++) + { + PINT_server_op *new = (PINT_server_op *)malloc(sizeof(PINT_server_op)); + /* set up new for the new sm probably from the current frame */ + new->req.u.foo.blah = sop->req.u.foo.blah; + /* determine which state machine to run */ + if (i == 0) + tag = RETVAL; /* run parallel_state_machine_3 */ + else + if (somevar > someval) + tag = 4; /* run parallel_state_machine_1 */ + else + tag = 3; /* run parallel_state_machine_2 */ + /* push frame */ + PINT_push_frame(smcb,new,tag); + } + return SM_ACTION_DEFERED; } static PINT_sm_action state_action_4 ( struct PINT_smcb *smcb, job_status_s *js_p) { + PINT_server_op *sop = (PINT_server_op *)PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + /* This state action cleans up after the parallel SMs have been run with + pjmp. The frames pushed before the pjmp are still on the stack and must + be popped off. Presumably there is return information in each one that + must be aggregated. In particular, error codes should be reviewed. + */ + return SM_ACTION_COMPLETE; } \end{verbatim} - +\section{State Machine Stacks} + +A running state machine has two stacks. One is used implicitly to control flow +in and out of nested state machines. When a state machine performs a jump it +automatically pushes information onto the state stack, and when a nested +state machine returns it automatically pops that information and uses it to +return to the calling state machine.
Since this is all done by the built in +logic, we need not consider it further. + +The second stack used by a running state machine is the frame stack. A frame +is a collection of data used by the state actions to store local variables and +is analogous to the frames created by compilers of high level languages for +holding local variables and function parameters. The first step of every +state action function is to call PINT_sm_frame() which retrieves a frame from the +stack that will be used by the state action. Normally, this will be +the "current" frame which is indexed by the macro PINT_FRAME_CURRENT or zero. +When a state machine begins execution one frame is allocated to it and pushed +on the stack, becoming the current frame. If no additional frames are pushed +this frame will be the current frame for all states and nested state machines. + +Additional frames can be pushed on the stack. Pushing a frame on the stack +does not change the current frame. Frames pushed can be accessed by specifying +a positive index to PINT_sm_frame() and are indexed in the order pushed. Thus, +if a state action pushes 4 frames, they can be retrieved as index 1 (the first +frame pushed), 2, 3, and 4 (the last frame pushed, or top of stack). Of course +frames can also be popped from the stack in the usual LIFO manner. + +When there are frames pushed on the frame stack and a state machine executes a +jump to a nested state machine it is assumed that the top frame on the stack +is intended as the new current frame. Once the jump is executed, inside the +nested state machine the top of stack becomes the current frame at index 0, and +there will be no frames on top of it (unless and until it chooses to push some). +When the nested state machine returns, the frame stack is restored to its +previous configuration. + +It is possible to access frames that were on the stack in the previous context +while inside a nested state machine. These frames are accessed using a negative +index.
Thus the frame immediately below the current frame is -1, and the one +below it is -2, and so on. In general, all frames on the stack since the +initiation of the state machine can be accessed. The frame stack is organized as +follows. There is a linked list of frames accessible from the SMCB through the +field "frames" which is a struct with two fields "next" and "prev". The field +smcb->frames->next points to the top of stack, and the field smcb->frames->prev +points to the bottom of stack. The list is doubly linked and implemented with +the qlist facility in PVFS. All frames should be accessed via the PINT_sm_frame() +function which takes an SMCB and an integer index as arguments. + +The frames can be thought of as numbered starting at zero from the bottom of the +stack to N-1 at the top of the stack, where there are N frames in the stack. +The field smcb->frame_count is equal to N. The index passed in through +PINT_sm_frame() is NOT this number. Instead, there is a second field which +records the number of the current frame smcb->base_frame. The index passed to +the function is added to the base_frame to arrive at the number of the desired +frame, counting from the bottom of the stack. For example: + +frame_count 6 +base_frame 2 + +frame_number 0 1 2 3 4 5 <- top of stack +index -2 -1 0 1 2 3 + +In general, the index of the top of stack can be computed as: + +(smcb->frame_count - 1) - smcb->base_frame + +This should be included in src/common/misc/state_machine.h as a macro: + +#define PINT_FRAME_TOP(smcb) (((smcb)->frame_count - 1) - (smcb)->base_frame) + +So a call to retrieve the frame on top of the stack would be: + +top_frame = PINT_sm_frame(smcb,PINT_FRAME_TOP(smcb)); + +The current frame can be retrieved with the following macro: + +current_frame = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + +Offsets from the top or current frame can be made by either adding or subtracting +from one of these macros.
The bottom frame on the stack would be accessed as +the negative of the base_frame, but this is rarely needed and there isn't a +macro for it at this time. diff --git a/doc/doxygen/pvfs2-doxygen.conf.in b/doc/doxygen/pvfs2-doxygen.conf.in index 85e7952..91b9251 100644 --- a/doc/doxygen/pvfs2-doxygen.conf.in +++ b/doc/doxygen/pvfs2-doxygen.conf.in @@ -208,12 +208,12 @@ SUBGROUPING = YES # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES # EXTRACT_ALL = YES -EXTRACT_ALL = NO +EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES all private members of a class # will be included in the documentation. -EXTRACT_PRIVATE = NO +EXTRACT_PRIVATE = YES # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. @@ -229,14 +229,14 @@ EXTRACT_STATIC = YES # defined locally in source files will be included in the documentation. # If set to NO only classes defined in header files are included. -EXTRACT_LOCAL_CLASSES = NO +EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local # methods, which are defined in the implementation section but not in # the interface are included in the documentation. # If set to NO (the default) only methods in the interface are included. -EXTRACT_LOCAL_METHODS = NO +EXTRACT_LOCAL_METHODS = YES # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all # undocumented members of documented classes, files or namespaces. @@ -248,7 +248,7 @@ EXTRACT_LOCAL_METHODS = NO # HOWEVER, IT ALSO ENABLES A BUNCH OF EXTRA CRAP IN HEADER FILE # OUTPUT... -- ROBR -HIDE_UNDOC_MEMBERS = YES +HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. 
@@ -257,7 +257,7 @@ HIDE_UNDOC_MEMBERS = YES # # NOTE: THIS INCLUDES UNDOCUMENTED STRUCTURES -- ROBR -HIDE_UNDOC_CLASSES = YES +HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all # friend (class|struct|union) declarations. @@ -271,7 +271,7 @@ HIDE_FRIEND_COMPOUNDS = NO # If set to NO (the default) these blocks will be appended to the # function's detailed documentation block. -HIDE_IN_BODY_DOCS = YES +HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. If the tag is set @@ -451,7 +451,8 @@ RECURSIVE = YES # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. -EXCLUDE = test doc src/io/flow src/apps +#EXCLUDE = test doc src/io/flow src/apps +EXCLUDE = test doc # The EXCLUDE_SYMLINKS tag can be used select whether or not files or directories # that are symbolic links (a Unix filesystem feature) are excluded from the input. @@ -524,12 +525,12 @@ FILTER_SOURCE_FILES = NO # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. -SOURCE_BROWSER = NO +SOURCE_BROWSER = YES # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. -INLINE_SOURCES = NO +INLINE_SOURCES = YES # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code @@ -553,7 +554,7 @@ REFERENCES_RELATION = YES # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this. 
-VERBATIM_HEADERS = NO +VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index @@ -563,13 +564,13 @@ VERBATIM_HEADERS = NO # of all compounds will be generated. Enable this if the project # contains a lot of classes, structs, unions or interfaces. -ALPHABETICAL_INDEX = NO +ALPHABETICAL_INDEX = YES # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns # in which this list will be split (can be a number in the range [1..20]) -COLS_IN_ALPHA_INDEX = 5 +COLS_IN_ALPHA_INDEX = 4 # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. @@ -683,7 +684,7 @@ ENUM_VALUES_PER_LINE = 4 # probably better off using the HTML help feature. # GENERATE_TREEVIEW = YES -GENERATE_TREEVIEW = NO +GENERATE_TREEVIEW = YES # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree diff --git a/doc/pvfs2-faq.tex b/doc/pvfs2-faq.tex index cd016d8..ee4a3ab 100644 --- a/doc/pvfs2-faq.tex +++ b/doc/pvfs2-faq.tex @@ -45,6 +45,97 @@ \subsection{What is PVFS?} allowing for easy inclusion of new hardware support and new algorithms. This makes PVFS a perfect research testbed as well. +\subsection{What is the history of PVFS?} + +PVFS was first developed at Clemson University in 1993 +by Walt Ligon and Eric Blumer as a parallel file system for +Parallel Virtual Machine (PVM). It was developed as part of a +NASA grant to study the I/O patterns of parallel programs. PVFS version +0 was based on Vesta, a parallel file system developed at IBM T. J. +Watson Research Center. Starting in 1994 Rob Ross re-wrote PVFS to +use TCP/IP and departed from many of the original Vesta design points. 
+PVFS version 1 was targeted to a cluster of DEC Alpha workstations +networked using switched FDDI. Like Vesta, PVFS striped data across +multiple servers and allowed I/O requests based on a file view that +described a strided access pattern. Unlike Vesta, the striping and view +were not dependent on a common record size. Ross' research focused on +scheduling of disk I/O when multiple clients were accessing the same +file. Previous results had shown that scheduling according to the best +possible disk access pattern was preferable. Ross showed that this +depended on a number of factors including the relative speed of the +network and the details of the file view. In some cases a scheduling +that was based on network traffic was preferable, thus a dynamically +adaptable schedule provided the best overall performance. + +In late 1994 Ligon met with Thomas Sterling and John Dorband at Goddard +Space Flight Center (GSFC) and discussed their plans to build the first +Beowulf computer. It was agreed that PVFS would be ported to Linux +and be featured on the new machine. Over the next several years Ligon +and Ross worked with the GSFC group including Donald Becker, Dan Ridge, +and Eric Hendricks. In 1997 at a cluster meeting in Pasadena, CA +Sterling asked that PVFS be released as an open source package. + +In 1999 Ligon proposed the development of a new version of PVFS +initially dubbed PVFS2000 and later PVFS2. The design was initially +developed by Ligon, Ross, and Phil Carns. Ross completed his PhD in 2000 +and moved to Argonne National Laboratory and the design and +implementation was carried out by Ligon, Carns, Dale Witchurch, and +Harish Ramachandran at Clemson University, Ross, Neil Miller, and Rob +Latham at Argonne National Laboratory, and Pete Wyckoff at Ohio +Supercomputer Center. The new file system was released in 2003.
The +new design featured object servers, distributed metadata, views based on +MPI, support for multiple network types, and a software architecture for +easy experimentation and extensibility. + +PVFS version 1 was retired in 2005. PVFS version 2 is still supported by +Clemson and Argonne. Carns completed his PhD in 2006 and joined Axicom, +Inc. where PVFS was deployed on several thousand nodes for data mining. +In 2008 Carns moved to Argonne and continues to work on PVFS along with +Ross, Latham, and Sam Lang. Brad Settlemyer developed a mirroring +subsystem at Clemson, and later a detailed simulation of PVFS used for +researching new developments. Settlemyer is now at Oak Ridge National +Laboratory. In 2007 Argonne began porting PVFS for use on an IBM Blue +Gene/P. In 2008 Clemson began developing extensions for supporting +large directories of small files, security enhancements, and redundancy +capabilities. As many of these goals conflicted with development for +Blue Gene, a second branch of the CVS source tree was created and dubbed +``Orange'' and the original branch was dubbed ``Blue.'' PVFS and OrangeFS +tracked each other very closely, but represent two different groups of +user requirements. + +\subsection{What is OrangeFS?} + +Simply put, OrangeFS is PVFS. OrangeFS is a branch of PVFS created by +the Clemson team of PVFS developers to investigate new features and +implementations of PVFS. As of fall 2010 OrangeFS has become the main +branch of PVFS. So why the name change? PVFS was originally conceived +as a research parallel file system and later developed for production on +large high performance machines such as the BG/P at Argonne National +Lab. OrangeFS is taking a slightly different approach to support a +broader range of large and medium systems and a number of issues PVFS +was not concerned with including security, redundancy, and a broader +range of applications. The new name reflects this new focus, but for +now at least, OrangeFS is PVFS.
+ +The PVFS web site is still maintained. The PVFS mailing lists for +users and developers have not changed and will be used for OrangeFS. +At some point in the future +another group may decide to branch from the main but the PVFS site will +remain the home for the community. + +\subsection{What is Omnibond?} + +Omnibond is a software company that for years has worked with Clemson +University to market software developed at the university. As of fall +2010 Omnibond is offering commercial support for OrangeFS/PVFS. +OrangeFS is open source and will always be free; and the code, as +always, is developed and maintained by the PVFS community. Omnibond is +offering professional services to those who are interested in them, and +directly supports the PVFS community. Omnibond offers its customers the +option of dedicated support services and the opportunity to support the +development of new features that they feel are critical. Omnibond gives +back to the community through their support and development. + \subsection{What does the ``V'' in PVFS stand for?} The ``V'' in PVFS stands for virtual. This is a holdover from the original @@ -318,6 +409,9 @@ \subsection{Where can I find documentation?} The best place to look for documentation on PVFS is the PVFS web site at \url{http://www.pvfs.org/}. Documentation (including this FAQ) is also available in the \texttt{doc} subdirectory of the PVFS source distribution. +Please reference \texttt{pvfs2-logging.txt} to understand more about PVFS' +informational messages, where the logs exist, and how to turn logging +on and off. \subsection{What should I do if I have a problem?} @@ -713,6 +807,17 @@ \subsection{Problems with pre-release kernels} pre-release kernel, but do make an effort to publish necessary patches once a kernel is officially released. +\subsection{Does PVFS work with Open-MX?} +\label{sec:open-mx} + +Yes, PVFS does work with Open-MX.
To use Open-MX, configure PVFS with +the same arguments that you would use for a normal MX installation: +``--disable-bmi-tcp'' and ``--with-mx=PATH''. In addition, however, you +must set the ``MX\_IMM\_ACK'' environment variable to ``1'' before starting +the pvfs2-server or pvfs2-client daemons. This is necessary in order to +account for differences in how MX and Open-MX handle message progression by +default. + % % PERFORMANCE % @@ -1076,6 +1181,16 @@ \subsection{Does PVFS have a maximum file system size? If so, what is it?} using for PVFS server storage and multiply these values by the number of servers you are using. +\subsection{Mounting PVFS with the interrupt option} +\label{sec:mountintr} +The PVFS kernel module supports the {\tt intr} option provided by +network file systems. This allows applications to be sent kill signals +when a filesystem is unresponsive (due to network failures, etc.). The +option can be specified at mount time: +\begin{verbatim} +mount -t pvfs2 -o intr tcp://hosta:3334/pvfs2-fs /pvfs-storage/ +\end{verbatim} + % % MISSING FEATURES % diff --git a/doc/pvfs2-ha-heartbeat-v2.tex b/doc/pvfs2-ha-heartbeat-v2.tex index a3b84a7..bb126c1 100644 --- a/doc/pvfs2-ha-heartbeat-v2.tex +++ b/doc/pvfs2-ha-heartbeat-v2.tex @@ -16,8 +16,8 @@ \headsep 0.0in \headheight 0.0in -\title{PVFS2 High-Availability Clustering using Heartbeat 2.0} -\date{2007} +\title{PVFS High Availability Clustering using Heartbeat 2.0} +\date{2008} \pagestyle{plain} \begin{document} @@ -37,39 +37,26 @@ \section{Introduction} -This document describes how to configure PVFS2 for high availability -using Heartbeat version 2.x from www.linux-ha.org. See pvfs2-ha.tex for -documentation on how to configure PVFS2 for high availability using -Heartbeat version 1.x. - -Heartbeat 2.x offers several improvements. First of all, it allows for -an arbitrary cluster size. The servers do not have to be paired up for -failover.
For example, if you configure 16 servers and one of them -fails, then any of the remaining 15 can serve as the failover machine. - -Secondly, Heartbeat 2.x supports monitoring of resources. Examples of -resources that you may want to actively monitor include the PVFS2 server -daemon, the IP interface, and connectivity to storage hardware. - -Finally, Heartbeat 2.x includes a configuration mechanism to express -dependencies between resources. This can be used to -express a preference for where certain servers run within the cluster, -or to enforce that resources need to be started or stopped in a specific -order. - -This document describes how to set up PVFS2 for high availability with -an arbitrary number of active servers and an arbitrary number of passive spare -nodes. Spare nodes are not required unless you wish to avoid -performance degradation upon failure. As configured in this document, -PVFS2 will be able to tolerate $\lceil ((N/2)-1) \rceil$ node failures, -where N is the number of nodes present in the Heartbeat cluster -including spares. Over half of the nodes -must be available in order to reach a quorum and decide if another node has -failed. - -No modifications of PVFS2 are required. Example scripts referenced in +This document describes how to configure PVFS for high availability +using Heartbeat version 2.x from www.linux-ha.org. + +The combination of PVFS and Heartbeat can support an arbitrary number +of active server nodes and an arbitrary number of passive spare nodes. +Spare nodes are not required unless you wish to avoid performance +degradation upon failure. As configured in this document, PVFS will +be able to tolerate $\lceil ((N/2)-1) \rceil$ node failures, where N is +the number of nodes present in the Heartbeat cluster including spares. +Over half of the nodes must be available in order to reach a quorum and +decide if another node has failed. 
+ +Heartbeat can be configured to monitor IP connectivity, storage hardware +connectivity, and responsiveness of the PVFS daemons. Failure of any of +these components will trigger a node level failover event. PVFS clients +will transparently continue operation following a failover. + +No modifications of PVFS are required. Example scripts referenced in this document are available in the \texttt{examples/heartbeat} directory of -the PVFS2 source tree. +the PVFS source tree. \section{Requirements} @@ -77,60 +64,57 @@ \subsection{Hardware} \subsubsection{Nodes} -Any number of nodes may be configured, although you need at least 3 in -order to tolerate a failure. See the explanation in the introduction of -this document. You may also use any number of spare nodes. A spare -node is a node that does not run any services until a failover occurs. -If you have one or more spares, then they will be selected first to run -resources in a failover situation. If you have no spares (or all spares -are exhausted), then at least one node will have to run two services -simultaneously, which may degrade performance. +Any number of nodes may be configured, although you need at least three +total in order to tolerate a failure. You may also use any number of +spare nodes. A spare node is a node that does not run any services until +a failover occurs. If you have one or more spares, then they will be +selected first to run resources in a failover situation. If you have +no spares (or all spares are exhausted), then at least one node will +have to run two services simultaneously, which may degrade performance. -The examples in this document will use 4 active nodes and one spare -node. +The examples in this document will use 4 active nodes and 1 spare node, +for a total of 5 nodes. Heartbeat has been tested with up to 16 nodes +in configurations similar to the one outlined in this document. 
\subsubsection{Storage} -The specific type of storage hardware is not important, but it must be -possible to allocate a separate block device to each server, and all -servers must be capable of accessing all block devices. +A shared storage device is required. The storage must be configured +to allocate a separate block device to each PVFS daemon, and all nodes +(including spares) must be capable of accessing all block devices. -One way of achieving this is by using a SAN. In the examples used in -this document, the SAN has been divided into 4 LUNs. Each of the 5 -servers in the cluster is capable of mounting all 4 LUNs. However, the -same LUN should never be mounted on two nodes simultaneously. This -document assumes that each block device is formatted using ext3. -The Heartbeat software will insure that a given LUN is mounted in only -one location at a time. +One way of achieving this is by using a SAN. In the examples used in this +document, the SAN has been divided into 4 LUNs. Each of the 5 nodes in +the cluster is capable of mounting all 4 LUNs. The Heartbeat software +will insure that a given LUN is mounted in only one location at a time. -It is also important that the device naming be consistent across all -nodes. For example, if node1 mounts /dev/fooa, then it should see the -same data as if node2 were to mount /dev/fooa. Likewise for /dev/foob, -etc. +Each block device should be formatted with a local file system. +This document assumes the use of ext3. Unique labels should be set on +each file system (for example, using the \texttt{-L} argument to +\texttt{mke2fs} or \texttt{tune2fs}). This will allow the block devices +to be mounted by consistent labels regardless of how Linux assigned +device file names to the devices. \subsubsection{Stonith} -Heartbeat needs some mechanism to fence or stonith a failed node. One -straightforward way to do this is to connect each server node to a -network controllable power strip. 
That will allow any given server to -send a command over the network to power off another server. +Heartbeat needs some mechanism to fence or stonith a failed node. +Two popular ways to do this are either to use IPMI or a network +controllable power strip. Each node needs to have a mechanism available +to reset any other node in the cluster. The example configuration in +this document uses IPMI. -It is possible to configure PVFS2 and Heartbeat without a power control +It is possible to configure PVFS and Heartbeat without a power control device. However, if you deploy this configuration for any purpose other than evaluation, then you run a very serious risk of data corruption. Without stonith, there is no way to guarantee that a failed node has completely shutdown and stopped accessing its storage device before failing over. -The example in this document is using an APC switched PDU (which allows -commands to be sent via SNMP or ssh) as the power control device. - \subsection{Software} -This document assumes that you are using Hearbeat version 2.0.8, and -PVFS2 version 2.6.x or greater. You may also wish to use example -scripts included in the \texttt{examples/heartbeat} directory of the PVFS2 source -tree. +This document assumes that you are using Heartbeat version 2.1.3, +and PVFS version 2.7.x or greater. You may also wish to use example +scripts included in the \texttt{examples/heartbeat} directory of the +PVFS source tree. \subsection{Network} @@ -139,44 +123,47 @@ \subsection{Network} address to use for communication within the cluster nodes. Secondly, you need to allocate an extra IP address and hostname for each -active PVFS2 server. In the example that this document uses, we must -allocate 4 extra IP addresses, along with 4 hostnames in DNS -for those IP addresses. In this document, we will refer to these as -``virtual addresses''. Each active PVFS2 server will be configured +PVFS daemon. 
In the example that this document uses, we must allocate 4 +extra IP addresses, along with 4 hostnames in DNS for those IP addresses. +In this document, we will refer to these as ``virtual addresses'' or +``virtual hostnames''. Each active PVFS server will be configured to automatically bring up one of these virtual addresses to use for communication. If the node fails, then that IP address is migrated to another node so that clients will appear to communicate with the same server regardless of where it fails over to. It is important that you -not use the primary IP address of each node for this purpose. +\emph{not} use the primary IP address of each node for this purpose. In the example in this document, we use 225.0.0.1 as the multicast -address, node\{1-5\} as the normal node hostnames, and -virtualnode\{1-4\} as the virtual hostnames. +address, node\{1-5\} as the normal node hostnames, +virtual\{1-4\} as the virtual hostnames, and 192.168.0.\{1-4\} as the +virtual addresses. + +Note that the virtual addresses must be on the same subnet as the true +IP addresses for the nodes. -\section{Configuring PVFS2} +\section{Configuring PVFS} -Download, build, install, and configure PVFS2. +Download, build, and install PVFS on all server nodes. Configure PVFS +for use on each of the active nodes. -There are a few points to consider when configuring PVFS2: +There are a few points to consider when configuring PVFS: \begin{itemize} -\item Use the virtual addresses when specifying meta servers and I/O +\item Use the virtual hostnames when specifying meta servers and I/O servers \item Synchronize file data on every operation (necessary for consistency on failover) \item Synchronize meta data on every operation (necessary for consistency on -failover) +failover). Coalescing is allowed. 
\item Use the \texttt{TCPBindSpecific} option (this allows multiple daemons to -run on the same node if needed) +run on the same node using different virtual addresses) \item Tune retry and timeout values appropriately for your system. This may depend on how long it takes for your power control device to safely shutdown a node. \end{itemize} -Figure~\ref{fig:pvfs2conf} shows one example of how to configure PVFS2. -Only the parameters relevant to the Heartbeat scenario are shown. +An example PVFS configuration is shown below including the sections +relevant to Heartbeat: -\begin{figure} -\begin{scriptsize} \begin{verbatim} ... @@ -191,38 +178,31 @@ \section{Configuring PVFS2} - Alias virtualnode1_tcp3334 tcp://virtualnode1:3334 - Alias virtualnode2_tcp3334 tcp://virtualnode2:3334 - Alias virtualnode3_tcp3334 tcp://virtualnode3:3334 - Alias virtualnode4_tcp3334 tcp://virtualnode4:3334 + Alias virtual1_tcp3334 tcp://virtual1:3334 + Alias virtual2_tcp3334 tcp://virtual2:3334 + Alias virtual3_tcp3334 tcp://virtual3:3334 + Alias virtual4_tcp3334 tcp://virtual4:3334 ... 
- Range virtualnode1_tcp3334 4-536870914 - Range virtualnode2_tcp3334 536870915-1073741825 - Range virtualnode3_tcp3334 1073741826-1610612736 - Range virtualnode4_tcp3334 1610612737-2147483647 + Range virtual1_tcp3334 4-536870914 + Range virtual2_tcp3334 536870915-1073741825 + Range virtual3_tcp3334 1073741826-1610612736 + Range virtual4_tcp3334 1610612737-2147483647 - Range virtualnode1_tcp3334 2147483648-2684354558 - Range virtualnode2_tcp3334 2684354559-3221225469 - Range virtualnode3_tcp3334 3221225470-3758096380 - Range virtualnode4_tcp3334 3758096381-4294967291 + Range virtual1_tcp3334 2147483648-2684354558 + Range virtual2_tcp3334 2684354559-3221225469 + Range virtual3_tcp3334 3221225470-3758096380 + Range virtual4_tcp3334 3758096381-4294967291 TroveSyncMeta yes TroveSyncData yes - CoalescingHighWatermark 1 - CoalescingLowWatermark 1 - \end{verbatim} -\end{scriptsize} -\caption{Example \texttt{pvfs2-fs.conf} file} -\label{fig:pvfs2conf} -\end{figure} Download, build, and install Heartbeat following the instructions on their web site. No special parameters or options are required. Do not @@ -231,28 +211,37 @@ \section{Configuring PVFS2} \section{Configuring storage} Make sure that there is a block device allocated for each active server -in the file system. Format each one with ext3. Do not create a PVFS2 +in the file system. Format each one with ext3. Do not create a PVFS storage space yet, but you can create subdirectories within each file system if you wish. -Confirm that each block device can be mounted from every node, and that -the device names are consistent. Do this one node at a time. Never mount -the same block device concurrently on two or more nodes. +Confirm that each block device can be mounted from every node using the +file system label. Do this one node at a time. Never mount +the same block device concurrently on two nodes. 
+ +\section{Configuring stonith} + +Make sure that your stonith device is accessible and responding from each +node in the cluster. For the IPMI stonith example used in this document, +this means confirming that \texttt{ipmitool} is capable of monitoring +each node. Each node will have its own IPMI IP address, username, and +password. + +\begin{verbatim} +$ ipmitool -I lan -U Administrator -P password -H 192.168.0.10 power status +Chassis Power is on +\end{verbatim} \section{Distributing Heartbeat scripts} -The scripts that are in the \texttt{examples/heartbeat} subdirectory may be -installed to the following suggested locations on each server node: -\begin{itemize} -\item pvfs2-ha-heartbeat-configure.sh: /usr/bin -\item apc*: /usr/bin -\item baytech*: /usr/bin -\item qla*: /usr/bin -\item PVFS2: /usr/lib/ocf/resource.d/external/ -\item PVFS2-notify: /usr/lib/ocf/resource.d/external -\item Filesystem-qla-monitor: /usr/lib/ocf/resource.d/external -\item pvfs2-stonith-plugin: /usr/lib/stonith/plugins/external -\end{itemize} +The PVFS2 resource script must be installed and set as runnable on every +cluster node as follows: + +\begin{verbatim} +$ mkdir -p /usr/lib/ocf/resource.d +$ cp examples/heartbeat/PVFS2 /usr/lib/ocf/resource.d/external/ +$ chmod a+x /usr/lib/ocf/resource.d/external/PVFS2 +\end{verbatim} \section{Base Heartbeat configuration} @@ -260,29 +249,20 @@ \section{Base Heartbeat configuration} parameters, which include an authentication key and a list of nodes that will participate in the cluster. -Begin by generating a random sha1 key, which is used to secure -communication between the cluster nodes. Then run the -pvfs2-ha-heartbeat-configure.sh script as shown in -figure~\ref{fig:haconf} on every node (both active and spare). You -should use your multicast address as described in the network -requirements, your own sha1 key, and a list of -nodes (including spares) that will participate. 
- +Begin by generating a random sha1 key, which is used to +secure communication between the cluster nodes. Then run the +pvfs2-ha-heartbeat-configure.sh script on every node (both active +and spare) as shown below. Make sure to use the multicast address, sha1 +key, and list of nodes (including spares) appropriate for your environment. -\begin{figure} -\begin{scriptsize} \begin{verbatim} $ dd if=/dev/urandom count=4 2>/dev/null | openssl dgst -sha1 dcdebc13c41977eac8cca0023266a8b16d234262 -$ pvfs2-ha-heartbeat-configure.sh /etc/ha.d 225.0.0.1 \ +$ examples/heartbeat/pvfs2-ha-heartbeat-configure.sh /etc/ha.d 225.0.0.1 \ dcdebc13c41977eac8cca0023266a8b16d234262 \ node1 node2 node3 node4 node5 \end{verbatim} -\end{scriptsize} -\caption{Example \texttt{pvfs2-ha-heartbeat-configure.sh} commands} -\label{fig:haconf} -\end{figure} You can view the configuration file that this generates in /etc/ha.d/ha.cf. An example ha.cf file (with comments) is provided with @@ -300,13 +280,32 @@ \section{CIB configuration} CIB, but it is simpler to begin with a populated XML file on all nodes. \texttt{cib.xml.example} provides an example of a fully populated -Heartbeat configuration with 5 nodes and 4 active PVFS2 servers. It -also includes some optional components for completeness. Relevant +Heartbeat configuration with 5 nodes and 4 active PVFS servers. Relevant portions of the XML file are outlined below. -This file should be modified to reflect your configuration, and then -copied into /var/lib/crm/cib.xml on every node in the cluster (including -spares). +This file should be modified to reflect your configuration. You can +test the validity of the XML with the following commands before +installing it. 
The former checks generic XML syntax, while the latter +performs Heartbeat specific checking: + +\begin{verbatim} +$ xmllint --noout cib.xml +$ crm_verify -x cib.xml +\end{verbatim} + +Once your XML is filled in correctly, it must be copied into the correct +location (with correct ownership) on each node in the cluster: + +\begin{verbatim} +$ mkdir -p /var/lib/heartbeat/crm +$ cp cib.xml /var/lib/heartbeat/crm +$ chown -R hacluster:haclient /var/lib/heartbeat/crm +\end{verbatim} + +Please note that once Heartbeat has been started, it is no longer +legal to modify cib.xml by hand. See the \texttt{cibadmin} command line +tool and Heartbeat information on making modifications to +existing or online CIB configurations. \subsection{crm\_config} @@ -337,37 +336,9 @@ \subsection{resources and groups} start or stop all associated resources for a node with one unified command. In the example \texttt{cib.xml}, there are 4 groups (server0 through -server3). These represent the 4 active PVFS2 servers that will run on +server3). These represent the 4 active PVFS servers that will run on the cluster. -\subsection{PVFS2-notify} - -The \texttt{PVFS2-notify} resources, such as \texttt{server0\_notify}, are -used as a mechanism to send alerts when a server process fails over to -another node. This is provided by the \texttt{PVFS2-notify} script in -the examples directory. - -The use of a notify resource is entirely optional and may be omitted. -This particular script is designed to take four parameters: -\begin{itemize} -\item \texttt{firsthost}: name of the node that the server group should normally -run on -\item \texttt{fsname}: arbitrary name for the PVFS2 file system -\item \texttt{conf\_dir}: location of notification configuration files -\item \texttt{title}: component of the title for the notification -\end{itemize} - -The \texttt{PVFS2-notify} script serves as an example for how one might -implement a notification mechanism. However, it is incomplete on its -own. 
This example relies on a secondary script called -\texttt{fs-instance-alarm.pl} to send the actual notification. For -example, one could implement a script that sends an email when a failure -occurs. The \texttt{conf\_dir} parameter could be passed along to -provide a location to read a configurable list of email addresses from. - -\texttt{fs-instance-alarm.pl} is not provided with this example or -documentation. - \subsection{IPaddr} The \texttt{IPaddr} resources, such as \texttt{server0\_address}, are @@ -380,39 +351,41 @@ \subsection{Filesystem} The \texttt{Filesystem} resources, such as \texttt{server0\_fs}, are used to describe the shared storage block devices that serve as back end storage -for PVFS2. This is where the PVFS2 storage space for each server will -be created. In this example, the device names are \texttt{/dev/fooa1} -through \texttt{/dev/food1}. They are each mounted on directories such -as \texttt{/san\_mounta1} through \texttt{/san\_mountd1}. Please note +for PVFS. This is where the PVFS storage space for each server will +be created. In this example, the device names are labeled as +\texttt{label0} +through \texttt{label3}. They are each mounted on directories such +as \texttt{/san\_mount0} through \texttt{/san\_mount3}. Please note that each device should be mounted on a different mount point to allow multiple \texttt{pvfs2-server} processes to operate on the same node without -collision. +collision. The file system type can be changed to reflect the use of +alternative underlying file systems. -\subsection{PVFS2} +\subsection{PVFS} The \texttt{PVFS2} resources, such as \texttt{server0\_daemon}, are used to describe each \texttt{pvfs2-server} process. This resource is provided by the PVFS2 script in the examples directory. 
The parameters to this resource are listed below: \begin{itemize} -\item \texttt{fsconfig}: location of PVFS2 fs configuration file -\item \texttt{serverconfig}: location of PVFS2 server configuration file +\item \texttt{fsconfig}: location of PVFS fs configuration file \item \texttt{port}: TCP/IP port that the server will listen on (must match server configuration file) \item \texttt{ip}: IP address that the server will listen on (must match both the file system configuration file and the IPAddr resource) \item \texttt{pidfile}: Location where a pid file can be written +\item \texttt{alias}: alias to identify this PVFS daemon instance \end{itemize} -Also notice that there is a monitor operation associated with the PVFS2 +Also notice that there is a monitor operation associated with the PVFS resource. This will cause the \texttt{pvfs2-check-server} utility to be triggered periodically to make sure that the \texttt{pvfs2-server} process is not only -running, but is correctly responding to PVFS2 protocol requests. This +running, but is correctly responding to PVFS protocol requests. This allows problems such as hung \texttt{pvfs2-server} processes to be treated as failure conditions. Please note that the PVFS2 script provided in the examples will attempt -to create a storage space for each server if it is not already present. +to create a storage space on startup for each server if it is not already present. \subsection{rsc\_location} @@ -420,7 +393,7 @@ \subsection{rsc\_location} are used to express a preference for where each resource group should run (if possible). It may be useful for administrative purposes to have the first server group default to run on the first node of your cluster, -etc. +for example. Otherwise the placement will be left up to Heartbeat. 
\subsection{rsc\_order} @@ -430,120 +403,60 @@ \subsection{rsc\_order} organized into groups, but without ordering constraints, the resources within a group may be started in any order relative to each other. These constraints are necessary because a \texttt{pvfs2-server} process will not -start properly if the IP address that it should listen on and the shared -storage that it should use are not available yet. - -\subsection{pvfs2-stonith-plugin} +start properly until its IP address and storage are available. -The \texttt{pvfs2-stonith-plugin} resource is an example of how to -configure a stonith device for use in Heartbeat. See the Heartbeat -documentation for a list of officially supported devices. +\subsection{stonith} -In this example, the stonith device is setup as a clone, which means -that there are N identical copies of the resource (one per node). This -allows any node in the cluster to quickly send a stonith command if -needed. - -The \texttt{pvfs2-stonith-plugin} is provided by a script in the -examples directories. It requires a parameter to specify the file -system name, and a parameter to specify a configuration directory. This -plugin is not complete by itself, however. It relies on three scripts -to actually perform the stonith commands: -\begin{itemize} -\item \texttt{fs-power-control.pl}: used to send commands to control power to a -node -\item \texttt{fs-power-gethosts.pl}: used to print a list of nodes that can be -controlled with this device -\item \texttt{fs-power-monitor.pl}: used to monitor the stonith device and -confirm that is available -\end{itemize} +The \texttt{external/ipmi} stonith device is used in this example. +Please see the Heartbeat documentation for instructions on configuring +other types of devices. -These three stonith scripts are not provided with these examples. They -may need to be specifically implemented for your environment. 
As an alternative, -you can simply use one of the standard stonith devices that are -supported by Heartbeat (see Heartbeat documentation for details). - -The following scripts provide lower level examples of how to control an APC power -strip (via SNMP or SSH) or a Baytech power strip (via SSH): -\begin{itemize} -\item \texttt{apc-switched-pdu-hybrid-control.pl} -\item \texttt{apc-switched-pdu-hybrid-monitor.pl} -\item \texttt{baytech-mgmt-control.pl} -\item \texttt{baytech-mgmt-monitor.pl} -\end{itemize} - -One approach to implementing power control would be to use the -pvfs2-stonith-plugin device script and write -\texttt{fs-power\{control/monitor/gethosts\}} scripts that can parse -configuration files describing your cluster and send appropriate -commands to the above provided APC and Baytech control scripts. - -\subsection{SAN monitoring} - -The example CIB configuration does not use this feature, but an -additional resource script has been included that modifies the -\texttt{Filesystem} resource to allow it to monitor SAN connectivity. This -script is called \texttt{Filesystem-qla-monitor}. It requires that the -nodes use QLogic fibre channel adapters and EMC PowerPath -software for SAN connectivity. If this configuration is available, then this script can -issue appropriate PowerPath commands periodically to confirm that there -is connectivity between each node and its block device. +There is one IPMI stonith device for each node. The attributes for that +resource specify which node is being controlled, and the username, +password, and IP address of the corresponding IPMI device. \section{Starting Heartbeat} -Once the CIB file is completed and installed in the correct location, -then the Heartbeat services can be started on every node with the -command in figure~\ref{fig:start}. The \texttt{crm\_mon} command, when -run with the arguments shown, will provide a periodically updated view -of the state of each resource configured within Heartbeat.
Check -\texttt{/var/log/messages} if any of the groups fail to start. +Once the CIB file is completed and installed in the correct +location, then the Heartbeat services can be started on every node. +The \texttt{crm\_mon} command, when run with the arguments shown, +will provide a periodically updated view of the state of each resource +configured within Heartbeat. Check \texttt{/var/log/messages} if any +of the groups fail to start. -\begin{figure} -\begin{scriptsize} \begin{verbatim} $ /etc/init.d/heartbeat start -$ # wait until all Heartbeat services started +$ # wait a few minutes for heartbeat services to start $ crm_mon -r \end{verbatim} -\end{scriptsize} -\caption{Starting Heartbeat services} -\label{fig:start} -\end{figure} \section{Mounting the file system} -Mounting PVFS2 with high availability is no different than mounting a -normal PVFS2 file system, except that you must use the virtual hostname -for the PVFS2 server rather than the primary hostname of the node. -Figure~\ref{fig:mount} provides an example. 
+Mounting PVFS with high availability is no different than mounting a +normal PVFS file system, except that you must use the virtual hostname +for the PVFS server rather than the primary hostname of the node: -\begin{figure} -\begin{scriptsize} \begin{verbatim} -$ mount -t pvfs2 tcp://virtualnode1:3334/pvfs2-fs /mnt/pvfs2 +$ mount -t pvfs2 tcp://virtual1:3334/pvfs2-fs /mnt/pvfs2 \end{verbatim} -\end{scriptsize} -\caption{Mounting PVFS2 file system} -\label{fig:mount} -\end{figure} \section{What happens during failover} The following example illustrates the steps that occur when a node fails: \begin{enumerate} -\item Node2 (which is running a \texttt{pvfs2-server} on the virtualnode2 IP +\item Node2 (which is running a \texttt{pvfs2-server} on the virtual2 IP address) suffers a failure \item Client node begins timeout/retry cycle -\item Heartbeat services running on remaining servers notice that node2 +\item Heartbeat services running on remaining nodes notice that node2 is not responding -\item After a timeout has elapsed, remaining servers reach a quorum and +\item After a timeout has elapsed, remaining nodes reach a quorum and vote to treat node2 as a failed node \item Node1 sends a stonith command to reset node2 \item Node2 either reboots or remains powered off (depending on nature of failure) \item Once stonith command succeeds, node5 is selected to replace it -\item The virtualnode2 IP address, mount point, and +\item The virtual2 IP address, mount point, and \texttt{pvfs2-server} service are started on node5 \item Client node retry eventually succeeds, but now the network @@ -567,6 +480,26 @@ \section{Controlling Heartbeat} information is valid and consistent \end{itemize} +\section{Additional examples} + +The \texttt{examples/heartbeat/hardware-specific} directory contains +additional example scripts that may be helpful in some scenarios: + +\begin{itemize} +\item \texttt{pvfs2-stonith-plugin}: An example stonith plugin +that can use an arbitrary script to 
power off nodes. May be used (for +example) with the \texttt{apc*} and \texttt{baytech*} scripts to control +remote controlled power strips if the scripts provided by Heartbeat are +not sufficient. +\item \texttt{Filesystem-qla-monitor}: A modified version of the +standard FileSystem resource that uses the \texttt{qla-monitor.pl} +script to provide additional monitoring capability for QLogic fibre +channel cards. +\item \texttt{PVFS2-notify}: An example of a dummy resource that could +be added to the configuration to perform additional logging or +notification steps on startup. +\end{itemize} + \end{document} % vim: tw=72 diff --git a/doc/pvfs2-logging.txt b/doc/pvfs2-logging.txt new file mode 100644 index 0000000..8ddc895 --- /dev/null +++ b/doc/pvfs2-logging.txt @@ -0,0 +1,191 @@ +PVFS Logging +============ + +This document describes log files produced by PVFS and how to control +what messages are included in them. + +PVFS Log Format +--------------- + +The log messages from all PVFS components except for the kernel module +are in the following format: + + [ ] LOG MESSAGE + +The type will be one of 4 different letters depending on what type of +log message it is: + + D - DEBUG + E - ERROR + A - ACCESS LOGGING + P - PERFORMANCE COUNTER + +The timestamp defaults to showing the date, hour, and minute that the +log message was generated. The timestamp format can be modified to one +of the following styles by using the --logstamp argument to pvfs2-client +or the LogStamp field in the pvfs2-server config file: + + datetime: (default, as described above) + usec: shows time with microsecond granularity (but no date) + none: no time stamp + thread: includes thread ID with each message + +PVFS Log Locations +------------------ + +The pvfs2-server daemon writes log messages to /tmp/pvfs2-server.log by +default. A different output file can be specified using the LogFile +parameter in the configuration file. 
The logs can also be sent to syslog +instead by adding "LogType syslog" to the configuration file. + +The pvfs2-client daemon writes log messages to /tmp/pvfs2-client.log by +default. This can be overridden using the --logfile or --logtype command +line arguments to pvfs2-client. + +The PVFS kernel module (pvfs2.ko) generates log messages to dmesg and/or +/var/log/messages and/or /var/log/kern depending on your Linux distribution. + +The PVFS client library (libpvfs2) and command line utilities generate log +messages to stderr if enabled. + +Logging Levels +-------------- + +All PVFS components log critical error messages automatically. However, +you can also turn on additional logging for debugging purposes. This is +controlled by specifying which logging "masks" should be enabled. + +You can see a list of available pvfs2-server, client library, and +pvfs2-client logging levels by running the pvfs2-set-debugmask utility +with no arguments. + +You can see a list of available kernel module logging masks and client +logging masks by running "cat /proc/sys/pvfs2/debug-help". + +The "verbose" mask is commonly used to turn on most of the logging +levels that are useful for debugging problems. + +Changing the log mask for pvfs2-server +-------------------------------------- + +Use the EventLogging field in the configuration file to specify +a comma separated list of masks to enable. You can also use the +pvfs2-set-debugmask command line utility to change the mask dynamically +without restarting the server. + +Changing the log mask for libpvfs and command line utilities +------------------------------------------------------------ + +Set the PVFS2_DEBUGMASK environment variable to a comma separated list of +client-appropriate masks prior to launching the application. + + +Changing the log mask for the kernel module +------------------------------------------- + +There are three ways to set the debugging level for the kernel module: + +1. 
Set the module_parm_debug_mask parameter when the kernel module is +loaded. + +2. Set the environment variable PVFS2_KMODMASK before starting the +pvfs2-client. NOTE: the kernel module must be loaded before starting +the client-core. + +3. Write a debug string to /proc/sys/pvfs2/kernel-debug after the kernel +module is loaded. + + +Options 1 and 2 allow the kernel debug mask to be set ONLY when PVFS is started, +while option 3 allows the kernel debug mask to be modified while PVFS is +running. Thus, option 3 dynamically updates the kernel debug mask, +immediately turning on the debugging options specified, and REPLACES the +existing debug mask. Whenever you modify the kernel debug mask using +option 3, an informational message is printed to the system log file, +giving both its numerical value and a comma-separated list of keywords representing +the areas of debugging just turned on. + +Options 1 and 2 require the user to specify a numerical value that is +an OR'd list of gossip debug values. These values can be found in +include/pvfs2-debug.h. For example, to load the kernel module with +"file" debugging turned on, issue the following command: + +insmod pvfs2.ko module_parm_debug_mask=4 + +The 4 is the value of GOSSIP_FILE_DEBUG, and module_parm_debug_mask is the kernel +module's input parameter for the kernel debug mask. To turn on multiple areas, for +example, file and dcache, set module_parm_debug_mask = (GOSSIP_FILE_DEBUG | GOSSIP_DCACHE_DEBUG) = +(4 | 128) = 132. Its string equivalent would be "file,dcache". + +An informational message is displayed in the system log whenever you load the kernel +module, giving you the kernel debug mask's numerical value and its string +equivalent. Be aware that you can modify this value later using option 3. + +To set the kernel debug mask using PVFS2_KMODMASK, create a global environment +variable and set it equal to the desired numerical value.
When the pvfs2-client +is started, the kernel debug mask and its string equivalent will be modified. Note that +PVFS2_KMODMASK will override any value set by the kernel module load process. Again, +option 3 allows you to change the debug mask at any time. + +To set the kernel debug mask using the /proc variable, write a debug string to +/proc/sys/pvfs2/kernel-debug. Example: echo "file,dcache" > /proc/sys/pvfs2/kernel-debug. +An informational message will be written to the system log file displaying the new +kernel debug mask and its string equivalent. + +To see the kernel debug mask without looking in the system log, issue a "cat" on +/proc/sys/pvfs2/kernel-debug and you will see the string equivalent of the kernel +debug mask. + +A helper /proc variable, /proc/sys/pvfs2/debug-help, will display a list of valid +keywords for both the kernel and client debug masks, when you issue a "cat" on it. These +keywords are used to build the string that represents the areas of debugging that you +want turned on. + + + +Changing the log mask for the client module +------------------------------------------- + +There are three ways to set the debugging level for the pvfs2-client: + +1. Set --gossip-mask=MASK_LIST on the command line when starting the client. This +list can be overridden by PVFS2_DEBUGMASK or by setting the /proc variable client-debug. + +2. Write a debug string to /proc/sys/pvfs2/client-debug after starting the client. This +will override any value set by --gossip-mask on the command line and any value set by +PVFS2_DEBUGMASK. + +3. Set the environment variable PVFS2_DEBUGMASK before starting the client. This will +override any value set by --gossip-mask on the command line. + + +Options 1 and 2 require a string of comma-separated keywords to set the client debug mask. +For example: + +./pvfs2-client --gossip-mask="server,trove" -p ./pvfs2-client-core +NOTE: after kernel module is loaded and during client startup.
+ +echo "server,trove" > /proc/sys/pvfs2/client-debug +NOTE: after kernel module is loaded and client is started. + +A list of client debug keywords can be found in include/pvfs2-debug.h or by accessing +the /proc/sys/pvfs2/debug-help variable after the kernel module is loaded. Example: + +cat /proc/sys/pvfs2/debug-help + + +When the client starts, the client debug mask information is sent to the kernel module +where a local version of the mask and its string equivalent is maintained. This process +writes an informational message to the system log file displaying the numerical value of +the client debug mask and its string equivalent. You can also see the mask's current string +equivalent by issuing the following: + +cat /proc/sys/pvfs2/client-debug + + +Whenever you modify the client debug mask after the client has started, an informational message will +be written to the system log file displaying the new numerical value and string equivalent. At any +time, once PVFS is running, you can view the client debug mask using the "cat" statement above without +having to look in the system log file for the last modification. + + diff --git a/doc/pvfs2-quickstart.tex b/doc/pvfs2-quickstart.tex index 9b91369..59eddd7 100755 --- a/doc/pvfs2-quickstart.tex +++ b/doc/pvfs2-quickstart.tex @@ -64,11 +64,15 @@ \subsection{Dependencies} The following software packages are currently required by PVFS2: \begin{itemize} -\item Berkely DB (version 3 or 4) +\item Berkeley DB with development libraries (version 3 or 4) \item aio support (provided by glibc and librt) \item pthreads \item gcc 2.96 or newer (DO NOT USE gcc 2.95! gcc 3.x recommended) \item GNU Make +\item flex +\item bison +\item kernel sources (for client kernel interface) +\item GTK+ (for Karma) \end{itemize} The following software packages are currently recommended for use with PVFS2: @@ -171,14 +175,10 @@ \subsection{Server configuration} particular attention to the listing of the metadata servers and I/O servers.
In this example we will use ``testhost'' for both. -The pvfs2-genconfig tool will generate two configuration files. One -is a file system configuration file that will be identical for all -servers (if we had more than one). The second is a server specific -configuration file that will be different for each server. The server -specific files have the hostname of the server that they belong to -appended to the file name. This script should be excuted as root, so -that we can place the configuration files in their default /etc/ -locations. +The pvfs2-genconfig tool will generate a single file system configuration +file that will be identical for all servers. This script should be +executed as root, so that we can place the configuration file in its +default /etc/ location. In this simple configuration, we can accept the default options for every field. We will use the hostname ``testhost'' rather than @@ -186,18 +186,18 @@ \subsection{Server configuration} \begin{verbatim} root@testhost:~# /usr/bin/pvfs2-genconfig \ - /etc/pvfs2-fs.conf /etc/pvfs2-server.conf + /etc/pvfs2-fs.conf ********************************************************************** - Welcome to the PVFS2 Configuration Generator: + Welcome to the PVFS2 Configuration Generator: This interactive script will generate configuration files suitable for use with a new PVFS2 file system. Please see the PVFS2 quickstart guide for details. ********************************************************************** - You must first select the network protocol that your file system will use. -The only currently supported options are "tcp", "gm", and "ib". +The only currently supported options are "tcp", "gm", "mx", "ib", and "portals". +(For multi-homed configurations, use e.g. "ib,tcp".) * Enter protocol type [Default is tcp]: @@ -206,15 +206,26 @@ \subsection{Server configuration} * Enter port number [Default is 3334]: +Choose a directory for each server to store data in.
+ +* Enter directory name: [Default is /pvfs2-storage-space]: + +Choose a directory for each server to store metadata in. + +* Enter directory name: [Default is /pvfs2-storage-space]: + +Choose a file for each server to write log messages to. + +* Enter log file location [Default is /tmp/pvfs2-server.log]: + Next you must list the hostnames of the machines that will act as I/O servers. Acceptable syntax is "node1, node2, ..." or "node{#-#,#,#}". * Enter hostnames [Default is localhost]: testhost -Now list the hostnames of the machines that will act as Metadata -servers. This list may or may not overlap with the I/O server list. +Use same servers for metadata? (recommended) -* Enter hostnames [Default is localhost]: testhost +* Enter yes or no [Default is yes]: Configured a total of 1 servers: 1 of them are I/O servers. @@ -222,21 +233,10 @@ \subsection{Server configuration} * Would you like to verify server list (y/n) [Default is n]? -Choose a file for each server to write log messages to. - -* Enter log file location [Default is /tmp/pvfs2-server.log]: - -Choose a directory for each server to store data in. - -* Enter directory name: [Default is /pvfs2-storage-space]: - -Writing fs config file... Done. -Writing 1 server config file(s)... Done. - -Configuration complete! +Writing fs config file... done \end{verbatim} -The generated config files will have conservative default values. The PVFS2 +The generated config file will have conservative default values. The PVFS2 Users Guide has more information about the settings and the consequences of setting more aggressive, high performance values. @@ -249,16 +249,14 @@ \subsection{Starting the server} files. 
\begin{verbatim} -bash-2.05b# /usr/sbin/pvfs2-server /etc/pvfs2-fs.conf \ - /etc/pvfs2-server.conf-testhost -f +bash-2.05b# /usr/sbin/pvfs2-server /etc/pvfs2-fs.conf -f \end{verbatim} Once the above step is done, you can start the server in normal mode as follows: \begin{verbatim} -bash-2.05b# /usr/sbin/pvfs2-server /etc/pvfs2-fs.conf \ - /etc/pvfs2-server.conf-testhost +bash-2.05b# /usr/sbin/pvfs2-server /etc/pvfs2-fs.conf \end{verbatim} All log messages will be directed to /tmp/pvfs2-server.log, unless you specified @@ -267,10 +265,17 @@ \subsection{Starting the server} you may run the server as follows: \begin{verbatim} -bash-2.05b# /usr/sbin/pvfs2-server /etc/pvfs2-fs.conf \ - /etc/pvfs2-server.conf-testhost -d +bash-2.05b# /usr/sbin/pvfs2-server /etc/pvfs2-fs.conf -d \end{verbatim} +On startup, the PVFS2 server uses the hostname of the machine that it +is running on to determine necessary information from the configuration +file. If the hostname doesn't match any of the addresses specified in +the config file, then you must use the -a option. For example, +each of the above command lines could include ``-a testhost'' to +specify that the server is using the \texttt{testhost} alias in the +configuration file.
+ \subsubsection{Automatic server startup and shutdown} \label{sec:rc} @@ -281,11 +286,11 @@ \subsubsection{Automatic server startup and shutdown} \begin{verbatim} bash-2.05b# cp /usr/src/pvfs2/examples/pvfs2-server.rc \ - /etc/rc.d/init.d/pvfs2-server -bash-2.05b# chmod a+x /etc/rc.d/init.d/pvfs2-server + /etc/init.d/pvfs2-server +bash-2.05b# chmod a+x /etc/init.d/pvfs2-server bash-2.05b# chkconfig pvfs2-server on -bash-2.05b# ls -al /etc/rc.d/rc3.d/S35pvfs2-server -lrwxrwxrwx 1 root root 22 Sep 21 13:11 /etc/rc.d/rc3.d/S35pvfs2-server \ +bash-2.05b# ls -al /etc/rc3.d/S35pvfs2-server +lrwxrwxrwx 1 root root 22 Sep 21 13:11 /etc/rc3.d/S35pvfs2-server \ -> ../init.d/pvfs2-server \end{verbatim} @@ -294,14 +299,14 @@ \subsubsection{Automatic server startup and shutdown} To manually start the server, you can run the following command: \begin{verbatim} -bash-2.05b# /etc/rc.d/init.d/pvfs2-server start +bash-2.05b# /etc/init.d/pvfs2-server start Starting PVFS2 server: [ OK ] \end{verbatim} To manually stop the server: \begin{verbatim} -bash-2.05b# /etc/rc.d/init.d/pvfs2-server stop +bash-2.05b# /etc/init.d/pvfs2-server stop Stopping PVFS2 server: [ OK ] \end{verbatim} @@ -320,7 +325,7 @@ \subsection{Client configuration} information is presented in the same way as an \texttt{fstab (5)} entry: \begin{verbatim} -tcp://testhost:3334/pvfs2-fs /mnt/pvfs2 pvfs2 default,noauto 0 0 +tcp://testhost:3334/pvfs2-fs /mnt/pvfs2 pvfs2 defaults,noauto 0 0 \end{verbatim} The entry lists a PVFS2 server (\texttt{tcp://testhost:3334/pvfs2-fs}) and a @@ -343,7 +348,7 @@ \subsection{Client configuration} substitute your host name in place of ``testhost'': \begin{verbatim} -tcp://testhost:3334/pvfs2-fs /mnt/pvfs2 pvfs2 default,noauto 0 0 +tcp://testhost:3334/pvfs2-fs /mnt/pvfs2 pvfs2 defaults,noauto 0 0 \end{verbatim} There are a few alternatives to using an /etc/pvfs2tab which may be useful @@ -464,7 +469,7 @@ \subsection{Server configuration} \begin{verbatim} root@cluster1:~# 
/usr/local/pvfs2/bin/pvfs2-genconfig \ - /etc/pvfs2-fs.conf /etc/pvfs2-server.conf + /etc/pvfs2-fs.conf ********************************************************************** Welcome to the PVFS2 Configuration Generator: @@ -524,7 +529,6 @@ \subsection{Server configuration} * Enter directory name: [Default is /pvfs2-storage-space]: Writing fs config file... Done. -Writing 8 server config file(s)... Done. Configuration complete! \end{verbatim} @@ -536,11 +540,7 @@ \subsection{Server configuration} We have now made all the config files for an 8-node storage cluster: \begin{verbatim} root@cluster1:~# ls /etc/pvfs2/foo/ -pvfs2-fs.conf pvfs2-server.conf-cluster5 -pvfs2-server.conf-cluster1 pvfs2-server.conf-cluster6 -pvfs2-server.conf-cluster2 pvfs2-server.conf-cluster7 -pvfs2-server.conf-cluster3 pvfs2-server.conf-cluster8 -pvfs2-server.conf-cluster4 +pvfs2-fs.conf \end{verbatim} Now the config files must be copied out to all of the server nodes. If you @@ -557,10 +557,9 @@ \subsection{Server configuration} \begin{verbatim} root@cluster1:~# for i in `seq 1 8`; do -> scp /etc/pvfs2-server.conf-cluster\${i} cluster\${i}:/etc/ > scp /etc/pvfs2-fs.conf cluster\${i}:/etc/ > scp /usr/src/pvfs2/examples/pvfs2-server.rc \ - cluster\${i}:/etc/rc.d/init.d/pvfs2-server + cluster\${i}:/etc/init.d/pvfs2-server > ssh cluster\${i} /sbin/chkconfig pvfs2-server on > done \end{verbatim} @@ -573,16 +572,14 @@ \subsection{Starting the servers} or IO node in the cluster: \begin{verbatim} -root@cluster1# /usr/sbin/pvfs2-server /etc/pvfs2-fs.conf \ - /etc/pvfs2-server.conf -f +root@cluster1# /usr/sbin/pvfs2-server /etc/pvfs2-fs.conf -f \end{verbatim} Then once the storage space is created, start the server for real with a command like this on every metadata or IO node in the cluster: \begin{verbatim} -root@cluster1# /usr/sbin/pvfs2-server /etc/pvfs2-fs.conf \ - /etc/pvfs2-server.conf +root@cluster1# /usr/sbin/pvfs2-server /etc/pvfs2-fs.conf \end{verbatim} If you want to run the server in the 
foreground (e.g. for debugging), use the @@ -946,8 +943,10 @@ \section{Debugging your PVFS2 configuration} verbose kmod subsystem error diagnostics are written to the system ring buffer and eventually to the kernel logs. One could also set the kmod diagnostic level when the kernel module is loaded -like so, insmod pvfs2.ko gossip\_debug\_mask=. +like so, insmod pvfs2.ko module\_parm\_debug\_mask=. The diagnostic level will be a bitwise OR of values specified in pvfs2-debug.h. +For more information on setting the kernel or client debug mask, see +\texttt{doc/pvfs2-logging.txt} in the PVFS source tree. \section{ROMIO Support} \label{sec:romio} diff --git a/doc/pvfs2-tuning.tex b/doc/pvfs2-tuning.tex index 97881c8..55d2235 100644 --- a/doc/pvfs2-tuning.tex +++ b/doc/pvfs2-tuning.tex @@ -123,16 +123,252 @@ \subsection{Maximum I/O Size} \subsection{Workload Specifics} -\subsection{Extended Attributes} - -\subsubsection{Directory Hints} +\section{Number of Datafiles} + +Each file stored on PVFS is broken into smaller parts to be +distributed across servers. The metadata (which includes +information such as the owner and permissions of the file) is stored in +a metadata file on one server. The actual file contents are stored in +\emph{datafiles} distributed among multiple servers. + +By default, each file in PVFS is made up of N datafiles, where N is +the number of servers. In most situations, this is the most efficient +number of datafiles to use because it leverages all available resources +evenly and allows the load to be distributed. This is especially +beneficial for large files accessed in parallel. + +However, there are also cases where it may be helpful to use a different +number of datafiles. For example, if you have a set of many small +files (a few KB each) that are accessed in serial, then distributing +them across all servers increases overhead without gaining any benefit +from parallelism. 
In this case it will most likely perform better if +the number of datafiles is set to 1 for each file. + +The most straightforward way to change the number of datafiles is by +setting extended attributes on a directory. Any new files created in +that directory will utilize the specified number of datafiles +regardless of how many servers are available. New subdirectories will +inherit the same settings. It is also possible to set a default number +of datafiles in the configuration file for the entire file system, but +this is rarely advisable. + +PVFS does not allow the number of datafiles to be changed dynamically +for existing files. If you wish to convert an existing file, then you must +copy it to a new file with the appropriate datafile setting and then delete +the old file. + +Use this command to specify the number of datafiles to use within a given +directory: + +\begin{verbatim} +$ setfattr -n user.pvfs2.num_dfiles -v 1 /mnt/pvfs2/dir +\end{verbatim} + +Use these commands to create a new file and confirm the number of +datafiles that it is using: + +\begin{verbatim} +$ touch /mnt/pvfs2/dir/foo +$ pvfs2-viewdist -f /mnt/pvfs2/dir/foo +dist_name = simple_stripe +dist_params: +strip_size:65536 + +Number of datafiles/servers = 1 +Server 0 - tcp://localhost:3334, handle: 5223372036854744173 +(487d2531626f846d.bstream) +\end{verbatim} + +\section{Distributions} + +A \emph{distribution} is an algorithm that defines how a file's data +will be distributed among available servers. PVFS provides an API +for implementing arbitrary distributions, but four specific ones are +available by default. Each distribution has different performance +characteristics that may be helpful depending on your workload. + +The most straightforward way to use an alternative distribution is by +setting an extended attribute on a specific directory. Any new files +created in that directory will utilize the specified distribution and +parameters. 
New subdirectories will inherit the same settings. You may +also use the server configuration files to define default distribution +settings for the entire file system, but we suggest experimenting at a +directory level first. + +PVFS does not allow the distribution to be changed dynamically for +existing files. If you wish to convert an existing file, then you must +copy it to a new file with the appropriate distribution and then delete +the old file. + +This section describes the four available distributions and gives +command line examples of how to use each one. + +% TODO: some figures would be spiffy (but time consuming) + +\subsection{Simple Stripe} + +The simple stripe distribution is the default distribution used by PVFS. +It dictates that file data will be striped evenly across all available +servers in a round robin fashion. This is very similar to RAID 0, +except the data is distributed across servers rather than local block +devices. + +The only tuning parameter within simple stripe is the \emph{strip size}. +The strip size determines how much data is stored on one server before +switching to the next server. The default value in PVFS is 64 KB. You +may want to experiment with this value in order to find a tradeoff +between the amount of concurrency achieved with small accesses vs. the +amount of data streamed to each server. 
+ +\begin{verbatim} +# to enable simple stripe distribution for a directory: +$ setfattr -n user.pvfs2.dist_name -v simple_stripe /mnt/pvfs2/dir + +# to change the strip size to 128 KB: +$ setfattr -n user.pvfs2.dist_params -v strip_size:131072 /mnt/pvfs2/dir + +# to create a new file and confirm the distribution: +$ touch /mnt/pvfs2/dir/file +$ pvfs2-viewdist -f /mnt/pvfs2/dir/file +dist_name = simple_stripe +dist_params: +strip_size:131072 + +Number of datafiles/servers = 4 +Server 0 - tcp://localhost:3337, handle: 8223372036854744180 (721f494c589b8474.bstream) +Server 1 - tcp://localhost:3334, handle: 5223372036854744174 (487d2531626f846e.bstream) +Server 2 - tcp://localhost:3335, handle: 6223372036854744178 (565ddbe509d38472.bstream) +Server 3 - tcp://localhost:3336, handle: 7223372036854744180 (643e9298b1378474.bstream) +\end{verbatim} + +\subsection{Basic} + +The basic distribution is mainly included as an example for distribution +developers. It performs no striping at all, and instead places all +data on one server. The basic distribution overrides the number of +datafiles (as shown in the previous section) and only uses one datafile +in all cases. There are no tunable parameters. + +\begin{verbatim} +# to enable basic distribution for a directory: +$ setfattr -n user.pvfs2.dist_name -v basic_dist /mnt/pvfs2/dir + +# to create a new file and confirm the distribution: +$ touch /mnt/pvfs2/dir/file +$ pvfs2-viewdist -f /mnt/pvfs2/dir/file +dist_name = basic_dist +dist_params: +none +Number of datafiles/servers = 1 +Server 0 - tcp://localhost:3334, handle: 5223372036854744172 (487d2531626f846c.bstream) +\end{verbatim} + + +\subsection{Two Dimensional Stripe} + +The two dimensional stripe distribution is a variation of the +simple stripe distribution that is intended to combat the effects of +\emph{incast}.
Incast occurs when a client requests a range of data that +is striped across several servers, therefore causing the transmission of +data from many sources to one destination simultaneously. Some networks +or network protocols may perform poorly in this scenario due to switch +buffering or congestion avoidance. This problem becomes more +significant as more servers are used. + +The two dimensional stripe distribution operates by grouping servers +into smaller subsets and striping data within each group multiple times +before switching. Three parameters control the grouping and +striping: \begin{itemize} -\item Number of Datafiles -\item Stripe Size -\item Distribution +\item strip size: same as in the simple stripe distribution +\item number of groups: how many groups to divide the servers into +\item factor: how many times to stripe within each group \end{itemize} +The common access pattern that benefits from this distribution is the +case of N clients operating on one file of size B, where each client is +responsible for a contiguous region of size B/N. With simple striping, +each client may have to access all servers in order to read its specific +B/N byte range. With two dimensional striping (using appropriate +parameters), each client only accesses a subset of servers. All servers +are still active over the file as a whole so that full bandwidth is +still preserved. However, the network traffic patterns limit the amount +of incast produced by any one client. + +The default incast parameters use a strip size of 64 KB, 2 groups, and a +factor of 256. 
+ +\begin{verbatim} +# to enable two dimensional stripe distribution for a directory: +$ setfattr -n user.pvfs2.dist_name -v twod_stripe /mnt/pvfs2/dir + +# to change the strip size to 128 KB, the number of groups to 4, and a +# factor of 128: +$ setfattr -n user.pvfs2.dist_params -v strip_size:131072,num_groups:4,group_strip_factor:128 /mnt/pvfs2/dir + +# to create a new file and confirm the distribution: +$ touch /mnt/pvfs2/dir/file +$ pvfs2-viewdist -f /mnt/pvfs2/dir/file +dist_name = twod_stripe +dist_params: +num_groups:4,strip_size:131072,factor:128 + +Number of datafiles/servers = 4 +Server 0 - tcp://localhost:3336, handle: 7223372036854744175 +(643e9298b137846f.bstream) +Server 1 - tcp://localhost:3337, handle: 8223372036854744175 +(721f494c589b846f.bstream) +Server 2 - tcp://localhost:3334, handle: 5223372036854744167 +(487d2531626f8467.bstream) +Server 3 - tcp://localhost:3335, handle: 6223372036854744173 +(565ddbe509d3846d.bstream) +\end{verbatim} + +\subsection{Variable Strip} + +Variable strip is similar to simple stripe, except that it allows you to +specify a different strip size for each server. For example, you could +place the first 32 KB on one server, the next 64 KB on the next server, +and then 128 KB on the final server. The striping still round robins +once all servers have been filled. This distribution may be useful for +applications that have a very specific data format and can take +advantage of a correspondingly specific placement of file data to match it. + +The only parameter used by the variable strip distribution is the +\emph{strips} parameter, which specifies the strip size for each server. +The number of datafiles used will not exceed the number of servers +listed. For example, if the strips parameter specifies a strip size for +three servers, then files using this distribution will have at most +three datafiles. + +The format of the strips parameter is a list of semicolon separated +\emph{server number}:\emph{strip size} pairs.
The strip size can be specified with shorthand +notation, such as ``K'' for kilobytes or ``M'' for megabytes. + +\begin{verbatim} + +# to enable variable strip distribution for a directory: +$ setfattr -n user.pvfs2.dist_name -v varstrip_dist /mnt/pvfs2/dir + +# to change the strip sizes to match the example above: +$ setfattr -n user.pvfs2.dist_params -v "strips:0:32K;1:64K;2:128K" /mnt/pvfs2/dir + +# to create a new file and confirm the distribution: +$ touch /mnt/pvfs2/dir/file +$ pvfs2-viewdist -f /mnt/pvfs2/dir/file +dist_name = varstrip_dist +dist_params: +0:32K;1:64K;2:128K +Number of datafiles/servers = 3 +Server 0 - tcp://localhost:3336, handle: 7223372036854744173 +(643e9298b137846d.bstream) +Server 1 - tcp://localhost:3334, handle: 5223372036854744165 +(487d2531626f8465.bstream) +Server 2 - tcp://localhost:3335, handle: 6223372036854744171 +(565ddbe509d3846b.bstream) +\end{verbatim} + \section{Workloads} \subsection{Small files} diff --git a/examples/heartbeat/PVFS2 b/examples/heartbeat/PVFS2 index 1c50291..cee7419 100644 --- a/examples/heartbeat/PVFS2 +++ b/examples/heartbeat/PVFS2 @@ -13,17 +13,17 @@ # # OCF parameters (required in cib): # OCF_RESKEY_fsconfig -# OCF_RESKEY_serverconfig # OCF_RESKEY_port # OCF_RESKEY_pidfile # OCF_RESKEY_ip +# OCF_RESKEY_alias # example values: #OCF_RESKEY_fsconfig=/root/simple.conf -#OCF_RESKEY_serverconfig=/root/server.conf-localhost #OCF_RESKEY_port=3334 #OCF_RESKEY_pidfile=/tmp/pvfs2-server.pid -#OCF_RESKEY_ip=pvfs2ha0001 +#OCF_RESKEY_ip=virtual1 +#OCF_RESKEY_alias=virtual1 VARRUN=/var/run @@ -40,8 +40,8 @@ fi # ####################################################################### # -PVFS2D=/usr/sbin/pvfs2-server -PVFS2CHECK=/usr/bin/pvfs2-check-server +PVFS2D=/usr/local/sbin/pvfs2-server +PVFS2CHECK=/usr/local/bin/pvfs2-check-server # End of Configuration options @@ -129,10 +129,10 @@ start_pvfs2() { ulimit -n 100000 && ulimit -c unlimited # try to create storage space (ok if it fails) - $PVFS2D $OCF_RESKEY_fsconfig
$OCF_RESKEY_serverconfig -f + $PVFS2D $OCF_RESKEY_fsconfig -a $OCF_RESKEY_alias -f # launch daemon - ocf_run $PVFS2D -p $OCF_RESKEY_pidfile $OCF_RESKEY_fsconfig $OCF_RESKEY_serverconfig + ocf_run $PVFS2D -p $OCF_RESKEY_pidfile -a $OCF_RESKEY_alias $OCF_RESKEY_fsconfig if [ $? -ne 0 ] then @@ -243,13 +243,6 @@ The full path name of file system config file. fs config file path - - -The full path name of server config file. - -server config file path - - Port number that server will listen on. @@ -271,6 +264,12 @@ Virtual ip address that the server will run on. virtual IP address + + +PVFS server alias to be passed to -a argument. + +PVFS server alias + @@ -306,12 +305,6 @@ validate_all_pvfs2() { exit $OCF_ERR_ARGS fi -# serverconfig: make sure the file exists -if [ ! -f $OCF_RESKEY_serverconfig ]; then - ocf_log err "Configuration file $OCF_RESKEY_serverconfig not found!" - exit $OCF_ERR_CONFIGURED -fi - # fsconfig: make sure the file exists if [ ! -f $OCF_RESKEY_fsconfig ]; then ocf_log err "Configuration file $OCF_RESKEY_fsconfig not found!" 
diff --git a/examples/heartbeat/cib.xml.example b/examples/heartbeat/cib.xml.example index 55cd0cd..c26fdaa 100644 --- a/examples/heartbeat/cib.xml.example +++ b/examples/heartbeat/cib.xml.example @@ -14,8 +14,6 @@ - - @@ -23,16 +21,6 @@ - - - - - - - - - - @@ -43,42 +31,29 @@ - - + + - - - - - + - + + - + - - - - - - - - - - @@ -89,41 +64,29 @@ - - + + - - - - - + - + + - + - - - - - - - - - @@ -135,41 +98,29 @@ - - + + - - - - - + - + + - + - - - - - - - - - @@ -181,50 +132,93 @@ - - + + - - - - - + - + + - + - - + + + + + + + + - - + + + + - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/examples/heartbeat/hardware-specific/Filesystem-qla-monitor b/examples/heartbeat/hardware-specific/Filesystem-qla-monitor new file mode 100644 index 0000000..a9e01b7 --- /dev/null +++ b/examples/heartbeat/hardware-specific/Filesystem-qla-monitor @@ -0,0 +1,840 @@ +#!/bin/sh +# +# Support: linux-ha@lists.linux-ha.org +# License: GNU General Public License (GPL) +# +# Filesystem +# Description: Manages a Filesystem on a shared storage medium. +# Original Author: Eric Z. Ayers (eric.ayers@compgen.com) +# Original Release: 25 Oct 2000 +# +# Modified to support monitoring of a QLogic adapter, 2007 +# Relies on underlying scripts named fs-instance-alarm.pl and +# fs-power-control.pl to take action in the event of monitoring failure +# +# usage: ./Filesystem {start|stop|status|monitor|validate-all|meta-data} +# +# OCF parameters are as below: +# OCF_RESKEY_device +# OCF_RESKEY_directory +# OCF_RESKEY_fstype +# OCF_RESKEY_options +# OCF_RESKEY_fsname +# OCF_RESKEY_conf_dir +# +#OCF_RESKEY_device : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0 +# Or a -U or -L option for mount, or an NFS mount specification +#OCF_RESKEY_directory : the mount point for the filesystem +#OCF_RESKEY_fstype : optional name of the filesystem type. e.g. 
ext2 +#OCF_RESKEY_options : options to be given to the mount command via -o +#OCF_RESKEY_fsname : file system name (PVFS2) +#OCF_RESKEY_conf_dir : file system conf directory (PVFS2) +# +# +# An example usage in /etc/ha.d/haresources: +# node1 10.0.0.170 Filesystem::/dev/sda1::/data1::ext2 +# or +# node1 10.0.0.170 Filesystem::-Ldata1::/data1::ext2 +# or +# node1 10.0.0.170 Filesystem::server:/data1::/data1::nfs::ro +# +# This assumes you want to manage a filesystem on a shared (scsi) bus. +# Do not put this filesystem in /etc/fstab. This script manages all of +# that for you. +# +# If you are interested in High Availability, you will probably also want +# some sort of external hardware RAID controller in front of the actual +# disks. I don't mean a RAID controller embedded in the host controller - +# it has to be an external controller. +# +# It can also be an internal RAID controller if the controller supports +# failover. IBM's ServeRAID controller does this, and it automatically +# prohibits concurrent access too, so it's pretty cool in this application. +# +# There is a script for software RAID-1 included in this directory. Right +# now, I wouldn't recommend using software RAID (see notes in the Raid1 script) +# +# NOTE: There is no locking (such as a SCSI reservation) being done here. +# I would if the SCSI driver could properly maintain the reservation, +# which it cannot, even with the 'scsi reservation' patch submitted +# earlier this year by James Bottomley. The patch minimizes the +# bus resets caused by a RESERVATION_CONFLICT return, and helps the +# reservation stay when 2 nodes contend for a reservation, +# but it does not attempt to recover the reservation in the +# case of a bus reset. +# +# What all this means is that if 2 nodes mount the same file system +# read-write, the filesystem is going to become corrupted. +# +# As a result, you should use this together with the stonith option +# and redundant, independent communications paths. 
+# +# If you don't do this, don't blame us when you scramble your disk. +# +# Note: the ServeRAID controller does prohibit concurrent acess +# In this case, you don't actually need STONITH, but redundant comm is +# still an excellent idea. +# + +####################################################################### +# Initialization: + +# newer versions of heartbeat have moved the ocf-shellfuncs file +if [ -f /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs ] ; then +. /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs +else +. /usr/lib/heartbeat/ocf-shellfuncs +fi + +####################################################################### + +# Utilities used by this script +MODPROBE=/sbin/modprobe +FSCK=/sbin/fsck +FUSER=/sbin/fuser +MOUNT=/bin/mount +UMOUNT=/bin/umount +BLOCKDEV=/sbin/blockdev + +check_util () { + if [ ! -x "$1" ] ; then + ocf_log err "Setup problem: Couldn't find utility $1" + exit $OCF_ERR_GENERIC + fi +} + +usage() { + cat <<-EOT + usage: $0 {start|stop|status|monitor|validate-all|meta-data} + EOT +} + +meta_data() { + cat < + + +1.0 + + +Resource script for Filesystem. It manages a Filesystem on a shared storage medium. + +Filesystem resource agent + + + + +The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification. + +block device + + + + + +The mount point for the filesystem. + +mount point + + + + + +Name of PVFS2 file system + +File system + + + + + +Path to file system conf directory + +File system conf directory + + + + + +The optional type of filesystem to be mounted. + +filesystem type + + + + + +Any extra options to be given as -o options to mount. + +options + + + + + +The name (UUID) of the OCFS2 cluster this filesystem is part of, +iff this is an OCFS2 resource and there's more than one cluster. You +should not need to specify this. + +OCFS2 cluster name/UUID + + + + + +Mountpoint of the cluster hierarchy below configfs. You should not +need to specify this. 
+ +OCFS2 configfs root + + + + + + + + + + + + + + +END +} + +# +# Make sure the kernel does the right thing with the FS buffers +# This function should be called after unmounting and before mounting +# It may not be necessary in 2.4 and later kernels, but it shouldn't hurt +# anything either... +# +# It's really a bug that you have to do this at all... +# +flushbufs() { + if + [ "$BLOCKDEV" != "" -a -x "$BLOCKDEV" -a "$blockdevice" = "yes" ] + then + $BLOCKDEV --flushbufs $1 + return $? + fi + + return 0 +} + +# Take advantage of /proc/mounts if present, use portabel mount command +# otherwise. Normalize format to "dev mountpoint fstype". +list_mounts() { + if [ -f "/proc/mounts" -a -r "/proc/mounts" ]; then + cut -d' ' -f1,2,3 /dev/null) + set -- $OCFS2_CLUSTER + local n="$#" + if [ $n -gt 1 ]; then + ocf_log err "$OCFS2_CLUSTER: several clusters found." + exit $OCF_ERR_GENERIC + fi + if [ $n -eq 0 ]; then + ocf_log err "$OCFS2_CONFIGFS: no clusters found." + exit $OCF_ERR_GENERIC + fi + fi + + OCFS2_CLUSTER_ROOT="$OCFS2_CONFIGFS/$OCFS2_CLUSTER" + if [ ! -d "$OCFS2_CLUSTER_ROOT" ]; then + ocf_log err "$OCFS2_CLUSTER: Cluster doesn't exist. Maybe o2cb hasn't been run?" + exit $OCF_ERR_GENERIC + fi + + OCFS2_FS_ROOT=$OCFS2_CLUSTER_ROOT/heartbeat/$OCFS2_UUID +} + +# +# START: Start up the filesystem +# +Filesystem_start() +{ + if [ "$FSTYPE" = "ocfs2" ]; then + # "start" now has the notification data available; that + # we're being started means we didn't get the + # pre-notification, because we weren't running, so + # process the information now first. + ocf_log info "$OCFS2_UUID: Faking pre-notification on start." + OCF_RESKEY_CRM_meta_notify_type="pre" + OCF_RESKEY_CRM_meta_notify_operation="start" + Filesystem_notify + fi + + # See if the device is already mounted. + if Filesystem_status >/dev/null 2>&1 ; then + ocf_log info "Filesystem $MOUNTPOINT is already mounted." + return $OCF_SUCCESS + fi + + # Insert SCSI module + # TODO: This probably should go away. 
Why should the filesystem + # RA magically load a kernel module? + $MODPROBE scsi_hostadapter >/dev/null 2>&1 + + if [ -z $FSTYPE ]; then + : No $FSTYPE specified, rely on the system has the right file-system support already + else + # Insert Filesystem module + $MODPROBE $FSTYPE >/dev/null 2>&1 + grep -e "$FSTYPE"'$' /proc/filesystems >/dev/null + if [ $? != 0 ] ; then + ocf_log err "Couldn't find filesystem $FSTYPE in /proc/filesystems" + return $OCF_ERR_ARGS + fi + fi + + # Check the filesystem & auto repair. + # NOTE: Some filesystem types don't need this step... Please modify + # accordingly + + if [ $blockdevice = "yes" ]; then + if [ ! -b "$DEVICE" ] ; then + ocf_log err "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" + exit $OCF_ERR_ARGS + fi + + if + case $FSTYPE in + ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs|cifs|smbfs|ocfs2) false;; + *) true;; + esac + then + ocf_log info "Starting filesystem check on $DEVICE" + if [ -z $FSTYPE ]; then + $FSCK -a $DEVICE + else + $FSCK -t $FSTYPE -a $DEVICE + fi + + # NOTE: if any errors at all are detected, it returns non-zero + # if the error is >= 4 then there is a big problem + if + [ $? -ge 4 ] + then + ocf_log err "Couldn't sucessfully fsck filesystem for $DEVICE" + return $OCF_ERR_GENERIC + fi + fi + fi + + if [ ! -d "$MOUNTPOINT" ] ; then + ocf_log err "Couldn't find directory [$MOUNTPOINT] to use as a mount point" + exit $OCF_ERR_ARGS + fi + + flushbufs $DEVICE + # Mount the filesystem. + if [ -z $FSTYPE ]; then + $MOUNT $options $DEVICE $MOUNTPOINT + else + $MOUNT -t $FSTYPE $options $DEVICE $MOUNTPOINT + fi + + if [ $? -ne 0 ]; then + ocf_log err "Couldn't mount filesystem $DEVICE on $MOUNTPOINT" + if [ "$FSTYPE" = "ocfs2" ]; then + ocfs2_cleanup + fi + return $OCF_ERR_GENERIC + fi + + return 0 +} +# end of Filesystem_start + +Filesystem_notify() { + # Process notifications; this is the essential glue level for + # giving user-space membership events to a cluster-aware + # filesystem. 
Right now, only OCFS2 is supported. + # + # When we get a pre-start notification, we set up all the nodes + # which will be active in our membership for the filesystem. + # (For the resource to be started, this happens at the time of + # the actual 'start' operation.) + # + # At a post-start, actually there's nothing to do for us really, + # but no harm done in re-syncing either. + # + # pre-stop is meaningless; we can't remove any node yet, it + # first needs to unmount. + # + # post-stop: the node is removed from the membership of the + # other nodes. + # + # Note that this expects that the base cluster is already + # active; ie o2cb has been started and populated + # $OCFS2_CLUSTER_ROOT/node/ already. This can be achieved by + # simply having o2cb run on all nodes by the CRM too. This + # probably ought to be mentioned somewhere in the to be written + # documentation. ;-) + # + + if [ "$FSTYPE" != "ocfs2" ]; then + # One of the cases which shouldn't occur; it should have + # been caught much earlier. Still, you know ... + ocf_log err "$DEVICE: Notification received for non-ocfs2 mount." + return $OCF_ERR_GENERIC + fi + + local n_type="$OCF_RESKEY_CRM_meta_notify_type" + local n_op="$OCF_RESKEY_CRM_meta_notify_operation" + local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname" + local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname" + local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname" + + ocf_log info "$OCFS2_UUID: notify: $n_type for $n_op" + ocf_log info "$OCFS2_UUID: notify active: $n_active" + ocf_log info "$OCFS2_UUID: notify stop: $n_stop" + ocf_log info "$OCFS2_UUID: notify start: $n_start" + + case "$n_type" in + pre) + case "$n_op" in + stop) + ocf_log info "$OCFS2_UUID: ignoring pre-notify for stop." + return $infoUCCESS + ;; + start) + # These are about to become active; prepare to + # communicate with them. 
+ n_active="$n_active $n_start" + ;; + esac + ;; + post) + case "$n_op" in + stop) + # remove unames from notify_stop_uname; these have been + # stopped and can no longer be considered active. + for UNAME in "$n_stop"; do + n_active="${n_active//$UNAME/}" + done + ;; + start) + if [ "$n_op" = "start" ]; then + ocf_log info "$OCFS2_UUID: ignoring post-notify for start." + return $OCF_SUCCESS + fi + ;; + esac + ;; + esac + + ocf_log info "$OCFS2_UUID: post-processed active: $n_active" + + local n_myself=${HA_CURHOST:-$(uname -n | tr A-Z a-z)} + ocf_log info "$OCFS2_UUID: I am node $n_myself." + + case " $n_active " in + *" $n_myself "*) ;; + *) ocf_log err "$OCFS2_UUID: $n_myself (local) not on active list!" + return $OCF_ERR_GENERIC + ;; + esac + + if [ -d "$OCFS2_FS_ROOT" ]; then + entry_prefix=$OCFS2_FS_ROOT/ + for entry in $OCFS2_FS_ROOT/* ; do + n_fs="${entry##$entry_prefix}" +# ocf_log info "$OCFS2_UUID: Found current node $n_fs" + case " $n_active " in + *" $n_fs "*) + # Construct a list of nodes which are present + # already in the membership. + n_exists="$n_exists $n_fs" + ocf_log info "$OCFS2_UUID: Keeping node: $n_fs" + ;; + *) + # Node is in the membership currently, but not on our + # active list. Must be removed. + if [ "$n_op" = "start" ]; then + ocf_log warn "$OCFS2_UUID: Removing nodes on start" + fi + ocf_log info "$OCFS2_UUID: Removing dead node: $n_fs" + if ! rm -f $entry ; then + ocf_log err "$OCFS2_UUID: Removal of $n_fs failed!" + fi + ;; + esac + done + else + ocf_log info "$OCFS2_UUID: heartbeat directory doesn't exist yet, creating." 
+ mkdir -p $OCFS2_FS_ROOT + fi + + ocf_log info "$OCFS2_UUID: Existing node list: $n_exists" + + # (2) + for entry in $n_active ; do +# ocf_log info "$OCFS2_UUID: Expected active node: $entry" + case " $n_exists " in + *" $entry "*) + ocf_log info "$OCFS2_UUID: Already active: $entry" + ;; + *) + if [ "$n_op" = "stop" ]; then + ocf_log warn "$OCFS2_UUID: Adding nodes on stop" + fi + ocf_log info "$OCFS2_UUID: Activating node: $entry" + if ! ln -s $OCFS2_CLUSTER_ROOT/node/$entry $OCFS2_FS_ROOT/$entry ; then + ocf_log err "$OCFS2_CLUSTER_ROOT/node/$entry: failed to link" + fi + ;; + esac + done +} + +# +# STOP: Unmount the filesystem +# +Filesystem_stop() +{ + # See if the device is currently mounted + Filesystem_status >/dev/null 2>&1 + if [ $? -ne $OCF_NOT_RUNNING ]; then + # Determine the real blockdevice this is mounted on (if + # possible) prior to unmounting. + determine_blockdevice + + # For networked filesystems, there's merit in trying -f: + case "$FSTYPE" in + nfs|cifs|smbfs) umount_force="-f" ;; + esac + + # Umount all sub-filesystems mounted under $MOUNTPOINT/ too. + for SUB in `list_submounts $MOUNTPOINT` $MOUNTPOINT; do + ocf_log info "Trying to unmount $MOUNTPOINT" + for sig in SIGTERM SIGTERM SIGTERM SIGKILL SIGKILL SIGKILL; do + if $UMOUNT $umount_force $SUB ; then + rc=$OCF_SUCCESS + ocf_log info "unmounted $SUB successfully" + break + else + rc=$OCF_ERR_GENERIC + ocf_log err "Couldn't unmount $SUB; trying cleanup with $sig" + # fuser returns a non-zero return code if none of the + # specified files is accessed or in case of a fatal + # error. + if $FUSER -$sig -m -k $SUB ; then + ocf_log info "Some processes on $SUB were signalled" + else + ocf_log info "No processes on $SUB were signalled" + fi + sleep 1 + fi + done + + if [ $rc -ne $OCF_SUCCESS ]; then + ocf_log err "Couldn't unmount $SUB, giving up!" + fi + done + else + # Already unmounted, wonderful. 
+ rc=$OCF_SUCCESS + fi + + flushbufs $DEVICE + + if [ "$FSTYPE" = "ocfs2" ]; then + ocfs2_cleanup + fi + + return $rc +} +# end of Filesystem_stop + +# +# STATUS: is the file system mounted and the SAN path alive? +# +Filesystem_status_qla_monitor() +{ + Filesystem_status >/dev/null 2>&1 + ORIG_RET=$? + if [ $ORIG_RET -eq $OCF_SUCCESS ]; then + # the fs is mounted. Are the paths ok? + /usr/bin/qla-monitor.pl --device $DEVICE + if [ $? -ne 0 ]; then + # log as much as we can + ocf_log info "Error: $DEVICE is dead." + echo "Error: $DEVICE is dead on node `hostname`, failing over" >> /var/log/pvfs2/pvfs2-failover.log; + /usr/bin/fs-instance-alarm.pl --fs-name $fsname --ce `hostname` --type PVFS2_HA --msg "$DEVICE is dead, failing over." + /usr/bin/fs-power-control.pl --fs-name $fsname --conf-dir $conf_dir --host `hostname` --command reboot + # shoot ourselves in the head + return $OCF_ERR_GENERIC + fi + dd bs=4096 count=1 if=/dev/zero oflag=direct of=${MOUNTPOINT}/filesystem.monitor + if [ $? -ne 0 ]; then + ocf_log err "Error: Unable to write to $DEVICE on node `hostname`." + /usr/bin/logger -p user.err -t PVFS2 "Error: Unable to write to $DEVICE on node `hostname`, failing over." + /usr/bin/fs-instance-alarm.pl --fs-name $fsname --ce `hostname` --type PVFS2_HA --msg "Error: Unable to write to $DEVICE on node `hostname`, failing over." + # shoot ourselves in the head + /usr/bin/fs-power-control.pl --fs-name $fsname --conf-dir $conf_dir --host `hostname` --command reboot + return $OCF_ERR_GENERIC + fi + fi + return $ORIG_RET +} +# end of Filesystem_status_qla_monitor + +# +# STATUS: is the filesystem mounted or not? 
+# +Filesystem_status() +{ + if + list_mounts | grep -q " $MOUNTPOINT " >/dev/null 2>&1 + then + rc=$OCF_SUCCESS + msg="$MOUNTPOINT is mounted (running)" + else + rc=$OCF_NOT_RUNNING + msg="$MOUNTPOINT is unmounted (stopped)" + fi + + # TODO: For ocfs2, or other cluster filesystems, should we be + # checking connectivity to other nodes here, or the IO path to + # the storage? + + case "$OP" in + status) ocf_log info "$msg";; + esac + return $rc +} +# end of Filesystem_status + +# +# VALIDATE_ALL: Are the instance parameters valid? +# FIXME!! The only part that's useful is the return code. +# This code always returns $OCF_SUCCESS (!) +# +Filesystem_validate_all() +{ + if [ -n $MOUNTPOINT -a ! -d $MOUNTPOINT ]; then + ocf_log warn "Mountpoint $MOUNTPOINT does not exist" + fi + +# Check if the $FSTYPE is workable +# NOTE: Without inserting the $FSTYPE module, this step may be imprecise +# TODO: This is Linux specific crap. + if [ ! -z $FSTYPE ]; then + cut -f2 /proc/filesystems |grep -q ^$FSTYPE$ + if [ $? -ne 0 ]; then + modpath=/lib/modules/`uname -r` + moddep=$modpath/modules.dep + # Do we have $FSTYPE in modules.dep? + cut -d' ' -f1 $moddep |grep -q "^$modpath.*$FSTYPE\.k\?o:$" + if [ $? -ne 0 ]; then + ocf_log info "It seems we do not have $FSTYPE support" + fi + fi + fi + +#TODO: How to check the $options ? + + return $OCF_SUCCESS +} + +# Check the arguments passed to this script +if + [ $# -ne 1 ] +then + usage + exit $OCF_ERR_ARGS +fi + +OP=$1 + +# These operations do not require instance parameters +case $OP in + meta-data) meta_data + exit $OCF_SUCCESS + ;; + usage) usage + exit $OCF_SUCCESS + ;; +esac + +if + [ -z "$OCF_RESKEY_fsname" ] +then + ocf_log err "Filesystem-qla-monitor must specify fsname!" +# usage + exit $OCF_ERR_GENERIC +fi + +if + [ -z "$OCF_RESKEY_conf_dir" ] +then + ocf_log err "Filesystem-qla-monitor must specify conf_dir!" +# usage + exit $OCF_ERR_GENERIC +fi + +# Check the OCF_RESKEY_ environment variables... 
+DEVICE=$OCF_RESKEY_device +FSTYPE=$OCF_RESKEY_fstype +if [ ! -z "$OCF_RESKEY_options" ]; then + options="-o $OCF_RESKEY_options" +fi +fsname=$OCF_RESKEY_fsname +conf_dir=$OCF_RESKEY_conf_dir + +blockdevice=no +case $DEVICE in + "") ocf_log err "Please set OCF_RESKEY_device to the device to be managed" + exit $OCF_ERR_ARGS + ;; + -*) # Oh... An option to mount instead... Typically -U or -L + ;; + [^/]*:/*) # An NFS filesystem specification... + ;; + //[^/]*/*) # An SMB filesystem specification... + ;; + *) if [ ! -b "$DEVICE" -a "X$OP" != Xstart ] ; then + ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" + fi + blockdevice=yes + ;; +esac + +case $FSTYPE in + ocfs2) + ocfs2_init + ;; + nfs) + : # this is kind of safe too + ;; + *) + if [ -n "$OCF_RESKEY_CRM_meta_clone_max" ]; then + ocf_log err "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!" + ocf_log err "DO NOT RUN IT AS A CLONE!" + ocf_log err "Politely refusing to proceed to avoid data corruption." + exit $OCF_ERR_GENERIC + fi + ;; +esac + +# It is possible that OCF_RESKEY_directory has one or even multiple trailing "/". +# But the output of `mount` and /proc/mounts do not. +if [ -z $OCF_RESKEY_directory ]; then + if [ X$OP = "Xstart" -o $blockdevice = "no" ]; then + ocf_log err "Please specify the directory" + exit $OCF_ERR_ARGS + fi +else + MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's/\/*$//') + : ${MOUNTPOINT:=/} + # At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/" + # TODO: / mounted via Filesystem sounds dangerous. On stop, we'll + # kill the whole system. Is that a good idea? 
+fi + +# Check to make sure the utilites are found +check_util $MODPROBE +check_util $FSCK +check_util $FUSER +check_util $MOUNT +check_util $UMOUNT + +if [ "$OP" != "monitor" ]; then + ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT" +fi + +case $OP in + start) Filesystem_start + ;; + notify) Filesystem_notify + ;; + stop) Filesystem_stop + ;; + status|monitor) Filesystem_status_qla_monitor + ;; + validate-all) Filesystem_validate_all + ;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +exit $? diff --git a/examples/heartbeat/hardware-specific/PVFS2-notify b/examples/heartbeat/hardware-specific/PVFS2-notify new file mode 100644 index 0000000..3119e32 --- /dev/null +++ b/examples/heartbeat/hardware-specific/PVFS2-notify @@ -0,0 +1,260 @@ +#!/bin/sh +# +# Resource script for PVFS2_notify +# +# Provides a generic hook for sending notifications if a PVFS2 resource is +# failed over to another node. +# +# Based on MailTo script by Alan Robertson +# +# Description: Logs a message whenever a resource group starts or stops. +# +# OCF parameters are as below: +# OCF_RESKEY_conf_dir +# OCF_RESKEY_title +# OCF_RESKEY_firsthost +# +# License: GNU General Public License (GPL) + +VARRUN=/var/run +MAILTOFILE=$VARRUN/PVFS2_notify +####################################################################### +# Initialization: + +# newer versions of heartbeat have moved the ocf-shellfuncs file +if [ -f /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs ] ; then +. /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs +else +. /usr/lib/heartbeat/ocf-shellfuncs +fi + +####################################################################### + +ARGS="$0 $*" + +us=`uname -n` + +usage() { + echo "Usage: $0 {start|stop|status|monitor|meta-data|validate-all}" +} + +meta_data() { + cat < + + +1.0 + + +This is a resource agent for PVFS2_notify. It logs or performs notification when a takeover occurs. 
+ +PVFS2_notify resource agent + + + + +Original host for this resource + +Original host for this resource + + + + + +Title/identifier for the notification. + +Subject + + + + + +Configuration directory for file system (PVFS2) + +Configuration Directory + + + + + +Name of file system (PVFS2) + +Name of file system + + + + + + + + + + + + + + +END +} + +MailProgram() { + mail -s "$1" "$email" </dev/null +# if [ $? -eq 0 ]; then +# : OK, mail to $item@localhost.localdomain +# else +# ocf_log err "Invalid email address [$email]" +# exit $OCF_ERR_ARGS +# fi +# ;; +# esac +# done + +# Any title is OK + + return $OCF_SUCCESS +} + +# +# See how we were called. +# +# The order in which heartbeat provides arguments to resource +# scripts is broken. It should be fixed. +# + +if + ( [ $# -ne 1 ] ) +then + usage + exit $OCF_ERR_GENERIC +fi + +case $1 in + meta-data) meta_data + exit $OCF_SUCCESS + ;; + # Not quite sure what to do with this one... + # We aren't a continuously running service - so it's not clear + # + status|monitor) PVFS2_notifyStatus + exit $? + ;; + usage) usage + exit $OCF_SUCCESS + ;; + *) ;; +esac + +if + [ -z "$OCF_RESKEY_conf_dir" ] +then + ocf_log err "PVFS2-notify must specify conf_dir!" +# usage + exit $OCF_ERR_GENERIC +fi + +if + [ -z "$OCF_RESKEY_fsname" ] +then + ocf_log err "PVFS2-notify must specify fsname!" +# usage + exit $OCF_ERR_GENERIC +fi + +conf_dir=$OCF_RESKEY_conf_dir +fsname=$OCF_RESKEY_fsname +title=$OCF_RESKEY_title +firsthost=$OCF_RESKEY_firsthost + +case $1 in + start) PVFS2_notifyStart + ;; + stop) PVFS2_notifyStop + ;; + validate-all) PVFS2_notifyValidateAll + ;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +exit $? 
diff --git a/examples/heartbeat/hardware-specific/apc-switched-pdu-hybrid-control.pl b/examples/heartbeat/hardware-specific/apc-switched-pdu-hybrid-control.pl new file mode 100644 index 0000000..cf4b1e3 --- /dev/null +++ b/examples/heartbeat/hardware-specific/apc-switched-pdu-hybrid-control.pl @@ -0,0 +1,138 @@ +#!/usr/bin/perl + +# requires APC MIB file, which is available for download from APC's web site + +# this script will first attempt to use SNMP to send power control commands, +# and then fail back to using SSH + +use strict; +use Getopt::Long; +use File::Find; +use File::Path; +use POSIX qw(setsid); +use Socket; + +my $host; +my $ssh_user; +my $ssh_pass; +my $snmp_user; +my $snmp_pass; +my $outlet; +my $cmd; + +my $snmp_command; + +&process_args; + +if($cmd eq "on") +{ + $snmp_command = "snmpset -v3 -a MD5 -l authNoPriv -u $snmp_user -A $snmp_pass -m \"/var/lib/filesystems/powernet387.mib\" $host PowerNet-MIB::rPDUOutletControlOutletCommand.$outlet = 1"; +} +elsif($cmd eq "off") +{ + $snmp_command = "snmpset -v3 -a MD5 -l authNoPriv -u $snmp_user -A $snmp_pass -m \"/var/lib/filesystems/powernet387.mib\" $host PowerNet-MIB::rPDUOutletControlOutletCommand.$outlet = 2"; +} +elsif($cmd eq "reboot") +{ + $snmp_command = "snmpset -v3 -a MD5 -l authNoPriv -u $snmp_user -A $snmp_pass -m \"/var/lib/filesystems/powernet387.mib\" $host PowerNet-MIB::rPDUOutletControlOutletCommand.$outlet = 3"; +} +else +{ + die("Error: $cmd is not a valid outlet command.\n"); +} + +# try snmp first +my $snmp_output = `$snmp_command 2>&1`; +if ( $? == 0 ) +{ + exit 0; +} + +# fall back to ssh +my $ssh_command = "/usr/bin/apc-switched-pdu-ssh-control.exp $host $ssh_user $ssh_pass $outlet $cmd"; + +my $ssh_output = `$ssh_command 2>&1`; +if ( $? 
== 0 ) +{ + exit 0; +} + +# if we reach this point, then neither worked +print "Error: failed to contact APC unit.\n"; +print "SNMP output: $snmp_output"; +print "SSH output: $ssh_output"; + +exit 1; + +sub process_args +{ + # Parse the command line options + # For a description of the command line options see &print_help + use vars qw( $opt_help $opt_host $opt_ssh_user $opt_ssh_pass $opt_snmp_user $opt_snmp_pass $opt_outlet $opt_cmd); + + Getopt::Long::Configure( "no_ignore_case", "bundling"); + GetOptions( "help", + "host=s", + "ssh-user=s", + "ssh-pass=s", + "snmp-user=s", + "snmp-pass=s", + "outlet=i", + "cmd=s"); + + if ($opt_help) + { + &print_help; + exit(0); + } + + if(!$opt_host || !$opt_ssh_user || !$opt_ssh_pass || + !$opt_snmp_user || !$opt_snmp_pass || !$opt_outlet || !$opt_cmd) + { + &print_help; + die "Error: missing arguments.\n"; + } + + $host = $opt_host; + $ssh_user = $opt_ssh_user; + $ssh_pass = $opt_ssh_pass; + $snmp_user = $opt_snmp_user; + $snmp_pass = $opt_snmp_pass; + $outlet = $opt_outlet; + $cmd = $opt_cmd; +} + + +# --------------- print help information ------------------------------ +sub print_help { + + print < + +options: + --help print this help and exit + --host hostname of APC unit + --ssh-user ssh username for APC unit + --ssh-pass ssh password for APC unit + --snmp-user SNMP username for APC unit + --snmp-pass SNMP authentication pass phrase (MD5) for APC unit + --outlet APC outlet to control + --cmd control command to send (on, off, or reboot) + +EOF +} + +# Local variables: +# c-basic-offset: 3 +# perl-indent-level: 3 +# tab-width: 3 +# indent-tabs-mode: nil +# shiftwidth: 3 +# End: +# +# vim: ts=3 expandtab + diff --git a/examples/heartbeat/hardware-specific/apc-switched-pdu-hybrid-monitor.pl b/examples/heartbeat/hardware-specific/apc-switched-pdu-hybrid-monitor.pl new file mode 100644 index 0000000..c6ea74f --- /dev/null +++ b/examples/heartbeat/hardware-specific/apc-switched-pdu-hybrid-monitor.pl @@ -0,0 +1,116 @@ 
+#!/usr/bin/perl + +# requires APC MIB file, which is available for download from APC's web site + +# this script will first attempt to use SNMP to monitor a power strip, +# and then fail back to using SSH + +use strict; +use Getopt::Long; +use File::Find; +use File::Path; +use POSIX qw(setsid); +use Socket; + +my $host; +my $ssh_user; +my $ssh_pass; +my $snmp_user; +my $snmp_pass; + +&process_args; + +# try snmp first +my $snmp_command = "snmpstatus -v3 -a MD5 -l authNoPriv -u $snmp_user -A $snmp_pass $host"; + +my $snmp_output = `$snmp_command 2>&1`; +if ( $? == 0 ) +{ + # print "Success, snmp\n"; + exit 0; +} + +# fall back to ssh +my $ssh_command = "/usr/bin/apc-switched-pdu-ssh-monitor.exp $host $ssh_user $ssh_pass"; + +my $ssh_output = `$ssh_command 2>&1`; +if ( $? == 0 ) +{ + # print "Success, ssh\n"; + exit 0; +} + +# if we reach this point, then neither worked +print "Error: failed to contact APC unit.\n"; +print "SNMP output: $snmp_output"; +print "SSH output: $ssh_output"; + +exit 1; + +sub process_args +{ + # Parse the command line options + # For a description of the command line options see &print_help + use vars qw( $opt_help $opt_host $opt_ssh_user $opt_ssh_pass $opt_snmp_user $opt_snmp_pass); + + Getopt::Long::Configure( "no_ignore_case", "bundling"); + GetOptions( "help", + "host=s", + "ssh-user=s", + "ssh-pass=s", + "snmp-user=s", + "snmp-pass=s"); + + if ($opt_help) + { + &print_help; + exit(0); + } + + if(!$opt_host || !$opt_ssh_user || !$opt_ssh_pass || + !$opt_snmp_user || !$opt_snmp_pass) + { + &print_help; + die "Error: missing arguments.\n"; + } + + $host = $opt_host; + $ssh_user = $opt_ssh_user; + $ssh_pass = $opt_ssh_pass; + $snmp_user = $opt_snmp_user; + $snmp_pass = $opt_snmp_pass; +} + + +# --------------- print help information ------------------------------ +sub print_help { + + print < + +options: + --help print this help and exit + --host hostname of APC unit + --ssh-user ssh username for APC unit + --ssh-pass ssh password for APC 
unit + --snmp-user SNMP username for APC unit + --snmp-pass SNMP authentication pass phrase (MD5) for APC unit + +EOF +} + +# Local variables: +# c-basic-offset: 3 +# perl-indent-level: 3 +# tab-width: 3 +# indent-tabs-mode: nil +# shiftwidth: 3 +# End: +# +# vim: ts=3 expandtab + diff --git a/examples/heartbeat/hardware-specific/apc-switched-pdu-hybrid-outlet-status.pl b/examples/heartbeat/hardware-specific/apc-switched-pdu-hybrid-outlet-status.pl new file mode 100644 index 0000000..2204f0d --- /dev/null +++ b/examples/heartbeat/hardware-specific/apc-switched-pdu-hybrid-outlet-status.pl @@ -0,0 +1,124 @@ +#!/usr/bin/perl + +# requires APC MIB file, which is available for download from APC's web site + +# this script will first attempt to use SNMP to send power control commands, +# and then fail back to using SSH + +use strict; +use Getopt::Long; +use File::Find; +use File::Path; +use POSIX qw(setsid); +use Socket; + +my $host; +my $ssh_user; +my $ssh_pass; +my $snmp_user; +my $snmp_pass; +my $outlet; + +my $snmp_command; + +&process_args; + +$snmp_command = "snmpget -v3 -a MD5 -l authNoPriv -u $snmp_user -A $snmp_pass -m \"/var/lib/filesystems/powernet387.mib\" $host PowerNet-MIB::rPDUOutletControlOutletCommand.$outlet"; + +my $snmp_output = `$snmp_command 2>&1`; +if ( $? 
== 0 ) +{ + if($snmp_output =~ /immediateOn/) + { + print "On\n"; + } + elsif($snmp_output =~ /immediateOff/) + { + print "Off\n"; + } + else + { + print "Unknown\n"; + } + + exit 0; +} + +# no fall back to ssh implemented in this script + +print "Error: failed to contact APC unit.\n"; +print "SNMP output: $snmp_output"; +print "SSH unsupported in this utility.\n"; + +exit 1; + +sub process_args +{ + # Parse the command line options + # For a description of the command line options see &print_help + use vars qw( $opt_help $opt_host $opt_ssh_user $opt_ssh_pass $opt_snmp_user $opt_snmp_pass $opt_outlet); + + Getopt::Long::Configure( "no_ignore_case", "bundling"); + GetOptions( "help", + "host=s", + "ssh-user=s", + "ssh-pass=s", + "snmp-user=s", + "snmp-pass=s", + "outlet=i"); + + if ($opt_help) + { + &print_help; + exit(0); + } + + if(!$opt_host || !$opt_ssh_user || !$opt_ssh_pass || + !$opt_snmp_user || !$opt_snmp_pass || !$opt_outlet) + { + &print_help; + die "Error: missing arguments.\n"; + } + + $host = $opt_host; + $ssh_user = $opt_ssh_user; + $ssh_pass = $opt_ssh_pass; + $snmp_user = $opt_snmp_user; + $snmp_pass = $opt_snmp_pass; + $outlet = $opt_outlet; +} + + +# --------------- print help information ------------------------------ +sub print_help { + + print < + +options: + --help print this help and exit + --host hostname of APC unit + --ssh-user ssh username for APC unit + --ssh-pass ssh password for APC unit + --snmp-user SNMP username for APC unit + --snmp-pass SNMP authentication pass phrase (MD5) for APC unit + --outlet APC outlet to control + +EOF +} + +# Local variables: +# c-basic-offset: 3 +# perl-indent-level: 3 +# tab-width: 3 +# indent-tabs-mode: nil +# shiftwidth: 3 +# End: +# +# vim: ts=3 expandtab + diff --git a/examples/heartbeat/hardware-specific/apc-switched-pdu-ssh-control.exp b/examples/heartbeat/hardware-specific/apc-switched-pdu-ssh-control.exp new file mode 100644 index 0000000..6374f24 --- /dev/null +++ 
b/examples/heartbeat/hardware-specific/apc-switched-pdu-ssh-control.exp @@ -0,0 +1,217 @@ +#!/usr/bin/expect -f + +# expect script to login to a baytech management module and control power +# to one of its outlets + +# gather command line arguments into variables +set host [lrange $argv 0 0] +set user [lrange $argv 1 1] +set password [lrange $argv 2 2] +set outlet [lrange $argv 3 3] +set command [lrange $argv 4 4] + +# complain if we don't get exactly 5 arguments +if {$argc!=5} { + send_user "Usage: apc-switched-pdu-ssh-control.exp \n" + send_user " NOTE: may be \"on\" \"off\" or \"reboot\"\n" + exit 1 +} + +set apc_command "" + +if { [string compare $command "on"] == 0 } { + set apc_command "1" +} +if { [string compare $command "off"] == 0 } { + set apc_command "2" +} +if { [string compare $command "reboot"] == 0 } { + set apc_command "3" +} + +if { [string compare $apc_command ""] == 0 } { + send_error "Error: must be one of on|off|reboot.\n" + exit 1 +} + +# use a 30 second timeout +set timeout 30 + +# this disables showing interaction on stdout. It should be commented +# if you are trying to debug this script and want to see what it is doing +log_user 0 + +# delete old log file and start a new one +#system rm -f /tmp/expect.log +#log_file -a /tmp/expect.log + +# open ssh connection. 
Turn off strict host checking so ssh doesn't ask us +# if it is ok to connect to this hostname +spawn ssh "-oStrictHostKeyChecking no" $user@$host + +# Look for passwod prompt +expect { + "*?assword:*" {} + default { + # password prompt never showed up + send_user "failed to ssh to host $host\n" + exit 1 + } +} + +# Send password aka $password +send -- "$password\r" + +# look for top level prompt +expect { + "> *" {} + default { + # our user name and password did not work + send_user "Error: host $host failed to accept username and password\n" + exit 1 + } +} + +send -- "1\r" + +expect { + "> *" {} + default { + # our user name and password did not work + send_user "Error: unable to control outlet\n" + exit 1 + } +} + +send -- "2\r" + +expect { + "> *" {} + default { + # our user name and password did not work + send_user "Error: unable to control outlet\n" + exit 1 + } +} + +send -- "1\r" + +while {1} { + expect { + "*to continue*" {send -- "\r"} + "> " {break } + default {} + } +} + +send -- "$outlet\r" + +expect { + "> *" {} + default { + # our user name and password did not work + send_user "Error: unable to control outlet\n" + exit 1 + } +} + +send -- "1\r" + +expect { + "> *" {} + default { + # our user name and password did not work + send_user "Error: unable to control outlet\n" + exit 1 + } +} + + +send -- "$apc_command\r" + +expect { + "*to continue*" {} + default { + # our user name and password did not work + send_user "Error: unable to control outlet\n" + exit 1 + } +} + +send -- "YES\r" + +while {1} { + expect { + "*to continue*" {send -- "\r"} + "> " {break } + default {} + } +} + +send -- \003 + +expect { + "> *" {} + default { + # our user name and password did not work + send_user "Error: unable to control outlet\n" + exit 1 + } +} + +send -- \003 + +expect { + "> *" {} + default { + # our user name and password did not work + send_user "Error: unable to control outlet\n" + exit 1 + } +} + +send -- \003 + +while {1} { + expect { + "*to continue*" 
{send -- "\r"} + "> " {break } + default {} + } +} + +send -- \003 + +expect { + "> *" {} + default { + # our user name and password did not work + send_user "Error: unable to control outlet\n" + exit 1 + } +} + +send -- \003 + +expect { + "> *" {} + default { + # our user name and password did not work + send_user "Error: unable to control outlet\n" + exit 1 + } +} + +# send logout command +send -- "4\r" + +expect { + eof {} + default { + send_user "Error: could not log out cleanly\n" + close + wait + exit 1 + } +} + diff --git a/examples/heartbeat/hardware-specific/apc-switched-pdu-ssh-monitor.exp b/examples/heartbeat/hardware-specific/apc-switched-pdu-ssh-monitor.exp new file mode 100644 index 0000000..d374312 --- /dev/null +++ b/examples/heartbeat/hardware-specific/apc-switched-pdu-ssh-monitor.exp @@ -0,0 +1,65 @@ +#!/usr/bin/expect -f + +# expect script to login to a baytech management module and control power +# to one of its outlets + +# gather command line arguments into variables +set host [lrange $argv 0 0] +set user [lrange $argv 1 1] +set password [lrange $argv 2 2] + +# complain if we don't get exactly 5 arguments +if {$argc!=3} { + send_user "Usage: apc-switched-pdu-ssh-monitor.exp \n" + exit 1 +} + +# use a 30 second timeout +set timeout 30 + +# this disables showing interaction on stdout. It should be commented +# if you are trying to debug this script and want to see what it is doing +log_user 0 + +# delete old log file and start a new one +#system rm -f /tmp/expect.log +#log_file -a /tmp/expect.log + +# open ssh connection. 
Turn off strict host checking so ssh doesn't ask us +# if it is ok to connect to this hostname +spawn ssh "-oStrictHostKeyChecking no" $user@$host + +# Look for passwod prompt +expect { + "*?assword:*" {} + default { + # password prompt never showed up + send_user "failed to ssh to host $host\n" + exit 1 + } +} + +# Send password aka $password +send -- "$password\r" +# look for top level prompt +expect { + "*> *" {} + default { + # our user name and password did not work + send_user "Error: host $host failed to accept username and password\n" + exit 1 + } +} + +# send logout command +send -- "4\r" + +expect { + eof {} + default { + send_user "Error: could not log out cleanly\n" + close + wait + exit 1 + } +} diff --git a/examples/heartbeat/hardware-specific/baytech-mgmt-control.exp b/examples/heartbeat/hardware-specific/baytech-mgmt-control.exp new file mode 100644 index 0000000..8f82037 --- /dev/null +++ b/examples/heartbeat/hardware-specific/baytech-mgmt-control.exp @@ -0,0 +1,119 @@ +#!/usr/bin/expect -f + +# expect script to login to a baytech management module and control power +# to one of its outlets + +# gather command line arguments into variables +set host [lrange $argv 0 0] +set user [lrange $argv 1 1] +set password [lrange $argv 2 2] +set strip [lrange $argv 3 3] +set outlet [lrange $argv 4 4] +set command [lrange $argv 5 5] + +# complain if we don't get exactly 5 arguments +if {$argc!=6} { + send_user "Usage: baytech.exp \n" + send_user " NOTE: may be \"on\" \"off\" or \"reboot\"\n" + exit 1 +} + +set bay_command "" + +if { [string compare $command "on"] == 0 } { + set bay_command "On" +} +if { [string compare $command "off"] == 0 } { + set bay_command "Off" +} +if { [string compare $command "reboot"] == 0 } { + set bay_command "Reboot" +} + +if { [string compare $bay_command ""] == 0 } { + send_error "Error: must be one of on|off|reboot.\n" + exit 1 +} + +# use a 15 second timeout +set timeout 15 + +# this disables showing interaction on stdout. 
It should be commented +# if you are trying to debug this script and want to see what it is doing +log_user 0 + +# delete old log file and start a new one +#system rm -f /tmp/expect.log +#log_file -a /tmp/expect.log + +# open ssh connection. Turn off strict host checking so ssh doesn't ask us +# if it is ok to connect to this hostname +spawn ssh "-oStrictHostKeyChecking no" $user@$host + +# Look for passwod prompt +expect { + "*?assword:*" {} + default { + # password prompt never showed up + send_user "failed to ssh to host $host\n" + exit 1 + } +} + +# Send password aka $password +send -- "$password\r" +# look for top level prompt +expect { + "*Enter Request*" {} + default { + # our user name and password did not work + send_user "Error: host $host failed to accept username and password\n" + exit 1 + } +} + +# send strip name, wait, and then send an extra carriage return +# (for some reason the Baytech will not always continue to the next screen on +# its own) +send -- "$strip\r" +sleep 5 +send -- "\r" + +# wait for prompt for particular strip, then send command to power on +expect { + "*RPC-28A>*" {} + default { + send_user "Error: failed to select strip $strip\n" + exit 1 + } +} +send -- "$bay_command $outlet\r" + +# wait for the Y/N confirmation and send a Y +expect { + "*Y/N*" {} + default { + send_user "Error: failed to issue command for outlet $outlet\n" + exit 1 + } +} + +send -- "Y\r" + +# wait for command to complete +expect { + "*RPC-28A>*" {} + default { + send_user "Error: failed to confirm command for outlet $outlet\n" + exit 1 + } +} + +# Ordinarily we would now send some sort of "logout" command and then expect +# eof. 
For some reason the Baytech devices will not let us logout gracefully +# however, so we instead just close the connection and return after waiting +# for the result + +close +wait + diff --git a/examples/heartbeat/hardware-specific/baytech-mgmt-monitor.exp b/examples/heartbeat/hardware-specific/baytech-mgmt-monitor.exp new file mode 100644 index 0000000..fe052bb --- /dev/null +++ b/examples/heartbeat/hardware-specific/baytech-mgmt-monitor.exp @@ -0,0 +1,59 @@ +#!/usr/bin/expect -f + +# expect script to login to a baytech management module and control power +# to one of its outlets + +# gather command line arguments into variables +set host [lrange $argv 0 0] +set user [lrange $argv 1 1] +set password [lrange $argv 2 2] + +# complain if we don't get exactly 5 arguments +if {$argc!=3} { + send_user "Usage: baytech.exp \n" + exit 1 +} + +# use a 15 second timeout +set timeout 15 + +# this disables showing interaction on stdout. It should be commented +# if you are trying to debug this script and want to see what it is doing +log_user 0 + +# delete old log file and start a new one +#system rm -f /tmp/expect.log +#log_file -a /tmp/expect.log + +# open ssh connection. 
Turn off strict host checking so ssh doesn't ask us +# if it is ok to connect to this hostname +spawn ssh "-oStrictHostKeyChecking no" $user@$host + +# Look for passwod prompt +expect { + "*?assword:*" {} + default { + # password prompt never showed up + send_user "failed to ssh to host $host\n" + exit 1 + } +} + +# Send password aka $password +send -- "$password\r" +# look for top level prompt +expect { + "*Enter Request*" {} + default { + # our user name and password did not work + send_user "Error: host $host failed to accept username and password\n" + exit 1 + } +} + +# send logout command +send -- "T\r" + +close +wait + diff --git a/examples/heartbeat/hardware-specific/pvfs2-stonith-plugin b/examples/heartbeat/hardware-specific/pvfs2-stonith-plugin new file mode 100644 index 0000000..b746a4e --- /dev/null +++ b/examples/heartbeat/hardware-specific/pvfs2-stonith-plugin @@ -0,0 +1,91 @@ +#!/bin/sh +# +# External STONITH module for power control of PVFS2 servers + +case $1 in +gethosts) + /usr/bin/fs-power-gethosts.pl --fs-name $fs_name + exit 0 + ;; +on) + /usr/bin/fs-power-control.pl --fs-name $fs_name --conf-dir $fs_conf_dir --host $2 --command on + RETVAL=$? + if [ $RETVAL -ne 0 ]; then + exit 1 + fi + exit 0 + ;; +off) + /usr/bin/fs-power-control.pl --fs-name $fs_name --conf-dir $fs_conf_dir --host $2 --command off + RETVAL=$? + if [ $RETVAL -ne 0 ]; then + exit 1 + fi + exit 0 + ;; +reset) + /usr/bin/fs-power-control.pl --fs-name $fs_name --conf-dir $fs_conf_dir --host $2 --command reboot + RETVAL=$? + if [ $RETVAL -ne 0 ]; then + exit 1 + fi + exit 0 + ;; +status) + /usr/bin/fs-power-monitor.pl --conf-dir $fs_conf_dir --fs-name $fs_name + RETVAL=$? 
+ if [ $RETVAL -ne 0 ]; then + exit 1 + fi + exit 0 + ;; +getconfignames) + echo "fs_name" + echo "fs_conf_dir" + exit 0 + ;; +getinfo-devid) + echo "PVFS2 stonith device" + exit 0 + ;; +getinfo-devname) + echo "PVFS2 stonith device" + exit 0 + ;; +getinfo-devdescr) + echo "Power control for PVFS2 servers" + exit 0 + ;; +getinfo-devurl) + echo "NONE" + exit 0 + ;; +getinfo-xml) + cat << XML + + + + +file system name + + +Name of the file system that this device is being used with + + + + + +file system configuration directory + + +Path to the shared configuration directory for the file system + + + +XML + exit 0 + ;; +*) + exit 1 + ;; +esac diff --git a/examples/heartbeat/hardware-specific/qla-monitor.pl b/examples/heartbeat/hardware-specific/qla-monitor.pl new file mode 100644 index 0000000..69129d9 --- /dev/null +++ b/examples/heartbeat/hardware-specific/qla-monitor.pl @@ -0,0 +1,97 @@ +#!/usr/bin/perl + +use strict; +use Getopt::Long; +use File::Find; +use File::Path; +use POSIX qw(setsid); +use Socket; + +my $device = undef; +my $mon_device = undef; + +&process_args; + +# strip path and partition off of device name +if($device =~ /\/+dev\/+([a-zA-Z]+)[1234567890]*/) +{ + $mon_device = $1; +} +else +{ + print "Error: poorly formated device name.\n"; + exit 1; +} + +my $devcount = `powermt display dev=$mon_device 2>&1 |grep -c -E "(qla)|(lpfc)"`; +if ($devcount < 1) +{ + print "Error: could not find device $mon_device.\n"; + exit 1; +} +chomp($devcount); + +my $deadcount = `powermt display dev=$mon_device 2>&1 |grep -E "(qla)|(lpfc)" | grep -c dead`; +chomp($deadcount); + +if($devcount == $deadcount) +{ + print "Error: all paths for device $device are dead.\n"; + exit 1; +} + +exit 0; + +sub process_args +{ + # Parse the command line options + # For a description of the command line options see &print_help + use vars qw( $opt_help $opt_device); + + Getopt::Long::Configure( "no_ignore_case", "bundling"); + GetOptions( "help", + "device=s"); + + if ($opt_help) + { 
+ &print_help; + exit(0); + } + + if($opt_device) + { + $device = $opt_device; + } + else + { + &print_help; + die "Error: must specify device.\n"; + } +} + +# --------------- print help information ------------------------------ +sub print_help { + + print < + +options: + --help Print this help and exit. + --device SAN device (such as /dev/emcpowera1). + +EOF +} + +# Local variables: +# c-basic-offset: 3 +# perl-indent-level: 3 +# tab-width: 3 +# indent-tabs-mode: nil +# shiftwidth: 3 +# End: +# +# vim: ts=3 expandtab + diff --git a/examples/heartbeat/pvfs2-ha-heartbeat-configure.sh b/examples/heartbeat/pvfs2-ha-heartbeat-configure.sh index 4c04b77..d4440b6 100644 --- a/examples/heartbeat/pvfs2-ha-heartbeat-configure.sh +++ b/examples/heartbeat/pvfs2-ha-heartbeat-configure.sh @@ -24,12 +24,13 @@ then fi # put mcast information in the middle (ordering is important) -echo "logfacility user" > ${OUTDIR}/ha.cf +echo "use_logd yes" > ${OUTDIR}/ha.cf echo "mcast eth0 ${MCAST} 3335 1 0" >> ${OUTDIR}/ha.cf -echo "auto_failback off" >> ${OUTDIR}/ha.cf -echo "use_logd no" >> ${OUTDIR}/ha.cf -echo "respawn hacluster /usr/lib/heartbeat/cibmon -d" >> ${OUTDIR}/ha.cf +echo "auto_failback yes" >> ${OUTDIR}/ha.cf echo "crm yes" >> ${OUTDIR}/ha.cf +echo "keepalive 1" >> ${OUTDIR}/ha.cf +echo "deadtime 10" >> ${OUTDIR}/ha.cf +echo "initdead 80" >> ${OUTDIR}/ha.cf echo "compression bz2" >> ${OUTDIR}/ha.cf # shift arguments down diff --git a/include/pvfs2-compat.h b/include/pvfs2-compat.h new file mode 100644 index 0000000..fa465ed --- /dev/null +++ b/include/pvfs2-compat.h @@ -0,0 +1,85 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +/* + * This file provides macro definitions that can help ease the + * transition to the new function prototypes introduced in + * version 2.7.1 + */ + +#ifndef PVFS2_COMPAT_H +#define PVFS2_COMPAT_H + +#include "pvfs2.h" +#include "pvfs2-sysint.h" + +#if PVFS2_VERSION_MAJOR == 2 && PVFS2_VERSION_MINOR > 7 + +#define PVFS_sys_create(entry_name,ref,attr,credentials,dist,resp)\ + PVFS_sys_create(entry_name,ref,attr,credentials,dist,resp,\ + PVFS_SYS_LAYOUT_DEFAULT,PVFS_HINT_NULL) + +#define PVFS_sys_lookup(fs_id,name,credentials,resp,follow_link)\ + PVFS_sys_lookup(fs_id,name,credentials,resp,follow_link,PVFS_HINT_NULL) + + +#define PVFS_sys_getattr(ref,attrmask,credentials,resp) \ + PVFS_sys_getattr(ref,attrmask,credentials,resp,PVFS_HINT_NULL) + +#define PVFS_sys_setattr(ref,attr,credentials) \ + PVFS_sys_setattr(ref,attr,credentials,PVFS_HINT_NULL) + +#define PVFS_sys_ref_lookup(fs_id,pathname,parent_ref,\ + credentials,resp, follow_link) \ + PVFS_sys_ref_lookup(fs_id,pathname,parent_ref,credentials,resp,\ + follow_link, PVFS_HINT_NULL) + +#undef PVFS_sys_read +#define PVFS_sys_read(ref,req,off,buf,mem_req,creds,resp) \ + PVFS_sys_io(ref,req,off,buf,mem_req,creds,resp,PVFS_IO_READ,PVFS_HINT_NULL) + +#undef PVFS_sys_write +#define PVFS_sys_write(ref,req,off,buf,mem_req,creds,resp) \ + PVFS_sys_io(ref,req,off,buf,mem_req,creds,resp,PVFS_IO_WRITE,PVFS_HINT_NULL) + +#undef PVFS_isys_read +#define PVFS_isys_read(ref,req,off,buf,mem_req,creds,resp,opid,ptr)\ + PVFS_isys_io(ref,req,off,buf,mem_req,creds,resp,PVFS_IO_READ,opid,PVFS_HINT_NULL,ptr) + +#undef PVFS_isys_write +#define PVFS_isys_write(ref,req,off,buf,mem_req,creds,resp,opid,ptr)\ + PVFS_isys_io(ref,req,off,buf,mem_req,creds,resp,PVFS_IO_WRITE,opid,PVFS_HINT_NULL,ptr) + +#define PVFS_sys_remove(entry,ref,creds)\ + PVFS_sys_remove(entry,ref,creds,PVFS_HINT_NULL) + +#define PVFS_sys_mkdir(entry,ref,attr,creds,resp)\ + PVFS_sys_mkdir(entry,ref,attr,creds,resp,PVFS_HINT_NULL) + +#define 
PVFS_sys_readdir(ref,token,count,creds,resp)\ + PVFS_sys_readdir(ref,token,count,creds,resp,PVFS_HINT_NULL) + +#define PVFS_sys_truncate(ref,size,creds)\ + PVFS_sys_truncate(ref,size,creds,PVFS_HINT_NULL) + +#define PVFS_sys_getparent(ref,name,cred,resp) \ + PVFS_sys_getparent(ref,name,cred,resp,PVFS_HINT_NULL) + +#define PVFS_sys_flush(ref,cred)\ + PVFS_sys_flush(ref,cred,PVFS_HINT_NULL) + +#define PVFS_sys_symlink(tof,ref,from,attr,cred,resp) \ + PVFS_sys_symlink(tof,ref,from,attr,cred,resp,PVFS_HINT_NULL) + +#define PVFS_sys_rename(from,fref,to,tref,cred) \ + PVFS_sys_rename(from,fref,to,tref,cred,PVFS_HINT_NULL) + +#define PVFS_sys_statfs(fsid,creds,resp) \ + PVFS_sys_statfs(fsid,creds,resp,PVFS_HINT_NULL) +#endif + + +#endif diff --git a/include/pvfs2-debug.h b/include/pvfs2-debug.h index 49ebc90..3309322 100644 --- a/include/pvfs2-debug.h +++ b/include/pvfs2-debug.h @@ -73,21 +73,17 @@ #define GOSSIP_BSTREAM_DEBUG ((uint64_t)1 << 51) #define GOSSIP_BMI_DEBUG_PORTALS ((uint64_t)1 << 52) #define GOSSIP_USER_DEV_DEBUG ((uint64_t)1 << 53) -#define GOSSIP_BMI_DEBUG_OSD ((uint64_t)1 << 54) - -/* NOTE: if you want your gossip flag to be controllable from - * pvfs2-set-debugmask you have to add it in - * src/common/misc/pvfs2-debug.c - */ +#define GOSSIP_DIRECTIO_DEBUG ((uint64_t)1 << 54) +#define GOSSIP_MGMT_DEBUG ((uint64_t)1 << 55) +#define GOSSIP_MIRROR_DEBUG ((uint64_t)1 << 56) +#define GOSSIP_WIN_CLIENT_DEBUG ((uint64_t)1 << 57) +#define GOSSIP_BMI_DEBUG_OSD ((uint64_t)1 << 58) #define GOSSIP_BMI_DEBUG_ALL (uint64_t) \ (GOSSIP_BMI_DEBUG_TCP + GOSSIP_BMI_DEBUG_CONTROL + \ GOSSIP_BMI_DEBUG_GM + GOSSIP_BMI_DEBUG_OFFSETS + GOSSIP_BMI_DEBUG_IB \ + GOSSIP_BMI_DEBUG_MX + GOSSIP_BMI_DEBUG_PORTALS + GOSSIP_BMI_DEBUG_OSD) -uint64_t PVFS_debug_eventlog_to_mask( - const char *event_logging); - const char *PVFS_debug_get_next_debug_keyword( int position); @@ -110,13 +106,159 @@ const char *PVFS_debug_get_next_debug_keyword( #define GOSSIP_MAX_NR 15 #define GOSSIP_MAX_DEBUG 
(((uint64_t)1 << GOSSIP_MAX_NR) - 1) -/* - * To allow these masks to be settable from pvfs2-client-core, - * edit pvfs2-debug.c to add human readable event mask strings - * in s_kmod_keyword_mask_map[] array. - */ -uint64_t PVFS_kmod_eventlog_to_mask( - const char *event_logging); + +/*function prototypes*/ +uint64_t PVFS_kmod_eventlog_to_mask(const char *event_logging); +uint64_t PVFS_debug_eventlog_to_mask(const char *event_logging); +char * PVFS_debug_mask_to_eventlog(uint64_t mask); +char * PVFS_kmod_mask_to_eventlog(uint64_t mask); + +/* a private internal type */ +typedef struct +{ + const char *keyword; + uint64_t mask_val; +} __keyword_mask_t; + +#define __DEBUG_ALL ((uint64_t) -1) + +/* map all config keywords to pvfs2 debug masks here */ +static __keyword_mask_t s_keyword_mask_map[] = +{ + /* Log trove debugging info. Same as 'trove'.*/ + { "storage", GOSSIP_TROVE_DEBUG }, + /* Log trove debugging info. Same as 'storage'. */ + { "trove", GOSSIP_TROVE_DEBUG }, + /* Log trove operations. */ + { "trove_op", GOSSIP_TROVE_OP_DEBUG }, + /* Log network debug info. */ + { "network", GOSSIP_BMI_DEBUG_ALL }, + /* Log server info, including new operations. */ + { "server", GOSSIP_SERVER_DEBUG }, + /* Log client sysint info. This is only useful for the client. */ + { "client", GOSSIP_CLIENT_DEBUG }, + /* Debug the varstrip distribution */ + { "varstrip", GOSSIP_VARSTRIP_DEBUG }, + /* Log job info */ + { "job", GOSSIP_JOB_DEBUG }, + /* Debug PINT_process_request calls. EXTREMELY verbose! */ + { "request", GOSSIP_REQUEST_DEBUG }, + /* Log request scheduler events */ + { "reqsched", GOSSIP_REQ_SCHED_DEBUG }, + /* Log the flow protocol events, including flowproto_multiqueue */ + { "flowproto", GOSSIP_FLOW_PROTO_DEBUG }, + /* Log flow calls */ + { "flow", GOSSIP_FLOW_DEBUG }, + /* Debug the client name cache. Only useful on the client. */ + { "ncache", GOSSIP_NCACHE_DEBUG }, + /* Debug read-ahead cache events. Only useful on the client. 
*/ + { "mmaprcache", GOSSIP_MMAP_RCACHE_DEBUG }, + /* Debug the attribute cache. Only useful on the client. */ + { "acache", GOSSIP_ACACHE_DEBUG }, + /* Log/Debug distribution calls */ + { "distribution", GOSSIP_DIST_DEBUG }, + /* Debug the server-side dbpf attribute cache */ + { "dbpfattrcache", GOSSIP_DBPF_ATTRCACHE_DEBUG }, + /* Debug the client lookup state machine. */ + { "lookup", GOSSIP_LOOKUP_DEBUG }, + /* Debug the client remove state machine. */ + { "remove", GOSSIP_REMOVE_DEBUG }, + /* Debug the server getattr state machine. */ + { "getattr", GOSSIP_GETATTR_DEBUG }, + /* Debug the server setattr state machine. */ + { "setattr", GOSSIP_SETATTR_DEBUG }, + /* vectored getattr server state machine */ + { "listattr", GOSSIP_LISTATTR_DEBUG }, + /* Debug the client and server get ext attributes SM. */ + { "geteattr", GOSSIP_GETEATTR_DEBUG }, + /* Debug the client and server set ext attributes SM. */ + { "seteattr", GOSSIP_SETEATTR_DEBUG }, + /* Debug the readdir operation (client and server) */ + { "readdir", GOSSIP_READDIR_DEBUG }, + /* Debug the mkdir operation (server only) */ + { "mkdir", GOSSIP_MKDIR_DEBUG }, + /* Debug the io operation (reads and writes) + * for both the client and server */ + { "io", GOSSIP_IO_DEBUG }, + /* Debug the server's open file descriptor cache */ + { "open_cache", GOSSIP_DBPF_OPEN_CACHE_DEBUG }, + /* Debug permissions checking on the server */ + { "permissions", GOSSIP_PERMISSIONS_DEBUG }, + /* Debug the cancel operation */ + { "cancel", GOSSIP_CANCEL_DEBUG }, + /* Debug the msgpair state machine */ + { "msgpair", GOSSIP_MSGPAIR_DEBUG }, + /* Debug the client core app */ + { "clientcore", GOSSIP_CLIENTCORE_DEBUG }, + /* Debug the client timing state machines (job timeout, etc.) */ + { "clientcore_timing", GOSSIP_CLIENTCORE_TIMING_DEBUG }, + /* network encoding */ + { "endecode", GOSSIP_ENDECODE_DEBUG }, + /* Show server file (metadata) accesses (both modify and read-only).
*/ + { "access", GOSSIP_ACCESS_DEBUG }, + /* Show more detailed server file accesses */ + { "access_detail", GOSSIP_ACCESS_DETAIL_DEBUG }, + /* Debug the listeattr operation */ + { "listeattr", GOSSIP_LISTEATTR_DEBUG }, + /* Debug the state machine management code */ + { "sm", GOSSIP_STATE_MACHINE_DEBUG }, + /* Debug the metadata dbpf keyval functions */ + { "keyval", GOSSIP_DBPF_KEYVAL_DEBUG }, + /* Debug the metadata sync coalescing code */ + { "coalesce", GOSSIP_DBPF_COALESCE_DEBUG }, + /* Display the hostnames instead of IP addrs in debug output */ + { "access_hostnames", GOSSIP_ACCESS_HOSTNAMES }, + /* Show the client device events */ + { "user_dev", GOSSIP_USER_DEV_DEBUG }, + /* Debug the fsck tool */ + { "fsck", GOSSIP_FSCK_DEBUG }, + /* Debug the bstream code */ + { "bstream", GOSSIP_BSTREAM_DEBUG }, + /* Debug trove in direct io mode */ + {"directio", GOSSIP_DIRECTIO_DEBUG}, + /* Debug mirroring process */ + {"mirror",GOSSIP_MIRROR_DEBUG}, + /* Windows client */ + {"win_client", GOSSIP_WIN_CLIENT_DEBUG}, + /* Everything except the periodic events. 
Useful for debugging */ + { "verbose", + (__DEBUG_ALL & ~(GOSSIP_PERFCOUNTER_DEBUG | GOSSIP_STATE_MACHINE_DEBUG | + GOSSIP_ENDECODE_DEBUG | GOSSIP_USER_DEV_DEBUG)) + }, + /* No debug output */ + { "none", GOSSIP_NO_DEBUG }, + /* Everything */ + { "all", __DEBUG_ALL } +}; +#undef __DEBUG_ALL + +/* map all kmod keywords to kmod debug masks here */ +static __keyword_mask_t s_kmod_keyword_mask_map[] = +{ + {"super" , GOSSIP_SUPER_DEBUG}, + {"inode" , GOSSIP_INODE_DEBUG}, + {"file" , GOSSIP_FILE_DEBUG}, + {"dir" , GOSSIP_DIR_DEBUG}, + {"utils" , GOSSIP_UTILS_DEBUG}, + {"wait" , GOSSIP_WAIT_DEBUG}, + {"acl" , GOSSIP_ACL_DEBUG}, + {"dcache", GOSSIP_DCACHE_DEBUG}, + {"dev" , GOSSIP_DEV_DEBUG}, + {"name" , GOSSIP_NAME_DEBUG}, + {"bufmap", GOSSIP_BUFMAP_DEBUG}, + {"cache" , GOSSIP_CACHE_DEBUG}, + {"proc" , GOSSIP_PROC_DEBUG}, + {"xattr" , GOSSIP_XATTR_DEBUG}, + {"init" , GOSSIP_INIT_DEBUG}, + {"none" , GOSSIP_NO_DEBUG}, + {"all" , GOSSIP_MAX_DEBUG} +}; + +static const int num_kmod_keyword_mask_map = (int) \ +(sizeof(s_kmod_keyword_mask_map) / sizeof(__keyword_mask_t)); + +static const int num_keyword_mask_map = (int) \ +(sizeof(s_keyword_mask_map) / sizeof(__keyword_mask_t)); #endif /* __PVFS2_DEBUG_H */ diff --git a/include/pvfs2-dist-basic.h b/include/pvfs2-dist-basic.h index 6f28539..86132c7 100644 --- a/include/pvfs2-dist-basic.h +++ b/include/pvfs2-dist-basic.h @@ -12,6 +12,9 @@ #define PVFS_DIST_BASIC_NAME_SIZE 11 struct PVFS_basic_params_s { +#ifdef WIN32 + int field; +#endif }; typedef struct PVFS_basic_params_s PVFS_basic_params; diff --git a/include/pvfs2-encode-stubs.h b/include/pvfs2-encode-stubs.h index cd6a094..5b581e6 100644 --- a/include/pvfs2-encode-stubs.h +++ b/include/pvfs2-encode-stubs.h @@ -38,38 +38,40 @@ * expect these noop #defines. 
*/ #ifdef __PINT_REQPROTO_ENCODE_FUNCS_C -# include "src/proto/endecode-funcs.h" +#include "src/proto/endecode-funcs.h" #else /* __PINT_REQPROTO_ENCODE_FUNCS_C */ /* dummy declarations to turn off functions */ -#define endecode_fields_1(n,t1,x1) -#define endecode_fields_1_struct(n,t1,x1) -#define endecode_fields_2(n,t1,x1,t2,x2) -#define endecode_fields_2_struct(n,t1,x1,t2,x2) -#define endecode_fields_3(n,t1,x1,t2,x2,t3,x3) -#define endecode_fields_3_struct(n,t1,x1,t2,x2,t3,x3) -#define endecode_fields_4(n,t1,x1,t2,x2,t3,x3,t4,x4) -#define endecode_fields_4_struct(n,t1,x1,t2,x2,t3,x3,t4,x4) -#define endecode_fields_5(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5) -#define endecode_fields_5_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5) -#define endecode_fields_6(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6) -#define endecode_fields_7_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7) -#define endecode_fields_8_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7,t8,x8) -#define endecode_fields_9_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7,t8,x8,t9,x9) -#define endecode_fields_10_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7,t8,x8,t9,x9,t10,x10) -#define endecode_fields_11_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7,t8,x8,t9,x9,t10,x10,t11,x11) -#define endecode_fields_12(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7,t8,x8,t9,x9,t10,x10,t11,x11,t12,x12) +#define endecode_fields_1(n,t1,x1) struct endecode_fake_struct +#define endecode_fields_1_struct(n,t1,x1) struct endecode_fake_struct +#define endecode_fields_2(n,t1,x1,t2,x2) struct endecode_fake_struct +#define endecode_fields_2_struct(n,t1,x1,t2,x2) struct endecode_fake_struct +#define endecode_fields_3(n,t1,x1,t2,x2,t3,x3) struct endecode_fake_struct +#define endecode_fields_3_struct(n,t1,x1,t2,x2,t3,x3) struct endecode_fake_struct +#define endecode_fields_4(n,t1,x1,t2,x2,t3,x3,t4,x4) struct endecode_fake_struct +#define endecode_fields_4_struct(n,t1,x1,t2,x2,t3,x3,t4,x4) struct endecode_fake_struct +#define 
endecode_fields_5(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5) struct endecode_fake_struct +#define endecode_fields_5_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5) struct endecode_fake_struct +#define endecode_fields_6(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6) struct endecode_fake_struct +#define endecode_fields_7_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7) struct endecode_fake_struct +#define endecode_fields_8_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7,t8,x8) struct endecode_fake_struct +#define endecode_fields_9_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7,t8,x8,t9,x9) struct endecode_fake_struct +#define endecode_fields_10_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7,t8,x8,t9,x9,t10,x10) struct endecode_fake_struct +#define endecode_fields_11_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7,t8,x8,t9,x9,t10,x10,t11,x11) struct endecode_fake_struct +#define endecode_fields_12(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7,t8,x8,t9,x9,t10,x10,t11,x11,t12,x12) struct endecode_fake_struct -#define endecode_fields_1a(n,t1,x1,tn1,n1,ta1,a1) -#define endecode_fields_1a_struct(n,t1,x1,tn1,n1,ta1,a1) -#define endecode_fields_1aa_struct(n,t1,x1,tn1,n1,ta1,a1,ta2,a2) -#define endecode_fields_2a_struct(n,t1,x1,t2,x2,tn1,n1,ta1,a1) -#define endecode_fields_2aa_struct(n,t1,x1,t2,x2,tn1,n1,ta1,a1,ta2,a2) -#define endecode_fields_3a_struct(n,t1,x1,t2,x2,t3,x3,tn1,n1,ta1,a1) -#define endecode_fields_4aa_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,tn1,n1,ta1,a1,ta2,a2) -#define endecode_fields_1a_1a_struct(n,t1,x1,tn1,n1,ta1,a1,t2,x2,tn2,n2,ta2,a2) -#define endecode_fields_4a_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,tn1,n1,ta1,a1) -#define endecode_fields_5a_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,tn1,n1,ta1,a1) +#define endecode_fields_1a(n,t1,x1,tn1,n1,ta1,a1) struct endecode_fake_struct +#define endecode_fields_1a_struct(n,t1,x1,tn1,n1,ta1,a1) struct endecode_fake_struct +#define endecode_fields_1aa_struct(n,t1,x1,tn1,n1,ta1,a1,ta2,a2) struct endecode_fake_struct +#define 
endecode_fields_2a_struct(n,t1,x1,t2,x2,tn1,n1,ta1,a1) struct endecode_fake_struct +#define endecode_fields_2aa_struct(n,t1,x1,t2,x2,tn1,n1,ta1,a1,ta2,a2) struct endecode_fake_struct +#define endecode_fields_3a_struct(n,t1,x1,t2,x2,t3,x3,tn1,n1,ta1,a1) struct endecode_fake_struct +#define endecode_fields_4aa_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,tn1,n1,ta1,a1,ta2,a2) struct endecode_fake_struct +#define endecode_fields_1a_1a_struct(n,t1,x1,tn1,n1,ta1,a1,t2,x2,tn2,n2,ta2,a2) struct endecode_fake_struct +#define endecode_fields_4a_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,tn1,n1,ta1,a1) struct endecode_fake_struct +#define endecode_fields_5a_struct(n,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,tn1,n1,ta1,a1) struct endecode_fake_struct + +#define encode_enum_union_2_struct(name, ename, uname, ut1, un1, en1, ut2, un2, en2) struct endecode_fake_struct #endif /* __PINT_REQPROTO_ENCODE_FUNCS_C */ diff --git a/include/pvfs2-event.h b/include/pvfs2-event.h index 35f89c2..bdf3042 100644 --- a/include/pvfs2-event.h +++ b/include/pvfs2-event.h @@ -65,6 +65,7 @@ enum PVFS_event_op PVFS_EVENT_TROVE_KEYVAL_WRITE_LIST = 25, PVFS_EVENT_TROVE_KEYVAL_GET_HANDLE_INFO = 26, PVFS_EVENT_TROVE_DSPACE_GETATTR_LIST = 27, + PVFS_EVENT_TROVE_KEYVAL_REMOVE_LIST = 28, }; #endif /* __PVFS2_EVENT_H */ diff --git a/include/pvfs2-hint.h b/include/pvfs2-hint.h new file mode 100644 index 0000000..390a68d --- /dev/null +++ b/include/pvfs2-hint.h @@ -0,0 +1,65 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#ifndef __PVFS2_HINT_H +#define __PVFS2_HINT_H + +#include "pvfs2-types.h" + +/* these are for tracing requests */ +#define PVFS_HINT_REQUEST_ID_NAME "pvfs.hint.request_id" +#define PVFS_HINT_CLIENT_ID_NAME "pvfs.hint.client_id" +#define PVFS_HINT_HANDLE_NAME "pvfs.hint.handle" +#define PVFS_HINT_OP_ID_NAME "pvfs.hint.op_id" +#define PVFS_HINT_RANK_NAME "pvfs.hint.rank" +#define PVFS_HINT_SERVER_ID_NAME "pvfs.hint.server_id" +/* these are file creation parameters */ +#define PVFS_HINT_DISTRIBUTION_NAME "pvfs.hint.disribution" +#define PVFS_HINT_DFILE_COUNT_NAME "pvfs.hint.dfile_count" +#define PVFS_HINT_LAYOUT_NAME "pvfs.hint.layout" +#define PVFS_HINT_SERVERLIST_NAME "pvfs.hint.serverlist" +#define PVFS_HINT_NOCACHE_NAME "pvfs.hint.nocache" + +typedef struct PVFS_hint_s *PVFS_hint; + +#define PVFS_HINT_NULL NULL + +int PVFS_hint_add(PVFS_hint *hint, + const char *name, + int length, + void *value); + +int PVFS_hint_replace(PVFS_hint *hint, + const char *name, + int length, + void *value); + +int PVFS_hint_copy(PVFS_hint old_hint, PVFS_hint *new_hint); + +void PVFS_hint_free(PVFS_hint hint); + +/* check to see if a hint has already been added */ +int PVFS_hint_check(PVFS_hint *hints, const char *name); + +/* check to see if any hints should be transferred */ +int PVFS_hint_check_transfer(PVFS_hint *hints); + +/* + * function allows users to specify hints in an environment variable. 
+ */ +int PVFS_hint_import_env(PVFS_hint *out_hint); + +#endif /* __PVFS2_HINT_H */ + +/* + * Local variables: + * mode: c + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/include/pvfs2-mgmt.h b/include/pvfs2-mgmt.h index 1054bb7..09b37c8 100644 --- a/include/pvfs2-mgmt.h +++ b/include/pvfs2-mgmt.h @@ -25,6 +25,7 @@ #include "pvfs2-sysint.h" #include "pvfs2-types.h" +#include "pint-uid-mgmt.h" /* non-blocking mgmt operation handle */ typedef PVFS_id_gen_t PVFS_mgmt_op_id; @@ -49,32 +50,29 @@ struct PVFS_mgmt_server_stat }; /* performance monitoring statistics */ -struct PVFS_mgmt_perf_stat + +/* + * defines all of the keys known to PVFS + * performance monitoring subsystem + * keys defined here must also appear + * in the table in src/common/misc/pint-perf-counter.c + * with the same index + */ +enum PINT_server_perf_keys { - int32_t valid_flag; /* is this entry valid? */ - uint32_t id; /* timestep id */ - uint64_t start_time_ms; /* start time of perf set, ms since epoch */ - int64_t write; /* bytes written */ - int64_t read; /* bytes read */ - int64_t metadata_write; /* # of modifying metadata ops */ - int64_t metadata_read; /* # of non-modifying metadata ops */ - int32_t dspace_queue; /* # of metadata dspace ops in the queue */ - int32_t keyval_queue; /* # of metadata keyval ops in the queue */ - int32_t reqsched; /* # of currently scheduled request posted */ + PINT_PERF_READ = 0, /* bytes read */ + PINT_PERF_WRITE = 1, /* bytes written */ + PINT_PERF_METADATA_READ = 2, /* metadata read ops */ + PINT_PERF_METADATA_WRITE = 3, /* metadata write ops */ + PINT_PERF_METADATA_DSPACE_OPS = 4, /* metadata dspace ops */ + PINT_PERF_METADATA_KEYVAL_OPS = 5, /* metadata keyval ops */ + PINT_PERF_REQSCHED = 6, /* instantaneous active requests */ + PINT_PERF_REQUESTS = 7, /* requests received */ + PINT_PERF_SMALL_READ = 8, /* bytes read by small_io */ + PINT_PERF_SMALL_WRITE = 9, /* bytes written by small_io */ 
+ PINT_PERF_FLOW_READ = 10, /* bytes read by flow */ + PINT_PERF_FLOW_WRITE = 11, /* bytes written by flow */ }; -endecode_fields_11_struct( - PVFS_mgmt_perf_stat, - int32_t, valid_flag, - uint32_t, id, - uint64_t, start_time_ms, - int64_t, write, - int64_t, read, - int64_t, metadata_write, - int64_t, metadata_read, - int32_t, dspace_queue, - int32_t, keyval_queue, - int32_t, reqsched, - skip4,) /* low level information about individual server level objects */ struct PVFS_mgmt_dspace_info @@ -93,7 +91,7 @@ endecode_fields_7_struct( PVFS_ds_type, type, skip4,, PVFS_size, b_size, - PVFS_handle, dirdata_handle) + PVFS_handle, dirdata_handle); /* individual datapoint from event monitoring */ struct PVFS_mgmt_event @@ -115,7 +113,7 @@ endecode_fields_8_struct( int32_t, flags, int32_t, tv_sec, int32_t, tv_usec, - skip4,) + skip4,); /* values which may be or'd together in the flags field above */ enum @@ -124,7 +122,6 @@ enum PVFS_MGMT_META_SERVER = 2 }; - PVFS_error PVFS_mgmt_count_servers( PVFS_fs_id fs_id, PVFS_credentials *credentials, @@ -143,12 +140,14 @@ PVFS_error PVFS_imgmt_noop( PVFS_credentials *credentials, PVFS_BMI_addr_t addr, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_mgmt_noop( PVFS_fs_id, PVFS_credentials *credentials, - PVFS_BMI_addr_t addr); + PVFS_BMI_addr_t addr, + PVFS_hint hints); const char* PVFS_mgmt_map_addr( PVFS_fs_id fs_id, @@ -160,11 +159,11 @@ PVFS_error PVFS_imgmt_setparam_list( PVFS_fs_id fs_id, PVFS_credentials *credentials, enum PVFS_server_param param, - uint64_t value, + struct PVFS_mgmt_setparam_value *value, PVFS_BMI_addr_t *addr_array, - uint64_t *old_value_array, int count, PVFS_error_details *details, + PVFS_hint hints, PVFS_mgmt_op_id *op_id, void *user_ptr); @@ -172,28 +171,28 @@ PVFS_error PVFS_mgmt_setparam_list( PVFS_fs_id fs_id, PVFS_credentials *credentials, enum PVFS_server_param param, - uint64_t value, + struct PVFS_mgmt_setparam_value *value, PVFS_BMI_addr_t *addr_array, - uint64_t 
*old_value_array, int count, - PVFS_error_details *details); + PVFS_error_details *details, + PVFS_hint hints); PVFS_error PVFS_mgmt_setparam_all( PVFS_fs_id fs_id, PVFS_credentials *credentials, enum PVFS_server_param param, - uint64_t value, - uint64_t *old_value_array, - PVFS_error_details *details); + struct PVFS_mgmt_setparam_value *value, + PVFS_error_details *details, + PVFS_hint hints); PVFS_error PVFS_mgmt_setparam_single( PVFS_fs_id fs_id, PVFS_credentials *credentials, enum PVFS_server_param param, - uint64_t value, + struct PVFS_mgmt_setparam_value *value, char *server_addr_str, - uint64_t *old_value, - PVFS_error_details *details); + PVFS_error_details *details, + PVFS_hint hints); PVFS_error PVFS_imgmt_statfs_list( PVFS_fs_id fs_id, @@ -203,6 +202,7 @@ PVFS_error PVFS_imgmt_statfs_list( int count, PVFS_error_details *details, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_mgmt_statfs_list( @@ -211,38 +211,44 @@ PVFS_error PVFS_mgmt_statfs_list( struct PVFS_mgmt_server_stat *stat_array, PVFS_BMI_addr_t *addr_array, int count, - PVFS_error_details *details); + PVFS_error_details *details, + PVFS_hint hints); PVFS_error PVFS_mgmt_statfs_all( PVFS_fs_id fs_id, PVFS_credentials *credentials, struct PVFS_mgmt_server_stat *stat_array, int *inout_count_p, - PVFS_error_details *details); + PVFS_error_details *details, + PVFS_hint hints); PVFS_error PVFS_imgmt_perf_mon_list( PVFS_fs_id fs_id, PVFS_credentials *credentials, - struct PVFS_mgmt_perf_stat **perf_matrix, + int64_t **perf_matrix, uint64_t *end_time_ms_array, PVFS_BMI_addr_t *addr_array, uint32_t* next_id_array, int server_count, + int *key_count, int history_count, PVFS_error_details *details, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_mgmt_perf_mon_list( PVFS_fs_id fs_id, PVFS_credentials *credentials, - struct PVFS_mgmt_perf_stat** perf_matrix, + int64_t **perf_matrix, uint64_t *end_time_ms_array, PVFS_BMI_addr_t *addr_array, uint32_t 
*next_id_array, int server_count, + int *key_count, int history_count, - PVFS_error_details *details); + PVFS_error_details *details, + PVFS_hint hints); PVFS_error PVFS_imgmt_event_mon_list( PVFS_fs_id fs_id, @@ -253,6 +259,7 @@ PVFS_error PVFS_imgmt_event_mon_list( int event_count, PVFS_error_details *details, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_mgmt_event_mon_list( @@ -262,7 +269,9 @@ PVFS_error PVFS_mgmt_event_mon_list( PVFS_BMI_addr_t *addr_array, int server_count, int event_count, - PVFS_error_details *details); + PVFS_error_details *details, + PVFS_hint hints); + PVFS_error PVFS_imgmt_iterate_handles_list( PVFS_fs_id fs_id, @@ -272,7 +281,9 @@ PVFS_error PVFS_imgmt_iterate_handles_list( PVFS_ds_position *position_array, PVFS_BMI_addr_t *addr_array, int server_count, + int flags, PVFS_error_details *details, + PVFS_hint hints, PVFS_mgmt_op_id *op_id, void *user_ptr); @@ -284,7 +295,9 @@ PVFS_error PVFS_mgmt_iterate_handles_list( PVFS_ds_position *position_array, PVFS_BMI_addr_t *addr_array, int server_count, - PVFS_error_details *details); + int flags, + PVFS_error_details *details, + PVFS_hint hints); PVFS_error PVFS_imgmt_get_dfile_array( PVFS_object_ref ref, @@ -292,35 +305,41 @@ PVFS_error PVFS_imgmt_get_dfile_array( PVFS_handle *dfile_array, int dfile_count, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_mgmt_get_dfile_array( PVFS_object_ref ref, PVFS_credentials *credentials, PVFS_handle *dfile_array, - int dfile_count); + int dfile_count, + PVFS_hint hints); PVFS_error PVFS_imgmt_remove_object( - PVFS_object_ref object_ref, + PVFS_object_ref object_ref, PVFS_credentials *credentials, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_mgmt_remove_object( - PVFS_object_ref object_ref, - PVFS_credentials *credentials); + PVFS_object_ref object_ref, + PVFS_credentials *credentials, + PVFS_hint hints); PVFS_error PVFS_imgmt_remove_dirent( PVFS_object_ref 
parent_ref, char *entry, PVFS_credentials *credentials, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_mgmt_remove_dirent( PVFS_object_ref parent_ref, char *entry, - PVFS_credentials *credentials); + PVFS_credentials *credentials, + PVFS_hint hints); PVFS_error PVFS_imgmt_create_dirent( PVFS_object_ref parent_ref, @@ -328,25 +347,29 @@ PVFS_error PVFS_imgmt_create_dirent( PVFS_handle entry_handle, PVFS_credentials *credentials, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_mgmt_create_dirent( PVFS_object_ref parent_ref, char *entry, PVFS_handle entry_handle, - PVFS_credentials *credentials); + PVFS_credentials *credentials, + PVFS_hint hints); PVFS_error PVFS_imgmt_get_dirdata_handle( PVFS_object_ref parent_ref, PVFS_handle *out_dirdata_handle, PVFS_credentials *credentials, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_mgmt_get_dirdata_handle( PVFS_object_ref parent_ref, PVFS_handle *out_dirdata_handle, - PVFS_credentials *credentials); + PVFS_credentials *credentials, + PVFS_hint hints); int PVFS_mgmt_wait( PVFS_mgmt_op_id op_id, @@ -391,6 +414,30 @@ PVFS_error PVFS_mgmt_map_handle( PVFS_handle handle, PVFS_BMI_addr_t *addr); +PVFS_error PVFS_imgmt_get_uid_list( + PVFS_fs_id fs_id, + PVFS_credentials *credentials, + int server_count, + PVFS_BMI_addr_t *addr_array, + uint32_t history, + PVFS_uid_info_s **uid_info_array, + uint32_t *uid_count, + PVFS_mgmt_op_id *op_id, + PVFS_hint hints, + void *user_ptr); + +PVFS_error PVFS_mgmt_get_uid_list( + PVFS_fs_id fs_id, + PVFS_credentials *credentials, + int server_count, + PVFS_BMI_addr_t *addr_array, + uint32_t history, + PVFS_uid_info_s **uid_info_array, + uint32_t *uid_count, + PVFS_hint hints, + void *user_ptr); + + #endif /* __PVFS2_MGMT_H */ /* @} */ diff --git a/include/pvfs2-mirror.h b/include/pvfs2-mirror.h new file mode 100644 index 0000000..5aa202e --- /dev/null +++ b/include/pvfs2-mirror.h @@ -0,0 +1,23 @@ +/* + * (C) 
2001 Clemson University and The University of Chicago + * + */ + +#ifndef __MIRROR_H +#define __MIRROR_H + +enum MIRROR_MODE_t { + BEGIN_MIRROR_MODE = 100, + NO_MIRRORING = 100, + MIRROR_ON_IMMUTABLE = 200, + END_MIRROR_MODE = 200 +}; + +typedef enum MIRROR_MODE_t MIRROR_MODE; + +#define USER_PVFS2_MIRROR_HANDLES "user.pvfs2.mirror.handles" +#define USER_PVFS2_MIRROR_COPIES "user.pvfs2.mirror.copies" +#define USER_PVFS2_MIRROR_STATUS "user.pvfs2.mirror.status" +#define USER_PVFS2_MIRROR_MODE "user.pvfs2.mirror.mode" + +#endif /* __MIRROR_H */ diff --git a/include/pvfs2-sysint.h b/include/pvfs2-sysint.h index 9f64e0b..b4d10b6 100644 --- a/include/pvfs2-sysint.h +++ b/include/pvfs2-sysint.h @@ -23,6 +23,16 @@ #include "pvfs2-types.h" #include "pvfs2-request.h" +/** Options supported by get_info() and set_info(). */ +enum PVFS_sys_setinfo_opt +{ + PVFS_SYS_NCACHE_TIMEOUT_MSECS = 1, + PVFS_SYS_ACACHE_TIMEOUT_MSECS, + PVFS_SYS_MSG_TIMEOUT_SECS, + PVFS_SYS_MSG_RETRY_LIMIT, + PVFS_SYS_MSG_RETRY_DELAY_MSECS, +}; + /** Holds a non-blocking system interface operation handle. 
*/ typedef PVFS_id_gen_t PVFS_sys_op_id; @@ -36,37 +46,37 @@ struct PVFS_sys_attr_s PVFS_time mtime; PVFS_time ctime; PVFS_size size; - PVFS2_ALIGN_VAR(char *, link_target);/* NOTE: caller must free if valid */ + PVFS_handle cid; + PVFS2_ALIGN_VAR(char *, link_target);/**< NOTE: caller must free if valid */ PVFS2_ALIGN_VAR(int32_t, dfile_count); /* Changed to int32_t so that size of structure does not change */ - PVFS2_ALIGN_VAR(char*, dist_name); /* NOTE: caller must free if valid */ - PVFS2_ALIGN_VAR(char*, dist_params); /* NOTE: caller must free if valid */ + PVFS2_ALIGN_VAR(uint32_t, mirror_copies_count); + PVFS2_ALIGN_VAR(char*, dist_name); /**< NOTE: caller must free if valid */ + PVFS2_ALIGN_VAR(char*, dist_params); /**< NOTE: caller must free if valid */ PVFS_size dirent_count; PVFS_ds_type objtype; PVFS_flags flags; uint32_t mask; + PVFS_size blksize; }; typedef struct PVFS_sys_attr_s PVFS_sys_attr; /** Describes a PVFS2 file system. */ struct PVFS_sys_mntent { - char **pvfs_config_servers; /* addresses of servers with config info */ - int32_t num_pvfs_config_servers; /* changed to int32_t so that size of structure does not change */ - char *the_pvfs_config_server; /* first of the entries above that works */ - char *pvfs_fs_name; /* name of PVFS2 file system */ - enum PVFS_flowproto_type flowproto; /* flow protocol */ - enum PVFS_encoding_type encoding; /* wire data encoding */ - /* fs id, filled in by system interface when it looks up the fs */ - PVFS_fs_id fs_id; - - /* Default number of dfiles mount option value */ - int32_t default_num_dfiles; /* int32_t for portable, fixed size structure */ - /* Check to determine whether the mount process must perform the integrity checks on the config files */ - int32_t integrity_check; + char **pvfs_config_servers; /**< addresses of servers with config info */ + int32_t num_pvfs_config_servers; /**< changed to int32_t so that size of structure does not change */ + char *the_pvfs_config_server; /**< first of the 
entries above that works */ + char *pvfs_fs_name; /**< name of PVFS2 file system */ + enum PVFS_flowproto_type flowproto; /**< flow protocol */ + enum PVFS_encoding_type encoding; /**< wire data encoding */ + PVFS_fs_id fs_id; /**< fs id, filled in by system interface when it looks up the fs */ + /* int32_t for portable, fixed size structure */ + int32_t default_num_dfiles; /**< Default number of dfiles mount option value */ + int32_t integrity_check; /**< Check to determine whether the mount process must perform the integrity checks on the config files */ /* the following fields are included for convenience; * useful if the file system is "mounted" */ - char *mnt_dir; /* local mount path */ - char *mnt_opts; /* full option list */ + char *mnt_dir; /**< local mount path */ + char *mnt_opts; /**< full option list */ }; /** Describes file distribution parameters. */ @@ -82,9 +92,14 @@ typedef struct PVFS_sys_dist_s PVFS_sys_dist; /**********************************************************************/ /** Holds results of a lookup operation (reference to object). 
*/ +/* if error_path is passed in NULL then nothing returned on error */ +/* otherwise up to error_path_size chars of unresolved path */ +/* segments are passed out in null terminated string */ struct PVFS_sysresp_lookup_s { PVFS_object_ref ref; + char *error_path; /* on error, the unresolved path segments */ + int error_path_size; /* size of the buffer provided by the user */ }; typedef struct PVFS_sysresp_lookup_s PVFS_sysresp_lookup; @@ -159,7 +174,7 @@ struct PVFS_sysresp_readdirplus_s { PVFS_ds_position token; PVFS_dirent *dirent_array; - uint32_t pvfs_dirent_outcount; /* uint32_t for portable, fixed size structure */ + uint32_t pvfs_dirent_outcount; /**< uint32_t for portable, fixed size structure */ uint64_t directory_version; PVFS_error *stat_err_array; PVFS_sys_attr *attr_array; @@ -250,6 +265,7 @@ PVFS_error PVFS_isys_ref_lookup( PVFS_sysresp_lookup * resp, int32_t follow_link, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_ref_lookup( @@ -258,14 +274,16 @@ PVFS_error PVFS_sys_ref_lookup( PVFS_object_ref parent_ref, const PVFS_credentials *credentials, PVFS_sysresp_lookup * resp, - int32_t follow_link); + int32_t follow_link, + PVFS_hint hints); PVFS_error PVFS_sys_lookup( PVFS_fs_id fs_id, char *name, const PVFS_credentials *credentials, PVFS_sysresp_lookup * resp, - int32_t follow_link); + int32_t follow_link, + PVFS_hint hints); PVFS_error PVFS_isys_getattr( PVFS_object_ref ref, @@ -273,25 +291,29 @@ PVFS_error PVFS_isys_getattr( const PVFS_credentials *credentials, PVFS_sysresp_getattr *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_getattr( PVFS_object_ref ref, uint32_t attrmask, const PVFS_credentials *credentials, - PVFS_sysresp_getattr *resp); + PVFS_sysresp_getattr *resp, + PVFS_hint hints); PVFS_error PVFS_isys_setattr( PVFS_object_ref ref, PVFS_sys_attr attr, const PVFS_credentials *credentials, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error 
PVFS_sys_setattr( PVFS_object_ref ref, PVFS_sys_attr attr, - const PVFS_credentials *credentials); + const PVFS_credentials *credentials, + PVFS_hint hints); PVFS_error PVFS_isys_mkdir( char *entry_name, @@ -300,6 +322,7 @@ PVFS_error PVFS_isys_mkdir( const PVFS_credentials *credentials, PVFS_sysresp_mkdir *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_mkdir( @@ -307,7 +330,8 @@ PVFS_error PVFS_sys_mkdir( PVFS_object_ref parent_ref, PVFS_sys_attr attr, const PVFS_credentials *credentials, - PVFS_sysresp_mkdir *resp); + PVFS_sysresp_mkdir *resp, + PVFS_hint hints); PVFS_error PVFS_isys_readdir( PVFS_object_ref ref, @@ -316,6 +340,7 @@ PVFS_error PVFS_isys_readdir( const PVFS_credentials *credentials, PVFS_sysresp_readdir *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_readdir( @@ -323,7 +348,8 @@ PVFS_error PVFS_sys_readdir( PVFS_ds_position token, int32_t pvfs_dirent_incount, const PVFS_credentials *credentials, - PVFS_sysresp_readdir *resp); + PVFS_sysresp_readdir *resp, + PVFS_hint hints); PVFS_error PVFS_isys_readdirplus( PVFS_object_ref ref, @@ -333,6 +359,7 @@ PVFS_error PVFS_isys_readdirplus( uint32_t attrmask, PVFS_sysresp_readdirplus *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_readdirplus( @@ -341,7 +368,8 @@ PVFS_error PVFS_sys_readdirplus( int32_t pvfs_dirent_incount, const PVFS_credentials *credentials, uint32_t attrmask, - PVFS_sysresp_readdirplus *resp); + PVFS_sysresp_readdirplus *resp, + PVFS_hint hints); PVFS_error PVFS_isys_create( char *entry_name, @@ -352,6 +380,7 @@ PVFS_error PVFS_isys_create( PVFS_sys_layout *layout, PVFS_sysresp_create *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_create( @@ -360,20 +389,23 @@ PVFS_error PVFS_sys_create( PVFS_sys_attr attr, const PVFS_credentials *credentials, PVFS_sys_dist *dist, + PVFS_sysresp_create *resp, PVFS_sys_layout *layout, - 
PVFS_sysresp_create *resp); + PVFS_hint hints); PVFS_error PVFS_isys_remove( char *entry_name, PVFS_object_ref ref, const PVFS_credentials *credentials, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_remove( char *entry_name, PVFS_object_ref ref, - const PVFS_credentials *credentials); + const PVFS_credentials *credentials, + PVFS_hint hints); PVFS_error PVFS_isys_rename( char *old_entry, @@ -382,6 +414,7 @@ PVFS_error PVFS_isys_rename( PVFS_object_ref new_parent_ref, const PVFS_credentials *credentials, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_rename( @@ -389,7 +422,8 @@ PVFS_error PVFS_sys_rename( PVFS_object_ref old_parent_ref, char *new_entry, PVFS_object_ref new_parent_ref, - const PVFS_credentials *credentials); + const PVFS_credentials *credentials, + PVFS_hint hints); PVFS_error PVFS_isys_symlink( char *entry_name, @@ -399,6 +433,7 @@ PVFS_error PVFS_isys_symlink( const PVFS_credentials *credentials, PVFS_sysresp_symlink *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_symlink( @@ -407,7 +442,8 @@ PVFS_error PVFS_sys_symlink( char *target, PVFS_sys_attr attr, const PVFS_credentials *credentials, - PVFS_sysresp_symlink *resp); + PVFS_sysresp_symlink *resp, + PVFS_hint hints); PVFS_error PVFS_isys_io( PVFS_object_ref ref, @@ -419,13 +455,16 @@ PVFS_error PVFS_isys_io( PVFS_sysresp_io *resp, enum PVFS_io_type type, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); -#define PVFS_isys_read(x1,x2,x3,x4,x5,x6,y,x7,x8) \ -PVFS_isys_io(x1,x2,x3,x4,x5,x6,y,PVFS_IO_READ,x7,x8) +/** Macro for convenience read is a call to io */ +#define PVFS_isys_read(x1,x2,x3,x4,x5,x6,y,x7,x8,x9) \ +PVFS_isys_io(x1,x2,x3,x4,x5,x6,y,PVFS_IO_READ,x7,x8,x9) -#define PVFS_isys_write(x1,x2,x3,x4,x5,x6,y,x7,x8) \ -PVFS_isys_io(x1,x2,x3,x4,x5,x6,y,PVFS_IO_WRITE,x7,x8) +/** Macro for convenience write is a call to io */ +#define PVFS_isys_write(x1,x2,x3,x4,x5,x6,y,x7,x8,x9) \ 
+PVFS_isys_io(x1,x2,x3,x4,x5,x6,y,PVFS_IO_WRITE,x7,x8,x9) PVFS_error PVFS_sys_io( PVFS_object_ref ref, @@ -435,53 +474,63 @@ PVFS_error PVFS_sys_io( PVFS_Request mem_req, const PVFS_credentials *credentials, PVFS_sysresp_io *resp, - enum PVFS_io_type type); + enum PVFS_io_type type, + PVFS_hint hints); -#define PVFS_sys_read(x1,x2,x3,x4,x5,x6,y) \ -PVFS_sys_io(x1,x2,x3,x4,x5,x6,y,PVFS_IO_READ) +/** Macro for convenience read is a call to io */ +#define PVFS_sys_read(x1,x2,x3,x4,x5,x6,y,z) \ +PVFS_sys_io(x1,x2,x3,x4,x5,x6,y,PVFS_IO_READ,z) -#define PVFS_sys_write(x1,x2,x3,x4,x5,x6,y) \ -PVFS_sys_io(x1,x2,x3,x4,x5,x6,y,PVFS_IO_WRITE) +/** Macro for convenience write is a call to io */ +#define PVFS_sys_write(x1,x2,x3,x4,x5,x6,y,z) \ +PVFS_sys_io(x1,x2,x3,x4,x5,x6,y,PVFS_IO_WRITE,z) PVFS_error PVFS_isys_truncate( PVFS_object_ref ref, PVFS_size size, const PVFS_credentials *credentials, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_truncate( PVFS_object_ref ref, PVFS_size size, - const PVFS_credentials *credentials); + const PVFS_credentials *credentials, + PVFS_hint hints); PVFS_error PVFS_sys_getparent( PVFS_fs_id fs_id, char *entry_name, const PVFS_credentials *credentials, - PVFS_sysresp_getparent *resp); + PVFS_sysresp_getparent *resp, + PVFS_hint hints); PVFS_error PVFS_isys_flush( PVFS_object_ref ref, const PVFS_credentials *credentials, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_flush( PVFS_object_ref ref, - const PVFS_credentials *credentials); + const PVFS_credentials *credentials, + PVFS_hint hints); PVFS_error PVFS_isys_statfs( PVFS_fs_id fs_id, const PVFS_credentials *credentials, PVFS_sysresp_statfs *statfs, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_statfs( PVFS_fs_id fs_id, const PVFS_credentials *credentials, - PVFS_sysresp_statfs *resp); + PVFS_sysresp_statfs *resp, + PVFS_hint hints); PVFS_sys_dist* PVFS_sys_dist_lookup( const char* 
dist_identifier); @@ -500,13 +549,15 @@ PVFS_error PVFS_isys_geteattr( PVFS_ds_keyval *key_p, PVFS_sysresp_geteattr *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_geteattr( PVFS_object_ref ref, const PVFS_credentials *credentials, PVFS_ds_keyval *key_p, - PVFS_ds_keyval *val_p); + PVFS_ds_keyval *val_p, + PVFS_hint hints); PVFS_error PVFS_isys_geteattr_list( PVFS_object_ref ref, @@ -515,6 +566,7 @@ PVFS_error PVFS_isys_geteattr_list( PVFS_ds_keyval *key_p, PVFS_sysresp_geteattr *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_geteattr_list( @@ -522,7 +574,8 @@ PVFS_error PVFS_sys_geteattr_list( const PVFS_credentials *credentials, int32_t nkey, PVFS_ds_keyval *key_p, - PVFS_sysresp_geteattr *resp); + PVFS_sysresp_geteattr *resp, + PVFS_hint hints); PVFS_error PVFS_isys_seteattr( PVFS_object_ref ref, @@ -531,6 +584,7 @@ PVFS_error PVFS_isys_seteattr( PVFS_ds_keyval *val_p, int32_t flags, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_seteattr( @@ -538,7 +592,8 @@ PVFS_error PVFS_sys_seteattr( const PVFS_credentials *credentials, PVFS_ds_keyval *key_p, PVFS_ds_keyval *val_p, - int32_t flags); + int32_t flags, + PVFS_hint hints); PVFS_error PVFS_isys_seteattr_list( PVFS_object_ref ref, @@ -548,6 +603,7 @@ PVFS_error PVFS_isys_seteattr_list( PVFS_ds_keyval *val_array, int32_t flags, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_seteattr_list( @@ -556,19 +612,22 @@ PVFS_error PVFS_sys_seteattr_list( int32_t nkey, PVFS_ds_keyval *key_array, PVFS_ds_keyval *val_array, - int32_t flags); + int32_t flags, + PVFS_hint hints); PVFS_error PVFS_isys_deleattr( PVFS_object_ref ref, const PVFS_credentials *credentials, PVFS_ds_keyval *key_p, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_deleattr( PVFS_object_ref ref, const PVFS_credentials *credentials, - PVFS_ds_keyval *key_p); + PVFS_ds_keyval *key_p, + 
PVFS_hint hints); PVFS_error PVFS_isys_listeattr( PVFS_object_ref ref, @@ -577,6 +636,7 @@ PVFS_error PVFS_isys_listeattr( const PVFS_credentials *credentials, PVFS_sysresp_listeattr *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr); PVFS_error PVFS_sys_listeattr( @@ -584,7 +644,16 @@ PVFS_error PVFS_sys_listeattr( PVFS_ds_position token, int32_t nkey, const PVFS_credentials *credentials, - PVFS_sysresp_listeattr *resp); + PVFS_sysresp_listeattr *resp, + PVFS_hint hints); + +PVFS_error PVFS_sys_set_info( + enum PVFS_sys_setinfo_opt option, + unsigned int arg); + +PVFS_error PVFS_sys_get_info( + enum PVFS_sys_setinfo_opt option, + unsigned int* arg); /* exported test functions for isys calls */ diff --git a/include/pvfs2-types.h b/include/pvfs2-types.h index 949e833..5b668ba 100644 --- a/include/pvfs2-types.h +++ b/include/pvfs2-types.h @@ -6,7 +6,6 @@ /* NOTE: if you make any changes to the encoding definitions in this file, * please update the PVFS2_PROTO_VERSION in pvfs2-req-proto.h accordingly */ - /** \file * * Definitions of types used throughout PVFS2. 
@@ -15,28 +14,17 @@ #define __PVFS2_TYPES_H #ifdef __KERNEL__ - -#ifdef HAVE_NOWARNINGS_WHEN_INCLUDING_LINUX_CONFIG_H -#include -#endif - -#include -#include -#include -#include #include -#include -#include - #else - #include #include +#ifdef WIN32 +#include "wincommon.h" +#else #include +#endif #include #include -#include - #endif #ifndef INT32_MAX @@ -47,6 +35,9 @@ #ifndef NAME_MAX #define NAME_MAX 255 #endif +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif /* figure out the size of a pointer */ #if defined(__WORDSIZE) @@ -59,18 +50,14 @@ #elif INTPTR_MIN == INT64_MIN #define PVFS2_SIZEOF_VOIDP 64 #endif +#elif defined(_WIN64) + #define PVFS2_SIZEOF_VOIDP 64 +#elif defined(WIN32) + #define PVFS2_SIZEOF_VOIDP 32 #else #error "Unhandled size of void pointer" #endif -#ifdef __KERNEL__ -#define strdup(s) kstrdup(s, GFP_KERNEL) -#define assert(x) BUG_ON(!(x)) -#ifndef intptr_t -#define intptr_t long -#endif -#endif - /* we need to align some variables in 32bit case to match alignment * in 64bit case */ @@ -84,6 +71,7 @@ /* empty stubs to turn off encoding definition generation */ #include "pvfs2-encode-stubs.h" +#include "pvfs2-hint.h" /* Basic types used throughout the code. */ typedef uint8_t PVFS_boolean; @@ -95,6 +83,17 @@ typedef int64_t PVFS_id_gen_t; /** Opaque value representing a destination address. 
*/ typedef int64_t PVFS_BMI_addr_t; +/* Windows - inline functions can't be exported */ +#ifdef WIN32 +void encode_PVFS_BMI_addr_t(char **pptr, const PVFS_BMI_addr_t *x); +int encode_PVFS_BMI_addr_t_size_check(const PVFS_BMI_addr_t *x); +void decode_PVFS_BMI_addr_t(char **pptr, PVFS_BMI_addr_t *x); +#else +inline void encode_PVFS_BMI_addr_t(char **pptr, const PVFS_BMI_addr_t *x); +inline int encode_PVFS_BMI_addr_t_size_check(const PVFS_BMI_addr_t *x); +inline void decode_PVFS_BMI_addr_t(char **pptr, PVFS_BMI_addr_t *x); +#endif + #define encode_PVFS_error encode_int32_t #define decode_PVFS_error decode_int32_t #define encode_PVFS_offset encode_int64_t @@ -106,7 +105,7 @@ typedef int64_t PVFS_BMI_addr_t; /* Basic types used by communication subsystems. */ typedef int32_t PVFS_msg_tag_t; -typedef int32_t PVFS_context_id; +typedef PVFS_id_gen_t PVFS_context_id; enum PVFS_flowproto_type { @@ -121,8 +120,9 @@ enum PVFS_encoding_type { ENCODING_DIRECT = 1, ENCODING_LE_BFIELD = 2, - ENCODING_XDR = 3, + ENCODING_XDR = 3 }; + /* these values must correspond to the defined encoding types above */ #define ENCODING_INVALID_MIN 0 #define ENCODING_INVALID_MAX 4 @@ -134,7 +134,7 @@ enum PVFS_encoding_type #define ENCODING_IS_SUPPORTED(enc_type) \ ((enc_type >= ENCODING_SUPPORTED_MIN) && \ (enc_type <= ENCODING_SUPPORTED_MAX)) -#define ENCODING_DEFAULT ENCODING_LE_BFIELD +#define PVFS2_ENCODING_DEFAULT ENCODING_LE_BFIELD /* basic types used by storage subsystem */ @@ -148,6 +148,8 @@ typedef uint64_t PVFS_handle; typedef int32_t PVFS_fs_id; typedef uint64_t PVFS_ds_position; typedef int32_t PVFS_ds_flags; + + #define encode_PVFS_handle encode_uint64_t #define decode_PVFS_handle decode_uint64_t #define encode_PVFS_fs_id encode_int32_t @@ -161,10 +163,13 @@ typedef uint32_t PVFS_gid; typedef uint64_t PVFS_time; typedef uint32_t PVFS_permissions; typedef uint64_t PVFS_flags; +typedef uint64_t PVFS_cid; #define encode_PVFS_uid encode_uint32_t #define decode_PVFS_uid decode_uint32_t 
#define encode_PVFS_gid encode_uint32_t #define decode_PVFS_gid decode_uint32_t +#define encode_PVFS_cid encode_uint64_t +#define decode_PVFS_cid decode_uint64_t #define encode_PVFS_time encode_int64_t #define decode_PVFS_time decode_int64_t #define encode_PVFS_permissions encode_uint32_t @@ -210,8 +215,12 @@ enum PVFS_sys_layout_algorithm PVFS_SYS_LAYOUT_RANDOM = 3, /* order the datafiles based on the list specified */ - PVFS_SYS_LAYOUT_LIST = 4 + PVFS_SYS_LAYOUT_LIST = 4, + + /* order the datafiles based on the list specified */ + PVFS_SYS_LAYOUT_LOCAL = 5 }; +#define PVFS_SYS_LAYOUT_DEFAULT NULL /* The list of datafile servers that can be passed into PVFS_sys_create * to specify the exact layout of a file. The count parameter will override @@ -238,11 +247,22 @@ typedef struct PVFS_sys_layout_s */ struct PVFS_sys_server_list server_list; } PVFS_sys_layout; +#define extra_size_PVFS_sys_layout PVFS_REQ_LIMIT_LAYOUT + +#ifdef WIN32 +void encode_PVFS_sys_layout(char **pptr, const struct PVFS_sys_layout_s *x); +void decode_PVFS_sys_layout(char **pptr, struct PVFS_sys_layout_s *x); +#else +inline void encode_PVFS_sys_layout(char **pptr, const struct PVFS_sys_layout_s *x); +inline void decode_PVFS_sys_layout(char **pptr, struct PVFS_sys_layout_s *x); +#endif /* predefined special values for types */ +#define PVFS_CONTEXT_NULL ((PVFS_context_id)-1) #define PVFS_HANDLE_NULL ((PVFS_handle)0) #define PVFS_FS_ID_NULL ((PVFS_fs_id)0) -#define PVFS_OP_NULL ((id_gen_t)0) +#define PVFS_OP_NULL ((PVFS_id_gen_t)0) +#define PVFS_BMI_ADDR_NULL ((PVFS_BMI_addr_t)0) #define PVFS_ITERATE_START (INT32_MAX - 1) #define PVFS_ITERATE_END (INT32_MAX - 2) #define PVFS_READDIR_START PVFS_ITERATE_START @@ -281,6 +301,9 @@ typedef struct PVFS_sys_layout_s #define PVFS_ALL_READ (PVFS_U_READ|PVFS_G_READ|PVFS_O_READ) /** Object and attribute types. 
*/ +/* If this enum is modified the server parameters related to the precreate pool + * batch and low threshold sizes may need to be modified to reflect this + * change. Also, the PVFS_DS_TYPE_COUNT #define below must be updated */ typedef enum { PVFS_TYPE_NONE = 0, @@ -288,21 +311,62 @@ typedef enum PVFS_TYPE_DATAFILE = (1 << 1), PVFS_TYPE_DIRECTORY = (1 << 2), PVFS_TYPE_SYMLINK = (1 << 3), - PVFS_TYPE_DIRDATA = (1 << 4) + PVFS_TYPE_DIRDATA = (1 << 4), + PVFS_TYPE_INTERNAL = (1 << 5) /* for the server's private use */ } PVFS_ds_type; #define decode_PVFS_ds_type decode_enum #define encode_PVFS_ds_type encode_enum +#define PVFS_DS_TYPE_COUNT 7 /* total number of DS types defined in + * the PVFS_ds_type enum */ + + +/* helper to translate bit-shifted enum types to array index number in the + * range (0-(PVFS_DS_TYPE_COUNT-1)) */ +#define PVFS_ds_type_to_int(__type, __intp) \ +do { \ + uint32_t pvfs_dst_idx_ = 0; /* suffixed names: must not capture caller variables */ \ + PVFS_ds_type pvfs_dst_val_ = (__type); \ + if( pvfs_dst_val_ == 0 ) \ + { \ + *((uint32_t *)(__intp)) = 0; \ + } \ + else \ + { \ + while( pvfs_dst_val_ >>=1 ) \ + { \ + pvfs_dst_idx_++; \ + } \ + *((uint32_t *)(__intp)) = pvfs_dst_idx_+1; \ + } \ +} while( 0 ) + +/* helper to translate array index int to a proper PVFS_ds_type bit-shifted + * value */ +#define int_to_PVFS_ds_type(__i, __typep) \ +do { \ + if( (__i) == 0 ) \ + { \ + *((PVFS_ds_type *)(__typep)) = 0; \ + } \ + else \ + { \ + *((PVFS_ds_type *)(__typep)) = 1 << ((__i) - 1);\ + } \ +} while(0) #ifdef __KERNEL__ #include #endif + +/*The value for PVFS_MIRROR_FL will not conflict with the FS values.*/ #if defined(FS_IMMUTABLE_FL) #define PVFS_IMMUTABLE_FL FS_IMMUTABLE_FL #define PVFS_APPEND_FL FS_APPEND_FL #define PVFS_NOATIME_FL FS_NOATIME_FL +#define PVFS_MIRROR_FL 0x01000000ULL #else @@ -310,9 +374,15 @@ typedef enum #define PVFS_IMMUTABLE_FL 0x10ULL #define PVFS_APPEND_FL 0x20ULL #define PVFS_NOATIME_FL 0x80ULL +#define PVFS_MIRROR_FL 0x01000000ULL #endif +#define ALL_FS_META_HINT_FLAGS \ + (PVFS_IMMUTABLE_FL | \ + PVFS_APPEND_FL | \ + PVFS_NOATIME_FL ) + /* Key/Value Pairs
*/ /* Extended attributes are stored on objects with */ /* a Key/Value pair. A key or a value is simply */ @@ -344,6 +414,8 @@ typedef struct #define PVFS_ATTR_SYS_DFILE_COUNT (1 << 25) #define PVFS_ATTR_SYS_DIRENT_COUNT (1 << 26) #define PVFS_ATTR_SYS_DIR_HINT (1 << 27) +#define PVFS_ATTR_SYS_BLKSIZE (1 << 28) +#define PVFS_ATTR_SYS_MIRROR_COPIES_COUNT (1 << 29) #define PVFS_ATTR_SYS_UID (1 << 0) #define PVFS_ATTR_SYS_GID (1 << 1) #define PVFS_ATTR_SYS_PERM (1 << 2) @@ -353,34 +425,33 @@ typedef struct #define PVFS_ATTR_SYS_TYPE (1 << 6) #define PVFS_ATTR_SYS_ATIME_SET (1 << 7) #define PVFS_ATTR_SYS_MTIME_SET (1 << 8) +#define PVFS_ATTR_SYS_CID (1 << 9) #define PVFS_ATTR_SYS_COMMON_ALL \ (PVFS_ATTR_SYS_UID | PVFS_ATTR_SYS_GID | \ PVFS_ATTR_SYS_PERM | PVFS_ATTR_SYS_ATIME | \ PVFS_ATTR_SYS_CTIME | PVFS_ATTR_SYS_MTIME | \ - PVFS_ATTR_SYS_TYPE) + PVFS_ATTR_SYS_TYPE | PVFS_ATTR_SYS_CID) #define PVFS_ATTR_SYS_ALL \ (PVFS_ATTR_SYS_COMMON_ALL | PVFS_ATTR_SYS_SIZE | \ PVFS_ATTR_SYS_LNK_TARGET | PVFS_ATTR_SYS_DFILE_COUNT | \ - PVFS_ATTR_SYS_DIRENT_COUNT | PVFS_ATTR_SYS_DIR_HINT) + PVFS_ATTR_SYS_MIRROR_COPIES_COUNT | \ + PVFS_ATTR_SYS_DIRENT_COUNT | PVFS_ATTR_SYS_DIR_HINT | PVFS_ATTR_SYS_BLKSIZE) #define PVFS_ATTR_SYS_ALL_NOHINT \ (PVFS_ATTR_SYS_COMMON_ALL | PVFS_ATTR_SYS_SIZE | \ PVFS_ATTR_SYS_LNK_TARGET | PVFS_ATTR_SYS_DFILE_COUNT | \ - PVFS_ATTR_SYS_DIRENT_COUNT) + PVFS_ATTR_SYS_MIRROR_COPIES_COUNT | \ + PVFS_ATTR_SYS_DIRENT_COUNT | PVFS_ATTR_SYS_BLKSIZE) #define PVFS_ATTR_SYS_ALL_NOSIZE \ (PVFS_ATTR_SYS_COMMON_ALL | PVFS_ATTR_SYS_LNK_TARGET | \ - PVFS_ATTR_SYS_DFILE_COUNT | PVFS_ATTR_SYS_DIRENT_COUNT \ - | PVFS_ATTR_SYS_DIR_HINT) + PVFS_ATTR_SYS_DFILE_COUNT | PVFS_ATTR_SYS_DIRENT_COUNT | \ + PVFS_ATTR_SYS_MIRROR_COPIES_COUNT | \ + PVFS_ATTR_SYS_DIR_HINT | PVFS_ATTR_SYS_BLKSIZE) #define PVFS_ATTR_SYS_ALL_SETABLE \ (PVFS_ATTR_SYS_COMMON_ALL-PVFS_ATTR_SYS_TYPE) #define PVFS_ATTR_SYS_ALL_TIMES \ ((PVFS_ATTR_SYS_COMMON_ALL-PVFS_ATTR_SYS_TYPE) | PVFS_ATTR_SYS_ATIME_SET | 
PVFS_ATTR_SYS_MTIME_SET) - -/* Extended attribute flags */ -#define PVFS_XATTR_CREATE 0x1 -#define PVFS_XATTR_REPLACE 0x2 - /* Extended attribute flags */ #define PVFS_XATTR_CREATE 0x1 #define PVFS_XATTR_REPLACE 0x2 @@ -422,12 +493,16 @@ typedef struct { PVFS_handle handle; PVFS_fs_id fs_id; + uint64_t cid; int32_t __pad1; } PVFS_object_ref; /** Credentials (stubbed for future authentication methods). */ typedef struct { +#ifdef WIN32 + /* TODO - store username string? */ +#endif PVFS_uid uid; PVFS_gid gid; } PVFS_credentials; @@ -442,17 +517,24 @@ endecode_fields_2( #define PVFS_NAME_MAX 256 /* max len of individual path element */ #define PVFS_SEGMENT_MAX PVFS_NAME_MAX +/* max len of an entire path */ +/* note protocol only handles a segment, not a path */ +#define PVFS_PATH_MAX 4096 /* max extended attribute name len as imposed by the VFS and exploited for the * upcall request types. * NOTE: Please retain them as multiples of 8 even if you wish to change them * This is *NECESSARY* for supporting 32 bit user-space binaries on a 64-bit kernel. + * Due to implementation within DBPF, this really needs to be PVFS_NAME_MAX, + * which it was the same value as, but no reason to let it break if that + * changes in the future. 
*/ -#define PVFS_MAX_XATTR_NAMELEN 256 /* Not the same as XATTR_NAME_MAX defined - by */ -#define PVFS_MAX_XATTR_VALUELEN 256 /* Not the same as XATTR_SIZE_MAX defined +#define PVFS_MAX_XATTR_NAMELEN PVFS_NAME_MAX /* Not the same as + XATTR_NAME_MAX defined + by */ +#define PVFS_MAX_XATTR_VALUELEN 8192 /* Not the same as XATTR_SIZE_MAX defined by */ -#define PVFS_MAX_XATTR_LISTLEN 8 /* Not the same as XATTR_LIST_MAX +#define PVFS_MAX_XATTR_LISTLEN 16 /* Not the same as XATTR_LIST_MAX defined by */ /* This structure is used by the VFS-client interaction alone */ @@ -484,12 +566,35 @@ enum PVFS_server_param PVFS_SERV_PARAM_FSID_CHECK = 2, /* verify that an fsid is ok */ PVFS_SERV_PARAM_ROOT_CHECK = 3, /* verify existance of root handle */ PVFS_SERV_PARAM_MODE = 4, /* change the current server mode */ - PVFS_SERV_PARAM_EVENT_ON = 5, /* event logging on or off */ - PVFS_SERV_PARAM_EVENT_MASKS = 6, /* API masks for event logging */ + PVFS_SERV_PARAM_EVENT_ENABLE = 5, /* event enable */ + PVFS_SERV_PARAM_EVENT_DISABLE = 6, /* event disable */ PVFS_SERV_PARAM_SYNC_META = 7, /* metadata sync flags */ PVFS_SERV_PARAM_SYNC_DATA = 8, /* file data sync flags */ + PVFS_SERV_PARAM_DROP_CACHES = 9 +}; + +enum PVFS_mgmt_param_type +{ + PVFS_MGMT_PARAM_TYPE_UINT64, + PVFS_MGMT_PARAM_TYPE_STRING +} ; + +struct PVFS_mgmt_setparam_value +{ + enum PVFS_mgmt_param_type type; + union + { + uint64_t value; + char *string_value; + } u; }; +encode_enum_union_2_struct( + PVFS_mgmt_setparam_value, + type, u, + uint64_t, value, PVFS_MGMT_PARAM_TYPE_UINT64, + string, string_value, PVFS_MGMT_PARAM_TYPE_STRING); + enum PVFS_server_mode { PVFS_SERVER_NORMAL_MODE = 1, /* default server operating mode */ @@ -527,7 +632,10 @@ typedef struct { int PVFS_strerror_r(int errnum, char *buf, int n); void PVFS_perror(const char *text, int retcode); void PVFS_perror_gossip(const char* text, int retcode); +void PVFS_perror_gossip_silent(void); +void PVFS_perror_gossip_verbose(void); PVFS_error 
PVFS_get_errno_mapping(PVFS_error error); +PVFS_error PVFS_errno_to_error(int err); /* special bits used to differentiate PVFS error codes from system * errno values @@ -622,6 +730,7 @@ PVFS_error PVFS_get_errno_mapping(PVFS_error error); #define PVFS_EALREADY E(57) /* Operation already in progress */ #define PVFS_EACCES E(58) /* Access not allowed */ #define PVFS_ECONNRESET E(59) /* Connection reset by peer */ +#define PVFS_ERANGE E(60) /* Math out of range or buf too small */ /***************** non-errno/pvfs2 specific error codes *****************/ #define PVFS_ECANCEL (1|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) @@ -631,6 +740,7 @@ PVFS_error PVFS_get_errno_mapping(PVFS_error error); #define PVFS_EADDRNTFD (5|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) #define PVFS_ENORECVR (6|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) #define PVFS_ETRYAGAIN (7|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_ENOTPVFS (8|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) /* NOTE: PLEASE DO NOT ARBITRARILY ADD NEW ERRNO ERROR CODES! 
* @@ -751,6 +861,7 @@ const char *PINT_non_errno_strerror_mapping[] = { \ "No address associated with name", \ "Unknown server error", \ "Host name lookup failure", \ + "Path contains non-PVFS elements", \ }; \ PVFS_error PINT_non_errno_mapping[] = { \ 0, /* leave this one empty */ \ @@ -761,6 +872,7 @@ PVFS_error PINT_non_errno_mapping[] = { \ PVFS_EADDRNTFD, /* 5 */ \ PVFS_ENORECVR, /* 6 */ \ PVFS_ETRYAGAIN, /* 7 */ \ + PVFS_ENOTPVFS, /* 8 */ \ } /* @@ -796,6 +908,19 @@ PVFS_error PVFS_get_errno_mapping(PVFS_error error) \ } \ return ret; \ } \ +PVFS_error PVFS_errno_to_error(int err) \ +{ \ + PVFS_error e = 0; \ + \ + for(; e < PVFS_ERRNO_MAX; ++e) \ + { \ + if(PINT_errno_mapping[e] == err) \ + { \ + return e; \ + } \ + } \ + return 0; \ +} \ DECLARE_ERRNO_MAPPING() #define PVFS_ERROR_TO_ERRNO(__error) PVFS_get_errno_mapping(__error) @@ -835,6 +960,43 @@ enum PVFS_io_type */ #define PVFS2_SUPER_MAGIC 0x20030528 +/* flag value that can be used with mgmt_iterate_handles to retrieve + * reserved handle values + */ +#define PVFS_MGMT_RESERVED 1 + +/* + * Structure and macros for timing things for profile-like output. 
+ * + */ +struct profiler +{ + struct timeval start; + struct timeval finish; + uint64_t save_timing; /* elapsed time in microseconds, set by FINISH_PROFILER */ +}; + +#define INIT_PROFILER(prof_struct) prof_struct.save_timing = 0; + +#define START_PROFILER(prof_struct) \ + gettimeofday(&prof_struct.start, NULL); + +#define FINISH_PROFILER(label, prof_struct, print_timing) \ +{ \ + double t_start, t_finish; \ + gettimeofday(&prof_struct.finish, NULL); \ + t_start = prof_struct.start.tv_sec + (prof_struct.start.tv_usec/1000000.0); \ + t_finish = prof_struct.finish.tv_sec + (prof_struct.finish.tv_usec/1000000.0); \ + prof_struct.save_timing = (t_finish - t_start) * 1000000.0; \ + if (print_timing) { \ + gossip_err("PROFILING %s: %f\n", label, t_finish - t_start); \ + } \ +} + +#define PRINT_PROFILER(label, prof_struct) \ + gossip_err("PROFILING %s: %f\n", label, prof_struct.save_timing / 1000000.0); + + #endif /* __PVFS2_TYPES_H */ /* diff --git a/include/pvfs2-util.h b/include/pvfs2-util.h index 84f54bc..ff314c0 100644 --- a/include/pvfs2-util.h +++ b/include/pvfs2-util.h @@ -73,6 +73,11 @@ int PVFS_util_resolve( PVFS_fs_id* out_fs_id, char* out_fs_path, int out_fs_path_max); +int PVFS_util_resolve_absolute( + const char* local_path, + PVFS_fs_id* out_fs_id, + char* out_fs_path, + int out_fs_path_max); int PVFS_util_get_default_fsid( PVFS_fs_id* out_fs_id); diff --git a/include/pvfs2.h b/include/pvfs2.h index c88af37..842da60 100644 --- a/include/pvfs2.h +++ b/include/pvfs2.h @@ -12,6 +12,12 @@ #ifndef __PVFS2_H #define __PVFS2_H +#ifndef PVFS2_VERSION_MAJOR +#define PVFS2_VERSION_MAJOR 2 +#define PVFS2_VERSION_MINOR 8 +#define PVFS2_VERSION_SUB 6 +#endif + #include "pvfs2-types.h" #include "pvfs2-sysint.h" #include "pvfs2-debug.h" diff --git a/include/pvfs2.h.in b/include/pvfs2.h.in new file mode 100644 index 0000000..41e7ca1 --- /dev/null +++ b/include/pvfs2.h.in @@ -0,0 +1,36 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory.
+ */ + +/* This is the master header file for pvfs2. It pulls in all header + * files needed by client side for software that operates at or above + * the system interface level. + */ + +#ifndef __PVFS2_H +#define __PVFS2_H + +#ifndef PVFS2_VERSION_MAJOR +#define PVFS2_VERSION_MAJOR @PVFS2_VERSION_MAJOR@ +#define PVFS2_VERSION_MINOR @PVFS2_VERSION_MINOR@ +#define PVFS2_VERSION_SUB @PVFS2_VERSION_SUB@ +#endif + +#include "pvfs2-types.h" +#include "pvfs2-sysint.h" +#include "pvfs2-debug.h" +#include "pvfs2-util.h" +#include "pvfs2-request.h" + +#endif /* __PVFS2_H */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/maint/config/bdb.m4 b/maint/config/bdb.m4 index 9e17735..239e0f1 100644 --- a/maint/config/bdb.m4 +++ b/maint/config/bdb.m4 @@ -231,5 +231,24 @@ AC_DEFUN([AX_BERKELEY_DB], AC_DEFINE(HAVE_DB_GET_PAGESIZE, 1, [Define if DB has get_pagesize function]), AC_MSG_RESULT(no)) + dnl Check BDB version here since it's just a warning + AC_MSG_CHECKING([Berkeley DB version]) + AC_TRY_COMPILE( + [ + #include + ], + [ + #if DB_VERSION_MAJOR < 4 || \ + (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 8) || \ + (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR == 8 && \ + DB_VERSION_PATCH < 30) + #error "Recommend version of Berkeley DB at least 4.8.30" + #endif + ], + AC_MSG_RESULT(yes) + HAVE_DB_OLD=0, + AC_MSG_RESULT(no) + HAVE_DB_OLD=1 + ) CFLAGS="$oldcflags" ]) diff --git a/maint/config/gm.m4 b/maint/config/gm.m4 index 06fb152..7bcda1a 100644 --- a/maint/config/gm.m4 +++ b/maint/config/gm.m4 @@ -13,7 +13,7 @@ AC_DEFUN([AX_GM], dnl --with-gm-libs=/lib (or lib64 if that exists) gm_home= AC_ARG_WITH(gm, - [ --with-gm= Location of the GM install (default no GM)], +[ --with-gm= Location of the GM install (default no GM)], if test -z "$withval" -o "$withval" = yes ; then AC_MSG_ERROR([Option --with-gm requires the path to your GM tree.]) elif test "$withval" != no ; then @@ -21,7 +21,8 @@ 
AC_DEFUN([AX_GM], fi ) AC_ARG_WITH(gm-includes, - [ --with-gm-includes= Location of the GM includes], +[ --with-gm-includes= + Location of the GM includes], if test -z "$withval" -o "$withval" = yes ; then AC_MSG_ERROR([Option --with-gm-includes requires path to GM headers.]) elif test "$withval" != no ; then @@ -29,7 +30,7 @@ AC_DEFUN([AX_GM], fi ) AC_ARG_WITH(gm-libs, - [ --with-gm-libs= Location of the GM libraries], +[ --with-gm-libs= Location of the GM libraries], if test -z "$withval" -o "$withval" = yes ; then AC_MSG_ERROR([Option --with-gm-libs requires path to GM libraries.]) elif test "$withval" != no ; then diff --git a/maint/config/ib.m4 b/maint/config/ib.m4 index 8a7297e..8e5fc9c 100644 --- a/maint/config/ib.m4 +++ b/maint/config/ib.m4 @@ -14,7 +14,8 @@ AC_DEFUN([AX_IB], fi ) AC_ARG_WITH(ib-includes, - [ --with-ib-includes= Location of the IB includes], +[ --with-ib-includes= + Location of the IB includes], if test -z "$withval" -o "$withval" = yes ; then AC_MSG_ERROR([Option --with-ib-includes requires path to IB headers.]) elif test "$withval" != no ; then @@ -22,7 +23,7 @@ AC_DEFUN([AX_IB], fi ) AC_ARG_WITH(ib-libs, - [ --with-ib-libs= Location of the IB libraries], +[ --with-ib-libs= Location of the IB libraries], if test -z "$withval" -o "$withval" = yes ; then AC_MSG_ERROR([Option --with-ib-libs requires path to IB libraries.]) elif test "$withval" != no ; then @@ -80,7 +81,8 @@ AC_DEFUN([AX_IB], fi ) AC_ARG_WITH(openib-includes, - [ --with-openib-includes= Location of the OpenIB includes], +[ --with-openib-includes= + Location of the OpenIB includes], if test -z "$withval" -o "$withval" = yes ; then AC_MSG_ERROR([Option --with-openib-includes requires path to OpenIB headers.]) elif test "$withval" != no ; then @@ -88,7 +90,8 @@ AC_DEFUN([AX_IB], fi ) AC_ARG_WITH(openib-libs, - [ --with-openib-libs= Location of the OpenIB libraries], +[ --with-openib-libs= + Location of the OpenIB libraries], if test -z "$withval" -o "$withval" = yes ; then 
AC_MSG_ERROR([Option --with-openib-libs requires path to OpenIB libraries.]) elif test "$withval" != no ; then diff --git a/maint/config/kernel.m4 b/maint/config/kernel.m4 index db12f4e..7f7e029 100644 --- a/maint/config/kernel.m4 +++ b/maint/config/kernel.m4 @@ -13,15 +13,83 @@ AC_DEFUN([AX_KERNEL_FEATURES], dnl 'implicit function declaration' usually ends up in an undefined dnl symbol somewhere. + dnl opensuse11.2 32bit only reports the correct include path when in + dnl specific directories, must be some search path broken-ness? + dnl switching to / fixes the problem and shouldn't break others + NOSTDINCFLAGS="-Werror-implicit-function-declaration -nostdinc -isystem `$CC -print-file-name=include`" - CFLAGS="$USR_CFLAGS $NOSTDINCFLAGS -I$lk_src/include -I$lk_src/include/asm/mach-default -DKBUILD_STR(s)=#s -DKBUILD_BASENAME=KBUILD_STR(empty) -DKBUILD_MODNAME=KBUILD_STR(empty) -imacros $lk_src/include/linux/autoconf.h" + dnl SuSE and other distros that have a separate kernel obj directory + dnl need to have include files from both the obj directory and the + dnl full source listed in the includes. Kbuild handles this when + dnl compiling but the configure checks don't handle this on their own. + dnl The strategy here is just to set a new variable, lk_src_source, + dnl when the provided kernel source has a source directory. If it + dnl doesn't exist just set it lk_src. There may be a cleaner way to do + dnl this, for now, this appears to do the trick. 
+ if test -d $lk_src/source; then + lk_src_source="$lk_src/source" + else + lk_src_source=$lk_src + fi + + CFLAGS="$USR_CFLAGS $NOSTDINCFLAGS -I$lk_src_source/include -I$lk_src_source/include/asm/mach-default -DKBUILD_STR(s)=#s -DKBUILD_BASENAME=KBUILD_STR(empty) -DKBUILD_MODNAME=KBUILD_STR(empty)" + + dnl kernels > 2.6.32 now use generated/autoconf.h + dnl look in lk_src for the generated autoconf.h + if test -f $lk_src/include/generated/autoconf.h ; then + CFLAGS="$CFLAGS -imacros $lk_src/include/generated/autoconf.h" + else + CFLAGS="$CFLAGS -imacros $lk_src/include/linux/autoconf.h" + fi dnl we probably need additional includes if this build is intended dnl for a different architecture if test -n "${ARCH}" ; then - CFLAGS="$CFLAGS -I$lk_src/arch/${ARCH}/include" + CFLAGS="$CFLAGS -I$lk_src_source/arch/${ARCH}/include -I$lk_src_source/arch/${ARCH}/include/asm/mach-default" + else + SUBARCH=`uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ + -e s/arm.*/arm/ -e s/sa110/arm/ \ + -e s/s390x/s390/ -e s/parisc64/parisc/ \ + -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ + -e s/sh.*/sh/` + if test "x$SUBARCH" = "xi386"; then + ARCH=x86 + elif test "x$SUBARCH" = "xx86_64"; then + ARCH=x86 + elif test "x$SUBARCH" = "xsparc64"; then + ARCH=sparc + else + ARCH=$SUBARCH + fi + + CFLAGS="$CFLAGS -I$lk_src_source/arch/${ARCH}/include -I$lk_src_source/arch/${ARCH}/include/asm/mach-default" + fi + + dnl if there are two different include paths (lk_src/include and + dnl lk_src_source/include) add the lk_src/include path to the CFLAGS + dnl here. + if test "$lk_src" != "$lk_src_source"; then + CFLAGS="$CFLAGS -I$lk_src/include" + fi + + dnl in 2.6.40 (maybe .39 too) inclusion of linux/fs.h breaks unless + dnl optimization flag of some sort is set. To complicate matters + dnl checks in earlier versions break when optimization is turned on. 
+ need_optimize_flag=0 + AC_MSG_CHECKING(for sanity of linux/fs.h include) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [], + AC_MSG_RESULT(yes), + AC_MSG_RESULT(no) + need_optimize_flag=1, + ) + if test $need_optimize_flag -eq 1; then + CFLAGS="-Os $CFLAGS" + fi AC_MSG_CHECKING(for i_size_write in kernel) dnl if this test passes, the kernel does not have it @@ -244,6 +312,20 @@ AC_DEFUN([AX_KERNEL_FEATURES], AC_MSG_RESULT(no) ) + dnl 2.6.16 removed this member + AC_MSG_CHECKING(for i_sem in struct inode) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct inode i = { + .i_sem = {0}, + }; + ], [], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_I_SEM_IN_STRUCT_INODE, 1, Define if struct inode in kernel has i_sem member), + AC_MSG_RESULT(no) + ) + dnl checking if we have a statfs_lite callback in super_operations AC_MSG_CHECKING(for statfs_lite callback in struct super_operations in kernel) AC_TRY_COMPILE([ @@ -415,18 +497,7 @@ AC_DEFUN([AX_KERNEL_FEATURES], fi - dnl certain Fedora FC5 kernel header files throw extra (spurious) - dnl warnings, which -Wno-pointer-sign silences, but that option is - dnl only supported by gcc-4. - if test "x$GCC" = "xyes" ; then - AC_MSG_CHECKING(for gcc major version) - gcc_version=`$CC --version| head -1 | tr . ' ' | cut -d ' ' -f 3` - AC_MSG_RESULT($gcc_version) - if test $gcc_version -gt 3 ; then - extra_gcc_flags="-Wno-pointer-sign -Wno-strict-aliasing -Wno-strict-aliasing=2" - fi - fi - + CFLAGS="$CFLAGS -Werror" AC_MSG_CHECKING(for dentry argument in kernel super_operations statfs) dnl Rely on the fact that there is an external vfs_statfs that is dnl of the same type as the .statfs in struct super_operations to @@ -442,18 +513,28 @@ AC_DEFUN([AX_KERNEL_FEATURES], dnl If this test passes, the kernel uses a struct dentry argument. dnl If this test fails, the kernel uses something else (old struct dnl super_block perhaps). 
+ dnl + dnl Need to use the second approach because vfs_statfs changes without + dnl a cooresponding change in statfs in super_operations. I'm not that + dnl concerned with reliance on Werror since we use it heavily + dnl throughout these checks AC_TRY_COMPILE([ #define __KERNEL__ #include - int vfs_statfs(struct dentry *de, struct kstatfs *kfs) + struct super_operations sop; + int s(struct dentry *de, struct kstatfs *kfs) { return 0; } - ], [], + ], + [ + sop.statfs = s; + ], AC_MSG_RESULT(yes) AC_DEFINE(HAVE_DENTRY_STATFS_SOP, 1, Define if super_operations statfs has dentry argument), AC_MSG_RESULT(no) ) + CFLAGS=$tmp_cflags AC_MSG_CHECKING(for vfsmount argument in kernel file_system_type get_sb) dnl Same trick as above. A single commit changed mayn things at once: @@ -482,6 +563,63 @@ AC_DEFUN([AX_KERNEL_FEATURES], AC_MSG_RESULT(no) ) + AC_MSG_CHECKING(for get_sb_nodev) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + int v_fill_sb(struct super_block *sb, void *data, int s) + { + return 0; + } + ], + [ + int ret = 0; + struct super_block *sb = NULL; +#ifdef HAVE_VFSMOUNT_GETSB + ret = get_sb_nodev(NULL, 0, NULL, v_fill_sb, NULL ); +#else + sb = get_sb_nodev(NULL, 0, NULL, v_fill_sb); +#endif + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GETSB_NODEV, 1, Define if get_sb_nodev function exists ), + AC_MSG_RESULT(no) + ) + + AC_MSG_CHECKING(for file_system_type get_sb) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], + [ + struct file_system_type f; + f.get_sb = NULL; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FSTYPE_GET_SB, 1, Define if only filesystem_type has get_sb), + AC_MSG_RESULT(no) + ) + + + + AC_MSG_CHECKING(for file_system_type mount exclusively) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], + [ +#ifdef HAVE_FSTYPE_GET_SB + assert(0); +#else + struct file_system_type f; + f.mount = NULL; +#endif + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FSTYPE_MOUNT_ONLY, 1, Define if only filesystem_type has mount and HAVE_FSTYPE_GET_SB is false), + 
AC_MSG_RESULT(no) + ) + AC_MSG_CHECKING(for xattr support in kernel) dnl if this test passes, the kernel has it dnl if this test fails, the kernel does not have it @@ -524,18 +662,63 @@ AC_DEFUN([AX_KERNEL_FEATURES], ) fi - dnl Test to see if sysctl proc handlers have a 6th argument - AC_MSG_CHECKING(for 6th argument to sysctl proc handlers) - dnl if this test passes, there is a 6th argument + dnl the proc handler functions have changed over the years. + dnl pre-2.6.8: proc_handler(ctl_table *ctl, + dnl int write, + dnl struct file *filp, + dnl void *buffer, + dnl size_t *lenp) + dnl + dnl 2.6.8-2.6.31: proc_handler(ctl_table *ctl, + dnl int write, + dnl struct file *filp, + dnl void *buffer, + dnl size_t *lenp, + dnl loff_t *ppos) + dnl > 2.6.31: proc_handler(ctl_table *ctl, + dnl int write, + dnl void *buffer, + dnl size_t *lenp, + dnl loff_t *ppos) + + dnl Test to see if sysctl proc handlers have a file argument + AC_MSG_CHECKING(for file argument to sysctl proc handlers) AC_TRY_COMPILE([ #define __KERNEL__ #include #include ], [ - proc_dointvec_minmax(NULL, 0, NULL, NULL, NULL, NULL); + struct ctl_table * ctl = NULL; + int write = 0; + struct file * filp = NULL; + void __user * buffer = NULL; + size_t * lenp = NULL; + loff_t * ppos = NULL; + + proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PROC_HANDLER_SIX_ARG, 1, Define if sysctl proc handlers have 6th argument), + AC_DEFINE(HAVE_PROC_HANDLER_FILE_ARG, 1, Define if sysctl proc handlers have 6th argument), + AC_MSG_RESULT(no) + ) + + AC_MSG_CHECKING(for ppos argument to sysctl proc handlers) + dnl if this test passes, there is a ppos argument + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + ], [ + struct ctl_table * ctl = NULL; + int write = 0; + void __user * buffer = NULL; + size_t * lenp = NULL; + loff_t * ppos = NULL; + + proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_PROC_HANDLER_PPOS_ARG, 
1, Define if sysctl proc handlers have ppos argument), AC_MSG_RESULT(no) ) @@ -589,6 +772,12 @@ AC_DEFUN([AX_KERNEL_FEATURES], #include ]) + dnl no bkl, no need for smp_lock.h + AC_CHECK_HEADER([linux/smp_lock.h], [], [], + [#define __KERNEL__ + #include + ]) + AC_MSG_CHECKING(for generic_file_readv api in kernel) dnl if this test passes, the kernel does not have it dnl if this test fails, the kernel has it defined with a different @@ -627,13 +816,234 @@ AC_DEFUN([AX_KERNEL_FEATURES], AC_DEFINE(HAVE_GENERIC_PERMISSION, 1, Define if kernel has generic_permission), ) + dnl generic_permission in < 2.6.38 has three parameters + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for three-param generic_permission) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + struct inode *f; + ], + [ + generic_permission(f, 0, NULL); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_THREE_PARAM_GENERIC_PERMISSION, 1, [Define if generic_permission takes three parameters]), + AC_MSG_RESULT(no) + ) + + dnl generic_permission in >= 2.6.38 and 3.0.x has four parameters + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for four-param generic_permission) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + struct inode *f; + ], + [ + generic_permission(f, 0, 0, NULL); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FOUR_PARAM_GENERIC_PERMISSION, 1, [Define if generic_permission takes four parameters]), + AC_MSG_RESULT(no) + ) + + dnl generic_permission in >= 3.1.x has two parameters + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for two-param generic_permission) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + struct inode *f; + ], + [ + generic_permission(f, 0); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_TWO_PARAM_GENERIC_PERMISSION, 1, [Define if generic_permission takes two parameters]), + AC_MSG_RESULT(no) + ) + + dnl set_nlink is defined in 3.2.x + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for set_nlink) + 
AC_TRY_COMPILE([ + #define __KERNEL__ + #include + struct inode *i; + ], + [ + set_nlink(i, 0); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_I_SET_NLINK, 1, [Define if set_nlink exists]), + AC_MSG_RESULT(no) + ) + + dnl inc_nlink is defined in 3.2.x + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for inc_nlink) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + struct inode *i; + ], + [ + inc_nlink(i); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_I_INC_NLINK, 1, [Define if inc_nlink exists]), + AC_MSG_RESULT(no) + ) + + dnl drop_nlink is defined in 3.2.x + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for drop_nlink) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + struct inode *i; + ], + [ + drop_nlink(i); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_I_DROP_NLINK, 1, [Define if drop_nlink exists]), + AC_MSG_RESULT(no) + ) + + dnl clear_nlink is defined in 3.2.x + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for clear_nlink) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + struct inode *i; + ], + [ + clear_nlink(i); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_I_CLEAR_NLINK, 1, [Define if clear_nlink exists]), + AC_MSG_RESULT(no) + ) + + dnl check for posix_acl_equiv_mode umode_t type + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for posix_acl_equiv_mode umode_t) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + struct posix_acl *acl; + umode_t mode = 0; + ], + [ + posix_acl_equiv_mode(acl, &mode); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_POSIX_ACL_EQUIV_MODE_UMODE_T, 1, [Define if posix_acl_equiv_mode accepts umode_t type]), + AC_MSG_RESULT(no) + ) + + dnl check for posix_acl_create + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for posix_acl_create) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + struct posix_acl *acl; + umode_t mode = 0; + ], + [ + posix_acl_create(&acl, GFP_KERNEL, &mode); + ], + AC_MSG_RESULT(yes) + 
AC_DEFINE(HAVE_POSIX_ACL_CREATE, 1, [Define if posix_acl_create_masq accepts umode_t type]), + AC_MSG_RESULT(no) + ) + + dnl check for posix_acl_chmod + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for posix_acl_chmod) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + struct posix_acl *acl; + struct inode *inode; + umode_t mode = 0; + ], + [ + posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode ); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_POSIX_ACL_CHMOD, 1, [Define if posix_acl_chmod exists]), + AC_MSG_RESULT(no) + ) + + + dnl check for posix_acl_clone + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for posix_acl_clone) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + struct posix_acl *acl; + ], + [ + posix_acl_clone(acl, GFP_KERNEL); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_POSIX_ACL_CLONE, 1, [Define if posix_acl_clone exists]), + AC_MSG_RESULT(no) + ) + + dnl check for fsync with loff_t + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for fsync with loff_t) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + + int my_fsync(struct file *, loff_t, loff_t, int); + + int my_fsync(struct file *f, loff_t start, loff_t end, int datasync) + { + } + ], + [ + struct file_operations fop; + + fop.fsync = my_fsync; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FSYNC_LOFF_T_PARAMS, 1, [Define if fsync has loff_t params]), + AC_MSG_RESULT(no) + ) + + AC_MSG_CHECKING(for generic_getxattr api in kernel) dnl if this test passes, the kernel does not have it dnl if this test fails, the kernel has it defined AC_TRY_COMPILE([ #define __KERNEL__ #include - #include + #include int generic_getxattr(struct inode *inode) { return 0; @@ -657,6 +1067,32 @@ AC_DEFUN([AX_KERNEL_FEATURES], AC_MSG_RESULT(no) ) + AC_MSG_CHECKING(for fh_to_dentry member in export_operations in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct export_operations x; + x.fh_to_dentry = NULL; + ], + 
AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FHTODENTRY_EXPORT_OPERATIONS, 1, Define if export_operations has an fh_to_dentry member), + AC_MSG_RESULT(no) + ) + + AC_MSG_CHECKING(for encode_fh member in export_operations in kernel) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct export_operations x; + x.encode_fh = NULL; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_ENCODEFH_EXPORT_OPERATIONS, 1, Define if export_operations has an encode_fh member), + AC_MSG_RESULT(no) + ) + dnl Using -Werror is not an option, because some arches throw lots of dnl warnings that would trigger false negatives. We know that the dnl change to the releasepage() function signature was accompanied by @@ -863,15 +1299,18 @@ AC_DEFUN([AX_KERNEL_FEATURES], ) dnl old linux kernels do not have class_create and related functions + dnl + dnl check for class_device_destroy() to weed out RHEL4 kernels that + dnl have some class functions but not others AC_MSG_CHECKING(if kernel has device classes) AC_TRY_COMPILE([ #define __KERNEL__ #include ], [ - class_create(NULL, "pvfs2") + class_device_destroy(NULL, "pvfs2") ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_DEVICE_CLASSES, 1, Define if kernel lacks device classes), + AC_DEFINE(HAVE_KERNEL_DEVICE_CLASSES, 1, Define if kernel has device classes), AC_MSG_RESULT(no) ) @@ -888,6 +1327,61 @@ AC_DEFUN([AX_KERNEL_FEATURES], AC_MSG_RESULT(no) ) + dnl 2.6.27 changed the constructor parameter signature of + dnl kmem_cache_create. Check for this newer one-param style + dnl If they don't match, gcc complains about + dnl passing argument ... from incompatible pointer type, hence the + dnl need for the -Werror. Note that the next configure test will + dnl determine if we have a two param constructor or not. 
+ tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for one-param kmem_cache_create constructor) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + void ctor(void *req) + { + } + ], [ + kmem_cache_create("config-test", 0, 0, 0, ctor); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KMEM_CACHE_CREATE_CTOR_ONE_PARAM, 1, [Define if kernel kmem_cache_create constructor has newer-style one-parameter form]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl 2.6.27 changed the parameter signature of + dnl inode_operations->permission. Check for this newer two-param style + dnl If they don't match, gcc complains about + dnl passing argument ... from incompatible pointer type, hence the + dnl need for the -Werror and -Wall. + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror -Wall" + AC_MSG_CHECKING(for two param permission) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + #include + #include + int ctor(struct inode *i, int a) + { + return 0; + } + struct inode_operations iop = { + .permission = ctor, + }; + ], [ + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_TWO_PARAM_PERMISSION, 1, [Define if kernel's inode_operations has two parameters permission function]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl 2.6.24 changed the constructor parameter signature of dnl kmem_cache_create. Check for this newer two-param style and dnl if not, assume it is old. 
Note we can get away with just @@ -927,6 +1421,8 @@ AC_DEFUN([AX_KERNEL_FEATURES], AC_MSG_RESULT(no) ) + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" AC_MSG_CHECKING(if kernel address_space struct has a rwlock_t field named tree_lock) AC_TRY_COMPILE([ #define __KERNEL__ @@ -936,9 +1432,26 @@ AC_DEFUN([AX_KERNEL_FEATURES], read_lock(&as.tree_lock); ], AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SPIN_LOCK_TREE_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a spin_lock member named tree_lock instead of rw_lock]), + AC_DEFINE(HAVE_RW_LOCK_TREE_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a rw_lock_t member named tree_lock]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(if kernel address_space struct has a spinlock_t field named tree_lock) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct address_space as; + spin_lock(&as.tree_lock); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SPIN_LOCK_TREE_ADDR_SPACE_STRUCT, 1, [Define if kernel address_space struct has a spin_lock_t member named tree_lock]), AC_MSG_RESULT(no) ) + CFLAGS=$tmp_cflags AC_MSG_CHECKING(if kernel address_space struct has a priv_lock field - from RT linux) AC_TRY_COMPILE([ @@ -980,5 +1493,509 @@ AC_DEFUN([AX_KERNEL_FEATURES], AC_MSG_RESULT(no) ) + dnl Starting with 2.6.26, drop_inode and put_inode go away + AC_MSG_CHECKING(if kernel super_operations contains drop_inode field) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct super_operations sops; + sops.drop_inode(NULL); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DROP_INODE, 1, [Define if kernel super_operations contains drop_inode field]), + AC_MSG_RESULT(no) + ) + + dnl Starting with 2.6.26, drop_inode and put_inode go away + AC_MSG_CHECKING(if kernel super_operations contains put_inode field) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct super_operations sops; + sops.put_inode(NULL); + ], + 
AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_PUT_INODE, 1, [Define if kernel super_operations contains put_inode field]), + AC_MSG_RESULT(no) + ) + + dnl older 2.6 kernels don't have MNT_NOATIME + AC_MSG_CHECKING(if mount.h defines MNT_NOATIME) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + int flag = MNT_NOATIME; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_MNT_NOATIME, 1, [Define if mount.h contains + MNT_NOATIME flags]), + AC_MSG_RESULT(no) + ) + + dnl older 2.6 kernels don't have MNT_NODIRATIME + AC_MSG_CHECKING(if mount.h defines MNT_NODIRATIME) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + int flag = MNT_NODIRATIME; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_MNT_NODIRATIME, 1, [Define if mount.h contains + MNT_NODIRATIME flags]), + AC_MSG_RESULT(no) + ) + + dnl newer 2.6 kernels (2.6.28) use d_obtain_alias instead of d_alloc_anon + AC_MSG_CHECKING(for d_alloc_anon) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct inode *i; + d_alloc_anon(i); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_D_ALLOC_ANON, 1, [Define if dcache.h contains + d_alloc_annon]), + AC_MSG_RESULT(no) + ) + + AC_MSG_CHECKING(for s_dirty in struct super_block) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + ], [ + struct super_block *s; + list_empty(&s->s_dirty); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SB_DIRTY_LIST, 1, [Define if struct super_block has s_dirty list]), + AC_MSG_RESULT(no) + ) + + dnl newer 2.6 kernels (2.6.29-ish) use current_fsuid() macro instead + dnl of accessing task struct fields directly + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for current_fsuid) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + ], [ + int uid = current_fsuid(); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_CURRENT_FSUID, 1, [Define if cred.h contains current_fsuid]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl 2.6.32 added a mandatory name field to the bdi structure + AC_MSG_CHECKING(if kernel backing_dev_info struct 
has a name field) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + ], [ + struct backing_dev_info foo = + { + .name = "foo" + }; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BACKING_DEV_INFO_NAME, 1, [Define if kernel backing_dev_info struct has a name field]), + AC_MSG_RESULT(no) + ) + + dnl some 2.6 kernels have functions to explicitly initialize bdi structs + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for bdi_init) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + ], [ + int ret = bdi_init(NULL); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BDI_INIT, 1, [Define if bdi_init function is present]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + + dnl 2.6.33 API change, + dnl Removed .ctl_name from struct ctl_table. + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING([whether struct ctl_table has ctl_name]) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct ctl_table c = { .ctl_name = 0, }; + ],[ ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_CTL_NAME, 1, Define if struct ctl_table has ctl_name member), + AC_MSG_RESULT(no) + ) + + dnl Removed .strategy from struct ctl_table. + AC_MSG_CHECKING([whether struct ctl_table has strategy]) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct ctl_table c = { .strategy = 0, }; + ], [ ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_STRATEGY_NAME, 1, Define if struct ctl_table has strategy member), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl 2.6.33 changed the parameter signature of xattr_handler get + dnl member functions to have a fifth argument and changed the first + dnl parameter from struct inode to struct dentry. 
if the test fails + dnl assume the old 4 param with struct inode + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for five-param xattr_handler.get) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + static struct xattr_handler x; + static int get_xattr_h( struct dentry *d, const char *n, + void *b, size_t s, int h) + { return 0; } + ], + [ + x.get = get_xattr_h; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_XATTR_HANDLER_GET_FIVE_PARAM, 1, [Define if kernel xattr_handle get function has dentry as first parameter and a fifth parameter]), + AC_MSG_RESULT(no) + ) + + dnl 2.6.33 changed the parameter signature of xattr_handler set + dnl member functions to have a sixth argument and changed the first + dnl parameter from struct inode to struct dentry. if the test fails + dnl assume the old 5 param with struct inode + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for six-param xattr_handler.set) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + static struct xattr_handler x; + static int set_xattr_h( struct dentry *d, const char *n, + const void *b, size_t s, int f, int h) + { return 0; } + ], + [ + x.set = set_xattr_h; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_XATTR_HANDLER_SET_SIX_PARAM, 1, [Define if kernel xattr_handle set function has dentry as first parameter and a sixth parameter]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl xattr_handler is also a const + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for const s_xattr member in super_block struct) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + struct super_block sb; + const struct xattr_handler *x[] = { NULL }; + ], + [ + sb.s_xattr = x; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_CONST_S_XATTR_IN_SUPERBLOCK, 1, [Define if s_xattr member of super_block struct is const]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl early 2.6 kernels do not contain true/false enum in stddef.h + tmp_cflags=$CFLAGS + 
CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(stddef.h true/false enum) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + int f = true; + ], + [ ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_TRUE_FALSE_ENUM, 1, [Define if kernel stddef has true/false enum]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + + dnl fsync no longer has a dentry second parameter + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for dentry argument in fsync) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct file_operations f; + static int local_fsync(struct file *f, struct dentry *d, int i) + { return 0; } + ], + [ + f.fsync = local_fsync; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FSYNC_DENTRY_PARAM, 1, [Define if fsync function in file_operations struct wants a dentry pointer as the second parameter]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl file_operations has unlocked_ioctl instead of ioctl as of 2.6.36 + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for unlocked_ioctl in file_operations) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + static struct file_operations f; + ], + [ + f.unlocked_ioctl = NULL; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_UNLOCKED_IOCTL_HANDLER, 1, [Define if file_operations struct has unlocked_ioctl member]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl 2.6.36 removed inode_setattr with the other BKL removal changes + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for inode_setattr) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + struct iattr *iattr; + struct inode *inode; + int ret; + ], + [ + ret = inode_setattr(inode, iattr); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_INODE_SETATTR, 1, [Define if inode_setattr is defined]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl dentry operations struct d_hash function has a different signature + dnl in 2.6.38 and newer, second param is an inode + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for 
three-param dentry_operations.d_hash) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + static struct dentry_operations d; + static int d_hash_t(const struct dentry *d, + const struct inode *i, + struct qstr * q) + { return 0; } + ], + [ + d.d_hash = d_hash_t; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_THREE_PARAM_D_HASH, 1, [Define if d_hash member of dentry_operations has three params, the second inode paramsbeing the difference]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + + dnl dentry operations struct d_compare function has a different + dnl signature in 2.6.38 and newer, split out dentry/inodes, string and + dnl qstr params + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for seven-param dentry_operations.d_compare) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + static struct dentry_operations d; + static int d_compare_t(const struct dentry *d1, + const struct inode *i1, + const struct dentry *d2, + const struct inode *i2, + unsigned int len, + const char *str, + const struct qstr *qstr) + { return 0; } + ], + [ + d.d_compare = d_compare_t; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SEVEN_PARAM_D_COMPARE, 1, [Define if d_compare member of dentry_operations has seven params]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + + dnl dentry operations struct d_delete argumentis constified in + dnl 2.6.38 and newer + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for constified dentry_operations.d_delete) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + static struct dentry_operations d; + static int d_delete_t(const struct dentry *d) + { return 0; } + ], + [ + d.d_delete = d_delete_t; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_D_DELETE_CONST, 1, [Define if d_delete member of dentry_operations has a const dentry param]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl dentry member d_count is no longer atomic and has it's own spinlock + dnl in 2.6.38 and newer + 
tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for dentry.d_count atomic_t type) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + #include + struct dentry d; + atomic_t x; + ], + [ + x = d.d_count; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DENTRY_D_COUNT_ATOMIC, 1, [Define if d_count member of dentry is of type atomic_t]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl permission function pointer in the inode_operations struct now + dnl takes three params with the third being an unsigned int (circa + dnl 2.6.38 + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for three-param inode_operations permission) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + struct inode_operations i; + int p(struct inode *i, int mode, unsigned int flags) + { return 0; } + ], + [ + i.permission = p; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_THREE_PARAM_PERMISSION_WITH_FLAG, 1, [Define if permission function pointer of inode_operations struct has three parameters and the third parameter is for flags (unsigned int)]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl the acl_check parameter of the generic_permission function has a + dnl third parameter circa 2.6.38 + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for three-param acl_check of generic_permission) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + struct inode *i; + int p(struct inode *i, int mode, unsigned int flags) + { return 0; } + ], + [ + generic_permission(i, 0, 0, p); + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_THREE_PARAM_ACL_CHECK, 1, [Define if acl_check param of generic_permission function has three parameters]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl SPIN_LOCK_UNLOCKED has gone away in 2.6.39 in lieu of + dnl DEFINE_SPINLOCK() + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for SPIN_LOCK_UNLOCKED ) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + spinlock_t test_lock = SPIN_LOCK_UNLOCKED; + struct inode *i; + 
], [ ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SPIN_LOCK_UNLOCKED, 1, [Define if SPIN_LOCK_UNLOCKED defined]), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + + dnl get_sb goes away in 2.6.39 for mount_X + tmp_cflags=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_MSG_CHECKING(for get_sb ) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + struct file_system_type f; + ], + [ + f.get_sb = NULL; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GET_SB_MEMBER_FILE_SYSTEM_TYPE, 1, [Define if get_sb is a member of file_system_type struct]), + AC_MSG_RESULT(no) + ) + + AC_MSG_CHECKING(for dirty_inode flag) + AC_TRY_COMPILE([ + #define __KERNEL__ + #include + void di(struct inode *i, int f) + { + return; + } + ], + [ + struct super_operations s; + s.dirty_inode = di; + ], + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DIRTY_INODE_FLAGS, 1, Define if dirty_inode takes a flag argument ), + AC_MSG_RESULT(no) + ) + CFLAGS=$tmp_cflags + CFLAGS=$oldcflags + ]) diff --git a/maint/config/mx.m4 b/maint/config/mx.m4 index ab5d151..3b453bc 100644 --- a/maint/config/mx.m4 +++ b/maint/config/mx.m4 @@ -13,7 +13,7 @@ AC_DEFUN([AX_MX], dnl --with-mx-libs=/lib (or lib64 if that exists) mx_home= AC_ARG_WITH(mx, - [ --with-mx= Location of the MX install (default no MX)], +[ --with-mx= Location of the MX install (default no MX)], if test -z "$withval" -o "$withval" = yes ; then AC_MSG_ERROR([Option --with-mx requires the path to your MX tree.]) elif test "$withval" != no ; then @@ -21,7 +21,8 @@ AC_DEFUN([AX_MX], fi ) AC_ARG_WITH(mx-includes, - [ --with-mx-includes= Location of the MX includes], +[ --with-mx-includes= + Location of the MX includes], if test -z "$withval" -o "$withval" = yes ; then AC_MSG_ERROR([Option --with-mx-includes requires path to MX headers.]) elif test "$withval" != no ; then @@ -29,7 +30,7 @@ AC_DEFUN([AX_MX], fi ) AC_ARG_WITH(mx-libs, - [ --with-mx-libs= Location of the MX libraries], +[ --with-mx-libs= Location of the MX libraries], if test -z "$withval" -o "$withval" = yes ; then 
AC_MSG_ERROR([Option --with-mx-libs requires path to MX libraries.]) elif test "$withval" != no ; then @@ -68,6 +69,33 @@ AC_DEFUN([AX_MX], AC_SUBST(BUILD_MX) AC_SUBST(MX_INCDIR) AC_SUBST(MX_LIBDIR) + + if test -n "$BUILD_MX" ; then + dnl Check for existence of mx_decompose_endpoint_addr2 + save_ldflags="$LDFLAGS" + LDFLAGS="-L$MX_LIBDIR $LDFLAGS" + save_libs="$LIBS" + LIBS="-lmyriexpress -lpthread $LIBS" + save_cppflags="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -I$MX_INCDIR" + + AC_MSG_CHECKING(for mx_decompose_endpoint_addr2) + AC_TRY_LINK([ + #include "mx_extensions.h" + #include + ], [ + mx_endpoint_addr_t epa; + mx_decompose_endpoint_addr2(epa, NULL, NULL, NULL); + ], + AC_MSG_RESULT(yes), + AC_MSG_RESULT(no) + AC_MSG_ERROR([Function mx_decompose_endpoint_addr2() not found.]) + ) + + LDFLAGS="$save_ldflags" + CPPFLAGS="$save_cppflags" + LIBS="$save_libs" + fi ]) dnl vim: set ft=config : diff --git a/maint/config/openssl.m4 b/maint/config/openssl.m4 index a8401b3..d04fddb 100644 --- a/maint/config/openssl.m4 +++ b/maint/config/openssl.m4 @@ -12,7 +12,7 @@ AC_DEFUN([AX_OPENSSL], LDFLAGS="$LDFLAGS -L${opensslpath}/lib64 -L${opensslpath}/lib" SERVER_LDFLAGS="$SERVER_LDFLAGS -L${opensslpath}/lib64 -L${opensslpath}/lib" fi - LIBS="$LIBS -lcrypt -lssl" + LIBS="$LIBS -lcrypto -lssl" AC_COMPILE_IFELSE( [#include "openssl/bio.h"], @@ -29,6 +29,7 @@ AC_DEFUN([AX_OPENSSL], AC_CHECK_HEADERS(openssl/evp.h) AC_CHECK_HEADERS(openssl/crypto.h) + AC_CHECK_HEADERS(openssl/sha.h) fi ]) @@ -36,7 +37,7 @@ AC_DEFUN([AX_OPENSSL_OPTIONAL], [ AC_MSG_CHECKING([for openssl library]) TMPLIBS=${LIBS} - LIBS="$LIBS -lcrypt -lssl" + LIBS="$LIBS -lcrypto -lssl" AC_COMPILE_IFELSE( [#include "openssl/bio.h"], @@ -56,6 +57,7 @@ AC_DEFUN([AX_OPENSSL_OPTIONAL], AC_CHECK_HEADERS(openssl/evp.h) AC_CHECK_HEADERS(openssl/crypto.h) + AC_CHECK_HEADERS(openssl/sha.h) ]) diff --git a/maint/config/portals.m4 b/maint/config/portals.m4 index 2395cbc..9143740 100644 --- a/maint/config/portals.m4 +++ 
b/maint/config/portals.m4 @@ -30,7 +30,8 @@ AC_DEFUN([AX_PORTALS], fi ) AC_ARG_WITH(portals-includes, - [ --with-portals-includes= Extra CFLAGS to specify Portals includes], +[ --with-portals-includes= + Extra CFLAGS to specify Portals includes], if test -z "$withval" -o "$withval" = yes ; then AC_MSG_ERROR([Option --with-portals-includes requires an argument.]) elif test "$withval" != no ; then @@ -38,7 +39,8 @@ AC_DEFUN([AX_PORTALS], fi ) AC_ARG_WITH(portals-libs, - [ --with-portals-libs= Extra LIBS to link Portals libraries], +[ --with-portals-libs= + Extra LIBS to link Portals libraries], if test -z "$withval" -o "$withval" = yes ; then AC_MSG_ERROR([Option --with-portals-libs requires an argument.]) elif test "$withval" != no ; then diff --git a/maint/config/rt.m4 b/maint/config/rt.m4 index 0cfab8c..5a03c2c 100644 --- a/maint/config/rt.m4 +++ b/maint/config/rt.m4 @@ -3,8 +3,6 @@ AC_DEFUN([AX_CHECK_NEEDS_LIBRT], [ AC_MSG_CHECKING([if server lib needs -lrt]) -oldldflags=$LDFLAGS - AC_TRY_LINK( [#include #include @@ -12,7 +10,8 @@ AC_TRY_LINK( [lio_listio(LIO_NOWAIT, NULL, 0, NULL);], [AC_MSG_RESULT(no)], [ - LDFLAGS="$LDFLAGS -lrt" + oldlibs=$LIBS + LIBS="$LIBS -lrt" AC_TRY_LINK( [#include #include @@ -22,7 +21,6 @@ AC_TRY_LINK( AC_SUBST(NEEDS_LIBRT) AC_MSG_RESULT(yes)], [AC_MSG_ERROR(failed attempting to link lio_listio)]) + LIBS=$oldlibs ]) - -LDFLAGS=$oldldflags ]) diff --git a/maint/config/zoid.m4 b/maint/config/zoid.m4 new file mode 100644 index 0000000..e79f772 --- /dev/null +++ b/maint/config/zoid.m4 @@ -0,0 +1,31 @@ +# +# Configure rules for ZOID +# +# See COPYING in top-level directory. +# +AC_DEFUN([AX_ZOID], +[ + dnl Configure options for ZOID install path. 
+ dnl --with-zoid= + AC_ARG_WITH(zoid, +[ --with-zoid= Location of the ZOID tree (default no ZOID)], + if test -z "$withval" -o "$withval" = yes ; then + AC_MSG_ERROR([Option --with-zoid requires the path to your ZOID source tree.]) + elif test "$withval" != no ; then + ZOID_SRCDIR="$withval" + fi + ) + if test -n "$ZOID_SRCDIR" ; then + save_cppflags="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -Isrc/io/bmi -I$ZOID_SRCDIR/include -I$ZOID_SRCDIR/zbmi -I$ZOID_SRCDIR/zbmi/implementation" + AC_CHECK_HEADER(zbmi.h,, AC_MSG_ERROR([Header zbmi.h not found.])) + AC_CHECK_HEADER(zoid_api.h,, AC_MSG_ERROR([Header zoid_api.h not found.])) + AC_CHECK_HEADER(zbmi_protocol.h,, AC_MSG_ERROR([Header zbmi_protocol.h not found.])) + CPPFLAGS="$save_cppflags" + BUILD_ZOID=1 + fi + AC_SUBST(BUILD_ZOID) + AC_SUBST(ZOID_SRCDIR) +]) + +dnl vim: set ft=config : diff --git a/maint/make-bmi-dist.sh b/maint/make-bmi-dist.sh new file mode 100755 index 0000000..00806de --- /dev/null +++ b/maint/make-bmi-dist.sh @@ -0,0 +1,238 @@ +#!/bin/bash +# +# custom script for building a BMI distribution +# +# this should only be run from a freshly exported pvfs2 +# tagged release version, or a cvs checkout; a working +# directory will leave local changes in the distribution +# + +echo "make-bmi-dist" +if test -z $1; then + echo "No source directory specified" + echo "Usage: make-bmi-dist.sh " + exit 1 +fi + +if test -z $3; then + echo "No version string specified" + echo "Usage: make-bmi-dist.sh " + exit 1 +fi + +BUILDDIR=$2 +SRCDIR="$1" +PVFS2_VERSION="$3" + +if test "x$SRCDIR" = "x."; then + SRCIR=`pwd` + echo "Assuming top-level source directory is $SRCDIR" +fi + +STARTDIR=`pwd` +TARGETBASE="bmi-$PVFS2_VERSION" +TARGETDIR="/tmp/$TARGETBASE" +TARFILE_NAME="$BUILDDIR/bmi-$PVFS2_VERSION.tar" +TARBALL_NAME="$BUILDDIR/bmi-$PVFS2_VERSION.tar.gz" +TAR=`which tar` +GZIP=`which gzip` + +if test -z $TAR; then + echo "The required tar program is not in your path; aborting" + exit 1 +fi + +if test -z $GZIP; then + echo 
"The required gzip program is not in your path; aborting" + exit 1 +fi + +################## +# start processing +################## + +# new directory to construct release in +if test -d $TARGETDIR; then + rm -rf $TARGETDIR +fi +mkdir -p $TARGETDIR/src/common/misc +if [ $? -ne 0 ] +then + exit 1 +fi + +mkdir -p $TARGETDIR/src/io +if [ $? -ne 0 ] +then + exit 1 +fi + +mkdir -p $TARGETDIR/include +if [ $? -ne 0 ] +then + exit 1 +fi + +# copy source over +cp -f --no-dereference -R $SRCDIR/AUTHORS $TARGETDIR +cp -f --no-dereference -R $SRCDIR/COPYING $TARGETDIR +cp -f --no-dereference -R $SRCDIR/CREDITS $TARGETDIR +cp -f --no-dereference -R $SRCDIR/ChangeLog $TARGETDIR +cp -f --no-dereference -R $SRCDIR/INSTALL $TARGETDIR +cp -f --no-dereference -R $SRCDIR/Makefile.in $TARGETDIR +cp -f --no-dereference -R $SRCDIR/README $TARGETDIR +cp -f --no-dereference -R $SRCDIR/README.name_change $TARGETDIR +cp -f --no-dereference -R $BUILDDIR/config.save $TARGETDIR +cp -f --no-dereference -R $SRCDIR/configure $TARGETDIR +cp -f --no-dereference -R $SRCDIR/configure.in $TARGETDIR +cp -f --no-dereference -R $SRCDIR/module.mk.in $TARGETDIR +cp -f --no-dereference -R $SRCDIR/prepare $TARGETDIR +cp -f --no-dereference -R $SRCDIR/maint $TARGETDIR +cp -f --no-dereference -R $SRCDIR/pvfs2-config.h.in $TARGETDIR +cp -f --no-dereference -R $SRCDIR/src/io/bmi $TARGETDIR/src/io/ +cp -f --no-dereference -R $SRCDIR/src/common/gen-locks $TARGETDIR/src/common/ +cp -f --no-dereference -R $SRCDIR/src/common/gossip $TARGETDIR/src/common/ +cp -f --no-dereference -R $SRCDIR/src/common/id-generator $TARGETDIR/src/common/ +cp -f --no-dereference -R $SRCDIR/src/common/llist $TARGETDIR/src/common/ +cp -f $SRCDIR/include/pvfs2.h.in $TARGETDIR/include/ +cp -f $SRCDIR/include/pvfs2-debug.h $TARGETDIR/include/ +cp -f $SRCDIR/include/pvfs2-encode-stubs.h $TARGETDIR/include/ +cp -f $SRCDIR/include/pvfs2-event.h $TARGETDIR/include/ +cp -f $SRCDIR/include/pvfs2-mgmt.h $TARGETDIR/include/ +cp -f 
$SRCDIR/include/pvfs2-request.h $TARGETDIR/include/ +cp -f $SRCDIR/include/pvfs2-sysint.h $TARGETDIR/include/ +cp -f $SRCDIR/include/pvfs2-util.h $TARGETDIR/include/ +cp -f $SRCDIR/include/pvfs2-types.h $TARGETDIR/include/ +cp -f $SRCDIR/include/pvfs2-hint.h $TARGETDIR/include/ +cp -f $SRCDIR/src/common/misc/str-utils.h $TARGETDIR/src/common/misc/ +cp -f $SRCDIR/src/common/misc/str-utils.c $TARGETDIR/src/common/misc/ +cp -f $SRCDIR/src/common/misc/pint-event.h $TARGETDIR/src/common/misc/ +cp -f $SRCDIR/src/common/misc/pint-event.c $TARGETDIR/src/common/misc/ +cp -f $SRCDIR/src/common/misc/errno-mapping.c $TARGETDIR/src/common/misc/ +cp -f $SRCDIR/src/common/misc/pvfs2-internal.h $TARGETDIR/src/common/misc/ +cp -f $SRCDIR/src/common/misc/module.mk.in $TARGETDIR/src/common/misc/ +cp -f --no-dereference -R $SRCDIR/src/common/quickhash $TARGETDIR/src/common/ +cp -f --no-dereference -R $SRCDIR/src/common/quicklist $TARGETDIR/src/common/ +cp -f --no-dereference -R $SRCDIR/src/common/statecomp $TARGETDIR/src/common/ +cp -f --no-dereference -R $SRCDIR/src/common/mgmt $TARGETDIR/src/common/ +cp -f --no-dereference -R $SRCDIR/src/common/misc $TARGETDIR/src/common/ +cp -f --no-dereference -R $SRCDIR/src/proto $TARGETDIR/src/ + +cd $TARGETDIR + +# make configure.in BMI specific +sed -ie "s/AC_INIT(include\/pvfs2-types.h)/AC_INIT()/" configure.in +sed -ie "s/dnl BMI_ONLY_TAG/BUILD_BMI_ONLY=1/" configure.in +sed -ie "s/chmod/dnl chmod/" configure.in + +# make AC_OUTPUT bmi specific +sed -i "/src\/apps\/admin/d" configure.in +sed -i "/src\/apps\/karma/d" configure.in +sed -i "/src\/apps\/vis/d" configure.in +sed -i "/src\/apps\/fuse/d" configure.in +sed -i "/src\/apps\/kernel\/linux\/module.mk/d" configure.in +sed -i "/src\/io\/trove/d" configure.in +sed -i "/src\/common\/dotconf/d" configure.in +sed -i "/src\/io\/description/d" configure.in +sed -i "/src\/io\/flow/d" configure.in +sed -i "/src\/io\/buffer/d" configure.in +sed -i "/src\/io\/job/d" configure.in +sed -i 
"/src\/io\/dev/d" configure.in +sed -i "/src\/server\/module.mk/d" configure.in +sed -i "/src\/server\/request-scheduler/d" configure.in +sed -i "/src\/client\/sysint/d" configure.in +sed -i "/src\/kernel\/linux-2.6/d" configure.in +sed -i "/src\/kernel\/linux-2.4/d" configure.in +sed -i "/doc\/module.mk/d" configure.in +sed -i "/doc\/coding\/module.mk/d" configure.in +sed -i "/doc\/design\/module.mk/d" configure.in +sed -i "/doc\/random\/module.mk/d" configure.in +sed -i "/examples\/pvfs2-server.rc/d" configure.in +sed -i "/doc\/doxygen\/pvfs2-doxygen.conf/d" configure.in +sed -i "/common\/events\/module.mk/d" configure.in + +# dump some special options into the top level module.mk.in +echo "DIST_RELEASE = 1" >> module.mk.in + +# Run ./prepare to regenerate the configure script +./prepare + +# Run config.save to regenerate files. +#./config.save +#if [ $? -ne 0 ] +#then +# exit 1 +#fi +#make docs +#if [ $? -ne 0 ] +#then +# exit 1 +#fi +#make statecompgen +#if [ $? -ne 0 ] +#then +# exit 1 +#fi + +# clean out extra files (distclean will not remove .sm or doc files if +# DIST_RELEASE is set) +#make distclean +#if [ $? -ne 0 ] +#then +# exit 1 +#fi + +# clean out cvs directories and other cruft (if any) +for f in `find . | grep CVS`; do rm -rf $f; done +for f in `find . | grep \#`; do rm -rf $f; done +for f in `find . | grep \~`; do rm -rf $f; done +for f in `find . -name *.o`; do rm -rf $f; done +for f in `find . -name core`; do rm -rf $f; done +for f in `find . -name core\.[1-9]*`; do rm -rf $f; done +for f in `find . -name module.mk`; do rm -rf $f; done +for f in `find . -name "*.log"`; do rm -rf $f; done +for f in `find . -name "*.toc"`; do rm -rf $f; done +for f in `find . -name "*.aux"`; do rm -rf $f; done +rm -f Makefile pvfs2-config.h PVFS2-GLOBAL-TODO.txt +rm -f src/common/statecomp/statecomp + +# make sure the cleaned up directory exists +cd /tmp +if ! 
test -d "$TARGETBASE"; then + echo "Newly created target directory doesn't exist; aborting" + exit 1 +fi + +# tar up the cleaned up directory +tar c "$TARGETBASE" > $TARFILE_NAME 2> /dev/null + +if ! test -f $TARFILE_NAME; then + echo "Newly created tarfile does not exist!" + echo "Error creating tarfile $TARBALL_NAME; aborting" + exit 1 +fi + +# if a tarball already exists, remove it before creating a new one +if test -f $TARBALL_NAME; then + rm -f $TARBALL_NAME +fi + +# gzip the newly created tarfile +gzip $TARFILE_NAME + +if ! test -f "$TARBALL_NAME"; then + echo "Newly created tarball does not exist!" + echo "Error creating tarball $TARBALL_NAME; aborting" + exit 1 +fi + +rm -rf $TARGETDIR + +cd $STARTDIR + +if test -f "$TARBALL_NAME"; then + echo "Distribution file is ready at:" + echo "$TARBALL_NAME" +fi + +exit 0 diff --git a/maint/make-dist.sh b/maint/make-dist.sh index 5bacc51..b86c446 100644 --- a/maint/make-dist.sh +++ b/maint/make-dist.sh @@ -49,22 +49,50 @@ fi ################## # start processing ################## -make clean -make docs -make statecompgen +# new directory to construct release in if test -d $TARGETDIR; then rm -rf $TARGETDIR fi mkdir $TARGETDIR +if [ $? -ne 0 ] +then + exit 1 +fi +# copy source over cp -f --no-dereference -R $SRCDIR/* $TARGETDIR -cp $BUILDDIR/src/common/statecomp/parser.h $TARGETDIR/src/common/statecomp/ -cp $BUILDDIR/src/common/statecomp/parser.c $TARGETDIR/src/common/statecomp/ -cp $BUILDDIR/src/common/statecomp/scanner.c $TARGETDIR/src/common/statecomp/ cd $TARGETDIR +# dump some special options into the top level module.mk.in +echo "DIST_RELEASE = 1" >> module.mk.in + +# Run config.save to regenerate files. +./config.save +if [ $? -ne 0 ] +then + exit 1 +fi +make docs +if [ $? -ne 0 ] +then + exit 1 +fi +make statecompgen +if [ $? -ne 0 ] +then + exit 1 +fi + +# clean out extra files (distclean will not remove .sm or doc files if +# DIST_RELEASE is set) +make distclean +if [ $? 
-ne 0 ] +then + exit 1 +fi + # clean out cvs directories and other cruft (if any) for f in `find . | grep CVS`; do rm -rf $f; done for f in `find . | grep \#`; do rm -rf $f; done @@ -76,12 +104,9 @@ for f in `find . -name module.mk`; do rm -rf $f; done for f in `find . -name "*.log"`; do rm -rf $f; done for f in `find . -name "*.toc"`; do rm -rf $f; done for f in `find . -name "*.aux"`; do rm -rf $f; done -rm -f Makefile pvfs2-config.h PVFS2-GLOBAL-TODO.txt +rm -f Makefile pvfs2-config.h rm -f src/common/statecomp/statecomp -# dump some special options into the top level module.mk.in -echo "DIST_RELEASE = 1" >> module.mk.in - # make sure the cleaned up directory exists cd /tmp if ! test -d "$TARGETBASE"; then diff --git a/maint/pvfs2-options-parser.pl b/maint/pvfs2-options-parser.pl index e284fbe..566ea01 100755 --- a/maint/pvfs2-options-parser.pl +++ b/maint/pvfs2-options-parser.pl @@ -39,8 +39,9 @@ my ($optsdoc, $optdef) = $optstr =~ /\/\*((?:[^*]*(?:\*(?!\/))?[^*]*)*)\*\/\s*static\s*const\s*configoption_t[^{]+\{(.+)LAST_OPTION.*$/sg; print << "EOF" -PVFS2 $version: Config File Description + +PVFS2 $version: Config File Description - + + + +

PVFS2 $version: Config File Description

$optsdoc @@ -481,3 +485,4 @@ sub print_context } } +print ""; diff --git a/maint/pvfs2htmlfixup.sh b/maint/pvfs2htmlfixup.sh index efeef92..bb33fb9 100755 --- a/maint/pvfs2htmlfixup.sh +++ b/maint/pvfs2htmlfixup.sh @@ -4,5 +4,17 @@ # through latex2html-init . I couldn't find a good way to make latex2html add # the closing tags and '#include virtual=...' stuff -perl -p -i -e 's~~\n~' $@ -perl -p -i -e 's~~\n\n\n\n\n\n\n~' $@ +basename=$(echo $@ | sed -e "s|\(.*\)\.html$|\1|") +target=$(echo $@ | sed -e "s|^.*/\([^/]*\)/[^/]*\.html$|\1|") + +prefixes="../../../.." + +cp $basename.html $basename.php + +perl -p -i -e 's~~~' $basename.php +perl -p -i -e 's~ ~' $basename.php +perl -p -i -e 's~^.*top_of_page.*$~~' $basename.php +perl -p -i -e 's~~~' $basename.php +perl -p -i -e 's~ALIGN="CENTER"~ALIGN="LEFT"~' $basename.php +perl -p -i -e 's~align="center"~align="left"~' $basename.php +perl -p -i -e "s~$target.html~$target.php~" $basename.php diff --git a/patches/pnfs/p00002_pnfs_nfslayoutsupport.patch b/patches/pnfs/p00002_pnfs_nfslayoutsupport.patch index 86162dc..3f07aed 100644 --- a/patches/pnfs/p00002_pnfs_nfslayoutsupport.patch +++ b/patches/pnfs/p00002_pnfs_nfslayoutsupport.patch @@ -191,7 +191,7 @@ diff -puN src/apps/kernel/linux/pvfs2-client-core.c~nfslayoutsupport src/apps/ke + gossip_err("service_pnfs_get_devlist: Could not retrieve server array\n"); + goto out_free; + } -+ bcopy(hep->h_addr, &(io_server.sin_addr.s_addr), hep->h_length); ++ memcpy(&(io_server.sin_addr.s_addr), hep->h_addr, hep->h_length); + ip_addr = inet_ntoa(io_server.sin_addr); + /* add ip addr to downcall result */ + encode_string(&buffer, &ip_addr); diff --git a/pvfs2-config.h.in b/pvfs2-config.h.in index eeb8223..98bc2dc 100644 --- a/pvfs2-config.h.in +++ b/pvfs2-config.h.in @@ -21,15 +21,21 @@ /* Define if read_descriptor_t has an arg member */ #undef HAVE_ARG_IN_READ_DESCRIPTOR_T -/* Define to 1 if you have the header file. 
*/ +/* Define if arpa/inet.h exists */ #undef HAVE_ARPA_INET_H /* Define to 1 if you have the header file. */ #undef HAVE_ASM_IOCTL32_H -/* Define to 1 if you have the header file. */ +/* Define if attr/xattr.h exists */ #undef HAVE_ATTR_XATTR_H +/* Define if kernel backing_dev_info struct has a name field */ +#undef HAVE_BACKING_DEV_INFO_NAME + +/* Define if bdi_init function is present */ +#undef HAVE_BDI_INIT + /* Define if struct backing_dev_info in kernel has memory_backed */ #undef HAVE_BDI_MEMORY_BACKED @@ -40,9 +46,18 @@ /* Define if there exists a compat_ioctl member in file_operations */ #undef HAVE_COMPAT_IOCTL_HANDLER +/* Define if s_xattr member of super_block struct is const */ +#undef HAVE_CONST_S_XATTR_IN_SUPERBLOCK + /* Define if third param (message) to DB error callback function is const */ #undef HAVE_CONST_THIRD_PARAMETER_TO_DB_ERROR_CALLBACK +/* Define if struct ctl_table has ctl_name member */ +#undef HAVE_CTL_NAME + +/* Define if cred.h contains current_fsuid */ +#undef HAVE_CURRENT_FSUID + /* Define if DB error callback function takes dbenv parameter */ #undef HAVE_DBENV_PARAMETER_TO_DB_ERROR_CALLBACK @@ -55,12 +70,36 @@ /* Define if DB has get_pagesize function */ #undef HAVE_DB_GET_PAGESIZE +/* Define if d_count member of dentry is of type atomic_t */ +#undef HAVE_DENTRY_D_COUNT_ATOMIC + /* Define if super_operations statfs has dentry argument */ #undef HAVE_DENTRY_STATFS_SOP +/* Define if dirty_inode takes a flag argument */ +#undef HAVE_DIRTY_INODE_FLAGS + +/* Define if kernel super_operations contains drop_inode field */ +#undef HAVE_DROP_INODE + +/* Define if dcache.h contains d_alloc_annon */ +#undef HAVE_D_ALLOC_ANON + +/* Define if d_delete member of dentry_operations has a const dentry param */ +#undef HAVE_D_DELETE_CONST + +/* Define if export_operations has an encode_fh member */ +#undef HAVE_ENCODEFH_EXPORT_OPERATIONS + /* Define to 1 if you have the header file. 
*/ #undef HAVE_EXECINFO_H +/* Define if system defines F_NOCACHE fcntl */ +#undef HAVE_FCNTL_F_NOCACHE + +/* Define if features.h exists */ +#undef HAVE_FEATURES_H + /* Define to 1 if you have the `fgetxattr' function. */ #undef HAVE_FGETXATTR @@ -70,6 +109,9 @@ /* Define if system provides fgtxattr prototype */ #undef HAVE_FGETXATTR_PROTOTYPE +/* Define if export_operations has an fh_to_dentry member */ +#undef HAVE_FHTODENTRY_EXPORT_OPERATIONS + /* Define if struct inode_operations in kernel has fill_handle callback */ #undef HAVE_FILL_HANDLE_INODE_OPERATIONS @@ -77,15 +119,31 @@ */ #undef HAVE_FIND_INODE_HANDLE_SUPER_OPERATIONS +/* Define if generic_permission takes four parameters */ +#undef HAVE_FOUR_PARAM_GENERIC_PERMISSION + /* Define to 1 if you have the `fsetxattr' function. */ #undef HAVE_FSETXATTR /* Define if fsetxattr takes position and option arguments */ #undef HAVE_FSETXATTR_EXTRA_ARGS -/* Define to 1 if you have the header file. */ +/* Define if fstab.h exists */ #undef HAVE_FSTAB_H +/* Define if only filesystem_type has get_sb */ +#undef HAVE_FSTYPE_GET_SB + +/* Define if only filesystem_type has mount and HAVE_FSTYPE_GET_SB is false */ +#undef HAVE_FSTYPE_MOUNT_ONLY + +/* Define if fsync function in file_operations struct wants a dentry pointer + as the second parameter */ +#undef HAVE_FSYNC_DENTRY_PARAM + +/* Define if fsync has loff_t params */ +#undef HAVE_FSYNC_LOFF_T_PARAMS + /* Define if kernel has generic_file_readv */ #undef HAVE_GENERIC_FILE_READV @@ -107,9 +165,18 @@ /* Define to 1 if you have the `getmntent' function. 
*/ #undef HAVE_GETMNTENT +/* Define if get_sb_nodev function exists */ +#undef HAVE_GETSB_NODEV + +/* Define if getxattr takes position and option arguments */ +#undef HAVE_GETXATTR_EXTRA_ARGS + /* Define if struct super_operations in kernel has get_fs_key callback */ #undef HAVE_GET_FS_KEY_SUPER_OPERATIONS +/* Define if get_sb is a member of file_system_type struct */ +#undef HAVE_GET_SB_MEMBER_FILE_SYSTEM_TYPE + /* Define if strerror_r is GNU-specific */ #undef HAVE_GNU_STRERROR_R @@ -131,6 +198,9 @@ /* Define if kernel has iget_locked */ #undef HAVE_IGET_LOCKED +/* Define if inode_setattr is defined */ +#undef HAVE_INODE_SETATTR + /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H @@ -150,15 +220,34 @@ /* Define if struct inode in kernel has i_blksize member */ #undef HAVE_I_BLKSIZE_IN_STRUCT_INODE +/* Define if clear_nlink exists */ +#undef HAVE_I_CLEAR_NLINK + +/* Define if drop_nlink exists */ +#undef HAVE_I_DROP_NLINK + +/* Define if inc_nlink exists */ +#undef HAVE_I_INC_NLINK + +/* Define if struct inode in kernel has i_sem member */ +#undef HAVE_I_SEM_IN_STRUCT_INODE + +/* Define if set_nlink exists */ +#undef HAVE_I_SET_NLINK + /* Define if kernel has i_size_read */ #undef HAVE_I_SIZE_READ /* Define if kernel has i_size_write */ #undef HAVE_I_SIZE_WRITE -/* Define if kernel lacks device classes */ +/* Define if kernel has device classes */ #undef HAVE_KERNEL_DEVICE_CLASSES +/* Define if kernel kmem_cache_create constructor has newer-style + one-parameter form */ +#undef HAVE_KMEM_CACHE_CREATE_CTOR_ONE_PARAM + /* Define if kernel kmem_cache_create constructor has new-style two-parameter form */ #undef HAVE_KMEM_CACHE_CREATE_CTOR_TWO_PARAM @@ -184,6 +273,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_LINUX_IOCTL32_H +/* Define if linux/malloc.h exists */ +#undef HAVE_LINUX_MALLOC_H + /* Define to 1 if you have the header file. */ #undef HAVE_LINUX_MOUNT_H @@ -196,28 +288,31 @@ /* Define to 1 if you have the header file. 
*/ #undef HAVE_LINUX_SYSCALLS_H +/* Define if linux/types.h exists */ +#undef HAVE_LINUX_TYPES_H + /* Define to 1 if you have the header file. */ #undef HAVE_LINUX_XATTR_ACL_H -/* Define to 1 if you have the header file. */ +/* Define if malloc.h exists */ #undef HAVE_MALLOC_H /* Define if kernel defines mapping_nrpages macro -- defined by RT linux */ #undef HAVE_MAPPING_NRPAGES_MACRO -/* Define to 1 if you have the header file. */ +/* Define if memory.h exists */ #undef HAVE_MEMORY_H -/* Define to 1 if you have the header file. */ +/* Define if mntent.h exists */ #undef HAVE_MNTENT_H -/* Define to 1 if you have the header file. */ -#undef HAVE_MNTENT_H_ +/* Define if mount.h contains MNT_NOATIME flags */ +#undef HAVE_MNT_NOATIME -/* Use the MPE profiling library */ -#undef HAVE_MPE +/* Define if mount.h contains MNT_NODIRATIME flags */ +#undef HAVE_MNT_NODIRATIME -/* Define to 1 if you have the header file. */ +/* Define if netdb.h exists */ #undef HAVE_NETDB_H /* Define if including linux/config.h gives no warnings */ @@ -235,11 +330,29 @@ /* Define to 1 if you have the header file. */ #undef HAVE_OPENSSL_EVP_H -/* Define if Pablo library is used */ -#undef HAVE_PABLO +/* Define to 1 if you have the header file. */ +#undef HAVE_OPENSSL_SHA_H + +/* Define if system defines O_DIRECT fcntl */ +#undef HAVE_OPEN_O_DIRECT + +/* Define if posix_acl_chmod exists */ +#undef HAVE_POSIX_ACL_CHMOD + +/* Define if posix_acl_clone exists */ +#undef HAVE_POSIX_ACL_CLONE + +/* Define if posix_acl_create_masq accepts umode_t type */ +#undef HAVE_POSIX_ACL_CREATE + +/* Define if posix_acl_equiv_mode accepts umode_t type */ +#undef HAVE_POSIX_ACL_EQUIV_MODE_UMODE_T /* Define if sysctl proc handlers have 6th argument */ -#undef HAVE_PROC_HANDLER_SIX_ARG +#undef HAVE_PROC_HANDLER_FILE_ARG + +/* Define if sysctl proc handlers have ppos argument */ +#undef HAVE_PROC_HANDLER_PPOS_ARG /* Define if have PtlACEntry with jid argument. 
*/ #undef HAVE_PTLACENTRY_JID @@ -250,6 +363,12 @@ /* Define to 1 if you have the `PtlEventKindStr' function. */ #undef HAVE_PTLEVENTKINDSTR +/* Define if kernel super_operations contains put_inode field */ +#undef HAVE_PUT_INODE + +/* Define if pwd.h exists */ +#undef HAVE_PWD_H + /* Define if struct file_operations in kernel has readdirplus_lite callback */ #undef HAVE_READDIRPLUSLITE_FILE_OPERATIONS @@ -272,6 +391,13 @@ instead of rw_lock -- used by RT linux */ #undef HAVE_RT_PRIV_LOCK_ADDR_SPACE_STRUCT +/* Define if kernel address_space struct has a rw_lock_t member named + tree_lock */ +#undef HAVE_RW_LOCK_TREE_ADDR_SPACE_STRUCT + +/* Define if struct super_block has s_dirty list */ +#undef HAVE_SB_DIRTY_LIST + /* Define to 1 if you have the header file. */ #undef HAVE_SDL_SDL_TTF_H @@ -284,6 +410,12 @@ /* Define if kernel setxattr has const void* argument */ #undef HAVE_SETXATTR_CONST_ARG +/* Define if setxattr takes position and option arguments */ +#undef HAVE_SETXATTR_EXTRA_ARGS + +/* Define if d_compare member of dentry_operations has seven params */ +#undef HAVE_SEVEN_PARAM_D_COMPARE + /* Define if SLAB_KERNEL is defined in kernel */ #undef HAVE_SLAB_KERNEL @@ -291,23 +423,29 @@ page_lock instead of rw_lock */ #undef HAVE_SPIN_LOCK_PAGE_ADDR_SPACE_STRUCT -/* Define if kernel address_space struct has a spin_lock member named - tree_lock instead of rw_lock */ +/* Define if kernel address_space struct has a spin_lock_t member named + tree_lock */ #undef HAVE_SPIN_LOCK_TREE_ADDR_SPACE_STRUCT +/* Define if SPIN_LOCK_UNLOCKED defined */ +#undef HAVE_SPIN_LOCK_UNLOCKED + /* Define if struct super_operations in kernel has statfs_lite callback */ #undef HAVE_STATFS_LITE_SUPER_OPERATIONS -/* Define to 1 if you have the header file. */ +/* Define if stdarg.h exists */ #undef HAVE_STDARG_H -/* Define to 1 if you have the header file. */ +/* Define if stdint.h exists */ #undef HAVE_STDINT_H -/* Define to 1 if you have the header file. 
*/ +/* Define if stdlib.h exists */ #undef HAVE_STDLIB_H -/* Define to 1 if you have the header file. */ +/* Define if struct ctl_table has strategy member */ +#undef HAVE_STRATEGY_NAME + +/* Define if strings.h exists */ #undef HAVE_STRINGS_H /* Define to 1 if you have the header file. */ @@ -337,24 +475,55 @@ /* Define if sys/mount.h exists */ #undef HAVE_SYS_MOUNT_H -/* Define to 1 if you have the header file. */ +/* Define if sys/sendfile.h exists */ +#undef HAVE_SYS_SENDFILE_H + +/* Define if sys/socket.h exists */ #undef HAVE_SYS_SOCKET_H -/* Define to 1 if you have the header file. */ +/* Define if sys/stat.h exists */ #undef HAVE_SYS_STAT_H -/* Define to 1 if you have the header file. */ +/* Define if sys/types.h exists */ #undef HAVE_SYS_TYPES_H /* Define if sys/vfs.h exists */ #undef HAVE_SYS_VFS_H -/* Define to 1 if you have the header file. */ +/* Define if sys/xattr.h exists */ #undef HAVE_SYS_XATTR_H +/* Define if TAU library is used */ +#undef HAVE_TAU + +/* Define if acl_check param of generic_permission function has three + parameters */ +#undef HAVE_THREE_PARAM_ACL_CHECK + +/* Define if d_hash member of dentry_operations has three params, the second + inode paramsbeing the difference */ +#undef HAVE_THREE_PARAM_D_HASH + +/* Define if generic_permission takes three parameters */ +#undef HAVE_THREE_PARAM_GENERIC_PERMISSION + +/* Define if permission function pointer of inode_operations struct has three + parameters and the third parameter is for flags (unsigned int) */ +#undef HAVE_THREE_PARAM_PERMISSION_WITH_FLAG + +/* Define if kernel stddef has true/false enum */ +#undef HAVE_TRUE_FALSE_ENUM + /* Define if register_sysctl_table takes two arguments */ #undef HAVE_TWO_ARG_REGISTER_SYSCTL_TABLE +/* Define if generic_permission takes two parameters */ +#undef HAVE_TWO_PARAM_GENERIC_PERMISSION + +/* Define if kernel's inode_operations has two parameters permission function + */ +#undef HAVE_TWO_PARAM_PERMISSION + /* Define if DB open function takes a 
txnid parameter */ #undef HAVE_TXNID_PARAMETER_TO_DB_OPEN @@ -367,6 +536,9 @@ /* Define if DB stat function takes malloc function ptr */ #undef HAVE_UNKNOWN_PARAMETER_TO_DB_STAT +/* Define if file_operations struct has unlocked_ioctl member */ +#undef HAVE_UNLOCKED_IOCTL_HANDLER + /* Define if include file valgrind.h exists */ #undef HAVE_VALGRIND_H @@ -382,6 +554,14 @@ /* Define if kernel has xattr support */ #undef HAVE_XATTR +/* Define if kernel xattr_handle get function has dentry as first parameter + and a fifth parameter */ +#undef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + +/* Define if kernel xattr_handle set function has dentry as first parameter + and a sixth parameter */ +#undef HAVE_XATTR_HANDLER_SET_SIX_PARAM + /* Define to the address where bug reports for this package should be sent. */ #undef PACKAGE_BUGREPORT @@ -406,10 +586,28 @@ /* sub version number */ #undef PVFS2_VERSION_SUB -/* The size of `long int', as computed by sizeof. */ +/* Should we build user interface acl routines. */ +#undef PVFS_HAVE_ACL_INCLUDES + +/* scandir compare arg takes void pointers. */ +#undef PVFS_SCANDIR_VOID + +/* Should we enable user interface data cache. */ +#undef PVFS_UCACHE_ENABLE + +/* Should we build user interface libraries. */ +#undef PVFS_USRINT_BUILD + +/* Should we enable user interface CWD support. */ +#undef PVFS_USRINT_CWD + +/* Should user interface assume FS is mounted. */ +#undef PVFS_USRINT_KMOUNT + +/* The size of a `long int', as computed by sizeof. */ #undef SIZEOF_LONG_INT -/* The size of `void *', as computed by sizeof. */ +/* The size of a `void *', as computed by sizeof. */ #undef SIZEOF_VOID_P /* Define to 1 if you have the ANSI C header files. 
*/ diff --git a/src/apps/admin/module.mk.in b/src/apps/admin/module.mk.in index 01136ca..5bec602 100644 --- a/src/apps/admin/module.mk.in +++ b/src/apps/admin/module.mk.in @@ -7,13 +7,12 @@ ADMINSRC := \ $(DIR)/pvfs2-set-eventmask.c \ $(DIR)/pvfs2-set-sync.c \ $(DIR)/pvfs2-ls.c \ - $(DIR)/pvfs2-lsplus.c \ $(DIR)/pvfs2-ping.c \ $(DIR)/pvfs2-rm.c \ $(DIR)/pvfs2-stat.c \ $(DIR)/pvfs2-statfs.c \ $(DIR)/pvfs2-perf-mon-example.c \ - $(DIR)/pvfs2-event-mon-example.c \ + $(DIR)/pvfs2-perf-mon-snmp.c \ $(DIR)/pvfs2-mkdir.c \ $(DIR)/pvfs2-chmod.c \ $(DIR)/pvfs2-chown.c \ @@ -27,7 +26,10 @@ ADMINSRC := \ $(DIR)/pvfs2-remove-object.c \ $(DIR)/pvfs2-ln.c \ $(DIR)/pvfs2-perror.c \ - $(DIR)/pvfs2-check-server.c + $(DIR)/pvfs2-check-server.c \ + $(DIR)/pvfs2-drop-caches.c +#don't build in 2.8.5 release +# $(DIR)/pvfs2-get-uid.c ADMINSRC_SERVER := \ $(DIR)/pvfs2-mkspace.c \ diff --git a/src/apps/admin/pvfs2-change-fsid.c b/src/apps/admin/pvfs2-change-fsid.c index 2e89517..74e7dcc 100644 --- a/src/apps/admin/pvfs2-change-fsid.c +++ b/src/apps/admin/pvfs2-change-fsid.c @@ -38,7 +38,8 @@ typedef struct char db_path[PATH_MAX]; char fs_conf[PATH_MAX]; char fs_name[PATH_MAX]; - char storage_path[PATH_MAX]; + char data_storage_path[PATH_MAX]; + char meta_storage_path[PATH_MAX]; int32_t old_fsid; int32_t new_fsid; char old_fsid_hex[9]; @@ -220,8 +221,8 @@ int update_fs_conf(void) fprintf(stderr,"Error opening pipe. 
errno=%d",errno); exit(-1); } - fscanf(fptr, "%s", output); - if(!strncmp(output,"",PATH_MAX)) + ret = fscanf(fptr, "%s", output); + if(ret != 1 || !strncmp(output,"",PATH_MAX)) { printf("fsid [%" PRId32 "] not found in file\n",opts.old_fsid); return -1; @@ -267,6 +268,7 @@ int get_old_fsid_from_conf(void) int i = 0; char buffer[512][512]; int32_t read_fsid = 0; + int ret; memset(buffer, 0, sizeof(buffer)); @@ -280,8 +282,11 @@ int get_old_fsid_from_conf(void) /* Read in file */ while(!feof(fptr)) { - fscanf(fptr,"%s",buffer[i]); - i++; + ret = fscanf(fptr,"%s",buffer[i]); + if(ret == 1) + { + i++; + } } fclose(fptr); @@ -309,23 +314,37 @@ int get_old_fsid_from_conf(void) int move_hex_dir(void) { FILE * fptr = NULL; - char command[PATH_MAX]; - char output[PATH_MAX]; + char datacommand[PATH_MAX]; + char metacommand[PATH_MAX]; + char dataoutput[PATH_MAX]; + char metaoutput[PATH_MAX]; struct stat buf; - char path[PATH_MAX]; - char new_path[PATH_MAX]; + char datapath[PATH_MAX]; + char new_datapath[PATH_MAX]; + char metapath[PATH_MAX]; + char new_metapath[PATH_MAX]; int ret = 0; - memset(path,0,sizeof(path)); - sprintf(path,"%s/%s", opts.storage_path, opts.old_fsid_hex); + memset(datapath, 0, sizeof(datapath)); + memset(metapath, 0, sizeof(metapath)); + sprintf(datapath, "%s%s", opts.data_storage_path, opts.old_fsid_hex); + sprintf(metapath, "%s/%s", opts.meta_storage_path, opts.old_fsid_hex); - /* See if directory exists */ - ret = stat(path, &buf); + /* See if each directory exists */ + ret = stat(datapath, &buf); if(ret) { fprintf(stderr, - "Error checking for directory's existance. [%s]\n", - path); + "Error checking for data directory's existance. [%s]\n", + datapath); + return -1; + } + ret = stat(metapath, &buf); + if(ret) + { + fprintf(stderr, + "Error checking for meta directory's existance. 
[%s]\n", + metapath); return -1; } @@ -336,31 +355,53 @@ int move_hex_dir(void) return 0; } - memset(command,0,sizeof(command)); - memset(output,0,sizeof(output)); - memset(new_path,0,sizeof(new_path)); - - /* Move the directory */ - sprintf(new_path, "%s/%s", opts.storage_path, opts.new_fsid_hex); - sprintf(command, "mv %s %s", path, new_path); + memset(datacommand, 0, sizeof(datacommand)); + memset(metacommand, 0, sizeof(metacommand)); + memset(dataoutput, 0, sizeof(dataoutput)); + memset(metaoutput, 0, sizeof(metaoutput)); + memset(new_datapath, 0, sizeof(new_datapath)); + memset(new_metapath, 0, sizeof(new_metapath)); + + /* Move the directories */ + sprintf(new_datapath, "%s/%s", opts.data_storage_path, opts.new_fsid_hex); + sprintf(new_metapath, "%s%s", opts.meta_storage_path, opts.new_fsid_hex); + sprintf(datacommand, "mv %s %s", datapath, new_datapath); + sprintf(metacommand, "mv %s %s", metapath, new_metapath); + + /* move the data dir */ + fptr = popen(datacommand, "r"); + if(fptr == NULL) + { + fprintf(stderr,"Error opening pipe. errno=%d",errno); + exit(-1); + } + ret = fscanf(fptr, "%s", dataoutput); + if(ret && strncmp(dataoutput,"",PATH_MAX)) + { + printf("mv from [%s] to [%s] failed.\n", datapath, new_datapath); + return -1; + } + pclose(fptr); - fptr = popen(command, "r"); + /* move the meta dir */ + fptr = popen(metacommand, "r"); if(fptr == NULL) { fprintf(stderr,"Error opening pipe. 
errno=%d",errno); exit(-1); } - fscanf(fptr, "%s", output); - if(strncmp(output,"",PATH_MAX)) + ret = fscanf(fptr, "%s", metaoutput); + if(ret && strncmp(metaoutput,"",PATH_MAX)) { - printf("mv from [%s] to [%s] failed.\n", path, new_path); + printf("mv from [%s] to [%s] failed.\n", metapath, new_metapath); return -1; } pclose(fptr); if(opts.verbose) { - printf("Successful dir move from [%s] to [%s]\n", path, new_path); + printf("Successful data dir move from [%s] to [%s]\n", datapath, new_datapath); + printf("Successful meta dir move from [%s] to [%s]\n", metapath, new_metapath); } return 0; @@ -512,7 +553,8 @@ int process_args(int argc, char ** argv) {"fsname",1,0,0}, {"dbpath",1,0,0}, {"fsconf",1,0,0}, - {"storage",1,0,0}, + {"datastorage",1,0,0}, + {"metastorage",1,0,0}, {"view",0,0,0}, {0,0,0,0} }; @@ -557,11 +599,15 @@ int process_args(int argc, char ** argv) strncpy(opts.fs_conf, optarg, PATH_MAX); break; - case 7: /* storage */ - strncpy(opts.storage_path, optarg, PATH_MAX); + case 7: /* data storage */ + strncpy(opts.data_storage_path, optarg, PATH_MAX); break; - case 8: /* view */ + case 8: /* meta storage */ + strncpy(opts.meta_storage_path, optarg, PATH_MAX); + break; + + case 9: /* view */ opts.view_only = 1; break; @@ -588,10 +634,18 @@ int process_args(int argc, char ** argv) return(-1); } - /* storage_path must be set */ - if(!strncmp(opts.storage_path,"",PATH_MAX)) + /* data storage_path must be set */ + if(!strncmp(opts.data_storage_path,"",PATH_MAX)) + { + fprintf(stderr,"Error: --datastorage option must be given.\n"); + print_help(argv[0]); + return(-1); + } + + /* meta storage_path must be set */ + if(!strncmp(opts.meta_storage_path,"",PATH_MAX)) { - fprintf(stderr,"Error: --storage option must be given.\n"); + fprintf(stderr,"Error: --metastorage option must be given.\n"); print_help(argv[0]); return(-1); } @@ -608,8 +662,10 @@ void print_help(char * progname) "The current file system ID.\n"); fprintf(stderr," --fsconf= " "Fs config file for 
the the file system being modified.\n"); - fprintf(stderr," --storage= " - "Local storage space for the the file system being modified.\n"); + fprintf(stderr," --datastorage= " + "Local data storage space for the the file system being modified.\n"); + fprintf(stderr," --metastorage= " + "Local meta storage space for the the file system being modified.\n"); fprintf(stderr, "\n"); fprintf(stderr,"The following arguments are optional:\n"); fprintf(stderr,"--------------\n"); diff --git a/src/apps/admin/pvfs2-chmod.c b/src/apps/admin/pvfs2-chmod.c index 0c7ea0f..f0a2a01 100644 --- a/src/apps/admin/pvfs2-chmod.c +++ b/src/apps/admin/pvfs2-chmod.c @@ -108,7 +108,7 @@ int pvfs2_chmod (PVFS_permissions perms, char *destfile) { memset(&resp_lookup, 0, sizeof(PVFS_sysresp_lookup)); ret = PVFS_sys_lookup(cur_fs, pvfs_path, &credentials, &resp_lookup, - PVFS2_LOOKUP_LINK_FOLLOW); + PVFS2_LOOKUP_LINK_FOLLOW, NULL); if (ret < 0) { PVFS_perror("PVFS_sys_lookup", ret); @@ -147,7 +147,7 @@ int pvfs2_chmod (PVFS_permissions perms, char *destfile) { ret = PVFS_sys_ref_lookup(parent_ref.fs_id, str_buf, parent_ref, &credentials, &resp_lookup, - PVFS2_LOOKUP_LINK_NO_FOLLOW); + PVFS2_LOOKUP_LINK_NO_FOLLOW, NULL); if (ret != 0) { fprintf(stderr, "Target '%s' does not exist!\n", str_buf); @@ -156,7 +156,7 @@ int pvfs2_chmod (PVFS_permissions perms, char *destfile) { memset(&resp_getattr,0,sizeof(PVFS_sysresp_getattr)); attrmask = (PVFS_ATTR_SYS_ALL_SETABLE); - ret = PVFS_sys_getattr(resp_lookup.ref,attrmask,&credentials,&resp_getattr); + ret = PVFS_sys_getattr(resp_lookup.ref,attrmask,&credentials,&resp_getattr, NULL); if (ret < 0) { PVFS_perror("PVFS_sys_getattr",ret); @@ -168,7 +168,7 @@ int pvfs2_chmod (PVFS_permissions perms, char *destfile) { new_attr.perms = perms; new_attr.mask = PVFS_ATTR_SYS_PERM; - ret = PVFS_sys_setattr(resp_lookup.ref,new_attr,&credentials); + ret = PVFS_sys_setattr(resp_lookup.ref,new_attr,&credentials, NULL); if (ret < 0) { 
PVFS_perror("PVFS_sys_setattr",ret); diff --git a/src/apps/admin/pvfs2-chown.c b/src/apps/admin/pvfs2-chown.c index 4e85c0d..69e9236 100644 --- a/src/apps/admin/pvfs2-chown.c +++ b/src/apps/admin/pvfs2-chown.c @@ -114,7 +114,7 @@ int pvfs2_chown (PVFS_uid owner, PVFS_gid group, char *destfile) { memset(&resp_lookup, 0, sizeof(PVFS_sysresp_lookup)); ret = PVFS_sys_lookup(cur_fs, pvfs_path, &credentials, &resp_lookup, - PVFS2_LOOKUP_LINK_FOLLOW); + PVFS2_LOOKUP_LINK_FOLLOW, NULL); if (ret < 0) { PVFS_perror("PVFS_sys_lookup", ret); @@ -153,7 +153,7 @@ int pvfs2_chown (PVFS_uid owner, PVFS_gid group, char *destfile) { ret = PVFS_sys_ref_lookup(parent_ref.fs_id, str_buf, parent_ref, &credentials, &resp_lookup, - PVFS2_LOOKUP_LINK_NO_FOLLOW); + PVFS2_LOOKUP_LINK_NO_FOLLOW, NULL); if (ret != 0) { fprintf(stderr, "Target '%s' does not exist!\n", str_buf); @@ -162,7 +162,7 @@ int pvfs2_chown (PVFS_uid owner, PVFS_gid group, char *destfile) { memset(&resp_getattr,0,sizeof(PVFS_sysresp_getattr)); attrmask = (PVFS_ATTR_SYS_ALL_SETABLE); - ret = PVFS_sys_getattr(resp_lookup.ref,attrmask,&credentials,&resp_getattr); + ret = PVFS_sys_getattr(resp_lookup.ref,attrmask,&credentials,&resp_getattr, NULL); if (ret < 0) { PVFS_perror("PVFS_sys_getattr",ret); @@ -175,7 +175,7 @@ int pvfs2_chown (PVFS_uid owner, PVFS_gid group, char *destfile) { new_attr.group = group; new_attr.mask = PVFS_ATTR_SYS_UID | PVFS_ATTR_SYS_GID; - ret = PVFS_sys_setattr(resp_lookup.ref,new_attr,&credentials); + ret = PVFS_sys_setattr(resp_lookup.ref,new_attr,&credentials, NULL); if (ret < 0) { PVFS_perror("PVFS_sys_setattr",ret); diff --git a/src/apps/admin/pvfs2-config.in b/src/apps/admin/pvfs2-config.in index c863ec0..a77800f 100755 --- a/src/apps/admin/pvfs2-config.in +++ b/src/apps/admin/pvfs2-config.in @@ -45,7 +45,7 @@ while test $# -gt 0; do ;; --libs|--static-libs) - libflags="-L@libdir@ -lpvfs2 @LIBS@ @THREAD_LIB@" + libflags="-L@libdir@ -lpvfs2 -lm @LIBS@ @THREAD_LIB@" if [ x"@BUILD_GM@" = x"1" ]; 
then libflags="$libflags -L@GM_LIBDIR@ -lgm" fi @@ -61,14 +61,11 @@ while test $# -gt 0; do if [ x"@BUILD_PORTALS@" = x"1" ]; then libflags="$libflags @PORTALS_LIBS@" fi - if [ x"@BUILD_OSD@" = x"1" ]; then - libflags="$libflags @OSD_LIBS@" - fi echo $libflags ;; --serverlibs|--static-serverlibs) - libflags="-L@libdir@ -lpthread -lpvfs2-server @LIBS@ @DB_LIB@" + libflags="-L@libdir@ -lpvfs2-server -lpthread @LIBS@ @DB_LIB@" if [ x"@NEEDS_LIBRT@" = x"1" ]; then libflags="$libflags -lrt" fi diff --git a/src/apps/admin/pvfs2-cp.c b/src/apps/admin/pvfs2-cp.c index de3be4e..69b98b8 100644 --- a/src/apps/admin/pvfs2-cp.c +++ b/src/apps/admin/pvfs2-cp.c @@ -25,6 +25,7 @@ #include "str-utils.h" #include "pint-sysint-utils.h" #include "pvfs2-internal.h" +#include "pvfs2-hint.h" /* optional parameters, filled in by parse_args() */ struct options @@ -50,8 +51,8 @@ enum open_type { typedef struct pvfs2_file_object_s { PVFS_fs_id fs_id; PVFS_object_ref ref; - char pvfs2_path[PVFS_NAME_MAX]; - char user_path[PVFS_NAME_MAX]; + char pvfs2_path[PVFS_NAME_MAX+1]; + char user_path[PVFS_NAME_MAX+1]; PVFS_sys_attr attr; PVFS_permissions perms; } pvfs2_file_object; @@ -59,7 +60,7 @@ typedef struct pvfs2_file_object_s { typedef struct unix_file_object_s { int fd; int mode; - char path[NAME_MAX]; + char path[NAME_MAX+1]; } unix_file_object; typedef struct file_object_s { @@ -70,6 +71,7 @@ typedef struct file_object_s { } u; } file_object; +static PVFS_hint hints = NULL; static struct options* parse_args(int argc, char* argv[]); static void usage(int argc, char** argv); @@ -132,6 +134,8 @@ int main (int argc, char ** argv) return(-1); } + PVFS_hint_import_env(& hints); + ret = PVFS_util_init_defaults(); if (ret < 0) { @@ -198,6 +202,7 @@ int main (int argc, char ** argv) { print_timings(time2-time1, total_written); } + ret = 0; main_out: @@ -205,6 +210,8 @@ int main (int argc, char ** argv) PVFS_sys_finalize(); free(user_opts); free(buffer); + + PVFS_hint_free(hints); return(ret); } @@ 
-291,7 +298,7 @@ static void usage(int argc, char** argv) fprintf(stderr, "Where ARGS is one or more of" "\n-s \t\t\tsize of access to PVFS2 volume" "\n-n \t\tnumber of PVFS2 datafiles to use" - "\n-b \t\thow much data to read/write at once" + "\n-b \thow much data to read/write at once" "\n-t\t\t\t\tprint some timing information" "\n-v\t\t\t\tprint version number and exit\n"); return; @@ -331,7 +338,7 @@ static size_t generic_read(file_object *src, char *buffer, return (ret); } ret = PVFS_sys_read(src->u.pvfs2.ref, file_req, offset, - buffer, mem_req, credentials, &resp_io); + buffer, mem_req, credentials, &resp_io, hints); if (ret == 0) { PVFS_Request_free(&mem_req); @@ -363,7 +370,7 @@ static size_t generic_write(file_object *dest, char *buffer, return(ret); } ret = PVFS_sys_write(dest->u.pvfs2.ref, file_req, offset, - buffer, mem_req, credentials, &resp_io); + buffer, mem_req, credentials, &resp_io, hints); if (ret == 0) { PVFS_Request_free(&mem_req); @@ -421,7 +428,6 @@ static int generic_open(file_object *obj, PVFS_credentials *credentials, int ret = -1; char *entry_name; /* name of the pvfs2 file */ char str_buf[PVFS_NAME_MAX]; /* basename of pvfs2 file */ - PVFS_sys_layout layout; if (obj->fs_type == UNIX_FILE) { @@ -485,7 +491,7 @@ static int generic_open(file_object *obj, PVFS_credentials *credentials, memset(&resp_lookup, 0, sizeof(PVFS_sysresp_lookup)); ret = PVFS_sys_lookup(obj->u.pvfs2.fs_id, obj->u.pvfs2.pvfs2_path, credentials, &resp_lookup, - PVFS2_LOOKUP_LINK_FOLLOW); + PVFS2_LOOKUP_LINK_FOLLOW, hints); if (ret < 0) { PVFS_perror("PVFS_sys_lookup", ret); @@ -554,13 +560,13 @@ static int generic_open(file_object *obj, PVFS_credentials *credentials, memset(&resp_lookup, 0, sizeof(PVFS_sysresp_lookup)); ret = PVFS_sys_ref_lookup(parent_ref.fs_id, entry_name, parent_ref, credentials, &resp_lookup, - PVFS2_LOOKUP_LINK_FOLLOW); + PVFS2_LOOKUP_LINK_FOLLOW, hints); if ((ret == 0) && (open_type == OPEN_SRC)) { memset(&resp_getattr, 0, 
sizeof(PVFS_sysresp_getattr)); ret = PVFS_sys_getattr(resp_lookup.ref, PVFS_ATTR_SYS_ALL_NOHINT, - credentials, &resp_getattr); + credentials, &resp_getattr, hints); if (ret) { fprintf(stderr, "Failed to do pvfs2 getattr on %s\n", @@ -627,10 +633,9 @@ static int generic_open(file_object *obj, PVFS_credentials *credentials, new_dist=NULL; } - layout.algorithm = PVFS_SYS_LAYOUT_NONE; ret = PVFS_sys_create(entry_name, parent_ref, obj->u.pvfs2.attr, credentials, - new_dist, &layout, &resp_create); + new_dist, &resp_create, NULL, hints); if (ret < 0) { PVFS_perror("PVFS_sys_create", ret); @@ -664,7 +669,7 @@ static int generic_cleanup(file_object *src, file_object *dest, /* preserve permissions doing a pvfs2 => pvfs2 copy */ if ((src->fs_type == PVFS2_FILE) && (dest->fs_type == PVFS2_FILE)) { - PVFS_sys_setattr(dest->u.pvfs2.ref, src->u.pvfs2.attr, credentials); + PVFS_sys_setattr(dest->u.pvfs2.ref, src->u.pvfs2.attr, credentials, hints); } if ((src->fs_type == UNIX_FILE) && (src->u.ufs.fd != -1)) diff --git a/src/apps/admin/pvfs2-drop-caches.c b/src/apps/admin/pvfs2-drop-caches.c new file mode 100644 index 0000000..89900ca --- /dev/null +++ b/src/apps/admin/pvfs2-drop-caches.c @@ -0,0 +1,179 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pvfs2.h" +#include "pvfs2-mgmt.h" + +#ifndef PVFS2_VERSION +#define PVFS2_VERSION "Unknown" +#endif + +struct options +{ + char* mnt_point; + int mnt_point_set; +}; + +static struct options* parse_args(int argc, char* argv[]); +static void usage(int argc, char** argv); + +int main(int argc, char **argv) +{ + int ret = -1; + PVFS_fs_id cur_fs; + struct options* user_opts = NULL; + char pvfs_path[PVFS_NAME_MAX] = {0}; + PVFS_credentials creds; + + /* look at command line arguments */ + user_opts = parse_args(argc, argv); + if(!user_opts) + { + fprintf(stderr, "Error: failed to parse command line arguments.\n"); + usage(argc, argv); + return(-1); + } + + ret = PVFS_util_init_defaults(); + if(ret < 0) + { + PVFS_perror("PVFS_util_init_defaults", ret); + return(-1); + } + + /* translate local path into pvfs2 relative path */ + ret = PVFS_util_resolve(user_opts->mnt_point, + &cur_fs, pvfs_path, PVFS_NAME_MAX); + if(ret < 0) + { + fprintf(stderr, "Error: could not find filesystem for %s in pvfstab\n", + user_opts->mnt_point); + return(-1); + } + + PVFS_util_gen_credentials(&creds); + + ret = PVFS_mgmt_setparam_all(cur_fs, + &creds, + PVFS_SERV_PARAM_DROP_CACHES, + 0, + NULL, + NULL /* detailed errors */); + if(ret < 0) + { + PVFS_perror("PVFS_mgmt_setparam_all", ret); + return(-1); + } + + PVFS_sys_finalize(); + + return(ret); +} + + +/* parse_args() + * + * parses command line arguments + * + * returns pointer to options structure on success, NULL on failure + */ +static struct options* parse_args(int argc, char* argv[]) +{ + char flags[] = "vm:"; + int one_opt = 0; + int len = 0; + + struct options* tmp_opts = NULL; + int ret = -1; + + /* create storage for the command line options */ + tmp_opts = (struct options*)malloc(sizeof(struct options)); + if(!tmp_opts){ + return(NULL); + } + memset(tmp_opts, 0, sizeof(struct options)); + + /* look at 
command line arguments */ + while((one_opt = getopt(argc, argv, flags)) != EOF){ + switch(one_opt) + { + case('v'): + printf("%s\n", PVFS2_VERSION); + exit(0); + case('m'): + len = strlen(optarg)+1; + tmp_opts->mnt_point = (char*)malloc(len+1); + if(!tmp_opts->mnt_point) + { + free(tmp_opts); + return(NULL); + } + memset(tmp_opts->mnt_point, 0, len+1); + ret = sscanf(optarg, "%s", tmp_opts->mnt_point); + if(ret < 1){ + free(tmp_opts); + return(NULL); + } + /* TODO: dirty hack... fix later. The remove_dir_prefix() + * function expects some trailing segments or at least + * a slash off of the mount point + */ + strcat(tmp_opts->mnt_point, "/"); + tmp_opts->mnt_point_set = 1; + break; + case('?'): + usage(argc, argv); + exit(EXIT_FAILURE); + } + } + + if(optind < argc) + { + usage(argc, argv); + exit(EXIT_FAILURE); + } + + if(!tmp_opts->mnt_point_set) + { + free(tmp_opts); + return(NULL); + } + + return(tmp_opts); +} + + +static void usage(int argc, char** argv) +{ + fprintf(stderr, "\n"); + fprintf(stderr, "Usage : %s [-m fs_mount_point]\n", + argv[0]); + fprintf(stderr, "Request that the OS flush and drop all I/O caches on servers.\n"); + + return; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/apps/admin/pvfs2-event-mon-example.c b/src/apps/admin/pvfs2-event-mon-example.c index 67c1ed6..ecf257a 100644 --- a/src/apps/admin/pvfs2-event-mon-example.c +++ b/src/apps/admin/pvfs2-event-mon-example.c @@ -16,7 +16,6 @@ #include "pvfs2.h" #include "pvfs2-mgmt.h" -#include "pvfs2-event.h" #ifndef PVFS2_VERSION #define PVFS2_VERSION "Unknown" @@ -49,16 +48,16 @@ int main(int argc, char **argv) user_opts = parse_args(argc, argv); if (!user_opts) { - fprintf(stderr, "Error: failed to parse command line arguments.\n"); - usage(argc, argv); - return -1; + fprintf(stderr, "Error: failed to parse command line arguments.\n"); + usage(argc, argv); + return -1; } ret = 
PVFS_util_init_defaults(); if(ret < 0) { - PVFS_perror("PVFS_util_init_defaults", ret); - return(-1); + PVFS_perror("PVFS_util_init_defaults", ret); + return(-1); } /* translate local path into pvfs2 relative path */ @@ -66,94 +65,95 @@ int main(int argc, char **argv) &cur_fs, pvfs_path, PVFS_NAME_MAX); if(ret < 0) { - PVFS_perror("PVFS_util_resolve", ret); - return -1; + PVFS_perror("PVFS_util_resolve", ret); + return -1; } PVFS_util_gen_credentials(&creds); /* count how many I/O servers we have */ ret = PVFS_mgmt_count_servers(cur_fs, - &creds, - PVFS_MGMT_IO_SERVER, - &io_server_count); + &creds, + PVFS_MGMT_IO_SERVER, + &io_server_count); if (ret < 0) { - PVFS_perror("PVFS_mgmt_count_servers", ret); - return -1; + PVFS_perror("PVFS_mgmt_count_servers", ret); + return -1; } /* allocate a 2 dimensional array for events */ event_matrix = (struct PVFS_mgmt_event **) - malloc(io_server_count * sizeof(struct PVFS_mgmt_event *)); + malloc(io_server_count * sizeof(struct PVFS_mgmt_event *)); if (event_matrix == NULL) { - perror("malloc"); - return -1; + perror("malloc"); + return -1; } for (i=0; i < io_server_count; i++) { - event_matrix[i] = (struct PVFS_mgmt_event *) - malloc(EVENT_DEPTH * sizeof(struct PVFS_mgmt_event)); - if (event_matrix[i] == NULL) - { - perror("malloc"); - return -1; - } + event_matrix[i] = (struct PVFS_mgmt_event *) + malloc(EVENT_DEPTH * sizeof(struct PVFS_mgmt_event)); + if (event_matrix[i] == NULL) + { + perror("malloc"); + return -1; + } } /* build a list of servers to talk to */ addr_array = (PVFS_BMI_addr_t *) - malloc(io_server_count * sizeof(PVFS_BMI_addr_t)); + malloc(io_server_count * sizeof(PVFS_BMI_addr_t)); if (addr_array == NULL) { - perror("malloc"); - return -1; + perror("malloc"); + return -1; } ret = PVFS_mgmt_get_server_array(cur_fs, - &creds, - PVFS_MGMT_IO_SERVER, - addr_array, - &io_server_count); + &creds, + PVFS_MGMT_IO_SERVER, + addr_array, + &io_server_count); if (ret < 0) { - PVFS_perror("PVFS_mgmt_get_server_array", 
ret); - return -1; + PVFS_perror("PVFS_mgmt_get_server_array", ret); + return -1; } /* grap current events */ ret = PVFS_mgmt_event_mon_list(cur_fs, - &creds, - event_matrix, - addr_array, - io_server_count, - EVENT_DEPTH, - NULL /* detailed errors */); + &creds, + event_matrix, + addr_array, + io_server_count, + EVENT_DEPTH, + NULL, /* detailed errors */ + NULL); if (ret < 0) { - PVFS_perror("PVFS_mgmt_event_mon_list", EVENT_DEPTH); - return -1; + PVFS_perror("PVFS_mgmt_event_mon_list", EVENT_DEPTH); + return -1; } printf("# (server number) (api) (operation) (value) (id) (flags) (sec) (usec)\n"); for (i=0; i < io_server_count; i++) { - for (j=0; j < EVENT_DEPTH; j++) - { - if ((event_matrix[i][j].flags & PVFS_EVENT_FLAG_INVALID) == 0) - { - printf("%d %d %d %lld %lld %d %d %d\n", - i, - (int)event_matrix[i][j].api, - (int)event_matrix[i][j].operation, - (long long)event_matrix[i][j].value, - (long long)event_matrix[i][j].id, - (int)event_matrix[i][j].flags, - (int)event_matrix[i][j].tv_sec, - (int)event_matrix[i][j].tv_usec); - } - } + for (j=0; j < EVENT_DEPTH; j++) + { + if ((event_matrix[i][j].flags & PVFS_EVENT_FLAG_INVALID) == 0) + { + printf("%d %d %d %lld %lld %d %d %d\n", + i, + (int)event_matrix[i][j].api, + (int)event_matrix[i][j].operation, + (long long)event_matrix[i][j].value, + (long long)event_matrix[i][j].id, + (int)event_matrix[i][j].flags, + (int)event_matrix[i][j].tv_sec, + (int)event_matrix[i][j].tv_usec); + } + } } PVFS_sys_finalize(); @@ -181,48 +181,50 @@ static struct options* parse_args(int argc, char* argv[]) tmp_opts = (struct options *) malloc(sizeof(struct options)); if (tmp_opts == NULL) { - return NULL; + return NULL; } memset(tmp_opts, 0, sizeof(struct options)); /* look at command line arguments */ - while((one_opt = getopt(argc, argv, flags)) != EOF){ - switch(one_opt) + while((one_opt = getopt(argc, argv, flags)) != EOF) + { + switch(one_opt) { case('v'): printf("%s\n", PVFS2_VERSION); exit(0); - case('m'): - len = 
strlen(optarg)+1; - tmp_opts->mnt_point = (char *) malloc(len + 1); - if (tmp_opts->mnt_point == NULL) - { - free(tmp_opts); - return NULL; - } - memset(tmp_opts->mnt_point, 0, len+1); - ret = sscanf(optarg, "%s", tmp_opts->mnt_point); - if(ret < 1){ - free(tmp_opts); - return NULL; - } - /* TODO: dirty hack... fix later. The remove_dir_prefix() - * function expects some trailing segments or at least - * a slash off of the mount point - */ - strcat(tmp_opts->mnt_point, "/"); - tmp_opts->mnt_point_set = 1; - break; - case('?'): - usage(argc, argv); - exit(EXIT_FAILURE); - } + case('m'): + len = strlen(optarg)+1; + tmp_opts->mnt_point = (char *) malloc(len + 1); + if (tmp_opts->mnt_point == NULL) + { + free(tmp_opts); + return NULL; + } + memset(tmp_opts->mnt_point, 0, len+1); + ret = sscanf(optarg, "%s", tmp_opts->mnt_point); + if(ret < 1) + { + free(tmp_opts); + return NULL; + } + /* TODO: dirty hack... fix later. The remove_dir_prefix() + * function expects some trailing segments or at least + * a slash off of the mount point + */ + strcat(tmp_opts->mnt_point, "/"); + tmp_opts->mnt_point_set = 1; + break; + case('?'): + usage(argc, argv); + exit(EXIT_FAILURE); + } } if (!tmp_opts->mnt_point_set) { - free(tmp_opts); - return NULL; + free(tmp_opts); + return NULL; } return tmp_opts; @@ -232,10 +234,8 @@ static struct options* parse_args(int argc, char* argv[]) static void usage(int argc, char** argv) { fprintf(stderr, "\n"); - fprintf(stderr, "Usage : %s [-m fs_mount_point]\n", - argv[0]); - fprintf(stderr, "Example: %s -m /mnt/pvfs2\n", - argv[0]); + fprintf(stderr, "Usage : %s [-m fs_mount_point]\n", argv[0]); + fprintf(stderr, "Example: %s -m /mnt/pvfs2\n", argv[0]); return; } diff --git a/src/apps/admin/pvfs2-fs-dump.c b/src/apps/admin/pvfs2-fs-dump.c index cc6c2f4..099b283 100644 --- a/src/apps/admin/pvfs2-fs-dump.c +++ b/src/apps/admin/pvfs2-fs-dump.c @@ -111,6 +111,7 @@ int main(int argc, char **argv) PVFS_credentials creds; int server_count; PVFS_BMI_addr_t 
*addr_array; + struct PVFS_mgmt_setparam_value param_value; /* look at command line arguments */ user_opts = parse_args(argc, argv); @@ -166,26 +167,31 @@ int main(int argc, char **argv) return -1; } + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = PVFS_SERVER_ADMIN_MODE; + /* put the servers into administrative mode */ ret = PVFS_mgmt_setparam_list(cur_fs, - &creds, - PVFS_SERV_PARAM_MODE, - (uint64_t)PVFS_SERVER_ADMIN_MODE, - addr_array, - NULL, - server_count, - NULL /* detailed errors */); + &creds, + PVFS_SERV_PARAM_MODE, + &param_value, + addr_array, + server_count, + NULL, /* detailed errors */ + NULL); if (ret != 0) { + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = PVFS_SERVER_NORMAL_MODE; + PVFS_perror("PVFS_mgmt_setparam_list", ret); PVFS_mgmt_setparam_list(cur_fs, &creds, PVFS_SERV_PARAM_MODE, - (uint64_t)PVFS_SERVER_NORMAL_MODE, + &param_value, addr_array, - NULL, server_count, - NULL); + NULL, NULL); return(-1); } @@ -213,15 +219,16 @@ int main(int argc, char **argv) handlelist_finalize(); + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = PVFS_SERVER_NORMAL_MODE; PVFS_mgmt_setparam_list( cur_fs, &creds, PVFS_SERV_PARAM_MODE, - (uint64_t)PVFS_SERVER_NORMAL_MODE, + &param_value, addr_array, - NULL, server_count, - NULL); + NULL, NULL); PVFS_sys_finalize(); @@ -241,20 +248,22 @@ int build_handlelist(PVFS_fs_id cur_fs, unsigned long *total_count_array; PVFS_ds_position *position_array; struct PVFS_mgmt_server_stat *stat_array; + struct PVFS_mgmt_setparam_value param_value; /* find out how many handles are in use on each */ stat_array = (struct PVFS_mgmt_server_stat *) malloc(server_count * sizeof(struct PVFS_mgmt_server_stat)); if (stat_array == NULL) { + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = PVFS_SERVER_NORMAL_MODE; PVFS_mgmt_setparam_list(cur_fs, creds, PVFS_SERV_PARAM_MODE, - (uint64_t)PVFS_SERVER_NORMAL_MODE, + &param_value, addr_array, - NULL, server_count, - 
NULL); + NULL, NULL); return -1; } @@ -263,18 +272,20 @@ int build_handlelist(PVFS_fs_id cur_fs, stat_array, addr_array, server_count, - NULL /* details */); + NULL /* details */ + , NULL); if (ret != 0) { - PVFS_perror("PVFS_mgmt_statfs_list", ret); + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = PVFS_SERVER_NORMAL_MODE; + PVFS_perror("PVFS_mgmt_statfs_list", ret); PVFS_mgmt_setparam_list(cur_fs, creds, PVFS_SERV_PARAM_MODE, - (uint64_t)PVFS_SERVER_NORMAL_MODE, + &param_value, addr_array, - NULL, server_count, - NULL); + NULL, NULL); return -1; } @@ -348,18 +359,21 @@ int build_handlelist(PVFS_fs_id cur_fs, position_array, addr_array, server_count, - NULL /* details */); + 0, + NULL /* details */, + NULL /* hints */); if (ret < 0) { - PVFS_perror("PVFS_mgmt_iterate_handles_list", ret); + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = PVFS_SERVER_NORMAL_MODE; + PVFS_perror("PVFS_mgmt_iterate_handles_list", ret); PVFS_mgmt_setparam_list(cur_fs, creds, PVFS_SERV_PARAM_MODE, - (uint64_t)PVFS_SERVER_NORMAL_MODE, + &param_value, addr_array, - NULL, server_count, - NULL); + NULL, NULL); return -1; } @@ -434,14 +448,14 @@ int traverse_directory_tree(PVFS_fs_id cur_fs, PVFS_object_ref pref; PVFS_sys_lookup(cur_fs, "/", creds, - &lookup_resp, PVFS2_LOOKUP_LINK_NO_FOLLOW); + &lookup_resp, PVFS2_LOOKUP_LINK_NO_FOLLOW, NULL); /* lookup_resp.pinode_refn.handle gets root handle */ pref = lookup_resp.ref; PVFS_sys_getattr(pref, PVFS_ATTR_SYS_ALL_NOHINT, creds, - &getattr_resp); + &getattr_resp, NULL); if (getattr_resp.attr.objtype != PVFS_TYPE_DIRECTORY) { @@ -490,7 +504,7 @@ int descend(PVFS_fs_id cur_fs, (!token ? 
PVFS_READDIR_START : token), count, creds, - &readdir_resp); + &readdir_resp, NULL); for (i = 0; i < readdir_resp.pvfs_dirent_outcount; i++) { @@ -507,7 +521,7 @@ int descend(PVFS_fs_id cur_fs, if ((ret = PVFS_sys_getattr(entry_ref, PVFS_ATTR_SYS_ALL_NOHINT, creds, - &getattr_resp)) != 0) + &getattr_resp, NULL)) != 0) { printf("Could not get attributes of handle %llu [%d]\n", llu(cur_handle), ret); @@ -549,7 +563,7 @@ int descend(PVFS_fs_id cur_fs, handlelist_remove_handle(cur_handle, server_idx); } - token += readdir_resp.pvfs_dirent_outcount; + token = readdir_resp.token; if (readdir_resp.pvfs_dirent_outcount) { free(readdir_resp.dirent_array); @@ -585,7 +599,7 @@ void verify_datafiles(PVFS_fs_id cur_fs, printf("invalid value of number of datafiles = %d\n", df_count); assert(0); } - ret = PVFS_mgmt_get_dfile_array(mf_ref, creds, df_handles, df_count); + ret = PVFS_mgmt_get_dfile_array(mf_ref, creds, df_handles, df_count, NULL); if (ret != 0) { assert(0); @@ -632,17 +646,19 @@ void analyze_remaining_handles(PVFS_fs_id cur_fs, { PVFS_sysresp_getattr getattr_resp; PVFS_object_ref entry_ref; + char* fmt_string; entry_ref.handle = handle; entry_ref.fs_id = cur_fs; /* only remaining handles are dirdata */ PVFS_sys_getattr(entry_ref, - PVFS_ATTR_SYS_ALL, - creds, &getattr_resp); + PVFS_ATTR_SYS_TYPE, + creds, &getattr_resp, NULL); if (getattr_resp.attr.objtype != PVFS_TYPE_DIRDATA) { flag = 0; - if (dot_fmt) + if (dot_fmt && getattr_resp.attr.objtype != PVFS_TYPE_INTERNAL && + getattr_resp.attr.objtype != PVFS_TYPE_DATAFILE) { printf("\tH%llu [shape=record, color=red, label = \"{(unknown) " "| %llu (%d)}\"];\n", @@ -650,9 +666,18 @@ void analyze_remaining_handles(PVFS_fs_id cur_fs, llu(handle), server_idx); } - else + else if(!dot_fmt) { - printf("\t%s: %llu\n", + if(getattr_resp.attr.objtype == PVFS_TYPE_INTERNAL) + fmt_string = "\t%s: %llu (server internal use)\n"; + else if(getattr_resp.attr.objtype == PVFS_TYPE_DATAFILE) + fmt_string = "\t%s: %llu (datafile, 
probably preallocated)\n"; + else if(getattr_resp.attr.objtype == PVFS_TYPE_METAFILE) + fmt_string = "\t%s: %llu (metafile, probably preallocated)\n"; + else + fmt_string = "\t%s: %llu (unknown)\n"; + + printf(fmt_string, PVFS_mgmt_map_addr(cur_fs, creds, addr_array[server_idx], diff --git a/src/apps/admin/pvfs2-fsck.c b/src/apps/admin/pvfs2-fsck.c index 16ef48a..a2ccca4 100644 --- a/src/apps/admin/pvfs2-fsck.c +++ b/src/apps/admin/pvfs2-fsck.c @@ -35,11 +35,18 @@ struct options int mnt_point_set; int verbose; int destructive; + int safety_check; + unsigned int safety_count; }; struct options *fsck_opts = NULL; /* lost+found reference */ PVFS_object_ref laf_ref; +unsigned long int global_removals = 0; + +static void handlelist_remove_handle_no_idx(struct handlelist *hl, + PVFS_handle handle); +static void get_user_action_to_continue( void ); int main(int argc, char **argv) { @@ -50,6 +57,7 @@ int main(int argc, char **argv) int server_count; PVFS_BMI_addr_t *addr_array = NULL; struct handlelist *hl_all, *hl_unrefd, *hl_notree; + struct PVFS_mgmt_setparam_value param_value; fsck_opts = parse_args(argc, argv); if (!fsck_opts) @@ -122,15 +130,17 @@ int main(int argc, char **argv) return -1; } + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = PVFS_SERVER_ADMIN_MODE; /* put the servers into administrative mode */ ret = PVFS_mgmt_setparam_list(cur_fs, &creds, PVFS_SERV_PARAM_MODE, - (uint64_t)PVFS_SERVER_ADMIN_MODE, + &param_value, addr_array, - NULL, server_count, - NULL /* detailed errors */); + NULL, /* detailed errors */ + NULL); if (ret != 0) { PVFS_perror("PVFS_mgmt_setparam_list", ret); @@ -177,15 +187,17 @@ int main(int argc, char **argv) handlelist_finalize(&hl_all); + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = PVFS_SERVER_NORMAL_MODE; + /* drop out of admin mode now that we've traversed the dir tree */ PVFS_mgmt_setparam_list(cur_fs, &creds, PVFS_SERV_PARAM_MODE, - (uint64_t) PVFS_SERVER_NORMAL_MODE, + &param_value, 
addr_array, - NULL, server_count, - NULL); + NULL, NULL); in_admin_mode = 0; /* third pass moves salvagable objects into lost+found: @@ -212,15 +224,18 @@ int main(int argc, char **argv) exit_now: if (in_admin_mode) { + + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = PVFS_SERVER_NORMAL_MODE; + /* get us out of admin mode */ PVFS_mgmt_setparam_list(cur_fs, &creds, PVFS_SERV_PARAM_MODE, - (uint64_t) PVFS_SERVER_NORMAL_MODE, + &param_value, addr_array, - NULL, server_count, - NULL); + NULL, NULL); } PVFS_sys_finalize(); @@ -245,20 +260,22 @@ struct handlelist *build_handlelist(PVFS_fs_id cur_fs, PVFS_ds_position *position_array; struct PVFS_mgmt_server_stat *stat_array; struct handlelist *hl; + struct PVFS_mgmt_setparam_value param_value; /* find out how many handles are in use on each */ stat_array = (struct PVFS_mgmt_server_stat *) malloc(server_count * sizeof(struct PVFS_mgmt_server_stat)); if (stat_array == NULL) { + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = PVFS_SERVER_NORMAL_MODE; PVFS_mgmt_setparam_list(cur_fs, creds, PVFS_SERV_PARAM_MODE, - (uint64_t)PVFS_SERVER_NORMAL_MODE, + &param_value, addr_array, - NULL, server_count, - NULL); + NULL, NULL); return NULL; } @@ -267,18 +284,21 @@ struct handlelist *build_handlelist(PVFS_fs_id cur_fs, stat_array, addr_array, server_count, - NULL /* details */); + NULL /* details */ + , NULL); if (ret != 0) { + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = PVFS_SERVER_NORMAL_MODE; + PVFS_perror("PVFS_mgmt_statfs_list", ret); PVFS_mgmt_setparam_list(cur_fs, creds, PVFS_SERV_PARAM_MODE, - (uint64_t)PVFS_SERVER_NORMAL_MODE, + &param_value, addr_array, - NULL, server_count, - NULL); + NULL, NULL); return NULL; } @@ -353,18 +373,22 @@ struct handlelist *build_handlelist(PVFS_fs_id cur_fs, position_array, addr_array, server_count, - NULL /* details */); + 0, + NULL /* details */, + NULL /* hints */); if (ret < 0) { + param_value.type =
PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = PVFS_SERVER_NORMAL_MODE; + PVFS_perror("PVFS_mgmt_iterate_handles_list", ret); PVFS_mgmt_setparam_list(cur_fs, creds, PVFS_SERV_PARAM_MODE, - (uint64_t)PVFS_SERVER_NORMAL_MODE, + &param_value, addr_array, - NULL, server_count, - NULL); + NULL, NULL); return NULL; } @@ -412,6 +436,77 @@ struct handlelist *build_handlelist(PVFS_fs_id cur_fs, i, total_count_array[i], used_handles); return NULL; } + } + + handlelist_finished_adding_handles(hl); /* sanity check */ + + /* now look for reserved handles */ + for (i=0; i < server_count; i++) + { + hcount_array[i] = HANDLE_BATCH; + position_array[i] = PVFS_ITERATE_START; + } + + more_flag = 1; + while (more_flag) + { + ret = PVFS_mgmt_iterate_handles_list(cur_fs, + creds, + handle_matrix, + hcount_array, + position_array, + addr_array, + server_count, + PVFS_MGMT_RESERVED, + NULL /* details */, + NULL /* hints */); + if (ret < 0) + { + PVFS_perror("PVFS_mgmt_iterate_handles_list", ret); + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = PVFS_SERVER_NORMAL_MODE; + PVFS_mgmt_setparam_list(cur_fs, + creds, + PVFS_SERV_PARAM_MODE, + &param_value, + addr_array, + server_count, + NULL, + NULL); + return NULL; + } + + for (i=0; i < server_count; i++) + { + /* remove any reserved handles from the handlelist. These will + * not show up in normal objects when we walk the file system + * tree. + */ + for (j=0; j < hcount_array[i]; j++) + { + /* we don't know the server index. Reserved handles can be + * reported by any server; not just the server that actually + * owns that handle.
+ */ + handlelist_remove_handle_no_idx(hl, + handle_matrix[i][j]); + } + } + + /* find out if any servers have more handles to dump */ + more_flag = 0; + for (i=0; i < server_count; i++) + { + if (position_array[i] != PVFS_ITERATE_END) + { + more_flag = 1; + hcount_array[i] = HANDLE_BATCH; + } + } + } + + for (i = 0; i < server_count; i++) + { free(handle_matrix[i]); } @@ -421,7 +516,6 @@ struct handlelist *build_handlelist(PVFS_fs_id cur_fs, free(total_count_array); free(position_array); - handlelist_finished_adding_handles(hl); /* sanity check */ free(stat_array); stat_array = NULL; @@ -443,7 +537,7 @@ int traverse_directory_tree(PVFS_fs_id cur_fs, "/", creds, &lookup_resp, - PVFS2_LOOKUP_LINK_NO_FOLLOW); + PVFS2_LOOKUP_LINK_NO_FOLLOW, NULL); assert(ret == 0); pref = lookup_resp.ref; @@ -451,7 +545,7 @@ int traverse_directory_tree(PVFS_fs_id cur_fs, PVFS_sys_getattr(pref, PVFS_ATTR_SYS_ALL_NOHINT, creds, - &getattr_resp); + &getattr_resp, NULL); assert(getattr_resp.attr.objtype == PVFS_TYPE_DIRECTORY); @@ -486,7 +580,7 @@ int match_dirdata(struct handlelist *hl, ret = PVFS_mgmt_get_dirdata_handle(dir_ref, &dirdata_handle, - creds); + creds, NULL); if (ret != 0) { PVFS_perror("match_dirdata", ret); @@ -530,7 +624,7 @@ int descend(PVFS_fs_id cur_fs, (!token ? 
PVFS_READDIR_START : token), count, creds, - &readdir_resp); + &readdir_resp, NULL); for (i = 0; i < readdir_resp.pvfs_dirent_outcount; i++) { @@ -569,7 +663,7 @@ int descend(PVFS_fs_id cur_fs, ret = PVFS_sys_getattr(entry_ref, PVFS_ATTR_SYS_ALL_NOHINT, creds, - &getattr_resp); + &getattr_resp, NULL); if (ret != 0) { ret = remove_directory_entry(dir_ref, entry_ref, @@ -668,7 +762,7 @@ int descend(PVFS_fs_id cur_fs, handlelist_remove_handle(hl, cur_handle, server_idx); } } - token += readdir_resp.pvfs_dirent_outcount; + token = readdir_resp.token; if (readdir_resp.pvfs_dirent_outcount) { free(readdir_resp.dirent_array); @@ -706,7 +800,7 @@ int verify_datafiles(PVFS_fs_id cur_fs, { assert(0); } - ret = PVFS_mgmt_get_dfile_array(mf_ref, creds, df_handles, df_count); + ret = PVFS_mgmt_get_dfile_array(mf_ref, creds, df_handles, df_count, NULL); if (ret != 0) { /* what does this mean? */ @@ -799,7 +893,7 @@ struct handlelist *find_sub_trees(PVFS_fs_id cur_fs, ret = PVFS_sys_getattr(handle_ref, PVFS_ATTR_SYS_ALL_NOHINT, creds, - &getattr_resp); + &getattr_resp, NULL); if (ret) { /* remove anything we can't get attributes on */ ret = remove_object(handle_ref, @@ -880,7 +974,7 @@ struct handlelist *fill_lost_and_found(PVFS_fs_id cur_fs, ret = PVFS_sys_getattr(handle_ref, PVFS_ATTR_SYS_ALL_NOHINT, creds, - &getattr_resp); + &getattr_resp, NULL); if (ret) { printf("warning: problem calling getattr on %llu; assuming datafile for now.\n", llu(handle)); @@ -991,7 +1085,7 @@ void cull_leftovers(PVFS_fs_id cur_fs, ret = PVFS_sys_getattr(handle_ref, PVFS_ATTR_SYS_ALL_NOHINT, creds, - &getattr_resp); + &getattr_resp, NULL); if (ret) { printf("warning: problem calling getattr on %llu\n", llu(handle)); @@ -1027,7 +1121,7 @@ int create_lost_and_found(PVFS_fs_id cur_fs, "/lost+found", creds, &lookup_resp, - PVFS2_LOOKUP_LINK_NO_FOLLOW); + PVFS2_LOOKUP_LINK_NO_FOLLOW, NULL); if (ret == 0) { laf_ref = lookup_resp.ref; return 0; @@ -1043,7 +1137,7 @@ int create_lost_and_found(PVFS_fs_id 
cur_fs, "/", creds, &lookup_resp, - PVFS2_LOOKUP_LINK_NO_FOLLOW); + PVFS2_LOOKUP_LINK_NO_FOLLOW, NULL); assert(ret == 0); root_ref = lookup_resp.ref; @@ -1056,7 +1150,7 @@ int create_lost_and_found(PVFS_fs_id cur_fs, root_ref, attr, creds, - &mkdir_resp); + &mkdir_resp, NULL); if (ret == 0) { laf_ref = mkdir_resp.ref; } @@ -1085,7 +1179,7 @@ int create_dirent(PVFS_object_ref dir_ref, ret = PVFS_mgmt_create_dirent(dir_ref, name, handle, - creds); + creds, NULL); if (ret != 0) { PVFS_perror("PVFS_mgmt_create_dirent", ret); } @@ -1109,11 +1203,16 @@ int remove_directory_entry(PVFS_object_ref dir_ref, name, llu(entry_ref.handle), llu(dir_ref.handle)); + if( (fsck_opts->safety_check) && + (++global_removals >= fsck_opts->safety_count) ) + { + get_user_action_to_continue( ); + } if (fsck_opts->destructive) { ret = PVFS_mgmt_remove_dirent(dir_ref, name, - creds); + creds, NULL); if (ret != 0) { PVFS_perror("PVFS_mgmt_remove_dirent", ret); } @@ -1136,9 +1235,15 @@ int remove_object(PVFS_object_ref obj_ref, get_type_str(obj_type), llu(obj_ref.handle)); + if( (fsck_opts->safety_check) && + (++global_removals >= fsck_opts->safety_count) ) + { + get_user_action_to_continue( ); + } + if (fsck_opts->destructive) { ret = PVFS_mgmt_remove_object(obj_ref, - creds); + creds, NULL); if (ret != 0) { PVFS_perror("PVFS_mgmt_remove_object", ret); } @@ -1302,6 +1407,50 @@ static int handlelist_find_handle(struct handlelist *hl, return -1; } +/* handlelist_remove_handle_no_idx() + * + * same as handlelist_remove_handle(), but will search for the correct + * server index + */ +/* TODO: we could speed this up by resolving which server the handle + * belongs to using the cached_config api + */ +static void handlelist_remove_handle_no_idx(struct handlelist *hl, + PVFS_handle handle) +{ + unsigned long i; + int server_idx = 0; + int found = 0; + + for(server_idx = 0; server_idx<hl->server_ct; server_idx++) + { + for (i = 0; i < hl->used_array[server_idx]; i++) + { + if (hl->list_array[server_idx][i]
== handle) + { + if (i < (hl->used_array[server_idx] - 1)) + { + /* move last entry to this position before decrement */ + hl->list_array[server_idx][i] = + hl->list_array[server_idx][hl->used_array[server_idx]-1]; + + } + hl->used_array[server_idx]--; + found = 1; + break; + } + } + if(found) + { + break; + } + } + + if (!found) { + printf("! problem removing %llu.\n", llu(handle)); + } +} + static void handlelist_remove_handle(struct handlelist *hl, PVFS_handle handle, int server_idx) @@ -1409,7 +1558,7 @@ static struct options *parse_args(int argc, char *argv[]) memset(opts, 0, sizeof(struct options)); /* look at command line arguments */ - while((one_opt = getopt(argc, argv, "apynvVm:")) != EOF){ + while((one_opt = getopt(argc, argv, "apyns:vVm:")) != EOF){ switch(one_opt) { case 'a': @@ -1420,6 +1569,10 @@ static struct options *parse_args(int argc, char *argv[]) case 'n': opts->destructive = 0; break; + case 's': + opts->safety_count = atoi(optarg); + opts->safety_check = 1; + break; case 'V': printf("%s\n", PVFS2_VERSION); exit(0); @@ -1467,12 +1620,14 @@ static struct options *parse_args(int argc, char *argv[]) static void usage(int argc, char** argv) { fprintf(stderr, "\n"); - fprintf(stderr, "Usage : %s [-vV] [-m fs_mount_point]\n", + fprintf(stderr, "Usage : %s [-vV] <-ayp -s N> [-m fs_mount_point]\n", argv[0]); fprintf(stderr, "Display information about contents of file system.\n"); fprintf(stderr, " -V print version and exit\n"); fprintf(stderr, " -v verbose operation\n"); fprintf(stderr, " -n answer \"no\" to all questions\n"); + fprintf(stderr, " -s N safety check, prompt after N removals, " + "use with a, p, or y\n"); fprintf(stderr, " -y answer \"yes\" to all questions\n"); fprintf(stderr, " -p automatically repair with no questions\n"); fprintf(stderr, " -a equivalent to \"-p\"\n"); @@ -1515,6 +1670,61 @@ static char *get_type_str(int type) return ret; } +/* get_user_action_to_continue() + * + * called by the removal routines once the -s removal count is reached; + * prompts on stdout and reads the answer from stdin. "Y" resets + * global_removals and returns, "N" or end of input exits the program. + * Does nothing unless fsck is running in destructive mode. + */ +void get_user_action_to_continue( ) +{ + int count = 0; + int reply_idx = 0; + int read_c =
0; + char reply[3] = { 0 }; + + /* only cause the prompt if destructive actions are being taken */ + if( fsck_opts->destructive ) + { + fprintf(stdout, "%lu objects have been removed, ", global_removals); + do + { + memset(reply, 0, 3); + count = 0, reply_idx = 0; + fprintf(stdout, "continue removal [Y|N]: "); + do + { + read_c = getchar(); + count++; + /* discard anything beyond the two chars we want */ + if( (read_c != EOF) && (reply_idx < 2 ) ) + { + reply[reply_idx++] = (char)read_c; + } + } + while((read_c != EOF) && (read_c != '\n')); + + if( (count == 2) && (strncasecmp("Y\n", reply, 2) == 0) ) + { + fprintf(stdout, "resetting removal count, continuing\n"); + global_removals = 0; + return; + } + else if( (read_c == EOF) || + ((count == 2) && (strncasecmp("N\n", reply, 2) == 0)) ) + { + /* also taken at end of input, which would otherwise re-prompt forever */ + fprintf(stdout, "aborting\n"); + exit(1); + } + } + while( 1 ) ; + } + + return; +} + /* * Local variables: * c-indent-level: 4 @@ -1523,4 +1733,3 @@ static char *get_type_str(int type) * * vim: ts=8 sts=4 sw=4 expandtab */ - diff --git a/src/apps/admin/pvfs2-genconfig b/src/apps/admin/pvfs2-genconfig index b6168e6..7ad2ed0 100755 --- a/src/apps/admin/pvfs2-genconfig +++ b/src/apps/admin/pvfs2-genconfig @@ -18,6 +18,7 @@ my $opt_protocol = ''; my $opt_port = ''; my $opt_board = ''; my $opt_tcpport = ''; +my $opt_tcpbindspecific = '0'; my $opt_gmport = ''; my $opt_mxboard = ''; my $opt_mxendpoint = ''; @@ -31,6 +32,7 @@ my $opt_trovesync = '1'; my $opt_trovemethod = ''; my $opt_quiet = ''; my $opt_logging = ''; +my $opt_tracing = ''; my $opt_logstamp = ''; my $opt_server_job_timeout = ''; my $opt_client_job_timeout = ''; @@ -38,11 +40,10 @@ my $opt_first_handle = ''; my $opt_last_handle = ''; my $opt_root_handle = ''; my $opt_fsid = ''; +my $opt_fsname = ''; my $opt_default_num_dfiles = ''; my $opt_default_flow_buffer_size = ''; my $opt_default_flow_buffer_count = ''; -my $opt_osdtype = ''; -my $opt_osddirtype = ''; my $opt_security = '0'; my $opt_trusted_port = ''; @@ -55,6 +56,7 @@ my $opt_metaspec
= undef; my %all_endpoints = (); my $default_storage = undef; +my $default_meta_storage = undef; my $default_logfile = undef; my $bmi_module = undef; @@ -147,16 +149,24 @@ sub parse_hostlist # hostb # hostc{1,2,3} # - @components = $inputline =~ /(?:,?[ ]*([^{,]+(?:{[^}]+})?))/g; - foreach my $comp (@components) + @components = $inputline =~ /(?:,?[ ]*([^{,]+(?:{[^}]+})?[^,]*))/g; + foreach my $comp_ws (@components) { + my $comp; + + # Trim leading and trailing whitespace + $comp = $comp_ws; + $comp =~ s/^\s+//; + $comp =~ s/\s+$//; + # if we've got a component that has {..}, then expand. # match the prefix (hostname) and curly brackets - if($comp =~ /([^{]+){([^}]+)}/) + if($comp =~ /([^{]+){([^}]+)}(.*)$/) { my $prefix = $1; my $ranges = $2; - + my $suffix = $3; + # split the ranges string on the commas foreach my $r (split(/,/, $ranges)) { @@ -170,7 +180,7 @@ sub parse_hostlist my ($s, $f) = $r =~ /([0-9]+)-([0-9]+)/; for(my $i = $s; $i <= $f; ++$i) { - push @hosts, "$prefix$i"; + push @hosts, "$prefix$i$suffix"; } } } @@ -185,11 +195,12 @@ sub parse_hostlist sub emit_defaults { my ($target, $num_unexp, $bmi_module, $logfile, - $logging, $logstamp, $server_job_timeout, $client_job_timeout) = @_; + $logging, $tracing, $logstamp, $server_job_timeout, $client_job_timeout) = @_; print $target "\n"; print $target "\tUnexpectedRequests $num_unexp\n"; print $target "\tEventLogging $logging\n"; + print $target "\tEnableTracing $tracing\n"; print $target "\tLogStamp $logstamp\n"; print $target "\tBMIModules $bmi_module\n"; print $target "\tFlowModules flowproto_multiqueue\n"; @@ -200,18 +211,17 @@ sub emit_defaults print $target "\tClientJobFlowTimeoutSecs $client_job_timeout\n"; print $target "\tClientRetryLimit 5\n"; print $target "\tClientRetryDelayMilliSecs 2000\n"; - if ($opt_osdtype ne "") - { - print $target "\tOSDType $opt_osdtype\n"; - } - if ($opt_osddirtype ne "") + print $target "\tPrecreateBatchSize 0,32,512,32,32,32,0\n"; + print $target 
"\tPrecreateLowThreshold 0,16,256,16,16,16,0\n"; + + if(defined($default_storage)) { - print $target "\tOSDDirType $opt_osddirtype\n"; + print $target "\n\tDataStorageSpace " . $default_storage . "\n"; } - if(defined($default_storage)) + if(defined($default_meta_storage)) { - print $target "\n\tStorageSpace " . $default_storage . "\n"; + print $target "\tMetadataStorageSpace " . $default_meta_storage . "\n\n"; } if(defined($default_logfile)) @@ -219,6 +229,11 @@ sub emit_defaults print $target "\tLogFile " . $default_logfile . "\n"; } + if($opt_tcpbindspecific) + { + print $target "\tTCPBindSpecific yes\n"; + } + print $target "\n"; } @@ -239,14 +254,8 @@ sub emit_aliases print $target "\n\n"; foreach my $alias (sort keys %all_endpoints) { - my $endpoint = $all_endpoints{$alias}; - print $target "\tAlias $alias "; - if ($opt_osdtype ne "none" && $opt_osdtype ne "") { - print $target "osd://", $endpoint->{HOSTNAME}; - } else { - print $target get_bmi_endpoint($alias); - } - print $target "\n"; + print $target "\tAlias $alias " . + get_bmi_endpoint($alias) . "\n"; } print $target "\n"; } @@ -258,7 +267,7 @@ sub emit_filesystem $default_num_dfiles, $default_flow_buffer_size, $default_flow_buffer_count) = @_; # divide handle range space equally among servers ((2^63)-1 for now) - my($total_num_handles_available, $start, $end, $i, $step, $num_ranges); + my($total_num_handles_available, $start, $end, $i, $step, $num_ranges, $stuffing); $num_ranges = $count; $total_num_handles_available = $last_handle->copy(); $total_num_handles_available->bsub($first_handle); @@ -278,6 +287,23 @@ sub emit_filesystem print $target "\tDefaultNumDFiles $default_num_dfiles\n"; } + # Rules for default stuffing setting: only enable it if every I/O server + # is also a metadata server, otherwise we would tend to unbalance by + # always stuffing on the subset that does metadata. User can override + # if desired. 
+ $stuffing = "yes"; + foreach my $alias (keys %all_endpoints) + { + if($all_endpoints{$alias}->{TYPE} & $IO_ENDPOINT) + { + if(!($all_endpoints{$alias}->{TYPE} & $META_ENDPOINT)) + { + $stuffing = "no"; + } + } + } + print $target "\tFileStuffing $stuffing\n"; + print $target "\t\n"; $start = $end = $first_handle->copy(); $start->bdec(); @@ -325,6 +351,10 @@ sub emit_filesystem { print $target "\t\tTroveMethod $opt_trovemethod\n"; } + else + { + print $target "\t\tTroveMethod alt-aio\n"; + } print $target "\t\n"; @@ -355,7 +385,8 @@ sub emit_serveropts my $endpoint = $all_endpoints{$alias}; print $target "\n\n"; print $target "\tServer $alias\n"; - print $target "\tStorageSpace $endpoint->{STORAGE}\n"; + print $target "\tDataStorageSpace $endpoint->{STORAGE}\n"; + print $target "\tMetadataStorageSpace $endpoint->{METASTORAGE}\n"; print $target "\tLogFile $endpoint->{LOGFILE}\n"; print $target "\n"; } @@ -364,9 +395,10 @@ sub emit_serveropts sub emit_server_conf { - my($target, $node, $storage, $logfile) = @_; + my($target, $node, $storage, $metastorage, $logfile) = @_; - print $target "StorageSpace $storage\n"; + print $target "DataStorageSpace $storage\n"; + print $target "MetadataStorageSpace $metastorage\n"; print $target "HostID \"" . get_bmi_endpoint($node) . "\"\n"; print $target "LogFile $logfile\n"; } @@ -504,6 +536,7 @@ Usage: pvfs2-genconfig [OPTIONS] to run pvfs2-genconfig in interactive or non-interactive mode: --tcpport TCP port to use (default: 3334) + --tcpbindspecific Bind TCP only to specific interfaces --gmport GM port to use (default: 6) --mxboard MX board to use (default is 0) --mxendpoint MX endpoint to use (default is 3) @@ -511,6 +544,7 @@ Usage: pvfs2-genconfig [OPTIONS] --portal Portals index for listening server (default is 5) --logging debugging mask for log messages + --tracing Enable event tracing in the server --logstamp timestamp type for log messages ('none','usec', or 'datetime' are valid) --storage path to pvfs storage directory. 
@@ -523,6 +557,7 @@ Usage: pvfs2-genconfig [OPTIONS] --last-handle last handle value to reserve --root-handle handle value to reserve for root object --fsid fs identifier value + --fsname fs name --default-num-dfiles number of datafiles to use per file (defaults to number of I/O servers) --flow-buffer-size set flowbuffersize in bytes @@ -657,6 +692,17 @@ sub get_logging return $logging; } +sub get_tracing +{ + my $tracing; + if ($opt_tracing) { + $tracing = "yes"; + } else { + $tracing = "no"; + } + return $tracing; +} + sub get_logstamp { my $logstamp; @@ -691,6 +737,17 @@ sub get_fsid return $fsid; } +sub get_fsname +{ + my $fsname; + if ($opt_fsname) { + $fsname = $opt_fsname; + } else { + $fsname = "pvfs2-fs"; + } + return $fsname; +} + sub get_last_handle { my $last_handle; @@ -792,7 +849,17 @@ sub get_storage return $storage; } - +sub get_meta_storage +{ + my $metastorage = "/pvfs2-storage-space"; + if ($opt_storage) { + $metastorage = $opt_storage; + } elsif (!$opt_quiet) { + print $OUT "Choose a directory for each server to store metadata in.\n"; + $metastorage = prompt_word("Enter directory name: [Default is /pvfs2-storage-space]: ","/pvfs2-storage-space"); + } + return $metastorage; +} # get host port sub tcp_get_port @@ -898,6 +965,7 @@ sub get_ionames { my $portmap = shift; my $storage = shift; + my $metastorage = shift; my $logfile = shift; my $ioline = ''; if ($opt_ioservers) { @@ -927,7 +995,8 @@ sub get_ionames TYPE => $IO_ENDPOINT, HOSTNAME => $io_host, PORTMAP => $portmap, - STORAGE => $storage, + STORAGE => $storage, + METASTORAGE => $metastorage, LOGFILE => $logfile}; } } @@ -937,19 +1006,39 @@ sub get_metanames { my $portmap = shift; my $storage = shift; + my $metastorage = shift; my $logfile = shift; my $metaline = ''; + my @meta_hosts; if ($opt_metaservers) { $metaline = $opt_metaservers; - } else { - print $OUT "Now list the hostnames of the machines that will act as " . - "Metadata\nservers. This list may or may not overlap " . 
- "with the I/O server list.\n"; + @meta_hosts = parse_hostlist($metaline); + } + else + { + print $OUT "Use same servers for metadata? (recommended)\n"; $metaline = prompt_word( - "Enter hostnames [Default is localhost]: ", - "localhost"); + "Enter yes or no [Default is yes]: ", + "yes"); + if($metaline =~ /^yes$/i) + { + foreach my $alias (keys %all_endpoints) + { + $all_endpoints{$alias}->{TYPE} |= $META_ENDPOINT; + } + } + else + { + print $OUT "Now list the hostnames of the machines that will act as " . + "Metadata\nservers. This list may or may not overlap " . + "with the I/O server list.\n"; + $metaline = prompt_word( + "Enter hostnames [Default is localhost]: ", + "localhost"); + @meta_hosts = parse_hostlist($metaline); + } } - my @meta_hosts = parse_hostlist($metaline); + foreach my $meta_host (@meta_hosts) { if(exists $all_endpoints{$meta_host}) @@ -963,7 +1052,8 @@ sub get_metanames TYPE => $META_ENDPOINT, HOSTNAME => $meta_host, PORTMAP => $portmap, - STORAGE => $storage, + STORAGE => $storage, + METASTORAGE => $metastorage, LOGFILE => $logfile}; } } @@ -992,6 +1082,7 @@ sub get_specs foreach my $ep (@endpoints) { my $stor = undef; + my $mstor = undef; my $logf = undef; my $proto = undef; my $hostname = undef; @@ -1002,12 +1093,14 @@ sub get_specs # the string must have multiple protocols specified for the same # endpoint. 
We want to match on [...]:storage:logfile # and place the stuff between the [] in $1, and optionally - # place the matched storage path and logfile in $2 and $3 + # place the matched storage path and meta path in $2 and $3, + # logfile is in $4 # $ep =~ /\[([^\]]+)\](?::([^:]+))?(?::([^:]+))?/; $stor = $2; - $logf = $3; + $mstor = $3; + $logf = $4; if(!defined($1)) { @@ -1058,7 +1151,8 @@ sub get_specs TYPE => $type, HOSTNAME => $hostname, PORTMAP => \%port, - STORAGE => $stor, + STORAGE => $stor, + METASTORAGE => $mstor, LOGFILE => $logf}; } else @@ -1105,12 +1199,14 @@ sub get_specs my $branges = $3; my $pranges = $4; $stor = $5; - $logf = $6; + $mstor = $6; + $logf = $7; if($proto !~ /mx/) { $logf = $stor; - $stor = $pranges; + $stor = $mstor; + $mstor = $pranges; $pranges = $branges; $branges = undef; } @@ -1152,6 +1248,7 @@ sub get_specs HOSTNAME => $hostname, PORTMAP => $portmap, STORAGE => $stor, + METASTORAGE => $mstor, LOGFILE => $logf}; } } @@ -1173,6 +1270,7 @@ sub get_specs HOSTNAME => $hostname, PORTMAP => $portmap, STORAGE => $stor, + METASTORAGE => $mstor, LOGFILE => $logf}; } } @@ -1211,6 +1309,7 @@ sub get_specs HOSTNAME => $hostname, PORTMAP => $portmap, STORAGE => $stor, + METASTORAGE => $mstor, LOGFILE => $logf}; } } @@ -1308,6 +1407,7 @@ my $show_specusage = ''; $opt_quiet = 0; GetOptions('protocol=s' => \$opt_protocol, 'tcpport=i' => \$opt_tcpport, + 'tcpbindspecific' => \$opt_tcpbindspecific, 'gmport=i' => \$opt_gmport, 'mxboard=i' => \$opt_mxboard, 'mxendpoint=i' => \$opt_mxendpoint, @@ -1317,6 +1417,7 @@ GetOptions('protocol=s' => \$opt_protocol, 'metaservers=s' => \$opt_metaservers, 'logfile=s' => \$opt_logfile, 'logging=s' => \$opt_logging, + 'tracing' => \$opt_tracing, 'logstamp=s' => \$opt_logstamp, 'first-handle=i' => \$opt_first_handle, 'last-handle=i' => \$opt_last_handle, @@ -1325,6 +1426,7 @@ GetOptions('protocol=s' => \$opt_protocol, 'flow-buffer-count=i'=> \$opt_default_flow_buffer_count, 'root-handle=i' => \$opt_root_handle, 
'fsid=i' => \$opt_fsid, + 'fsname=s' => \$opt_fsname, 'trusted=i' => \$opt_security, 'server-job-timeout=i' => \$opt_server_job_timeout, 'client-job-timeout=i' => \$opt_client_job_timeout, @@ -1337,21 +1439,19 @@ GetOptions('protocol=s' => \$opt_protocol, 'metaspec=s' => \$opt_metaspec, 'spec-usage!' => \$show_specusage, 'genkey!' => \$opt_gen_key, - 'osdtype=s' => \$opt_osdtype, - 'osddirtype=s' => \$opt_osddirtype, '-' => \$using_stdout) or die "Could not parse arguments. See -h for help.\n"; if($opt_quiet) { - # quiet requires a protocol - die "Invalid arguments. See -h for help\n" if(!$opt_protocol); - - if(!($opt_iospec || $opt_metaspec)) + if( + !($opt_protocol && $opt_ioservers && $opt_metaservers) + && + !($opt_iospec && $opt_metaspec) + ) { - # quiet requires io servers and meta servers to be specified - die "Invalid arguments. See -h for help\n" if(!$opt_ioservers); - die "Invalid arguments. See -h for help\n" if(!$opt_metaservers); + # quiet requires full specification of server addresses somehow + die "Invalid arguments for --quiet usage. See -h for help\n"; } } else @@ -1426,10 +1526,11 @@ else $bmi_module = join(',', map("bmi_" . 
$_, keys (%{$portmap}))); $default_storage = get_storage(); + $default_meta_storage = get_meta_storage(); $default_logfile = get_logfile(); - get_ionames($portmap, $default_storage, $default_logfile); - get_metanames($portmap, $default_storage, $default_logfile); + get_ionames($portmap, $default_storage, $default_meta_storage, $default_logfile); + get_metanames($portmap, $default_storage, $default_meta_storage, $default_logfile); } # find out if any of the storage or logfile entries in the endpoints @@ -1445,6 +1546,16 @@ if(needs_default_value(STORAGE)) set_default_value(STORAGE, $default_storage); } +if(needs_default_value(METASTORAGE)) +{ + if(!defined($default_meta_storage)) + { + $default_meta_storage = get_meta_storage(); + } + set_default_value(METASTORAGE, $default_meta_storage); + set_default_value(METASTORAGE, $default_meta_storage); +} + if(needs_default_value(LOGFILE)) { if(!defined($default_logfile)) @@ -1503,6 +1614,7 @@ if (!$opt_quiet) { my $logging = get_logging(); +my $tracing = get_tracing(); my $logstamp = get_logstamp(); my $first_handle = get_first_handle(); my $last_handle = get_last_handle(); @@ -1511,6 +1623,7 @@ my $default_flow_buffer_size = get_default_flow_buffer_size(); my $default_flow_buffer_count = get_default_flow_buffer_count(); my $root_handle = get_root_handle(); my $fsid = get_fsid(); +my $fsname = get_fsname(); my $server_job_timeout = get_server_job_timeout(); my $client_job_timeout = get_client_job_timeout(); @@ -1526,7 +1639,7 @@ if (!$opt_quiet) { } emit_defaults($output_target, $num_unexp_reqs, - $bmi_module, $default_logfile, $logging, + $bmi_module, $default_logfile, $logging, $tracing, $logstamp, $server_job_timeout, $client_job_timeout); if ($opt_security == 1) { @@ -1534,7 +1647,7 @@ if ($opt_security == 1) $opt_trusted_network, $opt_trusted_netmask); } emit_aliases($output_target); -emit_filesystem($output_target, "pvfs2-fs", $fsid, $root_handle, +emit_filesystem($output_target, $fsname , $fsid, $root_handle, $last_handle, $first_handle, $meta_count + $io_count, $default_num_dfiles, $default_flow_buffer_size,
$default_flow_buffer_count); if ($opt_metaspec) { diff --git a/src/apps/admin/pvfs2-getmattr b/src/apps/admin/pvfs2-getmattr new file mode 100755 index 0000000..dcc399c --- /dev/null +++ b/src/apps/admin/pvfs2-getmattr @@ -0,0 +1,105 @@ +#!/bin/csh -f +unalias * + +#set echo +#set verbose + +#echo $0 + +#if no options entered, then display usage information +if ( $#argv == 0 ) then + goto usage +endif + +#get the path associated with this command +set mybin = $0:h + +#make sure that pvfs-xattr exists +if ( ! -e $mybin/pvfs2-xattr ) then + echo + echo "$mybin/pvfs2-xattr command not found." +endif + +#initialize parms +set init = "init" +set myFile = $init +set myCopiesParm = 0 +set myModeParm = 0 +set myFileParm = 0 + +#parse the command line.... +set index = 1 +set i +while ( $index <= $#argv ) + + switch({$argv[$index]}) + case {-h} : + goto usage + breaksw + case {-c} : + set myCopiesParm = 1 + @ index++ + breaksw + case {-m} : + set myModeParm = 1 + @ index++ + breaksw + case {-f} : + if ( $index == $#argv ) then + echo + echo "Missing file parameter. Recheck usage." + goto usage + else + @ i = $index + 1 + set myFileParm = 1 + set myFile = $argv[$i] + @ index += 2 + endif + breaksw + default : + echo + echo "Missing or invalid parameters. Recheck usage." + goto usage + breaksw + endsw +end #while + + +#NOTE: When PVFS is NOT in kernel mode, we can't easily check for file +# existence. So, we just check to see that SOMETHING was entered. +# pvfs-xattr will validate the filename. +#Did the user enter a file name? +if ( $myFile == $init ) then + echo + echo "File name is required. Recheck usage." 
+ goto usage +endif + +#issue commands +if ( $myCopiesParm && $myModeParm ) then + {$mybin}/pvfs2-xattr -k user.pvfs2.mirror.mode -t {$myFile} + {$mybin}/pvfs2-xattr -k user.pvfs2.mirror.copies -t {$myFile} +else if ( $myCopiesParm ) then + {$mybin}/pvfs2-xattr -k user.pvfs2.mirror.copies -t {$myFile} +else if ( $myModeParm ) then + {$mybin}/pvfs2-xattr -k user.pvfs2.mirror.mode -t {$myFile} +else + {$mybin}/pvfs2-xattr -k user.pvfs2.mirror.mode -t {$myFile} + {$mybin}/pvfs2-xattr -k user.pvfs2.mirror.copies -t {$myFile} +endif + +#leave script +exit + +#display help and exit script +usage: + echo + echo "pvfs2-getmattr [-c] [-m] [-h] -f file" + echo " -c : Retrieve the number of mirror copies" + echo " -m : Retrieve the mirroring mode" + echo " -h : Display this message" + echo + echo "Retrieve copies and mode when none specified. File " \ + "is required." +exit +######## end of script file ########## diff --git a/src/apps/admin/pvfs2-ln.c b/src/apps/admin/pvfs2-ln.c index 7a1ef77..575619d 100644 --- a/src/apps/admin/pvfs2-ln.c +++ b/src/apps/admin/pvfs2-ln.c @@ -187,7 +187,7 @@ static int make_link(PVFS_credentials * pCredentials, (char *) pszLinkTarget, attr, pCredentials, - &resp_sym); + &resp_sym, NULL); if (ret < 0) { diff --git a/src/apps/admin/pvfs2-ls.c b/src/apps/admin/pvfs2-ls.c index 33b8974..75abcc3 100644 --- a/src/apps/admin/pvfs2-ls.c +++ b/src/apps/admin/pvfs2-ls.c @@ -10,12 +10,18 @@ #include #include #include +#ifndef WIN32 #include +#endif #include +#ifndef WIN32 #include #include +#endif #include +#ifndef WIN32 #include +#endif #include "pvfs2.h" #include "str-utils.h" @@ -25,11 +31,37 @@ #define PVFS2_VERSION "Unknown" #endif +#ifdef WIN32 +#define snprintf _snprintf +#endif /* TODO: this can be larger after system interface readdir logic * is in place to break up large readdirs into multiple operations */ -#define MAX_NUM_DIRENTS 32 +/* MAX_NUM_DIRENTS cannot be any larger than PVFS_REQ_LIMIT_LISTATTR */ +#define MAX_NUM_DIRENTS 60 + +/* + 
Define the maximum length of a single line of output. This is about the + size of 256 maximum path segments, a file name, and attributes. + */ +#define ENTRY_MAX 66560 + +/* + arbitrarily restrict the number of paths + that this ls version can take as arguments +*/ +#define MAX_NUM_PATHS 8 + +/* + Max length of the fully formatted date/time fields +*/ +#define MAX_TIME_LENGTH 128 + +/* + Length of the formatted date/time for --all-times option +*/ +#define ALL_TIMES_LENGTH 25 /* optional parameters, filled in by parse_args() */ struct options @@ -37,6 +69,7 @@ struct options int list_human_readable; int list_long; int list_verbose; + int list_recursive; int list_numeric_uid_gid; int list_directory; int list_no_group; @@ -44,32 +77,48 @@ struct options int list_all; int list_no_owner; int list_inode; + int list_all_times; int list_use_si_units; - char **start; + char *start[MAX_NUM_PATHS]; int num_starts; }; +struct subdir_list +{ + char *path; + struct subdir_list *next; +}; +typedef struct subdir_list subdir; + +static char *process_name = NULL; +static int do_timing = 0; + static struct options* parse_args(int argc, char* argv[]); static void usage(int argc, char** argv); -static int do_timing = 0; static void print_entry( char *entry_name, PVFS_handle handle, PVFS_fs_id fs_id, - struct options *opts); + PVFS_sys_attr *attr, + int attr_error, + struct options *opts, + char* entry_buffer); static int do_list( + char *full_path, char *start, int fs_id, - struct options *opts); + struct options *opts, + char *entry_buffer); static void print_entry_attr( PVFS_handle handle, char *entry_name, PVFS_sys_attr *attr, - struct options *opts); + struct options *opts, + char *entry_buffer); #define print_dot_and_dot_dot_info_if_required(refn) \ do { \ @@ -87,9 +136,11 @@ do { \ } \ else if (opts->list_long) { \ print_entry(".", refn.handle, \ - refn.fs_id, opts); \ + refn.fs_id, NULL, 0, opts, \ + entry_buffer); \ print_entry(".. 
(faked)", refn.handle, \ - refn.fs_id, opts); \ + refn.fs_id, NULL, 0, opts, \ + entry_buffer); \ } \ else { \ printf(".\n"); \ @@ -169,34 +220,86 @@ void print_entry_attr( PVFS_handle handle, char *entry_name, PVFS_sys_attr *attr, - struct options *opts) + struct options *opts, + char *entry_buffer) { - char buf[128] = {0}, *formatted_size = NULL; - char *formatted_owner = NULL, *formatted_group = NULL; + char *formatted_size = NULL; + char *formatted_owner = NULL, *formatted_group = NULL, *formatted_time = NULL; +#ifdef WIN32 + +#else struct group *grp = NULL; struct passwd *pwd = NULL; +#endif char *empty_str = ""; char *owner = empty_str, *group = empty_str; char *inode = empty_str; - time_t mtime = (time_t)attr->mtime; - struct tm *time = localtime(&mtime); + time_t mtime, atime, ctime; + struct tm *time; PVFS_size size = 0; - char scratch_owner[16] = {0}, scratch_group[16] = {0}; - char scratch_size[16] = {0}, scratch_inode[16] = {0}; + char scratch_owner[16] = {0}, scratch_group[16] = {0}, scratch_time[MAX_TIME_LENGTH] = {0}, scratch_big_time[MAX_TIME_LENGTH] = {0}; + char scratch_size[16] = {0}, scratch_inode[21] = {0}; char f_type = '-'; char group_x_char = '-'; + int num_bytes = 0; if (!opts->list_all && (entry_name[0] == '.')) { return; } + if (attr == NULL) + { + return; + } + + mtime = (time_t)attr->mtime; + time = localtime(&mtime); + if(opts->list_all_times) + { + atime = (time_t)attr->atime; + ctime = (time_t)attr->ctime; + +#ifdef WIN32 + num_bytes = strftime( scratch_time,ALL_TIMES_LENGTH+1,"%Y-%m-%d %H:%M:%S %z",time ); +#else + num_bytes = strftime( scratch_time,ALL_TIMES_LENGTH+1,"%F %H:%M:%S %z",time ); +#endif + strncpy(scratch_big_time,scratch_time,num_bytes); + + time = localtime(&atime); +#ifdef WIN32 + num_bytes = strftime( scratch_time,ALL_TIMES_LENGTH+3," %Y-%m-%d %H:%M:%S %z",time ); +#else + num_bytes = strftime( scratch_time,ALL_TIMES_LENGTH+3," %F %H:%M:%S %z",time ); +#endif + strncat(scratch_big_time,scratch_time,num_bytes); + + 
time = localtime(&ctime); +#ifdef WIN32 + num_bytes = strftime( scratch_time,ALL_TIMES_LENGTH+3," %Y-%m-%d %H:%M:%S %z",time ); +#else + num_bytes = strftime( scratch_time,ALL_TIMES_LENGTH+3," %F %H:%M:%S %z",time ); +#endif + strncat(scratch_big_time,scratch_time,num_bytes); + + format_size_string(scratch_big_time,strlen(scratch_big_time),&formatted_time,0,1); + } + else + { +#ifdef WIN32 + strftime( scratch_time,17,"%Y-%m-%d %H:%M",time ); +#else + strftime( scratch_time,17,"%F %H:%M",time ); +#endif + format_size_string(scratch_time,16,&formatted_time,0,1); + } snprintf(scratch_owner,16,"%d",(int)attr->owner); snprintf(scratch_group,16,"%d",(int)attr->group); if (opts->list_inode) { - snprintf(scratch_inode,16,"%llu ",llu(handle)); + snprintf(scratch_inode,21,"%llu ",llu(handle)); inode = scratch_inode; } @@ -239,14 +342,23 @@ void print_entry_attr( { if (!opts->list_no_owner) { +#ifdef WIN32 + owner = scratch_owner; +#else pwd = getpwuid((uid_t)attr->owner); owner = (pwd ? pwd->pw_name : scratch_owner); +#endif } if (!opts->list_no_group) { +#ifdef WIN32 + group = scratch_group; +#else grp = getgrgid((gid_t)attr->group); group = (grp ? grp->gr_name : scratch_group); +#endif + } } @@ -276,8 +388,8 @@ void print_entry_attr( group_x_char = ((attr->perms & PVFS_G_EXECUTE) ? 'x' : '-'); } - snprintf(buf,128,"%s%c%c%c%c%c%c%c%c%c%c 1 %s %s %s " - "%.4d-%.2d-%.2d %.2d:%.2d %s", + snprintf(entry_buffer,ENTRY_MAX,"%s %c%c%c%c%c%c%c%c%c%c 1 %s %s %s " + "%s %s", inode, f_type, ((attr->perms & PVFS_U_READ) ? 
'r' : '-'), @@ -292,11 +404,7 @@ void print_entry_attr( formatted_owner, formatted_group, formatted_size, - (time->tm_year + 1900), - (time->tm_mon + 1), - time->tm_mday, - (time->tm_hour), - (time->tm_min), + formatted_time, entry_name); if (formatted_size) @@ -311,6 +419,10 @@ void print_entry_attr( { free(formatted_group); } + if (formatted_time) + { + free(formatted_time); + } if (attr->objtype == PVFS_TYPE_SYMLINK) { @@ -318,17 +430,16 @@ void print_entry_attr( if (opts->list_long) { - printf("%s -> %s\n", buf, attr->link_target); + printf("%s -> %s\n", entry_buffer, attr->link_target); } else { - printf("%s\n",buf); + printf("%s\n",entry_buffer); } - free(attr->link_target); } else { - printf("%s\n",buf); + printf("%s\n",entry_buffer); } } @@ -336,7 +447,10 @@ void print_entry( char *entry_name, PVFS_handle handle, PVFS_fs_id fs_id, - struct options *opts) + PVFS_sys_attr *attr, + int attr_error, + struct options *opts, + char *entry_buffer) { int ret = -1; PVFS_object_ref ref; @@ -356,22 +470,35 @@ void print_entry( return; } - ref.handle = handle; - ref.fs_id = fs_id; - - memset(&getattr_response,0, sizeof(PVFS_sysresp_getattr)); - PVFS_util_gen_credentials(&credentials); - - ret = PVFS_sys_getattr(ref, PVFS_ATTR_SYS_ALL_NOHINT, - &credentials, &getattr_response); - if (ret) + if (attr_error == 0) { - fprintf(stderr,"Failed to get attributes on handle %llu,%d\n", - llu(handle),fs_id); - PVFS_perror("Getattr failure", ret); - return; + if(!attr) + { + /* missing attributes (possibly for . or .. 
entries); get them + * the old fashioned way + */ + ref.handle = handle; + ref.fs_id = fs_id; + + memset(&getattr_response,0, sizeof(PVFS_sysresp_getattr)); + PVFS_util_gen_credentials(&credentials); + + ret = PVFS_sys_getattr(ref, PVFS_ATTR_SYS_ALL_NOHINT, + &credentials, &getattr_response, NULL); + if (ret) + { + fprintf(stderr,"Failed to get attributes on handle %llu,%d\n", + llu(handle),fs_id); + PVFS_perror("Getattr failure", ret); + return; + } + print_entry_attr(handle, entry_name, &getattr_response.attr, opts, entry_buffer); + } + else + { + print_entry_attr(handle, entry_name, attr, opts, entry_buffer); + } } - print_entry_attr(handle, entry_name, &getattr_response.attr, opts); } static double Wtime(void) @@ -382,9 +509,11 @@ static double Wtime(void) } int do_list( + char *full_path, char *start, int fs_id, - struct options *opts) + struct options *opts, + char *entry_buffer) { int i = 0, printed_dot_info = 0; int ret = -1; @@ -392,21 +521,27 @@ int do_list( char *name = NULL, *cur_file = NULL; PVFS_handle cur_handle; PVFS_sysresp_lookup lk_response; - PVFS_sysresp_readdir rd_response; + PVFS_sysresp_readdirplus rdplus_response; PVFS_sysresp_getattr getattr_response; PVFS_credentials credentials; PVFS_object_ref ref; PVFS_ds_position token; uint64_t dir_version = 0; double begin = 0., end; + subdir *current, *head = NULL, *tail = NULL; name = start; memset(&lk_response,0,sizeof(PVFS_sysresp_lookup)); PVFS_util_gen_credentials(&credentials); + if (opts->list_recursive || opts->num_starts > 1) + { + printf("%s%s:\n",full_path,start); + } + ret = PVFS_sys_lookup(fs_id, name, &credentials, - &lk_response, PVFS2_LOOKUP_LINK_NO_FOLLOW); + &lk_response, PVFS2_LOOKUP_LINK_NO_FOLLOW, NULL); if(ret < 0) { PVFS_perror("PVFS_sys_lookup", ret); @@ -418,9 +553,8 @@ int do_list( pvfs_dirent_incount = MAX_NUM_DIRENTS; memset(&getattr_response,0,sizeof(PVFS_sysresp_getattr)); - ret = PVFS_sys_getattr(ref, PVFS_ATTR_SYS_ALL_NOHINT, - &credentials, &getattr_response); - 
if(ret == 0) + if (PVFS_sys_getattr(ref, PVFS_ATTR_SYS_ALL, + &credentials, &getattr_response, NULL) == 0) { if ((getattr_response.attr.objtype == PVFS_TYPE_METAFILE) || (getattr_response.attr.objtype == PVFS_TYPE_SYMLINK) || @@ -438,7 +572,7 @@ int do_list( if (getattr_response.attr.objtype == PVFS_TYPE_DIRECTORY) { if (PVFS_sys_getparent(ref.fs_id, name, &credentials, - &getparent_resp) == 0) + &getparent_resp, NULL) == 0) { print_dot_and_dot_dot_info_if_required( getparent_resp.parent_ref); @@ -448,30 +582,32 @@ int do_list( if (opts->list_long) { print_entry_attr(ref.handle, segment, - &getattr_response.attr, opts); + &getattr_response.attr, opts, entry_buffer); } else { - print_entry(segment, ref.handle, ref.fs_id, opts); + print_entry(segment, ref.handle, ref.fs_id, + NULL, + 0, + opts, entry_buffer); } return 0; } } - else - { - PVFS_perror("PVFS_sys_getattr", ret); - return -1; - } if (do_timing) begin = Wtime(); token = 0; do { - memset(&rd_response, 0, sizeof(PVFS_sysresp_readdir)); - ret = PVFS_sys_readdir( + memset(&rdplus_response, 0, sizeof(PVFS_sysresp_readdirplus)); + ret = PVFS_sys_readdirplus( ref, (!token ? PVFS_READDIR_START : token), - pvfs_dirent_incount, &credentials, &rd_response); + pvfs_dirent_incount, &credentials, + (opts->list_long) ? + PVFS_ATTR_SYS_ALL : PVFS_ATTR_SYS_ALL_NOSIZE, + &rdplus_response, + NULL); if(ret < 0) { PVFS_perror("PVFS_sys_readdir", ret); @@ -480,15 +616,15 @@ int do_list( if (dir_version == 0) { - dir_version = rd_response.directory_version; + dir_version = rdplus_response.directory_version; } else if (opts->list_verbose) { - if (dir_version != rd_response.directory_version) + if (dir_version != rdplus_response.directory_version) { fprintf(stderr, "*** directory changed! 
listing may " "not be correct\n"); - dir_version = rd_response.directory_version; + dir_version = rdplus_response.directory_version; } } @@ -502,32 +638,103 @@ int do_list( printed_dot_info = 1; } - for(i = 0; i < rd_response.pvfs_dirent_outcount; i++) + for(i = 0; i < rdplus_response.pvfs_dirent_outcount; i++) { - cur_file = rd_response.dirent_array[i].d_name; - cur_handle = rd_response.dirent_array[i].handle; + PVFS_sys_attr *attr; + + cur_file = rdplus_response.dirent_array[i].d_name; + cur_handle = rdplus_response.dirent_array[i].handle; + + print_entry(cur_file, cur_handle, fs_id, + &rdplus_response.attr_array[i], + rdplus_response.stat_err_array[i], + opts, entry_buffer); + + attr = &rdplus_response.attr_array[i]; + if(attr->objtype == PVFS_TYPE_DIRECTORY && opts->list_recursive) + { + int path_len = strlen(start) + strlen(cur_file) + 1; + current = (subdir *) malloc(sizeof(subdir)); + + /* Prevent duplicate slashes in path */ + if(start[strlen(start)-1] == '/') + { + current->path = (char *) malloc(path_len); + snprintf(current->path,path_len,"%s%s",start,cur_file); + } + else + { + current->path = (char *) malloc(path_len + 1); + snprintf(current->path,path_len+1,"%s/%s",start,cur_file); + } - print_entry(cur_file, cur_handle, fs_id, opts); + /* Update linked list of subdirectories to recurse */ + current->next = NULL; + if(!head) + { + head = current; + tail = current; + } + else + { + tail->next = current; + tail = current; + } + } } - token += rd_response.pvfs_dirent_outcount; + token = rdplus_response.token; - if (rd_response.pvfs_dirent_outcount) + if (rdplus_response.pvfs_dirent_outcount) { - free(rd_response.dirent_array); - rd_response.dirent_array = NULL; + free(rdplus_response.dirent_array); + rdplus_response.dirent_array = NULL; + free(rdplus_response.stat_err_array); + rdplus_response.stat_err_array = NULL; + for (i = 0; i < rdplus_response.pvfs_dirent_outcount; i++) { + if (rdplus_response.attr_array) + { + 
PVFS_util_release_sys_attr(&rdplus_response.attr_array[i]); + } + } + free(rdplus_response.attr_array); + rdplus_response.attr_array = NULL; } - } while(rd_response.pvfs_dirent_outcount == pvfs_dirent_incount); + } while(rdplus_response.pvfs_dirent_outcount == pvfs_dirent_incount); if (do_timing) { end = Wtime(); - printf("PVFS_sys_readdir+sys_getattr took %g msecs\n", + printf("PVFS_sys_readdirplus took %g msecs\n", (end - begin)); } - if (rd_response.pvfs_dirent_outcount) + if (rdplus_response.pvfs_dirent_outcount) { - free(rd_response.dirent_array); - rd_response.dirent_array = NULL; + free(rdplus_response.dirent_array); + rdplus_response.dirent_array = NULL; + free(rdplus_response.stat_err_array); + rdplus_response.stat_err_array = NULL; + for (i = 0; i < rdplus_response.pvfs_dirent_outcount; i++) { + if (rdplus_response.attr_array) + { + PVFS_util_release_sys_attr(&rdplus_response.attr_array[i]); + } + } + free(rdplus_response.attr_array); + rdplus_response.attr_array = NULL; + } + + if (opts->list_recursive) + { + current = head; + while(current) + { + printf("\n"); + do_list(full_path,current->path,fs_id,opts,entry_buffer); + current = current->next; + free(head->path); + free(head); + head = current; + } } return 0; } @@ -538,17 +745,151 @@ int do_list( * * returns pointer to options structure on success, NULL on failure */ +#ifdef WIN32 static struct options* parse_args(int argc, char* argv[]) { int i = 0, ret = 0, option_index = 0; const char *cur_option = NULL; struct options* tmp_opts = NULL; + + static char str_opts[14][16] = { + "help", + "human-readable", + "si", + "version", + "recursive", + "verbose", + "numeric-uid-gid", + "directory", + "no-group", + "almost-all", + "all", + "inode", + "size", + "all-times" + }; + + tmp_opts = (struct options*)malloc(sizeof(struct options)); + if (!tmp_opts) + { + return(NULL); + } + memset(tmp_opts, 0, sizeof(struct options)); + /* RVndGoAaiglt */ + option_index = 1; + while ((option_index < argc) && 
(argv[option_index][0] == '-')) + { + cur_option = argv[option_index]; + + if ((strcmp(cur_option, "-?") == 0) || + (strcmp(cur_option, "--help") == 0)) + { + usage(argc, argv); + exit(0); + } + else if ((strcmp(cur_option, "--human-readable") == 0) || + (strcmp(cur_option, "-h") == 0)) + { + tmp_opts->list_human_readable = 1; + } + else if (strcmp(cur_option, "--si") == 0) + { + tmp_opts->list_use_si_units = 1; + } + else if (strcmp(cur_option, "--version") == 0) + { + printf("%s\n", PVFS2_VERSION); + exit(0); + } + else if ((strcmp(cur_option, "--recursive") == 0) || + (strcmp(cur_option, "-R") == 0)) + { + tmp_opts->list_recursive = 1; + } + else if ((strcmp(cur_option, "--verbose") == 0) || + (strcmp(cur_option, "-V") == 0)) + { + tmp_opts->list_verbose = 1; + } + else if ((strcmp(cur_option, "--numeric-uid-gid") == 0) || + (strcmp(cur_option, "-n") == 0)) + { + tmp_opts->list_long = 1; + tmp_opts->list_numeric_uid_gid = 1; + } + else if ((strcmp(cur_option, "--directory") == 0) || + (strcmp(cur_option, "-d") == 0)) + { + tmp_opts->list_directory = 1; + } + else if ((strcmp(cur_option, "--no-group") == 0) || + (strcmp(cur_option, "-G") == 0)) + { + tmp_opts->list_long = 1; + tmp_opts->list_no_group = 1; + } + else if ((strcmp(cur_option, "--almost-all") == 0) || + (strcmp(cur_option, "-A") == 0)) + { + tmp_opts->list_almost_all = 1; + } + else if ((strcmp(cur_option, "--all") == 0) || + (strcmp(cur_option, "-a") == 0)) + { + tmp_opts->list_all = 1; + } + else if ((strcmp(cur_option, "--inode") == 0) || + (strcmp(cur_option, "-i") == 0)) + { + tmp_opts->list_inode = 1; + } + else if ((strcmp(cur_option, "--all-times") == 0)) + { + tmp_opts->list_all_times = 1; + } + else if (strcmp(cur_option, "-l") == 0) + { + tmp_opts->list_long = 1; + } + else + { + usage(argc, argv); + exit(EXIT_FAILURE); + } + + option_index++; + } + + for(i = option_index; i < argc; i++) + { + if (tmp_opts->num_starts < MAX_NUM_PATHS) + { + tmp_opts->start[i-option_index] = argv[i]; + 
tmp_opts->num_starts++; + } + else + { + fprintf(stderr,"Ignoring path %s\n",argv[i]); + } + } + + return tmp_opts; + +} +#else +static struct options* parse_args(int argc, char* argv[]) +{ + int i = 0, ret = 0, option_index = 0; + const char *cur_option = NULL; + struct options* tmp_opts = NULL; + static struct option long_opts[] = { {"help",0,0,0}, {"human-readable",0,0,0}, {"si",0,0,0}, {"version",0,0,0}, + {"recursive",0,0,0}, {"verbose",0,0,0}, {"numeric-uid-gid",0,0,0}, {"directory",0,0,0}, @@ -557,6 +898,7 @@ static struct options* parse_args(int argc, char* argv[]) {"all",0,0,0}, {"inode",0,0,0}, {"size",0,0,0}, + {"all-times",0,0,0}, {0,0,0,0} }; @@ -567,7 +909,7 @@ static struct options* parse_args(int argc, char* argv[]) } memset(tmp_opts, 0, sizeof(struct options)); - while((ret = getopt_long(argc, argv, "hVndGoAaiglt", + while((ret = getopt_long(argc, argv, "hRVndGoAaiglt", long_opts, &option_index)) != -1) { switch(ret) @@ -594,6 +936,10 @@ static struct options* parse_args(int argc, char* argv[]) printf("%s\n", PVFS2_VERSION); exit(0); } + else if (strcmp("recursive", cur_option) == 0) + { + goto list_recursive; + } else if (strcmp("verbose", cur_option) == 0) { goto list_verbose; @@ -622,6 +968,10 @@ static struct options* parse_args(int argc, char* argv[]) { goto list_inode; } + else if (strcmp("all-times", cur_option) == 0) + { + goto list_all_times; + } else { usage(argc, argv); @@ -632,13 +982,14 @@ static struct options* parse_args(int argc, char* argv[]) list_human_readable: tmp_opts->list_human_readable = 1; break; + case 'R': + list_recursive: + tmp_opts->list_recursive = 1; + break; case 'V': list_verbose: tmp_opts->list_verbose = 1; break; - case 't': - do_timing = 1; - break; case 'l': tmp_opts->list_long = 1; break; @@ -672,24 +1023,33 @@ static struct options* parse_args(int argc, char* argv[]) list_inode: tmp_opts->list_inode = 1; break; + list_all_times: + tmp_opts->list_all_times = 1; + break; + case 't': + do_timing = 1; + break; 
case '?': usage(argc, argv); exit(EXIT_FAILURE); } } - tmp_opts->start = (char **) calloc(1, (argc-optind+1) * sizeof(char *)); - if (tmp_opts->start == NULL) { - exit(EXIT_FAILURE); - } for(i = optind; i < argc; i++) { - tmp_opts->start[i-optind] = argv[i]; - tmp_opts->num_starts++; + if (tmp_opts->num_starts < MAX_NUM_PATHS) + { + tmp_opts->start[i-optind] = argv[i]; + tmp_opts->num_starts++; + } + else + { + fprintf(stderr,"Ignoring path %s\n",argv[i]); + } } - assert(tmp_opts->num_starts < (argc - optind + 1)); return tmp_opts; } +#endif static void usage(int argc, char** argv) { @@ -716,10 +1076,14 @@ static void usage(int argc, char** argv) "format\n"); fprintf(stderr," -n, --numeric-uid-gid like -l, but list " "numeric UIDs and GIDs\n"); + fprintf(stderr," --all-times display atime, mtime," + " and ctime information\n"); fprintf(stderr," -o like -l, but do not " "list group information\n"); fprintf(stderr," --help display this help " "and exit\n"); + fprintf(stderr," -R, --recursive list subdirectories " + "recursively\n"); fprintf(stderr," -V, --verbose reports if the dir is " "changing during listing\n"); fprintf(stderr," --version output version " @@ -730,13 +1094,15 @@ static void usage(int argc, char** argv) int main(int argc, char **argv) { int ret = -1, i = 0; - char **pvfs_path; - PVFS_fs_id *fs_id_array = NULL; + char pvfs_path[MAX_NUM_PATHS][PVFS_NAME_MAX]; + PVFS_fs_id fs_id_array[MAX_NUM_PATHS] = {0}; const PVFS_util_tab* tab; struct options* user_opts = NULL; char current_dir[PVFS_NAME_MAX] = {0}; int found_one = 0; + char *entry_buffer = malloc(ENTRY_MAX); + process_name = argv[0]; user_opts = parse_args(argc, argv); if (!user_opts) { @@ -753,7 +1119,12 @@ int main(int argc, char **argv) return(-1); } - ret = PVFS_sys_initialize(GOSSIP_NO_DEBUG); + for(i = 0; i < MAX_NUM_PATHS; i++) + { + memset(pvfs_path[i],0,PVFS_NAME_MAX); + } + + ret = PVFS_sys_initialize(GOSSIP_NO_DEBUG); if (ret < 0) { PVFS_perror("PVFS_sys_initialize", ret); @@ -786,28 
+1157,6 @@ int main(int argc, char **argv) user_opts->num_starts = 1; } - pvfs_path = (char **) calloc(1, user_opts->num_starts * sizeof(char *)); - if (!pvfs_path) - { - fprintf(stderr, "Could not alloc memory\n"); - return -1; - } - for(i = 0; i < user_opts->num_starts; i++) - { - pvfs_path[i] = (char *) calloc(1, PVFS_NAME_MAX); - if (pvfs_path[i] == NULL) - { - fprintf(stderr, "Could not alloc memory\n"); - return -1; - } - } - fs_id_array = (PVFS_fs_id *) calloc(1, user_opts->num_starts * sizeof(*fs_id_array)); - if (fs_id_array == NULL) - { - fprintf(stderr, "Could not alloc memory\n"); - return -1; - } - for(i = 0; i < user_opts->num_starts; i++) { ret = PVFS_util_resolve(user_opts->start[i], @@ -827,29 +1176,46 @@ int main(int argc, char **argv) for(i = 0; i < user_opts->num_starts; i++) { - if (user_opts->num_starts > 1) + char *substr = strstr(user_opts->start[i],pvfs_path[i]); + char *index = user_opts->start[i]; + char *search = substr; + int j = 0; + + /* Keep the mount path info to mimic /bin/ls output */ + if( strncmp(pvfs_path[i],"/",strlen(pvfs_path[i])) ) + { + /* Get last matching substring */ + while (search) + { + substr = search; + search = strstr(++search,pvfs_path[i]); + } + } + else /* Root directory case has nothing to match */ + { + substr = &user_opts->start[i][strlen(user_opts->start[i])]; + } + + + while ((index != substr) && (substr != NULL)) { - printf("%s:\n", pvfs_path[i]); + index++; + j++; } - do_list(pvfs_path[i], fs_id_array[i], user_opts); + user_opts->start[i][++j] = '\0'; + + do_list(user_opts->start[i], pvfs_path[i], fs_id_array[i], user_opts, entry_buffer); if (user_opts->num_starts > 1) { printf("\n"); } } - for (i = 0; i < user_opts->num_starts; i++) - { - free(pvfs_path[i]); - } - free(user_opts->start); - free(pvfs_path); - free(fs_id_array); PVFS_sys_finalize(); - if (user_opts) - free(user_opts); + free(user_opts); + free(entry_buffer); return(ret); } diff --git a/src/apps/admin/pvfs2-migrate-collection.c 
b/src/apps/admin/pvfs2-migrate-collection.c index aef24ce..83a79ee 100644 --- a/src/apps/admin/pvfs2-migrate-collection.c +++ b/src/apps/admin/pvfs2-migrate-collection.c @@ -34,6 +34,7 @@ #include "mkspace.h" #include "pint-distribution.h" #include "pint-dist-utils.h" +#include "pint-util.h" #ifndef PVFS2_VERSION #define PVFS2_VERSION "Unknown" @@ -45,6 +46,8 @@ typedef struct int fs_set; int all_set; int cleanup_set; + char alias[100]; + int alias_set; char fs_conf[PATH_MAX]; } options_t; @@ -59,9 +62,9 @@ int verbose = 0; static void print_help(char *progname); static int parse_args(int argc, char **argv, options_t *opts); static int src_get_version( - char* storage_space, TROVE_coll_id coll_id, char* coll_name, + char* meta_storage_space, TROVE_coll_id coll_id, char* coll_name, char* ver_string, int ver_string_max); -static int remove_collection_entry(char* storage_space, char* collname); +static int remove_collection_entry(char* meta_storage_space, char* collname); int migrate_collection(void * config, void * sconfig); void fs_config_dummy_free(void *); @@ -69,10 +72,10 @@ int recursive_rmdir(char* dir); /* functions specific to reading 0.0.1 collections */ static int src_get_version_0_0_1( - char* storage_space, TROVE_coll_id coll_id, + char* meta_storage_space, TROVE_coll_id coll_id, char* ver_string, int ver_string_max); static int translate_0_0_1( - char* storage_space, char* old_coll_path, + char* data_storage_space, char* meta_storage_space, char* old_coll_path, char* coll_name, TROVE_coll_id coll_id); static int translate_coll_eattr_0_0_1( char* old_coll_path, TROVE_coll_id coll_id, char* coll_name, @@ -84,7 +87,7 @@ static int translate_keyvals_0_0_1( char* old_coll_path, TROVE_coll_id coll_id, char* coll_name, TROVE_context_id trove_context); static int translate_bstreams_0_0_1( - char* storage_space, char* old_coll_path, + char* data_storage_space, char* old_coll_path, TROVE_coll_id coll_id, char* coll_name, TROVE_context_id trove_context); static int 
translate_keyval_db_0_0_1( @@ -139,6 +142,7 @@ int main(int argc, char **argv) /* all parameters read in from fs.conf */ struct server_configuration_s server_config; PINT_llist_p fs_configs; + char *server_alias; /* make sure that the buffers we intend to use for reading keys and * values is at least large enough to hold the maximum size of xattr keys @@ -160,13 +164,31 @@ int main(int argc, char **argv) return -1; } - ret = PINT_parse_config(&server_config, opts.fs_conf, NULL); + if(opts.alias_set) + { + server_alias = opts.alias; + } + else + { + server_alias = PINT_util_guess_alias(); + } + + ret = PINT_parse_config(&server_config, opts.fs_conf, server_alias, 1); if(ret < 0) { gossip_err("Error: Please check your config files.\n"); + if(!opts.alias_set) + { + free(server_alias); + } return -1; } + if(!opts.alias_set) + { + free(server_alias); + } + if(opts.all_set) { /* get all the collection ids from the fs config */ @@ -210,10 +232,15 @@ int migrate_collection(void * config, void * sconfig) struct server_configuration_s * server_config = (struct server_configuration_s *) sconfig; + if(server_config->meta_path == NULL) + { + server_config->meta_path = server_config->data_path; + } + memset(version, 0, 256); /* find version of source storage space */ ret = src_get_version( - server_config->storage_path, + server_config->meta_path, fs_config->coll_id, fs_config->file_system_name, version, 254); @@ -230,7 +257,7 @@ int migrate_collection(void * config, void * sconfig) if(strncmp(version, "0.0.1", 5) == 0) { sprintf(old_coll_path, "%s/%08x-old-%s", - server_config->storage_path, + server_config->meta_path, fs_config->coll_id, version); ret = access(old_coll_path, F_OK); @@ -269,7 +296,9 @@ int migrate_collection(void * config, void * sconfig) } ret = translate_0_0_1( - server_config->storage_path, old_coll_path, + server_config->data_path, + server_config->meta_path, + old_coll_path, fs_config->file_system_name, fs_config->coll_id); if(ret < 0) @@ -304,7 +333,8 @@ 
int migrate_collection(void * config, void * sconfig) * of creating it, but we don't know what the version * is anymore */ - DIR * storage_dir; + DIR * data_storage_dir; + DIR * meta_storage_dir; struct dirent * next_dirent; char collname[PATH_MAX]; int collname_length; @@ -312,15 +342,15 @@ int migrate_collection(void * config, void * sconfig) collname_length = sprintf(collname, "%08x-old", fs_config->coll_id); - storage_dir = opendir(server_config->storage_path); - if(!storage_dir) + data_storage_dir = opendir(server_config->data_path); + if(!data_storage_dir) { - fprintf(stderr, "Error: failed to open directory: %s\n", - server_config->storage_path); + fprintf(stderr, "Error: failed to open data directory: %s\n", + server_config->data_path); return -1; } - while((next_dirent = readdir(storage_dir)) != NULL) + while((next_dirent = readdir(data_storage_dir)) != NULL) { int d_namelen = strlen(next_dirent->d_name); if(collname_length < d_namelen && @@ -329,7 +359,7 @@ int migrate_collection(void * config, void * sconfig) char old_coll_path[PATH_MAX]; sprintf(old_coll_path, "%s/%s", - server_config->storage_path, next_dirent->d_name); + server_config->data_path, next_dirent->d_name); /* found an old version, delete it */ if(verbose) @@ -342,13 +372,55 @@ int migrate_collection(void * config, void * sconfig) stderr, "Error: failed to remove old collection at: %s\n", old_coll_path); - closedir(storage_dir); + closedir(data_storage_dir); return -1; } removed_olddirs = 1; } } + /* if the meta and data paths are the same, don't try to remove twice */ + if (strcmp(server_config->data_path, server_config->meta_path)) + { + meta_storage_dir = opendir(server_config->meta_path); + if(!meta_storage_dir) + { + fprintf(stderr, "Error: failed to open meta directory: %s\n", + server_config->meta_path); + return -1; + } + + while((next_dirent = readdir(meta_storage_dir)) != NULL) + { + int d_namelen = strlen(next_dirent->d_name); + if(collname_length < d_namelen && + 
strncmp(next_dirent->d_name, collname, collname_length) == 0) + { + char old_coll_path[PATH_MAX]; + + sprintf(old_coll_path, "%s/%s", + server_config->meta_path, next_dirent->d_name); + + /* found an old version, delete it */ + if(verbose) + printf("VERBOSE Removing old collection at: %s\n", + old_coll_path); + ret = recursive_rmdir(old_coll_path); + if(ret < 0) + { + fprintf( + stderr, + "Error: failed to remove old collection at: %s\n", + old_coll_path); + closedir(meta_storage_dir); + return -1; + } + removed_olddirs = 1; + } + } + closedir(meta_storage_dir); + } + if(removed_olddirs == 0) { printf("\nWARNING: No old collections with name \"%s\" " @@ -356,7 +428,7 @@ int migrate_collection(void * config, void * sconfig) fs_config->file_system_name); } - closedir(storage_dir); + closedir(data_storage_dir); } else { @@ -389,6 +461,7 @@ static int parse_args( {"version",0,0,0}, {"fs",1,0,0}, {"all",0,0,0}, + {"alias",1,0,0}, {"cleanup",0,0,0}, {0,0,0,0} }; @@ -421,7 +494,12 @@ static int parse_args( opts->all_set = 1; break; - case 5: /* cleanup */ + case 5: /* alias */ + strncpy(opts->alias, optarg, 99); + opts->alias_set = 1; + break; + + case 6: /* cleanup */ opts->cleanup_set = 1; break; default: @@ -476,6 +554,10 @@ static void print_help( fprintf(stderr,"--------------\n"); fprintf(stderr," --cleanup " "remove the old collection\n"); + fprintf(stderr, + " --alias Specify the alias for this server.\n" + " The migration tool tries to guess the\n" + " alias based on the hostname if none is specified.\n"); fprintf(stderr," --verbose " "print verbose messages during execution\n"); fprintf(stderr," --help " @@ -493,7 +575,7 @@ static void print_help( * \return 0 on succes, -1 on failure */ static int src_get_version( - char* storage_space, /**< path to storage space */ + char* meta_storage_space, /**< path to storage space */ TROVE_coll_id coll_id, /**< collection id */ char* coll_name, /**< collection name */ char* ver_string, /**< version in string format */ @@ 
-503,14 +585,14 @@ static int src_get_version( ret = src_get_version_0_0_1( - storage_space, coll_id, ver_string, ver_string_max); + meta_storage_space, coll_id, ver_string, ver_string_max); if(ret != 0) { fprintf(stderr, "Error: all known collection version checks " "failed for \ncollection %s (%08x) in storage space %s\n", - coll_name, coll_id, storage_space); + coll_name, coll_id, meta_storage_space); } return(ret); @@ -522,7 +604,7 @@ static int src_get_version( * \return 0 on succes, -1 on failure */ static int src_get_version_0_0_1( - char* storage_space, /**< path to storage space */ + char* meta_storage_space, /**< path to storage space */ TROVE_coll_id coll_id, /**< collection id */ char* ver_string, /**< version in string format */ int ver_string_max) /**< maximum size of version string */ @@ -533,7 +615,7 @@ static int src_get_version_0_0_1( DBT key, data; sprintf(coll_db, "%s/%08x/collection_attributes.db", - storage_space, coll_id); + meta_storage_space, coll_id); /* try to find a collections db */ ret = access(coll_db, F_OK); @@ -596,7 +678,8 @@ static int src_get_version_0_0_1( * \return 0 on succes, -1 on failure */ static int translate_0_0_1( - char* storage_space, /**< path to storage space */ + char* data_storage_space, /**< path to data storage space */ + char* meta_storage_space, /**< path to metadata storage space */ char* old_coll_path, /**< path to old collection */ char* coll_name, /**< collection name */ TROVE_coll_id coll_id) /**< collection id in string format */ @@ -611,7 +694,7 @@ static int translate_0_0_1( char current_path[PATH_MAX]; /* rename old collection */ - snprintf(current_path, PATH_MAX, "%s/%08x", storage_space, coll_id); + snprintf(current_path, PATH_MAX, "%s/%08x", meta_storage_space, coll_id); if(access(current_path, F_OK) != 0) { @@ -630,7 +713,7 @@ static int translate_0_0_1( return(-1); } - ret = remove_collection_entry(storage_space, coll_name); + ret = remove_collection_entry(meta_storage_space, coll_name); if(ret < 
0) { fprintf(stderr, "Error: failed to remove collection entry: %s\n", @@ -645,7 +728,8 @@ static int translate_0_0_1( if(verbose) printf("VERBOSE Creating temporary collection to migrate to.\n"); ret = pvfs2_mkspace( - storage_space, + data_storage_space, + meta_storage_space, coll_name, coll_id, TROVE_HANDLE_NULL, @@ -666,18 +750,18 @@ static int translate_0_0_1( { PVFS_perror("PINT_dist_initialize", ret); if(verbose) printf("VERBOSE Destroying temporary collection.\n"); - pvfs2_rmspace(storage_space, coll_name, coll_id, 1, 0); + pvfs2_rmspace(data_storage_space, meta_storage_space, coll_name, coll_id, 1, 0); return(-1); } /* initialize trove and lookup collection */ ret = trove_initialize( - TROVE_METHOD_DBPF, NULL, storage_space, 0); + TROVE_METHOD_DBPF, NULL, data_storage_space, meta_storage_space,0); if (ret < 0) { PVFS_perror("trove_initialize", ret); if(verbose) printf("VERBOSE Destroying temporary collection.\n"); - pvfs2_rmspace(storage_space, coll_name, coll_id, 1, 0); + pvfs2_rmspace(data_storage_space, meta_storage_space, coll_name, coll_id, 1, 0); return(-1); } ret = trove_collection_lookup( @@ -686,7 +770,7 @@ static int translate_0_0_1( { fprintf(stderr, "Error: failed to lookup new collection.\n"); if(verbose) printf("VERBOSE Destroying temporary collection.\n"); - pvfs2_rmspace(storage_space, coll_name, coll_id, 1, 0); + pvfs2_rmspace(data_storage_space, meta_storage_space, coll_name, coll_id, 1, 0); return -1; } @@ -704,7 +788,7 @@ static int translate_0_0_1( { fprintf(stderr, "Error: failed to migrate collection extended attributes.\n"); if(verbose) printf("VERBOSE Destroying temporary collection.\n"); - pvfs2_rmspace(storage_space, coll_name, coll_id, 1, 0); + pvfs2_rmspace(data_storage_space, meta_storage_space, coll_name, coll_id, 1, 0); return(-1); } @@ -715,7 +799,7 @@ static int translate_0_0_1( { fprintf(stderr, "Error: failed to migrate dspace attributes.\n"); if(verbose) printf("VERBOSE Destroying temporary collection.\n"); - 
pvfs2_rmspace(storage_space, coll_name, coll_id, 1, 0); + pvfs2_rmspace(data_storage_space, meta_storage_space, coll_name, coll_id, 1, 0); return(-1); } @@ -726,7 +810,7 @@ static int translate_0_0_1( { fprintf(stderr, "Error: failed to migrate keyvals.\n"); if(verbose) printf("VERBOSE Destroying temporary collection.\n"); - pvfs2_rmspace(storage_space, coll_name, coll_id, 1, 0); + pvfs2_rmspace(data_storage_space, meta_storage_space, coll_name, coll_id, 1, 0); return(-1); } @@ -737,12 +821,12 @@ static int translate_0_0_1( /* convert bstreams */ ret = translate_bstreams_0_0_1( - storage_space, old_coll_path, coll_id, coll_name, trove_context); + data_storage_space, old_coll_path, coll_id, coll_name, trove_context); if(ret < 0) { fprintf(stderr, "Error: failed to migrate bstreams.\n"); if(verbose) printf("VERBOSE Destroying temporary collection.\n"); - pvfs2_rmspace(storage_space, coll_name, coll_id, 1, 0); + pvfs2_rmspace(data_storage_space, meta_storage_space, coll_name, coll_id, 1, 0); return(-1); } @@ -761,7 +845,7 @@ static int translate_0_0_1( return(0); } -static int remove_collection_entry(char* storage_space, char* collname) +static int remove_collection_entry(char* meta_storage_space, char* collname) { char collections_db[PATH_MAX]; DB * dbp; @@ -769,7 +853,7 @@ static int remove_collection_entry(char* storage_space, char* collname) int ret = 0; TROVE_coll_id coll_id; - sprintf(collections_db, "%s/collections.db", storage_space); + sprintf(collections_db, "%s/collections.db", meta_storage_space); ret = access(collections_db, F_OK); if(ret == -1 && errno == ENOENT) @@ -1092,7 +1176,7 @@ static int translate_dspace_attr_0_0_1( coll_id, &extent_array, &new_handle, tmp_attr->type, NULL, (TROVE_SYNC | TROVE_FORCE_REQUESTED_HANDLE), - NULL, trove_context, &op_id); + NULL, trove_context, &op_id, NULL); while (ret == 0) { @@ -1125,8 +1209,8 @@ static int translate_dspace_attr_0_0_1( new_attr.ctime = tmp_attr->ctime; new_attr.mtime = tmp_attr->mtime; 
new_attr.atime = tmp_attr->atime; - new_attr.dfile_count = tmp_attr->dfile_count; - new_attr.dist_size = tmp_attr->dist_size; + new_attr.u.metafile.dfile_count = tmp_attr->dfile_count; + new_attr.u.metafile.dist_size = tmp_attr->dist_size; /* write the attributes into the new collection */ state = 0; @@ -1136,7 +1220,7 @@ static int translate_dspace_attr_0_0_1( TROVE_SYNC, NULL, trove_context, - &op_id); + &op_id, NULL); while (ret == 0) { ret = trove_dspace_test( @@ -1420,7 +1504,7 @@ static int translate_keyval_db_0_0_1( state = 0; ret = trove_keyval_write( coll_id, handle, &t_key, &t_val, trove_flags, 0, NULL, - trove_context, &op_id); + trove_context, &op_id, NULL); while (ret == 0) { @@ -1460,7 +1544,7 @@ static int translate_keyval_db_0_0_1( * \return 0 on succes, -1 on failure */ static int translate_bstreams_0_0_1( - char* storage_space, /**< path to trove storage space */ + char* data_storage_space, /**< path to trove storage space */ char* old_coll_path, /**< path to old collection */ TROVE_coll_id coll_id, /**< collection id */ char* new_name, /**< name of collection */ @@ -1497,7 +1581,7 @@ static int translate_bstreams_0_0_1( snprintf(bstream_file, PATH_MAX, "%s/bstreams/%.8d/%s", old_coll_path, i, tmp_ent->d_name); snprintf(new_bstream_file, PATH_MAX, "%s/%08x/bstreams/%.8d/%s", - storage_space, coll_id, i, tmp_ent->d_name); + data_storage_space, coll_id, i, tmp_ent->d_name); /* hard link to new location */ ret = link(bstream_file, new_bstream_file); if(ret != 0) diff --git a/src/apps/admin/pvfs2-mkdir.c b/src/apps/admin/pvfs2-mkdir.c index fa54dd1..6c22b47 100644 --- a/src/apps/admin/pvfs2-mkdir.c +++ b/src/apps/admin/pvfs2-mkdir.c @@ -237,7 +237,7 @@ static int make_directory(PVFS_credentials * credentials, parentdir_ptr, credentials, &resp_lookup, - PVFS2_LOOKUP_LINK_FOLLOW); + PVFS2_LOOKUP_LINK_FOLLOW, NULL); if( ret < 0 && !make_parent_dirs) @@ -275,7 +275,7 @@ static int make_directory(PVFS_credentials * credentials, parentdir_ptr, credentials, 
&resp_lookup, - PVFS2_LOOKUP_LINK_FOLLOW); + PVFS2_LOOKUP_LINK_FOLLOW, NULL); if(ret < 0) { @@ -314,7 +314,7 @@ static int make_directory(PVFS_credentials * credentials, parent_ref, attr, credentials, - &resp_mkdir); + &resp_mkdir, NULL); if (ret < 0) { diff --git a/src/apps/admin/pvfs2-mkspace.c b/src/apps/admin/pvfs2-mkspace.c index bda1488..9a71898 100644 --- a/src/apps/admin/pvfs2-mkspace.c +++ b/src/apps/admin/pvfs2-mkspace.c @@ -31,7 +31,8 @@ typedef struct char meta_ranges[PATH_MAX]; char data_ranges[PATH_MAX]; char collection[PATH_MAX]; - char storage_space[PATH_MAX]; + char data_space[PATH_MAX]; + char meta_space[PATH_MAX]; } options_t; static int default_verbose = 0; @@ -53,7 +54,8 @@ static int parse_args(int argc, char **argv, options_t *opts) {"version",0,0,0}, {"verbose",0,0,0}, {"defaults",0,0,0}, - {"storage-space",1,0,0}, + {"data-space",1,0,0}, + {"meta-space",1,0,0}, {"coll-id",1,0,0}, {"coll-name",1,0,0}, {"root-handle",1,0,0}, @@ -70,7 +72,7 @@ static int parse_args(int argc, char **argv, options_t *opts) exit(1); } - while ((ret = getopt_long(argc, argv, "s:c:i:r:vVhadDM:N:", + while ((ret = getopt_long(argc, argv, "c:i:r:vVhadDM:N:", long_opts, &option_index)) != -1) { switch (ret) @@ -90,10 +92,16 @@ static int parse_args(int argc, char **argv, options_t *opts) { goto do_verbose; } - else if (strcmp("storage-space", cur_option) == 0) + else if (strcmp("data-space", cur_option) == 0) { - goto do_storage_space; + strncpy(opts->data_space, optarg, PATH_MAX); + break; } + else if (strcmp("meta-space", cur_option) == 0) + { + strncpy(opts->meta_space, optarg, PATH_MAX); + break; + } else if (strcmp("coll-id", cur_option) == 0) { goto do_collection_id; @@ -173,10 +181,6 @@ static int parse_args(int argc, char **argv, options_t *opts) do_data_handle_range: strncpy(opts->data_ranges, optarg, PATH_MAX); break; - case 's': - do_storage_space: - strncpy(opts->storage_space, optarg, PATH_MAX); - break; case 'v': do_verbose: opts->verbose = 
PVFS2_MKSPACE_STDERR_VERBOSE; @@ -204,28 +208,31 @@ static void print_options(options_t *opts) { if (opts) { - printf("\tuse all defaults : %s\n", + printf("\t use all defaults : %s\n", (opts->use_defaults ? "yes" : "no")); - printf("\tdelete storage : %s\n", + printf("\t delete storage : %s\n", (opts->delete_storage ? "yes" : "no")); - printf("\tverbose : %s\n", + printf("\t verbose : %s\n", (opts->verbose ? "ON" : "OFF")); - printf("\troot handle : %llu\n", llu(opts->root_handle)); - printf("\tcollection-only mode: %s\n", + printf("\t root handle : %llu\n", llu(opts->root_handle)); + printf("\t collection-only mode: %s\n", (opts->collection_only ? "ON" : "OFF")); - printf("\tcollection id : %d\n", opts->coll_id); - printf("\tcollection name : %s\n", + printf("\t collection id : %d\n", opts->coll_id); + printf("\t collection name : %s\n", (strlen(opts->collection) ? opts->collection : "None specified")); - printf("\tmeta handle ranges : %s\n", + printf("\t meta handle ranges : %s\n", (strlen(opts->meta_ranges) ? opts->meta_ranges : "None specified")); - printf("\tdata handle ranges : %s\n", + printf("\t data handle ranges : %s\n", (strlen(opts->data_ranges) ? opts->data_ranges : "None specified")); - printf("\tstorage space : %s\n", - (strlen(opts->storage_space) ? - opts->storage_space : "None specified")); + printf("\t data storage space : %s\n", + (strlen(opts->data_space) ? + opts->data_space : "None specified")); + printf("\tmetadata storage space : %s\n", + (strlen(opts->meta_space) ? 
+ opts->meta_space : "None specified")); } } @@ -258,8 +265,10 @@ static void print_help(char *progname, options_t *opts) fprintf(stderr," -N, --data-handle-range=RANGE " "create collection with the specified\n " " data handle range\n"); - fprintf(stderr," -s, --storage-space=PATH " - "create storage space at this location\n"); + fprintf(stderr," --data-space=PATH " + "create data storage space at this location\n"); + fprintf(stderr," --meta-space=PATH " + "create metadata storage space at this location\n"); fprintf(stderr," -v, --verbose " "operate in verbose mode\n"); fprintf(stderr," -V, --version " @@ -296,9 +305,15 @@ int main(int argc, char **argv) print_options(&opts); - if (strlen(opts.storage_space) == 0) + if (strlen(opts.data_space) == 0) + { + fprintf(stderr, "Error: You MUST specify a data storage space\n"); + return -1; + } + + if (strlen(opts.meta_space) == 0) { - fprintf(stderr, "Error: You MUST specify a storage space\n"); + fprintf(stderr, "Error: You MUST specify a metadata storage space\n"); return -1; } @@ -318,16 +333,18 @@ int main(int argc, char **argv) if (opts.delete_storage) { - ret = pvfs2_rmspace(opts.storage_space, opts.collection, - opts.coll_id, opts.collection_only, - opts.verbose); + ret = pvfs2_rmspace(opts.data_space, opts.meta_space, + opts.collection, opts.coll_id, + opts.collection_only, opts.verbose); } else { - ret = pvfs2_mkspace(opts.storage_space, opts.collection, - opts.coll_id, opts.root_handle, - opts.meta_ranges, opts.data_ranges, - opts.collection_only, opts.verbose); + printf("opts.collection_only(%d).\n",opts.collection_only); + ret = pvfs2_mkspace(opts.data_space, opts.meta_space, + opts.collection, opts.coll_id, + opts.root_handle, opts.meta_ranges, + opts.data_ranges, opts.collection_only, + opts.verbose); } return ret; } diff --git a/src/apps/admin/pvfs2-perf-mon-example.c b/src/apps/admin/pvfs2-perf-mon-example.c index d17c033..116fd40 100644 --- a/src/apps/admin/pvfs2-perf-mon-example.c +++ 
b/src/apps/admin/pvfs2-perf-mon-example.c @@ -27,6 +27,19 @@ #define PVFS2_VERSION "Unknown" #endif +#define MAX_KEY_CNT 4; +/* macros for accessing data returned from server */ +#define VALID_FLAG(s,h) (perf_matrix[(s)][((h) * (key_cnt + 2)) + key_cnt] != 0.0) +#define ID(s,h) (perf_matrix[(s)][((h) * (key_cnt + 2)) + key_cnt]) +#define START_TIME(s,h) (perf_matrix[(s)][((h) * (key_cnt + 2)) + key_cnt]) +#define READ(s,h) (perf_matrix[(s)][((h) * (key_cnt + 2)) + 0]) +#define WRITE(s,h) (perf_matrix[(s)][((h) * (key_cnt + 2)) + 1]) +#define METADATA_READ(s,h) (perf_matrix[(s)][((h) * (key_cnt + 2)) + 2]) +#define METADATA_WRITE(s,h) (perf_matrix[(s)][((h) * (key_cnt + 2)) + 3]) + +int key_cnt; /* holds the Number of keys */ + + struct options { char* mnt_point; @@ -45,7 +58,7 @@ int main(int argc, char **argv) int i,j; PVFS_credentials creds; int io_server_count; - struct PVFS_mgmt_perf_stat** perf_matrix; + int64_t** perf_matrix; uint64_t* end_time_ms_array; uint32_t* next_id_array; PVFS_BMI_addr_t *addr_array; @@ -90,8 +103,7 @@ int main(int argc, char **argv) } /* allocate a 2 dimensional array for statistics */ - perf_matrix = (struct PVFS_mgmt_perf_stat**)malloc( - io_server_count*sizeof(struct PVFS_mgmt_perf_stat*)); + perf_matrix = (int64_t **)malloc(io_server_count*sizeof(int64_t *)); if(!perf_matrix) { perror("malloc"); @@ -99,8 +111,7 @@ int main(int argc, char **argv) } for(i=0; idirent_name, PATH_MAX, optarg); + snprintf(tmp_opts->dirent_name, PATH_MAX, "%s", optarg); break; case 'f': do_fsid: @@ -216,7 +216,7 @@ int main(int argc, char **argv) fprintf(stderr,"Attempting to remove object %llu,%d\n", llu(ref.handle), ref.fs_id); - ret = PVFS_mgmt_remove_object(ref, &credentials); + ret = PVFS_mgmt_remove_object(ref, &credentials, NULL); if (ret) { PVFS_perror("PVFS_mgmt_remove_object", ret); @@ -228,7 +228,7 @@ int main(int argc, char **argv) "\n", user_opts->dirent_name, llu(ref.handle), ref.fs_id); ret = PVFS_mgmt_remove_dirent( - ref, 
user_opts->dirent_name, &credentials); + ref, user_opts->dirent_name, &credentials, NULL); if (ret) { PVFS_perror("PVFS_mgmt_remove_dirent", ret); diff --git a/src/apps/admin/pvfs2-rm.c b/src/apps/admin/pvfs2-rm.c index 19802aa..987bca5 100644 --- a/src/apps/admin/pvfs2-rm.c +++ b/src/apps/admin/pvfs2-rm.c @@ -100,7 +100,7 @@ int main(int argc, char **argv) memset(&resp_lookup, 0, sizeof(PVFS_sysresp_lookup)); rc = PVFS_sys_lookup(cur_fs, pvfs_path, &credentials, - &resp_lookup, PVFS2_LOOKUP_LINK_NO_FOLLOW); + &resp_lookup, PVFS2_LOOKUP_LINK_NO_FOLLOW, NULL); if (rc) { PVFS_perror("PVFS_sys_lookup", rc); @@ -110,7 +110,7 @@ int main(int argc, char **argv) memset(&resp_getattr, 0, sizeof(PVFS_sysresp_getattr)); rc = PVFS_sys_getattr(resp_lookup.ref, PVFS_ATTR_SYS_TYPE, - &credentials, &resp_getattr); + &credentials, &resp_getattr, NULL); if (rc) { PVFS_perror("PVFS_sys_getattr", rc); @@ -146,7 +146,7 @@ int main(int argc, char **argv) memset(&resp_lookup, 0, sizeof(PVFS_sysresp_lookup)); rc = PVFS_sys_lookup(cur_fs, directory, &credentials, - &resp_lookup, PVFS2_LOOKUP_LINK_NO_FOLLOW); + &resp_lookup, PVFS2_LOOKUP_LINK_NO_FOLLOW, NULL); if (rc) { PVFS_perror("PVFS_sys_lookup", rc); @@ -155,7 +155,7 @@ int main(int argc, char **argv) } parent_ref = resp_lookup.ref; - rc = PVFS_sys_remove(filename, parent_ref, &credentials); + rc = PVFS_sys_remove(filename, parent_ref, &credentials, NULL); if (rc) { fprintf(stderr, "Error: An error occurred while " diff --git a/src/apps/admin/pvfs2-set-debugmask.c b/src/apps/admin/pvfs2-set-debugmask.c index a6df256..4f62d2e 100644 --- a/src/apps/admin/pvfs2-set-debugmask.c +++ b/src/apps/admin/pvfs2-set-debugmask.c @@ -42,6 +42,7 @@ int main(int argc, char **argv) struct options *user_opts = NULL; char pvfs_path[PVFS_NAME_MAX] = {0}; PVFS_credentials creds; + struct PVFS_mgmt_setparam_value param_value; /* look at command line arguments */ user_opts = parse_args(argc, argv); @@ -76,18 +77,22 @@ int main(int argc, char **argv) 
printf("Setting debugmask on server %s\n", user_opts->single_server); + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = (uint64_t)user_opts->debug_mask; ret = PVFS_mgmt_setparam_single( cur_fs, &creds, PVFS_SERV_PARAM_GOSSIP_MASK, - user_opts->debug_mask, user_opts->single_server, + &param_value, user_opts->single_server, NULL, NULL); } else { printf("Setting debugmask on all servers\n"); + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = user_opts->debug_mask; ret = PVFS_mgmt_setparam_all( cur_fs, &creds, PVFS_SERV_PARAM_GOSSIP_MASK, - user_opts->debug_mask, NULL, NULL); + &param_value, NULL, NULL); } if (ret) diff --git a/src/apps/admin/pvfs2-set-eventmask.c b/src/apps/admin/pvfs2-set-eventmask.c index 69c0b97..8327676 100644 --- a/src/apps/admin/pvfs2-set-eventmask.c +++ b/src/apps/admin/pvfs2-set-eventmask.c @@ -27,10 +27,8 @@ struct options { char* mnt_point; int mnt_point_set; - int api_mask; - int api_mask_set; - int op_mask; - int op_mask_set; + char *event_string; + int events_set; }; static struct options* parse_args(int argc, char* argv[]); @@ -43,8 +41,8 @@ int main(int argc, char **argv) struct options* user_opts = NULL; char pvfs_path[PVFS_NAME_MAX] = {0}; PVFS_credentials creds; + struct PVFS_mgmt_setparam_value param_value; - /* look at command line arguments */ user_opts = parse_args(argc, argv); if(!user_opts) { @@ -73,35 +71,24 @@ int main(int argc, char **argv) PVFS_util_gen_credentials(&creds); - if(!user_opts->op_mask || !user_opts->api_mask) + param_value.type = PVFS_MGMT_PARAM_TYPE_STRING; + if(!user_opts->event_string) { - /* turn off event logging */ - ret = PVFS_mgmt_setparam_all(cur_fs, &creds, - PVFS_SERV_PARAM_EVENT_ON, 0, NULL, NULL); + param_value.u.string_value = "none"; } else { - /* set mask */ - ret = PVFS_mgmt_setparam_all(cur_fs, &creds, - PVFS_SERV_PARAM_EVENT_MASKS, - (int64_t)(((int64_t)user_opts->op_mask << 32) - + user_opts->api_mask), - NULL, NULL); - if(ret < 0) - { - 
PVFS_perror("PVFS_mgmt_setparam_all", ret); - return(-1); - } - - /* turn on event logging */ - ret = PVFS_mgmt_setparam_all(cur_fs, &creds, - PVFS_SERV_PARAM_EVENT_ON, 1, NULL, NULL); + param_value.u.string_value = user_opts->event_string; } + ret = PVFS_mgmt_setparam_all( + cur_fs, &creds, + PVFS_SERV_PARAM_EVENT_ENABLE, + &param_value, NULL, NULL); if(ret < 0) { - PVFS_perror("PVFS_mgmt_setparam_all", ret); - return(-1); + PVFS_perror("PVFS_mgmt_setparam_all", ret); + return(-1); } PVFS_sys_finalize(); @@ -118,7 +105,7 @@ int main(int argc, char **argv) */ static struct options* parse_args(int argc, char* argv[]) { - char flags[] = "vm:a:o:"; + char flags[] = "vm:e:"; int one_opt = 0; int len = 0; @@ -160,23 +147,14 @@ static struct options* parse_args(int argc, char* argv[]) strcat(tmp_opts->mnt_point, "/"); tmp_opts->mnt_point_set = 1; break; - case('a'): - sscanf(optarg, "%x", &tmp_opts->api_mask); - if(ret < 1){ - if(tmp_opts->mnt_point) free(tmp_opts->mnt_point); - free(tmp_opts); - return(NULL); - } - tmp_opts->api_mask_set = 1; - break; - case('o'): - sscanf(optarg, "%x", &tmp_opts->op_mask); + case('e'): + tmp_opts->event_string = strdup(optarg); if(ret < 1){ if(tmp_opts->mnt_point) free(tmp_opts->mnt_point); free(tmp_opts); return(NULL); } - tmp_opts->op_mask_set = 1; + tmp_opts->events_set = 1; break; case('?'): usage(argc, argv); @@ -184,8 +162,7 @@ static struct options* parse_args(int argc, char* argv[]) } } - if(!tmp_opts->mnt_point_set || !tmp_opts->api_mask_set || - !tmp_opts->op_mask_set) + if(!tmp_opts->mnt_point_set || !tmp_opts->events_set) { if(tmp_opts->mnt_point) free(tmp_opts->mnt_point); free(tmp_opts); @@ -200,8 +177,8 @@ static void usage(int argc, char** argv) { fprintf(stderr, "\n"); fprintf(stderr, "Usage : %s [-m fs_mount_point] " - "[-a hex_api_mask] [-o hex_operation_mask]\n", argv[0]); - fprintf(stderr, "Example: %s -m /mnt/pvfs2 -a 0xFFFF -o 0xFFFF\n", + "[-e events]\n", argv[0]); + fprintf(stderr, "Example: %s -m /mnt/pvfs2 -e 
bmi-send,dbpf-write\n", argv[0]); return; } diff --git a/src/apps/admin/pvfs2-set-mode.c b/src/apps/admin/pvfs2-set-mode.c index d855c10..02d7800 100644 --- a/src/apps/admin/pvfs2-set-mode.c +++ b/src/apps/admin/pvfs2-set-mode.c @@ -41,6 +41,7 @@ int main(int argc, char **argv) struct options* user_opts = NULL; char pvfs_path[PVFS_NAME_MAX] = {0}; PVFS_credentials creds; + struct PVFS_mgmt_setparam_value param_value; /* look at command line arguments */ user_opts = parse_args(argc, argv); @@ -70,10 +71,13 @@ int main(int argc, char **argv) PVFS_util_gen_credentials(&creds); + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = user_opts->mode; + ret = PVFS_mgmt_setparam_all(cur_fs, &creds, PVFS_SERV_PARAM_MODE, - user_opts->mode, + &param_value, NULL, NULL /* detailed errors */); diff --git a/src/apps/admin/pvfs2-set-sync.c b/src/apps/admin/pvfs2-set-sync.c index f7d97d0..daeb71f 100644 --- a/src/apps/admin/pvfs2-set-sync.c +++ b/src/apps/admin/pvfs2-set-sync.c @@ -46,6 +46,7 @@ int main(int argc, char **argv) struct options* user_opts = NULL; char pvfs_path[PVFS_NAME_MAX] = {0}; PVFS_credentials creds; + struct PVFS_mgmt_setparam_value param_value; /* look at command line arguments */ user_opts = parse_args(argc, argv); @@ -75,10 +76,12 @@ int main(int argc, char **argv) PVFS_util_gen_credentials(&creds); + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = user_opts->meta_sync; ret = PVFS_mgmt_setparam_all(cur_fs, &creds, PVFS_SERV_PARAM_SYNC_META, - user_opts->meta_sync, + &param_value, NULL, NULL /* detailed errors */); if(ret < 0) @@ -87,10 +90,13 @@ int main(int argc, char **argv) return(-1); } + param_value.type = PVFS_MGMT_PARAM_TYPE_UINT64; + param_value.u.value = user_opts->data_sync; + ret = PVFS_mgmt_setparam_all(cur_fs, &creds, PVFS_SERV_PARAM_SYNC_DATA, - user_opts->data_sync, + &param_value, NULL, NULL /* detailed errors */); if(ret < 0) diff --git a/src/apps/admin/pvfs2-setmattr b/src/apps/admin/pvfs2-setmattr new file 
mode 100755 index 0000000..857d2c9 --- /dev/null +++ b/src/apps/admin/pvfs2-setmattr @@ -0,0 +1,116 @@ +#!/bin/csh -f +unalias * + +#set echo +#set verbose + +#echo $0 + +#if no options entered, then display usage information +if ( $#argv == 0 ) then + goto usage +endif + +#get the path associated with this command +set mybin = $0:h + +#make sure that pvfs-xattr exists +if ( ! -e $mybin/pvfs2-xattr ) then + echo "$mybin/pvfs2-xattr command not found." + goto usage +endif + +#initialize parms +set init = "init" +set myFile = $init +set myCopies = $init +set myMode = $init +set myCopiesParm = 0 +set myModeParm = 0 +set myFileParm = 0 + +#parse the command line.... +set index = 1 +while ( $index <= $#argv ) + if ( {$argv[$index]} == {-h} ) then + goto usage + endif + + if ( $index == $#argv ) then + echo + echo "Missing parameters. Recheck usage." + goto usage + endif + + @ i = $index + 1 + + if ( {$argv[$index]} == {-c} ) then + set myCopies = $argv[$i] + set myCopiesParm = 1 + else if ( {$argv[$index]} == {-m} ) then + set myMode = $argv[$i] + set myModeParm = 1 + else if ( {$argv[$index]} == {-f} ) then + set myFile = $argv[$i] + set myFileParm = 1 + else + echo + echo "Missing or invalid parameters. Recheck usage." + goto usage + endif + + @ index += 2 +end #while + +#NOTE: When PVFS is NOT in kernel mode, we can't easily check for file +# existence. So, we just check to see that SOMETHING was entered. +# pvfs-xattr will validate the filename. +#Did the user enter a file name? +if ( $myFile == $init ) then + echo + echo "Filename is required. Recheck usage." + goto usage +endif + +#Did the user enter a numeric copy value? +if ( $myCopiesParm ) then + echo $myCopies | grep -E "[^0-9]" - + if ( ! $status ) then + echo + echo "Invalid copies value entered : $myCopies. Check usage." + goto usage + endif +endif + +#Did the user enter a valid mode? +if ( $myModeParm ) then + if ( ! 
($myMode == 100 || $myMode == 200) ) then + echo + echo "Invalid mirror mode entered : $myMode. Check usage." + goto usage + endif +endif + + +#issue commands +if ( $myCopiesParm ) then + {$mybin}/pvfs2-xattr -s -k user.pvfs2.mirror.copies -v {$myCopies} {$myFile} +endif +if ( $myModeParm ) then + {$mybin}/pvfs2-xattr -s -k user.pvfs2.mirror.mode -v {$myMode} {$myFile} +endif + +#leave script +exit + +#display help and exit script +usage: + echo + echo "pvfs2-setmattr {-c copies} {-m mode} {-h} -f file" + echo " copies : positive numeric value" + echo " mode : 100 => No Mirroring" + echo " 200 => Create Mirror when IMMUTABLE is set" + echo " -h : Display this message" + echo " file : file to mirror (may include path)" +exit +######## end of script file ########## diff --git a/src/apps/admin/pvfs2-showcoll.c b/src/apps/admin/pvfs2-showcoll.c index 7b7be6a..9890741 100644 --- a/src/apps/admin/pvfs2-showcoll.c +++ b/src/apps/admin/pvfs2-showcoll.c @@ -20,10 +20,9 @@ #include "pvfs2-attr.h" #include "pvfs2-internal.h" -/* declare the strnlen prototype */ -size_t strnlen(const char *s, size_t limit); -static char storage_space[PATH_MAX] = "/tmp/pvfs2-test-space"; +static char data_path[PATH_MAX] = "/tmp/pvfs2-test-space"; +static char meta_path[PATH_MAX] = "/tmp/pvfs2-test-space"; static char collection[PATH_MAX]; static int verbose = 0, got_collection = 0, print_keyvals = 0, got_dspace_handle = 0; TROVE_handle dspace_handle; @@ -71,9 +70,10 @@ int main(int argc, char **argv) /* initialize trove, verifying storage space exists */ ret = trove_initialize( - TROVE_METHOD_DBPF, NULL, storage_space, 0); + TROVE_METHOD_DBPF, NULL, data_path, meta_path, 0); if (ret < 0) { + printf("Error from trove_initialize is %d.\n",ret); fprintf(stderr, "%s: error: trove initialize failed; aborting!\n", argv[0]); @@ -81,9 +81,9 @@ int main(int argc, char **argv) } if (verbose) fprintf(stderr, - "%s: info: initialized with storage space '%s'.\n", + "%s: info: initialized with storage spaces 
'%s' and '%s'.\n", argv[0], - storage_space); + data_path, meta_path); /* if no collection was specified, simply print out the collections and exit */ if (!got_collection) { @@ -164,17 +164,19 @@ int main(int argc, char **argv) /* print basic stats on collection */ if (no_root_handle) { fprintf(stdout, - "Storage space %s, collection %s (coll_id = %d, " + "Storage space %s and %s, collection %s (coll_id = %d, " "*** no root_handle found ***):\n", - storage_space, + data_path, + meta_path, collection, coll_id); } else { fprintf(stdout, - "Storage space %s, collection %s (coll_id = %d, " + "Storage space %s and %s, collection %s (coll_id = %d, " "root_handle = 0x%08llx):\n", - storage_space, + data_path, + meta_path, collection, coll_id, llu(root_handle)); @@ -200,11 +202,14 @@ static int parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "s:c:d:kvh")) != EOF) { + while ((c = getopt(argc, argv, "s:m:c:d:kvh")) != EOF) { switch (c) { case 's': - strncpy(storage_space, optarg, PATH_MAX); + strncpy(data_path, optarg, PATH_MAX); break; + case 'm': + strncpy(meta_path, optarg, PATH_MAX); + break; case 'c': /* collection */ got_collection = 1; strncpy(collection, optarg, PATH_MAX); @@ -216,6 +221,7 @@ static int parse_args(int argc, char **argv) /* TODO: USE BIGGER VALUE */ got_dspace_handle = 1; dspace_handle = strtol(optarg, NULL, 16); + break; case 'v': verbose = 1; break; @@ -224,7 +230,7 @@ static int parse_args(int argc, char **argv) fprintf(stderr, "%s: error: unrecognized option '%c'.\n", argv[0], c); case 'h': fprintf(stderr, - "usage: pvfs2-showcoll [-s storage_space] [-c collection_name] [-d dspace_handle] [-v] [-k] [-h]\n"); + "usage: pvfs2-showcoll [-s data_storage_space] [-m metadata storage space] [-c collection_name] [-d dspace_handle] [-v] [-k] [-h]\n"); fprintf(stderr, "\tdefault storage space is '/tmp/pvfs2-test-space'.\n"); fprintf(stderr, "\t'-v' turns on verbose output.\n"); fprintf(stderr, "\t'-k' prints data in keyval 
spaces.\n"); @@ -295,7 +301,7 @@ static int print_dspace(TROVE_coll_id coll_id, 0 /* flags */, NULL /* user ptr */, trove_context, - &op_id); + &op_id, NULL); while (ret == 0) { ret = trove_dspace_test( coll_id, op_id, trove_context, &opcount, NULL, NULL, &state, @@ -304,10 +310,10 @@ static int print_dspace(TROVE_coll_id coll_id, if (ret != 1) return -1; fprintf(stdout, - "\t0x%08llx (dspace_getattr output: type = %s, b_size = %lld)\n", - llu(handle), + "\t0x%08llx/%llu (dspace_getattr output: type = %s, b_size = %lld)\n", + llu(handle),llu(handle), type_to_string(ds_attr.type), - lld(ds_attr.b_size)); + (ds_attr.type == PVFS_TYPE_DATAFILE) ? lld(ds_attr.u.datafile.b_size) : 0); if (print_keyvals) { ret = print_dspace_keyvals(coll_id, handle, @@ -325,6 +331,7 @@ static char *type_to_string(TROVE_ds_type type) static char sl[] = "symlink"; static char di[] = "directory"; static char dd[] = "dirdata"; + static char in[] = "internal"; static char un[] = "unknown"; switch (type) { @@ -338,6 +345,8 @@ static char *type_to_string(TROVE_ds_type type) return sl; case PVFS_TYPE_DIRECTORY: return di; + case PVFS_TYPE_INTERNAL: + return in; default: return un; } @@ -354,16 +363,37 @@ static int print_dspace_keyvals(TROVE_coll_id coll_id, TROVE_op_id op_id; TROVE_ds_state state; - key.buffer = malloc(65536); - key.buffer_sz = 65536; + key.buffer = malloc(256); + key.buffer_sz = 256; + key.read_sz = 0; val.buffer = malloc(65536); val.buffer_sz = 65536; + val.read_sz = 0; + + if (key.buffer) + memset(key.buffer,0,256); + if (val.buffer) + memset(val.buffer,0,65536); + + if ( !(key.buffer && val.buffer) ) + { + if (key.buffer) + free(key.buffer); + if (val.buffer) + free(val.buffer); + printf("%s: Unable to allocate memory.\n",__func__); + return -1; + } + pos = TROVE_ITERATE_START; count = 1; while (count > 0) { int opcount; + printf("%s:calling trove_keyval_iterate for %llu.\n" + ,__func__ + ,llu(handle)); ret = trove_keyval_iterate(coll_id, handle, &pos, @@ -374,14 +404,24 @@ 
static int print_dspace_keyvals(TROVE_coll_id coll_id, NULL /* vtag */, NULL /* user ptr */, trove_context, - &op_id); + &op_id, NULL); while (ret == 0) ret = trove_dspace_test( coll_id, op_id, trove_context, &opcount, NULL, NULL, &state, TROVE_DEFAULT_TEST_TIMEOUT); if (ret != 1) return -1; + printf("%s: count=%d\n",__func__,count); + if (count > 0) print_keyval_pair(&key, &val, type, 65536); + + /* re-initialize key val */ + memset(key.buffer,0,256); + memset(val.buffer,0,65536); + key.buffer_sz = 256; + val.buffer_sz = 65536; + key.read_sz = 0; + val.read_sz = 0; } free(key.buffer); @@ -405,7 +445,7 @@ static void print_datafile_handles(PVFS_handle *h_p, { int i; - for (i = 0; i < count && i < 10; i++) fprintf(stdout, "0x%08llx ", llu(h_p[i])); + for (i = 0; i < count && i < 10; i++) fprintf(stdout, "\n\t\t\t\t0x%08llx(%llu)", llu(h_p[i]), llu(h_p[i])); if (i == 10) fprintf(stdout, "...\n"); else fprintf(stdout, "\n"); @@ -418,8 +458,18 @@ static int print_keyval_pair(TROVE_keyval_s *key_p, { int key_printable = 0, val_printable = 0; - if (isprint(((char *)key_p->buffer)[0]) && (strnlen(key_p->buffer, sz) < 64)) key_printable = 1; - if (isprint(((char *)val_p->buffer)[0]) && (strnlen(val_p->buffer, sz) < 64)) val_printable = 1; + if (isprint(((char *)key_p->buffer)[0])) key_printable = 1; + if (isprint(((char *)val_p->buffer)[0])) val_printable = 1; + + if (key_printable && key_p->buffer_sz >= 64) + { + memset(&((char *)key_p->buffer)[64],0,1); + } + + if (val_printable && val_p->buffer_sz >= 64) + { + memset(&((char *)key_p->buffer)[64],0,1); + } if (!strncmp(key_p->buffer, "metadata", 9) && val_p->read_sz == sizeof(struct PVFS_object_attr)) { fprintf(stdout, @@ -435,18 +485,19 @@ static int print_keyval_pair(TROVE_keyval_s *key_p, "\t\t'%s' (%d): '%s' (%d) as handles = ", (char *) key_p->buffer, key_p->read_sz, - (char *) val_p->buffer, + val_printable ? 
(char *) val_p->buffer : "", val_p->read_sz); print_datafile_handles((PVFS_handle *) val_p->buffer, val_p->read_sz / sizeof(PVFS_handle)); } else if (type == PVFS_TYPE_DIRECTORY && !strncmp(key_p->buffer, "de", 3)) { fprintf(stdout, - "\t\t'%s' (%d): '%s' (%d) as a handle = 0x%08llx\n", + "\t\t'%s' (%d): '%s' (%d) as a handle = 0x%08llx(%llu)\n", (char *) key_p->buffer, key_p->read_sz, - (char *) val_p->buffer, + val_printable ? (char *) val_p->buffer : "", val_p->read_sz, - llu(*(TROVE_handle *) val_p->buffer)); + llu(*(TROVE_handle *) val_p->buffer), + llu(*(TROVE_handle *) val_p->buffer)); } else if (type == PVFS_TYPE_DIRDATA && val_p->read_sz == 8) { fprintf(stdout, @@ -457,6 +508,42 @@ static int print_keyval_pair(TROVE_keyval_s *key_p, val_p->read_sz, llu(*(TROVE_handle *) val_p->buffer)); } + else if (key_printable && !strncmp((char *)key_p->buffer,"user.pvfs2.meta_hint",20)) + { + fprintf(stdout, + "\t\t'%s' (%d): 0x%08llX (%d)\n" + ,(char *)key_p->buffer + ,(int)strlen((char*)key_p->buffer) + ,*(unsigned long long *)val_p->buffer + ,(int)sizeof(unsigned long)); + } + else if (key_printable && !strncmp((char *)key_p->buffer,"user.pvfs2.mirror.mode",22)) + { + fprintf(stdout, + "\t\t'%s' (%d): %d (%d)\n" + ,(char *)key_p->buffer + ,(int)strlen((char*)key_p->buffer) + ,*(unsigned int *)val_p->buffer + ,(int)sizeof(unsigned int)); + } + else if (key_printable && !strncmp((char *)key_p->buffer,"user.pvfs2.mirror.copies",24)) + { + fprintf(stdout, + "\t\t'%s' (%d): %d (%d)\n" + ,(char *)key_p->buffer + ,(int)strlen((char*)key_p->buffer) + ,*(unsigned int *)val_p->buffer + ,(int)sizeof(unsigned int)); + } + else if (key_printable && !strncmp((char *)key_p->buffer,"user.pvfs2.mirror.handles",25)) + { + fprintf(stdout, + "\t\t'%s' (%d): '' (%d) as handles:" + ,(char *)key_p->buffer + ,(int)strlen((char*)key_p->buffer) + ,(int)val_p->read_sz); + print_datafile_handles((PVFS_handle *) val_p->buffer, val_p->read_sz / sizeof(PVFS_handle)); + } else if (key_printable && 
val_printable) { fprintf(stdout, "\t\t'%s' (%d): '%s' (%d)\n", @@ -492,13 +579,16 @@ static int print_collections(void) coll_name = malloc(PATH_MAX); if (coll_name == NULL) return -1; + memset(coll_name,0,PATH_MAX); name.buffer = coll_name; name.buffer_sz = PATH_MAX; + name.read_sz = 0; count = 1; pos = TROVE_ITERATE_START; - fprintf(stdout, "Storage space %s collections:\n", storage_space); + fprintf(stdout, "Storage space %s and %s collections:\n", + data_path, meta_path); while (count > 0) { ret = trove_collection_iterate(TROVE_METHOD_DBPF, @@ -519,6 +609,11 @@ static int print_collections(void) "\t%s (coll_id = %d)\n", coll_name, coll_id); + memset(coll_name,0,PATH_MAX); + memset(&name,0,sizeof(name)); + name.buffer = coll_name; + name.buffer_sz = PATH_MAX; + name.read_sz = 0; } fprintf(stdout, "\n"); diff --git a/src/apps/admin/pvfs2-stat.c b/src/apps/admin/pvfs2-stat.c index ecad4a9..b4f6a9d 100644 --- a/src/apps/admin/pvfs2-stat.c +++ b/src/apps/admin/pvfs2-stat.c @@ -54,6 +54,7 @@ void print_stats(const PVFS_object_ref * ref, int main(int argc, char **argv) { int ret = -1, + ret_agg = 0, i = 0; char ** ppszPvfsPath = NULL; PVFS_fs_id * pfs_id = NULL; @@ -145,6 +146,7 @@ int main(int argc, char **argv) { fprintf(stderr, "Error stating [%s]\n", user_opts.pszFiles[i]); } + ret_agg |= ret; } PVFS_sys_finalize(); @@ -173,7 +175,7 @@ int main(int argc, char **argv) free(pfs_id); } - return(0); + return(ret_agg); } static int do_stat(const char * pszFile, @@ -200,7 +202,7 @@ static int do_stat(const char * pszFile, (char *) pszRelativeFile, (PVFS_credentials *) credentials, &lk_response, - PVFS2_LOOKUP_LINK_FOLLOW); + PVFS2_LOOKUP_LINK_FOLLOW, NULL); } else { @@ -208,7 +210,7 @@ static int do_stat(const char * pszFile, (char *) pszRelativeFile, (PVFS_credentials *) credentials, &lk_response, - PVFS2_LOOKUP_LINK_NO_FOLLOW); + PVFS2_LOOKUP_LINK_NO_FOLLOW, NULL); } if(ret < 0) @@ -227,7 +229,7 @@ static int do_stat(const char * pszFile, ret = PVFS_sys_getattr(ref, 
PVFS_ATTR_SYS_ALL_NOHINT, (PVFS_credentials *) credentials, - &getattr_response); + &getattr_response, NULL); if(ret < 0) { @@ -472,6 +474,13 @@ void print_stats(const PVFS_object_ref * ref, { fprintf(stdout, " datafiles : %d\n", attr->dfile_count); } + + if( (attr->mask & PVFS_ATTR_SYS_BLKSIZE) && + (attr->objtype == PVFS_TYPE_METAFILE)) + { + fprintf(stdout, " blksize : %lld\n", lld(attr->blksize)); + } + /* dirent_count is only valid on directories */ if( (attr->mask & PVFS_ATTR_SYS_DIRENT_COUNT) && (attr->objtype == PVFS_TYPE_DIRECTORY)) diff --git a/src/apps/admin/pvfs2-statfs.c b/src/apps/admin/pvfs2-statfs.c index 1c75db1..2d9879c 100644 --- a/src/apps/admin/pvfs2-statfs.c +++ b/src/apps/admin/pvfs2-statfs.c @@ -84,7 +84,7 @@ int main(int argc, char **argv) PVFS_util_gen_credentials(&creds); /* gather normal statfs statistics from system interface */ - ret = PVFS_sys_statfs(cur_fs, &creds, &resp_statfs); + ret = PVFS_sys_statfs(cur_fs, &creds, &resp_statfs, NULL); if (ret < 0) { PVFS_perror("PVFS_sys_statfs", ret); @@ -146,7 +146,7 @@ int main(int argc, char **argv) outcount = resp_statfs.server_count; ret = PVFS_mgmt_statfs_all(cur_fs, &creds, stat_array, - &outcount, NULL); + &outcount, NULL, NULL); for(j = 0; j<2; j++) { diff --git a/src/apps/admin/pvfs2-touch.c b/src/apps/admin/pvfs2-touch.c index acff138..6b11b77 100644 --- a/src/apps/admin/pvfs2-touch.c +++ b/src/apps/admin/pvfs2-touch.c @@ -15,9 +15,11 @@ #include #include #include +#include #include "pvfs2.h" #include "str-utils.h" +#include "bmi.h" #ifndef PVFS2_VERSION #define PVFS2_VERSION "Unknown" @@ -26,6 +28,8 @@ /* optional parameters, filled in by parse_args() */ struct options { + int random; + char* server_list; uint32_t num_files; char **filenames; }; @@ -37,6 +41,13 @@ int main(int argc, char **argv) { int ret = -1, i = 0; struct options *user_opts = NULL; + char* tmp_server; + int tmp_server_index; + PVFS_sys_layout layout; + + layout.algorithm = PVFS_SYS_LAYOUT_ROUND_ROBIN; + 
layout.server_list.count = 0; + layout.server_list.servers = NULL; /* look at command line arguments */ user_opts = parse_args(argc, argv); @@ -63,6 +74,14 @@ int main(int argc, char **argv) char directory[PVFS_NAME_MAX]; char filename[PVFS_SEGMENT_MAX]; + layout.algorithm = PVFS_SYS_LAYOUT_ROUND_ROBIN; + layout.server_list.count = 0; + if(layout.server_list.servers) + { + free(layout.server_list.servers); + } + layout.server_list.servers = NULL; + char pvfs_path[PVFS_NAME_MAX] = {0}; PVFS_fs_id cur_fs; PVFS_sysresp_lookup resp_lookup; @@ -97,7 +116,7 @@ int main(int argc, char **argv) memset(&resp_lookup, 0, sizeof(PVFS_sysresp_lookup)); rc = PVFS_sys_lookup(cur_fs, pvfs_path, &credentials, - &resp_lookup, PVFS2_LOOKUP_LINK_NO_FOLLOW); + &resp_lookup, PVFS2_LOOKUP_LINK_NO_FOLLOW, NULL); if (rc) { PVFS_perror("PVFS_sys_lookup", rc); @@ -109,7 +128,7 @@ int main(int argc, char **argv) memset(&attr, 0, sizeof(PVFS_sys_attr)); attr.owner = credentials.uid; attr.group = credentials.gid; - attr.perms = 0; + attr.perms = 0777; attr.atime = time(NULL); attr.mtime = attr.atime; attr.mask = PVFS_ATTR_SYS_ALL_SETABLE; @@ -117,13 +136,69 @@ int main(int argc, char **argv) parent_ref = resp_lookup.ref; + if(user_opts->random) + { + layout.algorithm = PVFS_SYS_LAYOUT_RANDOM; + } + else if(user_opts->server_list) + { + layout.algorithm = PVFS_SYS_LAYOUT_LIST; + layout.server_list.count = 1; + tmp_server = user_opts->server_list; + + /* iterate once to count servers */ + while((tmp_server = index(tmp_server, ','))) + { + layout.server_list.count++; + tmp_server++; + } + + layout.server_list.servers = + malloc(layout.server_list.count*sizeof(PVFS_BMI_addr_t)); + if(!(layout.server_list.servers)) + { + perror("malloc"); + ret = -1; + break; + } + + /* split servers out and resolve each addr */ + tmp_server_index = 0; + for(tmp_server = strtok(user_opts->server_list, ","); + tmp_server != NULL; + tmp_server = strtok(NULL, ",")) + { + assert(tmp_server_index < 
layout.server_list.count); + + /* TODO: is there a way to do this without internal BMI + * functions? + */ + rc = BMI_addr_lookup( + &layout.server_list.servers[tmp_server_index], + tmp_server); + if(rc < 0) + { + PVFS_perror("BMI_addr_lookup", rc); + break; + } + tmp_server_index++; + } + if(tmp_server_index != layout.server_list.count) + { + fprintf(stderr, "Error: unable to resolve server list.\n"); + ret = -1; + break; + } + } + rc = PVFS_sys_create(filename, parent_ref, attr, &credentials, NULL, - NULL, - &resp_create); + &resp_create, + &layout, + NULL); if (rc) { fprintf(stderr, "Error: An error occurred while creating %s\n", @@ -135,6 +210,12 @@ int main(int argc, char **argv) } PVFS_sys_finalize(); + + if(user_opts->server_list) + { + free(layout.server_list.servers); + free(user_opts->server_list); + } free(user_opts); return ret; @@ -149,7 +230,7 @@ int main(int argc, char **argv) static struct options* parse_args(int argc, char **argv) { int one_opt = 0; - char flags[] = "?"; + char flags[] = "l:r?"; struct options *tmp_opts = NULL; tmp_opts = (struct options *)malloc(sizeof(struct options)); @@ -160,6 +241,8 @@ static struct options* parse_args(int argc, char **argv) memset(tmp_opts, 0, sizeof(struct options)); tmp_opts->filenames = 0; + tmp_opts->server_list = NULL; + tmp_opts->random = 0; while((one_opt = getopt(argc, argv, flags)) != EOF) { @@ -168,9 +251,26 @@ static struct options* parse_args(int argc, char **argv) case('?'): usage(argc, argv); exit(EXIT_FAILURE); + case('l'): + tmp_opts->server_list = strdup(optarg); + if(!tmp_opts->server_list) + { + perror("strdup"); + exit(EXIT_FAILURE); + } + break; + case('r'): + tmp_opts->random = 1; + break; } } + if(tmp_opts->random && tmp_opts->server_list) + { + fprintf(stderr, "Error: only one of -r or -l may be specified.\n"); + exit(EXIT_FAILURE); + } + if (optind < argc) { int i = 0; @@ -194,7 +294,10 @@ static struct options* parse_args(int argc, char **argv) static void usage(int argc, char 
**argv) { - fprintf(stderr, "Usage: %s [-rf] pvfs2_filename[s]\n", argv[0]); + fprintf(stderr, "Usage: %s pvfs2_filename[s]\n", argv[0]); + fprintf(stderr, " optional arguments:\n"); + fprintf(stderr, " -l use list layout (requires comma separated list of servers)\n"); + fprintf(stderr, " -r use random layout\n"); } /* diff --git a/src/apps/admin/pvfs2-validate.c b/src/apps/admin/pvfs2-validate.c index ab42d8f..575b766 100755 --- a/src/apps/admin/pvfs2-validate.c +++ b/src/apps/admin/pvfs2-validate.c @@ -141,7 +141,7 @@ int main(int argc, char **argv) cur_fs, pvfs_path, &creds, &lookup_resp, - PVFS2_LOOKUP_LINK_NO_FOLLOW); + PVFS2_LOOKUP_LINK_NO_FOLLOW, NULL); if (ret != 0) { diff --git a/src/apps/admin/pvfs2-viewdist.c b/src/apps/admin/pvfs2-viewdist.c index 31fe7f3..1299f50 100644 --- a/src/apps/admin/pvfs2-viewdist.c +++ b/src/apps/admin/pvfs2-viewdist.c @@ -91,7 +91,7 @@ static int generic_dist(file_object *obj, PVFS_credentials *creds, { char *buffer = (char *) malloc(4096); int ret; - + if (obj->fs_type == UNIX_FILE) { #ifndef HAVE_FGETXATTR_EXTRA_ARGS @@ -113,8 +113,8 @@ static int generic_dist(file_object *obj, PVFS_credentials *creds, key.buffer_sz = strlen(DIST_KEY) + 1; val.buffer = buffer; val.buffer_sz = 4096; - if ((ret = PVFS_sys_geteattr(obj->u.pvfs2.ref, - creds, &key, &val)) < 0) + if ((ret = PVFS_sys_geteattr(obj->u.pvfs2.ref, + creds, &key, &val, NULL)) < 0) { PVFS_perror("PVFS_sys_geteattr", ret); return -1; @@ -137,7 +137,7 @@ static int generic_server_location(file_object *obj, PVFS_credentials *creds, char *buffer = (char *) malloc(4096); int ret, num_dfiles, count; PVFS_fs_id fsid; - + if (obj->fs_type == UNIX_FILE) { #ifndef HAVE_FGETXATTR_EXTRA_ARGS @@ -159,8 +159,8 @@ static int generic_server_location(file_object *obj, PVFS_credentials *creds, key.buffer_sz = strlen(DFILE_KEY) + 1; val.buffer = buffer; val.buffer_sz = 4096; - if ((ret = PVFS_sys_geteattr(obj->u.pvfs2.ref, - creds, &key, &val)) < 0) + if ((ret = 
PVFS_sys_geteattr(obj->u.pvfs2.ref, + creds, &key, &val, NULL)) < 0) { PVFS_perror("PVFS_sys_geteattr", ret); return -1; @@ -169,7 +169,7 @@ static int generic_server_location(file_object *obj, PVFS_credentials *creds, fsid = obj->u.pvfs2.fs_id; } /* - * At this point, we know all the dfile handles + * At this point, we know all the dfile handles */ num_dfiles = (ret / sizeof(PVFS_handle)); count = num_dfiles < *nservers ? num_dfiles : *nservers; @@ -212,6 +212,7 @@ int main(int argc, char ** argv) PVFS_credentials credentials; char *servers[256]; PVFS_handle handles[256]; + char metadataserver[256]; int i, nservers = 256; memset(&dist, 0, sizeof(dist)); @@ -258,10 +259,21 @@ int main(int argc, char ** argv) printf("dist_name = %s\n", dist->dist_name); printf("dist_params:\n%s\n", dist->methods->params_string(dist->params)); PINT_dist_free(dist); + + + ret = PINT_cached_config_get_server_name(metadataserver, 256, + src.u.pvfs2.ref.handle, src.u.pvfs2.ref.fs_id); + if( ret != 0) + { + fprintf(stderr, "Error, could not get metadataserver name\n"); + return (-1); + } + printf("Metadataserver: %s\n", metadataserver); + printf("Number of datafiles/servers = %d\n", nservers); for (i = 0; i < nservers; i++) { - printf("Server %d - %s, handle: %llu (%08llx.bstream)\n", i, servers[i], + printf("Datafile %d - %s, handle: %llu (%08llx.bstream)\n", i, servers[i], llu(handles[i]), llu(handles[i])); free(servers[i]); } @@ -380,18 +392,18 @@ static int generic_open(file_object *obj, PVFS_credentials *credentials) perror("fstatfs:"); fprintf(stderr, "could not fstatfs %s\n", obj->u.ufs.path); } - memcpy(&obj->u.ufs.fs_id, &PINT_statfs_fsid(&statfsbuf), + memcpy(&obj->u.ufs.fs_id, &PINT_statfs_fsid(&statfsbuf), sizeof(PINT_statfs_fsid(&statfsbuf))); return 0; } else { memset(&resp_lookup, 0, sizeof(PVFS_sysresp_lookup)); - ret = PVFS_sys_lookup(obj->u.pvfs2.fs_id, + ret = PVFS_sys_lookup(obj->u.pvfs2.fs_id, (char *) obj->u.pvfs2.pvfs2_path, - credentials, + credentials, 
&resp_lookup, - PVFS2_LOOKUP_LINK_FOLLOW); + PVFS2_LOOKUP_LINK_FOLLOW, NULL); if (ret < 0) { PVFS_perror("PVFS_sys_lookup", ret); @@ -402,7 +414,7 @@ static int generic_open(file_object *obj, PVFS_credentials *credentials) memset(&resp_getattr, 0, sizeof(PVFS_sysresp_getattr)); ret = PVFS_sys_getattr(ref, PVFS_ATTR_SYS_ALL_NOHINT, - credentials, &resp_getattr); + credentials, &resp_getattr, NULL); if (ret) { fprintf(stderr, "Failed to do pvfs2 getattr on %s\n", diff --git a/src/apps/admin/pvfs2-xattr.c b/src/apps/admin/pvfs2-xattr.c index 97c355e..b12f522 100644 --- a/src/apps/admin/pvfs2-xattr.c +++ b/src/apps/admin/pvfs2-xattr.c @@ -2,6 +2,9 @@ * (C) 2004 Clemson University and The University of Chicago * * See COPYING in top-level directory. + * + * 03/19/07 - Added set and get for user.pvfs2.mirror.mode and ..mirror.copies. + * Added get for user.pvfs2.mirror.handles and ..mirror.status */ #include @@ -25,6 +28,8 @@ #include "xattr-utils.h" +#include "pvfs2-mirror.h" + #define VALBUFSZ 1024 /* extended attribute name spaces supported in PVFS2 */ @@ -40,10 +45,10 @@ const char *PINT_eattr_namespaces[] = /* optional parameters, filled in by parse_args() */ struct options { - PVFS_ds_keyval key; - PVFS_ds_keyval val; + PVFS_ds_keyval *key; + PVFS_ds_keyval *val; char* srcfile; - int get, text; + int get, text, key_count; }; enum object_type { @@ -77,20 +82,30 @@ typedef struct file_object_s { static struct options* parse_args(int argc, char* argv[]); static int generic_open(file_object *obj, PVFS_credentials *credentials); -static int pvfs2_eattr(int get, file_object *, PVFS_ds_keyval *key_p, - PVFS_ds_keyval *val_p, PVFS_credentials *creds); + +static int pvfs2_eattr(int get + ,file_object *obj + ,PVFS_ds_keyval *key_p + ,PVFS_ds_keyval *val_p + ,PVFS_credentials *creds + ,int key_count); + static void usage(int argc, char** argv); static int resolve_filename(file_object *obj, char *filename); static int modify_val(PVFS_ds_keyval *key_p, PVFS_ds_keyval *val_p); 
static int permit_set(PVFS_ds_keyval *key_p); static int eattr_is_prefixed(char* key_name); +PVFS_metafile_hint current_meta_hint={0}; + int main(int argc, char **argv) { int ret = 0; struct options* user_opts = NULL; file_object src; PVFS_credentials credentials; + int i; + PVFS_ds_keyval tmp_val={0}; memset(&src, 0, sizeof(src)); /* look at command line arguments */ @@ -117,49 +132,149 @@ int main(int argc, char **argv) fprintf(stderr, "Could not open %s\n", user_opts->srcfile); return -1; } - if (!eattr_is_prefixed(user_opts->key.buffer)) + + if (!eattr_is_prefixed(user_opts->key[0].buffer)) { - fprintf(stderr, "extended attribute key is not prefixed %s\n", (char *) user_opts->key.buffer); + fprintf(stderr, "extended attribute key is not prefixed %s\n" + , (char *) user_opts->key[0].buffer); return -1; } if (!user_opts->get) { - if (!permit_set(&user_opts->key)) + if (!permit_set(&user_opts->key[0])) { - fprintf(stderr, "Not permitted to set key %s\n", (char *) user_opts->key.buffer); + fprintf(stderr, "Not permitted to set key %s\n" + , (char *) user_opts->key[0].buffer); return -1; } - if (modify_val(&user_opts->key, &user_opts->val) < 0) + if (strncmp(user_opts->key[0].buffer + ,"user.pvfs2.meta_hint" + ,user_opts->key[0].buffer_sz) == 0) + { + tmp_val.buffer=¤t_meta_hint.flags; + tmp_val.buffer_sz=sizeof(current_meta_hint.flags); + /*retrieve the current value of meta_hint*/ + ret=pvfs2_eattr(1 /*get*/ + ,&src + ,user_opts->key + ,&tmp_val + ,&credentials + ,1 /*keycount*/); + if (ret != 0) + { + printf("%s does not currently have a meta_hint value (0X%08X).\n" + ,user_opts->srcfile + ,(unsigned int)current_meta_hint.flags + ); + }else{ + printf("%s has a meta_hint value of (0X%08X).\n" + ,user_opts->srcfile + ,(unsigned int)current_meta_hint.flags + ); + } + } + if (modify_val(&user_opts->key[0], &user_opts->val[0]) < 0) { - fprintf(stderr, "Invalid value for user-settable hint %s, %s\n", (char *) user_opts->key.buffer, (char *) user_opts->val.buffer); + 
fprintf(stderr, "Invalid value for user-settable attribute %s\n" + , (char *) user_opts->key[0].buffer); return -1; } } - ret = pvfs2_eattr(user_opts->get, &src, &user_opts->key, &user_opts->val, &credentials); - if (ret != 0) + ret = pvfs2_eattr(user_opts->get + ,&src + ,user_opts->key + ,user_opts->val + ,&credentials + ,user_opts->key_count); + if ( (ret != 0) && (ret == -PVFS_ENOENT) ) { + printf("PVFS_sys_geteattr: no hints defined\n"); + return ret; + } + else if (ret != 0) + { + PVFS_perror("PVFS_sys_geteattr",ret); return ret; } if (user_opts->get && user_opts->text) { - if (strncmp(user_opts->key.buffer, "user.pvfs2.meta_hint", SPECIAL_METAFILE_HINT_KEYLEN) == 0) { - PVFS_metafile_hint *hint = (PVFS_metafile_hint *) user_opts->val.buffer; - printf("Metafile hints: "); + if (strncmp(user_opts->key[0].buffer + ,"user.pvfs2.meta_hint" + ,user_opts->key[0].buffer_sz) == 0) { + PVFS_metafile_hint *hint = + (PVFS_metafile_hint *) user_opts->val[0].buffer; + printf("Metafile Hints (0X%08X)",(unsigned int)hint->flags); if (hint->flags & PVFS_IMMUTABLE_FL) { - printf("immutable file "); + printf(" :immutable file "); } if (hint->flags & PVFS_APPEND_FL) { - printf("Append-only file "); + printf(" :Append-only file "); } if (hint->flags & PVFS_NOATIME_FL) { - printf("Atime updates disabled."); + printf(" :Atime updates disabled"); + } + if (hint->flags & PVFS_MIRROR_FL) { + printf(" :Mirroring is enabled"); } printf("\n"); + } else if ( strncmp(user_opts->key[0].buffer + ,"user.pvfs2.mirror.handles" + ,user_opts->key[0].buffer_sz) == 0) + { + PVFS_handle *myHandles = (PVFS_handle *)user_opts->val[0].buffer; + int copies = *(int *)user_opts->val[1].buffer; + int dfile_count = src.u.pvfs2.attr.dfile_count; + for (i=0; i<(copies * dfile_count); i++) + { + printf("Handle(%d):%llu\n",i,llu(myHandles[i])); + } + } else if ( strncmp(user_opts->key[0].buffer + ,"user.pvfs2.mirror.copies" + ,user_opts->key[0].buffer_sz) == 0) + { + int *myCopies = (int 
*)user_opts->val[0].buffer; + printf("Number of Mirrored Copies : %d\n",*myCopies); + } else if ( strncmp(user_opts->key[0].buffer + ,"user.pvfs2.mirror.status" + ,user_opts->key[0].buffer_sz) == 0) + { + int copies = *(int *)user_opts->val[1].buffer; + int dfile_count = src.u.pvfs2.attr.dfile_count; + PVFS_handle *status = (PVFS_handle *)user_opts->val[0].buffer; + for (i=0; i<(dfile_count * copies); i++) + printf("src handle(%d) : status(%s) : value(%llu)\n" + ,i + ,status[i]==0?"usable":"UNusable" + ,llu(status[i])); + } else if ( strncmp(user_opts->key[0].buffer + ,"user.pvfs2.mirror.mode" + ,user_opts->key[0].buffer_sz) == 0) + { + printf("Mirroring Mode : "); + switch(*(MIRROR_MODE *)user_opts->val[0].buffer) + { + case NO_MIRRORING : + { + printf("Turned OFF\n"); + break; + } + case MIRROR_ON_IMMUTABLE : + { + printf("Create Mirror when IMMUTABLE is set\n"); + break; + } + default: + { + printf("Unknown mode(%d)\n" + ,*(int *)user_opts->val[0].buffer); + break; + } + }/*end switch*/ } else { - printf("key:%s Value:\n%s\n", - (char *)user_opts->key.buffer, - (char *)user_opts->val.buffer); + printf("key : \"%s\" \tValue : \"%s\"\n", + (char *)user_opts->key[0].buffer, + (char *)user_opts->val[0].buffer); } } PVFS_sys_finalize(); @@ -168,27 +283,86 @@ int main(int argc, char **argv) static int modify_val(PVFS_ds_keyval *key_p, PVFS_ds_keyval *val_p) { - if (strncmp(key_p->buffer, "user.pvfs2.meta_hint", SPECIAL_METAFILE_HINT_KEYLEN) == 0) + /*We don't want these settings to interfere with the mirroring flag. It is + *turned on and off with the pvfs2-setmattr and setmattr commands. 
+ */ + if (strncmp(key_p->buffer,"user.pvfs2.meta_hint" + ,key_p->buffer_sz) == 0) { - PVFS_metafile_hint hint; - memset(&hint, 0, sizeof(hint)); - if (strncmp(val_p->buffer, "+immutable", 10) == 0) - hint.flags |= PVFS_IMMUTABLE_FL; - else if (strncmp(val_p->buffer, "-immutable", 10) == 0) - hint.flags &= ~PVFS_IMMUTABLE_FL; - else if (strncmp(val_p->buffer, "+append", 7) == 0) - hint.flags |= PVFS_APPEND_FL; - else if (strncmp(val_p->buffer, "-append", 7) == 0) - hint.flags &= ~PVFS_APPEND_FL; - else if (strncmp(val_p->buffer, "+noatime", 8) == 0) - hint.flags |= PVFS_NOATIME_FL; - else if (strncmp(val_p->buffer, "-noatime", 8) == 0) - hint.flags &= ~PVFS_NOATIME_FL; - else + if (strncmp(val_p->buffer, "+immutable", val_p->buffer_sz) == 0) + { + current_meta_hint.flags |= PVFS_IMMUTABLE_FL; + printf("Adding immutable to meta_hint...(0X%08X)\n" + ,(unsigned int)current_meta_hint.flags); + } + else if (strncmp(val_p->buffer, "-immutable", val_p->buffer_sz) == 0) + { + current_meta_hint.flags &= ~PVFS_IMMUTABLE_FL; + printf("Removing immutable from meta_hint...(0X%08X)\n" + ,(unsigned int)current_meta_hint.flags); + } + else if (strncmp(val_p->buffer,"=immutable", val_p->buffer_sz) == 0) + { + current_meta_hint.flags = + (current_meta_hint.flags & ~ALL_FS_META_HINT_FLAGS) | PVFS_IMMUTABLE_FL; + printf("Setting meta_hint to immutable only (0X%08X)\n" + ,(unsigned int)current_meta_hint.flags); + } + else if (strncmp(val_p->buffer, "+append", val_p->buffer_sz) == 0) + { + current_meta_hint.flags |= PVFS_APPEND_FL; + printf("Adding append to meta_hint...(0X%08X)\n" + ,(unsigned int)current_meta_hint.flags); + } + else if (strncmp(val_p->buffer, "-append", val_p->buffer_sz) == 0) + { + current_meta_hint.flags &= ~PVFS_APPEND_FL; + printf("Removing append from meta_hint...(0X%08X)\n" + ,(unsigned int)current_meta_hint.flags); + } + else if (strncmp(val_p->buffer,"=append", val_p->buffer_sz) == 0) + { + current_meta_hint.flags = + (current_meta_hint.flags & 
~ALL_FS_META_HINT_FLAGS) | PVFS_APPEND_FL; + printf("Setting meta_hint to append only (0X%08X)\n" + ,(unsigned int)current_meta_hint.flags); + } + else if (strncmp(val_p->buffer, "+noatime", val_p->buffer_sz) == 0) + { + current_meta_hint.flags |= PVFS_NOATIME_FL; + printf("Adding noatime to meta_hint...(0X%08X)\n" + ,(unsigned int)current_meta_hint.flags); + } + else if (strncmp(val_p->buffer, "-noatime", val_p->buffer_sz) == 0) + { + current_meta_hint.flags &= ~PVFS_NOATIME_FL; + printf("Removing atime from meta_hint...(0X%08X)\n" + ,(unsigned int)current_meta_hint.flags); + } + else if (strncmp(val_p->buffer,"=noatime", val_p->buffer_sz) == 0) + { + current_meta_hint.flags = + (current_meta_hint.flags & ~ALL_FS_META_HINT_FLAGS) | PVFS_NOATIME_FL; + printf("Setting meta_hint to noatime only (0X%08X)\n" + ,(unsigned int)current_meta_hint.flags); + } + else + { return -1; - memcpy(val_p->buffer, &hint, sizeof(hint)); - val_p->buffer_sz = sizeof(hint); + } + memcpy(val_p->buffer, ¤t_meta_hint.flags, sizeof(current_meta_hint.flags)); + val_p->buffer_sz = sizeof(current_meta_hint.flags); + } else if (strncmp(key_p->buffer,"user.pvfs2.mirror.mode" + ,key_p->buffer_sz) == 0) + { + printf("Setting mirror mode to %d\n",*(int *)val_p->buffer); + } else if (strncmp(key_p->buffer,"user.pvfs2.mirror.copies" + ,key_p->buffer_sz) == 0) + { + printf("Setting number of mirrored copies to %d\n" + ,*(int *)val_p->buffer); } + return 0; } @@ -207,8 +381,12 @@ static int permit_set(PVFS_ds_keyval *key_p) * * returns zero on success and negative one on failure */ -static int pvfs2_eattr(int get, file_object *obj, PVFS_ds_keyval *key_p, - PVFS_ds_keyval *val_p, PVFS_credentials *creds) +static int pvfs2_eattr(int get + ,file_object *obj + ,PVFS_ds_keyval *key_p + ,PVFS_ds_keyval *val_p + ,PVFS_credentials *creds + ,int key_count) { int ret = -1; @@ -217,9 +395,17 @@ static int pvfs2_eattr(int get, file_object *obj, PVFS_ds_keyval *key_p, if (get == 1) { #ifndef 
HAVE_FGETXATTR_EXTRA_ARGS - if ((ret = fgetxattr(obj->u.ufs.fd, key_p->buffer, val_p->buffer, val_p->buffer_sz)) < 0) + if ((ret = fgetxattr(obj->u.ufs.fd + ,key_p->buffer + ,val_p->buffer + ,val_p->buffer_sz)) < 0) #else - if ((ret = fgetxattr(obj->u.ufs.fd, key_p->buffer, val_p->buffer, val_p->buffer_sz, 0, 0)) < 0) + if ((ret = fgetxattr(obj->u.ufs.fd + ,key_p->buffer + ,val_p->buffer + ,val_p->buffer_sz + ,0 + ,0)) < 0) #endif { perror("fgetxattr:"); @@ -246,19 +432,38 @@ static int pvfs2_eattr(int get, file_object *obj, PVFS_ds_keyval *key_p, } else { - if (get == 1) + if (get == 1 && key_count == 1) { - ret = PVFS_sys_geteattr(obj->u.pvfs2.ref, creds, key_p, val_p); - } - else { - ret = PVFS_sys_seteattr(obj->u.pvfs2.ref, creds, key_p, val_p, 0); - } - - if (ret < 0) + ret = PVFS_sys_geteattr(obj->u.pvfs2.ref, creds, key_p, val_p, NULL); + } else if (get == 1 && key_count == 2) { - PVFS_perror("PVFS_sys_geteattr", ret); - return -1; + PVFS_sysresp_geteattr *resp = malloc(sizeof(*resp)); + if (!resp) + { + fprintf(stderr,"Unable to allocate resp structure.\n"); + exit(EXIT_FAILURE); + } + memset(resp,0,sizeof(*resp)); + resp->val_array = val_p; + resp->err_array = malloc(2 * sizeof(PVFS_error)); + if (!resp->err_array) + { + fprintf(stderr,"Unable to allocate err_array.\n"); + exit(EXIT_FAILURE); + } + memset(resp->err_array,0,sizeof(2 * sizeof(PVFS_error))); + + ret = PVFS_sys_geteattr_list(obj->u.pvfs2.ref + ,creds + ,key_count + ,key_p + ,resp + ,NULL ); + } else { + ret = PVFS_sys_seteattr(obj->u.pvfs2.ref, creds, key_p, val_p, 0, NULL); } + + return ret; } return 0; } @@ -284,14 +489,33 @@ static struct options* parse_args(int argc, char* argv[]) } memset(tmp_opts, 0, sizeof(struct options)); + /*create one key structure*/ + tmp_opts->key = malloc(sizeof(PVFS_ds_keyval)); + if (!tmp_opts->key) + { + fprintf(stderr,"Unable to allocate tmp_opts->key.\n"); + exit(EXIT_FAILURE); + } + memset(tmp_opts->key,0,sizeof(PVFS_ds_keyval)); + + /*create one val 
structure*/ + tmp_opts->val = malloc(sizeof(PVFS_ds_keyval)); + if (!tmp_opts->val) + { + fprintf(stderr,"Unable to allocate tmp_opts->val.\n"); + exit(EXIT_FAILURE); + } + memset(tmp_opts->val,0,sizeof(PVFS_ds_keyval)); + + /*set default key_count*/ + tmp_opts->key_count = 1; + /* fill in defaults */ - memset(&tmp_opts->key, 0, sizeof(PVFS_ds_keyval)); - memset(&tmp_opts->val, 0, sizeof(PVFS_ds_keyval)); tmp_opts->srcfile = strdup(argv[argc-1]); tmp_opts->get = 1; /* look at command line arguments */ - while((one_opt = getopt(argc, argv, flags)) != EOF) + while((one_opt = getopt(argc, argv, flags)) != -1) { switch(one_opt){ case 't': @@ -301,38 +525,130 @@ static struct options* parse_args(int argc, char* argv[]) tmp_opts->get = 0; break; case 'k': - tmp_opts->key.buffer = strdup(optarg); - tmp_opts->key.buffer_sz = strlen(tmp_opts->key.buffer) + 1; + tmp_opts->key[0].buffer = strdup(optarg); + tmp_opts->key[0].buffer_sz = strlen(tmp_opts->key[0].buffer) + 1; break; case 'v': - tmp_opts->val.buffer = strdup(optarg); - tmp_opts->val.buffer_sz = strlen(tmp_opts->val.buffer) + 1; - break; + if (strncmp(tmp_opts->key[0].buffer + ,"user.pvfs2.mirror.mode" + ,tmp_opts->key[0].buffer_sz) == 0 || + strncmp(tmp_opts->key[0].buffer + ,"user.pvfs2.mirror.copies" + ,tmp_opts->key[0].buffer_sz) == 0) + { /*convert string argument into numeric argument*/ + tmp_opts->val[0].buffer = malloc(sizeof(int)); + if (!tmp_opts->val[0].buffer) + { + printf("Unable to allocate memory for key value.\n"); + exit(EXIT_FAILURE); + } + memset(tmp_opts->val[0].buffer,0,sizeof(int)); + *(int *)tmp_opts->val[0].buffer = atoi(optarg); + tmp_opts->val[0].buffer_sz = sizeof(int); + break; + } else { + tmp_opts->val[0].buffer = strdup(optarg); + tmp_opts->val[0].buffer_sz = strlen(tmp_opts->val[0].buffer); + break; + } case('?'): printf("?\n"); usage(argc, argv); exit(EXIT_FAILURE); } } + + /*ensure that the given mode is supported by PVFS*/ + if (!tmp_opts->get && + strncmp(tmp_opts->key[0].buffer + 
,"user.pvfs2.mirror.mode" + ,tmp_opts->key[0].buffer_sz) == 0) + { + if (tmp_opts->val[0].buffer && + (*(int *)tmp_opts->val[0].buffer < BEGIN_MIRROR_MODE || + *(int *)tmp_opts->val[0].buffer > END_MIRROR_MODE) ) + { + fprintf(stderr,"Invalid Mirror Mode ==> %d\n" + "\tValid Modes\n" + "\t1. %d == No Mirroring\n" + "\t2. %d == Mirroring on Immutable\n" + ,*(int *)tmp_opts->val[0].buffer + ,NO_MIRRORING,MIRROR_ON_IMMUTABLE); + + exit(EXIT_FAILURE); + } + } + if (tmp_opts->get == 1) { - tmp_opts->val.buffer = calloc(1, VALBUFSZ); - tmp_opts->val.buffer_sz = VALBUFSZ; - if (tmp_opts->val.buffer == NULL) + /*if user wants mirror.handles or mirror.status, then we must also */ + /*retrieve the number of copies, so we know how to display the */ + /*information properly. */ + if (strncmp(tmp_opts->key[0].buffer + ,"user.pvfs2.mirror.handles" + ,tmp_opts->key[0].buffer_sz) == 0 || + strncmp(tmp_opts->key[0].buffer + ,"user.pvfs2.mirror.status" + ,tmp_opts->key[0].buffer_sz) == 0 ) { - fprintf(stderr, "Could not allocate val\n"); - exit(EXIT_FAILURE); + tmp_opts->key_count = 2; + PVFS_ds_keyval *myKeys = malloc(tmp_opts->key_count * + sizeof(PVFS_ds_keyval)); + if (!myKeys) + { + fprintf(stderr,"Unable to allocate myKeys.\n"); + exit(EXIT_FAILURE); + } + memset(myKeys,0,tmp_opts->key_count*sizeof(PVFS_ds_keyval)); + myKeys[0] = *tmp_opts->key; + myKeys[1].buffer = strdup("user.pvfs2.mirror.copies"); + myKeys[1].buffer_sz = sizeof("user.pvfs2.mirror.copies"); + free(tmp_opts->key); + tmp_opts->key = myKeys; + }/*end if handles or status*/ + + + + tmp_opts->val[0].buffer = calloc(1, VALBUFSZ); + if (!tmp_opts->val[0].buffer) + { + fprintf(stderr,"Unable to allocate tmp_opts->val[0].buffer.\n"); + exit(EXIT_FAILURE); } - } - else { - if (tmp_opts->val.buffer == NULL) + tmp_opts->val[0].buffer_sz = VALBUFSZ; + + if (tmp_opts->key_count == 2) + { + PVFS_ds_keyval *myVals = malloc(tmp_opts->key_count * + sizeof(PVFS_ds_keyval)); + if (!myVals) + { + fprintf(stderr,"Unable to 
allocate myVals.\n"); + exit(EXIT_FAILURE); + } + memset(myVals,0,tmp_opts->key_count*sizeof(PVFS_ds_keyval)); + myVals[0] = *tmp_opts->val; + free(tmp_opts->val); + + myVals[1].buffer = malloc(sizeof(int)); + if (!myVals[1].buffer) + { + fprintf(stderr,"Unable to allocate myVals[1].buffer.\n"); + exit(EXIT_FAILURE); + } + myVals[1].buffer_sz = sizeof(int); + tmp_opts->val = myVals; + }/*end if*/ + } else { + if (tmp_opts->val[0].buffer == NULL) { - fprintf(stderr, "Please specify value if setting extended attributes\n"); + fprintf(stderr, "Please specify value if setting extended " + "attributes\n"); usage(argc, argv); exit(EXIT_FAILURE); } } - if (tmp_opts->key.buffer == NULL) + if (tmp_opts->key[0].buffer == NULL) { fprintf(stderr, "Please specify key if getting extended attributes\n"); usage(argc, argv); @@ -344,7 +660,8 @@ static struct options* parse_args(int argc, char* argv[]) static void usage(int argc, char** argv) { - fprintf(stderr,"Usage: %s -s {set xattrs} -k -v -t {print attributes} filename\n",argv[0]); + fprintf(stderr,"Usage: %s -s {set xattrs} -k -v " + "-t {print attributes} filename\n",argv[0]); return; } @@ -415,7 +732,7 @@ static int generic_open(file_object *obj, PVFS_credentials *credentials) (char *) obj->u.pvfs2.pvfs2_path, credentials, &resp_lookup, - PVFS2_LOOKUP_LINK_FOLLOW); + PVFS2_LOOKUP_LINK_FOLLOW, NULL); if (ret < 0) { PVFS_perror("PVFS_sys_lookup", ret); @@ -426,7 +743,7 @@ static int generic_open(file_object *obj, PVFS_credentials *credentials) memset(&resp_getattr, 0, sizeof(PVFS_sysresp_getattr)); ret = PVFS_sys_getattr(ref, PVFS_ATTR_SYS_ALL_NOHINT, - credentials, &resp_getattr); + credentials, &resp_getattr, NULL); if (ret) { fprintf(stderr, "Failed to do pvfs2 getattr on %s\n", @@ -443,7 +760,11 @@ static int generic_open(file_object *obj, PVFS_credentials *credentials) obj->u.pvfs2.perms = resp_getattr.attr.perms; memcpy(&obj->u.pvfs2.attr, &resp_getattr.attr, sizeof(PVFS_sys_attr)); - obj->u.pvfs2.attr.mask = 
PVFS_ATTR_SYS_ALL_SETABLE; + /* we should not modify the returned mask, so we know which data fields + * in the attribute structure are valid. I don't see any reason why + * it is being reset here. + */ + //obj->u.pvfs2.attr.mask = PVFS_ATTR_SYS_ALL_SETABLE; obj->u.pvfs2.ref = ref; } return 0; diff --git a/src/apps/devel/module.mk.in b/src/apps/devel/module.mk.in new file mode 100644 index 0000000..fcbe92e --- /dev/null +++ b/src/apps/devel/module.mk.in @@ -0,0 +1,4 @@ +DIR := src/apps/devel + +DEVELSRC := \ + $(DIR)/pvfs2-db-display.c diff --git a/src/apps/devel/pvfs2-db-display.c b/src/apps/devel/pvfs2-db-display.c new file mode 100644 index 0000000..3ea111c --- /dev/null +++ b/src/apps/devel/pvfs2-db-display.c @@ -0,0 +1,540 @@ +/* + * (C) 2010 Clemson University + * + * See COPYING in top-level directory. + */ + +/** \file + * Utility for displaying PVFS Berkeley DBs in textual form for debugging + * */ + + +#include +#include +#include +#include +#include + +#include "pvfs2-types.h" +#include "trove-types.h" +#include "pvfs2-storage.h" +#include "pvfs2-internal.h" + +#define COLLECTION_FILE "collections.db" +#define STORAGE_FILE "storage_attributes.db" +#define DATASPACE_FILE "dataspace_attributes.db" +#define KEYVAL_FILE "keyval.db" +#define COLLECTION_ATTR_FILE "collection_attributes.db" + +/* from src/io/trove/trove-dbpf/dbpf-keyval.c and include/pvfs2-types.h */ +#define DBPF_MAX_KEY_LENGTH 256 + +/* from src/io/trove/trove-dbpf/dbpf-keyval.c */ +struct dbpf_keyval_db_entry +{ + TROVE_handle handle; + char key[DBPF_MAX_KEY_LENGTH]; +}; + +typedef struct +{ + char dbpath[PATH_MAX]; + char hexdir[PATH_MAX]; + int verbose; +} options_t; + +/* globals */ +static options_t opts; +int hex = 0; + +int open_db( DB **db_p, char *path, int type, int flags); +void close_db( DB *db_p ); +void iterate_database(DB *db_p, void (*print)(DBT key, DBT val) ); +void print_collection( DBT key, DBT val ); +void print_storage( DBT key, DBT val ); +void print_dspace( DBT key, 
DBT val ); +void print_keyval( DBT key, DBT val ); +void print_collection_attr( DBT key, DBT val ); +void print_help(char *progname); +void print_ds_type( PVFS_ds_type type ); +int process_args(int argc, char ** argv); + +int main( int argc, char **argv ) +{ + DB *db_p = NULL; + DB_ENV *dbe_p = NULL; + char *path = NULL; + u_int32_t db_flags = DB_RDONLY|DB_THREAD, + env_flags = DB_CREATE | DB_INIT_MPOOL, + type = DB_UNKNOWN; + int ret, path_len; + + if( (ret = process_args( argc, argv)) != 0 ) + { + return ret; + } + + /* allocate space for the longest path */ + path_len = strlen(opts.dbpath) + strlen(opts.hexdir) + + strlen(COLLECTION_ATTR_FILE) + 3; /* padding for / and \0 */ + path = calloc( path_len, sizeof(char) ); + if( path == NULL ) + { + printf("Error allocating path, exiting\n"); + return ENOMEM; + } + + ret = db_env_create(&dbe_p, 0); + if (ret != 0) + { + printf("Error creating env handle: %s\n", db_strerror(ret)); + return -1; + } + + /* Open the environment. */ + ret = dbe_p->open(dbe_p, + opts.dbpath, + env_flags, + 0); + if (ret != 0) + { + printf("Environment open failed: %s", db_strerror(ret)); + return -1; + } + + /* open and print each database */ + + /* collection database */ + sprintf(path, "%s/%s", opts.dbpath, COLLECTION_FILE ); + ret = open_db( &db_p, path, type, db_flags); + if (ret == 0) + { + printf("Collection Database\n"); + iterate_database(db_p, &print_collection ); + close_db(db_p); + } + + /* storage database */ + memset(path, path_len, sizeof(char)); + sprintf(path, "%s/%s", opts.dbpath, STORAGE_FILE ); + ret = open_db( &db_p, path, type, db_flags); + if (ret == 0) + { + printf("Storage Database\n"); + iterate_database(db_p, &print_storage ); + close_db(db_p); + } + + /* dspace database */ + memset(path, path_len, sizeof(char)); + sprintf(path, "%s/%s/%s", opts.dbpath, opts.hexdir, DATASPACE_FILE ); + ret = open_db( &db_p, path, type, db_flags); + if (ret == 0) + { + printf("Dataspace Database\n"); + iterate_database(db_p, 
&print_dspace ); + close_db(db_p); + } + + /* keyval database */ + memset(path, path_len, sizeof(char)); + sprintf(path, "%s/%s/%s", opts.dbpath, opts.hexdir, KEYVAL_FILE ); + ret = open_db( &db_p, path, type, db_flags); + if (ret == 0) + { + printf("Keyval Database\n"); + iterate_database(db_p, &print_keyval ); + close_db(db_p); + } + + /* collection attribute database */ + memset(path, path_len, sizeof(char)); + sprintf(path, "%s/%s/%s", opts.dbpath, opts.hexdir, COLLECTION_ATTR_FILE ); + ret = open_db( &db_p, path, type, db_flags); + if (ret == 0) + { + printf("Collection Attributes Database\n"); + iterate_database(db_p, &print_collection_attr ); + close_db(db_p); + } + + dbe_p->close(dbe_p, 0); + + free(path); + return 0; +} + +int open_db( DB **db_p, char *path, int type, int flags) +{ + int ret = 0; + + ret = db_create(db_p, NULL, 0); + if (ret != 0) + { + close_db( *db_p ); + printf("Couldn't create db_p for %s: %s\n", path, db_strerror(ret)); + return ret; + } + + ret = (*db_p)->open(*db_p, NULL, path, NULL, type, flags, 0 ); + if (ret != 0) + { + close_db( *db_p ); + printf("Couldn't open %s: %s\n", path, db_strerror(ret)); + return ret; + } + return ret; +} + +void close_db( DB *db_p ) +{ + int ret = 0; + if( db_p ) + { + ret = db_p->close(db_p, 0); + } + + if (ret != 0) + { + printf("Couldn't close db_p: %s\n", db_strerror(ret)); + } + return; +} + +void iterate_database(DB *db_p, void (*print)(DBT key, DBT val) ) +{ + int ret = 0; + DBC *dbc_p = NULL; + DBT key, val; + + ret = db_p->cursor(db_p, NULL, &dbc_p, 0); + if( ret != 0 ) + { + printf("Unable to open cursor to print db: %s\n", + db_strerror(ret)); + return; + } + + memset(&key, 0, sizeof(key)); + memset(&val, 0, sizeof(val)); + + printf("-------- Start database --------\n"); + while ((ret = dbc_p->c_get(dbc_p, &key, &val, DB_NEXT)) == 0) + { + print( key, val ); + memset(key.data, 0, key.size); + memset(val.data, 0, val.size); + } + + if( ret != DB_NOTFOUND ) + { + printf("**** an error occurred 
(%s) ****\n", db_strerror(ret)); + } + printf("-------- End database --------\n\n"); + + dbc_p->c_close( dbc_p ); + return; +} + +void print_collection( DBT key, DBT val ) +{ + char *k; + int32_t v; + k = key.data; + v = *(int32_t *)val.data; + if (hex) + printf("(%s)(%d) -> (%x)(%d)\n", k, key.size, v, val.size); + else + printf("(%s)(%d) -> (%d)(%d)\n", k, key.size, v, val.size); + return; +} + +void print_storage( DBT key, DBT val ) +{ + char *k; + int32_t v; + k = key.data; + v = *(int32_t *)val.data; + if (hex) + printf("(%s)(%d) -> (%x)(%d)\n", k, key.size, v, val.size); + else + printf("(%s)(%d) -> (%d)(%d)\n", k, key.size, v, val.size); + return; +} + +void print_dspace( DBT key, DBT val ) +{ + uint64_t k; + struct PVFS_ds_attributes_s *v; + + k = *(uint64_t *)key.data; + v = val.data; + + if (hex) + printf("(%llx)(%d) -> ", llu(k), key.size); + else + printf("(%llu)(%d) -> ", llu(k), key.size); + + print_ds_type( v->type ); + + if (hex) { + printf("(fsid: %d)(handle: %llx)(uid: %u)(gid: %u)" + "(perm: %u)(ctime: %llu)(mtime: %llu)(atime: %llu)(%d)\n", + v->fs_id, llu(v->handle), v->uid, v->gid, v->mode, + llu(v->ctime), llu(v->mtime), llu(v->atime), val.size); + } + else { + printf("(fsid: %d)(handle: %llu)(uid: %u)(gid: %u)" + "(perm: %u)(ctime: %llu)(mtime: %llu)(atime: %llu)(%d)\n", + v->fs_id, llu(v->handle), v->uid, v->gid, v->mode, + llu(v->ctime), llu(v->mtime), llu(v->atime), val.size); + } + + /* union elements are not printed */ + return; +} + +void print_keyval( DBT key, DBT val ) +{ + struct dbpf_keyval_db_entry *k; + uint64_t vh, kh; + uint32_t vi; + + + k = key.data; + if (hex) + printf("(%llx)", llu(k->handle)); + else + printf("(%llu)", llu(k->handle)); + if( key.size == 8 ) + { + printf("()(%d) -> ", key.size); + } + else if( key.size == 16 ) + { + kh = *(uint64_t *)k->key; + if (hex) + printf("(%llx)(%d) -> ", llu(kh), key.size); + else + printf("(%llu)(%d) -> ", llu(kh), key.size); + } + else + { + printf("(%s)(%d) -> ", k->key, 
key.size); + } + + if( strncmp(k->key, "dh", 3) == 0 || strncmp(k->key, "de", 3) == 0 ) + { + int s = 0; + while(s < val.size ) + { + vh = *(uint64_t *)(val.data + s); + if (hex) + printf("(%llx)", llu(vh)); + else + printf("(%llu)", llu(vh)); + s += sizeof(TROVE_handle); + } + printf("(%d)\n", val.size); + + } + + else if( strncmp(k->key, "md", 3) == 0 ) + { + /* just print the name of the distribution, the rest is extra. + * the PINT_dist struct is packed/encoded before writing to db. that + * means the first uint32_t bytes are the length of the string, skip + * it. */ + char *dname = val.data + sizeof(uint32_t); + printf("(%s)(%d)\n", dname, val.size ); + } + + else if( strlen(k->key) > 2 && val.size == 8 ) + { + /* should be cases of filename to handle */ + vh = *(uint64_t *)val.data; + if (hex) + printf("(%llx)(%d)\n", llu(vh), val.size ); + else + printf("(%llu)(%d)\n", llu(vh), val.size ); + } + + else if( (key.size == 8 || key.size == 16 ) && val.size == 4 ) + { + vi = *(uint32_t *)val.data; + if (hex) + printf("(%x)(%d)\n", vi, val.size ); + else + printf("(%u)(%d)\n", vi, val.size ); + } +/* + * not implemented + elseif( strncmp(k->key. "st", 3) == 0 ) + { + + } + + elseif( strncmp(k->key. "de", 3) == 0 ) + { + + } + + elseif( strncmp(k->key. 
"mh", 3) == 0 ) + { + + } + * not implemented +*/ + else + { + /* just print out the size of the data, try not to segfault */ + printf("(%d)\n", val.size); + } + + return; +} + +void print_collection_attr( DBT key, DBT val ) +{ + char *k, *vs; + uint64_t vu; + k = key.data; + printf("(%s)(%d) -> ", k, key.size); + if( val.size == 8 ) + { + vu = *(uint64_t *)val.data; + if (hex) + printf("(%llx)(%d)\n", llu(vu), val.size); + else + printf("(%llu)(%d)\n", llu(vu), val.size); + } + else + { + vs = val.data; + printf("(%s)(%d)\n", vs, val.size); + } + return; +} + +int process_args(int argc, char ** argv) +{ + int ret = 0, option_index = 0; + static struct option long_opts[] = + { + {"help",0,0,0}, + {"verbose",0,0,0}, + {"dbpath",1,0,0}, + {"hexdir",1,0,0}, + {"hexhandles",0,0,0}, + {0,0,0,0} + }; + + memset(&opts, 0, sizeof(options_t)); + + while ((ret = getopt_long(argc, argv, "", long_opts, &option_index)) != -1) + { + switch (option_index) + { + case 0: /* help */ + print_help(argv[0]); + exit(0); + case 1: /* verbose */ + opts.verbose = 1; + break; + + case 2: /*dbpath */ + strncpy(opts.dbpath, optarg, PATH_MAX); + break; + case 3: /* hexdir */ + strncpy(opts.hexdir, optarg, PATH_MAX); + break; + case 4: /* hexhandles */ + hex = 1; + break; + default: + print_help(argv[0]); + return -1; + } + option_index = 0; + } + + if( strncmp( opts.dbpath,"",PATH_MAX ) == 0 ) + { + fprintf(stderr, "\nError: --dbpath option must be given.\n"); + print_help(argv[0]); + return -1; + } + + if( strncmp( opts.hexdir,"",PATH_MAX ) == 0 ) + { + fprintf(stderr, "\nError: --hexdir option must be given.\n"); + print_help(argv[0]); + return -1; + } + + return 0; +} + +void print_help(char *progname) +{ + fprintf(stderr, + "\nThis utility is used to display the contents of a single\n" + "server's Berkeley DB databases in a manner useful for PVFS \n" + "development.\n"); + fprintf(stderr, "\nUsage:\t\t%s --dbpath --hexdir ", + progname); + fprintf(stderr, "\nExample:\t%s --dbpath 
/tmp/pvfs2-space --hexdir " + "4e3f77a5\n", progname); + fprintf(stderr, "\nOptions:\n" + "\t--verbose\t\tEnable verbose output\n" + "\t--help\t\t\tThis message.\n" + "\t--hexhandles\t\tPrint handles in hex\n" + "\t--dbpath \t\tThe path of the server's " + "StorageSpace. The path\n\t\t\t\tshould contain " + "collections.db and \n\t\t\t\tstorage_attributes.db\n" + "\t--hexdir

\t\tThe directory in dbpath that " + "contains\n\t\t\t\tcollection_attributes.db, " + "dataspace_attrbutes.db\n\t\t\t\tand keyval.db\n\n"); + return; +} + +void print_ds_type( PVFS_ds_type type ) +{ + switch( type ) + { + case PVFS_TYPE_NONE: + printf("(type: none)"); + break; + case PVFS_TYPE_METAFILE: + printf("(type: metafile)"); + break; + case PVFS_TYPE_DATAFILE: + printf("(type: datafile)"); + break; + case PVFS_TYPE_DIRECTORY: + printf("(type: directory)"); + break; + case PVFS_TYPE_SYMLINK: + printf("(type: symlink)"); + break; + case PVFS_TYPE_DIRDATA: + printf("(type: dirdata)"); + break; + case PVFS_TYPE_INTERNAL: + printf("(type: internal)"); + break; + default: + printf("type: unknown"); + break; + } +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/apps/fuse/module.mk.in b/src/apps/fuse/module.mk.in new file mode 100644 index 0000000..29d3157 --- /dev/null +++ b/src/apps/fuse/module.mk.in @@ -0,0 +1,13 @@ +ifdef BUILD_FUSE + +DIR := src/apps/fuse + +FUSESRC += \ + $(DIR)/pvfs2fuse.c + +FUSE := $(DIR)/pvfs2fuse + +MODCFLAGS_$(DIR) := @FUSE_CFLAGS@ +MODLDFLAGS_$(DIR) := @FUSE_LDFLAGS@ + +endif # BUILD_FUSE diff --git a/src/apps/fuse/pvfs2fuse.c b/src/apps/fuse/pvfs2fuse.c new file mode 100644 index 0000000..dcab18c --- /dev/null +++ b/src/apps/fuse/pvfs2fuse.c @@ -0,0 +1,1240 @@ +/* + * PVFS FUSE interface + * + * + * (C) 2001 Clemson University and The University of Chicago + * + * (C) 2007 University of Connecticut. All rights reserved. + * + * Author: John A. 
Chandy + * Sumit Narayan + * + * $Date: 2010-12-21 15:34:13 $ + * $Revision: 1.3.8.2 $ + * + * Documentation: http://www.engr.uconn.edu/~sun03001/docs/pvfs2fuse-rpt.pdf + */ + +/* char *pvfs2fuse_version = "$Id: pvfs2fuse.c,v 1.3.8.2 2010-12-21 15:34:13 mtmoore Exp $"; */ +char *pvfs2fuse_version = "0.01"; + +#define FUSE_USE_VERSION 27 + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pvfs2-compat.h" +#include "pint-dev-shared.h" +#include "pint-util.h" +#include "str-utils.h" + +typedef struct { + PVFS_object_ref ref; + PVFS_credentials creds; +} pvfs_fuse_handle_t; + +struct pvfs2fuse { + char *fs_spec; + char *mntpoint; + PVFS_fs_id fs_id; + struct PVFS_sys_mntent mntent; +}; + +static struct pvfs2fuse pvfs2fuse; + +#if __LP64__ +#define SET_FUSE_HANDLE( fi, pfh ) \ + fi->fh = (uint64_t)pfh +#define GET_FUSE_HANDLE( fi ) \ + (pvfs_fuse_handle_t *)fi->fh +#else +#define SET_FUSE_HANDLE( fi, pfh ) \ + *((pvfs_fuse_handle_t **)(&fi->fh)) = pfh +#define GET_FUSE_HANDLE( fi ) \ + *((pvfs_fuse_handle_t **)(&fi->fh)) +#endif + +#define PVFS_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) +#define THIS_PVFS_VERSION \ + PVFS_VERSION(PVFS2_VERSION_MAJOR, PVFS2_VERSION_MINOR, PVFS2_VERSION_SUB) + +#if THIS_PVFS_VERSION > PVFS_VERSION(2,6,3) +#define PVFS_ERROR_TO_ERRNO_N(x) (-1)*PVFS_ERROR_TO_ERRNO(x) +#else +#define PVFS_ERROR_TO_ERRNO_N(x) PVFS_ERROR_TO_ERRNO(x) +#endif + +static void pvfs_fuse_gen_credentials( + PVFS_credentials *credentials) +{ + credentials->uid = fuse_get_context()->uid; + credentials->gid = fuse_get_context()->gid; +} + +static int lookup( const char *path, pvfs_fuse_handle_t *pfh, + int32_t follow_link ) +{ + PVFS_sysresp_lookup lk_response; + int ret; + + /* we don't have to do a PVFS_util_resolve + * because FUSE resolves the path for us + */ + + pvfs_fuse_gen_credentials(&pfh->creds); + + memset(&lk_response, 0, sizeof(lk_response)); + ret = PVFS_sys_lookup(pvfs2fuse.fs_id, + (char *)path, + 
&pfh->creds, + &lk_response, + follow_link); + if ( ret < 0 ) { + return ret; + } + + pfh->ref.handle = lk_response.ref.handle; + pfh->ref.fs_id = pvfs2fuse.fs_id; + + return 0; +} + +static int pvfs_fuse_getattr_pfhp(pvfs_fuse_handle_t *pfhp, struct stat *stbuf) +{ + PVFS_sysresp_getattr getattr_response; + PVFS_sys_attr* attrs; + int ret; + int perm_mode = 0; + + memset(&getattr_response,0, sizeof(PVFS_sysresp_getattr)); + + ret = PVFS_sys_getattr(pfhp->ref, + PVFS_ATTR_SYS_ALL_NOHINT, + (PVFS_credentials *) &pfhp->creds, + &getattr_response); + if ( ret < 0 ) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + memset(stbuf, 0, sizeof(struct stat)); + + /* Code copied from kernel/linux-2.x/pvfs2-utils.c */ + + /* + arbitrarily set the inode block size; FIXME: we need to + resolve the difference between the reported inode blocksize + and the PAGE_CACHE_SIZE, since our block count will always + be wrong. + + For now, we're setting the block count to be the proper + number assuming the block size is 512 bytes, and the size is + rounded up to the nearest 4K. This is apparently required + to get proper size reports from the 'du' shell utility. + + */ + + attrs = &getattr_response.attr; + + if (attrs->objtype == PVFS_TYPE_METAFILE) + { + if (attrs->mask & PVFS_ATTR_SYS_SIZE) + { + size_t inode_size = attrs->size; + size_t rounded_up_size = (inode_size + (4096 - (inode_size % 4096))); + + stbuf->st_size = inode_size; + stbuf->st_blocks = (unsigned long)(rounded_up_size / 512); + } + } + else if ((attrs->objtype == PVFS_TYPE_SYMLINK) && + (attrs->link_target != NULL)) + { + stbuf->st_size = strlen(attrs->link_target); + } + else + { + /* what should this be??? 
*/ + unsigned long PAGE_CACHE_SIZE = 4096; + stbuf->st_blocks = (unsigned long)(PAGE_CACHE_SIZE / 512); + stbuf->st_size = PAGE_CACHE_SIZE; + } + + stbuf->st_uid = attrs->owner; + stbuf->st_gid = attrs->group; + + stbuf->st_atime = (time_t)attrs->atime; + stbuf->st_mtime = (time_t)attrs->mtime; + stbuf->st_ctime = (time_t)attrs->ctime; + + stbuf->st_mode = 0; + if (attrs->perms & PVFS_O_EXECUTE) + perm_mode |= S_IXOTH; + if (attrs->perms & PVFS_O_WRITE) + perm_mode |= S_IWOTH; + if (attrs->perms & PVFS_O_READ) + perm_mode |= S_IROTH; + + if (attrs->perms & PVFS_G_EXECUTE) + perm_mode |= S_IXGRP; + if (attrs->perms & PVFS_G_WRITE) + perm_mode |= S_IWGRP; + if (attrs->perms & PVFS_G_READ) + perm_mode |= S_IRGRP; + + if (attrs->perms & PVFS_U_EXECUTE) + perm_mode |= S_IXUSR; + if (attrs->perms & PVFS_U_WRITE) + perm_mode |= S_IWUSR; + if (attrs->perms & PVFS_U_READ) + perm_mode |= S_IRUSR; + + if (attrs->perms & PVFS_G_SGID) + perm_mode |= S_ISGID; + + /* Should we honor the suid bit of the file? */ + /* FIXME should we check the file system suid flag */ + if ( /* get_suid_flag(inode) == 1 && */ (attrs->perms & PVFS_U_SUID)) + perm_mode |= S_ISUID; + + stbuf->st_mode |= perm_mode; + + /* FIXME special case: mark the root inode as sticky + if (is_root_handle(inode)) + { + inode->i_mode |= S_ISVTX; + } + */ + switch (attrs->objtype) + { + case PVFS_TYPE_METAFILE: + stbuf->st_mode |= S_IFREG; + break; + case PVFS_TYPE_DIRECTORY: + stbuf->st_mode |= S_IFDIR; + /* NOTE: we have no good way to keep nlink consistent for + * directories across clients; keep constant at 1. Why 1? 
If + * we go with 2, then find(1) gets confused and won't work + * properly withouth the -noleaf option */ + stbuf->st_nlink = 1; + break; + case PVFS_TYPE_SYMLINK: + stbuf->st_mode |= S_IFLNK; + break; + default: + break; + } + + stbuf->st_dev = pfhp->ref.fs_id; + stbuf->st_ino = pfhp->ref.handle; + + stbuf->st_rdev = 0; + stbuf->st_blksize = 4096; + + PVFS_util_release_sys_attr(attrs); + + return 0; +} + +static int pvfs_fuse_getattr(const char *path, struct stat *stbuf) +{ + int ret; + pvfs_fuse_handle_t pfh; + + ret = lookup( path, &pfh, PVFS2_LOOKUP_LINK_NO_FOLLOW ); + if ( ret < 0 ) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + return pvfs_fuse_getattr_pfhp( &pfh, stbuf ); +} + +static int pvfs_fuse_fgetattr(const char *path, struct stat *stbuf, + struct fuse_file_info *fi) +{ + return pvfs_fuse_getattr_pfhp( GET_FUSE_HANDLE( fi ), stbuf ); +} + +static int pvfs_fuse_readlink(const char *path, char *buf, size_t size) +{ + PVFS_sysresp_getattr getattr_response; + int ret; + size_t len; + pvfs_fuse_handle_t pfh; + + ret = lookup( path, &pfh, PVFS2_LOOKUP_LINK_NO_FOLLOW ); + if ( ret < 0 ) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + ret = PVFS_sys_getattr(pfh.ref, + PVFS_ATTR_SYS_ALL_NOHINT, + (PVFS_credentials *) &pfh.creds, + &getattr_response); + if ( ret < 0 ) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + if (getattr_response.attr.objtype != PVFS_TYPE_SYMLINK) + return -EINVAL; + + len = strlen( getattr_response.attr.link_target ); + if ( len < (size-1) ) + size = len; + + bcopy( getattr_response.attr.link_target, buf, size ); + + buf[len] = '\0'; + + return 0; +} + +static int pvfs_fuse_mkdir(const char *path, mode_t mode) +{ + int rc; + int num_segs; + PVFS_sys_attr attr; + char parent[PVFS_NAME_MAX]; + char dirname[PVFS_SEGMENT_MAX]; + pvfs_fuse_handle_t parent_pfh; + + PVFS_sysresp_mkdir resp_mkdir; + + /* Translate path into pvfs2 relative path */ + rc = PINT_get_base_dir((char *)path, parent, PVFS_NAME_MAX); + num_segs = PINT_string_count_segments((char 
*)path); + rc = PINT_get_path_element((char *)path, num_segs - 1, + dirname, PVFS_SEGMENT_MAX); + + if (rc) + { + return PVFS_ERROR_TO_ERRNO_N( rc ); + } + + lookup( parent, &parent_pfh, PVFS2_LOOKUP_LINK_FOLLOW ); + + /* Set attributes */ + memset(&attr, 0, sizeof(PVFS_sys_attr)); + attr.owner = parent_pfh.creds.uid; + attr.group = parent_pfh.creds.gid; + attr.perms = mode; + attr.mask = PVFS_ATTR_SYS_ALL_SETABLE; + + rc = PVFS_sys_mkdir(dirname, + parent_pfh.ref, + attr, + &parent_pfh.creds, + &resp_mkdir); + if (rc) + { + return PVFS_ERROR_TO_ERRNO_N( rc ); + } + + return 0; +} + +static int pvfs_fuse_remove( const char *path ) +{ + int rc; + int num_segs; + char parent[PVFS_NAME_MAX]; + char filename[PVFS_SEGMENT_MAX]; + pvfs_fuse_handle_t parent_pfh; + + /* Translate path into pvfs2 relative path */ + rc = PINT_get_base_dir((char *)path, parent, PVFS_NAME_MAX); + num_segs = PINT_string_count_segments((char *)path); + rc = PINT_get_path_element((char *)path, num_segs - 1, + filename, PVFS_SEGMENT_MAX); + + if (rc) + { + return PVFS_ERROR_TO_ERRNO_N( rc ); + } + + lookup( parent, &parent_pfh, PVFS2_LOOKUP_LINK_FOLLOW ); + + rc = PVFS_sys_remove(filename, parent_pfh.ref, &parent_pfh.creds); + if (rc) + { + return PVFS_ERROR_TO_ERRNO_N( rc ); + } + + return 0; +} + +static int pvfs_fuse_unlink(const char *path) +{ + return pvfs_fuse_remove(path); +} + +static int pvfs_fuse_rmdir(const char *path) +{ + return pvfs_fuse_remove(path); +} + +static int pvfs_fuse_symlink(const char *from, const char *to) +{ + int ret = 0; + PVFS_sys_attr attr; + PVFS_sysresp_lookup resp_lookup; + PVFS_object_ref parent_ref; + PVFS_sysresp_symlink resp_sym; + PVFS_credentials credentials; + pvfs_fuse_handle_t dir_pfh; + char *tofile, *todir, *cp; + + pvfs_fuse_gen_credentials(&credentials); + + /* Initialize any variables */ + memset(&attr, 0, sizeof(attr)); + memset(&resp_lookup, 0, sizeof(resp_lookup)); + memset(&parent_ref, 0, sizeof(parent_ref)); + memset(&resp_sym, 0, 
sizeof(resp_sym)); + + /* Set the attributes for the new directory */ + attr.owner = credentials.uid; + attr.group = credentials.gid; + attr.perms = 0777; + attr.mask = (PVFS_ATTR_SYS_ALL_SETABLE); + + todir = strdup( to ); + if ( todir == NULL ) + return -ENOMEM; + + /* find the last / to get the parent directory */ + cp = rindex( todir, '/' ); + if ( cp == NULL ) + { + free( todir ); + return -ENOTDIR; + } + tofile = strdup( cp+1 ); + if ( cp == todir ) + { + /* we're creating a link at the root, so keep the slash */ + *(cp+1) = '\0'; + } + else + { + *cp = '\0'; + } + + ret = lookup( todir, &dir_pfh, PVFS2_LOOKUP_LINK_FOLLOW ); + if(ret < 0) + { + free( tofile ); + free( todir ); + PVFS_perror("lookup", ret); + return(-1); + } + + ret = PVFS_sys_symlink(tofile, + dir_pfh.ref, + (char *) from, + attr, + &credentials, + &resp_sym); + + if (ret < 0) + { + PVFS_perror("PVFS_sys_symlink", ret); + return(ret); + } + else + { + ret = 0; + } + + free( tofile ); + free( todir ); + return(ret); +} + +static int pvfs_fuse_rename(const char *from, const char *to) +{ + int rc; + int num_segs; + char fromdir[PVFS_NAME_MAX], todir[PVFS_NAME_MAX]; + char fromname[PVFS_SEGMENT_MAX], toname[PVFS_SEGMENT_MAX]; + pvfs_fuse_handle_t todir_pfh, fromdir_pfh; + + /* Translate path into pvfs2 relative path */ + rc = PINT_get_base_dir((char *)from, fromdir, PVFS_NAME_MAX); + num_segs = PINT_string_count_segments((char *)from); + rc = PINT_get_path_element((char *)from, num_segs - 1, + fromname, PVFS_SEGMENT_MAX); + + if (rc) + return PVFS_ERROR_TO_ERRNO_N( rc ); + + rc = lookup( fromdir, &fromdir_pfh, PVFS2_LOOKUP_LINK_FOLLOW ); + if (rc < 0) + return PVFS_ERROR_TO_ERRNO_N( rc ); + + /* Translate path into pvfs2 relative path */ + rc = PINT_get_base_dir((char *)to, todir, PVFS_NAME_MAX); + num_segs = PINT_string_count_segments((char *)to); + rc = PINT_get_path_element((char *)to, num_segs - 1, + toname, PVFS_SEGMENT_MAX); + + if (rc) + return PVFS_ERROR_TO_ERRNO_N( rc ); + + lookup( 
todir, &todir_pfh, PVFS2_LOOKUP_LINK_FOLLOW ); + if (rc < 0) + return PVFS_ERROR_TO_ERRNO_N( rc ); + + rc = PVFS_sys_rename(fromname, + fromdir_pfh.ref, + toname, + todir_pfh.ref, + &todir_pfh.creds); + if (rc) + return PVFS_ERROR_TO_ERRNO_N( rc ); + + return 0; +} + +static int pvfs_fuse_chmod(const char *path, mode_t mode) +{ + int ret; + PVFS_sys_attr new_attr; + + pvfs_fuse_handle_t pfh; + + ret = lookup( path, &pfh, PVFS2_LOOKUP_LINK_FOLLOW ); + if ( ret < 0 ) + return PVFS_ERROR_TO_ERRNO_N( ret ); + /* FUSE passes in 5 octets in 'mode'. However, the the first + * octet is not related to permissions, hence checking only + * the lower 4 octets */ + new_attr.perms = mode & 07777; + new_attr.mask = PVFS_ATTR_SYS_PERM; + + ret = PVFS_sys_setattr(pfh.ref,new_attr,&pfh.creds); + if (ret < 0) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + return 0; +} + +static int pvfs_fuse_chown(const char *path, uid_t uid, gid_t gid) +{ + int ret; + PVFS_sys_attr new_attr; + + pvfs_fuse_handle_t pfh; + + ret = lookup( path, &pfh, PVFS2_LOOKUP_LINK_FOLLOW ); + if ( ret < 0 ) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + new_attr.owner = uid; + new_attr.group = gid; + new_attr.mask = PVFS_ATTR_SYS_UID | PVFS_ATTR_SYS_GID; + + ret = PVFS_sys_setattr(pfh.ref,new_attr,&pfh.creds); + if (ret < 0) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + return 0; +} + +static int pvfs_fuse_truncate(const char *path, off_t size) +{ + int ret; + pvfs_fuse_handle_t pfh; + + ret = lookup( path, &pfh, PVFS2_LOOKUP_LINK_FOLLOW ); + if ( ret < 0 ) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + ret = PVFS_sys_truncate(pfh.ref,size,&pfh.creds); + if (ret < 0) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + return 0; +} + +static int pvfs_fuse_utime(const char *path, struct utimbuf *timbuf) +{ + int ret; + PVFS_sys_attr new_attr; + + pvfs_fuse_handle_t pfh; + + ret = lookup( path, &pfh, PVFS2_LOOKUP_LINK_FOLLOW ); + if ( ret < 0 ) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + new_attr.atime = (PVFS_time)timbuf->actime; + 
new_attr.mtime = (PVFS_time)timbuf->modtime; + new_attr.mask = PVFS_ATTR_SYS_ATIME | PVFS_ATTR_SYS_MTIME; + + ret = PVFS_sys_setattr(pfh.ref,new_attr,&pfh.creds); + if (ret < 0) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + return 0; +} + +static int pvfs_fuse_open(const char *path, struct fuse_file_info *fi) +{ + pvfs_fuse_handle_t *pfhp; + int ret; + + pfhp = (pvfs_fuse_handle_t *)malloc( sizeof( pvfs_fuse_handle_t ) ); + if (pfhp == NULL) + { + return -ENOMEM; + } + + ret = lookup( path, pfhp, PVFS2_LOOKUP_LINK_FOLLOW ); + if ( ret < 0 ) { + free( pfhp ); + return PVFS_ERROR_TO_ERRNO_N( ret ); + } + + SET_FUSE_HANDLE( fi, pfhp ); + + return 0; +} + +static int pvfs_fuse_read(const char *path, char *buf, size_t size, off_t offset, + struct fuse_file_info *fi) +{ + PVFS_Request mem_req, file_req; + PVFS_sysresp_io resp_io; + int ret; + pvfs_fuse_handle_t *pfh = GET_FUSE_HANDLE( fi ); + + file_req = PVFS_BYTE; + ret = PVFS_Request_contiguous(size, PVFS_BYTE, &mem_req); + if (ret < 0) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + ret = PVFS_sys_read(pfh->ref, file_req, offset, buf, + mem_req, &pfh->creds, &resp_io); + if (ret == 0) + { + PVFS_Request_free(&mem_req); + return(resp_io.total_completed); + } + else + return PVFS_ERROR_TO_ERRNO_N( ret ); +} + +static int pvfs_fuse_write(const char *path, const char *buf, size_t size, + off_t offset, struct fuse_file_info *fi) +{ + PVFS_Request mem_req, file_req; + PVFS_sysresp_io resp_io; + int ret; + pvfs_fuse_handle_t *pfh = GET_FUSE_HANDLE( fi ); + + file_req = PVFS_BYTE; + ret = PVFS_Request_contiguous(size, PVFS_BYTE, &mem_req); + if (ret < 0) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + ret = PVFS_sys_write(pfh->ref, file_req, offset, (char*)buf, + mem_req, &pfh->creds, &resp_io); + if (ret == 0) + { + PVFS_Request_free(&mem_req); + return(resp_io.total_completed); + } + else + return PVFS_ERROR_TO_ERRNO_N( ret ); +} + +static int pvfs_fuse_statfs(const char *path, struct statvfs *stbuf) +{ + int ret; + PVFS_credentials 
creds; + PVFS_sysresp_statfs resp_statfs; + + pvfs_fuse_gen_credentials(&creds); + + /* gather normal statfs statistics from system interface */ + + ret = PVFS_sys_statfs(pvfs2fuse.fs_id, &creds, &resp_statfs); + if (ret < 0) + { + if(ret != ERANGE) + return PVFS_ERROR_TO_ERRNO_N( ret ); + } + + memcpy(&stbuf->f_fsid, &resp_statfs.statfs_buf.fs_id, + sizeof(resp_statfs.statfs_buf.fs_id)); + /* FIXME is this bsize right? */ + + stbuf->f_bsize = PVFS2_BUFMAP_DEFAULT_DESC_SIZE; + stbuf->f_frsize = PVFS2_BUFMAP_DEFAULT_DESC_SIZE; + stbuf->f_namemax = PVFS_NAME_MAX; + + stbuf->f_blocks = resp_statfs.statfs_buf.bytes_total / stbuf->f_bsize; + stbuf->f_bfree = resp_statfs.statfs_buf.bytes_available / stbuf->f_bsize; + stbuf->f_bavail = resp_statfs.statfs_buf.bytes_available / stbuf->f_bsize; + stbuf->f_files = resp_statfs.statfs_buf.handles_total_count; + stbuf->f_ffree = resp_statfs.statfs_buf.handles_available_count; + stbuf->f_favail = resp_statfs.statfs_buf.handles_available_count; + + stbuf->f_flag = 0; + + return 0; +} + +static int pvfs_fuse_release(const char *path, struct fuse_file_info *fi) +{ + pvfs_fuse_handle_t *pfh = GET_FUSE_HANDLE( fi ); + + if ( pfh != NULL ) { + free( pfh ); + SET_FUSE_HANDLE( fi, NULL ); + } + + return 0; +} + +static int pvfs_fuse_fsync(const char *path, int isdatasync, + struct fuse_file_info *fi) +{ + /* Just a stub. 
This method is optional and can safely be left + unimplemented */ + + (void) path; + (void) isdatasync; + (void) fi; + + return 0; +} + +#define MAX_NUM_DIRENTS 32 + +static int pvfs_fuse_readdir(const char *path, void *buf, fuse_fill_dir_t filler, + off_t offset, struct fuse_file_info *fi) +{ + int ret; + PVFS_ds_position token; + pvfs_fuse_handle_t pfh; + int pvfs_dirent_incount; + PVFS_sysresp_readdir rd_response; + + ret = lookup( path, &pfh, PVFS2_LOOKUP_LINK_FOLLOW ); + if ( ret < 0 ) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + pvfs_dirent_incount = MAX_NUM_DIRENTS; + token = 0; + do + { + char *cur_file = NULL; + int i; + + memset(&rd_response, 0, sizeof(PVFS_sysresp_readdir)); + ret = PVFS_sys_readdir( + pfh.ref, (!token ? PVFS_READDIR_START : token), + pvfs_dirent_incount, &pfh.creds, &rd_response); + if(ret < 0) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + for(i = 0; i < rd_response.pvfs_dirent_outcount; i++) + { + cur_file = rd_response.dirent_array[i].d_name; + + if (filler(buf, cur_file, NULL, 0)) + break; + } + token += rd_response.pvfs_dirent_outcount; + + if (rd_response.pvfs_dirent_outcount) + { + free(rd_response.dirent_array); + rd_response.dirent_array = NULL; + } + + } while(rd_response.pvfs_dirent_outcount == pvfs_dirent_incount); + + return 0; +} + +static int pvfs_fuse_access(const char *path, int mask) +{ + PVFS_sysresp_getattr getattr_response; + PVFS_sys_attr* attrs; + int ret; + pvfs_fuse_handle_t pfh; + int in_group_flag = 0; + + ret = lookup( path, &pfh, PVFS2_LOOKUP_LINK_FOLLOW ); + if ( ret < 0 ) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + /* give root permission, no matter what */ + if ( pfh.creds.uid == 0 ) + return 0; + + /* if checking for file existence, return 0 */ + if ( mask == F_OK ) + return 0; + + ret = PVFS_sys_getattr(pfh.ref, + PVFS_ATTR_SYS_ALL_NOHINT, + (PVFS_credentials *) &pfh.creds, + &getattr_response); + if ( ret < 0 ) + return PVFS_ERROR_TO_ERRNO_N( ret ); + + attrs = &getattr_response.attr; + + /* basic code is 
copied from PINT_check_mode() */ + + /* see if uid matches object owner */ + if ( attrs->owner == pfh.creds.uid ) + { + /* see if object user permissions match access type */ + if( (mask & R_OK) && (attrs->perms & PVFS_U_READ)) + { + return(0); + } + if( (mask & W_OK) && (attrs->perms & PVFS_U_WRITE)) + { + return(0); + } + if( (mask & X_OK) && (attrs->perms & PVFS_U_EXECUTE)) + { + return(0); + } + } + + /* see if other bits allow access */ + if( (mask & R_OK) && (attrs->perms & PVFS_O_READ)) + { + return(0); + } + if( (mask & W_OK) && (attrs->perms & PVFS_O_WRITE)) + { + return(0); + } + if( (mask & X_OK) && (attrs->perms & PVFS_O_EXECUTE)) + { + return(0); + } + + /* see if gid matches object group */ + if(attrs->group == pfh.creds.gid) + { + /* default group match */ + in_group_flag = 1; + } + else + { +#if 0 + /* no default group match, check supplementary groups */ + ret = PINT_check_group(pfh.creds.uid, attrs->group); + if(ret == 0) + { + in_group_flag = 1; + } + else + { + if(ret != -PVFS_ENOENT) + { + /* system error; not just failed match */ + return(ret); + } + } +#endif + } + + if(in_group_flag) + { + /* see if object group permissions match access type */ + if( (mask & R_OK) && (attrs->perms & PVFS_G_READ)) + { + return(0); + } + if( (mask & W_OK) && (attrs->perms & PVFS_G_WRITE)) + { + return(0); + } + if( (mask & X_OK) && (attrs->perms & PVFS_G_EXECUTE)) + { + return(0); + } + } + + /* default case: access denied */ + return -EACCES; +} + +static int pvfs_fuse_create(const char *path, mode_t mode, + struct fuse_file_info *fi) +{ + int rc; + int num_segs; + PVFS_sys_attr attr; + char directory[PVFS_NAME_MAX]; + char filename[PVFS_SEGMENT_MAX]; + pvfs_fuse_handle_t dir_pfh, *pfhp; + + PVFS_sysresp_create resp_create; + + /* Translate path into pvfs2 relative path */ + rc = PINT_get_base_dir((char *)path, directory, PVFS_NAME_MAX); + num_segs = PINT_string_count_segments((char *)path); + rc = PINT_get_path_element((char *)path, num_segs - 1, + filename, 
PVFS_SEGMENT_MAX); + + if (rc) + { + return PVFS_ERROR_TO_ERRNO_N( rc ); + } + + rc = lookup( directory, &dir_pfh, PVFS2_LOOKUP_LINK_FOLLOW ); + if ( rc < 0 ) + return PVFS_ERROR_TO_ERRNO_N( rc ); + + /* Set attributes */ + memset(&attr, 0, sizeof(PVFS_sys_attr)); + attr.owner = dir_pfh.creds.uid; + attr.group = dir_pfh.creds.gid; + attr.perms = mode; + attr.atime = time(NULL); + attr.mtime = attr.atime; + attr.mask = PVFS_ATTR_SYS_ALL_SETABLE; + attr.dfile_count = 0; + + rc = PVFS_sys_create(filename, + dir_pfh.ref, + attr, + &dir_pfh.creds, + NULL, + &resp_create); + if (rc) + { + /* FIXME + * the PVFS2 server code returns a ENOENT instead of an EACCES + * because it does a ACL lookup for the system.posix_acl_access + * which returns a ENOENT from the TROVE DBPF and that error is + * just passed up in prelude_check_acls (server/prelude.c). I'm + * not sure that's the right thing to do. + */ + if ( rc == -PVFS_ENOENT ) + { + return -EACCES; + } + return PVFS_ERROR_TO_ERRNO_N( rc ); + } + + pfhp = (pvfs_fuse_handle_t *)malloc( sizeof( pvfs_fuse_handle_t ) ); + if (pfhp == NULL) + { + return -ENOMEM; + } + + pfhp->ref = resp_create.ref; + pfhp->creds = dir_pfh.creds; + + SET_FUSE_HANDLE( fi, pfhp ); + + return 0; +} + +static struct fuse_operations pvfs_fuse_oper = { + .getattr = pvfs_fuse_getattr, + .fgetattr = pvfs_fuse_fgetattr, + .readlink = pvfs_fuse_readlink, + .mkdir = pvfs_fuse_mkdir, + .unlink = pvfs_fuse_unlink, + .rmdir = pvfs_fuse_rmdir, + .symlink = pvfs_fuse_symlink, + .rename = pvfs_fuse_rename, + /* .link = pvfs_fuse_link, */ /* hard links not supported on PVFS */ + .chmod = pvfs_fuse_chmod, + .chown = pvfs_fuse_chown, + .truncate = pvfs_fuse_truncate, + .utime = pvfs_fuse_utime, + .open = pvfs_fuse_open, + .read = pvfs_fuse_read, + .write = pvfs_fuse_write, + .statfs = pvfs_fuse_statfs, +/* .flush = pvfs_fuse_flush, */ + .release = pvfs_fuse_release, + .fsync = pvfs_fuse_fsync, + .readdir = pvfs_fuse_readdir, + .access = pvfs_fuse_access, + .create 
= pvfs_fuse_create, +}; + +enum { + KEY_HELP, + KEY_VERSION, +}; + +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#define PVFS2FUSE_OPT(t, p, v) { t, offsetof(struct pvfs2fuse, p), v } + +static struct fuse_opt pvfs2fuse_opts[] = { + PVFS2FUSE_OPT("fs_spec=%s", fs_spec, 0), + + FUSE_OPT_KEY("-V", KEY_VERSION), + FUSE_OPT_KEY("--version", KEY_VERSION), + FUSE_OPT_KEY("-h", KEY_HELP), + FUSE_OPT_KEY("--help", KEY_HELP), + FUSE_OPT_END +}; + +static void usage(const char *progname) +{ + fprintf(stderr, + "usage: %s mountpoint [options]\n" + "\n" + "general options:\n" + " -o opt,[opt...] mount options\n" + " -h --help print help\n" + " -V --version print version\n" + "\n" + "PVFS2FUSE options:\n" + " -o fs_spec=FS_SPEC PVFS2 fs_spec URI (eg. tcp://localhost:3334/pvfs2-fs)\n" + "\n", progname); +} + +static int pvfs_fuse_main(struct fuse_args *args) +{ +#if FUSE_VERSION >= 26 + return fuse_main(args->argc, args->argv, &pvfs_fuse_oper, NULL); +#else + return fuse_main(args->argc, args->argv, &pvfs_fuse_oper); +#endif +} + +static int pvfs2fuse_opt_proc(void *data, const char *arg, int key, + struct fuse_args *outargs) +{ + (void) data; + + switch (key) { + case FUSE_OPT_KEY_OPT: + return 1; + + case FUSE_OPT_KEY_NONOPT: + if (!pvfs2fuse.mntpoint) { + if(!arg) + { + fprintf(stderr, "PVFS2FUSE requires mountpoint as argument\n"); + abort(); + } + + pvfs2fuse.mntpoint = strdup(arg); + } + return 1; + + case KEY_HELP: + usage(outargs->argv[0]); + /* FIXME don't show the FUSE arguments + fuse_opt_add_arg(outargs, "-ho"); + pvfs_fuse_main(outargs); */ + exit(1); + + case KEY_VERSION: + fprintf(stderr, "PVFS2FUSE version %s (PVFS2 %s) (%s, %s)\n", + pvfs2fuse_version, PVFS2_VERSION, __DATE__, __TIME__); +#if FUSE_VERSION >= 25 + fuse_opt_add_arg(outargs, "--version"); + pvfs_fuse_main(outargs); +#endif + exit(0); + + default: + fprintf(stderr, "internal error\n"); + abort(); + } +} + +int main(int argc, char *argv[]) +{ + int ret; + struct fuse_args args = 
FUSE_ARGS_INIT(argc, argv); + + umask(0); + + if (fuse_opt_parse(&args, &pvfs2fuse, pvfs2fuse_opts, + pvfs2fuse_opt_proc) == -1 ) + exit(1); + + if (pvfs2fuse.fs_spec == NULL) + { + ret = PVFS_util_init_defaults(); + if(ret < 0) + { + PVFS_perror("PVFS_util_init_defaults", ret); + return(-1); + } + + ret = PVFS_util_get_default_fsid(&pvfs2fuse.fs_id); + if( ret < 0 ) + { + PVFS_perror("No default PVFS2 filesystem found", ret); + return(-1); + } + + PVFS_util_get_mntent_copy( pvfs2fuse.fs_id, &pvfs2fuse.mntent ); + /* Set timeouts for PVFS2's name cache and attribute cache */ + PVFS_sys_set_info(PVFS_SYS_ACACHE_TIMEOUT_MSECS, 0); + PVFS_sys_set_info(PVFS_SYS_NCACHE_TIMEOUT_MSECS, 0); + } + else + { + struct PVFS_sys_mntent *me = &pvfs2fuse.mntent; + char *cp; + int cur_server; + + /* the following is copied from PVFS_util_init_defaults() + in fuse/lib/pvfs2-util.c */ + + /* initialize pvfs system interface */ + ret = PVFS_sys_initialize(GOSSIP_NO_DEBUG); + if (ret < 0) + { + return(ret); + } + + /* the following is copied from PVFS_util_parse_pvfstab() + in fuse/lib/pvfs2-util.c */ + memset( me, 0, sizeof(pvfs2fuse.mntent) ); + + /* Enable integrity checks by default */ + me->integrity_check = 1; + /* comma-separated list of ways to contact a config server */ + me->num_pvfs_config_servers = 1; + + for (cp=pvfs2fuse.fs_spec; *cp; cp++) + if (*cp == ',') + ++me->num_pvfs_config_servers; + + /* allocate room for our copies of the strings */ + me->pvfs_config_servers = + malloc(me->num_pvfs_config_servers * + sizeof(*me->pvfs_config_servers)); + if (!me->pvfs_config_servers) + exit(-1); + memset(me->pvfs_config_servers, 0, + me->num_pvfs_config_servers * sizeof(*me->pvfs_config_servers)); + + me->mnt_dir = strdup(pvfs2fuse.mntpoint); + me->mnt_opts = NULL; + + cp = pvfs2fuse.fs_spec; + cur_server = 0; + for (;;) { + char *tok; + int slashcount; + char *slash; + char *last_slash; + + tok = strsep(&cp, ","); + if (!tok) break; + + slash = tok; + slashcount = 0; + while 
((slash = index(slash, '/'))) + { + slash++; + slashcount++; + } + if (slashcount != 3) + { + fprintf(stderr,"Error: invalid FS spec: %s\n", + pvfs2fuse.fs_spec); + exit(-1); + } + + /* find a reference point in the string */ + last_slash = rindex(tok, '/'); + *last_slash = '\0'; + + /* config server and fs name are a special case, take one + * string and split it in half on "/" delimiter + */ + me->pvfs_config_servers[cur_server] = strdup(tok); + if (!me->pvfs_config_servers[cur_server]) + exit(-1); + + ++last_slash; + + if (cur_server == 0) { + me->pvfs_fs_name = strdup(last_slash); + if (!me->pvfs_fs_name) + exit(-1); + } else { + if (strcmp(last_slash, me->pvfs_fs_name) != 0) { + fprintf(stderr, + "Error: different fs names in server addresses: %s\n", + pvfs2fuse.fs_spec); + exit(-1); + } + } + ++cur_server; + } + + /* FIXME flowproto should be an option */ + me->flowproto = FLOWPROTO_DEFAULT; + + /* FIXME encoding should be an option */ + me->encoding = PVFS2_ENCODING_DEFAULT; + + /* FIXME default_num_dfiles should be an option */ + + ret = PVFS_sys_fs_add(me); + if( ret < 0 ) + { + PVFS_perror("Could not add mnt entry", ret); + return(-1); + } + pvfs2fuse.fs_id = me->fs_id; + } + + /* FIXME should we allow all the FUSE options? Maybe we should + * pass only some of the FUSE options to fuse_main. For now, + * force the direct_io and allow_other options. Also turn off + * multithreaded operation since it doesnt work with PVFS. 
+ */ + + fuse_opt_insert_arg( &args, 1, "-odirect_io" ); + fuse_opt_insert_arg( &args, 1, "-oattr_timeout=0"); + fuse_opt_insert_arg( &args, 1, "-omax_write=524288"); + if ( getuid() == 0 ) /* was getpid(), which is never 0 for this process; presumably meant as a root check */ + fuse_opt_insert_arg( &args, 1, "-oallow_other" ); + fuse_opt_insert_arg( &args, 1, "-s" ); + + { + /* set the fsname and volname */ + char name[200]; + char *config = pvfs2fuse.mntent.the_pvfs_config_server; + + if ( !config ) + config = pvfs2fuse.mntent.pvfs_config_servers[0]; + + snprintf( name, 200, "-ofsname=pvfs2fuse#%s", config ); + fuse_opt_insert_arg( &args, 1, name ); +#if (__FreeBSD__ >= 10) + snprintf( name, 200, "-ovolname=%s", pvfs2fuse.mntent.pvfs_fs_name ); + fuse_opt_insert_arg( &args, 1, name ); +#endif + } + +#if (__FreeBSD__ >= 10) + { + /* MacFUSE has a bug where cached attributes + * aren't invalidated on direct_io writes + */ + fuse_opt_insert_arg( &args, 1, "-oattr_timeout=0" ); + } +#endif + + return pvfs_fuse_main(&args); +} diff --git a/src/apps/karma/comm.c b/src/apps/karma/comm.c index 13a90fc..5242974 100644 --- a/src/apps/karma/comm.c +++ b/src/apps/karma/comm.c @@ -16,6 +16,7 @@ #include "karma.h" #define GUI_COMM_PERF_HISTORY 5 +#define GUI_COMM_PERF_KEYCOUNT 4 #undef FAKE_STATS #undef FAKE_PERF @@ -33,6 +34,19 @@ static int internal_addr_ct; static int64_t meta_read_prev = 0; static int64_t meta_write_prev = 0; +/* this struct is now only used by karma */ +/* perf numbers are returned as an array of int64_t */ + +struct PVFS_mgmt_perf_stat +{ + int32_t valid_flag; + int64_t start_time_ms; + int64_t read; + int64_t write; + int64_t metadata_read; + int64_t metadata_write; +}; + /* performance data structures */ static struct PVFS_mgmt_perf_stat **internal_perf; static uint32_t *internal_perf_ids; @@ -351,7 +365,7 @@ static int gui_comm_stats_collect( &creds, internal_stats, internal_addrs, - internal_stat_ct, internal_details); + internal_stat_ct, internal_details, NULL); if (ret == 0) return 0; else if (ret == -PVFS_EDETAIL) @@ -396,21 +410,57 
@@ static int gui_comm_stats_collect( static int gui_comm_perf_collect( void) { - int ret; + int ret = 0; char err_msg[64]; char msgbuf[64]; + int key_count; + int64_t **perf_data; + int srv; #ifndef FAKE_PERF + key_count = GUI_COMM_PERF_KEYCOUNT; + + perf_data = (int64_t **)malloc(internal_addr_ct * sizeof(int64_t *)); + for (srv = 0; srv < internal_addr_ct; srv++) + perf_data[srv] = (int64_t *)malloc(sizeof(int64_t) * + (GUI_COMM_PERF_KEYCOUNT + 2) * + GUI_COMM_PERF_HISTORY); + ret = PVFS_mgmt_perf_mon_list(cur_fsid, &creds, - internal_perf, + perf_data, internal_end_time_ms, internal_addrs, internal_perf_ids, internal_addr_ct, - GUI_COMM_PERF_HISTORY, internal_details); + &key_count, + GUI_COMM_PERF_HISTORY, internal_details, + NULL); if (ret == 0) - return 0; + { + /* Note: Karma could be rewritten to deal with the */ + /* new format from the perf subsystem, but might */ + /* not be any great need to ... WBL */ + for (srv = 0; srv < internal_addr_ct; srv++) + { + int i; + for (i = 0; i < GUI_COMM_PERF_HISTORY; i++) + { + internal_perf[srv][i].valid_flag = + (perf_data[srv][(i * (key_count + 2)) + key_count] != 0.0); + internal_perf[srv][i].start_time_ms = + perf_data[srv][(i * (key_count + 2)) + key_count]; + internal_perf[srv][i].read = + perf_data[srv][(i * (key_count + 2)) + 0]; + internal_perf[srv][i].write = + perf_data[srv][(i * (key_count + 2)) + 1]; + internal_perf[srv][i].metadata_read = + perf_data[srv][(i * (key_count + 2)) + 2]; + internal_perf[srv][i].metadata_write = + perf_data[srv][(i * (key_count + 2)) + 3]; + } + } + } else if (ret == -PVFS_EDETAIL) { int i; @@ -431,7 +481,7 @@ static int gui_comm_perf_collect( gui_message_new(msgbuf); } - return 0; + ret = 0; } else { @@ -442,11 +492,15 @@ static int gui_comm_perf_collect( err_msg); gui_message_new(msgbuf); - return -1; + ret = -1; } + + for (srv = 0; srv < internal_addr_ct; srv++) + free(perf_data[srv]); + free(perf_data); #endif - return 0; + return ret; } /* gui_comm_traffic_retrieve() 
diff --git a/src/apps/kernel/linux/module.mk.in b/src/apps/kernel/linux/module.mk.in index 6e69bb8..70ed3a0 100644 --- a/src/apps/kernel/linux/module.mk.in +++ b/src/apps/kernel/linux/module.mk.in @@ -1,5 +1,9 @@ DIR := src/apps/kernel/linux +PVFS2_SEGV_BACKTRACE = @PVFS2_SEGV_BACKTRACE@ + +ifdef BUILD_KERNEL + KERNAPPSRC += \ $(DIR)/pvfs2-client.c @@ -18,7 +22,12 @@ endif MODCFLAGS_$(DIR)/pvfs2-client-core.c = \ -I$(srcdir)/src/kernel/linux-2.6 +ifdef PVFS2_SEGV_BACKTRACE + MODCFLAGS_$(DIR)/pvfs2-client-core.c += -D__PVFS2_SEGV_BACKTRACE__ +endif + # explicitly uses pthreads both threaded and not-threaded versions, even if # threading is turned off in the rest of libpvfs MODLDFLAGS_$(DIR)/pvfs2-client-core.o = -lpthread +endif diff --git a/src/apps/kernel/linux/pvfs2-client-core.c b/src/apps/kernel/linux/pvfs2-client-core.c index ba7476d..2af5cfe 100644 --- a/src/apps/kernel/linux/pvfs2-client-core.c +++ b/src/apps/kernel/linux/pvfs2-client-core.c @@ -14,6 +14,13 @@ #include #include #include +#include +#include + +#ifdef __PVFS2_SEGV_BACKTRACE__ +#include +#include +#endif #include "pvfs2.h" #include "gossip.h" @@ -29,6 +36,7 @@ #include "client-state-machine.h" #include "pint-perf-counter.h" #include "pvfs2-encode-stubs.h" +#include "pint-event.h" #include "src/client/sysint/osd.h" #ifdef USE_MMAP_RA_CACHE @@ -93,12 +101,13 @@ typedef struct char* gossip_mask; int logstamp_type; int logstamp_type_set; - int standalone; + int child; /* kernel module buffer size settings */ unsigned int dev_buffer_count; int dev_buffer_count_set; unsigned int dev_buffer_size; int dev_buffer_size_set; + char *events; } options_t; /* @@ -109,9 +118,13 @@ typedef struct be serviced by our regular handlers. to do both, we use a thread for the blocking ioctl. 
*/ +#define REMOUNT_NOTCOMPLETED 0 +#define REMOUNT_COMPLETED 1 +#define REMOUNT_FAILED 2 static pthread_t remount_thread; static pthread_mutex_t remount_mutex = PTHREAD_MUTEX_INITIALIZER; -static int remount_complete = 0; +static int remount_complete = REMOUNT_NOTCOMPLETED; + /* used for generating unique dynamic mount point names */ static int dynamic_mount_id = 1; @@ -124,7 +137,8 @@ typedef struct job_status_s jstat; struct PINT_dev_unexp_info info; - + PVFS_hint hints; + /* iox requests may post multiple operations at one shot */ int num_ops, num_incomplete_ops; PVFS_sys_op_id op_id; @@ -181,6 +195,7 @@ static options_t s_opts; static job_context_id s_client_dev_context; static int s_client_is_processing = 1; +static int s_client_signal = 0; /* We have 2 sets of description buffers, one used for staging I/O * and one for readdir/readdirplus */ @@ -189,7 +204,9 @@ static struct PVFS_dev_map_desc s_io_desc[NUM_MAP_DESC]; static struct PINT_dev_params s_desc_params[NUM_MAP_DESC]; static struct PINT_perf_counter* acache_pc = NULL; +/* static struct PINT_perf_counter* static_acache_pc = NULL; */ static struct PINT_perf_counter* ncache_pc = NULL; +/* static char hostname[100]; */ /* used only for deleting all allocated vfs_request objects */ vfs_request_t *s_vfs_request_array[MAX_NUM_OPS] = {NULL}; @@ -210,12 +227,15 @@ static int set_acache_parameters(options_t* s_opts); static void set_device_parameters(options_t *s_opts); static void reset_ncache_timeout(void); static int set_ncache_parameters(options_t* s_opts); +static void finalize_perf_items(int n, ... 
); +inline static void fill_hints(PVFS_hint *hints, vfs_request_t *req); static PVFS_object_ref perform_lookup_on_create_error( PVFS_object_ref parent, char *entry_name, PVFS_credentials *credentials, - int follow_link); + int follow_link, + PVFS_hint hints); static int write_device_response( void *buffer_list, @@ -257,16 +277,57 @@ do { \ vfs_request->was_handled_inline = 1; \ } while(0) +#ifdef __PVFS2_SEGV_BACKTRACE__ + +#if defined(REG_EIP) +# define REG_INSTRUCTION_POINTER REG_EIP +#elif defined(REG_RIP) +# define REG_INSTRUCTION_POINTER REG_RIP +#else +# error Unknown instruction pointer location for your architecture, configure with --disable-segv-backtrace. +#endif + +static void client_segfault_handler(int signum, siginfo_t *info, void *secret) +{ + void *trace[16]; + char **messages = (char **)NULL; + int i, trace_size = 0; + ucontext_t *uc = (ucontext_t *)secret; + + /* Do something useful with siginfo_t */ + if (signum == SIGSEGV) + { + gossip_err("PVFS2 client: signal %d, faulty address is %p, " + "from %p\n", signum, info->si_addr, + (void*)uc->uc_mcontext.gregs[REG_INSTRUCTION_POINTER]); + } + else + { + gossip_err("PVFS2 client: signal %d\n", signum); + } + + trace_size = backtrace(trace, 16); + /* overwrite sigaction with caller's address */ + trace[1] = (void *) uc->uc_mcontext.gregs[REG_INSTRUCTION_POINTER]; + + messages = backtrace_symbols(trace, trace_size); + /* skip first stack frame (points here) */ + for (i=1; iinfo.tag == tag); @@ -366,12 +425,11 @@ static int is_op_in_progress(vfs_request_t *vfs_request) gossip_debug(GOSSIP_CLIENTCORE_DEBUG, "is_op_in_progress called on " "tag %lld\n", lld(vfs_request->info.tag)); - hash_link = qhash_search( - s_ops_in_progress_table, (void *)(&vfs_request->info.tag)); + hash_link = qhash_search( s_ops_in_progress_table, + (void *)(&vfs_request->info.tag)); if (hash_link) { - tmp_request = qhash_entry( - hash_link, vfs_request_t, hash_link); + tmp_request = qhash_entry( hash_link, vfs_request_t, 
hash_link); assert(tmp_request); op_found = ((tmp_request->info.tag == vfs_request->info.tag) && @@ -434,12 +492,17 @@ static void *exec_remount(void *ptr) will fill in our dynamic mount information by triggering mount upcalls for each fs mounted by the kernel at this point */ + + /* if PINT_dev_remount fails set remount_complete appropriately */ if (PINT_dev_remount()) { gossip_err("*** Failed to remount filesystems!\n"); + remount_complete = REMOUNT_FAILED; + } + else + { + remount_complete = REMOUNT_COMPLETED; } - - remount_complete = 1; pthread_mutex_unlock(&remount_mutex); return NULL; @@ -471,6 +534,7 @@ static inline void log_operation_timing(vfs_request_t *vfs_request) static PVFS_error post_lookup_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; + PVFS_hint hints; gossip_debug( GOSSIP_CLIENTCORE_DEBUG, @@ -479,6 +543,8 @@ static PVFS_error post_lookup_request(vfs_request_t *vfs_request) vfs_request->in_upcall.req.lookup.parent_refn.fs_id, llu(vfs_request->in_upcall.req.lookup.parent_refn.handle)); + /* get rank from pid */ + fill_hints(&hints, vfs_request); ret = PVFS_isys_ref_lookup( vfs_request->in_upcall.req.lookup.parent_refn.fs_id, vfs_request->in_upcall.req.lookup.d_name, @@ -486,7 +552,8 @@ static PVFS_error post_lookup_request(vfs_request_t *vfs_request) &vfs_request->in_upcall.credentials, &vfs_request->response.lookup, vfs_request->in_upcall.req.lookup.sym_follow, - &vfs_request->op_id, (void *)vfs_request); + &vfs_request->op_id, hints, (void *)vfs_request); + vfs_request->hints = hints; if (ret < 0) { @@ -502,7 +569,8 @@ static PVFS_error post_lookup_request(vfs_request_t *vfs_request) static PVFS_error post_create_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; - + PVFS_hint hints; + gossip_debug( GOSSIP_CLIENTCORE_DEBUG, "Got a create request for %s (fsid %d | parent %llu)\n", @@ -510,13 +578,15 @@ static PVFS_error post_create_request(vfs_request_t *vfs_request) 
vfs_request->in_upcall.req.create.parent_refn.fs_id, llu(vfs_request->in_upcall.req.create.parent_refn.handle)); + fill_hints(&hints, vfs_request); ret = PVFS_isys_create( vfs_request->in_upcall.req.create.d_name, vfs_request->in_upcall.req.create.parent_refn, vfs_request->in_upcall.req.create.attributes, &vfs_request->in_upcall.credentials, NULL, NULL, &vfs_request->response.create, - &vfs_request->op_id, (void *)vfs_request); + &vfs_request->op_id, hints, (void *)vfs_request); + vfs_request->hints = hints; if (ret < 0) { @@ -528,6 +598,7 @@ static PVFS_error post_create_request(vfs_request_t *vfs_request) static PVFS_error post_symlink_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; + PVFS_hint hints; gossip_debug( GOSSIP_CLIENTCORE_DEBUG, @@ -537,6 +608,7 @@ static PVFS_error post_symlink_request(vfs_request_t *vfs_request) llu(vfs_request->in_upcall.req.sym.parent_refn.handle), vfs_request->in_upcall.req.sym.target); + fill_hints(&hints, vfs_request); ret = PVFS_isys_symlink( vfs_request->in_upcall.req.sym.entry_name, vfs_request->in_upcall.req.sym.parent_refn, @@ -544,8 +616,9 @@ static PVFS_error post_symlink_request(vfs_request_t *vfs_request) vfs_request->in_upcall.req.sym.attributes, &vfs_request->in_upcall.credentials, &vfs_request->response.symlink, - &vfs_request->op_id, (void *)vfs_request); - + &vfs_request->op_id, hints, (void *)vfs_request); + vfs_request->hints = hints; + if (ret < 0) { PVFS_perror_gossip("Posting symlink create failed", ret); @@ -556,19 +629,22 @@ static PVFS_error post_symlink_request(vfs_request_t *vfs_request) static PVFS_error post_getattr_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; - + PVFS_hint hints; + gossip_debug( GOSSIP_CLIENTCORE_DEBUG, "got a getattr request for fsid %d | handle %llu\n", vfs_request->in_upcall.req.getattr.refn.fs_id, llu(vfs_request->in_upcall.req.getattr.refn.handle)); + fill_hints(&hints, vfs_request); ret = PVFS_isys_getattr( 
vfs_request->in_upcall.req.getattr.refn, vfs_request->in_upcall.req.getattr.mask, &vfs_request->in_upcall.credentials, &vfs_request->response.getattr, - &vfs_request->op_id, (void *)vfs_request); + &vfs_request->op_id, hints, (void *)vfs_request); + vfs_request->hints = hints; if (ret < 0) { @@ -580,6 +656,7 @@ static PVFS_error post_getattr_request(vfs_request_t *vfs_request) static PVFS_error post_setattr_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; + PVFS_hint hints; gossip_debug( GOSSIP_CLIENTCORE_DEBUG, @@ -588,11 +665,13 @@ static PVFS_error post_setattr_request(vfs_request_t *vfs_request) llu(vfs_request->in_upcall.req.setattr.refn.handle), vfs_request->in_upcall.req.setattr.attributes.mask); + fill_hints(&hints, vfs_request); ret = PVFS_isys_setattr( vfs_request->in_upcall.req.setattr.refn, vfs_request->in_upcall.req.setattr.attributes, &vfs_request->in_upcall.credentials, - &vfs_request->op_id, (void *)vfs_request); + &vfs_request->op_id, hints, (void *)vfs_request); + vfs_request->hints = hints; if (ret < 0) { @@ -604,7 +683,8 @@ static PVFS_error post_setattr_request(vfs_request_t *vfs_request) static PVFS_error post_remove_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; - + PVFS_hint hints; + gossip_debug( GOSSIP_CLIENTCORE_DEBUG, "Got a remove request for %s under fsid %d and " @@ -612,11 +692,13 @@ static PVFS_error post_remove_request(vfs_request_t *vfs_request) vfs_request->in_upcall.req.remove.parent_refn.fs_id, llu(vfs_request->in_upcall.req.remove.parent_refn.handle)); + fill_hints(&hints, vfs_request); ret = PVFS_isys_remove( vfs_request->in_upcall.req.remove.d_name, vfs_request->in_upcall.req.remove.parent_refn, &vfs_request->in_upcall.credentials, - &vfs_request->op_id, (void *)vfs_request); + &vfs_request->op_id, hints, (void *)vfs_request); + vfs_request->hints = hints; if (ret < 0) { @@ -628,6 +710,7 @@ static PVFS_error post_remove_request(vfs_request_t *vfs_request) static PVFS_error 
post_mkdir_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; + PVFS_hint hints; gossip_debug( GOSSIP_CLIENTCORE_DEBUG, @@ -636,13 +719,15 @@ static PVFS_error post_mkdir_request(vfs_request_t *vfs_request) vfs_request->in_upcall.req.mkdir.parent_refn.fs_id, llu(vfs_request->in_upcall.req.mkdir.parent_refn.handle)); + fill_hints(&hints, vfs_request); ret = PVFS_isys_mkdir( vfs_request->in_upcall.req.mkdir.d_name, vfs_request->in_upcall.req.mkdir.parent_refn, vfs_request->in_upcall.req.mkdir.attributes, &vfs_request->in_upcall.credentials, &vfs_request->response.mkdir, - &vfs_request->op_id, (void *)vfs_request); + &vfs_request->op_id, hints, (void *)vfs_request); + vfs_request->hints = hints; if (ret < 0) { @@ -654,6 +739,7 @@ static PVFS_error post_mkdir_request(vfs_request_t *vfs_request) static PVFS_error post_readdir_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; + PVFS_hint hints; gossip_debug(GOSSIP_CLIENTCORE_DEBUG, "Got a readdir request " "for %llu,%d (token %llu)\n", @@ -661,13 +747,15 @@ static PVFS_error post_readdir_request(vfs_request_t *vfs_request) vfs_request->in_upcall.req.readdir.refn.fs_id, llu(vfs_request->in_upcall.req.readdir.token)); + fill_hints(&hints, vfs_request); ret = PVFS_isys_readdir( vfs_request->in_upcall.req.readdir.refn, vfs_request->in_upcall.req.readdir.token, vfs_request->in_upcall.req.readdir.max_dirent_count, &vfs_request->in_upcall.credentials, &vfs_request->response.readdir, - &vfs_request->op_id, (void *)vfs_request); + &vfs_request->op_id, hints, (void *)vfs_request); + vfs_request->hints = hints; if (ret < 0) { @@ -678,6 +766,7 @@ static PVFS_error post_readdir_request(vfs_request_t *vfs_request) static PVFS_error post_readdirplus_request(vfs_request_t *vfs_request) { + PVFS_hint hints; PVFS_error ret = -PVFS_EINVAL; gossip_debug(GOSSIP_CLIENTCORE_DEBUG, "Got a readdirplus request " @@ -686,6 +775,7 @@ static PVFS_error post_readdirplus_request(vfs_request_t *vfs_request) 
vfs_request->in_upcall.req.readdirplus.refn.fs_id, llu(vfs_request->in_upcall.req.readdirplus.token)); + fill_hints(&hints, vfs_request); ret = PVFS_isys_readdirplus( vfs_request->in_upcall.req.readdirplus.refn, vfs_request->in_upcall.req.readdirplus.token, @@ -693,7 +783,7 @@ static PVFS_error post_readdirplus_request(vfs_request_t *vfs_request) &vfs_request->in_upcall.credentials, vfs_request->in_upcall.req.readdirplus.mask, &vfs_request->response.readdirplus, - &vfs_request->op_id, (void *)vfs_request); + &vfs_request->op_id, hints, (void *)vfs_request); /* hints before the user pointer, matching the sibling PVFS_isys_* calls; NOTE(review): siblings also set vfs_request->hints = hints afterwards -- confirm whether this path needs it */ if (ret < 0) { @@ -705,6 +795,7 @@ static PVFS_error post_readdirplus_request(vfs_request_t *vfs_request) static PVFS_error post_rename_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; + PVFS_hint hints; gossip_debug( GOSSIP_CLIENTCORE_DEBUG, @@ -717,13 +808,15 @@ static PVFS_error post_rename_request(vfs_request_t *vfs_request) vfs_request->in_upcall.req.rename.new_parent_refn.fs_id, llu(vfs_request->in_upcall.req.rename.new_parent_refn.handle)); + fill_hints(&hints, vfs_request); ret = PVFS_isys_rename( vfs_request->in_upcall.req.rename.d_old_name, vfs_request->in_upcall.req.rename.old_parent_refn, vfs_request->in_upcall.req.rename.d_new_name, vfs_request->in_upcall.req.rename.new_parent_refn, &vfs_request->in_upcall.credentials, - &vfs_request->op_id, (void *)vfs_request); + &vfs_request->op_id, hints, (void *)vfs_request); + vfs_request->hints = hints; if (ret < 0) { @@ -735,7 +828,8 @@ static PVFS_error post_rename_request(vfs_request_t *vfs_request) static PVFS_error post_truncate_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; - + PVFS_hint hints; + gossip_debug( GOSSIP_CLIENTCORE_DEBUG, "Got a truncate request for %llu under " "fsid %d to be size %lld\n", @@ -743,11 +837,13 @@ static PVFS_error post_truncate_request(vfs_request_t *vfs_request) vfs_request->in_upcall.req.truncate.refn.fs_id, lld(vfs_request->in_upcall.req.truncate.size)); + fill_hints(&hints, 
vfs_request); ret = PVFS_isys_truncate( vfs_request->in_upcall.req.truncate.refn, vfs_request->in_upcall.req.truncate.size, &vfs_request->in_upcall.credentials, - &vfs_request->op_id, (void *)vfs_request); + &vfs_request->op_id, hints, (void *)vfs_request); + vfs_request->hints = hints; if (ret < 0) { @@ -759,7 +855,8 @@ static PVFS_error post_truncate_request(vfs_request_t *vfs_request) static PVFS_error post_getxattr_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; - + PVFS_hint hints; + gossip_debug( GOSSIP_CLIENTCORE_DEBUG, "got a getxattr request for fsid %d | handle %llu\n", @@ -799,6 +896,7 @@ static PVFS_error post_getxattr_request(vfs_request_t *vfs_request) vfs_request->response.geteattr.val_array[0].buffer_sz = PVFS_REQ_LIMIT_VAL_LEN; + fill_hints(&hints, vfs_request); /* Remember to free these up */ ret = PVFS_isys_geteattr_list( vfs_request->in_upcall.req.getxattr.refn, @@ -807,7 +905,9 @@ static PVFS_error post_getxattr_request(vfs_request_t *vfs_request) &vfs_request->key, &vfs_request->response.geteattr, &vfs_request->op_id, + hints, (void *)vfs_request); + vfs_request->hints = hints; if (ret < 0) { @@ -819,6 +919,7 @@ static PVFS_error post_getxattr_request(vfs_request_t *vfs_request) static PVFS_error post_setxattr_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; + PVFS_hint hints; gossip_debug( GOSSIP_CLIENTCORE_DEBUG, @@ -838,6 +939,7 @@ static PVFS_error post_setxattr_request(vfs_request_t *vfs_request) vfs_request->val.buffer_sz = vfs_request->in_upcall.req.setxattr.keyval.val_sz; + fill_hints(&hints, vfs_request); ret = PVFS_isys_seteattr_list( vfs_request->in_upcall.req.setxattr.refn, &vfs_request->in_upcall.credentials, @@ -846,7 +948,9 @@ static PVFS_error post_setxattr_request(vfs_request_t *vfs_request) &vfs_request->val, vfs_request->in_upcall.req.setxattr.flags, &vfs_request->op_id, + hints, (void *)vfs_request); + vfs_request->hints = hints; if (ret < 0) { @@ -858,6 +962,7 @@ static PVFS_error 
post_setxattr_request(vfs_request_t *vfs_request) static PVFS_error post_removexattr_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; + PVFS_hint hints; gossip_debug( GOSSIP_CLIENTCORE_DEBUG, @@ -872,12 +977,15 @@ static PVFS_error post_removexattr_request(vfs_request_t *vfs_request) GOSSIP_CLIENTCORE_DEBUG, "removexattr key %s\n", (char *) vfs_request->key.buffer); + fill_hints(&hints, vfs_request); ret = PVFS_isys_deleattr( - vfs_request->in_upcall.req.setxattr.refn, + vfs_request->in_upcall.req.removexattr.refn, &vfs_request->in_upcall.credentials, &vfs_request->key, &vfs_request->op_id, + hints, (void *)vfs_request); + vfs_request->hints = hints; if (ret < 0) { @@ -890,7 +998,8 @@ static PVFS_error post_listxattr_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; int i = 0, j = 0; - + PVFS_hint hints; + gossip_debug( GOSSIP_CLIENTCORE_DEBUG, "got a listxattr request for fsid %d | handle %llu\n", @@ -932,6 +1041,8 @@ static PVFS_error post_listxattr_request(vfs_request_t *vfs_request) free(vfs_request->response.listeattr.key_array); return -PVFS_ENOMEM; } + + fill_hints(&hints, vfs_request); ret = PVFS_isys_listeattr( vfs_request->in_upcall.req.listxattr.refn, vfs_request->in_upcall.req.listxattr.token, @@ -939,8 +1050,10 @@ static PVFS_error post_listxattr_request(vfs_request_t *vfs_request) &vfs_request->in_upcall.credentials, &vfs_request->response.listeattr, &vfs_request->op_id, + hints, (void *)vfs_request); - + vfs_request->hints = hints; + if (ret < 0) { PVFS_perror_gossip("Posting listxattr failed", ret); @@ -1032,7 +1145,7 @@ static inline int generate_upcall_mntent(struct PVFS_sys_mntent *mntent, GOSSIP_CLIENTCORE_DEBUG, "Got FS Name: %s (len=%d)\n", mntent->pvfs_fs_name, (int)strlen(mntent->pvfs_fs_name)); - mntent->encoding = ENCODING_DEFAULT; + mntent->encoding = PVFS2_ENCODING_DEFAULT; mntent->flowproto = FLOWPROTO_DEFAULT; /* also fill in the fs_id for umount */ @@ -1121,7 +1234,9 @@ static PVFS_error 
service_fs_umount_request(vfs_request_t *vfs_request) ok: PVFS_util_free_mntent(&mntent); - write_inlined_device_response(vfs_request); + /* let handle_unexp_vfs_request() function detect completion and handle */ + vfs_request->op_id = -1; + return 0; fail_downcall: gossip_err( @@ -1138,7 +1253,6 @@ static PVFS_error service_fs_umount_request(vfs_request_t *vfs_request) static PVFS_error service_perf_count_request(vfs_request_t *vfs_request) { char* tmp_str; - PVFS_error ret = -PVFS_EINVAL; gossip_debug( GOSSIP_CLIENTCORE_DEBUG, "Got a perf count request of type %d\n", @@ -1163,7 +1277,23 @@ static PVFS_error service_perf_count_request(vfs_request_t *vfs_request) vfs_request->out_downcall.status = 0; } break; - +/* + case PVFS2_PERF_COUNT_REQUEST_STATIC_ACACHE: + tmp_str = PINT_perf_generate_text(static_acache_pc, + PERF_COUNT_BUF_SIZE); + if(!tmp_str) + { + vfs_request->out_downcall.status = -PVFS_EINVAL; + } + else + { + memcpy(vfs_request->out_downcall.resp.perf_count.buffer, + tmp_str, PERF_COUNT_BUF_SIZE); + free(tmp_str); + vfs_request->out_downcall.status = 0; + } + break; +*/ case PVFS2_PERF_COUNT_REQUEST_NCACHE: tmp_str = PINT_perf_generate_text(ncache_pc, PERF_COUNT_BUF_SIZE); @@ -1183,12 +1313,11 @@ static PVFS_error service_perf_count_request(vfs_request_t *vfs_request) default: /* unsupported request, didn't match anything in case statement */ vfs_request->out_downcall.status = -PVFS_ENOSYS; - write_inlined_device_response(vfs_request); - return 0; break; } - write_inlined_device_response(vfs_request); + /* let handle_unexp_vfs_request() function detect completion and handle */ + vfs_request->op_id = -1; return 0; } @@ -1201,12 +1330,14 @@ static PVFS_error service_param_request(vfs_request_t *vfs_request) int tmp_param = -1; int tmp_subsystem = -1; unsigned int tmp_perf_val; + uint64_t mask = 0; gossip_debug( GOSSIP_CLIENTCORE_DEBUG, "Got a param request for op %d\n", vfs_request->in_upcall.req.param.op); vfs_request->out_downcall.type = 
vfs_request->in_upcall.type; + vfs_request->op_id = -1; switch(vfs_request->in_upcall.req.param.op) { @@ -1227,6 +1358,22 @@ static PVFS_error service_param_request(vfs_request_t *vfs_request) tmp_param = ACACHE_RECLAIM_PERCENTAGE; tmp_subsystem = ACACHE; break; + case PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_TIMEOUT_MSECS: + tmp_param = STATIC_ACACHE_TIMEOUT_MSECS; + tmp_subsystem = ACACHE; + break; + case PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_HARD_LIMIT: + tmp_param = STATIC_ACACHE_HARD_LIMIT; + tmp_subsystem = ACACHE; + break; + case PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_SOFT_LIMIT: + tmp_param = STATIC_ACACHE_SOFT_LIMIT; + tmp_subsystem = ACACHE; + break; + case PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_RECLAIM_PERCENTAGE: + tmp_param = STATIC_ACACHE_RECLAIM_PERCENTAGE; + tmp_subsystem = ACACHE; + break; case PVFS2_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS: tmp_param = NCACHE_TIMEOUT_MSECS; tmp_subsystem = NCACHE; @@ -1244,6 +1391,20 @@ static PVFS_error service_param_request(vfs_request_t *vfs_request) tmp_subsystem = NCACHE; break; /* These next few case statements return without falling through */ + case PVFS2_PARAM_REQUEST_OP_CLIENT_DEBUG: + gossip_debug(GOSSIP_PROC_DEBUG,"Got request to SET the client debug mask...\n"); + gossip_debug(GOSSIP_PROC_DEBUG,"s_value is %s\n",vfs_request->in_upcall.req.param.s_value); + + mask=PVFS_debug_eventlog_to_mask(vfs_request->in_upcall.req.param.s_value); + + ret=gossip_set_debug_mask(1,mask); + gossip_debug(GOSSIP_PROC_DEBUG,"Value of new debug mask is %0x.\n" + ,(unsigned int)gossip_debug_mask); + + vfs_request->out_downcall.status = 0; + vfs_request->out_downcall.resp.param.value=mask; + return(0); + case PVFS2_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS: if(vfs_request->in_upcall.req.param.type == PVFS2_PARAM_REQUEST_GET) @@ -1257,7 +1418,6 @@ static PVFS_error service_param_request(vfs_request_t *vfs_request) vfs_request->in_upcall.req.param.value; } vfs_request->out_downcall.status = 0; - write_inlined_device_response(vfs_request); 
return(0); break; case PVFS2_PARAM_REQUEST_OP_PERF_HISTORY_SIZE: @@ -1273,11 +1433,13 @@ static PVFS_error service_param_request(vfs_request_t *vfs_request) tmp_perf_val = vfs_request->in_upcall.req.param.value; ret = PINT_perf_set_info( acache_pc, PINT_PERF_HISTORY_SIZE, tmp_perf_val); +/* ret = PINT_perf_set_info( + static_acache_pc, PINT_PERF_HISTORY_SIZE, tmp_perf_val); +*/ ret = PINT_perf_set_info( ncache_pc, PINT_PERF_HISTORY_SIZE, tmp_perf_val); } vfs_request->out_downcall.status = ret; - write_inlined_device_response(vfs_request); return(0); break; case PVFS2_PARAM_REQUEST_OP_PERF_RESET: @@ -1285,11 +1447,11 @@ static PVFS_error service_param_request(vfs_request_t *vfs_request) PVFS2_PARAM_REQUEST_SET) { PINT_perf_reset(acache_pc); +/* PINT_perf_reset(static_acache_pc);*/ PINT_perf_reset(ncache_pc); } vfs_request->out_downcall.resp.param.value = 0; vfs_request->out_downcall.status = 0; - write_inlined_device_response(vfs_request); return(0); break; } @@ -1298,7 +1460,6 @@ static PVFS_error service_param_request(vfs_request_t *vfs_request) { /* unsupported request, didn't match anything in case statement */ vfs_request->out_downcall.status = -PVFS_ENOSYS; - write_inlined_device_response(vfs_request); return 0; } @@ -1333,7 +1494,6 @@ static PVFS_error service_param_request(vfs_request_t *vfs_request) PINT_ncache_set_info(tmp_param, val); } } - write_inlined_device_response(vfs_request); return 0; } #undef ACACHE @@ -1342,16 +1502,19 @@ static PVFS_error service_param_request(vfs_request_t *vfs_request) static PVFS_error post_statfs_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; - + PVFS_hint hints; + gossip_debug( GOSSIP_CLIENTCORE_DEBUG, "Got a statfs request for fsid %d\n", vfs_request->in_upcall.req.statfs.fs_id); + fill_hints(&hints, vfs_request); ret = PVFS_isys_statfs( vfs_request->in_upcall.req.statfs.fs_id, &vfs_request->in_upcall.credentials, &vfs_request->response.statfs, - &vfs_request->op_id, (void *)vfs_request); + 
&vfs_request->op_id, hints, (void *)vfs_request); + vfs_request->hints = hints; vfs_request->out_downcall.status = ret; vfs_request->out_downcall.type = vfs_request->in_upcall.type; @@ -1410,7 +1573,7 @@ static PVFS_error service_fs_key_request(vfs_request_t *vfs_request) out: vfs_request->out_downcall.status = ret; vfs_request->out_downcall.type = vfs_request->in_upcall.type; - write_inlined_device_response(vfs_request); + vfs_request->op_id = -1; return 0; } @@ -1418,6 +1581,7 @@ static PVFS_error service_fs_key_request(vfs_request_t *vfs_request) static PVFS_error post_io_readahead_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; + PVFS_hint hints; gossip_debug( GOSSIP_MMAP_RCACHE_DEBUG, @@ -1451,13 +1615,15 @@ static PVFS_error post_io_readahead_request(vfs_request_t *vfs_request) PVFS_BYTE, &vfs_request->file_req); assert(ret == 0); + fill_hints(&hints, vfs_request); ret = PVFS_isys_io( vfs_request->in_upcall.req.io.refn, vfs_request->file_req, 0, vfs_request->io_tmp_buf, vfs_request->mem_req, &vfs_request->in_upcall.credentials, &vfs_request->response.io, vfs_request->in_upcall.req.io.io_type, - &vfs_request->op_id, (void *)vfs_request); + &vfs_request->op_id, hints, (void *)vfs_request); + vfs_request->hints = hints; if (ret < 0) { @@ -1472,7 +1638,8 @@ static PVFS_error post_io_readahead_request(vfs_request_t *vfs_request) static PVFS_error post_io_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; - + PVFS_hint hints; + #ifdef USE_MMAP_RA_CACHE int val = 0, amt_returned = 0; void *buf = NULL; @@ -1586,6 +1753,7 @@ static PVFS_error post_io_request(vfs_request_t *vfs_request) PVFS_BYTE, &vfs_request->file_req); assert(ret == 0); + fill_hints(&hints, vfs_request); ret = PVFS_isys_io( vfs_request->in_upcall.req.io.refn, vfs_request->file_req, vfs_request->in_upcall.req.io.offset, @@ -1593,7 +1761,7 @@ static PVFS_error post_io_request(vfs_request_t *vfs_request) &vfs_request->in_upcall.credentials, 
&vfs_request->response.io, vfs_request->in_upcall.req.io.io_type, - &vfs_request->op_id, (void *)vfs_request); + &vfs_request->op_id, hints, (void *)vfs_request); if (ret < 0) { @@ -1625,8 +1793,8 @@ static PVFS_error post_io_request(vfs_request_t *vfs_request) free(vfs_request->io_tmp_buf); } vfs_request->io_tmp_buf = NULL; + vfs_request->op_id = -1; - write_inlined_device_response(vfs_request); return 0; #endif /* USE_MMAP_RA_CACHE */ } @@ -1636,6 +1804,8 @@ static PVFS_error post_iox_request(vfs_request_t *vfs_request) int32_t i, num_ops_posted, iox_count, iox_index; int32_t *mem_sizes = NULL; PVFS_error ret = -PVFS_EINVAL; + PVFS_hint hints; + struct read_write_x *rwx = (struct read_write_x *) vfs_request->in_upcall.trailer_buf; if (vfs_request->in_upcall.trailer_size <= 0 || rwx == NULL) @@ -1762,6 +1932,8 @@ static PVFS_error post_iox_request(vfs_request_t *vfs_request) gossip_err("post_iox_request: request_hindexed failed\n"); break; } + + fill_hints(&hints, vfs_request); /* post the I/O */ ret = PVFS_isys_io( vfs_request->in_upcall.req.iox.refn, vfs_request->file_req_a[i], @@ -1771,6 +1943,7 @@ static PVFS_error post_iox_request(vfs_request_t *vfs_request) &vfs_request->response.iox[i], vfs_request->in_upcall.req.iox.io_type, &vfs_request->op_ids[i], + hints, (void *)vfs_request); if (ret < 0) @@ -1820,8 +1993,6 @@ static PVFS_error post_iox_request(vfs_request_t *vfs_request) static PVFS_error service_mmap_ra_flush_request( vfs_request_t *vfs_request) { - PVFS_error ret = -PVFS_EINVAL; - gossip_debug( GOSSIP_MMAP_RCACHE_DEBUG, "Flushing mmap-racache elem %llu, %d\n", llu(vfs_request->in_upcall.req.ra_cache_flush.refn.handle), @@ -1833,8 +2004,8 @@ static PVFS_error service_mmap_ra_flush_request( /* we need to send a blank success response */ vfs_request->out_downcall.type = PVFS2_VFS_OP_MMAP_RA_FLUSH; vfs_request->out_downcall.status = 0; + vfs_request->op_id = -1; - write_inlined_device_response(vfs_request); return 0; } #endif @@ -1858,25 +2029,28 @@ 
static PVFS_error service_operation_cancellation( vfs_request->out_downcall.type = PVFS2_VFS_OP_CANCEL; vfs_request->out_downcall.status = ret; + vfs_request->op_id = -1; - write_inlined_device_response(vfs_request); return 0; } static PVFS_error post_fsync_request(vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; - + PVFS_hint hints; + gossip_debug( GOSSIP_CLIENTCORE_DEBUG, "Got a flush request for %llu,%d\n", llu(vfs_request->in_upcall.req.fsync.refn.handle), vfs_request->in_upcall.req.fsync.refn.fs_id); + fill_hints(&hints, vfs_request); ret = PVFS_isys_flush( vfs_request->in_upcall.req.fsync.refn, &vfs_request->in_upcall.credentials, - &vfs_request->op_id, (void *)vfs_request); - + &vfs_request->op_id, hints, (void *)vfs_request); + vfs_request->hints = hints; + if (ret < 0) { PVFS_perror_gossip("Posting flush failed", ret); @@ -1888,15 +2062,15 @@ static PVFS_object_ref perform_lookup_on_create_error( PVFS_object_ref parent, char *entry_name, PVFS_credentials *credentials, - int follow_link) + int follow_link, + PVFS_hint hints) { PVFS_error ret = 0; PVFS_sysresp_lookup lookup_response; PVFS_object_ref refn = { PVFS_HANDLE_NULL, PVFS_FS_ID_NULL }; - ret = PVFS_sys_ref_lookup( parent.fs_id, entry_name, parent, credentials, - &lookup_response, follow_link); + &lookup_response, follow_link, hints); if (ret) { @@ -2151,11 +2325,14 @@ static inline void package_downcall_members( */ if (*error_code == -PVFS_EEXIST) { + PVFS_hint hints; + fill_hints(&hints, vfs_request); vfs_request->out_downcall.resp.create.refn = perform_lookup_on_create_error( vfs_request->in_upcall.req.create.parent_refn, vfs_request->in_upcall.req.create.d_name, - &vfs_request->in_upcall.credentials, 1); + &vfs_request->in_upcall.credentials, 1, hints); + vfs_request->hints = hints; if (vfs_request->out_downcall.resp.create.refn.handle == PVFS_HANDLE_NULL) @@ -2354,6 +2531,7 @@ static inline void package_downcall_members( { pvfs2_fs_mount_response_t *mr = 
&vfs_request->out_downcall.resp.fs_mount; + osd_find_scsi_addresses(vfs_request->mntent->fs_id, &mr->num_osd, mr->osd_addrs, sizeof(mr->osd_addrs) / @@ -2599,6 +2777,12 @@ static inline void package_downcall_members( } break; } + case PVFS2_VFS_OP_FS_UMOUNT: + case PVFS2_VFS_OP_PERF_COUNT: + case PVFS2_VFS_OP_PARAM: + case PVFS2_VFS_OP_FSKEY: + case PVFS2_VFS_OP_CANCEL: + break; default: gossip_err("Completed upcall of unknown type %x!\n", vfs_request->in_upcall.type); @@ -2619,6 +2803,7 @@ static inline PVFS_error repost_unexp_vfs_request( PINT_dev_release_unexpected(&vfs_request->info); PINT_sys_release(vfs_request->op_id); + PVFS_hint_free(vfs_request->hints); memset(vfs_request, 0, sizeof(vfs_request_t)); vfs_request->is_dev_unexp = 1; @@ -2641,7 +2826,6 @@ static inline PVFS_error handle_unexp_vfs_request( vfs_request_t *vfs_request) { PVFS_error ret = -PVFS_EINVAL; - int posted_op = 0; assert(vfs_request); @@ -2671,8 +2855,9 @@ static inline PVFS_error handle_unexp_vfs_request( goto repost_op; } - if (!remount_complete && - (vfs_request->in_upcall.type != PVFS2_VFS_OP_FS_MOUNT)) + if (remount_complete == REMOUNT_NOTCOMPLETED && + (vfs_request->in_upcall.type != PVFS2_VFS_OP_FS_MOUNT) && + (vfs_request->in_upcall.type != PVFS2_VFS_OP_CANCEL) ) { gossip_debug( GOSSIP_CLIENTCORE_DEBUG, "Got an upcall operation of " @@ -2716,71 +2901,54 @@ static inline PVFS_error handle_unexp_vfs_request( switch(vfs_request->in_upcall.type) { case PVFS2_VFS_OP_LOOKUP: - posted_op = 1; ret = post_lookup_request(vfs_request); break; case PVFS2_VFS_OP_CREATE: - posted_op = 1; ret = post_create_request(vfs_request); break; case PVFS2_VFS_OP_SYMLINK: - posted_op = 1; ret = post_symlink_request(vfs_request); break; case PVFS2_VFS_OP_GETATTR: - posted_op = 1; ret = post_getattr_request(vfs_request); break; case PVFS2_VFS_OP_SETATTR: - posted_op = 1; ret = post_setattr_request(vfs_request); break; case PVFS2_VFS_OP_REMOVE: - posted_op = 1; ret = post_remove_request(vfs_request); break; 
case PVFS2_VFS_OP_MKDIR: - posted_op = 1; ret = post_mkdir_request(vfs_request); break; case PVFS2_VFS_OP_READDIR: - posted_op = 1; ret = post_readdir_request(vfs_request); break; case PVFS2_VFS_OP_READDIRPLUS: - posted_op = 1; ret = post_readdirplus_request(vfs_request); break; case PVFS2_VFS_OP_RENAME: - posted_op = 1; ret = post_rename_request(vfs_request); break; case PVFS2_VFS_OP_TRUNCATE: - posted_op = 1; ret = post_truncate_request(vfs_request); break; case PVFS2_VFS_OP_GETXATTR: - posted_op = 1; ret = post_getxattr_request(vfs_request); break; case PVFS2_VFS_OP_SETXATTR: - posted_op = 1; ret = post_setxattr_request(vfs_request); break; case PVFS2_VFS_OP_REMOVEXATTR: - posted_op = 1; ret = post_removexattr_request(vfs_request); break; case PVFS2_VFS_OP_LISTXATTR: - posted_op = 1; ret = post_listxattr_request(vfs_request); break; case PVFS2_VFS_OP_STATFS: - posted_op = 1; ret = post_statfs_request(vfs_request); break; case PVFS2_VFS_OP_FS_MOUNT: - posted_op = 1; ret = post_fs_mount_request(vfs_request); break; /* @@ -2805,11 +2973,9 @@ static inline PVFS_error handle_unexp_vfs_request( blocking and handled inline */ case PVFS2_VFS_OP_FILE_IO: - posted_op = 1; ret = post_io_request(vfs_request); break; case PVFS2_VFS_OP_FILE_IOX: - posted_op = 1; ret = post_iox_request(vfs_request); break; #ifdef USE_MMAP_RA_CACHE @@ -2825,7 +2991,6 @@ static inline PVFS_error handle_unexp_vfs_request( ret = service_operation_cancellation(vfs_request); break; case PVFS2_VFS_OP_FSYNC: - posted_op = 1; ret = post_fsync_request(vfs_request); break; case PVFS2_VFS_OP_INVALID: @@ -2833,13 +2998,14 @@ static inline PVFS_error handle_unexp_vfs_request( gossip_err( "Got an unrecognized/unimplemented vfs operation of " "type %x.\n", vfs_request->in_upcall.type); + ret = -PVFS_ENOSYS; break; } /* if we failed to post the operation, then we should go ahead and write * a generic response down with the error code filled in */ - if(posted_op == 1 && ret < 0) + if(ret < 0) { #ifndef 
GOSSIP_DISABLE_DEBUG gossip_err( @@ -2971,6 +3137,7 @@ static PVFS_error process_vfs_requests(void) for(i = 0; i < op_count; i++) { vfs_request = vfs_request_array[i]; + assert(vfs_request); /* assert(vfs_request->op_id == op_id_array[i]); */ if (vfs_request->num_ops == 1 && @@ -3117,6 +3284,29 @@ static PVFS_error process_vfs_requests(void) vfs_request, "normal_completion"); assert(ret == 0); } + + /* The status of the remount thread needs to be checked in the event + * the remount fails on client-core startup. If this is the initial + * startup then any mount requests will fail as expected and the + * client-core will behave normally. However, if a mount was + * previously successful (in a previous client-core incarnation) + * client-core doesn't check if the remount succeeded before + * handling the mount request and fs_add. Then any subsequent requests + * cause this thread spins around PINT_dev_test_unexpected. + * + * With the current structure of process_vfs_request, creating the + * remount thread before entering the while loop, it seems exiting + * client-core on a failed remount attempt is the most staight forward + * way to handle this case. Exiting will cause the parent to kickoff + * another client-core and try the remount until it succeeds. 
+ */ + if( remount_complete == REMOUNT_FAILED ) + { + gossip_debug(GOSSIP_CLIENTCORE_DEBUG, + "%s: remount not completed successfully, no longer " + "handling requests.\n", __func__); + return -PVFS_EAGAIN; + } } gossip_debug(GOSSIP_CLIENTCORE_DEBUG, @@ -3131,17 +3321,43 @@ int main(int argc, char **argv) struct tm *local_time = NULL; uint64_t debug_mask = GOSSIP_NO_DEBUG; PINT_client_sm *acache_timer_sm_p = NULL; - PINT_smcb *smcb = NULL; + /* PINT_client_sm *static_acache_timer_sm_p = NULL; */ + PINT_smcb *acache_smcb = NULL; + /* PINT_smcb *acache_static_smcb = NULL; */ + PINT_smcb *ncache_smcb = NULL; PINT_client_sm *ncache_timer_sm_p = NULL; - /* if pvfs2-client-core segfaults, at least log the occurence so - * pvfs2-client won't repeatedly respawn pvfs2-client-core */ +#ifdef __PVFS2_SEGV_BACKTRACE__ + struct sigaction segv_action; + + segv_action.sa_sigaction = (void *)client_segfault_handler; + sigemptyset (&segv_action.sa_mask); + segv_action.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONESHOT; + sigaction (SIGSEGV, &segv_action, NULL); + sigaction (SIGABRT, &segv_action, NULL); +#else + + /* if pvfs2-client-core segfaults or aborts, at least log the occurence so + * pvfs2-client won't repeatedly respawn pvfs2-client-core + * */ signal(SIGSEGV, client_segfault_handler); + signal(SIGABRT, client_segfault_handler); +#endif memset(&s_opts, 0, sizeof(options_t)); parse_args(argc, argv, &s_opts); - if(!s_opts.standalone) + signal(SIGHUP, client_core_sig_handler); + signal(SIGINT, client_core_sig_handler); + signal(SIGPIPE, client_core_sig_handler); + signal(SIGILL, client_core_sig_handler); + signal(SIGTERM, client_core_sig_handler); + + /* we don't want to write a core file if we're running under + * the client parent process, because the client-core process + * could keep segfaulting, and the client would keep restarting it... 
+ */ + if(s_opts.child) { struct rlimit lim = {0,0}; @@ -3153,10 +3369,6 @@ int main(int argc, char **argv) "continuing", ret); } } - else - { - signal(SIGHUP, client_core_sig_handler); - } /* convert gossip mask if provided on command line */ if (s_opts.gossip_mask) @@ -3213,13 +3425,17 @@ int main(int argc, char **argv) } /* get rid of stdout/stderr/stdin */ - freopen("/dev/null", "r", stdin); - freopen("/dev/null", "w", stdout); - freopen("/dev/null", "w", stderr); + if(!freopen("/dev/null", "r", stdin)) + gossip_err("Error: failed to reopen stdin.\n"); + if(!freopen("/dev/null", "w", stdout)) + gossip_err("Error: failed to reopen stdout.\n"); + if(!freopen("/dev/null", "w", stderr)) + gossip_err("Error: failed to reopen stderr.\n"); start_time = time(NULL); local_time = localtime(&start_time); + gossip_err("PVFS Client Daemon Started. Version %s\n", PVFS2_VERSION); gossip_debug(GOSSIP_CLIENTCORE_DEBUG, "***********************" "****************************\n"); gossip_debug(GOSSIP_CLIENTCORE_DEBUG, @@ -3248,6 +3464,11 @@ int main(int argc, char **argv) } set_device_parameters(&s_opts); + if(s_opts.events) + { + PINT_event_enable(s_opts.events); + } + /* start performance counters for acache */ acache_pc = PINT_perf_initialize(acache_keys); if(!acache_pc) @@ -3260,8 +3481,29 @@ int main(int argc, char **argv) if(ret < 0) { gossip_err("Error: PINT_perf_set_info (history_size).\n"); + finalize_perf_items( 0 ); + return(ret); + } + + /* + static_acache_pc = PINT_perf_initialize(acache_keys); + if(!static_acache_pc) + { + gossip_err("Error: PINT_perf_initialize failure.\n"); + finalize_perf_items( 0 ); + return(-PVFS_ENOMEM); + } + + ret = PINT_perf_set_info(static_acache_pc, PINT_PERF_HISTORY_SIZE, + s_opts.perf_history_size); + if(ret < 0) + { + gossip_err("Error: PINT_perf_set_info (history_size).\n"); + finalize_perf_items( 0 ); return(ret); } + */ + PINT_acache_enable_perf_counter(acache_pc); /* start performance counters for ncache */ @@ -3269,6 +3511,7 @@ 
int main(int argc, char **argv) if(!ncache_pc) { gossip_err("Error: PINT_perf_initialize failure.\n"); + finalize_perf_items( 0 ); return(-PVFS_ENOMEM); } ret = PINT_perf_set_info(ncache_pc, PINT_PERF_HISTORY_SIZE, @@ -3276,92 +3519,86 @@ int main(int argc, char **argv) if(ret < 0) { gossip_err("Error: PINT_perf_set_info (history_size).\n"); + finalize_perf_items( 0 ); return(ret); } PINT_ncache_enable_perf_counter(ncache_pc); /* start a timer to roll over performance counters (acache) */ - PINT_smcb_alloc(&smcb, PVFS_CLIENT_PERF_COUNT_TIMER, + PINT_smcb_alloc(&acache_smcb, PVFS_CLIENT_PERF_COUNT_TIMER, sizeof(struct PINT_client_sm), client_op_state_get_machine, client_state_machine_terminate, s_client_dev_context); - if (!smcb) + if (!acache_smcb) { + finalize_perf_items( 0 ); return(-PVFS_ENOMEM); } - acache_timer_sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + acache_timer_sm_p = PINT_sm_frame(acache_smcb, PINT_FRAME_CURRENT); acache_timer_sm_p->u.perf_count_timer.interval_secs = &s_opts.perf_time_interval_secs; acache_timer_sm_p->u.perf_count_timer.pc = acache_pc; - ret = PINT_client_state_machine_post(smcb, NULL, NULL); + ret = PINT_client_state_machine_post(acache_smcb, NULL, NULL); if (ret < 0) { gossip_lerr("Error posting acache timer.\n"); + finalize_perf_items( 1, acache_smcb ); return(ret); } - PINT_smcb_alloc(&smcb, PVFS_CLIENT_PERF_COUNT_TIMER, + /* + PINT_smcb_alloc(&acache_static_smcb, PVFS_CLIENT_PERF_COUNT_TIMER, sizeof(struct PINT_client_sm), client_op_state_get_machine, client_state_machine_terminate, s_client_dev_context); - if (!smcb) + if (!acache_static_smcb) { + finalize_perf_items( 1, acache_smcb ); return(-PVFS_ENOMEM); } - ncache_timer_sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - ncache_timer_sm_p->u.perf_count_timer.interval_secs = - &s_opts.perf_time_interval_secs; - ncache_timer_sm_p->u.perf_count_timer.pc = ncache_pc; - ret = PINT_client_state_machine_post(smcb, NULL, NULL); - if (ret < 0) - { - gossip_lerr("Error posting 
ncache timer.\n"); - return(ret); - } - -#if 0 - /* old timer code */ - acache_timer_sm_p = (PINT_client_sm *)malloc(sizeof(PINT_client_sm)); - if(!acache_timer_sm_p) - { - return(-PVFS_ENOMEM); - } - memset(acache_timer_sm_p, 0, sizeof(*acache_timer_sm_p)); - acache_timer_sm_p->u.perf_count_timer.interval_secs = + static_acache_timer_sm_p = PINT_sm_frame(acache_static_smcb, + PINT_FRAME_CURRENT); + static_acache_timer_sm_p->u.perf_count_timer.interval_secs = &s_opts.perf_time_interval_secs; - acache_timer_sm_p->u.perf_count_timer.pc = acache_pc; - ret = PINT_client_state_machine_post( - acache_timer_sm_p, PVFS_CLIENT_PERF_COUNT_TIMER, NULL, NULL); + static_acache_timer_sm_p->u.perf_count_timer.pc = static_acache_pc; + ret = PINT_client_state_machine_post(acache_static_smcb, NULL, NULL); if (ret < 0) { + gossip_lerr("Error posting acache timer.\n"); + finalize_perf_items( 2, acache_smcb, acache_static_smcb ); return(ret); } + */ - /* start a timer to roll over performance counters (ncache) */ - ncache_timer_sm_p = (PINT_client_sm *)malloc(sizeof(PINT_client_sm)); - if(!ncache_timer_sm_p) + PINT_smcb_alloc(&ncache_smcb, PVFS_CLIENT_PERF_COUNT_TIMER, + sizeof(struct PINT_client_sm), + client_op_state_get_machine, + client_state_machine_terminate, + s_client_dev_context); + if (!ncache_smcb) { - return(-PVFS_ENOMEM); + finalize_perf_items( 1, acache_smcb); + return(-PVFS_ENOMEM); } - memset(ncache_timer_sm_p, 0, sizeof(*ncache_timer_sm_p)); + ncache_timer_sm_p = PINT_sm_frame(ncache_smcb, PINT_FRAME_CURRENT); ncache_timer_sm_p->u.perf_count_timer.interval_secs = &s_opts.perf_time_interval_secs; ncache_timer_sm_p->u.perf_count_timer.pc = ncache_pc; - ret = PINT_client_state_machine_post( - ncache_timer_sm_p, PVFS_CLIENT_PERF_COUNT_TIMER, NULL, NULL); + ret = PINT_client_state_machine_post(ncache_smcb, NULL, NULL); if (ret < 0) { + gossip_lerr("Error posting ncache timer.\n"); + finalize_perf_items( 2, acache_smcb, ncache_smcb ); return(ret); } - /* end of old code */ 
-#endif ret = initialize_ops_in_progress_table(); if (ret) { PVFS_perror("initialize_ops_in_progress_table", ret); + finalize_perf_items( 2, acache_smcb, ncache_smcb ); return ret; } @@ -3369,6 +3606,7 @@ int main(int argc, char **argv) if (ret < 0) { PVFS_perror("PINT_dev_initialize", ret); + finalize_perf_items( 2, acache_smcb, ncache_smcb ); return -PVFS_EDEVINIT; } @@ -3378,6 +3616,7 @@ int main(int argc, char **argv) if (ret < 0) { PVFS_perror("PINT_dev_get_mapped_region", ret); + finalize_perf_items( 2, acache_smcb, ncache_smcb ); return ret; } @@ -3385,6 +3624,7 @@ int main(int argc, char **argv) if (ret < 0) { PVFS_perror("device job_open_context failed", ret); + finalize_perf_items( 2, acache_smcb, ncache_smcb ); return ret; } @@ -3397,17 +3637,18 @@ int main(int argc, char **argv) if (pthread_create(&remount_thread, NULL, exec_remount, NULL)) { gossip_err("Cannot create remount thread!"); + finalize_perf_items( 2, acache_smcb, ncache_smcb ); return -1; } ret = process_vfs_requests(); if (ret) { - gossip_err("Failed to process vfs requests!"); + gossip_err("Failed to process vfs requests!\n"); } /* join remount thread; should be long done by now */ - if (remount_complete) + if (remount_complete == REMOUNT_COMPLETED ) { pthread_join(remount_thread, NULL); } @@ -3437,13 +3678,33 @@ int main(int argc, char **argv) gossip_debug(GOSSIP_CLIENTCORE_DEBUG, "calling PVFS_sys_finalize()\n"); + +#if 0 + /*release smcb associated with the acache-timer*/ + if (static_acache_timer_sm_p->sys_op_id) + PINT_sys_release(static_acache_timer_sm_p->sys_op_id); +#endif + + finalize_perf_items( 2, acache_smcb, ncache_smcb ); + + gossip_err("pvfs2-client-core shutting down.\n"); if (PVFS_sys_finalize()) { - gossip_err("Failed to finalize PVFS\n"); return 1; } - gossip_debug(GOSSIP_CLIENTCORE_DEBUG, "%s terminating\n", argv[0]); + /* if failed remount tell the parent it's something we did wrong. 
*/ + if( remount_complete != REMOUNT_COMPLETED ) + { + return(-PVFS_EAGAIN); + } + + /* forward the signal on to the parent */ + if(s_client_signal) + { + kill(0, s_client_signal); + } + return 0; } @@ -3469,8 +3730,10 @@ static void print_help(char *progname) printf("--logtype=file|syslog specify writing logs to file or syslog\n"); printf("--logstamp=none|usec|datetime overrides the default log message's time stamp\n"); printf("--gossip-mask=MASK_LIST gossip logging mask\n"); + printf("--create-request-id create a id which is transfered to the server\n"); printf("--desc-count=VALUE overrides the default # of kernel buffer descriptors\n"); printf("--desc-size=VALUE overrides the default size of each kernel buffer descriptor\n"); + printf("--events=EVENT_LIST specify the events to enable\n"); } static void parse_args(int argc, char **argv, options_t *opts) @@ -3496,12 +3759,13 @@ static void parse_args(int argc, char **argv, options_t *opts) {"logfile",1,0,0}, {"logtype",1,0,0}, {"logstamp",1,0,0}, - {"standalone",0,0,0}, + {"child",0,0,0}, + {"events",1,0,0}, {0,0,0,0} }; assert(opts); - opts->perf_time_interval_secs = PERF_DEFAULT_TIME_INTERVAL_SECS; + opts->perf_time_interval_secs = PERF_DEFAULT_UPDATE_INTERVAL / 1000; opts->perf_history_size = PERF_DEFAULT_HISTORY_SIZE; while((ret = getopt_long(argc, argv, "ha:n:L:", @@ -3671,9 +3935,13 @@ static void parse_args(int argc, char **argv, options_t *opts) { opts->gossip_mask = optarg; } - else if (strcmp("standalone", cur_option) == 0) + else if (strcmp("child", cur_option) == 0) + { + opts->child = 1; + } + else if (strcmp("events", cur_option) == 0) { - opts->standalone = 1; + opts->events = optarg; } break; case 'h': @@ -3808,6 +4076,43 @@ static void reset_ncache_timeout(void) } } +static void finalize_perf_items(int n, ... 
) +{ + + int i=0; + PINT_smcb *smcb; + va_list v_args; + + va_start(v_args, n); + for( i=0; i < n; i++ ) + { + smcb = va_arg(v_args, PINT_smcb *); + if( smcb ) + { + PINT_client_state_machine_release( smcb ); + } + } + va_end( v_args ); + + if( acache_pc != NULL ) + { + PINT_perf_finalize( acache_pc ); + } + + /* + if( static_acache_pc != NULL ) + { + PINT_perf_finalize( static_acache_pc ); + } + */ + + if( ncache_pc != NULL ) + { + PINT_perf_finalize( ncache_pc ); + } + return; +} + #ifndef GOSSIP_DISABLE_DEBUG static char *get_vfs_op_name_str(int op_type) { @@ -3980,6 +4285,56 @@ static void set_device_parameters(options_t *s_opts) return; } +static int get_mac(void); + +inline static void fill_hints(PVFS_hint *hints, vfs_request_t *req) +{ + int32_t mac; + + *hints = NULL; + if(!s_opts.events) return; + + mac = get_mac(); + gossip_debug(GOSSIP_CLIENTCORE_DEBUG, "mac: %d\n", mac); + PVFS_hint_add(hints, PVFS_HINT_CLIENT_ID_NAME, sizeof(mac), &mac); +} + +/* + * Pack the first four octets of eth0's hardware address into an int, + * octet 0 in the low byte. Returns -1 if the address cannot be read. + */ +static int get_mac(void) +{ + int sock; + struct ifreq iface; + uint32_t mac; + + strcpy(iface.ifr_name,"eth0"); + + if((sock = socket(AF_INET, SOCK_DGRAM, 0)) < 0) + { + perror("socket"); + return -1; + } + + if((ioctl(sock, SIOCGIFHWADDR, &iface)) < 0) + { + perror("ioctl SIOCGIFHWADDR"); + close(sock); + return -1; + } + + /* the socket only exists for the ioctl; close it so callers don't leak one per call */ + close(sock); + + /* unsigned accumulation keeps the << 24 well defined */ + mac = (uint32_t)(iface.ifr_hwaddr.sa_data[0] & 0xff); + mac |= (uint32_t)(iface.ifr_hwaddr.sa_data[1] & 0xff) << 8; + mac |= (uint32_t)(iface.ifr_hwaddr.sa_data[2] & 0xff) << 16; + mac |= (uint32_t)(iface.ifr_hwaddr.sa_data[3] & 0xff) << 24; + return (int)mac; +} + /* * Local variables: * c-indent-level: 4 diff --git a/src/apps/kernel/linux/pvfs2-client.c b/src/apps/kernel/linux/pvfs2-client.c index 09d2bad..c67321c 100644 --- a/src/apps/kernel/linux/pvfs2-client.c +++ b/src/apps/kernel/linux/pvfs2-client.c @@ -38,6 +38,9 @@ static char s_client_core_path[PATH_MAX]; #define DEFAULT_LOGFILE "/tmp/pvfs2-client.log" +#define CLIENT_RESTART_INTERVAL_SECS 10 +#define CLIENT_MAX_RESTARTS 10 + typedef struct { int verbose; @@ -59,6 +62,7 @@
typedef struct char *dev_buffer_count; char *dev_buffer_size; char *logtype; + char *events; } options_t; static void client_sig_handler(int signum); @@ -172,6 +176,10 @@ static int monitor_pvfs2_client(options_t *opts) int dev_init_failures = 0; char* arg_list[128] = {NULL}; int arg_index; + int restart_count = 0; + struct timeval last_restart, now; + + gettimeofday(&last_restart, NULL); assert(opts); @@ -192,9 +200,12 @@ static int monitor_pvfs2_client(options_t *opts) } /* get rid of stdout/stderr/stdin */ - freopen("/dev/null", "r", stdin); - freopen("/dev/null", "w", stdout); - freopen("/dev/null", "w", stderr); + if(!freopen("/dev/null", "r", stdin)) + gossip_err("Error: failed to reopen stdin.\n"); + if(!freopen("/dev/null", "w", stdout)) + gossip_err("Error: failed to reopen stdout.\n"); + if(!freopen("/dev/null", "w", stderr)) + gossip_err("Error: failed to reopen stderr.\n"); wpid = waitpid(core_pid, &ret, 0); assert(wpid != -1); @@ -239,6 +250,7 @@ static int monitor_pvfs2_client(options_t *opts) break; } core_pid = -1; + sleep(1); continue; } @@ -252,6 +264,13 @@ static int monitor_pvfs2_client(options_t *opts) exit(1); } + /* catch special case of exiting due to inability to remount */ + /* we want to try again in this case. 
*/ + if (WEXITSTATUS(ret) == (unsigned char)-PVFS_EAGAIN) + { + continue; + } + if ((opts->path[0] != '/') && (opts->path [0] != '.')) { printf("*** The pvfs2-client-core has exited ***\n"); @@ -266,22 +285,56 @@ static int monitor_pvfs2_client(options_t *opts) { dev_init_failures = 0; - if (opts->verbose) + if(!strcmp(opts->logtype, "file")) { - printf("Child process with pid %d was killed by an " - "uncaught signal %d\n", core_pid, - WTERMSIG(ret)); + gossip_enable_file(opts->logfile, "a"); } + else if(!strcmp(opts->logtype, "syslog")) + { + gossip_enable_syslog(LOG_INFO); + } + else + { + gossip_enable_stderr(); + } + + gossip_err("Child process with pid %d was killed by " + "signal %d\n", core_pid, WTERMSIG(ret)); core_pid = -1; + + gettimeofday(&now, NULL); + + if(((now.tv_sec + now.tv_usec*1e-6) - + (last_restart.tv_sec + last_restart.tv_usec*1e-6)) + < CLIENT_RESTART_INTERVAL_SECS) + { + if(restart_count > CLIENT_MAX_RESTARTS) + { + gossip_err("Chld process is restarting too quickly " + "(within %d secs) after %d attempts! 
" + "Aborting the client.\n", + CLIENT_RESTART_INTERVAL_SECS, restart_count); + exit(1); + } + } + else + { + /* reset restart count */ + restart_count = 0; + } + + gossip_disable(); + + last_restart = now; continue; } } else { - sleep(1); - arg_list[0] = PVFS2_CLIENT_CORE_NAME; arg_index = 1; + + arg_list[arg_index++] = "--child"; arg_list[arg_index++] = "-a"; arg_list[arg_index++] = opts->acache_timeout; arg_list[arg_index++] = "-n"; @@ -369,6 +422,12 @@ static int monitor_pvfs2_client(options_t *opts) arg_list[arg_index+1] = opts->dev_buffer_size; arg_index+=2; } + if(opts->events) + { + arg_list[arg_index] = "--events"; + arg_list[arg_index+1] = opts->events; + arg_index+=2; + } if(opts->verbose) { @@ -417,8 +476,11 @@ static void print_help(char *progname) printf("--gossip-mask=MASK_LIST gossip logging mask\n"); printf("-p PATH, --path PATH execute pvfs2-client at " "PATH\n"); + printf("--desc-count=VALUE overrides the default # of kernel buffer descriptors\n"); + printf("--desc-size=VALUE overrides the default size of each kernel buffer descriptor\n"); printf("--logstamp=none|usec|datetime override default log message time stamp format\n"); printf("--logtype=file|syslog specify writing logs to file or syslog\n"); + printf("--events=EVENTS enable tracing of certain EVENTS\n"); } static void parse_args(int argc, char **argv, options_t *opts) @@ -449,6 +511,7 @@ static void parse_args(int argc, char **argv, options_t *opts) {"gossip-mask",1,0,0}, {"path",1,0,0}, {"logstamp",1,0,0}, + {"events",1,0,0}, {0,0,0,0} }; @@ -557,6 +620,11 @@ static void parse_args(int argc, char **argv, options_t *opts) opts->gossip_mask = optarg; break; } + else if (strcmp("events", cur_option) == 0) + { + opts->events = optarg; + } + break; case 'h': do_help: diff --git a/src/apps/user/getmattr.c b/src/apps/user/getmattr.c new file mode 100644 index 0000000..de2b6aa --- /dev/null +++ b/src/apps/user/getmattr.c @@ -0,0 +1,200 @@ +/* + * (C) 2001 Clemson University and The University 
of Chicago + * + * See COPYING in top-level directory. + */ + +#include "pvfs2-config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pvfs2.h" +#include "pvfs2-mirror.h" + +struct options_t +{ + char *filename; + int32_t copies; + int32_t mode; +}; + + +static int parse_args(int argc, char **argv, struct options_t *my_args); +static void usage(void); + +/*This program retrieves the mirroring mode and/or the number of mirror copies*/ +/*for a given file. Since these values are numeric, we could NOT use */ +/*getfattr(). This program is intended to be used when the PVFS client is in */ +/*kernel mode. */ +int main(int argc, char **argv) +{ + struct options_t my_args = { .filename = NULL + ,.copies = 0 + ,.mode = 0 + }; + + int ret; + int copies=0, mode=0; + + /*Parse the command line*/ + ret = parse_args(argc, argv, &my_args); + if (ret) + { + printf("Error parsing the command line : %d\n",ret); + exit(ret); + } + + /*Get the mirroring attributes for the given file*/ + if (my_args.mode) { +#ifdef HAVE_GETXATTR_EXTRA_ARGS + ret = getxattr(my_args.filename + ,"user.pvfs2.mirror.mode" + ,&mode + ,sizeof(mode) + ,0 + ,0 ); +#else + ret = getxattr(my_args.filename + ,"user.pvfs2.mirror.mode" + ,&mode + ,sizeof(mode) ); +#endif + if (!ret) + perror("Failure to get mirror mode"); + else { + printf("Mirroring Mode : "); + + switch((MIRROR_MODE)mode) + { + case NO_MIRRORING: + { + printf("Turned OFF \n"); + break; + } + case MIRROR_ON_IMMUTABLE: + { + printf("Create Mirror when IMMUTABLE is set\n"); + break; + } + default: + { + printf("currently unsupported(%d).\n",mode); + break; + } + }/*end switch*/ + }/*end if*/ + }/*end if mode*/ + + if (my_args.copies){ +#ifdef HAVE_GETXATTR_EXTRA_ARGS + ret = getxattr(my_args.filename + ,"user.pvfs2.mirror.copies" + ,&(copies) + ,sizeof(copies) + ,0 + ,0); +#else + ret = getxattr(my_args.filename + ,"user.pvfs2.mirror.copies" + 
,&(copies) + ,sizeof(copies) ); +#endif + if (!ret) + perror("Failure to get mirror copies"); + else + printf("Number of Mirrored Copies : %d\n",copies); + }/*end if copies*/ + + exit(0); +} /*end main*/ + + + +/* parse_args() + * + * parses command line arguments + * + */ +static int parse_args(int argc, char **argv, struct options_t *my_args) +{ + int one_opt = 0; + + /*c=copies, m=mode, f=filename(may include path), h|? = help*/ + char flags[] = "cmf:h?"; + + /*must have the filename, at a minimum*/ + if (argc == 1) + usage(); + + while((one_opt = getopt(argc, argv, flags)) != EOF) + { + switch(one_opt) + { + case('c'): + { my_args->copies = 1; + break; + } + case('m'): + { my_args->mode = 1; + break; + } + case('f'): + { + my_args->filename = optarg; + break; + } + case('h'): + case('?'): + default: + { + usage(); + break; + } + } + }/*end while*/ + + /*filename is required*/ + if (my_args->filename == NULL ) + usage(); + + /*get all attributes if none is specified*/ + if (my_args->copies == 0 && my_args->mode == 0) { + my_args->copies = my_args->mode = 1; + } + + return(0); +}/*end function parse_args*/ + + +static void usage(void) +{ + fprintf(stderr,"getmattr [-c] [-m] [-h] -f file\n"); + fprintf(stderr,"\t-c : Retrieve the number of mirror copies\n" + "\t-m : Retrieve the mirroring mode\n" + "\t-h : Display this message\n\n" + "Retrieve copies and mode when none specified. 
Filename " + "is required.\n"); + exit(0); +}/*end function usage()*/ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/apps/user/module.mk.in b/src/apps/user/module.mk.in new file mode 100644 index 0000000..1d25032 --- /dev/null +++ b/src/apps/user/module.mk.in @@ -0,0 +1,7 @@ +DIR := src/apps/user + +USERSRC := \ + $(DIR)/getmattr.c \ + $(DIR)/setmattr.c + + diff --git a/src/apps/user/setmattr.c b/src/apps/user/setmattr.c new file mode 100644 index 0000000..aaf4b4b --- /dev/null +++ b/src/apps/user/setmattr.c @@ -0,0 +1,191 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include "pvfs2-config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pvfs2.h" +#include "pvfs2-mirror.h" + +struct options_t +{ + char *filename; + int32_t copies; + int32_t mode; +}; + + +static int parse_args(int argc, char **argv, struct options_t *my_args); +static void usage(void); + +/*This program sets the mirroring mode and/or the number of mirror copies for */ +/*a given file. Since these values are numeric, we could NOT use setfattr(). */ +/*This program is to be used when the PVFS client is in kernel mode. 
*/ +int main(int argc, char **argv) +{ + struct options_t my_args = { .filename = NULL + ,.copies = -1 + ,.mode = -1 + }; + + int ret; + + /*Parse the command line*/ + ret = parse_args(argc, argv, &my_args); + if (ret) + { + printf("Error parsing the command line : %d\n",ret); + exit(ret); + } + + /*Set the mirroring attributes for the given file*/ + if (my_args.mode > 0) { + printf("Setting mirror mode to %d\n" + ,my_args.mode); +#ifdef HAVE_SETXATTR_EXTRA_ARGS + ret = setxattr(my_args.filename + ,"user.pvfs2.mirror.mode" + ,&(my_args.mode) + ,sizeof(my_args.mode) + ,0 + ,0); +#else + ret = setxattr(my_args.filename + ,"user.pvfs2.mirror.mode" + ,&(my_args.mode) + ,sizeof(my_args.mode) + ,0); +#endif + if (ret) + perror("Failure to set mirror mode"); + } + + if (my_args.copies >= 0){ + printf("Setting number of mirrored copies to %d\n" + ,my_args.copies); +#ifdef HAVE_SETXATTR_EXTRA_ARGS + ret = setxattr(my_args.filename + ,"user.pvfs2.mirror.copies" + ,&(my_args.copies) + ,sizeof(my_args.copies) + ,0 + ,0); +#else + ret = setxattr(my_args.filename + ,"user.pvfs2.mirror.copies" + ,&(my_args.copies) + ,sizeof(my_args.copies) + ,0); +#endif + if (ret) + perror("Failure to set mirror copies"); + } + + exit(0); +} /*end program*/ + +/* parse_args() + * + * parses command line arguments + * + */ +static int parse_args(int argc, char **argv, struct options_t *my_args) +{ + int one_opt = 0; + int j; + + /*c=copies, m=mode, f=filename(may include path), h|? 
= help*/ + char flags[] = "c:m:f:h?"; + + + /*no arguments*/ + if (argc == 1) + usage(); + + while((one_opt = getopt(argc, argv, flags)) != EOF) + { + switch(one_opt) + { + case('c'): + { for(j=0; jcopies = atoi(optarg); + else + usage(); + break; + } + case('m'): + { for(j=0; jmode = atoi(optarg); + else + usage(); + break; + } + case('f'): + { + my_args->filename = optarg; + break; + } + case('h'): + case('?'): + default: + { + usage(); + } + } + }/*end while*/ + + /*filename is required*/ + if (my_args->filename == NULL ) + usage(); + + /*at least one (copies|mode) is required*/ + if (my_args->copies < 0 && my_args->mode < 0) + usage(); + + /*mode must be valid*/ + if (my_args->mode > 0 && (my_args->mode != NO_MIRRORING && + my_args->mode != MIRROR_ON_IMMUTABLE) ) + usage(); + + return(0); +}/*end function parse_args*/ + + +static void usage(void) +{ + fprintf(stderr,"setmattr {-c copies} {-m mode} {-h} -f file\n"); + fprintf(stderr,"\tcopies : positive numeric value\n" + "\t mode : 100 => No Mirroring\n" + "\t 200 => Create Mirror when IMMUTABLE is set\n" + "\t -h : Display this message\n" + "\t file : file to mirror (may include path)\n"); + exit(0); +}/*end function usage()*/ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/apps/vis/pvfs2-vis.c b/src/apps/vis/pvfs2-vis.c index dc0e3ce..798ecc0 100644 --- a/src/apps/vis/pvfs2-vis.c +++ b/src/apps/vis/pvfs2-vis.c @@ -179,7 +179,7 @@ int pvfs2_vis_start(char* path, int update_interval) next_id_array, io_server_count, HISTORY, - NULL); + NULL, NULL); if (ret < 0) { return ret; @@ -294,7 +294,7 @@ static void *poll_for_updates(void *args) next_id_array, server_count, history_count, - NULL); + NULL, NULL); if (ret < 0) { pint_vis_error = ret; diff --git a/src/client/sysint/acache.c b/src/client/sysint/acache.c index e8f6817..0e5e16d 100644 --- a/src/client/sysint/acache.c +++ b/src/client/sysint/acache.c @@ -5,6 +5,7 @@ */ 
#include +#include #include "pvfs2-attr.h" #include "acache.h" @@ -20,13 +21,17 @@ */ /* compile time defaults */ -enum { -ACACHE_DEFAULT_TIMEOUT_MSECS = 5000, -ACACHE_DEFAULT_SOFT_LIMIT = 5120, -ACACHE_DEFAULT_HARD_LIMIT = 10240, -ACACHE_DEFAULT_RECLAIM_PERCENTAGE = 25, -ACACHE_DEFAULT_REPLACE_ALGORITHM = LEAST_RECENTLY_USED, -}; +#define ACACHE_DEFAULT_TIMEOUT_MSECS 5000 +#define ACACHE_DEFAULT_SOFT_LIMIT 5120 +#define ACACHE_DEFAULT_HARD_LIMIT 10240 +#define ACACHE_DEFAULT_RECLAIM_PERCENTAGE 25 +#define ACACHE_DEFAULT_REPLACE_ALGORITHM LEAST_RECENTLY_USED + +#define STATIC_ACACHE_DEFAULT_TIMEOUT_MSECS 7200000 /* 2 hours */ +#if 0 +#define CAPABILITIES_ACACHE_DEFAULT_TIMEOUT_MSECS 7200000 /* 2 hours */ +#endif +#define DYNAMIC_ACACHE_DEFAULT_TIMEOUT_MSECS 5000 /* 5 seconds */ struct PINT_perf_key acache_keys[] = { @@ -46,30 +51,56 @@ struct PINT_perf_key acache_keys[] = /* data to be stored in a cached entry */ struct acache_payload { - PVFS_object_ref refn; /* PVFS2 object reference */ - PVFS_object_attr attr; /* cached attributes */ - int attr_status; /* are the attributes valid? */ - PVFS_size size; /* cached size */ - int size_status; /* is the size valid? */ + /** Non-static objects */ + PVFS_object_ref refn; /**< PVFS2 object reference */ + PVFS_object_attr attr; /**< cached attributes */ + int attr_status; /**< are the attributes valid? */ + PVFS_size size; /**< cached size */ + int size_status; /**< is the size valid? */ + + /** Static objects */ + uint32_t mask; + + PVFS_ds_type objtype; + PINT_dist *dist; + uint32_t dist_size; + PVFS_handle *dfile_array; + uint32_t dfile_count; + PVFS_handle *mirror_dfile_array; + uint32_t mirror_copies_count; + + /* Additional time stamps */ + #if 0 + uint64_t msecs_capabilities; /**< Time when the capabilities attr was refreshed. */ + #endif + uint64_t msecs_dynamic; /**< Time when the dynamic attrs were refreshed. 
*/ }; - + static struct PINT_tcache* acache = NULL; static gen_mutex_t acache_mutex = GEN_MUTEX_INITIALIZER; static int acache_compare_key_entry(void* key, struct qhash_head* link); -static int acache_hash_key(void* key, int table_size); static int acache_free_payload(void* payload); + +static int acache_hash_key(void* key, int table_size); static struct PINT_perf_counter* acache_pc = NULL; +static int set_tcache_defaults(struct PINT_tcache* instance); + +static void load_payload(struct PINT_tcache* instance, + PVFS_object_ref refn, + void* payload, + struct PINT_perf_counter* pc); + /** * Enables perf counter instrumentation of the acache */ void PINT_acache_enable_perf_counter( - struct PINT_perf_counter* pc) /**< perf counter instance to use */ + struct PINT_perf_counter* pc_in) /**< counter for cache fields */ { gen_mutex_lock(&acache_mutex); - acache_pc = pc; + acache_pc = pc_in; assert(acache_pc); /* set initial values */ @@ -81,7 +112,6 @@ void PINT_acache_enable_perf_counter( acache->enable, PINT_PERF_SET); gen_mutex_unlock(&acache_mutex); - return; } @@ -95,7 +125,7 @@ int PINT_acache_initialize(void) gen_mutex_lock(&acache_mutex); - /* create tcache instance */ + /* create tcache instances */ acache = PINT_tcache_initialize(acache_compare_key_entry, acache_hash_key, acache_free_payload, @@ -105,41 +135,27 @@ int PINT_acache_initialize(void) gen_mutex_unlock(&acache_mutex); return(-PVFS_ENOMEM); } - - /* fill in defaults that are specific to acache */ + ret = PINT_tcache_set_info(acache, TCACHE_TIMEOUT_MSECS, - ACACHE_DEFAULT_TIMEOUT_MSECS); + STATIC_ACACHE_DEFAULT_TIMEOUT_MSECS); if(ret < 0) { PINT_tcache_finalize(acache); + /* PINT_tcache_finalize(static_acache); */ gen_mutex_unlock(&acache_mutex); return(ret); } - ret = PINT_tcache_set_info(acache, TCACHE_HARD_LIMIT, - ACACHE_DEFAULT_HARD_LIMIT); - if(ret < 0) - { - PINT_tcache_finalize(acache); - gen_mutex_unlock(&acache_mutex); - return(ret); - } - ret = PINT_tcache_set_info(acache, TCACHE_SOFT_LIMIT, 
- ACACHE_DEFAULT_SOFT_LIMIT); - if(ret < 0) - { - PINT_tcache_finalize(acache); - gen_mutex_unlock(&acache_mutex); - return(ret); - } - ret = PINT_tcache_set_info(acache, TCACHE_RECLAIM_PERCENTAGE, - ACACHE_DEFAULT_RECLAIM_PERCENTAGE); + + /* fill in defaults that are common to both */ + ret = set_tcache_defaults(acache); if(ret < 0) { PINT_tcache_finalize(acache); + /* PINT_tcache_finalize(static_acache); */ gen_mutex_unlock(&acache_mutex); return(ret); } - + gen_mutex_unlock(&acache_mutex); return(0); } @@ -149,7 +165,6 @@ void PINT_acache_finalize(void) { gen_mutex_lock(&acache_mutex); - assert(acache != NULL); PINT_tcache_finalize(acache); acache = NULL; @@ -167,9 +182,22 @@ int PINT_acache_get_info( unsigned int* arg) /**< output value */ { int ret = -1; - + gen_mutex_lock(&acache_mutex); - ret = PINT_tcache_get_info(acache, option, arg); + + if(option & STATIC_ACACHE_OPT) + { + /* this is a static acache option; strip mask and pass along to + * tcache + */ + option -= STATIC_ACACHE_OPT; + ret = PINT_tcache_get_info(acache, option, arg); + } + else + { + ret = PINT_tcache_get_info(acache, option, arg); + } + gen_mutex_unlock(&acache_mutex); return(ret); @@ -187,17 +215,39 @@ int PINT_acache_set_info( int ret = -1; gen_mutex_lock(&acache_mutex); - ret = PINT_tcache_set_info(acache, option, arg); - /* record any resulting parameter changes */ - PINT_perf_count(acache_pc, PERF_ACACHE_SOFT_LIMIT, - acache->soft_limit, PINT_PERF_SET); - PINT_perf_count(acache_pc, PERF_ACACHE_HARD_LIMIT, - acache->hard_limit, PINT_PERF_SET); - PINT_perf_count(acache_pc, PERF_ACACHE_ENABLED, - acache->enable, PINT_PERF_SET); - PINT_perf_count(acache_pc, PERF_ACACHE_NUM_ENTRIES, - acache->num_entries, PINT_PERF_SET); + if(option & STATIC_ACACHE_OPT) + { + /* this is a static acache option; strip mask and pass along to + * tcache + */ + option -= STATIC_ACACHE_OPT; + ret = PINT_tcache_set_info(acache, option, arg); + + /* record any parameter changes that may have resulted*/ + 
PINT_perf_count(acache_pc, PERF_ACACHE_SOFT_LIMIT, + acache->soft_limit, PINT_PERF_SET); + PINT_perf_count(acache_pc, PERF_ACACHE_HARD_LIMIT, + acache->hard_limit, PINT_PERF_SET); + PINT_perf_count(acache_pc, PERF_ACACHE_ENABLED, + acache->enable, PINT_PERF_SET); + PINT_perf_count(acache_pc, PERF_ACACHE_NUM_ENTRIES, + acache->num_entries, PINT_PERF_SET); + } + else + { + ret = PINT_tcache_set_info(acache, option, arg); + + /* record any parameter changes that may have resulted*/ + PINT_perf_count(acache_pc, PERF_ACACHE_SOFT_LIMIT, + acache->soft_limit, PINT_PERF_SET); + PINT_perf_count(acache_pc, PERF_ACACHE_HARD_LIMIT, + acache->hard_limit, PINT_PERF_SET); + PINT_perf_count(acache_pc, PERF_ACACHE_ENABLED, + acache->enable, PINT_PERF_SET); + PINT_perf_count(acache_pc, PERF_ACACHE_NUM_ENTRIES, + acache->num_entries, PINT_PERF_SET); + } gen_mutex_unlock(&acache_mutex); @@ -222,6 +272,12 @@ int PINT_acache_get_cached_entry( struct PINT_tcache_entry* tmp_entry; struct acache_payload* tmp_payload; int status; + /* Storage of current time */ + struct timeval current_time = { 0, 0}; + uint64_t current_time_msecs = 0; + /* Flags indicating whether dynamic attrs or capabilities attr have expired. 
*/ + /* unsigned char capabilities_expired = 0; */ + unsigned char dynamic_attrs_expired = 0; gossip_debug(GOSSIP_ACACHE_DEBUG, "acache: get_cached_entry(): H=%llu\n", llu(refn.handle)); @@ -229,26 +285,78 @@ int PINT_acache_get_cached_entry( /* assume everything is timed out for starters */ *attr_status = -PVFS_ETIME; *size_status = -PVFS_ETIME; + attr->mask = 0; gen_mutex_lock(&acache_mutex); - /* lookup entry */ + /* lookup */ ret = PINT_tcache_lookup(acache, &refn, &tmp_entry, &status); if(ret < 0 || status != 0) { + PINT_perf_count(acache_pc, PERF_ACACHE_MISSES, 1, PINT_PERF_ADD); gossip_debug(GOSSIP_ACACHE_DEBUG, "acache: miss: H=%llu\n", llu(refn.handle)); - PINT_perf_count(acache_pc, PERF_ACACHE_MISSES, 1, PINT_PERF_ADD); + tmp_payload = NULL; + } + else + { + PINT_perf_count(acache_pc, PERF_ACACHE_HITS, 1, PINT_PERF_ADD); + tmp_payload = tmp_entry->payload; + } + + if(!tmp_payload) + { + /* missed everything */ gen_mutex_unlock(&acache_mutex); return(ret); } - tmp_payload = tmp_entry->payload; - - gossip_debug(GOSSIP_ACACHE_DEBUG, "acache: status=%d, attr_status=%d, size_status=%d\n", + + /* Get the time of day and store as milliseconds */ + gettimeofday(&current_time, NULL); + current_time_msecs = current_time.tv_sec * 1000; + current_time_msecs += current_time.tv_usec / 1000; + + #if 0 + if((current_time_msecs - tmp_payload->msecs_capabilities) > + CAPABILITIES_ACACHE_DEFAULT_TIMEOUT_MSECS) + { + capabilities_expired = 1; + /* Invalidate entire entry */ + PINT_tcache_delete(acache, tmp_entry); + PINT_perf_count(acache_pc, PERF_ACACHE_DELETIONS, 1, + PINT_PERF_ADD); + /* set the new current number of entries */ + PINT_perf_count(acache_pc, PERF_ACACHE_NUM_ENTRIES, + acache->num_entries, PINT_PERF_SET); + /* return since record is invalid */ + gen_mutex_unlock(&acache_mutex); + return(ret); /* Todo return indicating invalid credentials or -PVFS_ENOENT? */ + } + #endif + + /* Check to see if dynamic attrs have expired. 
*/ + if((current_time_msecs - tmp_payload->msecs_dynamic) > + DYNAMIC_ACACHE_DEFAULT_TIMEOUT_MSECS) + { + dynamic_attrs_expired = 1; + /* Mark the dynamic attrs invalid */ + tmp_payload->attr_status = -PVFS_ETIME; + *attr_status = -PVFS_ETIME; + tmp_payload->size_status = -PVFS_ETIME; + *size_status = -PVFS_ETIME; + } + + /* Reset Dynamic attrs timestamp since it was hit */ + tmp_payload->msecs_dynamic = current_time_msecs; + +//#if 0 + gossip_debug(GOSSIP_ACACHE_DEBUG, "acache: " + "status=%d, attr_status=%d, size_status=%d\n", status, tmp_payload->attr_status, tmp_payload->size_status); +//#endif - /* copy out attributes if valid */ - if(tmp_payload->attr_status == 0) + /* copy out non-static attributes if valid */ + if(tmp_payload && tmp_payload->attr_status == 0) { gossip_debug(GOSSIP_ACACHE_DEBUG, "acache: copying out attr.\n"); ret = PINT_copy_object_attr(attr, &(tmp_payload->attr)); @@ -261,13 +369,77 @@ int PINT_acache_get_cached_entry( } /* copy out size if valid */ - if(tmp_payload->size_status == 0) + if(tmp_payload && tmp_payload->size_status == 0) { gossip_debug(GOSSIP_ACACHE_DEBUG, "acache: copying out size.\n"); *size = tmp_payload->size; *size_status = 0; } + /* copy out static attributes */ + if(tmp_payload) + { + attr->mask |= tmp_payload->mask; + if(tmp_payload->mask & PVFS_ATTR_COMMON_TYPE) + { + attr->objtype = tmp_payload->objtype; + } + + if(tmp_payload->mask & PVFS_ATTR_META_DFILES) + { + if(attr->u.meta.dfile_array) + free(attr->u.meta.dfile_array); + attr->u.meta.dfile_array = + malloc(tmp_payload->dfile_count*sizeof(PVFS_handle)); + if(!attr->u.meta.dfile_array) + { + gen_mutex_unlock(&acache_mutex); + return(-PVFS_ENOMEM); + } + memcpy(attr->u.meta.dfile_array, tmp_payload->dfile_array, + tmp_payload->dfile_count*sizeof(PVFS_handle)); + attr->u.meta.dfile_count = tmp_payload->dfile_count; + } + + if(tmp_payload->mask & PVFS_ATTR_META_MIRROR_DFILES) + { + if(attr->u.meta.mirror_dfile_array) + free(attr->u.meta.mirror_dfile_array); + 
attr->u.meta.mirror_dfile_array = + malloc(tmp_payload->dfile_count*sizeof(PVFS_handle)* + tmp_payload->mirror_copies_count); + + if(!attr->u.meta.mirror_dfile_array) + { + gen_mutex_unlock(&acache_mutex); + return(-PVFS_ENOMEM); + } + + memcpy(attr->u.meta.mirror_dfile_array + ,tmp_payload->mirror_dfile_array + ,tmp_payload->dfile_count*sizeof(PVFS_handle)* + tmp_payload->mirror_copies_count); + attr->u.meta.mirror_copies_count = + tmp_payload->mirror_copies_count; + } + + if(tmp_payload->mask & PVFS_ATTR_META_DIST) + { + if(attr->u.meta.dist) + PINT_dist_free(attr->u.meta.dist); + attr->u.meta.dist = PINT_dist_copy(tmp_payload->dist); + if(!attr->u.meta.dist) + { + if(attr->u.meta.dfile_array) + free(attr->u.meta.dfile_array); + gen_mutex_unlock(&acache_mutex); + return(-PVFS_ENOMEM); + } + attr->u.meta.dist_size = tmp_payload->dist_size; + } + *attr_status = 0; + } + gen_mutex_unlock(&acache_mutex); gossip_debug(GOSSIP_ACACHE_DEBUG, @@ -275,14 +447,12 @@ int PINT_acache_get_cached_entry( "size_status=%d, attr_status=%d\n", llu(refn.handle), *size_status, *attr_status); - if(*size_status == 0 || *attr_status == 0) + if(*size_status == 0 || *attr_status == 0) /* TODO what about the static attrs? 
*/ { /* return success if we got _anything_ out of the cache */ - PINT_perf_count(acache_pc, PERF_ACACHE_HITS, 1, PINT_PERF_ADD); return(0); } - PINT_perf_count(acache_pc, PERF_ACACHE_MISSES, 1, PINT_PERF_ADD); return(-PVFS_ETIME); } @@ -301,7 +471,7 @@ void PINT_acache_invalidate( gen_mutex_lock(&acache_mutex); - /* find out if the entry is in the cache */ + /* find out if we have non-static items cached */ ret = PINT_tcache_lookup(acache, &refn, &tmp_entry, @@ -312,7 +482,7 @@ void PINT_acache_invalidate( PINT_perf_count(acache_pc, PERF_ACACHE_DELETIONS, 1, PINT_PERF_ADD); } - + /* set the new current number of entries */ PINT_perf_count(acache_pc, PERF_ACACHE_NUM_ENTRIES, acache->num_entries, PINT_PERF_SET); @@ -373,20 +543,10 @@ int PINT_acache_update( PVFS_object_attr *attr, /**< attributes to copy into cache */ PVFS_size* size) /**< logical file size (NULL if not available) */ { + struct acache_payload* tmp_payload = NULL; + uint32_t old_mask; int ret = -1; - struct PINT_tcache_entry* tmp_entry; - struct acache_payload* tmp_payload; - int status; - int purged; - unsigned int enabled; - /* skip out immediately if the cache is disabled */ - PINT_tcache_get_info(acache, TCACHE_ENABLE, &enabled); - if(!enabled) - { - return(0); - } - gossip_debug(GOSSIP_ACACHE_DEBUG, "acache: update(): H=%llu\n", llu(refn.handle)); @@ -394,92 +554,144 @@ int PINT_acache_update( { return(-PVFS_EINVAL); } - - /* create new payload with updated information */ - tmp_payload = (struct acache_payload*)calloc(1, sizeof(struct - acache_payload)); - tmp_payload->refn = refn; - tmp_payload->attr_status = -PVFS_ETIME; - tmp_payload->size_status = -PVFS_ETIME; - - /* fill in attributes */ - if(attr) + + /* do we have static fields? 
*/ + if(attr && (attr->mask & PVFS_STATIC_ATTR_MASK)) { - ret = PINT_copy_object_attr(&(tmp_payload->attr), attr); - if(ret < 0) + tmp_payload = + (struct acache_payload*)calloc(1, sizeof(*tmp_payload)); + if(!tmp_payload) { - free(tmp_payload); - return(ret); + ret = -PVFS_ENOMEM; + goto err; + } + + tmp_payload->refn = refn; + tmp_payload->mask = attr->mask & PVFS_STATIC_ATTR_MASK; + if(attr->mask & PVFS_ATTR_COMMON_TYPE) + { + tmp_payload->objtype = attr->objtype; + } + if(attr->mask & PVFS_ATTR_META_DFILES) + { + tmp_payload->dfile_array = + malloc(attr->u.meta.dfile_count*sizeof(PVFS_handle)); + if(!tmp_payload->dfile_array) + { + ret = -PVFS_ENOMEM; + goto err; + } + memcpy(tmp_payload->dfile_array, attr->u.meta.dfile_array, + attr->u.meta.dfile_count*sizeof(PVFS_handle)); + tmp_payload->dfile_count = attr->u.meta.dfile_count; + } + if(attr->mask & PVFS_ATTR_META_MIRROR_DFILES) + { + tmp_payload->mirror_dfile_array = + malloc(attr->u.meta.dfile_count * sizeof(PVFS_handle) * + attr->u.meta.mirror_copies_count); + if (!tmp_payload->mirror_dfile_array) + { + ret = -PVFS_ENOMEM; + goto err; + } + memcpy(tmp_payload->mirror_dfile_array + ,attr->u.meta.mirror_dfile_array + ,attr->u.meta.dfile_count * sizeof(PVFS_handle) * + attr->u.meta.mirror_copies_count); + tmp_payload->mirror_copies_count = + attr->u.meta.mirror_copies_count; + } + if(attr->mask & PVFS_ATTR_META_DIST) + { + tmp_payload->dist = PINT_dist_copy(attr->u.meta.dist); + if(!tmp_payload->dist) + { + ret = -PVFS_ENOMEM; + goto err; + } + tmp_payload->dist_size = attr->u.meta.dist_size; } - tmp_payload->attr_status = 0; } - - /* fill in size */ - if(size) + + /* do we have size or other non-static fields? TODO non-static fields in the attr-mask? */ + if(size || (attr && (attr->mask & (~(PVFS_STATIC_ATTR_MASK))))) { - tmp_payload->size = *size; - tmp_payload->size_status = 0; + /* Allocate memory for acache payload if not previously done. 
*/ + if(!tmp_payload) + { + tmp_payload = + (struct acache_payload*)calloc(1, sizeof(*tmp_payload)); + if(!tmp_payload) + { + ret = -PVFS_ENOMEM; + goto err; + } + tmp_payload->refn = refn; + } + + tmp_payload->attr_status = -PVFS_ETIME; + tmp_payload->size_status = -PVFS_ETIME; + + if(attr && (attr->mask & (~(PVFS_STATIC_ATTR_MASK)))) + { + /* modify mask temporarily so that we only copy non-static fields + * here + */ + old_mask = attr->mask; + attr->mask = (attr->mask & (~(PVFS_STATIC_ATTR_MASK))); + ret = PINT_copy_object_attr(&(tmp_payload->attr), attr); + if(ret < 0) + { + goto err; + } + tmp_payload->attr_status = 0; + attr->mask = old_mask; + } + + if(size) + { + tmp_payload->size = *size; + tmp_payload->size_status = 0; + } + } - + +#if 0 gossip_debug(GOSSIP_ACACHE_DEBUG, "acache: update(): attr_status=%d, size_status=%d\n", tmp_payload->attr_status, tmp_payload->size_status); - +#endif + gen_mutex_lock(&acache_mutex); - /* find out if the entry is already in the cache */ - ret = PINT_tcache_lookup(acache, - &refn, - &tmp_entry, - &status); - if(ret == 0) + if(tmp_payload) { - /* found match in cache; destroy old payload, replace, and - * refresh time stamp - */ - acache_free_payload(tmp_entry->payload); - tmp_entry->payload = tmp_payload; - ret = PINT_tcache_refresh_entry(acache, tmp_entry); - /* this counts as an update of an existing entry */ - PINT_perf_count(acache_pc, PERF_ACACHE_UPDATES, 1, PINT_PERF_ADD); + load_payload(acache, refn, tmp_payload, acache_pc); } - else + + gen_mutex_unlock(&acache_mutex); + + return(0); + +err: + if(tmp_payload) { - /* not found in cache; insert new payload*/ - ret = PINT_tcache_insert_entry(acache, &refn, tmp_payload, &purged); - /* the purged variable indicates how many entries had to be purged - * from the tcache to make room for this new one - */ - if(purged == 1) + if(tmp_payload->dfile_array) { - /* since only one item was purged, we count this as one item being - * replaced rather than as a purge and an insert 
- */ - PINT_perf_count(acache_pc, PERF_ACACHE_REPLACEMENTS, purged, - PINT_PERF_ADD); + free(tmp_payload->dfile_array); } - else + if(tmp_payload->mirror_dfile_array) { - /* otherwise we just purged as part of reclaimation */ - /* if we didn't purge anything, then the "purged" variable will - * be zero and this counter call won't do anything. - */ - PINT_perf_count(acache_pc, PERF_ACACHE_PURGES, purged, - PINT_PERF_ADD); + free(tmp_payload->mirror_dfile_array); } + if(tmp_payload->dist) + { + PINT_dist_free(tmp_payload->dist); + } + free(tmp_payload); + PINT_free_object_attr(&tmp_payload->attr); + free(tmp_payload); } - PINT_perf_count(acache_pc, PERF_ACACHE_NUM_ENTRIES, - acache->num_entries, PINT_PERF_SET); - - gen_mutex_unlock(&acache_mutex); - - /* cleanup if we did not succeed for some reason */ - if(ret < 0) - { - acache_free_payload(tmp_payload); - } - - gossip_debug(GOSSIP_ACACHE_DEBUG, "acache: update(): return=%d\n", ret); - return(ret); } @@ -534,12 +746,132 @@ static int acache_free_payload(void* payload) { struct acache_payload* tmp_payload = (struct acache_payload*)payload; - PINT_free_object_attr(&tmp_payload->attr); + if(&tmp_payload->attr) + PINT_free_object_attr(&tmp_payload->attr); + + if(tmp_payload->dfile_array) + { + free(tmp_payload->dfile_array); + } + if(tmp_payload->mirror_dfile_array) + { + free(tmp_payload->mirror_dfile_array); + } + if(tmp_payload->dist) + { + PINT_dist_free(tmp_payload->dist); + } + free(tmp_payload); return(0); } - + +static int set_tcache_defaults(struct PINT_tcache* instance) +{ + int ret; + + ret = PINT_tcache_set_info(instance, TCACHE_HARD_LIMIT, + ACACHE_DEFAULT_HARD_LIMIT); + if(ret < 0) + { + return(ret); + } + ret = PINT_tcache_set_info(instance, TCACHE_SOFT_LIMIT, + ACACHE_DEFAULT_SOFT_LIMIT); + if(ret < 0) + { + return(ret); + } + ret = PINT_tcache_set_info(instance, TCACHE_RECLAIM_PERCENTAGE, + ACACHE_DEFAULT_RECLAIM_PERCENTAGE); + if(ret < 0) + { + return(ret); + } + + return(0); +} + +static void 
load_payload(struct PINT_tcache* instance, + PVFS_object_ref refn, + void* payload, + struct PINT_perf_counter* pc) +{ + int status; + int purged; + struct PINT_tcache_entry* tmp_entry; + int ret; + /* Storage of current time */ + struct timeval current_time = { 0, 0}; + uint64_t current_time_msecs = 0; + + /* find out if the entry is already in the cache */ + ret = PINT_tcache_lookup(instance, + &refn, + &tmp_entry, + &status); + + /* Get the time of day and convert to milliseconds. */ + gettimeofday(&current_time, NULL); + current_time_msecs = current_time.tv_sec * 1000; + current_time_msecs += current_time.tv_usec / 1000; + + if(ret == 0) + { + /* Update the dynamic attrs' timestamp */ + ((struct acache_payload *)payload)->msecs_dynamic = current_time_msecs; + + #if 0 + /* Copy out previous timestamps */ + ((struct acache_payload *)payload)->msecs_capabilities = + ((struct acache_payload *)(tmp_entry->payload))->msecs_capabilities; + #endif + + /* Free the entry's old payload */ + instance->free_payload(tmp_entry->payload); + + /* Point to the new one */ + tmp_entry->payload = payload; + ret = PINT_tcache_refresh_entry(instance, tmp_entry); + /* this counts as an update of an existing entry */ + PINT_perf_count(pc, PERF_ACACHE_UPDATES, 1, PINT_PERF_ADD); + } + else + { + /* Set the timestamps we'll track outside of tcache control */ + /* ((struct acache_payload *)payload)->msecs_capabilities = current_time_msecs; */ + ((struct acache_payload *)payload)->msecs_dynamic = current_time_msecs; + + /* not found in cache; insert new payload*/ + ret = PINT_tcache_insert_entry(instance, + &refn, payload, &purged); + /* the purged variable indicates how many entries had to be purged + * from the tcache to make room for this new one + */ + if(purged == 1) + { + /* since only one item was purged, we count this as one item being + * replaced rather than as a purge and an insert + */ + PINT_perf_count(pc, PERF_ACACHE_REPLACEMENTS, purged, + PINT_PERF_ADD); + } + else + { + /* 
otherwise we just purged as part of reclaimation */ + /* if we didn't purge anything, then the "purged" variable will + * be zero and this counter call won't do anything. + */ + PINT_perf_count(pc, PERF_ACACHE_PURGES, purged, + PINT_PERF_ADD); + } + } + PINT_perf_count(pc, PERF_ACACHE_NUM_ENTRIES, + instance->num_entries, PINT_PERF_SET); + return; +} + /* * Local variables: * c-indent-level: 4 diff --git a/src/client/sysint/acache.h b/src/client/sysint/acache.h index 9b0da89..c0710a4 100644 --- a/src/client/sysint/acache.h +++ b/src/client/sysint/acache.h @@ -77,14 +77,21 @@ /** @see PINT_tcache_options */ #define PINT_acache_options PINT_tcache_options -enum { -ACACHE_TIMEOUT_MSECS = TCACHE_TIMEOUT_MSECS, -ACACHE_NUM_ENTRIES = TCACHE_NUM_ENTRIES, -ACACHE_HARD_LIMIT = TCACHE_HARD_LIMIT, -ACACHE_SOFT_LIMIT = TCACHE_SOFT_LIMIT, -ACACHE_ENABLE = TCACHE_ENABLE, -ACACHE_RECLAIM_PERCENTAGE = TCACHE_RECLAIM_PERCENTAGE, -}; +#define ACACHE_TIMEOUT_MSECS TCACHE_TIMEOUT_MSECS +#define ACACHE_NUM_ENTRIES TCACHE_NUM_ENTRIES +#define ACACHE_HARD_LIMIT TCACHE_HARD_LIMIT +#define ACACHE_SOFT_LIMIT TCACHE_SOFT_LIMIT +#define ACACHE_ENABLE TCACHE_ENABLE +#define ACACHE_RECLAIM_PERCENTAGE TCACHE_RECLAIM_PERCENTAGE + +#define STATIC_ACACHE_OPT 1024 + +#define STATIC_ACACHE_TIMEOUT_MSECS (TCACHE_TIMEOUT_MSECS | STATIC_ACACHE_OPT) +#define STATIC_ACACHE_NUM_ENTRIES (TCACHE_NUM_ENTRIES | STATIC_ACACHE_OPT) +#define STATIC_ACACHE_HARD_LIMIT (TCACHE_HARD_LIMIT | STATIC_ACACHE_OPT) +#define STATIC_ACACHE_SOFT_LIMIT (TCACHE_SOFT_LIMIT | STATIC_ACACHE_OPT) +#define STATIC_ACACHE_ENABLE (TCACHE_ENABLE | STATIC_ACACHE_OPT) +#define STATIC_ACACHE_RECLAIM_PERCENTAGE (TCACHE_RECLAIM_PERCENTAGE | STATIC_ACACHE_OPT) enum { diff --git a/src/client/sysint/client-job-timer.sm b/src/client/sysint/client-job-timer.sm index 96eaadf..c567754 100644 --- a/src/client/sysint/client-job-timer.sm +++ b/src/client/sysint/client-job-timer.sm @@ -7,9 +7,13 @@ #include #include #include +#ifdef WIN32 +#include 
"wincommon.h" +#else #include -#include #include +#endif +#include #include "state-machine.h" #include "client-state-machine.h" diff --git a/src/client/sysint/client-state-machine.c b/src/client/sysint/client-state-machine.c index 53da044..36a8799 100644 --- a/src/client/sysint/client-state-machine.c +++ b/src/client/sysint/client-state-machine.c @@ -23,11 +23,19 @@ #include "gossip.h" #include "pvfs2-util.h" #include "id-generator.h" +#include "ncache.h" +#include "acache.h" +#include "pint-event.h" +#include "pint-hint.h" #define MAX_RETURNED_JOBS 256 job_context_id pint_client_sm_context = -1; +extern int pint_client_pid; + +extern PINT_event_id PINT_client_sys_event_id; + /* used for locally storing completed operations from test() call so that we can retrieve them in testsome() while still making progress @@ -38,6 +46,8 @@ static PINT_smcb *s_completion_list[MAX_RETURNED_JOBS] = {NULL}; static gen_mutex_t s_completion_list_mutex = GEN_MUTEX_INITIALIZER; static gen_mutex_t test_mutex = GEN_MUTEX_INITIALIZER; +static void PINT_sys_release_smcb(PINT_smcb *smcb); + #define CLIENT_SM_ASSERT_INITIALIZED() \ do { assert(pint_client_sm_context != -1); } while(0) @@ -143,7 +153,28 @@ static PVFS_error completion_list_retrieve_completed( if (user_ptr_array) { - user_ptr_array[i] = (void *)sm_p->user_ptr; + /* if this smcb has been set cancelled and is a PVFS_SYS_IO + * state machine then stick the user_ptr of the base frame + * in to the user_ptr_array instead of the standard sm_p + * user_ptr. This prevents segfaults back in + * process_vfs_requests which expects the pointer to be a + * vfs_request. 
+ */ + if( smcb->op_cancelled && smcb->op == PVFS_SYS_IO ) + { + PINT_client_sm *sm_base_p = PINT_sm_frame(smcb, + (-(smcb->frame_count -1))); + assert(sm_base_p); + gossip_debug(GOSSIP_CANCEL_DEBUG, "%s: assignment of " + "PVFS_SYS_IO user_ptr from sm_base_p(%p), " + "user_ptr(%p)\n", __func__, sm_base_p, + sm_base_p->user_ptr); + user_ptr_array[i] = sm_base_p->user_ptr; + } + else + { + user_ptr_array[i] = (void *)sm_p->user_ptr; + } } s_completion_list[i] = NULL; @@ -175,25 +206,29 @@ static inline int cancelled_io_jobs_are_pending(PINT_smcb *smcb) test()). to avoid passing out the same completed op mutliple times, do not add the operation to the completion list until all cancellations on the I/O operation are accounted for - */ + */ + PINT_client_sm *sm_base_p = + PINT_sm_frame(smcb, (-(smcb->frame_count -1))); + assert(sm_p); + assert(sm_base_p); /* this *can* possibly be 0 in the case that the I/O has already completed and no job cancellation were issued at I/O cancel time */ - if (sm_p->u.io.total_cancellations_remaining > 0) + if (sm_base_p->u.io.total_cancellations_remaining > 0) { - sm_p->u.io.total_cancellations_remaining--; + sm_base_p->u.io.total_cancellations_remaining--; } gossip_debug( GOSSIP_IO_DEBUG, "(%p) cancelled_io_jobs_are_pending: %d " - "remaining (op %s)\n", sm_p, - sm_p->u.io.total_cancellations_remaining, + "remaining (op %s)\n", sm_base_p, + sm_base_p->u.io.total_cancellations_remaining, (PINT_smcb_complete(smcb) ? 
"complete" : "NOT complete")); - return (sm_p->u.io.total_cancellations_remaining != 0); + return (sm_base_p->u.io.total_cancellations_remaining != 0); } /* this array must be ordered to match the enum in client-state-machine.h */ @@ -233,7 +268,8 @@ struct PINT_client_op_entry_s PINT_client_sm_mgmt_table[] = {&pvfs2_client_mgmt_remove_object_sm}, {&pvfs2_client_mgmt_remove_dirent_sm}, {&pvfs2_client_mgmt_create_dirent_sm}, - {&pvfs2_client_mgmt_get_dirdata_handle_sm} + {&pvfs2_client_mgmt_get_dirdata_handle_sm}, + {&pvfs2_client_mgmt_get_uid_list_sm} }; @@ -295,6 +331,9 @@ int client_state_machine_terminate( struct PINT_smcb *smcb, job_status_s *js_p) { int ret; + PINT_client_sm *sm_p; + + sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); gossip_debug(GOSSIP_CLIENT_DEBUG, "client_state_machine_terminate smcb %p\n",smcb); @@ -304,11 +343,24 @@ int client_state_machine_terminate( (cancelled_io_jobs_are_pending(smcb))) && !PINT_smcb_immediate_completion(smcb)) { + gossip_debug(GOSSIP_CLIENT_DEBUG, + "client_state_machine_terminate smcb %p completing\n",smcb); + + PINT_EVENT_END(PINT_client_sys_event_id, pint_client_pid, NULL, sm_p->event_id, 0); + + PVFS_hint_free(sm_p->hints); + sm_p->hints = NULL; + gossip_debug(GOSSIP_CLIENT_DEBUG, "add smcb %p to completion list\n", smcb); ret = add_sm_to_completion_list(smcb); assert(ret == 0); } + else + { + gossip_debug(GOSSIP_CLIENT_DEBUG, + "client_state_machine_terminate smcb %p waiting for cancelled jobs\n",smcb); + } return SM_ACTION_TERMINATE; } @@ -355,6 +407,15 @@ PVFS_error PINT_client_state_machine_post( int pvfs_sys_op = PINT_smcb_op(smcb); PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PVFS_hint_add_internal(&sm_p->hints, PINT_HINT_OP_ID, sizeof(pvfs_sys_op), &pvfs_sys_op); + + PINT_EVENT_START(PINT_client_sys_event_id, pint_client_pid, NULL, &sm_p->event_id, + PINT_HINT_GET_CLIENT_ID(sm_p->hints), + PINT_HINT_GET_RANK(sm_p->hints), + PINT_HINT_GET_REQUEST_ID(sm_p->hints), + 
PINT_HINT_GET_HANDLE(sm_p->hints), + pvfs_sys_op); + gossip_debug(GOSSIP_CLIENT_DEBUG, "PINT_client_state_machine_post smcb %p, op: %s\n", smcb, PINT_client_get_name_str(smcb->op)); @@ -363,6 +424,9 @@ PVFS_error PINT_client_state_machine_post( if (!smcb) { + /* give back the hint added above */ + PVFS_hint_free( sm_p->hints ); + sm_p->hints = NULL; return ret; } @@ -383,6 +447,11 @@ PVFS_error PINT_client_state_machine_post( { /* state machine code failed */ gen_mutex_unlock(&test_mutex); + + /* give back the hint added above */ + PVFS_hint_free( sm_p->hints ); + sm_p->hints = NULL; + return sm_ret; } @@ -390,10 +459,12 @@ PVFS_error PINT_client_state_machine_post( { assert(sm_ret == SM_ACTION_TERMINATE); + PINT_EVENT_END(PINT_client_sys_event_id, pint_client_pid, NULL, sm_p->event_id, 0); + *op_id = -1; - /* free the smcb */ - PINT_smcb_free(smcb); + /* free the smcb and any other extra data allocated there */ + PINT_sys_release_smcb(smcb); gossip_debug( GOSSIP_CLIENT_DEBUG, "Posted %s (%llu) " @@ -401,6 +472,7 @@ PVFS_error PINT_client_state_machine_post( PINT_client_get_name_str(pvfs_sys_op), llu((op_id ? 
*op_id : -1)), js.error_code); + } else { @@ -428,10 +500,19 @@ PVFS_error PINT_client_state_machine_release( { PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + if( sm_p ) + { + PVFS_hint_free( sm_p->hints ); + sm_p->hints = NULL; + } + PINT_smcb_set_complete(smcb); PINT_id_gen_safe_unregister(sm_p->sys_op_id); + /* free the internal hint list */ + PVFS_hint_free(sm_p->hints); + PINT_smcb_free(smcb); return 0; } @@ -446,9 +527,12 @@ PVFS_error PINT_client_io_cancel(PVFS_sys_op_id id) PVFS_error ret = -PVFS_EINVAL; PINT_smcb *smcb = NULL; PINT_client_sm *sm_p = NULL; + PINT_client_sm *sm_base_p = NULL; gossip_debug(GOSSIP_CLIENT_DEBUG, "PINT_client_io_cancel id %lld\n",lld(id)); + gossip_debug(GOSSIP_CANCEL_DEBUG, + "PINT_client_io_cancel id %lld\n",lld(id)); smcb = PINT_id_gen_safe_lookup(id); if (!smcb) @@ -471,6 +555,26 @@ PVFS_error PINT_client_io_cancel(PVFS_sys_op_id id) /* op already completed; nothing to cancel. */ return 0; } + + /* We also don't cancel small I/O operations as posted by + * sys-small-io.sm. Check the corresponding flag. We have + * to jump to the base frame rather than the current frame for this + * information because small-io may have pushed a msgpairarray. + * + * sm_base_p is used below instead of sm_p since it contains the correct + * counters and context pointers. In the event the control block only + * has one frame it behaves as it did previously. If the cancellation is + * occuring when a non-IO frame has been pushed on the stack, which doesn't + * have the expected structure, it doesn't cause a segfault but leaves + * it on the state machines stack. 
+ */ + sm_base_p = PINT_sm_frame(smcb, (-(smcb->frame_count -1))); + assert(sm_base_p); + if(sm_base_p->u.io.small_io) + { + gossip_debug(GOSSIP_CANCEL_DEBUG, "skipping cancellation of small I/O operation.\n"); + return(0); + } /* if we fall to here, the I/O operation is still in flight */ /* first, set a flag informing the sys_io state machine that the @@ -485,9 +589,9 @@ PVFS_error PINT_client_io_cancel(PVFS_sys_op_id id) ret = 0; /* now run through and cancel the outstanding jobs */ - for(i = 0; i < sm_p->u.io.context_count; i++) + for(i = 0; i < sm_base_p->u.io.context_count; i++) { - PINT_client_io_ctx *cur_ctx = &sm_p->u.io.contexts[i]; + PINT_client_io_ctx *cur_ctx = &sm_base_p->u.io.contexts[i]; assert(cur_ctx); if (cur_ctx->msg_send_in_progress) @@ -503,7 +607,7 @@ PVFS_error PINT_client_io_cancel(PVFS_sys_op_id id) PVFS_perror_gossip("job_bmi_cancel failed", ret); break; } - sm_p->u.io.total_cancellations_remaining++; + sm_base_p->u.io.total_cancellations_remaining++; } if (cur_ctx->msg_recv_in_progress) @@ -519,7 +623,7 @@ PVFS_error PINT_client_io_cancel(PVFS_sys_op_id id) PVFS_perror_gossip("job_bmi_cancel failed", ret); break; } - sm_p->u.io.total_cancellations_remaining++; + sm_base_p->u.io.total_cancellations_remaining++; } if (cur_ctx->flow_in_progress) @@ -534,7 +638,7 @@ PVFS_error PINT_client_io_cancel(PVFS_sys_op_id id) PVFS_perror_gossip("job_flow_cancel failed", ret); break; } - sm_p->u.io.total_cancellations_remaining++; + sm_base_p->u.io.total_cancellations_remaining++; } if (cur_ctx->write_ack_in_progress) @@ -550,12 +654,12 @@ PVFS_error PINT_client_io_cancel(PVFS_sys_op_id id) PVFS_perror_gossip("job_bmi_cancel failed", ret); break; } - sm_p->u.io.total_cancellations_remaining++; + sm_base_p->u.io.total_cancellations_remaining++; } } gossip_debug(GOSSIP_CANCEL_DEBUG, "(%p) Total cancellations " - "remaining: %d\n", sm_p, - sm_p->u.io.total_cancellations_remaining); + "remaining: %d\n", sm_base_p, + 
sm_base_p->u.io.total_cancellations_remaining); return ret; } @@ -758,7 +862,6 @@ PVFS_error PINT_client_wait_internal( { smcb = PINT_id_gen_safe_lookup(op_id); assert(smcb); - sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); do { @@ -771,6 +874,8 @@ PVFS_error PINT_client_wait_internal( } while (!PINT_smcb_complete(smcb) && (ret == 0)); + sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + if (ret) { PVFS_perror_gossip("PINT_client_state_machine_test()", ret); @@ -783,13 +888,12 @@ PVFS_error PINT_client_wait_internal( return ret; } -/** Frees resources associated with state machine instance. +/** Finds state machine referenced by op_id and releases resources + * associated with it */ void PINT_sys_release(PVFS_sys_op_id op_id) { PINT_smcb *smcb; - PINT_client_sm *sm_p; - PVFS_credentials *cred_p; gossip_debug(GOSSIP_CLIENT_DEBUG, "%s: id %lld\n", __func__, lld(op_id)); smcb = PINT_id_gen_safe_lookup(op_id); @@ -797,6 +901,20 @@ void PINT_sys_release(PVFS_sys_op_id op_id) { return; } + PINT_id_gen_safe_unregister(op_id); + PINT_sys_release_smcb(smcb); + + return; +} + +/** releases resources associated with an smcb. 
Can be used both on + * immediate completion and asynchronous completion + */ +static void PINT_sys_release_smcb(PINT_smcb *smcb) +{ + PINT_client_sm *sm_p; + PVFS_credentials *cred_p; + sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); if (sm_p == NULL) { @@ -805,8 +923,10 @@ void PINT_sys_release(PVFS_sys_op_id op_id) else { cred_p = sm_p->cred_p; + /* free the hint if sm_p isn't null */ + PVFS_hint_free( sm_p->hints ); + sm_p->hints = NULL; } - PINT_id_gen_safe_unregister(op_id); if (PINT_smcb_op(smcb) && cred_p) { @@ -874,6 +994,7 @@ const char *PINT_client_get_name_str(int op_type) { PVFS_MGMT_CREATE_DIRENT, "PVFS_MGMT_CREATE_DIRENT" }, { PVFS_MGMT_GET_DIRDATA_HANDLE, "PVFS_MGMT_GET_DIRDATA_HANDLE" }, + { PVFS_MGMT_GET_UID_LIST, "PVFS_MGMT_GET_UID_LIST" }, { PVFS_SYS_GETEATTR, "PVFS_SYS_GETEATTR" }, { PVFS_SYS_SETEATTR, "PVFS_SYS_SETEATTR" }, { PVFS_SYS_DELEATTR, "PVFS_SYS_DELEATTR" }, @@ -946,6 +1067,84 @@ int PVFS_mgmt_wait( "mgmt"); } +PVFS_error PVFS_sys_set_info( + enum PVFS_sys_setinfo_opt option, + unsigned int arg) +{ + PVFS_error ret = -PVFS_ENOSYS; + + switch(option) + { + case PVFS_SYS_NCACHE_TIMEOUT_MSECS: + ret = PINT_ncache_set_info(NCACHE_TIMEOUT_MSECS, arg); + break; + case PVFS_SYS_ACACHE_TIMEOUT_MSECS: + ret = PINT_acache_set_info(ACACHE_TIMEOUT_MSECS, arg); + break; + case PVFS_SYS_MSG_TIMEOUT_SECS: + case PVFS_SYS_MSG_RETRY_LIMIT: + case PVFS_SYS_MSG_RETRY_DELAY_MSECS: + ret = -PVFS_ENOSYS; + break; +#if 0 + /* need some other code cleanup before these can be implemented */ + case PVFS_SYS_MSG_TIMEOUT_SECS: + PINT_sys_msg_timeout_secs = arg; + ret = 0; + break; + case PVFS_SYS_MSG_RETRY_LIMIT: + PINT_sys_msg_retry_limit = arg; + ret = 0; + break; + case PVFS_SYS_MSG_RETRY_DELAY_MSECS: + PINT_sys_msg_retry_delay_msecs = arg; + ret = 0; + break; +#endif + } + + return(ret); +} + +PVFS_error PVFS_sys_get_info( + enum PVFS_sys_setinfo_opt option, + unsigned int* arg) +{ + PVFS_error ret = -PVFS_ENOSYS; + + switch(option) + { + case 
PVFS_SYS_NCACHE_TIMEOUT_MSECS: + ret = PINT_ncache_get_info(NCACHE_TIMEOUT_MSECS, arg); + break; + case PVFS_SYS_ACACHE_TIMEOUT_MSECS: + ret = PINT_acache_get_info(ACACHE_TIMEOUT_MSECS, arg); + break; + case PVFS_SYS_MSG_TIMEOUT_SECS: + case PVFS_SYS_MSG_RETRY_LIMIT: + case PVFS_SYS_MSG_RETRY_DELAY_MSECS: + ret = -PVFS_ENOSYS; + break; +#if 0 + case PVFS_SYS_MSG_TIMEOUT_SECS: + *arg = PINT_sys_msg_timeout_secs; + ret = 0; + break; + case PVFS_SYS_MSG_RETRY_LIMIT: + *arg = PINT_sys_msg_retry_limit; + ret = 0; + break; + case PVFS_SYS_MSG_RETRY_DELAY_MSECS: + *arg = PINT_sys_msg_retry_delay_msecs; + ret = 0; + break; +#endif + } + + return(ret); +} + + /* * Local variables: * c-indent-level: 4 diff --git a/src/client/sysint/client-state-machine.h b/src/client/sysint/client-state-machine.h index 0e38e4e..66c9298 100644 --- a/src/client/sysint/client-state-machine.h +++ b/src/client/sysint/client-state-machine.h @@ -28,21 +28,19 @@ #include "pint-sysint-utils.h" #include "pint-perf-counter.h" #include "state-machine.h" +#include "pvfs2-hint.h" +#include "pint-event.h" #include "osd.h" -/* skip everything except #includes if __SM_CHECK_DEP is already defined; this - * allows us to get the dependencies right for msgpairarray.sm which relies - * on conflicting headers for dependency information - */ -#ifndef __SM_CHECK_DEP - #define MAX_LOOKUP_SEGMENTS PVFS_REQ_LIMIT_PATH_SEGMENT_COUNT #define MAX_LOOKUP_CONTEXTS PVFS_REQ_LIMIT_MAX_SYMLINK_RESOLUTION_COUNT /* Default client timeout in seconds used to set the timeout for jobs that * send or receive request messages. */ +#ifndef PVFS2_CLIENT_JOB_BMI_TIMEOUT_DEFAULT #define PVFS2_CLIENT_JOB_BMI_TIMEOUT_DEFAULT 30 +#endif /* Default number of times to retry restartable client operations. 
*/ #define PVFS2_CLIENT_RETRY_LIMIT_DEFAULT (5) @@ -78,20 +76,28 @@ struct PINT_client_remove_sm struct PINT_client_create_sm { char *object_name; /* input parameter */ - PVFS_sysresp_create *create_resp; /* in/out parameter*/ - PVFS_sys_attr sys_attr; /* input parameter */ + PVFS_object_attr attr; /* input parameter */ + PVFS_sysresp_create *create_resp; /* in/out parameter */ int retry_count; int num_data_files; + int user_requested_num_data_files; int stored_error_code; PINT_dist *dist; PVFS_sys_layout layout; + PVFS_handle metafile_handle; + int datafile_count; PVFS_handle *datafile_handles; + int stuffed; + PVFS_object_attr store_attr; + + PVFS_handle handles[2]; + /* needed for osd state machines */ + PVFS_object_attr cache_attr; PVFS_BMI_addr_t *data_server_addrs; PVFS_handle_extent_array *io_handle_extent_array; - PVFS_object_attr cache_attr; }; struct PINT_client_mkdir_sm @@ -101,6 +107,7 @@ struct PINT_client_mkdir_sm PVFS_sys_attr sys_attr; /* input parameter */ PVFS_ds_keyval *key_array; PVFS_ds_keyval *val_array; + PVFS_handle cid; int retry_count; int stored_error_code; @@ -145,6 +152,35 @@ struct PINT_client_mgmt_get_dirdata_handle_sm PVFS_handle *dirdata_handle; }; +/* this structure is used to handle mirrored retries in the small-io case*/ +typedef struct PINT_client_mirror_ctx +{ + /*which copy of the mirrored handle are we using?*/ + uint32_t current_copies_count; + + /*the primary datahandle*/ + PVFS_handle original_datahandle; + + /*the server_nr for the primary datahandle*/ + uint32_t original_server_nr; + + /*do we retry the primary or use a mirrored handle?*/ + PVFS_boolean retry_original; + + /*did the current message for this handle complete without any errors?*/ + PVFS_boolean msg_completed; + +} PINT_client_small_io_ctx; + + + +/* this structure is used to handle mirrored retries when + * pvfs2_client_datafile_getattr_sizes_sm is called. 
+*/ +typedef struct PINT_client_mirror_ctx PINT_client_getattr_mirror_ctx; + + + typedef struct PINT_client_io_ctx { /* the index of the current context (in the context array) */ @@ -156,14 +192,22 @@ typedef struct PINT_client_io_ctx /* the data handle we're responsible for doing I/O on */ PVFS_handle data_handle; - /* a reference to the msgpair we're using for communication */ - PINT_sm_msgpair_state msg; + /* first level index into mirror_dfile_array. second level is */ + /* the server_nr. mirror_dfile_array[current_copies_count][server_nr] */ + uint32_t current_copies_count; + + /* increment after one set of mirrors have been tried. */ + uint32_t local_retry_count; + + /* should we retry the original or not? */ + uint32_t retry_original; job_id_t flow_job_id; job_status_s flow_status; flow_descriptor flow_desc; PVFS_msg_tag_t session_tag; + PINT_sm_msgpair_state msg; PINT_client_sm_recv_state write_ack; /* @@ -211,6 +255,8 @@ struct PINT_client_io_sm PINT_client_io_ctx *contexts; int context_count; + PINT_client_small_io_ctx *small_io_ctx; + int total_cancellations_remaining; int retry_count; @@ -229,6 +275,9 @@ struct PINT_client_io_sm struct PINT_client_flush_sm { +#ifdef WIN32 + int field; +#endif }; struct PINT_client_readdir_sm @@ -261,6 +310,13 @@ struct PINT_client_readdirplus_sm PVFS_handle **handles; }; +/* + * A segment is part of a path - namely each part of the + * path delimited by / characters. as each segment is + * looked up we record the PVFS_object_ref for the + * object in the FS, and its attributes. Other fields + * keep up with the segment in contect of the whole path + */ typedef struct { char *seg_name; @@ -270,6 +326,13 @@ typedef struct PVFS_object_ref seg_resolved_refn; } PINT_client_lookup_sm_segment; +/* + * A context is a group of segments that have been looked up + * on a server. A server can resolve more than one segment + * in a single request, and these groupings are maintained + * in a context. 
Again fields manage the progress as these + * lookups happen. + */ typedef struct { int total_segments; @@ -309,10 +372,9 @@ struct PINT_client_mgmt_setparam_list_sm { PVFS_fs_id fs_id; enum PVFS_server_param param; - int64_t value; + struct PVFS_mgmt_setparam_value *value; PVFS_id_gen_t *addr_array; int count; - uint64_t *old_value_array; int *root_check_status_array; PVFS_error_details *details; }; @@ -330,9 +392,10 @@ struct PINT_client_mgmt_statfs_list_sm struct PINT_client_mgmt_perf_mon_list_sm { PVFS_fs_id fs_id; - struct PVFS_mgmt_perf_stat **perf_matrix; + int64_t **perf_matrix; uint64_t *end_time_ms_array; int server_count; + int *key_count; int history_count; PVFS_id_gen_t *addr_array; uint32_t *next_id_array; @@ -358,6 +421,7 @@ struct PINT_client_mgmt_iterate_handles_list_sm int *handle_count_array; PVFS_ds_position *position_array; PVFS_error_details *details; + int flags; }; struct PINT_client_mgmt_get_dfile_array_sm @@ -386,9 +450,13 @@ struct PINT_server_fetch_config_sm_state int nservers; PVFS_BMI_addr_t *addr_array; char **fs_config_bufs; - int32_t *fs_config_buf_size; + int *fs_config_buf_size; + int result_count; /* number of servers that actually responded */ + int* result_indexes; /* index into fs_config_bufs of valid responses */ }; + + /* flag to disable cached lookup during getattr nested sm */ #define PINT_SM_GETATTR_BYPASS_CACHE 1 @@ -407,6 +475,13 @@ typedef struct PINT_sm_getattr_state */ PVFS_object_attr attr; + + /* mirror retry information */ + PINT_client_getattr_mirror_ctx *mir_ctx_array; + uint32_t mir_ctx_count; + uint32_t retry_count; + uint32_t *index_to_server; + PVFS_ds_type ref_type; PVFS_size * size_array; @@ -490,6 +565,17 @@ struct PINT_sysdev_unexp_sm struct PINT_dev_unexp_info *info; }; +/* scratch area used for the UID management state machine */ +struct PINT_client_mgmt_get_uid_list_sm +{ + PVFS_fs_id fs_id; + uint32_t history; + int server_count; + PVFS_id_gen_t *addr_array; /* in */ + PVFS_uid_info_s 
**uid_statistics; /* out */ + uint32_t *uid_count; /* out */ +}; + typedef struct { PVFS_dirent **dirent_array; @@ -509,6 +595,8 @@ typedef struct PINT_client_sm PVFS_sys_op_id sys_op_id; void *user_ptr; + PINT_event_id event_id; + /* stores the final operation error code on operation exit */ PVFS_error error_code; @@ -524,25 +612,19 @@ typedef struct PINT_client_sm /* fetch_config state used by the nested fetch config state machines */ struct PINT_server_fetch_config_sm_state fetch_config; - /* msgpair array ptr used when operations can be performed - * concurrently. this must be allocated within the upper-level - * state machine and is used with the msgpairarray sm. - * If you have a single msgpair, use the msgpair working space - * and pint msgarray at it. - */ - int msgarray_count; /* number of msgpairs in array */ - PINT_sm_msgpair_state *msgarray; /* array of msgpairs to process */ - PINT_sm_msgpair_params msgarray_params; - PINT_sm_msgpair_state msgpair; /* working space for a single msgpair */ + PVFS_hint hints; + + PINT_sm_msgarray_op msgarray_op; PVFS_object_ref object_ref; PVFS_object_ref parent_ref; + PVFS_credentials *cred_p; union { struct PINT_client_remove_sm remove; - struct PINT_client_create_sm create; + struct PINT_client_create_sm create; struct PINT_client_mkdir_sm mkdir; struct PINT_client_symlink_sm sym; struct PINT_client_getattr_sm getattr; @@ -571,6 +653,7 @@ typedef struct PINT_client_sm struct PINT_client_perf_count_timer_sm perf_count_timer; struct PINT_sysdev_unexp_sm sysdev_unexp; struct PINT_client_job_timer_sm job_timer; + struct PINT_client_mgmt_get_uid_list_sm get_uid_list; } u; } PINT_client_sm; @@ -659,8 +742,8 @@ enum PVFS_MGMT_REMOVE_DIRENT = 78, PVFS_MGMT_CREATE_DIRENT = 79, PVFS_MGMT_GET_DIRDATA_HANDLE = 80, + PVFS_MGMT_GET_UID_LIST = 81, PVFS_SERVER_GET_CONFIG = 200, - PVFS_SERVER_FETCH_CONFIG = 201, PVFS_CLIENT_JOB_TIMER = 300, PVFS_CLIENT_PERF_COUNT_TIMER = 301, PVFS_DEV_UNEXPECTED = 400 @@ -668,7 +751,7 @@ enum #define 
PVFS_OP_SYS_MAXVALID 21 #define PVFS_OP_SYS_MAXVAL 69 -#define PVFS_OP_MGMT_MAXVALID 81 +#define PVFS_OP_MGMT_MAXVALID 82 #define PVFS_OP_MGMT_MAXVAL 199 int PINT_client_io_cancel(job_id_t id); @@ -702,63 +785,27 @@ do { \ } \ } while(0) -#define PINT_init_msgarray_params(msgarray_params_ptr, __fsid) \ -do { \ - PINT_sm_msgpair_params *mpp = msgarray_params_ptr; \ - struct server_configuration_s *server_config = \ - PINT_get_server_config_struct(__fsid); \ - mpp->job_context = pint_client_sm_context; \ - if (server_config) \ - { \ - mpp->job_timeout = server_config->client_job_bmi_timeout; \ - mpp->retry_limit = server_config->client_retry_limit; \ - mpp->retry_delay = server_config->client_retry_delay_ms; \ - } \ - else \ - { \ - mpp->job_timeout = PVFS2_CLIENT_JOB_BMI_TIMEOUT_DEFAULT; \ - mpp->retry_limit = PVFS2_CLIENT_RETRY_LIMIT_DEFAULT; \ - mpp->retry_delay = PVFS2_CLIENT_RETRY_DELAY_MS_DEFAULT; \ - } \ - PINT_put_server_config_struct(server_config); \ -} while(0) - -#define PINT_init_msgpair(sm_p, msg_p) \ -do { \ - msg_p = &sm_p->msgpair; \ - memset(msg_p, 0, sizeof(PINT_sm_msgpair_state)); \ - if (sm_p->msgarray && (sm_p->msgarray != &(sm_p->msgpair)))\ - { \ - free(sm_p->msgarray); \ - sm_p->msgarray = NULL; \ - } \ - sm_p->msgarray = msg_p; \ - sm_p->msgarray_count = 1; \ +#define PINT_init_msgarray_params(client_sm_p, __fsid) \ +do { \ + PINT_sm_msgpair_params *mpp = &client_sm_p->msgarray_op.params; \ + struct server_configuration_s *server_config = \ + PINT_get_server_config_struct(__fsid); \ + mpp->job_context = pint_client_sm_context; \ + if (server_config) \ + { \ + mpp->job_timeout = server_config->client_job_bmi_timeout; \ + mpp->retry_limit = server_config->client_retry_limit; \ + mpp->retry_delay = server_config->client_retry_delay_ms; \ + } \ + else \ + { \ + mpp->job_timeout = PVFS2_CLIENT_JOB_BMI_TIMEOUT_DEFAULT; \ + mpp->retry_limit = PVFS2_CLIENT_RETRY_LIMIT_DEFAULT; \ + mpp->retry_delay = PVFS2_CLIENT_RETRY_DELAY_MS_DEFAULT; \ + } \ + 
PINT_put_server_config_struct(server_config); \ } while(0) - -/************************************ - * state-machine.h included here - ************************************/ -#if 0 -#define PINT_OP_STATE PINT_client_sm - -/* This macro allows the generic state-machine-fns.h locate function - * to access the appropriate sm struct based on the client operation index - * from the above enum. Because the enum starts management operations at - * 70, the management table was separated out from the sys table and the - * necessary checks and subtractions are made in this macro. - */ -#define PINT_OP_STATE_GET_MACHINE(_op) \ - ((_op <= PVFS_OP_SYS_MAXVAL) ? (PINT_client_sm_sys_table[_op - 1].sm) : \ - ((_op <= PVFS_OP_MGMT_MAXVAL) ? \ - (PINT_client_sm_mgmt_table[_op - PVFS_OP_SYS_MAXVAL - 1].sm) : \ - ((_op == PVFS_SERVER_GET_CONFIG) ? (&pvfs2_server_get_config_sm) : \ - (_op == PVFS_SERVER_FETCH_CONFIG) ? (&pvfs2_server_fetch_config_sm) : \ - ((_op == PVFS_CLIENT_JOB_TIMER) ? (&pvfs2_client_job_timer_sm) : \ - ((_op == PVFS_CLIENT_PERF_COUNT_TIMER) ? 
(&pvfs2_client_perf_count_timer_sm) : NULL))))) -#endif - struct PINT_client_op_entry_s { struct PINT_state_machine_s * sm; @@ -778,9 +825,6 @@ extern struct PINT_state_machine_s pvfs2_client_datafile_getattr_sizes_sm; extern struct PINT_state_machine_s pvfs2_client_setattr_sm; extern struct PINT_state_machine_s pvfs2_client_io_sm; extern struct PINT_state_machine_s pvfs2_client_osd_io_sm; -extern struct PINT_state_machine_s pvfs2_client_osd_dirops_sm; -extern struct PINT_state_machine_s pvfs2_client_osd_dirops_attr1_sm; -extern struct PINT_state_machine_s pvfs2_client_osd_dirops_attr4_sm; extern struct PINT_state_machine_s pvfs2_client_small_io_sm; extern struct PINT_state_machine_s pvfs2_client_flush_sm; extern struct PINT_state_machine_s pvfs2_client_sysint_readdir_sm; @@ -811,6 +855,7 @@ extern struct PINT_state_machine_s pvfs2_client_del_eattr_sm; extern struct PINT_state_machine_s pvfs2_client_list_eattr_sm; extern struct PINT_state_machine_s pvfs2_client_statfs_sm; extern struct PINT_state_machine_s pvfs2_fs_add_sm; +extern struct PINT_state_machine_s pvfs2_client_mgmt_get_uid_list_sm; /* nested state machines (helpers) */ extern struct PINT_state_machine_s pvfs2_client_lookup_ncache_sm; @@ -825,7 +870,6 @@ extern struct PINT_state_machine_s pvfs2_server_fetch_config_nested_sm; struct PINT_state_machine_s *client_op_state_get_machine(int); int client_state_machine_terminate(struct PINT_smcb *, job_status_s *); -#endif /* __SM_CHECK_DEP */ #endif /* __PVFS2_CLIENT_STATE_MACHINE_H */ /* diff --git a/src/client/sysint/finalize.c b/src/client/sysint/finalize.c index 84837fd..9f4c48f 100644 --- a/src/client/sysint/finalize.c +++ b/src/client/sysint/finalize.c @@ -22,6 +22,7 @@ #include "src/server/request-scheduler/request-scheduler.h" #include "job-time-mgr.h" #include "pint-util.h" +#include "pint-event.h" extern job_context_id pint_client_sm_context; @@ -54,12 +55,17 @@ int PVFS_sys_finalize() PINT_req_sched_finalize(); + /* release timer_queue resources, if 
there are any */ + PINT_timer_queue_finalize(); + BMI_finalize(); PINT_encode_finalize(); PINT_dist_finalize(); + PINT_event_finalize(); + PINT_release_pvfstab(); gossip_disable(); diff --git a/src/client/sysint/fs-add.sm b/src/client/sysint/fs-add.sm index 557ee01..61ee79b 100644 --- a/src/client/sysint/fs-add.sm +++ b/src/client/sysint/fs-add.sm @@ -22,7 +22,9 @@ #include #include #include +#ifndef WIN32 #include +#endif #include "acache.h" #include "ncache.h" @@ -44,6 +46,9 @@ enum { gen_mutex_t mt_config = GEN_MUTEX_INITIALIZER; extern job_context_id pint_client_sm_context; +static int server_fetch_config_comp_fn( + void *v_p, struct PVFS_server_resp *resp_p, int i); + %% machine pvfs2_fs_add_sm @@ -73,6 +78,12 @@ machine pvfs2_fs_add_sm state do_config_integrity_checks { jump pvfs2_server_fetch_config_nested_sm; + default => compare_hashes; + } + + state compare_hashes + { + run fs_add_compare_hashes; default => final_cleanup; } @@ -83,6 +94,28 @@ machine pvfs2_fs_add_sm } } +nested machine pvfs2_server_fetch_config_nested_sm +{ + state fetch_setup_msgpair + { + run server_fetch_config_setup_msgpair; + success => fetch_xfer_msgpair; + default => fetch_cleanup; + } + + state fetch_xfer_msgpair + { + jump pvfs2_msgpairarray_sm; + default => fetch_cleanup; + } + + state fetch_cleanup + { + run server_fetch_config_cleanup; + default => return; + } +} + %% /** Tell the system interface about the location of a PVFS2 file system. @@ -172,14 +205,12 @@ PVFS_error PVFS_isys_fs_add( * PINT_init_msgarray_params(), because we don't yet have a server * configuration file to override default parameters. 
*/ - sm_p->msgarray_params.job_context = pint_client_sm_context; - sm_p->msgarray_params.job_timeout = 30; /* 30 second job timeout */ - sm_p->msgarray_params.retry_delay = 2000; /* 2 second retry delay */ - sm_p->msgarray_params.retry_limit = 5; /* retry up to 5 times */ - - sm_p->msgarray_count = 1; - sm_p->msgarray = &(sm_p->msgpair); + sm_p->msgarray_op.params.job_context = pint_client_sm_context; + sm_p->msgarray_op.params.job_timeout = 30; /* 30 second job timeout */ + sm_p->msgarray_op.params.retry_delay = 2000; /* 2 second retry delay */ + sm_p->msgarray_op.params.retry_limit = 5; /* retry up to 5 times */ + PINT_msgpair_init(&sm_p->msgarray_op); PINT_init_sysint_credentials(sm_p->cred_p, &creds); return PINT_client_state_machine_post( @@ -189,7 +220,7 @@ PVFS_error PVFS_isys_fs_add( int PVFS_sys_fs_add(struct PVFS_sys_mntent *mntent) { int ret = -PVFS_EINVAL, error = 0; - PVFS_sys_op_id op_id; + PVFS_sys_op_id op_id = -1; ret = PVFS_isys_fs_add(mntent, &op_id, NULL); if (ret) @@ -269,7 +300,6 @@ static PINT_sm_action pvfs2_server_prepare_fetch_config( js_p->error_code = SKIP_INTEGRITY_CHECK; return SM_ACTION_COMPLETE; } - sm_p->fetch_config.addr_array = (PVFS_BMI_addr_t *) malloc(count * sizeof(PVFS_BMI_addr_t)); if (sm_p->fetch_config.addr_array == NULL) { @@ -308,7 +338,6 @@ static PINT_sm_action pvfs2_server_prepare_fetch_config( } sm_p->fetch_config.nservers = count; - sm_p->fetch_config.fs_config_bufs = (char **) calloc(count, sizeof(char *)); if (sm_p->fetch_config.fs_config_bufs == NULL) { @@ -329,10 +358,9 @@ static PINT_sm_action pvfs2_server_prepare_fetch_config( js_p->error_code = -PVFS_ENOMEM; return SM_ACTION_COMPLETE; } - sm_p->msgarray_count = count; - sm_p->msgarray = (PINT_sm_msgpair_state *) malloc( - count * sizeof(PINT_sm_msgpair_state)); - if (!sm_p->msgarray) + + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, count); + if (ret != 0) { fprintf(stderr, "Could not allocate msgarray for fetch_config\n"); 
free(sm_p->fetch_config.addr_array); @@ -341,9 +369,26 @@ static PINT_sm_action pvfs2_server_prepare_fetch_config( sm_p->fetch_config.fs_config_bufs = NULL; free(sm_p->fetch_config.fs_config_buf_size); sm_p->fetch_config.fs_config_buf_size = NULL; + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + sm_p->fetch_config.result_indexes = (int*)calloc(count, + sizeof(int)); + if(sm_p->fetch_config.result_indexes == NULL) + { + free(sm_p->fetch_config.addr_array); + sm_p->fetch_config.addr_array = NULL; + free(sm_p->fetch_config.fs_config_bufs); + sm_p->fetch_config.fs_config_bufs = NULL; + free(sm_p->fetch_config.fs_config_buf_size); + sm_p->fetch_config.fs_config_buf_size = NULL; + free(sm_p->fetch_config.result_indexes); + sm_p->fetch_config.result_indexes = NULL; js_p->error_code = -PVFS_ENOMEM; return SM_ACTION_COMPLETE; } + js_p->error_code = 0; return SM_ACTION_COMPLETE; } @@ -371,10 +416,16 @@ static int compare_hashes(PINT_client_sm *sm_p, job_status_s *js_p) char **sha1_fs_digests = NULL; size_t digest_len; PVFS_fs_id fsid; + int* indexes = sm_p->fetch_config.result_indexes; + + count = sm_p->fetch_config.result_count; + if(count < 2) + { + /* nothing to compare */ + goto out; + } - assert(js_p->error_code == 0); fsid = sm_p->u.get_config.mntent->fs_id; - count = sm_p->fetch_config.nservers; sha1_fs_digests = (char **) calloc(count, sizeof(char *)); if (sha1_fs_digests == NULL) { @@ -383,7 +434,8 @@ static int compare_hashes(PINT_client_sm *sm_p, job_status_s *js_p) } for (i = 1; i < count; i++) { - if (sm_p->fetch_config.fs_config_buf_size[0] != sm_p->fetch_config.fs_config_buf_size[i]) + if (sm_p->fetch_config.fs_config_buf_size[indexes[0]] != + sm_p->fetch_config.fs_config_buf_size[indexes[i]]) { fs_conf_size_mismatch = 1; } @@ -394,16 +446,18 @@ static int compare_hashes(PINT_client_sm *sm_p, job_status_s *js_p) for (i = 0; i < count; i++) { gossip_err(" FS config file on %s -> (size) %d\n", - PVFS_mgmt_map_addr(fsid, sm_p->cred_p, 
sm_p->fetch_config.addr_array[i], &tmp), - sm_p->fetch_config.fs_config_buf_size[i] - 1); + PVFS_mgmt_map_addr(fsid, sm_p->cred_p, + sm_p->fetch_config.addr_array[indexes[i]], &tmp), + sm_p->fetch_config.fs_config_buf_size[indexes[i]] - 1); ret = -PVFS_EINVAL; } goto out; } for (i = 0; i < count; i++) { - ret = PINT_util_digest_sha1(sm_p->fetch_config.fs_config_bufs[i], - sm_p->fetch_config.fs_config_buf_size[i], + ret = + PINT_util_digest_sha1(sm_p->fetch_config.fs_config_bufs[indexes[i]], + sm_p->fetch_config.fs_config_buf_size[indexes[i]], &sha1_fs_digests[i], &digest_len); if (ret < 0) goto out; @@ -424,7 +478,8 @@ static int compare_hashes(PINT_client_sm *sm_p, job_status_s *js_p) unsigned char str[256]; hash2str((unsigned char *) sha1_fs_digests[i], digest_len, str); gossip_err(" FS config file on %s -> (SHA1) %s\n", - PVFS_mgmt_map_addr(fsid, sm_p->cred_p, sm_p->fetch_config.addr_array[i], &tmp), str); + PVFS_mgmt_map_addr(fsid, sm_p->cred_p, + sm_p->fetch_config.addr_array[indexes[i]], &tmp), str); } ret = -PVFS_EINVAL; goto out; @@ -442,6 +497,8 @@ out: sm_p->fetch_config.addr_array = NULL; free(sm_p->fetch_config.fs_config_bufs); sm_p->fetch_config.fs_config_bufs = NULL; + free(sm_p->fetch_config.result_indexes); + sm_p->fetch_config.result_indexes = NULL; free(sm_p->fetch_config.fs_config_buf_size); sm_p->fetch_config.fs_config_buf_size = NULL; js_p->error_code = (ret == -PVFS_EOPNOTSUPP) ? 
0 : ret; @@ -550,26 +607,128 @@ static PINT_sm_action fs_add_parent_cleanup(struct PINT_smcb *smcb, job_status_s return SM_ACTION_COMPLETE; } -static PINT_sm_action fs_add_final_cleanup( +static PINT_sm_action fs_add_compare_hashes( struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + if (js_p->error_code == SKIP_INTEGRITY_CHECK) js_p->error_code = 0; - else if (js_p->error_code == 0) + else { compare_hashes(sm_p, js_p); } + + return SM_ACTION_COMPLETE; +} + +static PINT_sm_action fs_add_final_cleanup( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); if(sm_p->u.get_config.free_config_flag) { PINT_config_release(sm_p->u.get_config.config); free(sm_p->u.get_config.config); } + if (js_p->error_code == SKIP_INTEGRITY_CHECK) + js_p->error_code = 0; sm_p->error_code = js_p->error_code; PINT_SET_OP_COMPLETE; return SM_ACTION_TERMINATE; } +static PINT_sm_action server_fetch_config_setup_msgpair(struct PINT_smcb *smcb, + job_status_s *js_p) +{ + int i; + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_sm_msgpair_state *msg_p = NULL; + + gossip_debug(GOSSIP_CLIENT_DEBUG, + "fetch_config state: server_fetch_config_setup_msgpair\n"); + + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) + { + sm_p->msgarray_op.msgarray[i].enc_type = sm_p->msgarray_op.msgpair.enc_type; + PINT_SERVREQ_GETCONFIG_FILL(msg_p->req, *sm_p->cred_p, sm_p->hints); + + msg_p->fs_id = PVFS_FS_ID_NULL; + msg_p->handle = PVFS_HANDLE_NULL; + /* only try once to retrieve a config file from each server */ + msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; + msg_p->comp_fn = server_fetch_config_comp_fn; + msg_p->svr_addr = sm_p->fetch_config.addr_array[i]; + } + /* don't complain so much about servers we can't reach yet */ + sm_p->msgarray_op.params.quiet_flag = 1; + + js_p->error_code = 0; + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); + return 
SM_ACTION_COMPLETE; +} + +static PINT_sm_action server_fetch_config_cleanup(struct PINT_smcb *smcb, + job_status_s *js_p) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_msgpairarray_destroy(&sm_p->msgarray_op); + /* preserve js_p->error_code */ + + return SM_ACTION_COMPLETE; +} + +static int server_fetch_config_comp_fn( + void *v_p, + struct PVFS_server_resp *resp_p, + int i) +{ + PINT_smcb *smcb = v_p; + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + int j; + + /* if this particular request was successful, then store the server + * response and let the caller sort it out */ + + if (sm_p->msgarray_op.msgarray[i].op_status == 0) + { + sm_p->fetch_config.fs_config_bufs[i] = + strdup(resp_p->u.getconfig.fs_config_buf); + sm_p->fetch_config.fs_config_buf_size[i] = + resp_p->u.getconfig.fs_config_buf_size; + } + + /* is this this last response? */ + if (i == (sm_p->msgarray_op.count -1)) + { + /* look through responses, count the number of valid responses we + * received, and mark where they are in the array + */ + sm_p->fetch_config.result_count = 0; + for(j=0; jmsgarray_op.count; j++) + { + if(sm_p->msgarray_op.msgarray[j].op_status == 0) + { + sm_p->fetch_config.result_indexes[sm_p->fetch_config.result_count] + = j; + sm_p->fetch_config.result_count++; + } + } + if(sm_p->fetch_config.result_count > 0) + { + /* we got at least one config file */ + return(0); + } + else + { + /* pick an error code */ + return PINT_msgarray_status(&sm_p->msgarray_op); + } + } + return 0; +} + /* * Local variables: * c-indent-level: 4 diff --git a/src/client/sysint/getparent.c b/src/client/sysint/getparent.c index 8833e20..6d8c45e 100644 --- a/src/client/sysint/getparent.c +++ b/src/client/sysint/getparent.c @@ -15,7 +15,8 @@ int PVFS_sys_getparent( PVFS_fs_id fs_id, char *entry_name, const PVFS_credentials *credentials, - PVFS_sysresp_getparent *resp) + PVFS_sysresp_getparent *resp, + PVFS_hint hints) { int ret = -PVFS_EINVAL; 
char parent_buf[PVFS_NAME_MAX] = {0}; @@ -38,7 +39,7 @@ int PVFS_sys_getparent( memset(&resp_look,0,sizeof(PVFS_sysresp_lookup)); ret = PVFS_sys_lookup(fs_id, parent_buf, credentials, - &resp_look, PVFS2_LOOKUP_LINK_NO_FOLLOW); + &resp_look, PVFS2_LOOKUP_LINK_NO_FOLLOW, hints); if (ret) { gossip_err("Lookup failed on %s\n",parent_buf); diff --git a/src/client/sysint/initialize.c b/src/client/sysint/initialize.c index 4140721..a1faac0 100644 --- a/src/client/sysint/initialize.c +++ b/src/client/sysint/initialize.c @@ -11,7 +11,9 @@ #include #include #include +#ifndef WIN32 #include +#endif #include "acache.h" #include "ncache.h" @@ -28,11 +30,16 @@ #include "src/server/request-scheduler/request-scheduler.h" #include "job-time-mgr.h" #include "pint-util.h" +#include "pint-event.h" PINT_smcb *g_smcb = NULL; extern job_context_id pint_client_sm_context; +PINT_event_id PINT_client_sys_event_id; + +int pint_client_pid; + typedef enum { CLIENT_NO_INIT = 0, @@ -69,6 +76,13 @@ int PVFS_sys_initialize(uint64_t default_debug_mask) PINT_client_status_flag client_status_flag = CLIENT_NO_INIT; PINT_smcb *smcb = NULL; uint64_t debug_mask = 0; + char *event_mask = NULL; + +#ifdef WIN32 + pint_client_pid = (int) GetCurrentProcessId(); +#else + pint_client_pid = getpid(); +#endif gossip_enable_stderr(); @@ -84,6 +98,27 @@ int PVFS_sys_initialize(uint64_t default_debug_mask) gossip_enable_file(debug_file, "w"); } + ret = PINT_event_init(PINT_EVENT_TRACE_TAU); + +/* ignore error * + * if (ret < 0) + { + gossip_err("Error initializing event interface.\n"); + return (ret); + } */ + + + /** + * (ClientID, Rank, RequestID, Handle, Sys) + */ + PINT_event_define_event(NULL, "sys", "%d%d%d%llu%d", "", &PINT_client_sys_event_id); + + event_mask = getenv("PVFS2_EVENTMASK"); + if (event_mask) + { + PINT_event_enable(event_mask); + } + ret = id_gen_safe_initialize(); if(ret < 0) { @@ -98,6 +133,7 @@ int PVFS_sys_initialize(uint64_t default_debug_mask) gossip_lerr("Error initializing 
distributions.\n"); goto error_exit; } + client_status_flag |= CLIENT_DIST_INIT; /* initlialize the protocol encoder */ ret = PINT_encode_initialize(); diff --git a/src/client/sysint/mgmt-create-dirent.sm b/src/client/sysint/mgmt-create-dirent.sm index 71bde79..d136f1c 100644 --- a/src/client/sysint/mgmt-create-dirent.sm +++ b/src/client/sysint/mgmt-create-dirent.sm @@ -69,6 +69,7 @@ PVFS_error PVFS_imgmt_create_dirent( PVFS_handle entry_handle, PVFS_credentials *credentials, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -97,11 +98,12 @@ PVFS_error PVFS_imgmt_create_dirent( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, parent_ref.fs_id); + PINT_init_msgarray_params(sm_p, parent_ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->parent_ref = parent_ref; sm_p->u.mgmt_create_dirent.entry = entry; sm_p->u.mgmt_create_dirent.entry_handle = entry_handle; + PVFS_hint_copy(hints, &sm_p->hints); gossip_debug( GOSSIP_CLIENT_DEBUG, "Trying to create dirent %s under %llu,%d\n", @@ -118,7 +120,8 @@ PVFS_error PVFS_mgmt_create_dirent( PVFS_object_ref parent_ref, char *entry, PVFS_handle entry_handle, - PVFS_credentials *credentials) + PVFS_credentials *credentials, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_mgmt_op_id op_id; @@ -127,7 +130,7 @@ PVFS_error PVFS_mgmt_create_dirent( "PVFS_mgmt_create_dirent entered\n"); ret = PVFS_imgmt_create_dirent( - parent_ref, entry, entry_handle, credentials, &op_id, NULL); + parent_ref, entry, entry_handle, credentials, &op_id, hints, NULL); if (ret) { PVFS_perror_gossip("PVFS_imgmt_create_dirent call", ret); @@ -167,7 +170,8 @@ static int mgmt_create_dirent_setup_msgpair( js_p->error_code = 0; - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; gossip_debug( GOSSIP_CLIENT_DEBUG, " creating dirent entry %s with " @@ -182,10 
+186,11 @@ static int mgmt_create_dirent_setup_msgpair( sm_p->u.mgmt_create_dirent.entry, sm_p->u.mgmt_create_dirent.entry_handle, sm_p->parent_ref.handle, - sm_p->parent_ref.fs_id); + sm_p->parent_ref.fs_id, + sm_p->hints); msg_p->fs_id = sm_p->parent_ref.fs_id; - msg_p->handle = sm_p->u.mgmt_create_dirent.entry_handle; + msg_p->handle = sm_p->parent_ref.handle; msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; msg_p->comp_fn = mgmt_create_dirent_comp_fn; @@ -197,6 +202,8 @@ static int mgmt_create_dirent_setup_msgpair( gossip_err("Failed to map server address\n"); js_p->error_code = ret; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -204,9 +211,13 @@ static int mgmt_create_dirent_comp_fn( void *v_p, struct PVFS_server_resp *resp_p, int index) { PINT_smcb *smcb = v_p; +#ifdef WIN32 + PINT_client_sm *sm_p = + PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); +#else PINT_client_sm *sm_p __attribute__((unused)) = - PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - + PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); +#endif assert(resp_p->op == PVFS_SERV_CRDIRENT); if (resp_p->status == 0) diff --git a/src/client/sysint/mgmt-event-mon-list.sm b/src/client/sysint/mgmt-event-mon-list.sm index 09254f4..61998ad 100644 --- a/src/client/sysint/mgmt-event-mon-list.sm +++ b/src/client/sysint/mgmt-event-mon-list.sm @@ -63,10 +63,12 @@ PVFS_error PVFS_imgmt_event_mon_list( int event_count, PVFS_error_details *details, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PINT_smcb *smcb = NULL; PINT_client_sm *sm_p = NULL; + int ret; gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_imgmt_event_mon_list entered\n"); @@ -88,7 +90,7 @@ PVFS_error PVFS_imgmt_event_mon_list( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, fs_id); + PINT_init_msgarray_params(sm_p, fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.event_mon_list.fs_id = fs_id; sm_p->u.event_mon_list.event_matrix = event_matrix; 
@@ -96,14 +98,13 @@ PVFS_error PVFS_imgmt_event_mon_list( sm_p->u.event_mon_list.event_count = event_count; sm_p->u.event_mon_list.addr_array = addr_array; sm_p->u.event_mon_list.details = details; + PVFS_hint_copy(hints, &sm_p->hints); - sm_p->msgarray_count = server_count; - sm_p->msgarray = (PINT_sm_msgpair_state *)malloc( - server_count * sizeof(PINT_sm_msgpair_state)); - if (sm_p->msgarray == NULL) + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, server_count); + if(ret != 0) { PINT_smcb_free(smcb); - return -PVFS_ENOMEM; + return ret; } return PINT_client_state_machine_post( @@ -117,7 +118,8 @@ PVFS_error PVFS_mgmt_event_mon_list( PVFS_BMI_addr_t *addr_array, int server_count, int event_count, - PVFS_error_details *details) + PVFS_error_details *details, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_mgmt_op_id op_id; @@ -127,7 +129,7 @@ PVFS_error PVFS_mgmt_event_mon_list( ret = PVFS_imgmt_event_mon_list( fs_id, credentials, event_matrix, addr_array, server_count, - event_count, details, &op_id, NULL); + event_count, details, &op_id, hints, NULL); if (ret) { @@ -163,14 +165,13 @@ static PINT_sm_action mgmt_event_mon_list_setup_msgpair( js_p->error_code = 0; - for (i = 0; i < sm_p->msgarray_count; i++) + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) { - msg_p = &sm_p->msgarray[i]; - PINT_SERVREQ_MGMT_EVENT_MON_FILL( msg_p->req, *sm_p->cred_p, - sm_p->u.event_mon_list.event_count); + sm_p->u.event_mon_list.event_count, + sm_p->hints); msg_p->fs_id = sm_p->u.event_mon_list.fs_id; msg_p->handle = PVFS_HANDLE_NULL; @@ -178,6 +179,8 @@ static PINT_sm_action mgmt_event_mon_list_setup_msgpair( msg_p->comp_fn = event_mon_list_comp_fn; msg_p->svr_addr = sm_p->u.event_mon_list.addr_array[i]; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -195,15 +198,15 @@ static PINT_sm_action mgmt_event_mon_list_cleanup( for(i = 0; i < sm_p->u.event_mon_list.server_count; i++) { - if (sm_p->msgarray[i].op_status != 0) + 
if (sm_p->msgarray_op.msgarray[i].op_status != 0) { if (errct < sm_p->u.event_mon_list.details->count_allocated) { sm_p->u.event_mon_list.details->error[errct].error = - sm_p->msgarray[i].op_status; + sm_p->msgarray_op.msgarray[i].op_status; sm_p->u.event_mon_list.details->error[errct].addr = - sm_p->msgarray[i].svr_addr; + sm_p->msgarray_op.msgarray[i].svr_addr; errct++; } else @@ -216,10 +219,7 @@ static PINT_sm_action mgmt_event_mon_list_cleanup( error = -PVFS_EDETAIL; } - if (sm_p->msgarray && (sm_p->msgarray != &sm_p->msgpair)) - { - free(sm_p->msgarray); - } + PINT_msgpairarray_destroy(&sm_p->msgarray_op); sm_p->error_code = error; @@ -233,12 +233,12 @@ static int event_mon_list_comp_fn(void* v_p, { int j = 0; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); /* if this particular request was successful, then store the * performance information in an array to be returned to caller */ - if (sm_p->msgarray[i].op_status == 0) + if (sm_p->msgarray_op.msgarray[i].op_status == 0) { memcpy(sm_p->u.event_mon_list.event_matrix[i], resp_p->u.mgmt_event_mon.event_array, @@ -249,13 +249,13 @@ static int event_mon_list_comp_fn(void* v_p, /* if this is the last response, check all of the status values and * return error code if any requests failed */ - if (i == (sm_p->msgarray_count -1)) + if (i == (sm_p->msgarray_op.count -1)) { - for(j = 0; j < sm_p->msgarray_count; j++) + for(j = 0; j < sm_p->msgarray_op.count; j++) { - if (sm_p->msgarray[j].op_status != 0) + if (sm_p->msgarray_op.msgarray[j].op_status != 0) { - return sm_p->msgarray[j].op_status; + return sm_p->msgarray_op.msgarray[j].op_status; } } } diff --git a/src/client/sysint/mgmt-get-config.c b/src/client/sysint/mgmt-get-config.c index 034472c..927d669 100755 --- a/src/client/sysint/mgmt-get-config.c +++ b/src/client/sysint/mgmt-get-config.c @@ -6,7 +6,9 @@ #include #include +#ifndef WIN32 #include 
+#endif #include "client-state-machine.h" #include "pvfs2-debug.h" @@ -61,7 +63,7 @@ int PVFS_mgmt_get_config( sm_p->u.get_config.persist_config_buffers = 1; - PINT_init_msgarray_params(&sm_p->msgarray_params, *fsid); + PINT_init_msgarray_params(sm_p, *fsid); PINT_init_sysint_credentials(sm_p->cred_p, &creds); @@ -82,12 +84,11 @@ int PVFS_mgmt_get_config( mntent.pvfs_fs_name = cur_fs->file_system_name; sm_p->u.get_config.config = config; - sm_p->msgpair.enc_type = cur_fs->encoding; + sm_p->msgarray_op.msgpair.enc_type = cur_fs->encoding; sm_p->u.get_config.mntent = &mntent; - sm_p->msgarray_count = 1; - sm_p->msgarray = &(sm_p->msgpair); + PINT_msgpair_init(&sm_p->msgarray_op); ret = PINT_client_state_machine_post( smcb, &op_id, NULL); diff --git a/src/client/sysint/mgmt-get-dfile-array.sm b/src/client/sysint/mgmt-get-dfile-array.sm index 8d77c20..1868685 100644 --- a/src/client/sysint/mgmt-get-dfile-array.sm +++ b/src/client/sysint/mgmt-get-dfile-array.sm @@ -55,6 +55,7 @@ PVFS_error PVFS_imgmt_get_dfile_array( PVFS_handle *dfile_array, int dfile_count, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -80,11 +81,12 @@ PVFS_error PVFS_imgmt_get_dfile_array( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, ref.fs_id); + PINT_init_msgarray_params(sm_p, ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->object_ref = ref; sm_p->u.get_dfile_array.dfile_array = dfile_array; sm_p->u.get_dfile_array.dfile_count = dfile_count; + PVFS_hint_copy(hints, &sm_p->hints); PINT_SM_GETATTR_STATE_FILL( sm_p->getattr, @@ -103,7 +105,8 @@ PVFS_error PVFS_mgmt_get_dfile_array( PVFS_object_ref ref, PVFS_credentials *credentials, PVFS_handle *dfile_array, - int dfile_count) + int dfile_count, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_mgmt_op_id op_id; @@ -112,7 +115,7 @@ PVFS_error PVFS_mgmt_get_dfile_array( "PVFS_mgmt_get_dfile_array 
entered\n"); ret = PVFS_imgmt_get_dfile_array( - ref, credentials, dfile_array, dfile_count, &op_id, NULL); + ref, credentials, dfile_array, dfile_count, &op_id, hints, NULL); if (ret) { diff --git a/src/client/sysint/mgmt-get-dirdata-handle.sm b/src/client/sysint/mgmt-get-dirdata-handle.sm index 4b0fd53..7f49dba 100644 --- a/src/client/sysint/mgmt-get-dirdata-handle.sm +++ b/src/client/sysint/mgmt-get-dirdata-handle.sm @@ -71,6 +71,7 @@ PVFS_error PVFS_imgmt_get_dirdata_handle( PVFS_handle *out_dirdata_handle, PVFS_credentials *credentials, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -98,10 +99,11 @@ PVFS_error PVFS_imgmt_get_dirdata_handle( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, parent_ref.fs_id); + PINT_init_msgarray_params(sm_p, parent_ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->parent_ref = parent_ref; sm_p->u.mgmt_get_dirdata_handle.dirdata_handle = out_dirdata_handle; + PVFS_hint_copy(hints, &sm_p->hints); gossip_debug( GOSSIP_CLIENT_DEBUG, "Trying to get dirdata object of parent " @@ -116,7 +118,8 @@ PVFS_error PVFS_imgmt_get_dirdata_handle( PVFS_error PVFS_mgmt_get_dirdata_handle( PVFS_object_ref parent_ref, PVFS_handle *out_dirdata_handle, - PVFS_credentials *credentials) + PVFS_credentials *credentials, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_mgmt_op_id op_id; @@ -125,7 +128,7 @@ PVFS_error PVFS_mgmt_get_dirdata_handle( "PVFS_mgmt_get_dirdata_handle entered\n"); ret = PVFS_imgmt_get_dirdata_handle( - parent_ref, out_dirdata_handle, credentials, &op_id, NULL); + parent_ref, out_dirdata_handle, credentials, &op_id, hints, NULL); if (ret) { PVFS_perror_gossip("PVFS_imgmt_get_dirdata_handle call", ret); @@ -161,13 +164,15 @@ static int mgmt_get_dirdata_handle_setup_msgpair( js_p->error_code = 0; - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = 
&sm_p->msgarray_op.msgpair; PINT_SERVREQ_MGMT_GET_DIRDATA_HANDLE_FILL( msg_p->req, *sm_p->cred_p, sm_p->parent_ref.fs_id, - sm_p->parent_ref.handle); + sm_p->parent_ref.handle, + sm_p->hints); gossip_debug(GOSSIP_CLIENT_DEBUG, "- doing MGMT_GET_DIRDATA_HANDLE " "under %llu,%d\n", llu(sm_p->parent_ref.handle), @@ -186,6 +191,8 @@ static int mgmt_get_dirdata_handle_setup_msgpair( gossip_err("Failed to map server address\n"); js_p->error_code = ret; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -193,7 +200,7 @@ static int mgmt_get_dirdata_handle_comp_fn( void *v_p, struct PVFS_server_resp *resp_p, int index) { PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); assert(resp_p->op == PVFS_SERV_MGMT_GET_DIRDATA_HANDLE); diff --git a/src/client/sysint/mgmt-iterate-handles-list.sm b/src/client/sysint/mgmt-iterate-handles-list.sm index 5506163..9e7394b 100644 --- a/src/client/sysint/mgmt-iterate-handles-list.sm +++ b/src/client/sysint/mgmt-iterate-handles-list.sm @@ -64,12 +64,15 @@ PVFS_error PVFS_imgmt_iterate_handles_list( PVFS_ds_position *position_array, PVFS_BMI_addr_t *addr_array, int server_count, + int flags, PVFS_error_details *details, + PVFS_hint hints, PVFS_mgmt_op_id *op_id, void *user_ptr) { PINT_smcb *smcb = NULL; PINT_client_sm *sm_p = NULL; + int ret; gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_imgmt_iterate_handles_list() entered.\n"); @@ -91,7 +94,7 @@ PVFS_error PVFS_imgmt_iterate_handles_list( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, fs_id); + PINT_init_msgarray_params(sm_p, fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.iterate_handles_list.fs_id = fs_id; sm_p->u.iterate_handles_list.server_count = server_count; @@ -100,15 +103,14 @@ PVFS_error PVFS_imgmt_iterate_handles_list( sm_p->u.iterate_handles_list.handle_count_array 
= handle_count_array; sm_p->u.iterate_handles_list.position_array = position_array; sm_p->u.iterate_handles_list.details = details; + PVFS_hint_copy(hints, &sm_p->hints); + sm_p->u.iterate_handles_list.flags = flags; - sm_p->msgarray_count = server_count; - sm_p->msgarray = (PINT_sm_msgpair_state *)malloc( - server_count * sizeof(PINT_sm_msgpair_state)); - if (sm_p->msgarray == NULL) + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, server_count); + if(ret != 0) { - PVFS_util_release_credentials(sm_p->cred_p); PINT_smcb_free(smcb); - return -PVFS_ENOMEM; + return ret; } return PINT_client_state_machine_post( @@ -125,7 +127,9 @@ PVFS_error PVFS_mgmt_iterate_handles_list( PVFS_ds_position *position_array, PVFS_BMI_addr_t *addr_array, int server_count, - PVFS_error_details *details) + int flags, + PVFS_error_details *details, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_mgmt_op_id op_id; @@ -135,7 +139,7 @@ PVFS_error PVFS_mgmt_iterate_handles_list( ret = PVFS_imgmt_iterate_handles_list( fs_id, credentials, handle_matrix, handle_count_array, - position_array, addr_array, server_count, details, &op_id, NULL); + position_array, addr_array, server_count, flags, details, hints, &op_id, NULL); if (ret) { @@ -171,17 +175,18 @@ static PINT_sm_action mgmt_iterate_handles_list_setup_msgpair( /* setup msgpair array */ j=0; - for(i=0; i < sm_p->u.iterate_handles_list.server_count; i++) + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) { - msg_p = &sm_p->msgarray[j]; - +skipped: /* skip servers that have already reached end */ /* TODO: use a better #define or something for ITERATE_END */ - if(sm_p->u.iterate_handles_list.position_array[i] + if(sm_p->u.iterate_handles_list.position_array[j] == PVFS_ITERATE_END) { - sm_p->msgarray_count--; - sm_p->u.iterate_handles_list.handle_count_array[i] = 0; + sm_p->msgarray_op.count--; + sm_p->u.iterate_handles_list.handle_count_array[j] = 0; + j++; + goto skipped; } else { @@ -189,25 +194,22 @@ static PINT_sm_action 
mgmt_iterate_handles_list_setup_msgpair( msg_p->req, *sm_p->cred_p, sm_p->u.iterate_handles_list.fs_id, - sm_p->u.iterate_handles_list.handle_count_array[i], - sm_p->u.iterate_handles_list.position_array[i]); - j++; + sm_p->u.iterate_handles_list.handle_count_array[j], + sm_p->u.iterate_handles_list.position_array[j], + sm_p->u.iterate_handles_list.flags, + sm_p->hints); msg_p->fs_id = sm_p->u.iterate_handles_list.fs_id; msg_p->handle = PVFS_HANDLE_NULL; msg_p->retry_flag = PVFS_MSGPAIR_RETRY; msg_p->comp_fn = iterate_handles_list_comp_fn; - msg_p->svr_addr = sm_p->u.iterate_handles_list.addr_array[i]; - - if (server_is_osd(msg_p->svr_addr)) { - msg_p->suppress = 1; - sm_p->u.iterate_handles_list.handle_count_array[i] = 0; - sm_p->u.iterate_handles_list.position_array[i] = PVFS_ITERATE_END; - } + msg_p->svr_addr = sm_p->u.iterate_handles_list.addr_array[j]; + j++; } } /* TODO: be nicer about this, user called function too many times */ - assert(sm_p->msgarray_count > 0); + assert(sm_p->msgarray_op.count > 0); + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); /* immediate return: next state jumps to msgpairarray machine */ js_p->error_code = 0; @@ -228,15 +230,15 @@ static PINT_sm_action mgmt_iterate_handles_list_cleanup( for(i = 0; i < sm_p->u.iterate_handles_list.server_count; i++) { - if (sm_p->msgarray[i].op_status != 0) + if (sm_p->msgarray_op.msgarray[i].op_status != 0) { if (errct < sm_p->u.iterate_handles_list.details->count_allocated) { sm_p->u.iterate_handles_list.details->error[ - errct].error = sm_p->msgarray[i].op_status; + errct].error = sm_p->msgarray_op.msgarray[i].op_status; sm_p->u.iterate_handles_list.details->error[ - errct].addr = sm_p->msgarray[i].svr_addr; + errct].addr = sm_p->msgarray_op.msgarray[i].svr_addr; errct++; } else @@ -249,10 +251,7 @@ static PINT_sm_action mgmt_iterate_handles_list_cleanup( error = -PVFS_EDETAIL; } - if (sm_p->msgarray && (sm_p->msgarray != &sm_p->msgpair)) - { - free(sm_p->msgarray); - } + 
PINT_msgpairarray_destroy(&sm_p->msgarray_op); sm_p->error_code = error; @@ -266,17 +265,17 @@ static int iterate_handles_list_comp_fn(void *v_p, { int j = 0; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); /* if this particular request was successful, then collect info from * response */ - if (sm_p->msgarray[i].op_status == 0) + if (sm_p->msgarray_op.msgarray[i].op_status == 0) { /* first, we have to match this up with the correct array entry */ for (j=0; ju.iterate_handles_list.server_count; j++) { - if (sm_p->msgarray[i].svr_addr + if (sm_p->msgarray_op.msgarray[i].svr_addr == sm_p->u.iterate_handles_list.addr_array[j]) { break; @@ -297,15 +296,9 @@ static int iterate_handles_list_comp_fn(void *v_p, /* if this is the last response, check all of the status values and * return error code if any requests failed */ - if (i == (sm_p->msgarray_count -1)) + if (i == (sm_p->msgarray_op.count -1)) { - for (j=0; jmsgarray_count; j++) - { - if (sm_p->msgarray[j].op_status != 0) - { - return(sm_p->msgarray[j].op_status); - } - } + return PINT_msgarray_status(&sm_p->msgarray_op); } return 0; diff --git a/src/client/sysint/mgmt-misc.c b/src/client/sysint/mgmt-misc.c index d370790..8775c6e 100644 --- a/src/client/sysint/mgmt-misc.c +++ b/src/client/sysint/mgmt-misc.c @@ -19,6 +19,7 @@ #include "bmi.h" #include "pint-sysint-utils.h" #include "pint-cached-config.h" +#include "pint-util.h" #include "server-config.h" #include "client-state-machine.h" @@ -54,7 +55,8 @@ PVFS_error PVFS_mgmt_statfs_all( PVFS_credentials *credentials, struct PVFS_mgmt_server_stat *stat_array, int *inout_count_p, - PVFS_error_details *details) + PVFS_error_details *details, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL; PVFS_BMI_addr_t *addr_array = NULL; @@ -96,7 +98,7 @@ PVFS_error PVFS_mgmt_statfs_all( ret = PVFS_mgmt_statfs_list( fs_id, credentials, stat_array, addr_array, - 
real_count, details); + real_count, details, hints); free(addr_array); @@ -112,11 +114,11 @@ PVFS_error PVFS_mgmt_setparam_all( PVFS_fs_id fs_id, PVFS_credentials *credentials, enum PVFS_server_param param, - uint64_t value, - uint64_t *old_value_array, - PVFS_error_details *details) + struct PVFS_mgmt_setparam_value *value, + PVFS_error_details *details, + PVFS_hint hints) { - int i, j, count = 0; + int count = 0; PVFS_error ret = -PVFS_EINVAL; PVFS_BMI_addr_t *addr_array = NULL; ret = PINT_cached_config_count_servers( @@ -145,20 +147,9 @@ PVFS_error PVFS_mgmt_setparam_all( return ret; } - /* filter out OSDs; they do not do this */ - for (i=0; imsgarray_params, fs_id); + PINT_init_msgarray_params(sm_p, fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); - sm_p->msgpair.fs_id = fs_id; - sm_p->msgpair.retry_flag = PVFS_MSGPAIR_NO_RETRY; - sm_p->msgpair.svr_addr = addr; + PINT_msgpair_init(&sm_p->msgarray_op); + sm_p->msgarray_op.msgpair.fs_id = fs_id; + sm_p->msgarray_op.msgpair.retry_flag = PVFS_MSGPAIR_NO_RETRY; + sm_p->msgarray_op.msgpair.svr_addr = addr; + + PVFS_hint_copy(hints, &sm_p->hints); gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_mgmt_noop calling " "PINT_client_state_machine_post()\n"); @@ -110,14 +114,15 @@ PVFS_error PVFS_imgmt_noop( PVFS_error PVFS_mgmt_noop( PVFS_fs_id fs_id, PVFS_credentials *credentials, - PVFS_BMI_addr_t addr) + PVFS_BMI_addr_t addr, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_mgmt_op_id op_id; gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_mgmt_noop entered\n"); - ret = PVFS_imgmt_noop(fs_id, credentials, addr, &op_id, NULL); + ret = PVFS_imgmt_noop(fs_id, credentials, addr, &op_id, hints, NULL); if (ret) { PVFS_perror_gossip("PVFS_imgmt_noop call", ret); @@ -151,7 +156,7 @@ static int check_poweron_comp_fn(void *v_p, struct PVFS_server_resp *resp_p, { PINT_smcb *smcb = v_p; PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - struct osd_command *command = &sm_p->msgpair.osd_command; + 
struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; gossip_debug(GOSSIP_CLIENT_DEBUG, "%s: status %d\n", __func__, command->status); @@ -173,18 +178,16 @@ static PINT_sm_action mgmt_noop_setup_msgpair( gossip_debug(GOSSIP_CLIENT_DEBUG, "noop state: mgmt_noop_setup_msgpair\n"); - sm_p->msgarray = &(sm_p->msgpair); - sm_p->msgarray_count = 1; - - if (server_is_osd(sm_p->msgpair.svr_addr)) { - osd_command_set_test_unit_ready(&sm_p->msgpair.osd_command); - sm_p->msgpair.comp_fn = check_poweron_comp_fn; + if (server_is_osd(sm_p->msgarray_op.msgpair.svr_addr)) { + osd_command_set_test_unit_ready(&sm_p->msgarray_op.msgpair.osd_command); + sm_p->msgarray_op.msgpair.comp_fn = check_poweron_comp_fn; js_p->error_code = OSD_MSGPAIR; } else { - PINT_SERVREQ_MGMT_NOOP_FILL(sm_p->msgpair.req, *sm_p->cred_p); + PINT_SERVREQ_MGMT_NOOP_FILL(sm_p->msgarray_op.msgpair.req, *sm_p->cred_p, sm_p->hints); js_p->error_code = 0; } + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } diff --git a/src/client/sysint/mgmt-perf-mon-list.sm b/src/client/sysint/mgmt-perf-mon-list.sm index f02e8ab..a2149ae 100644 --- a/src/client/sysint/mgmt-perf-mon-list.sm +++ b/src/client/sysint/mgmt-perf-mon-list.sm @@ -59,24 +59,28 @@ machine pvfs2_client_mgmt_perf_mon_list_sm PVFS_error PVFS_imgmt_perf_mon_list( PVFS_fs_id fs_id, PVFS_credentials *credentials, - struct PVFS_mgmt_perf_stat **perf_matrix, + int64_t **perf_matrix, uint64_t *end_time_ms_array, PVFS_BMI_addr_t *addr_array, uint32_t* next_id_array, int server_count, + int *key_count, int history_count, PVFS_error_details *details, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PINT_smcb *smcb; PINT_client_sm *sm_p; + int ret; gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_imgmt_perf_mon_list entered\n"); - if ((server_count < 1) || (history_count < 1) || !perf_matrix || - !addr_array || !end_time_ms_array) + if ((server_count < 1) || (history_count < 1) || !key_count + || (*key_count < 1) || 
!perf_matrix || !addr_array + || !end_time_ms_array) { return -PVFS_EINVAL; } @@ -93,25 +97,24 @@ PVFS_error PVFS_imgmt_perf_mon_list( sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, fs_id); + PINT_init_msgarray_params(sm_p, fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.perf_mon_list.fs_id = fs_id; sm_p->u.perf_mon_list.perf_matrix = perf_matrix; sm_p->u.perf_mon_list.server_count = server_count; + sm_p->u.perf_mon_list.key_count = key_count; sm_p->u.perf_mon_list.history_count = history_count; sm_p->u.perf_mon_list.addr_array = addr_array; sm_p->u.perf_mon_list.next_id_array = next_id_array; sm_p->u.perf_mon_list.end_time_ms_array = end_time_ms_array; sm_p->u.perf_mon_list.details = details; + PVFS_hint_copy(hints, &sm_p->hints); - sm_p->msgarray_count = server_count; - sm_p->msgarray = (PINT_sm_msgpair_state *)malloc( - server_count * sizeof(PINT_sm_msgpair_state)); - if (sm_p->msgarray == NULL) + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, server_count); + if(ret != 0) { - PVFS_util_release_credentials(sm_p->cred_p); PINT_smcb_free(smcb); - return -PVFS_ENOMEM; + return ret; } return PINT_client_state_machine_post( @@ -123,13 +126,15 @@ PVFS_error PVFS_imgmt_perf_mon_list( PVFS_error PVFS_mgmt_perf_mon_list( PVFS_fs_id fs_id, PVFS_credentials *credentials, - struct PVFS_mgmt_perf_stat **perf_matrix, + int64_t **perf_matrix, uint64_t *end_time_ms_array, PVFS_BMI_addr_t *addr_array, uint32_t* next_id_array, int server_count, + int *key_count, int history_count, - PVFS_error_details *details) + PVFS_error_details *details, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_mgmt_op_id op_id; @@ -139,8 +144,8 @@ PVFS_error PVFS_mgmt_perf_mon_list( ret = PVFS_imgmt_perf_mon_list( fs_id, credentials, perf_matrix, end_time_ms_array, - addr_array, next_id_array, server_count, history_count, - details, &op_id, NULL); + addr_array, next_id_array, server_count, key_count, + 
history_count, details, &op_id, hints, NULL); if (ret) { @@ -175,15 +180,15 @@ static PINT_sm_action mgmt_perf_mon_list_setup_msgpair( "mgmt_perf_mon_list_setup_msgpair\n"); /* setup msgpair array */ - for (i=0; i < sm_p->msgarray_count; i++) + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) { - msg_p = &sm_p->msgarray[i]; - PINT_SERVREQ_MGMT_PERF_MON_FILL( msg_p->req, *sm_p->cred_p, sm_p->u.perf_mon_list.next_id_array[i], - sm_p->u.perf_mon_list.history_count); + *sm_p->u.perf_mon_list.key_count, + sm_p->u.perf_mon_list.history_count, + sm_p->hints); msg_p->fs_id = sm_p->u.perf_mon_list.fs_id; msg_p->handle = PVFS_HANDLE_NULL; @@ -194,6 +199,8 @@ static PINT_sm_action mgmt_perf_mon_list_setup_msgpair( /* immediate return: next state jumps to msgpairarray machine */ js_p->error_code = 0; + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -211,15 +218,15 @@ static PINT_sm_action mgmt_perf_mon_list_cleanup( for(i = 0; i < sm_p->u.perf_mon_list.server_count; i++) { - if (sm_p->msgarray[i].op_status != 0) + if (sm_p->msgarray_op.msgarray[i].op_status != 0) { if (errct < sm_p->u.perf_mon_list.details->count_allocated) { sm_p->u.perf_mon_list.details->error[errct].error = - sm_p->msgarray[i].op_status; + sm_p->msgarray_op.msgarray[i].op_status; sm_p->u.perf_mon_list.details->error[errct].addr = - sm_p->msgarray[i].svr_addr; + sm_p->msgarray_op.msgarray[i].svr_addr; errct++; } else @@ -232,10 +239,7 @@ static PINT_sm_action mgmt_perf_mon_list_cleanup( error = -PVFS_EDETAIL; } - if (sm_p->msgarray && (sm_p->msgarray != &sm_p->msgpair)) - { - free(sm_p->msgarray); - } + PINT_msgpairarray_destroy(&sm_p->msgarray_op); sm_p->error_code = error; @@ -249,33 +253,35 @@ static int perf_mon_list_comp_fn(void* v_p, { int j = 0; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); /* if this particular request was successful, then store the * 
performance information in an array to be returned to caller */ - if (sm_p->msgarray[i].op_status == 0) + if (sm_p->msgarray_op.msgarray[i].op_status == 0) { sm_p->u.perf_mon_list.next_id_array[i] = resp_p->u.mgmt_perf_mon.suggested_next_id; sm_p->u.perf_mon_list.end_time_ms_array[i] = resp_p->u.mgmt_perf_mon.end_time_ms; + *sm_p->u.perf_mon_list.key_count = + resp_p->u.mgmt_perf_mon.key_count; memcpy(sm_p->u.perf_mon_list.perf_matrix[i], resp_p->u.mgmt_perf_mon.perf_array, resp_p->u.mgmt_perf_mon.perf_array_count - * sizeof(struct PVFS_mgmt_perf_stat)); + * sizeof(int64_t)); } /* if this is the last response, check all of the status values and * return error code if any requests failed */ - if (i == (sm_p->msgarray_count -1)) + if (i == (sm_p->msgarray_op.count -1)) { - for (j=0; j < sm_p->msgarray_count; j++) + for (j=0; j < sm_p->msgarray_op.count; j++) { - if (sm_p->msgarray[j].op_status != 0) + if (sm_p->msgarray_op.msgarray[j].op_status != 0) { - return(sm_p->msgarray[j].op_status); + return(sm_p->msgarray_op.msgarray[j].op_status); } } } diff --git a/src/client/sysint/mgmt-remove-dirent.sm b/src/client/sysint/mgmt-remove-dirent.sm index 7d71797..b67f9a0 100644 --- a/src/client/sysint/mgmt-remove-dirent.sm +++ b/src/client/sysint/mgmt-remove-dirent.sm @@ -69,6 +69,7 @@ PVFS_error PVFS_imgmt_remove_dirent( char *entry, PVFS_credentials *credentials, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -96,10 +97,11 @@ PVFS_error PVFS_imgmt_remove_dirent( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, parent_ref.fs_id); + PINT_init_msgarray_params(sm_p, parent_ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->parent_ref = parent_ref; sm_p->u.mgmt_remove_dirent.entry = entry; + PVFS_hint_copy(hints, &sm_p->hints); gossip_debug( GOSSIP_CLIENT_DEBUG, "Trying to remove dirent %s under %llu,%d\n", @@ -115,7 +117,8 @@ PVFS_error 
PVFS_imgmt_remove_dirent( PVFS_error PVFS_mgmt_remove_dirent( PVFS_object_ref parent_ref, char *entry, - PVFS_credentials *credentials) + PVFS_credentials *credentials, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_mgmt_op_id op_id; @@ -124,7 +127,7 @@ PVFS_error PVFS_mgmt_remove_dirent( "PVFS_mgmt_remove_dirent entered\n"); ret = PVFS_imgmt_remove_dirent( - parent_ref, entry, credentials, &op_id, NULL); + parent_ref, entry, credentials, &op_id, hints, NULL); if (ret) { PVFS_perror_gossip("PVFS_imgmt_remove_dirent call", ret); @@ -164,14 +167,16 @@ static PINT_sm_action mgmt_remove_dirent_setup_msgpair( js_p->error_code = 0; - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; PINT_SERVREQ_MGMT_REMOVE_DIRENT_FILL( msg_p->req, *sm_p->cred_p, sm_p->parent_ref.fs_id, sm_p->parent_ref.handle, - sm_p->u.mgmt_remove_dirent.entry); + sm_p->u.mgmt_remove_dirent.entry, + sm_p->hints); gossip_debug(GOSSIP_REMOVE_DEBUG, "- doing MGMT_REMOVE_DIRENT %s " "under %llu,%d\n", sm_p->u.mgmt_remove_dirent.entry, @@ -188,8 +193,10 @@ static PINT_sm_action mgmt_remove_dirent_setup_msgpair( if (ret) { gossip_err("Failed to map server address\n"); - js_p->error_code = ret; + js_p->error_code = ret; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -197,9 +204,13 @@ static int mgmt_remove_dirent_comp_fn( void *v_p, struct PVFS_server_resp *resp_p, int index) { PINT_smcb *smcb = v_p; +#ifdef WIN32 + PINT_client_sm *sm_p = + PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); +#else PINT_client_sm *sm_p __attribute__((unused)) = - PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - + PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); +#endif assert(resp_p->op == PVFS_SERV_MGMT_REMOVE_DIRENT); if (resp_p->status == 0) diff --git a/src/client/sysint/mgmt-remove-object.sm b/src/client/sysint/mgmt-remove-object.sm index 0705081..f240378 100644 --- a/src/client/sysint/mgmt-remove-object.sm +++ 
b/src/client/sysint/mgmt-remove-object.sm @@ -68,6 +68,7 @@ PVFS_error PVFS_imgmt_remove_object( PVFS_object_ref object_ref, PVFS_credentials *credentials, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -94,9 +95,10 @@ PVFS_error PVFS_imgmt_remove_object( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, object_ref.fs_id); + PINT_init_msgarray_params(sm_p, object_ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->object_ref = object_ref; + PVFS_hint_copy(hints, &sm_p->hints); gossip_debug( GOSSIP_CLIENT_DEBUG, "Trying to remove handle %llu,%d\n", @@ -113,7 +115,8 @@ PVFS_error PVFS_imgmt_remove_object( */ PVFS_error PVFS_mgmt_remove_object( PVFS_object_ref object_ref, - PVFS_credentials *credentials) + PVFS_credentials *credentials, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; @@ -121,7 +124,7 @@ PVFS_error PVFS_mgmt_remove_object( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_mgmt_remove_object entered\n"); - ret = PVFS_imgmt_remove_object(object_ref, credentials, &op_id, NULL); + ret = PVFS_imgmt_remove_object(object_ref, credentials, &op_id, NULL, hints); if (ret) { PVFS_perror_gossip("PVFS_imgmt_remove_object call", ret); @@ -161,13 +164,15 @@ static PINT_sm_action mgmt_remove_object_setup_msgpair( js_p->error_code = 0; - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; PINT_SERVREQ_MGMT_REMOVE_OBJECT_FILL( msg_p->req, *sm_p->cred_p, sm_p->object_ref.fs_id, - sm_p->object_ref.handle); + sm_p->object_ref.handle, + sm_p->hints); gossip_debug(GOSSIP_REMOVE_DEBUG, "- doing MGMT_REMOVE_OBJECT on " "%llu,%d\n", llu(sm_p->object_ref.handle), @@ -184,8 +189,10 @@ static PINT_sm_action mgmt_remove_object_setup_msgpair( if (ret) { gossip_err("Failed to map server address\n"); - js_p->error_code = ret; + js_p->error_code = ret; } + + 
PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -193,9 +200,13 @@ static int mgmt_remove_object_comp_fn( void *v_p, struct PVFS_server_resp *resp_p, int index) { PINT_smcb *smcb = v_p; +#ifdef WIN32 + PINT_client_sm *sm_p = + PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); +#else PINT_client_sm *sm_p __attribute__((unused)) = - PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - + PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); +#endif assert(resp_p->op == PVFS_SERV_MGMT_REMOVE_OBJECT); if (resp_p->status == 0) diff --git a/src/client/sysint/mgmt-repair-file.sm b/src/client/sysint/mgmt-repair-file.sm index afdc512..d7f2e98 100644 --- a/src/client/sysint/mgmt-repair-file.sm +++ b/src/client/sysint/mgmt-repair-file.sm @@ -206,7 +206,7 @@ PVFS_error PVFS_imgmt_repair_file( } memset(sm_p->u.create.datafile_handles, 0,sizeof(PVFS_handle)); - PINT_init_msgarray_params(&sm_p->msgarray_params, parent_ref.fs_id); + PINT_init_msgarray_params(sm_p, parent_ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.create.object_name = object_name; sm_p->u.create.create_resp = resp; @@ -224,7 +224,7 @@ PVFS_error PVFS_imgmt_repair_file( free(sm_p); return -PVFS_ENOMEM; } - + /*Set requested number of dfiles to 1*/ num_dfiles_req = 1; diff --git a/src/client/sysint/mgmt-setparam-list.sm b/src/client/sysint/mgmt-setparam-list.sm index c868ee9..bbbd889 100644 --- a/src/client/sysint/mgmt-setparam-list.sm +++ b/src/client/sysint/mgmt-setparam-list.sm @@ -64,11 +64,11 @@ PVFS_error PVFS_imgmt_setparam_list( PVFS_fs_id fs_id, PVFS_credentials *credentials, enum PVFS_server_param param, - uint64_t value, + struct PVFS_mgmt_setparam_value *value, PVFS_BMI_addr_t *addr_array, - uint64_t *old_value_array, int count, PVFS_error_details *details, + PVFS_hint hints, PVFS_mgmt_op_id *op_id, void *user_ptr) { @@ -84,9 +84,6 @@ PVFS_error PVFS_imgmt_setparam_list( return ret; } - if (count == 0) - return 0; - PINT_smcb_alloc(&smcb, PVFS_MGMT_SETPARAM_LIST, 
sizeof(struct PINT_client_sm), client_op_state_get_machine, @@ -98,23 +95,20 @@ PVFS_error PVFS_imgmt_setparam_list( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, fs_id); + PINT_init_msgarray_params(sm_p, fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.setparam_list.fs_id = fs_id; sm_p->u.setparam_list.param = param; sm_p->u.setparam_list.value = value; sm_p->u.setparam_list.addr_array = addr_array; - sm_p->u.setparam_list.old_value_array = old_value_array; sm_p->u.setparam_list.count = count; sm_p->u.setparam_list.details = details; - sm_p->msgarray_count = count; - sm_p->msgarray = (PINT_sm_msgpair_state *)malloc( - count * sizeof(PINT_sm_msgpair_state)); - if (sm_p->msgarray == NULL) + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, count); + if(ret != 0) { PINT_smcb_free(smcb); - return -PVFS_ENOMEM; + return ret; } if (sm_p->u.setparam_list.root_check_status_array) @@ -123,11 +117,6 @@ PVFS_error PVFS_imgmt_setparam_list( sm_p->u.setparam_list.root_check_status_array = NULL; } - if (old_value_array != NULL) - { - memset(old_value_array, 0, (count * sizeof(uint64_t))); - } - return PINT_client_state_machine_post( smcb, op_id, user_ptr); } @@ -138,23 +127,20 @@ PVFS_error PVFS_mgmt_setparam_list( PVFS_fs_id fs_id, PVFS_credentials *credentials, enum PVFS_server_param param, - uint64_t value, + struct PVFS_mgmt_setparam_value *value, PVFS_BMI_addr_t *addr_array, - uint64_t *old_value_array, int count, - PVFS_error_details *details) + PVFS_error_details *details, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; gossip_debug(GOSSIP_CLIENT_DEBUG, "%s entered\n", __func__); - if (count == 0) - return 0; - ret = PVFS_imgmt_setparam_list( - fs_id, credentials, param, value, addr_array, old_value_array, - count, details, &op_id, NULL); + fs_id, credentials, param, value, addr_array, + count, details, hints, &op_id, NULL); if (ret) { @@ -188,17 +174,16 @@ 
static PINT_sm_action mgmt_setparam_list_setup_msgpair( "mgmt_setparam_list_setup_msgpair\n"); /* setup msgpair array */ - for (i=0; i < sm_p->msgarray_count; i++) + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) { - msg_p = &sm_p->msgarray[i]; - PINT_SERVREQ_MGMT_SETPARAM_FILL( msg_p->req, *sm_p->cred_p, sm_p->u.setparam_list.fs_id, sm_p->u.setparam_list.param, - sm_p->u.setparam_list.value); - + sm_p->u.setparam_list.value, + sm_p->hints); + msg_p->fs_id = sm_p->u.setparam_list.fs_id; msg_p->handle = PVFS_HANDLE_NULL; msg_p->retry_flag = PVFS_MSGPAIR_RETRY; @@ -224,6 +209,8 @@ static PINT_sm_action mgmt_setparam_list_setup_msgpair( /* immediate return: next state jumps to msgpairarray machine */ js_p->error_code = 0; + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -248,14 +235,14 @@ static PINT_sm_action mgmt_setparam_list_cleanup( } else { - status = sm_p->msgarray[i].op_status; + status = sm_p->msgarray_op.msgarray[i].op_status; } if (errct < sm_p->u.setparam_list.details->count_allocated) { sm_p->u.setparam_list.details->error[errct].error = status; sm_p->u.setparam_list.details->error[errct].addr = - sm_p->msgarray[i].svr_addr; + sm_p->msgarray_op.msgarray[i].svr_addr; errct++; } else @@ -275,10 +262,7 @@ static PINT_sm_action mgmt_setparam_list_cleanup( sm_p->u.setparam_list.root_check_status_array = NULL; } - if (sm_p->msgarray && (sm_p->msgarray != &sm_p->msgpair)) - { - free(sm_p->msgarray); - } + PINT_msgpairarray_destroy(&sm_p->msgarray_op); sm_p->error_code = error; @@ -297,26 +281,19 @@ static int collect_old_values_comp_fn( void *v_p, struct PVFS_server_resp *resp_p, int i) { PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); int j; - if ((sm_p->msgarray[i].op_status == 0) && - (sm_p->u.setparam_list.old_value_array)) - { - sm_p->u.setparam_list.old_value_array[i] = - resp_p->u.mgmt_setparam.old_value; - } - 
/* if this is the last response, check all of the status values * and return error code if any requests failed */ - if (i == (sm_p->msgarray_count -1)) + if (i == (sm_p->msgarray_op.count -1)) { - for (j = 0; j < sm_p->msgarray_count; j++) + for (j = 0; j < sm_p->msgarray_op.count; j++) { - if (sm_p->msgarray[j].op_status != 0) + if (sm_p->msgarray_op.msgarray[j].op_status != 0) { - return sm_p->msgarray[j].op_status; + return sm_p->msgarray_op.msgarray[j].op_status; } } } @@ -338,14 +315,14 @@ static int root_check_comp_fn( int j = 0; int owners = 0; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); /* store the op_status before it's overwritten with the return value of this comp_fn by the msgpairarray code */ sm_p->u.setparam_list.root_check_status_array[i] = - sm_p->msgarray[i].op_status; + sm_p->msgarray_op.msgarray[i].op_status; /* need to return non-zero status at the end if any of the statuses * are nonzero diff --git a/src/client/sysint/mgmt-statfs-list.sm b/src/client/sysint/mgmt-statfs-list.sm index 44c6ae1..18301a2 100644 --- a/src/client/sysint/mgmt-statfs-list.sm +++ b/src/client/sysint/mgmt-statfs-list.sm @@ -82,10 +82,12 @@ PVFS_error PVFS_imgmt_statfs_list( int count, PVFS_error_details *details, PVFS_mgmt_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PINT_smcb *smcb = NULL; PINT_client_sm *sm_p = NULL; + int ret; gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_imgmt_statfs_list entered\n"); @@ -106,25 +108,23 @@ PVFS_error PVFS_imgmt_statfs_list( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, fs_id); + PINT_init_msgarray_params(sm_p, fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.statfs_list.fs_id = fs_id; sm_p->u.statfs_list.stat_array = stat_array; sm_p->u.statfs_list.count = count; sm_p->u.statfs_list.addr_array = addr_array; sm_p->u.statfs_list.details = 
details; + PVFS_hint_copy(hints, &sm_p->hints); memset(sm_p->u.statfs_list.stat_array, 0, (count * sizeof(struct PVFS_mgmt_server_stat))); - sm_p->msgarray_count = count; - sm_p->msgarray = (PINT_sm_msgpair_state *)malloc( - (count * sizeof(PINT_sm_msgpair_state))); - if (sm_p->msgarray == NULL) + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, count); + if(ret != 0) { - PVFS_util_release_credentials(sm_p->cred_p); PINT_smcb_free(smcb); - return -PVFS_ENOMEM; + return ret; } return PINT_client_state_machine_post( @@ -139,7 +139,8 @@ PVFS_error PVFS_mgmt_statfs_list( struct PVFS_mgmt_server_stat *stat_array, PVFS_BMI_addr_t *addr_array, int count, - PVFS_error_details *details) + PVFS_error_details *details, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_mgmt_op_id op_id; @@ -148,7 +149,7 @@ PVFS_error PVFS_mgmt_statfs_list( ret = PVFS_imgmt_statfs_list( fs_id, credentials, stat_array, addr_array, count, - details, &op_id, NULL); + details, &op_id, hints, NULL); if (ret) { @@ -184,14 +185,13 @@ static PINT_sm_action mgmt_statfs_list_setup_msgpair( js_p->error_code = 0; - for(i = 0; imsgarray_count; i++) + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) { - msg_p = &sm_p->msgarray[i]; - PINT_SERVREQ_STATFS_FILL( msg_p->req, *sm_p->cred_p, - sm_p->u.statfs_list.fs_id); + sm_p->u.statfs_list.fs_id, + sm_p->hints); msg_p->fs_id = sm_p->u.statfs_list.fs_id; msg_p->handle = PVFS_HANDLE_NULL; @@ -199,6 +199,8 @@ static PINT_sm_action mgmt_statfs_list_setup_msgpair( msg_p->comp_fn = statfs_list_comp_fn; msg_p->svr_addr = sm_p->u.statfs_list.addr_array[i]; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -216,14 +218,14 @@ static PINT_sm_action mgmt_statfs_list_cleanup( for(i = 0; i < sm_p->u.statfs_list.count; i++) { - if (sm_p->msgarray[i].op_status != 0) + if (sm_p->msgarray_op.msgarray[i].op_status != 0) { if (errct < sm_p->u.statfs_list.details->count_allocated) { sm_p->u.statfs_list.details->error[errct].error = 
- sm_p->msgarray[i].op_status; + sm_p->msgarray_op.msgarray[i].op_status; sm_p->u.statfs_list.details->error[errct].addr = - sm_p->msgarray[i].svr_addr; + sm_p->msgarray_op.msgarray[i].svr_addr; errct++; } else @@ -236,12 +238,7 @@ static PINT_sm_action mgmt_statfs_list_cleanup( error = -PVFS_EDETAIL; } - if (sm_p->msgarray && (sm_p->msgarray != &sm_p->msgpair)) - { - free(sm_p->msgarray); - sm_p->msgarray = NULL; - } - + PINT_msgpairarray_destroy(&sm_p->msgarray_op); sm_p->error_code = error; return SM_ACTION_COMPLETE; @@ -253,12 +250,12 @@ static int statfs_list_comp_fn(void *v_p, { int j = 0; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); /* if this particular request was successful, then store the * statfs information in an array to be returned to caller */ - if (sm_p->msgarray[i].op_status == 0) + if (sm_p->msgarray_op.msgarray[i].op_status == 0) { struct PVFS_mgmt_server_stat *sm_stat = &sm_p->u.statfs_list.stat_array[i]; @@ -280,7 +277,7 @@ static int statfs_list_comp_fn(void *v_p, sm_stat->bmi_address = PVFS_mgmt_map_addr( sm_p->u.statfs_list.fs_id, sm_p->cred_p, - sm_p->msgarray[i].svr_addr, &sm_stat->server_type); + sm_p->msgarray_op.msgarray[i].svr_addr, &sm_stat->server_type); assert(sm_stat->bmi_address); assert(sm_stat->handles_total_count >= @@ -290,13 +287,13 @@ static int statfs_list_comp_fn(void *v_p, /* if this is the last response, check all of the status values * and return error code if any requests failed */ - if (i == (sm_p->msgarray_count -1)) + if (i == (sm_p->msgarray_op.count -1)) { - for (j=0; j < sm_p->msgarray_count; j++) + for (j=0; j < sm_p->msgarray_op.count; j++) { - if (sm_p->msgarray[j].op_status != 0) + if (sm_p->msgarray_op.msgarray[j].op_status != 0) { - return(sm_p->msgarray[j].op_status); + return(sm_p->msgarray_op.msgarray[j].op_status); } } } diff --git a/src/client/sysint/module.mk.in 
b/src/client/sysint/module.mk.in index a83139d..fa8ee23 100644 --- a/src/client/sysint/module.mk.in +++ b/src/client/sysint/module.mk.in @@ -51,9 +51,9 @@ CLIENT_SMCGEN := \ $(DIR)/mgmt-remove-dirent.c \ $(DIR)/mgmt-create-dirent.c \ $(DIR)/mgmt-get-dirdata-handle.c \ + $(DIR)/mgmt-get-uid-list.c \ $(DIR)/osdsm.c \ - $(DIR)/sys-osd-io.c \ - $(DIR)/sys-osd-dir.c + $(DIR)/sys-osd-io.c # track generated .c files that need to be removed during dist clean, etc. SMCGEN += $(CLIENT_SMCGEN) diff --git a/src/client/sysint/ncache.c b/src/client/sysint/ncache.c index 48e9e8b..a624bc4 100644 --- a/src/client/sysint/ncache.c +++ b/src/client/sysint/ncache.c @@ -397,8 +397,6 @@ int PINT_ncache_update( &status); if(ret == 0) { - gossip_debug(GOSSIP_NCACHE_DEBUG, "ncache: update old name [%s]\n", - entry); /* found match in cache; destroy old payload, replace, and * refresh time stamp */ @@ -409,8 +407,6 @@ int PINT_ncache_update( } else { - gossip_debug(GOSSIP_NCACHE_DEBUG, "ncache: update new name [%s]\n", - entry); /* not found in cache; insert new payload*/ ret = PINT_tcache_insert_entry(ncache, &entry_key, diff --git a/src/client/sysint/osd.c b/src/client/sysint/osd.c index 1550cad..ab01a07 100644 --- a/src/client/sysint/osd.c +++ b/src/client/sysint/osd.c @@ -265,8 +265,10 @@ void osd_find_scsi_addresses(PVFS_fs_id fs_id, int32_t *num_osd, goto out; } for (j = 0; j < num_drives; j++) { + /*if (strcmp(drives[j].targetname, + hmap->alias_mapping->host_alias) == 0) {*/ if (strcmp(drives[j].targetname, - hmap->alias_mapping->host_alias) == 0) { + "beaf10") == 0) { if (num >= max_addrs) { num = 0; goto out; diff --git a/src/client/sysint/osdsm.sm b/src/client/sysint/osdsm.sm index 3e09944..6662247 100644 --- a/src/client/sysint/osdsm.sm +++ b/src/client/sysint/osdsm.sm @@ -33,6 +33,7 @@ #include "pint-util.h" #include "server-config-mgr.h" #include "pvfs2-internal.h" +#include "osd-util/osd-util.h" enum { @@ -75,6 +76,12 @@ nested machine pvfs2_osd_msgpairarray_sm { run 
osd_msgpairarray_all_complete; MSGPAIRS_RETRY => post_retry; + default => done; + } + + state done + { + run osd_msgpairarray_done; default => return; } } @@ -83,20 +90,21 @@ nested machine pvfs2_osd_msgpairarray_sm static int osd_msgpairarray_init(struct PINT_smcb *smcb, job_status_s *js_p) { - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + int i = 0; PINT_sm_msgpair_state *msg_p = NULL; gossip_debug(GOSSIP_MSGPAIR_DEBUG, "%s\n", __func__); js_p->error_code = 0; - + /* set number of operations that must complete. */ - sm_p->msgarray_params.comp_ct = sm_p->msgarray_count; + sm_p->msgarray_op.params.comp_ct = sm_p->msgarray_op.count; - for (i = 0; i < sm_p->msgarray_count; i++) + for (i = 0; i < sm_p->msgarray_op.count; i++) { - msg_p = &sm_p->msgarray[i]; + msg_p = &sm_p->msgarray_op.msgarray[i]; assert(msg_p); assert((msg_p->retry_flag == PVFS_MSGPAIR_RETRY) || @@ -105,10 +113,11 @@ static int osd_msgpairarray_init(struct PINT_smcb *smcb, job_status_s *js_p) msg_p->retry_count = 0; if (msg_p->suppress) { msg_p->complete = 1; - --sm_p->msgarray_params.comp_ct; + --sm_p->msgarray_op.params.comp_ct; } else msg_p->complete = 0; } + return SM_ACTION_COMPLETE; } @@ -126,7 +135,7 @@ static int osd_msgpairarray_init(struct PINT_smcb *smcb, job_status_s *js_p) */ static int osd_msgpairarray_post(struct PINT_smcb *smcb, job_status_s *js_p) { - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); int ret = -PVFS_EINVAL, i; PVFS_msg_tag_t session_tag; PINT_sm_msgpair_state *msg_p; @@ -134,14 +143,14 @@ static int osd_msgpairarray_post(struct PINT_smcb *smcb, job_status_s *js_p) gossip_debug( GOSSIP_MSGPAIR_DEBUG, "%s: sm %p " "%d total message(s) with %d incomplete\n", __func__, sm_p, - sm_p->msgarray_count, sm_p->msgarray_params.comp_ct); + sm_p->msgarray_op.count, 
sm_p->msgarray_op.params.comp_ct); - assert(sm_p->msgarray_count > 0); - assert(sm_p->msgarray_params.comp_ct >= 1); + assert(sm_p->msgarray_op.count > 0); + assert(sm_p->msgarray_op.params.comp_ct >= 1); - for (i = 0; i < sm_p->msgarray_count; i++) + for (i = 0; i < sm_p->msgarray_op.count; i++) { - msg_p = &sm_p->msgarray[i]; + msg_p = &sm_p->msgarray_op.msgarray[i]; assert(msg_p); @@ -149,6 +158,7 @@ static int osd_msgpairarray_post(struct PINT_smcb *smcb, job_status_s *js_p) here we skip over the msgs that have already completed in the case of being in the retry code path when it's ok */ + if (msg_p->complete) { continue; @@ -163,18 +173,18 @@ static int osd_msgpairarray_post(struct PINT_smcb *smcb, job_status_s *js_p) __func__, sm_p, i, llu(msg_p->svr_addr)); ret = job_bmi_osd_submit(msg_p->svr_addr, &msg_p->osd_command, i, - sm_p->msgarray_params.job_context, smcb, - sm_p->msgarray_params.job_timeout); + sm_p->msgarray_op.params.job_context, smcb, + sm_p->msgarray_op.params.job_timeout); if (ret < 0) { PVFS_perror_gossip("Command submit failed", ret); msg_p->op_status = ret; msg_p->send_id = 0; - --sm_p->msgarray_params.comp_ct; + --sm_p->msgarray_op.params.comp_ct; } } - if (sm_p->msgarray_params.comp_ct == 0) + if (sm_p->msgarray_op.params.comp_ct == 0) { /* everything is completed already (could happen in some failure * cases); jump straight to final completion function. 
@@ -197,12 +207,12 @@ static int osd_msgpairarray_post_retry(struct PINT_smcb *smcb, job_id_t tmp_id; gossip_debug(GOSSIP_MSGPAIR_DEBUG, "%s: sm %p, wait %d ms\n", - __func__, sm_p, sm_p->msgarray_params.retry_delay); + __func__, sm_p, sm_p->msgarray_op.params.retry_delay); return job_req_sched_post_timer( - sm_p->msgarray_params.retry_delay, + sm_p->msgarray_op.params.retry_delay, sm_p, 0, js_p, &tmp_id, - sm_p->msgarray_params.job_context); + sm_p->msgarray_op.params.job_context); } /* @@ -211,22 +221,23 @@ static int osd_msgpairarray_post_retry(struct PINT_smcb *smcb, static int osd_msgpairarray_one_complete(struct PINT_smcb *smcb, job_status_s *js_p) { - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); PINT_sm_msgpair_state *msg_p; gossip_debug(GOSSIP_MSGPAIR_DEBUG, "%s: sm %p tag %lld count %d\n", __func__, sm_p, lld(js_p->status_user_tag), - sm_p->msgarray_count); + sm_p->msgarray_op.count); + /* match operation with something in the msgpair array */ - assert(js_p->status_user_tag < sm_p->msgarray_count); + assert(js_p->status_user_tag < sm_p->msgarray_op.count); - msg_p = &sm_p->msgarray[js_p->status_user_tag]; + msg_p = &sm_p->msgarray_op.msgarray[js_p->status_user_tag]; msg_p->recv_id = 0; /* decrement comp_ct until all operations have completed */ - --sm_p->msgarray_params.comp_ct; - if (sm_p->msgarray_params.comp_ct > 0) { + --sm_p->msgarray_op.params.comp_ct; + if (sm_p->msgarray_op.params.comp_ct > 0) { js_p->error_code = 0; return SM_ACTION_DEFERRED; } else { @@ -239,22 +250,23 @@ static int osd_msgpairarray_one_complete(struct PINT_smcb *smcb, static int osd_msgpairarray_all_complete(struct PINT_smcb *smcb, job_status_s *js_p) { - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); int i; js_p->error_code = 0; gossip_debug(GOSSIP_MSGPAIR_DEBUG, "(%p) %s\n", 
sm_p, __func__); - for (i = 0; i < sm_p->msgarray_count; i++) + for (i = 0; i < sm_p->msgarray_op.count; i++) { - PINT_sm_msgpair_state *msg_p = &sm_p->msgarray[i]; + PINT_sm_msgpair_state *msg_p = &sm_p->msgarray_op.msgarray[i]; struct osd_command *command = &msg_p->osd_command; /* * Can take multiple trips through this function as we retry * ones that failed. */ + if (msg_p->complete) continue; @@ -281,6 +293,14 @@ static int osd_msgpairarray_all_complete(struct PINT_smcb *smcb, return SM_ACTION_COMPLETE; } +static int osd_msgpairarray_done( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + int task_id, error_code, remaining; + PINT_sm_pop_frame(smcb, &task_id, &error_code, &remaining); + return SM_ACTION_COMPLETE; +} + /* * vim: ts=8 sts=4 sw=4 expandtab ft=c */ diff --git a/src/client/sysint/perf-count-timer.sm b/src/client/sysint/perf-count-timer.sm index 5991d23..8ff7fdb 100644 --- a/src/client/sysint/perf-count-timer.sm +++ b/src/client/sysint/perf-count-timer.sm @@ -7,9 +7,11 @@ #include #include #include +#ifndef WIN32 #include -#include #include +#endif +#include #include "client-state-machine.h" #include "pint-perf-counter.h" @@ -80,11 +82,19 @@ static PINT_sm_action perf_count_timer_do_work( tmp_text = PINT_perf_generate_text(sm_p->u.perf_count_timer.pc, 4096); if(tmp_text) { +#ifdef WIN32 + token = strtok(tmp_text, delim); /* thread-safe */ +#else token = strtok_r(tmp_text, delim, &ptr); +#endif while(token) { gossip_perf_log("%s\n", token); +#ifdef WIN32 + token = strtok(NULL, delim); +#else token = strtok_r(NULL, delim, &ptr); +#endif } free(tmp_text); } diff --git a/src/client/sysint/pint-sysdev-unexp.sm b/src/client/sysint/pint-sysdev-unexp.sm index 3a075e3..8dc3a5c 100644 --- a/src/client/sysint/pint-sysdev-unexp.sm +++ b/src/client/sysint/pint-sysdev-unexp.sm @@ -7,9 +7,11 @@ #include #include #include +#ifndef WIN32 #include -#include #include +#endif +#include #include "state-machine.h" #include "client-state-machine.h" diff --git 
a/src/client/sysint/pint-sysint-utils.c b/src/client/sysint/pint-sysint-utils.c index 286edad..4e2f67b 100644 --- a/src/client/sysint/pint-sysint-utils.c +++ b/src/client/sysint/pint-sysint-utils.c @@ -4,7 +4,9 @@ * See COPYING in top-level directory. */ +#ifndef WIN32 #include +#endif #include #include "pvfs2-sysint.h" @@ -66,7 +68,7 @@ int PINT_lookup_parent( } ret = PVFS_sys_lookup(fs_id, buf, credentials, - &resp_look, PVFS2_LOOKUP_LINK_FOLLOW); + &resp_look, PVFS2_LOOKUP_LINK_FOLLOW, NULL); if (ret < 0) { gossip_err("Lookup failed on %s\n", buf); diff --git a/src/client/sysint/pint-sysint-utils.h b/src/client/sysint/pint-sysint-utils.h index 64f9810..808228b 100644 --- a/src/client/sysint/pint-sysint-utils.h +++ b/src/client/sysint/pint-sysint-utils.h @@ -11,7 +11,11 @@ #include #include +#ifdef WIN32 +#include "wincommon.h" +#else #include +#endif #include #include @@ -30,7 +34,9 @@ int PINT_server_get_config( struct server_configuration_s *config, - struct PVFS_sys_mntent* mntent_p); + struct PVFS_sys_mntent* mntent_p, + PVFS_hint hints); + struct server_configuration_s *PINT_get_server_config_struct( PVFS_fs_id fs_id); void PINT_put_server_config_struct( diff --git a/src/client/sysint/remove.sm b/src/client/sysint/remove.sm index 6de0a5a..a92102e 100644 --- a/src/client/sysint/remove.sm +++ b/src/client/sysint/remove.sm @@ -24,7 +24,8 @@ enum { REMOVE_MUST_REMOVE_DATAFILES = 1, OSD_MSGPAIR = 2001, - OSD_MDFILE_MSGPAIR = 2002 + OSD_MDFILE_MSGPAIR = 2002, + SKIP_COLLECTION_REMOVAL = 2003 }; #define HANDLE_REMOVE_ERROR(state) \ @@ -111,6 +112,21 @@ nested machine pvfs2_client_remove_helper_sm state object_remove_xfer_msgpair { jump pvfs2_msgpairarray_sm; + success => remove_collection; + default => object_remove_failure; + } + + state remove_collection + { + run remove_collection; + success => remove_collection_xfer; + SKIP_COLLECTION_REMOVAL => remove_helper_cleanup; + default => object_remove_failure; + } + + state remove_collection_xfer + { + jump 
pvfs2_osd_msgpairarray_sm; success => remove_helper_cleanup; default => object_remove_failure; } @@ -190,12 +206,12 @@ static PINT_sm_action remove_datafile_remove_setup_msgpair( struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - int i = 0; - int ret = -PVFS_EINVAL; PVFS_object_attr *attr = NULL; + int ret = -PVFS_EINVAL; + PINT_sm_msgpair_state *msg_p = NULL; + int is_osd = fsid_is_osd(sm_p->object_ref.fs_id); - /* post all datafile remove requests and responses simultaneously. - * + /* * NOTE: it's easier to clean up from a metafile with no datafiles * than the other way around! so we remove datafiles first. */ @@ -207,66 +223,49 @@ static PINT_sm_action remove_datafile_remove_setup_msgpair( attr = &sm_p->getattr.attr; assert(attr); - sm_p->msgarray = (PINT_sm_msgpair_state *)malloc( - attr->u.meta.dfile_count * sizeof(PINT_sm_msgpair_state)); - if (!sm_p->msgarray) - { - js_p->error_code = -PVFS_ENOMEM; - return SM_ACTION_COMPLETE; - } - sm_p->msgarray_count = attr->u.meta.dfile_count; - memset(sm_p->msgarray, 0, sm_p->msgarray_count * sizeof(*sm_p->msgarray)); + /* post a send/recv pair for the remove to the + first server in the list of datafiles */ + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; - /* initialize message structures */ - for (i = 0; i < attr->u.meta.dfile_count; i++) - { - PINT_sm_msgpair_state *msg_p = &sm_p->msgarray[i]; + if (is_osd) { + struct osd_command *command = &msg_p->osd_command; + ret = osd_command_set_remove(command, PVFS_OSD_DATA_PID, + attr->u.meta.dfile_array[0]); + if (ret) { + osd_error_xerrno(ret, "%s: osd_command_set_remove failed", + __func__); + js_p->error_code = ret; + return 1; + } + + js_p->error_code = OSD_MSGPAIR; /* if any is, all are */ - msg_p->fs_id = sm_p->object_ref.fs_id; - msg_p->handle = attr->u.meta.dfile_array[i]; - msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + } else { + PINT_SERVREQ_TREE_REMOVE_FILL( + 
msg_p->req, + *sm_p->cred_p, + sm_p->object_ref.fs_id, + attr->u.meta.dfile_count, + attr->u.meta.dfile_array, + sm_p->hints); } - /* fill in address of each server to contact */ - ret = PINT_serv_msgpairarray_resolve_addrs( - sm_p->msgarray_count, sm_p->msgarray); + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = attr->u.meta.dfile_array[0]; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; /* TODO: Do we want retry? */ + msg_p->comp_fn = NULL; - if (ret < 0) - { - gossip_err("Error: failed to resolve server addresses.\n"); - js_p->error_code = ret; - return SM_ACTION_COMPLETE; - } - - /* for each datafile, post a send/recv pair for the remove */ - for (i = 0; i < attr->u.meta.dfile_count; i++) + ret = PINT_cached_config_map_to_server( + &msg_p->svr_addr, msg_p->handle, msg_p->fs_id); + + if (ret) { - PINT_sm_msgpair_state *msg_p = &sm_p->msgarray[i]; - - gossip_debug(GOSSIP_CLIENT_DEBUG, - " datafile_remove: removing handle %llu\n", - llu(attr->u.meta.dfile_array[i])); - - if (server_is_osd(msg_p->svr_addr)) { - struct osd_command *command = &msg_p->osd_command; - ret = osd_command_set_remove(command, PVFS_OSD_DATA_PID, - attr->u.meta.dfile_array[i]); - if (ret) { - osd_error_xerrno(ret, "%s: osd_command_set_remove failed", - __func__); - js_p->error_code = ret; - return 1; - } - js_p->error_code = OSD_MSGPAIR; /* if any is, all are */ - } else { - PINT_SERVREQ_REMOVE_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->object_ref.fs_id, - attr->u.meta.dfile_array[i]); - } + gossip_err("Failed to map meta server address\n"); + js_p->error_code = ret; } + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -288,17 +287,19 @@ static PINT_sm_action remove_object_remove_setup_msgpair( return SM_ACTION_COMPLETE; } - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; msg_p->fs_id = sm_p->object_ref.fs_id; msg_p->handle = sm_p->object_ref.handle; + ret = PINT_cached_config_map_to_server( 
&msg_p->svr_addr, msg_p->handle, msg_p->fs_id); + if (ret) { gossip_err("Failed to map meta server address\n"); js_p->error_code = ret; - return SM_ACTION_COMPLETE; } if (server_is_osd(msg_p->svr_addr)) { @@ -309,7 +310,7 @@ static PINT_sm_action remove_object_remove_setup_msgpair( */ /* Remove the metafile object */ uint64_t oid = sm_p->object_ref.handle; - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; ret = osd_command_set_remove(command, PVFS_OSD_META_PID, oid); if (ret) { @@ -324,14 +325,76 @@ static PINT_sm_action remove_object_remove_setup_msgpair( msg_p->req, *sm_p->cred_p, sm_p->object_ref.fs_id, - sm_p->object_ref.handle); + sm_p->object_ref.handle, + sm_p->hints); js_p->error_code = 0; } msg_p->retry_flag = PVFS_MSGPAIR_RETRY; msg_p->comp_fn = NULL; - return SM_ACTION_COMPLETE; + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); + return SM_ACTION_COMPLETE; +} + +static PINT_sm_action remove_collection( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int ret = -PVFS_EINVAL; + PINT_sm_msgpair_state *msg_p = NULL; + struct server_configuration_s *server_config; + PINT_llist *cur = NULL; + struct host_alias_s *cur_alias; + PVFS_BMI_addr_t addr; + + js_p->error_code = 0; + + server_config = PINT_get_server_config_struct( + sm_p->object_ref.fs_id); + PINT_put_server_config_struct(server_config); + + if (!(server_config->member_attr) || sm_p->getattr.attr.objtype != PVFS_TYPE_DIRECTORY) { + js_p->error_code = SKIP_COLLECTION_REMOVAL; + return SM_ACTION_COMPLETE; + } + + cur = server_config->host_aliases; + while(cur) + { + cur_alias = PINT_llist_head(cur); + if (!cur_alias) + { + break; + } + if(!strncmp(cur_alias->bmi_address, "osd", 3)) { + BMI_addr_lookup(&addr,cur_alias->bmi_address); + } + + cur = PINT_llist_next(cur); + } + + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = 
&sm_p->msgarray_op.msgpair; + + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->svr_addr = addr; + msg_p->comp_fn = NULL; + + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; + + ret = osd_command_set_remove_collection(command, PVFS_OSD_DATA_PID, sm_p->getattr.attr.cid, 0); + + if (ret) { + osd_error_xerrno(ret, "%s: osd_command_set_remove_collection failed", + __func__); + js_p->error_code = ret; + return 1; + } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); + return SM_ACTION_COMPLETE; } static PINT_sm_action remove_datafile_remove_failure( @@ -376,14 +439,10 @@ static PINT_sm_action remove_helper_cleanup( PINT_acache_invalidate(sm_p->object_ref); - if (sm_p->msgarray && (sm_p->msgarray != &sm_p->msgpair)) - { - free(sm_p->msgarray); - sm_p->msgarray = NULL; - } + PINT_msgpairarray_destroy(&sm_p->msgarray_op); /* came directly from object_remove_setup_msgpair, no error to check */ - if (js_p->error_code == OSD_MDFILE_MSGPAIR) + if (js_p->error_code == OSD_MDFILE_MSGPAIR || js_p->error_code == SKIP_COLLECTION_REMOVAL) js_p->error_code = 0; if (js_p->error_code) diff --git a/src/client/sysint/server-get-config.sm b/src/client/sysint/server-get-config.sm index c296af8..0dceba2 100644 --- a/src/client/sysint/server-get-config.sm +++ b/src/client/sysint/server-get-config.sm @@ -6,7 +6,9 @@ #include #include +#ifndef WIN32 #include +#endif #include "client-state-machine.h" #include "pvfs2-debug.h" @@ -14,10 +16,9 @@ #include "job.h" #include "gossip.h" #include "str-utils.h" +#include "pint-util.h" #include "pint-cached-config.h" #include "PINT-reqproto-encode.h" -#include "osd-util/osd-sense.h" -#include "osd-initiator/sense.h" extern job_context_id pint_client_sm_context; @@ -29,13 +30,6 @@ static int server_parse_config( static int server_get_config_comp_fn( void *v_p, struct PVFS_server_resp *resp_p, int i); -static int server_fetch_config_comp_fn( - void *v_p, struct PVFS_server_resp *resp_p, int 
i); - -enum { - OSD_MSGPAIR = 2001, -}; - %% nested machine pvfs2_server_get_config_nested_sm @@ -43,18 +37,10 @@ nested machine pvfs2_server_get_config_nested_sm state setup_msgpair { run server_get_config_setup_msgpair; - OSD_MSGPAIR => osd_xfer_msgpair; success => xfer_msgpair; default => cleanup; } - state osd_xfer_msgpair - { - jump pvfs2_osd_msgpairarray_sm; - success => parse; - default => cleanup; - } - state xfer_msgpair { jump pvfs2_msgpairarray_sm; @@ -90,43 +76,6 @@ machine pvfs2_server_get_config_sm } } -nested machine pvfs2_server_fetch_config_nested_sm -{ - state fetch_setup_msgpair - { - run server_fetch_config_setup_msgpair; - success => fetch_xfer_msgpair; - default => fetch_cleanup; - } - - state fetch_xfer_msgpair - { - jump pvfs2_msgpairarray_sm; - default => fetch_cleanup; - } - - state fetch_cleanup - { - run server_fetch_config_cleanup; - default => return; - } -} - -machine pvfs2_server_fetch_config_sm -{ - state run_fetch_nested - { - jump pvfs2_server_fetch_config_nested_sm; - default => parent_fetch_cleanup; - } - - state parent_fetch_cleanup - { - run server_fetch_config_parent_cleanup; - default => terminate; - } -} - %% /* @@ -138,7 +87,8 @@ machine pvfs2_server_fetch_config_sm */ int PINT_server_get_config( struct server_configuration_s *config, - struct PVFS_sys_mntent* mntent_p) + struct PVFS_sys_mntent* mntent_p, + PVFS_hint hints) { int ret = -PVFS_EINVAL; PINT_smcb *smcb = NULL; @@ -175,17 +125,17 @@ int PINT_server_get_config( * PINT_init_msgarray_params(), because we don't yet have a server * configuration file to override default parameters. 
*/ - sm_p->msgarray_params.job_context = pint_client_sm_context; - sm_p->msgarray_params.job_timeout = 30; /* 30 second job timeout */ - sm_p->msgarray_params.retry_delay = 2000; /* 2 second retry delay */ - sm_p->msgarray_params.retry_limit = 5; /* retry up to 5 times */ - - sm_p->msgarray_count = 1; - sm_p->msgarray = &(sm_p->msgpair); + sm_p->msgarray_op.params.job_context = pint_client_sm_context; + sm_p->msgarray_op.params.job_timeout = 30; /* 30 second job timeout */ + sm_p->msgarray_op.params.retry_delay = 2000; /* 2 second retry delay */ + sm_p->msgarray_op.params.retry_limit = 5; /* retry up to 5 times */ + PINT_msgpair_init(&sm_p->msgarray_op); PINT_init_sysint_credentials(sm_p->cred_p, &creds); sm_p->u.get_config.mntent = mntent_p; sm_p->u.get_config.config = config; + + PVFS_hint_copy(hints, &sm_p->hints); ret = PINT_client_state_machine_post(smcb, &op_id, NULL); if (ret) @@ -207,6 +157,79 @@ int PINT_server_get_config( return(error); } +#ifdef WIN32 +static int server_parse_config( + struct server_configuration_s *config, + char *fs_config_buf, + uint32_t fs_config_buf_size) +{ + int ret = 1, template_index = 1; + char temp_path[MAX_PATH], temp_file[MAX_PATH]; + char *server_alias = NULL; + DWORD rc = 1, use_temp_path, bytes; + HANDLE hFile; + + if (config == NULL) + { + return ret; + } + + assert(fs_config_buf); + + /* get the path to the TEMP directory */ + use_temp_path = GetTempPath(MAX_PATH, temp_path); + if (use_temp_path) + { + /* get temporary file name in TEMP directory */ + use_temp_path = GetTempFileName(temp_path, + "__pvfs_fs_config", 0, temp_file); + } + + if (!use_temp_path) + { + /* get temporary file name in current directory */ + rc = GetTempFileName(".", "__pvfs_fs_config", 0, temp_file); + } + + if (!rc) + { + gossip_err("Error: Cannot create temporary " + "configuration files!\n"); + return ret; + } + + hFile = CreateFile(temp_file, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, + FILE_ATTRIBUTE_NORMAL, NULL); + + if (hFile == 
INVALID_HANDLE_VALUE) + { + gossip_err("Error: Cannot create temporary " + "configuration files!\n"); + return ret; + } + + assert(!fs_config_buf[fs_config_buf_size - 1]); + + rc = WriteFile(hFile, fs_config_buf, fs_config_buf_size - 1, &bytes, NULL); + + CloseHandle(hFile); + + if (rc && (bytes == fs_config_buf_size - 1)) + { + server_alias = PINT_util_guess_alias(); + ret = PINT_parse_config(config, temp_file, server_alias, 0); + } + else + { + gossip_err("Failed to write fs.conf buffer to temp file: %s: %d\n", + temp_file, GetLastError()); + } + + DeleteFile(temp_file); + + return ret; +} +#else static int server_parse_config( struct server_configuration_s *config, char *fs_config_buf, @@ -214,6 +237,7 @@ static int server_parse_config( { int ret = 1, template_index = 1; int fs_fd = 0; + char *server_alias = NULL; char fs_template_array[2][64] = { ".__pvfs_fs_configXXXXXX", @@ -248,7 +272,8 @@ static int server_parse_config( if (write(fs_fd,fs_config_buf, (fs_config_buf_size - 1)) == (fs_config_buf_size - 1)) { - ret = PINT_parse_config(config, fs_template, NULL); + server_alias = PINT_util_guess_alias(); + ret = PINT_parse_config(config, fs_template, server_alias, 0); } else { @@ -262,6 +287,7 @@ static int server_parse_config( } return ret; } +#endif static PINT_sm_action server_get_config_setup_msgpair( struct PINT_smcb *smcb, job_status_s *js_p) @@ -272,17 +298,18 @@ static PINT_sm_action server_get_config_setup_msgpair( PVFS_BMI_addr_t serv_addr; gossip_debug(GOSSIP_CLIENT_DEBUG, - "get_config state: server_get_config_setup_msgpair\n"); + "get_config state: server_get_config_setup_msgpair\n"); + + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; if (ENCODING_IS_VALID(sm_p->u.get_config.mntent->encoding)) { - sm_p->msgpair.enc_type = sm_p->u.get_config.mntent->encoding; + msg_p->enc_type = sm_p->u.get_config.mntent->encoding; } - msg_p = &sm_p->msgarray[0]; - ret = BMI_addr_lookup(&serv_addr, - 
sm_p->u.get_config.mntent->the_pvfs_config_server); + sm_p->u.get_config.mntent->the_pvfs_config_server); if (ret < 0) { gossip_lerr("Failed to resolve BMI address %s\n", @@ -291,23 +318,7 @@ static PINT_sm_action server_get_config_setup_msgpair( return SM_ACTION_COMPLETE; } - if (server_is_osd(serv_addr)) { - struct osd_command *command = &msg_p->osd_command; - int len = 64 * 1024; - - ret = osd_command_set_read(command, PVFS_OSD_META_PID, - PVFS_OSD_FSCONF_OID, len, 0); - command->inlen_alloc = len; - command->indata = malloc(len); - if (!command->indata) { - js_p->error_code = -ENOMEM; - return SM_ACTION_COMPLETE; - } - js_p->error_code = OSD_MSGPAIR; - } else { - PINT_SERVREQ_GETCONFIG_FILL(msg_p->req, *sm_p->cred_p); - js_p->error_code = 0; - } + PINT_SERVREQ_GETCONFIG_FILL(msg_p->req, *sm_p->cred_p, sm_p->hints); msg_p->fs_id = PVFS_FS_ID_NULL; msg_p->handle = PVFS_HANDLE_NULL; @@ -315,6 +326,8 @@ static PINT_sm_action server_get_config_setup_msgpair( msg_p->comp_fn = server_get_config_comp_fn; msg_p->svr_addr = serv_addr; + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); + js_p->error_code = 0; return SM_ACTION_COMPLETE; } @@ -361,7 +374,8 @@ static PINT_sm_action server_get_config_parse( if (!cur_fs) { gossip_err("Warning:\n Cannot retrieve information about " - "pvfstab entry %s\n", + "filesystem %s at tab entry: %s\n", + sm_p->u.get_config.mntent->pvfs_fs_name, sm_p->u.get_config.mntent->the_pvfs_config_server); /* @@ -405,32 +419,8 @@ static int server_get_config_comp_fn( struct PVFS_server_resp *resp_p, int i) { - int j = 0; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_sm_msgpair_state *msg_p = &sm_p->msgarray[0]; - - if (server_is_osd(msg_p->svr_addr)) { - struct osd_command *command = &msg_p->osd_command; - - if (command->status == SAM_STAT_CHECK_CONDITION) { - int key, code; - osd_sense_extract(command->sense, command->sense_len, &key, &code); - /* ignore "read past end of user object" */ - if (key 
== OSD_SSK_RECOVERED_ERROR && - code == OSD_ASC_READ_PAST_END_OF_USER_OBJECT) { - command->status = 0; - } - } - if (command->status != 0) - return osd_errno_from_status(command->status); - if (command->inlen == 0) - return -ENOENT; - sm_p->u.get_config.fs_config_buf = strdup(command->indata); - sm_p->u.get_config.fs_config_buf_size = command->inlen + 1; - free(command->indata); - return 0; - } + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); /* only posted one msgpair */ assert(i==0); @@ -438,7 +428,7 @@ static int server_get_config_comp_fn( /* if this particular request was successful, then store the server * response and let the caller sort it out */ - if (sm_p->msgarray[i].op_status == 0) + if (sm_p->msgarray_op.msgarray[i].op_status == 0) { sm_p->u.get_config.fs_config_buf = strdup(resp_p->u.getconfig.fs_config_buf); @@ -449,188 +439,12 @@ static int server_get_config_comp_fn( /* if this is the last response, check all of the status values * and return error code if any requests failed */ - if (i == (sm_p->msgarray_count -1)) + if (i == (sm_p->msgarray_op.count -1)) { - for (j=0; j < sm_p->msgarray_count; j++) - { - if (sm_p->msgarray[j].op_status != 0) - { - return(sm_p->msgarray[j].op_status); - } - } + return PINT_msgarray_status(&sm_p->msgarray_op); } return 0; } - -/* - * Force a fetch the configuration file from a given set of servers - * This is different from get_config() in that we do not depend on the - * configuration server specified in the mnt tab file - * Assumes that servers specified in the array are unique! 
- * CURRENTLY UNUSED - */ -#ifdef __GNUC__ -static int PINT_fetch_config_list(int nservers, PVFS_BMI_addr_t *servers, - char **fs_config_bufs, int *fs_config_buf_size) __attribute__((unused)); -#endif - -static int PINT_fetch_config_list(int nservers, PVFS_BMI_addr_t *servers, - char **fs_config_bufs, int *fs_config_buf_size) -{ - int ret = -PVFS_EINVAL; - PINT_smcb *smcb = NULL; - PINT_client_sm *sm_p = NULL; - PVFS_error error = 0; - PVFS_credentials creds; - PVFS_sys_op_id op_id; - - gossip_debug(GOSSIP_CLIENT_DEBUG, - "PINT_fetch_config_list entered\n"); - if (nservers < 0 || !servers || !fs_config_bufs || !fs_config_buf_size) - { - return ret; - } - - PVFS_util_gen_credentials(&creds); - PINT_smcb_alloc(&smcb, PVFS_SERVER_FETCH_CONFIG, - sizeof(struct PINT_client_sm), - client_op_state_get_machine, - client_state_machine_terminate, - pint_client_sm_context); - if (smcb == NULL) - { - return -PVFS_ENOMEM; - } - sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - - /* NOTE: we set these fields manually here rather than use - * PINT_init_msgarray_params(), because we don't yet have a server - * configuration file to override default parameters. 
- */ - sm_p->msgarray_params.job_context = pint_client_sm_context; - sm_p->msgarray_params.job_timeout = 30; /* 30 second job timeout */ - sm_p->msgarray_params.retry_delay = 2000; /* 2 second retry delay */ - sm_p->msgarray_params.retry_limit = 5; /* retry up to 5 times */ - PINT_init_sysint_credentials(sm_p->cred_p, &creds); - sm_p->fetch_config.nservers = nservers; - sm_p->fetch_config.addr_array = servers; - sm_p->fetch_config.fs_config_bufs = fs_config_bufs; - sm_p->fetch_config.fs_config_buf_size = fs_config_buf_size; - sm_p->msgarray_count = nservers; - sm_p->msgarray = (PINT_sm_msgpair_state *) malloc( - nservers * sizeof(PINT_sm_msgpair_state)); - if (!sm_p->msgarray) - { - PINT_smcb_free(smcb); - return -PVFS_ENOMEM; - } - - - ret = PINT_client_state_machine_post( - smcb, &op_id, NULL); - if (ret) - { - PVFS_perror_gossip("PINT_client_state_machine_post call", ret); - error = ret; - } - else - { - ret = PVFS_sys_wait(op_id, "X-fetch_config", &error); - if (ret) - { - PVFS_perror_gossip("PVFS_sys_wait call", ret); - error = ret; - } - } - - PINT_sys_release(op_id); - return(error); -} - -static PINT_sm_action server_fetch_config_setup_msgpair(struct PINT_smcb *smcb, - job_status_s *js_p) -{ - int i; - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_sm_msgpair_state *msg_p = NULL; - - gossip_debug(GOSSIP_CLIENT_DEBUG, - "fetch_config state: server_fetch_config_setup_msgpair\n"); - - for (i = 0; i < sm_p->msgarray_count; i++) - { - msg_p = &sm_p->msgarray[i]; - sm_p->msgarray[i].enc_type = sm_p->msgpair.enc_type; - PINT_SERVREQ_GETCONFIG_FILL(msg_p->req, *sm_p->cred_p); - - msg_p->fs_id = PVFS_FS_ID_NULL; - msg_p->handle = PVFS_HANDLE_NULL; - msg_p->retry_flag = PVFS_MSGPAIR_RETRY; - msg_p->comp_fn = server_fetch_config_comp_fn; - msg_p->svr_addr = sm_p->fetch_config.addr_array[i]; - } - - js_p->error_code = 0; - return SM_ACTION_COMPLETE; -} - -static PINT_sm_action server_fetch_config_cleanup(struct PINT_smcb *smcb, - 
job_status_s *js_p) -{ - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - if (sm_p->msgarray && (sm_p->msgarray != &sm_p->msgpair)) - { - free(sm_p->msgarray); - } - /* preserve js_p->error_code */ - - return SM_ACTION_COMPLETE; -} - -static int server_fetch_config_comp_fn( - void *v_p, - struct PVFS_server_resp *resp_p, - int i) -{ - int j = 0; - PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - - /* if this particular request was successful, then store the server - * response and let the caller sort it out */ - - if (sm_p->msgarray[i].op_status == 0) - { - sm_p->fetch_config.fs_config_bufs[i] = - strdup(resp_p->u.getconfig.fs_config_buf); - sm_p->fetch_config.fs_config_buf_size[i] = - resp_p->u.getconfig.fs_config_buf_size; - } - - /* if this is the last response, check all of the status values - * and return error code if any requests failed - */ - if (i == (sm_p->msgarray_count -1)) - { - for (j=0; j < sm_p->msgarray_count; j++) - { - if (sm_p->msgarray[j].op_status != 0) - { - return(sm_p->msgarray[j].op_status); - } - } - } - return 0; -} - -static PINT_sm_action server_fetch_config_parent_cleanup(struct PINT_smcb *smcb, - job_status_s *js_p) -{ - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - sm_p->error_code = js_p->error_code; - PINT_SET_OP_COMPLETE; - return SM_ACTION_DEFERRED; -} /* * Local variables: diff --git a/src/client/sysint/sys-create.sm b/src/client/sysint/sys-create.sm index 6167954..78c27af 100644 --- a/src/client/sysint/sys-create.sm +++ b/src/client/sysint/sys-create.sm @@ -26,6 +26,7 @@ #include "pint-dist-utils.h" #include "ncache.h" #include "pvfs2-internal.h" +#include "pvfs2-dist-varstrip.h" #include "osd-util/osd-util.h" extern job_context_id pint_client_sm_context; @@ -36,7 +37,7 @@ enum }; /* completion function prototypes */ -static int create_create_comp_fn( +static int create_comp_fn( void *v_p, struct PVFS_server_resp *resp_p, int index); 
static int create_datafiles_comp_fn( void *v_p, struct PVFS_server_resp *resp_p, int index); @@ -52,8 +53,9 @@ static PINT_dist* get_default_distribution(PVFS_fs_id fs_id); enum { OSD_MSGPAIR = 2001, - NO_OSD_HANDLES = 2002, - OSD_MDFILE_MSGPAIR = 2003 + OSD_CREATE_MSGPAIR = 2002, + OSD_MDFILE_MSGPAIR = 2003, + OSD_POST_CREATE = 2004 }; %% @@ -76,38 +78,52 @@ machine pvfs2_client_create_sm state parent_getattr_inspect { run create_parent_getattr_inspect; - success => dspace_create_setup_msgpair; + success => create_setup_msgpair; default => cleanup; } - state dspace_create_setup_msgpair + state create_setup_msgpair { - run create_dspace_create_setup_msgpair; - OSD_MSGPAIR => dspace_create_xfer_osd_msgpair; + run create_create_setup_msgpair; + OSD_MSGPAIR => create_xfer_osd_msgpair; + OSD_CREATE_MSGPAIR => create_xfer_datafile_msgpair; OSD_MDFILE_MSGPAIR => datafiles_setup_msgpair_array; - success => dspace_create_xfer_msgpair; + success => create_xfer_msgpair; default => cleanup; } - state dspace_create_xfer_osd_msgpair + state create_xfer_osd_msgpair { jump pvfs2_osd_msgpairarray_sm; success => datafiles_setup_msgpair_array; default => cleanup; } - state dspace_create_xfer_msgpair + state create_xfer_msgpair + { + jump pvfs2_msgpairarray_sm; + success => crdirent_setup_msgpair; + default => cleanup; + } + + state create_xfer_datafile_msgpair { jump pvfs2_msgpairarray_sm; success => datafiles_setup_msgpair_array; default => cleanup; } + state crdirent_setup_msgpair + { + run create_crdirent_setup_msgpair; + success => crdirent_xfer_msgpair; + default => crdirent_failure; + } + state datafiles_setup_msgpair_array { run create_datafiles_setup_msgpair_array; OSD_MSGPAIR => datafiles_xfer_osd_msgpair_array; - success => datafiles_xfer_msgpair_array; default => cleanup; } @@ -115,62 +131,20 @@ machine pvfs2_client_create_sm { jump pvfs2_osd_msgpairarray_sm; success => create_setattr_setup_msgpair; - default => datafiles_failure; - } - - state 
datafiles_xfer_msgpair_array - { - jump pvfs2_msgpairarray_sm; - success => create_setattr_setup_msgpair; - default => datafiles_failure; - } - - state datafiles_failure - { - run create_datafiles_failure; - default => delete_handles_setup_osd_msgpair_array; + default => crdirent_failure; } state create_setattr_setup_msgpair { run create_setattr_setup_msgpair; OSD_MSGPAIR => create_setattr_xfer_osd_msgpair; - success => create_setattr_xfer_msgpair; - default => cleanup; + default => crdirent_setup_msgpair; } state create_setattr_xfer_osd_msgpair { jump pvfs2_osd_msgpairarray_sm; success => crdirent_setup_msgpair; - default => create_setattr_failure; - } - - state create_setattr_xfer_msgpair - { - jump pvfs2_msgpairarray_sm; - success => crdirent_setup_msgpair; - default => create_setattr_failure; - } - - state create_setattr_failure - { - run create_setattr_failure; - default => delete_handles_setup_osd_msgpair_array; - } - - state crdirent_setup_msgpair - { - run create_crdirent_setup_msgpair; - OSD_MSGPAIR => crdirent_osd_msgpair; - success => crdirent_xfer_msgpair; - default => crdirent_failure; - } - - state crdirent_osd_msgpair - { - jump pvfs2_client_osd_dirops_sm; - success => cleanup; default => crdirent_failure; } @@ -184,20 +158,6 @@ machine pvfs2_client_create_sm state crdirent_failure { run create_crdirent_failure; - default => delete_handles_setup_osd_msgpair_array; - } - - state delete_handles_setup_osd_msgpair_array - { - run create_delete_handles_setup_osd_msgpair_array; - NO_OSD_HANDLES => delete_handles_setup_msgpair_array; - success => delete_handles_xfer_osd_msgpair_array; - default => cleanup; - } - - state delete_handles_xfer_osd_msgpair_array - { - jump pvfs2_osd_msgpairarray_sm; default => delete_handles_setup_msgpair_array; } @@ -235,12 +195,12 @@ PVFS_error PVFS_isys_create( PVFS_sys_layout *layout, PVFS_sysresp_create *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; PINT_smcb *smcb = NULL; 
PINT_client_sm *sm_p = NULL; - int num_dfiles_req; gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_isys_create entered\n"); @@ -285,19 +245,45 @@ PVFS_error PVFS_isys_create( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, parent_ref.fs_id); + PINT_init_msgarray_params(sm_p, parent_ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.create.object_name = object_name; sm_p->u.create.create_resp = resp; - sm_p->u.create.datafile_handles = NULL; - PVFS_util_copy_sys_attr(&sm_p->u.create.sys_attr, &attr); + PINT_CONVERT_ATTR(&sm_p->u.create.attr, &attr, PVFS_ATTR_META_ALL); + + /* save the original attribute passed in. since create does it's own + * retries we need the original attribute available on retries */ + PINT_copy_object_attr(&(sm_p->u.create.store_attr), &(sm_p->u.create.attr)); + sm_p->u.create.stored_error_code = 0; sm_p->u.create.retry_count = 0; + PVFS_hint_copy(hints, &sm_p->hints); + PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle), &parent_ref.handle); sm_p->parent_ref = parent_ref; + if(attr.mask & PVFS_ATTR_SYS_DFILE_COUNT) + { + sm_p->u.create.user_requested_num_data_files = attr.dfile_count; + } + /* copy layout to sm struct */ if(layout) { + /* make sure it is a supported layout */ + switch(layout->algorithm) + { + /* these are valid */ + case PVFS_SYS_LAYOUT_NONE: + case PVFS_SYS_LAYOUT_LOCAL: + case PVFS_SYS_LAYOUT_ROUND_ROBIN: + case PVFS_SYS_LAYOUT_RANDOM: + case PVFS_SYS_LAYOUT_LIST: + break; + /* anything else is not */ + default: + return(-PVFS_EINVAL); + } + sm_p->u.create.layout.algorithm = layout->algorithm; if(layout->algorithm == PVFS_SYS_LAYOUT_LIST) { @@ -341,7 +327,8 @@ PVFS_error PVFS_isys_create( else { /* Get the default distribution */ - sm_p->u.create.dist = get_default_distribution(sm_p->parent_ref.fs_id); + sm_p->u.create.dist = + get_default_distribution(sm_p->parent_ref.fs_id); if (!sm_p->u.create.dist) { PINT_smcb_free(smcb); @@ 
-349,45 +336,10 @@ PVFS_error PVFS_isys_create( } } - /* If an application hint has been provided, use that to request dfile - else, if a tabfile hint has been provided, use that instead*/ - num_dfiles_req = 0; - if (attr.mask & PVFS_ATTR_SYS_DFILE_COUNT) - { - num_dfiles_req = attr.dfile_count; - } - else - { - /* Check the mount options */ - int rc; - struct PVFS_sys_mntent mntent; - - rc = PVFS_util_get_mntent_copy(sm_p->object_ref.fs_id, &mntent); - if (0 == rc) - { - num_dfiles_req = mntent.default_num_dfiles; - PVFS_util_free_mntent(&mntent); - } - } - - /* Determine the number of dfiles, passing in client hints to - override any server hints */ - ret = PINT_cached_config_get_num_dfiles(sm_p->object_ref.fs_id, - sm_p->u.create.dist, - num_dfiles_req, - &sm_p->u.create.num_data_files); - - if (ret < 0) - { - gossip_err("Failed to get number of data servers\n"); - PINT_smcb_free(smcb); - return ret; - } - gossip_debug( GOSSIP_CLIENT_DEBUG, "Creating file %s under %llu, %d\n", object_name, llu(parent_ref.handle), parent_ref.fs_id); - + return PINT_client_state_machine_post( smcb, op_id, user_ptr); } @@ -395,21 +347,37 @@ PVFS_error PVFS_isys_create( /** Create a file with a specified distribution. 
*/ PVFS_error PVFS_sys_create( - char *object_name, - PVFS_object_ref parent_ref, - PVFS_sys_attr attr, - const PVFS_credentials *credentials, - PVFS_sys_dist *dist, - PVFS_sys_layout *layout, - PVFS_sysresp_create *resp) + char *object_name, /**< name of the file to create */ + PVFS_object_ref parent_ref, /**< handle of the parent dir */ + PVFS_sys_attr attr, /**< attributes of new file */ + const PVFS_credentials *credentials,/**< identity of the caller */ + PVFS_sys_dist *dist, /**< distribution of new file */ + PVFS_sysresp_create *resp, /**< response from the request */ + PVFS_sys_layout *layout, /**< selection of servers to hold file */ + PVFS_hint hints) /**< user supplied PVFS hints */ { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; + int ios_num; + struct server_configuration_s *server_config; + + server_config = PINT_get_server_config_struct( + parent_ref.fs_id); + PINT_cached_config_get_num_io(parent_ref.fs_id, &ios_num); + if(server_config->energysaving) + { + if(server_config->econumnodes >= ios_num) + { + server_config->econumnodes = 0; + server_config->energysaving = 0; + } + } + PINT_put_server_config_struct(server_config); gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_create entered\n"); ret = PVFS_isys_create(object_name, parent_ref, attr, credentials, - dist, layout, resp, &op_id, NULL); + dist, layout, resp, &op_id, hints, NULL); if (ret) { PVFS_perror_gossip("PVFS_isys_create call", ret); @@ -437,69 +405,72 @@ static PINT_sm_action create_init( struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); job_id_t tmp_id; - gossip_debug(GOSSIP_CLIENT_DEBUG, "create state: init\n"); - assert((js_p->error_code == 0) || (js_p->error_code == CREATE_RETRY)); + PINT_SM_GETATTR_STATE_FILL( + sm_p->getattr, + sm_p->object_ref, + PVFS_ATTR_COMMON_ALL|PVFS_ATTR_DIR_HINT, + PVFS_TYPE_DIRECTORY, + 0); + if (js_p->error_code == CREATE_RETRY) { js_p->error_code = 0; + /* call for getattr_acache_lookup (next state) */ + 
PINT_SM_GETATTR_STATE_FILL( + sm_p->getattr, + sm_p->object_ref, + PVFS_ATTR_COMMON_ALL|PVFS_ATTR_DIR_HINT, + PVFS_TYPE_DIRECTORY, + 0); + return job_req_sched_post_timer( - sm_p->msgarray_params.retry_delay, smcb, 0, js_p, &tmp_id, + sm_p->msgarray_op.params.retry_delay, smcb, 0, js_p, &tmp_id, pint_client_sm_context); } - PINT_SM_GETATTR_STATE_FILL( - sm_p->getattr, - sm_p->object_ref, - PVFS_ATTR_COMMON_ALL|PVFS_ATTR_DIR_HINT, - PVFS_TYPE_DIRECTORY, - 0); return SM_ACTION_COMPLETE; } -static int create_create_comp_fn(void *v_p, - struct PVFS_server_resp *resp_p, - int index) +static int create_comp_fn(void *v_p, + struct PVFS_server_resp *resp_p, + int index) { + gossip_err("create_comp_fn\n"); PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - - int is_osd_md = fsid_is_osd_md(sm_p->object_ref.fs_id); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + int is_osd_meta = fsid_is_osd_meta(sm_p->object_ref.fs_id); - + gossip_debug(GOSSIP_CLIENT_DEBUG, "create_create_comp_fn\n"); - if (is_osd_meta) { + if (is_osd_meta) { int ret; uint64_t oid; PVFS_error status; status = osd_errno_from_status( - sm_p->msgpair.osd_command.status); + sm_p->msgarray_op.msgpair.osd_command.status); if (status != 0) { return status; } /* otherwise, just stash the newly created meta handle */ - ret = osd_command_attr_resolve(&sm_p->msgpair.osd_command); + ret = osd_command_attr_resolve(&sm_p->msgarray_op.msgpair.osd_command); if (ret) { osd_error_xerrno(ret, "%s: attr_resolve failed", __func__); } - oid = get_ntohll(sm_p->msgpair.osd_command.attr[0].val); + oid = get_ntohll(sm_p->msgarray_op.msgpair.osd_command.attr[0].val); sm_p->u.create.metafile_handle = oid; sm_p->u.create.create_resp->ref.handle = oid; sm_p->u.create.create_resp->ref.fs_id = sm_p->object_ref.fs_id; - osd_command_attr_free(&sm_p->msgpair.osd_command); - - } else if (is_osd_md) { - /* There is nothing to be done here */ - + 
osd_command_attr_free(&sm_p->msgarray_op.msgpair.osd_command); } else { assert(resp_p->op == PVFS_SERV_CREATE); @@ -509,9 +480,24 @@ static int create_create_comp_fn(void *v_p, } /* otherwise, just stash the newly created meta handle */ - sm_p->u.create.metafile_handle = resp_p->u.create.handle; + sm_p->u.create.metafile_handle = + resp_p->u.create.metafile_handle; + sm_p->u.create.datafile_count = resp_p->u.create.datafile_count; + sm_p->u.create.datafile_handles = malloc( + sizeof(*sm_p->u.create.datafile_handles) * + sm_p->u.create.datafile_count); + if(!sm_p->u.create.datafile_handles) + { + return -PVFS_ENOMEM; + } + memcpy(sm_p->u.create.datafile_handles, + resp_p->u.create.datafile_handles, + (sizeof(*sm_p->u.create.datafile_handles) * + resp_p->u.create.datafile_count)); + + sm_p->u.create.stuffed = resp_p->u.create.stuffed; } - + gossip_debug( GOSSIP_CLIENT_DEBUG, "*** Got newly created handle %llu\n", llu(sm_p->u.create.metafile_handle)); @@ -523,19 +509,20 @@ static int create_datafiles_comp_fn(void *v_p, struct PVFS_server_resp *resp_p, int index) { + gossip_err("create_datafiles_comp_fn\n"); PVFS_error status; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); int is_osd = fsid_is_osd(sm_p->object_ref.fs_id); int is_osd_md = fsid_is_osd_md(sm_p->object_ref.fs_id); - + gossip_debug(GOSSIP_CLIENT_DEBUG, "create_datafiles_comp_fn[%d]\n",index); if (is_osd) { status = osd_errno_from_status( - sm_p->msgarray[index].osd_command.status); + sm_p->msgarray_op.msgarray[index].osd_command.status); } else { assert(resp_p->op == PVFS_SERV_CREATE); status = resp_p->status; @@ -578,29 +565,29 @@ static int create_datafiles_comp_fn(void *v_p, PVFS_handle first = ea->extent_array[0].first; PVFS_handle last = ea->extent_array[0].last; - ret = osd_command_attr_resolve(&sm_p->msgarray[index].osd_command); - if (ret) - osd_error_xerrno(ret, "%s: attr_resolve failed", 
__func__); - oid = get_ntohll(sm_p->msgarray[index].osd_command.attr[0].val); +/* ret = osd_command_attr_resolve(&sm_p->msgarray_op.msgarray[index].osd_command);*/ +/* if (ret)*/ +/* osd_error_xerrno(ret, "%s: attr_resolve failed", __func__);*/ +/* oid = get_ntohll(sm_p->msgarray_op.msgarray[index].osd_command.attr[0].val);*/ + oid = sm_p->u.create.datafile_handles[index]; if (oid < first || oid > last) gossip_err("%s: OSD-assigned oid %llu out of range %llu-%llu\n", __func__, llu(oid), llu(first), llu(last)); - sm_p->u.create.datafile_handles[index] = oid; - +/* sm_p->u.create.datafile_handles[index] = oid;*/ + /* The first datafile also happens to be the metafile */ if (is_osd_md && index == 0) { sm_p->u.create.metafile_handle = oid; sm_p->u.create.create_resp->ref.handle = oid; sm_p->u.create.create_resp->ref.fs_id = sm_p->object_ref.fs_id; } - osd_command_attr_free(&sm_p->msgarray[index].osd_command); - - } else - sm_p->u.create.datafile_handles[index] = resp_p->u.create.handle; +/* osd_command_attr_free(&sm_p->msgarray_op.msgarray[index].osd_command);*/ + } gossip_debug(GOSSIP_CLIENT_DEBUG, "Datafile handle %d is %llu\n", index, llu(sm_p->u.create.datafile_handles[index])); + return 0; } @@ -609,25 +596,22 @@ static int create_setattr_comp_fn(void *v_p, int index) { int res, status; - PVFS_object_ref refn; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); gossip_debug(GOSSIP_CLIENT_DEBUG, "create_setattr_comp_fn\n"); - if (server_is_osd(sm_p->msgpair.svr_addr)) { - osd_command_attr_free(&sm_p->msgpair.osd_command); - status = osd_errno_from_status(sm_p->msgpair.osd_command.status); + if (server_is_osd(sm_p->msgarray_op.msgpair.svr_addr)) { + osd_command_attr_free(&sm_p->msgarray_op.msgpair.osd_command); + status = osd_errno_from_status(sm_p->msgarray_op.msgpair.osd_command.status); } else { assert(resp_p->op == PVFS_SERV_SETATTR); status = 
resp_p->status; } - refn.fs_id = sm_p->msgpair.fs_id; - refn.handle = sm_p->msgpair.handle; - res = PINT_copy_object_attr(&sm_p->u.create.cache_attr, - &sm_p->msgpair.req.u.setattr.attr); + &sm_p->msgarray_op.msgpair.req.u.setattr.attr); + if(res != 0) { return res; @@ -640,53 +624,25 @@ static int create_crdirent_comp_fn(void *v_p, struct PVFS_server_resp *resp_p, int index) { - PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - gossip_debug(GOSSIP_CLIENT_DEBUG, "create_crdirent_comp_fn\n"); - if (server_is_osd(sm_p->msgpair.svr_addr)) { - return osd_errno_from_status(sm_p->msgpair.osd_command.status); - } else { - assert(resp_p->op == PVFS_SERV_CRDIRENT); - return resp_p->status; - } -} - -static int create_delete_handles_comp_fn(void *v_p, - struct PVFS_server_resp *resp_p, - int index) -{ - PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PVFS_error status; - PINT_sm_msgpair_state *msg_p = &sm_p->msgarray[index]; - - gossip_debug(GOSSIP_CLIENT_DEBUG, "create_delete_handles_comp_fn\n"); - - if (server_is_osd(msg_p->svr_addr)) { - status = - osd_errno_from_status(sm_p->msgarray[index].osd_command.status); - } else { - assert(resp_p->op == PVFS_SERV_REMOVE); - status = resp_p->status; - } - - if (status != 0) - { - gossip_debug(GOSSIP_CLIENT_DEBUG, - "Failed to remove handle number %d\n", index); - } - return status; + assert(resp_p->op == PVFS_SERV_CRDIRENT); + return resp_p->status; } -static PINT_sm_action create_dspace_create_setup_msgpair( +static PINT_sm_action create_create_setup_msgpair( struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret = -PVFS_EINVAL; PVFS_handle_extent_array meta_handle_extent_array; PINT_sm_msgpair_state *msg_p = NULL; + int server_type; + struct server_configuration_s *server_config; + + server_config = PINT_get_server_config_struct( + sm_p->object_ref.fs_id); + 
PINT_put_server_config_struct(server_config); /* * Check whether we're storing file metadata on a dedicated OSD MDS @@ -694,53 +650,64 @@ static PINT_sm_action create_dspace_create_setup_msgpair( */ int is_osd_md = fsid_is_osd_md(sm_p->object_ref.fs_id); int is_osd_meta = fsid_is_osd_meta(sm_p->object_ref.fs_id); - + int is_osd = fsid_is_osd(sm_p->object_ref.fs_id); + gossip_debug(GOSSIP_CLIENT_DEBUG, "create state: " "dspace_create_setup_msgpair\n"); - if (is_osd_md) { + //if (is_osd_md) { /* * OK, so osd_type == OSD_MDFILE which means that we're storing the * file metadata as attributes of the first datafile. There is nothing * else to be done here so we'll go ahead and skip the - * dspace_create_xfer state and jump directly to the + * create_xfer state and jump directly to the * datafiles_setup_msgpair_array state. */ - js_p->error_code = OSD_MDFILE_MSGPAIR; - return SM_ACTION_COMPLETE; - } + //js_p->error_code = OSD_MDFILE_MSGPAIR; + //return SM_ACTION_COMPLETE; + //} + + gossip_debug(GOSSIP_CLIENT_DEBUG," create: posting create req\n"); - if (sm_p->u.create.num_data_files > PVFS_REQ_LIMIT_DFILE_COUNT) + /* reset the attributes to what got passed in to the sysint call. the retry + * path comes through here so we'll want to reset it after each try. + * force the mask to all meta attributes. 
*/ + if( sm_p->u.create.retry_count > 0 ) { - sm_p->u.create.num_data_files = PVFS_REQ_LIMIT_DFILE_COUNT; - gossip_err("Warning: reducing number of data " - "files to PVFS_REQ_LIMIT_DFILE_COUNT\n"); + PINT_copy_object_attr(&(sm_p->u.create.attr), + &(sm_p->u.create.store_attr)); + sm_p->u.create.attr.mask |= PVFS_ATTR_META_ALL; } - gossip_debug(GOSSIP_CLIENT_DEBUG, "need to create %d datafiles\n", - sm_p->u.create.num_data_files); - - gossip_debug(GOSSIP_CLIENT_DEBUG," create: posting create req\n"); - - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; ret = PINT_cached_config_get_next_meta( - sm_p->object_ref.fs_id, - &msg_p->svr_addr, &meta_handle_extent_array, 0); - - if (ret) + sm_p->object_ref.fs_id, &msg_p->svr_addr, &meta_handle_extent_array, 0); + gossip_err("server: %s\n", BMI_addr_rev_lookup(msg_p->svr_addr)); + if(ret != 0) { gossip_err("Failed to map meta server address\n"); js_p->error_code = ret; return SM_ACTION_COMPLETE; } + /* resolve and print selected server only if gossip debugging enabled */ + if(gossip_debug_enabled(GOSSIP_CLIENT_DEBUG)) + { + gossip_debug(GOSSIP_CLIENT_DEBUG, + "PVFS_isys_create() selected meta server: %s\n", + PINT_cached_config_map_addr(sm_p->object_ref.fs_id, + msg_p->svr_addr, + &server_type)); + } + if (is_osd_meta) { /* * Create a zero-length datafile on the metadata OSD. We'll fill in * the required attributes in create_setattr_setup_msgpair(). 
*/ - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; struct attribute_list attr = { ATTR_GET, CUR_CMD_ATTR_PG, CCAP_OID, NULL, CCAP_OID_LEN }; @@ -762,39 +729,55 @@ static PINT_sm_action create_dspace_create_setup_msgpair( js_p->error_code = OSD_MSGPAIR; } else { - /* vanilla PVFS */ PINT_SERVREQ_CREATE_FILL( msg_p->req, *sm_p->cred_p, sm_p->object_ref.fs_id, - PVFS_TYPE_METAFILE, - meta_handle_extent_array); + sm_p->u.create.attr, + sm_p->u.create.num_data_files, + server_config->econumnodes, + sm_p->u.create.layout, + sm_p->hints); js_p->error_code = 0; + if (is_osd && !server_config->post_create) + js_p->error_code = OSD_CREATE_MSGPAIR; } msg_p->fs_id = sm_p->object_ref.fs_id; msg_p->handle = meta_handle_extent_array.extent_array[0].first; + gossip_err("msg_p->handle: %d\n", msg_p->handle); msg_p->retry_flag = PVFS_MSGPAIR_RETRY; - msg_p->comp_fn = create_create_comp_fn; - + msg_p->comp_fn = create_comp_fn; + msg_p->req.u.create.attr.u.meta.dfile_count = 0; + msg_p->req.u.create.attr.u.meta.dist = + sm_p->u.create.dist; + msg_p->req.u.create.attr.u.meta.dist_size = + PINT_DIST_PACK_SIZE(sm_p->u.create.dist); + msg_p->req.u.create.attr.cid = sm_p->getattr.attr.cid; + sm_p->u.create.attr.cid = sm_p->getattr.attr.cid; + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } static PINT_sm_action create_datafiles_setup_msgpair_array( struct PINT_smcb *smcb, job_status_s *js_p) { + gossip_err("create_datafiles_setup_msgpair_array\n"); struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - int ret = -PVFS_EINVAL, i = 0; + int ret = -PVFS_EINVAL; struct server_configuration_s *server_config; int is_osd; + PINT_sm_msgpair_state *msg_p = NULL; gossip_debug(GOSSIP_CLIENT_DEBUG, "create state: " "datafiles_setup_msgpair_array\n"); server_config = PINT_get_server_config_struct( sm_p->object_ref.fs_id); + PINT_put_server_config_struct(server_config); 
is_osd = (server_config->osd_type != OSD_NONE); - + js_p->error_code = is_osd ? OSD_MSGPAIR : 0; /* allocate handle extent array objects */ @@ -823,13 +806,14 @@ static PINT_sm_action create_datafiles_setup_msgpair_array( js_p->error_code = -PVFS_ENOMEM; return SM_ACTION_COMPLETE; } - + ret = PINT_cached_config_map_servers( sm_p->object_ref.fs_id, &sm_p->u.create.num_data_files, &sm_p->u.create.layout, sm_p->u.create.data_server_addrs, sm_p->u.create.io_handle_extent_array); + if(ret < 0) { gossip_err("create: failed to map the layout to a set of IO servers\n"); @@ -843,93 +827,59 @@ static PINT_sm_action create_datafiles_setup_msgpair_array( js_p->error_code = ret; return SM_ACTION_COMPLETE; } + + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; - memset(&sm_p->msgpair, 0, sizeof(PINT_sm_msgpair_state)); +#define CURRENT_COMMAND_PAGE 0xfffffffeUL +#define CURRENT_COMMAND_PAGE_OID 4 - /* allocate msgarray and set msgarray_count */ - if (sm_p->msgarray && (sm_p->msgarray != &(sm_p->msgpair))) - { - free(sm_p->msgarray); - } - sm_p->msgarray = (PINT_sm_msgpair_state *)malloc( - (sm_p->u.create.num_data_files * sizeof(PINT_sm_msgpair_state))); - if (sm_p->msgarray == NULL) - { - gossip_err("create: failed to allocate msgarray\n"); - js_p->error_code = -PVFS_ENOMEM; - return SM_ACTION_COMPLETE; - } - memset(sm_p->msgarray, 0, sm_p->u.create.num_data_files * - sizeof(*sm_p->msgarray)); - sm_p->msgarray_count = sm_p->u.create.num_data_files; + if (is_osd) { + struct osd_command *command = &sm_p->msgarray_op.msgarray[0].osd_command; + uint64_t attrval; +/* struct attribute_list attrs[] = {{ ATTR_GET, CUR_CMD_ATTR_PG,*/ +/* CCAP_OID, NULL, CCAP_OID_LEN },*/ +/* { ATTR_SET, USER_COLL_PG, 1, &attrval, 8}};*/ - /* for each datafile, prepare to post a create send/recv pair */ - for(i = 0; i < sm_p->u.create.num_data_files; i++) - { - PINT_sm_msgpair_state *msg_p = &sm_p->msgarray[i]; + struct attribute_list attr = {ATTR_SET, USER_COLL_PG, 1, 
&attrval, 8}; -#define CURRENT_COMMAND_PAGE 0xfffffffeUL -#define CURRENT_COMMAND_PAGE_OID 4 + if(!sm_p->getattr.attr.cid) { + sm_p->getattr.attr.cid = COLLECTION_OID_LB; /* root directory */ + } + set_htonll(&attrval, sm_p->getattr.attr.cid); - if (is_osd) { - struct osd_command *command = &sm_p->msgarray[i].osd_command; - struct attribute_list attr = { ATTR_GET, CUR_CMD_ATTR_PG, - CCAP_OID, NULL, CCAP_OID_LEN }; - - /* - * A hack in PVFS_util_init_defaults has set up an object an the - * low bound of the extent array. So as we create handles, we get - * them in increasing order that satisfies our extent bounds. - */ - ret = osd_command_set_create(command, PVFS_OSD_DATA_PID, 0, 1); - if (ret) { - osd_error_xerrno(ret, "%s: osd_command_set_create failed", + /* + * A hack in PVFS_util_init_defaults has set up an object an the + * low bound of the extent array. So as we create handles, we get + * them in increasing order that satisfies our extent bounds. + */ + ret = osd_command_set_create(command, PVFS_OSD_DATA_PID, sm_p->u.create.datafile_handles[0], 1); + if (ret) { + osd_error_xerrno(ret, "%s: osd_command_set_create failed", __func__); - js_p->error_code = ret; - return 1; - } + js_p->error_code = ret; + return 1; + } - ret = osd_command_attr_build(command, &attr, 1); - if (ret) { - osd_error_xerrno(ret, "%s: osd_command_attr_build failed", +/* ret = osd_command_attr_build(command, attrs, 2);*/ + ret = osd_command_attr_build(command, &attr, 1); + if (ret) { + osd_error_xerrno(ret, "%s: osd_command_attr_build failed", __func__); - js_p->error_code = ret; - return 1; - } - } else { - PINT_SERVREQ_CREATE_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->object_ref.fs_id, - PVFS_TYPE_DATAFILE, - sm_p->u.create.io_handle_extent_array[i]); + js_p->error_code = ret; + return 1; } - - gossip_debug(GOSSIP_CLIENT_DEBUG, "posting datafile[%d] create " - "with extents %llu-%llu\n", i, - llu(sm_p->u.create.io_handle_extent_array[i]. 
- extent_array[0].first), - llu(sm_p->u.create.io_handle_extent_array[i]. - extent_array[0].last)); - - msg_p->fs_id = sm_p->object_ref.fs_id; - msg_p->handle = sm_p->u.create.io_handle_extent_array[i]. - extent_array[0].first; - msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; - msg_p->comp_fn = create_datafiles_comp_fn; - msg_p->svr_addr = sm_p->u.create.data_server_addrs[i]; } - return SM_ACTION_COMPLETE; -} -static PINT_sm_action create_datafiles_failure( - struct PINT_smcb *smcb, job_status_s *js_p) -{ - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - sm_p->u.create.stored_error_code = js_p->error_code; + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = sm_p->u.create.io_handle_extent_array[0]. + extent_array[0].first; + msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; + msg_p->comp_fn = create_datafiles_comp_fn; + msg_p->svr_addr = sm_p->u.create.data_server_addrs[0]; + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); - gossip_debug(GOSSIP_CLIENT_DEBUG, - "create state: datafiles_failure\n"); return SM_ACTION_COMPLETE; } @@ -939,18 +889,27 @@ static PINT_sm_action create_setattr_setup_msgpair( struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret = -PVFS_EINVAL; PINT_sm_msgpair_state *msg_p = NULL; - + struct server_configuration_s *server_config; + + server_config = PINT_get_server_config_struct( + sm_p->object_ref.fs_id); + PINT_put_server_config_struct(server_config); + int is_osd_md = fsid_is_osd_md(sm_p->object_ref.fs_id); int is_osd_meta = fsid_is_osd_meta(sm_p->object_ref.fs_id); gossip_debug(GOSSIP_CLIENT_DEBUG, "create state: setattr_setup_msgpair\n"); - js_p->error_code = (is_osd_meta || is_osd_md) ? 
OSD_MSGPAIR : 0; + js_p->error_code = 0; + + if (!is_osd_md && !is_osd_meta) + return SM_ACTION_COMPLETE; gossip_debug(GOSSIP_CLIENT_DEBUG," create: posting setattr req\n"); - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; if (is_osd_meta || is_osd_md) { /* @@ -962,7 +921,7 @@ static PINT_sm_action create_setattr_setup_msgpair( char *dist_buf; int i, numattrs = 7; struct attribute_list attr[numattrs]; - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; /* Set attr type, page and number */ for (i = 0; i < numattrs; i++) { @@ -972,20 +931,20 @@ static PINT_sm_action create_setattr_setup_msgpair( } /* uid */ - attr[0].val = &sm_p->u.create.sys_attr.owner; + attr[0].val = &sm_p->u.create.attr.owner; attr[0].len = sizeof(PVFS_uid); /* gid */ - attr[1].val = &sm_p->u.create.sys_attr.group; + attr[1].val = &sm_p->u.create.attr.group; attr[1].len = sizeof(PVFS_gid); /* XXX Default to PVFS_PERM_VALID till we can figure out the umask */ - sm_p->u.create.sys_attr.perms = PVFS_PERM_VALID; - attr[2].val = &sm_p->u.create.sys_attr.perms; + sm_p->u.create.attr.perms = PVFS_PERM_VALID; + attr[2].val = &sm_p->u.create.attr.perms; attr[2].len = sizeof(PVFS_permissions); /* mask */ - sm_p->u.create.sys_attr.mask = PVFS_ATTR_COMMON_UID | + sm_p->u.create.attr.mask = PVFS_ATTR_COMMON_UID | PVFS_ATTR_COMMON_GID | PVFS_ATTR_COMMON_PERM | PVFS_ATTR_COMMON_ATIME | @@ -994,12 +953,12 @@ static PINT_sm_action create_setattr_setup_msgpair( PVFS_ATTR_META_DIST | PVFS_ATTR_META_DFILES | PVFS_ATTR_COMMON_TYPE; - attr[3].val = &sm_p->u.create.sys_attr.mask; + attr[3].val = &sm_p->u.create.attr.mask; attr[3].len = sizeof(uint32_t); /* object type */ - sm_p->u.create.sys_attr.objtype = PVFS_TYPE_METAFILE; - attr[4].val = &sm_p->u.create.sys_attr.objtype; + sm_p->u.create.attr.objtype = PVFS_TYPE_METAFILE; + attr[4].val = &sm_p->u.create.attr.objtype; 
attr[4].len = sizeof(PVFS_ds_type); /* @@ -1037,23 +996,24 @@ static PINT_sm_action create_setattr_setup_msgpair( return 1; } } else { - PINT_SERVREQ_SETATTR_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->object_ref.fs_id, - sm_p->u.create.metafile_handle, - PVFS_TYPE_METAFILE, - sm_p->u.create.sys_attr, - PVFS_ATTR_META_ALL); - - msg_p->req.u.setattr.attr.u.meta.dfile_array = - sm_p->u.create.datafile_handles; - msg_p->req.u.setattr.attr.u.meta.dfile_count = - sm_p->u.create.num_data_files; - msg_p->req.u.setattr.attr.u.meta.dist = - sm_p->u.create.dist; - msg_p->req.u.setattr.attr.u.meta.dist_size = - PINT_DIST_PACK_SIZE(sm_p->u.create.dist); +/* PINT_SERVREQ_SETATTR_FILL(*/ +/* msg_p->req,*/ +/* *sm_p->cred_p,*/ +/* sm_p->object_ref.fs_id,*/ +/* sm_p->u.create.metafile_handle,*/ +/* PVFS_TYPE_METAFILE,*/ +/* sm_p->u.create.attr,*/ +/* PVFS_ATTR_META_ALL,*/ +/* sm_p->hints);*/ + +/* msg_p->req.u.setattr.attr.u.meta.dfile_array =*/ +/* sm_p->u.create.datafile_handles;*/ +/* msg_p->req.u.setattr.attr.u.meta.dfile_count =*/ +/* sm_p->u.create.num_data_files;*/ +/* msg_p->req.u.setattr.attr.u.meta.dist =*/ +/* sm_p->u.create.dist;*/ +/* msg_p->req.u.setattr.attr.u.meta.dist_size =*/ +/* PINT_DIST_PACK_SIZE(sm_p->u.create.dist);*/ } msg_p->fs_id = sm_p->object_ref.fs_id; @@ -1069,17 +1029,9 @@ static PINT_sm_action create_setattr_setup_msgpair( gossip_err("Failed to map meta server address\n"); js_p->error_code = ret; } - return SM_ACTION_COMPLETE; -} - -static PINT_sm_action create_setattr_failure( - struct PINT_smcb *smcb, job_status_s *js_p) -{ - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - sm_p->u.create.stored_error_code = js_p->error_code; + js_p->error_code = (is_osd_meta || is_osd_md) ? 
OSD_MSGPAIR : 0; + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); - gossip_debug(GOSSIP_CLIENT_DEBUG, - "create state: setattr_failure\n"); return SM_ACTION_COMPLETE; } @@ -1093,51 +1045,44 @@ static PINT_sm_action create_crdirent_setup_msgpair( gossip_debug(GOSSIP_CLIENT_DEBUG, "create state: crdirent_setup_msgpair\n"); - gossip_debug(GOSSIP_CLIENT_DEBUG, + js_p->error_code = 0; + + gossip_debug(GOSSIP_CLIENT_DEBUG, "create: %s: posting crdirent req: parent handle: %llu, " "name: %s, handle: %llu\n", __func__, llu(sm_p->object_ref.handle), sm_p->u.create.object_name, llu(sm_p->u.create.metafile_handle)); - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; + + gossip_err("is_osd: %d\n", server_is_osd(msg_p->svr_addr)); + PINT_SERVREQ_CRDIRENT_FILL( + msg_p->req, + *sm_p->cred_p, + sm_p->u.create.object_name, + sm_p->u.create.metafile_handle, + sm_p->object_ref.handle, + sm_p->object_ref.fs_id, + sm_p->hints); + + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = sm_p->object_ref.handle; + msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; + msg_p->comp_fn = create_crdirent_comp_fn; - /* find BMI addr of parent directory */ ret = PINT_cached_config_map_to_server( &msg_p->svr_addr, sm_p->object_ref.handle, sm_p->object_ref.fs_id); + if (ret) { gossip_err("Failed to map meta server address\n"); js_p->error_code = ret; - goto out; - } - - if (server_is_osd(msg_p->svr_addr)) { - /* - * Directory operations for metafile and mdfile. We don't do anything - * here because we'll handle the individual directory operations in - * different states. 
- */ - js_p->error_code = OSD_MSGPAIR; - } else { - /* Directory operations for stock PVFS and datafile */ - PINT_SERVREQ_CRDIRENT_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->u.create.object_name, - sm_p->u.create.metafile_handle, - sm_p->object_ref.handle, - sm_p->object_ref.fs_id); - js_p->error_code = 0; } - msg_p->fs_id = sm_p->object_ref.fs_id; - msg_p->handle = sm_p->object_ref.handle; - msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; - msg_p->comp_fn = create_crdirent_comp_fn; - -out: + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -1157,196 +1102,6 @@ static PINT_sm_action create_crdirent_failure( return SM_ACTION_COMPLETE; } -/* delete the newly created datafile handles, first OSD ones */ -static int create_delete_handles_setup_osd_msgpair_array( - struct PINT_smcb *smcb, job_status_s *js_p) -{ - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - int i, actual_count; - int is_osd = fsid_is_osd(sm_p->object_ref.fs_id); - - gossip_debug(GOSSIP_CLIENT_DEBUG, "create state: %s\n", __func__); - - if (!is_osd) { - js_p->error_code = NO_OSD_HANDLES; - return SM_ACTION_COMPLETE; - } - - actual_count = 0; - for(i = 0; i < sm_p->u.create.num_data_files; i++) - { - if (sm_p->u.create.datafile_handles && - (sm_p->u.create.datafile_handles[i] != PVFS_HANDLE_NULL)) - { - actual_count++; - } - } - - if (actual_count == 0) { - js_p->error_code = NO_OSD_HANDLES; - return SM_ACTION_COMPLETE; - } - - js_p->error_code = 0; - - memset(&sm_p->msgpair, 0, sizeof(PINT_sm_msgpair_state)); - - if (sm_p->msgarray && (sm_p->msgarray != &(sm_p->msgpair))) - { - free(sm_p->msgarray); - } - sm_p->msgarray = malloc(actual_count * sizeof(*sm_p->msgarray)); - - if (sm_p->msgarray == NULL) - { - gossip_err("create: failed to allocate msgarray\n"); - js_p->error_code = -PVFS_ENOMEM; - return SM_ACTION_COMPLETE; - } - sm_p->msgarray_count = actual_count; - - assert(sm_p->u.create.data_server_addrs); - - for (i = 0; i < actual_count; i++) - 
{ - PINT_sm_msgpair_state *msg_p = &sm_p->msgarray[i]; - int ret; - - gossip_debug(GOSSIP_CLIENT_DEBUG, - "%s: posting data file remove req %d\n", __func__, i); - - ret = osd_command_set_remove(&sm_p->msgarray[i].osd_command, - PVFS_OSD_DATA_PID, - sm_p->u.create.datafile_handles[i]); - if (ret) { - osd_error_xerrno(ret, "%s: osd_command_set_remove failed", - __func__); - js_p->error_code = ret; - continue; - } - - msg_p->fs_id = sm_p->object_ref.fs_id; - msg_p->handle = sm_p->u.create.datafile_handles[i]; - msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; - msg_p->comp_fn = create_delete_handles_comp_fn; - msg_p->svr_addr = sm_p->u.create.data_server_addrs[i]; - - gossip_debug(GOSSIP_CLIENT_DEBUG, "%s: remove datafile %llu\n", - __func__, llu(msg_p->handle)); - } - return SM_ACTION_COMPLETE; -} - -/* delete the newly created meta and data handles */ -static PINT_sm_action create_delete_handles_setup_msgpair_array( - struct PINT_smcb *smcb, job_status_s *js_p) -{ - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - int ret = -PVFS_EINVAL, i = 0, actual_count = 0; - int is_osd = fsid_is_osd(sm_p->object_ref.fs_id); - PVFS_BMI_addr_t metafile_server_addr; - - gossip_debug(GOSSIP_CLIENT_DEBUG, "create state: " - "delete_handles_setup_msgpair_array\n"); - - js_p->error_code = 0; - - memset(&sm_p->msgpair, 0, sizeof(PINT_sm_msgpair_state)); - - ret = PINT_cached_config_map_to_server( - &metafile_server_addr, sm_p->u.create.metafile_handle, - sm_p->object_ref.fs_id); - - if (ret) - { - gossip_err("Failed to map meta server address\n"); - js_p->error_code = ret; - return SM_ACTION_COMPLETE; - } - - /* - in the case that all datafiles have already been created, - actual_count will be (sm_p->u.create.num_data_files + 1). 
- otherwise, it will be somewhere between 1 (for the metafile) and - 1 + the number of data files - */ - actual_count = 1; - if (!is_osd) { - for(i = 0; i < sm_p->u.create.num_data_files; i++) - { - if (sm_p->u.create.datafile_handles && - (sm_p->u.create.datafile_handles[i] != PVFS_HANDLE_NULL)) - actual_count++; - } - } - - if (sm_p->msgarray && (sm_p->msgarray != &(sm_p->msgpair))) - { - free(sm_p->msgarray); - } - sm_p->msgarray = (PINT_sm_msgpair_state *)malloc( - (actual_count * sizeof(PINT_sm_msgpair_state))); - - if (sm_p->msgarray == NULL) - { - gossip_err("create: failed to allocate msgarray\n"); - js_p->error_code = -PVFS_ENOMEM; - return SM_ACTION_COMPLETE; - } - sm_p->msgarray_count = actual_count; - - assert(sm_p->u.create.data_server_addrs); - - /* - for the metafile and each datafile, prepare to post a remove - send/recv pair - */ - for(i = 0; i < actual_count; i++) - { - PINT_sm_msgpair_state *msg_p = &sm_p->msgarray[i]; - - gossip_debug(GOSSIP_CLIENT_DEBUG, - "create: posting data file remove req %d\n",i); - - /* arbitrarily handle deletion of the metafile last */ - if (i == (actual_count - 1)) - { - PINT_SERVREQ_REMOVE_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->object_ref.fs_id, - sm_p->u.create.metafile_handle); - - msg_p->fs_id = sm_p->object_ref.fs_id; - msg_p->handle = sm_p->u.create.metafile_handle; - msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; - msg_p->comp_fn = create_delete_handles_comp_fn; - msg_p->svr_addr = metafile_server_addr; - - gossip_debug(GOSSIP_CLIENT_DEBUG, " Preparing to remove " - "metafile handle %llu\n", llu(msg_p->handle)); - } - else - { - PINT_SERVREQ_REMOVE_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->object_ref.fs_id, - sm_p->u.create.datafile_handles[i]); - - msg_p->fs_id = sm_p->object_ref.fs_id; - msg_p->handle = sm_p->u.create.datafile_handles[i]; - msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; - msg_p->comp_fn = create_delete_handles_comp_fn; - msg_p->svr_addr = sm_p->u.create.data_server_addrs[i]; - - 
gossip_debug(GOSSIP_CLIENT_DEBUG, " Preparing to remove " - "datafile handle %llu\n", llu(msg_p->handle)); - } - } - return SM_ACTION_COMPLETE; -} - static PINT_sm_action create_cleanup( struct PINT_smcb *smcb, job_status_s *js_p) { @@ -1357,7 +1112,10 @@ static PINT_sm_action create_cleanup( gossip_debug(GOSSIP_CLIENT_DEBUG, "create state: cleanup\n"); - PVFS_util_release_sys_attr(&sm_p->u.create.sys_attr); + PINT_free_object_attr(&sm_p->u.create.attr); + + if(js_p->error_code == OSD_POST_CREATE) + js_p->error_code = 0; PINT_SM_GETATTR_STATE_CLEAR(sm_p->getattr); @@ -1371,6 +1129,7 @@ static PINT_sm_action create_cleanup( { metafile_ref.handle = sm_p->u.create.metafile_handle; metafile_ref.fs_id = sm_p->object_ref.fs_id; + metafile_ref.cid = sm_p->getattr.attr.cid; /* fill in outgoing response fields */ sm_p->u.create.create_resp->ref = metafile_ref; @@ -1380,10 +1139,41 @@ static PINT_sm_action create_cleanup( (const PVFS_object_ref*) &metafile_ref, (const PVFS_object_ref*) &(sm_p->object_ref)); + sm_p->u.create.attr.mask |= PVFS_ATTR_META_DFILES; + sm_p->u.create.attr.u.meta.dfile_array = + sm_p->u.create.datafile_handles; + sm_p->u.create.attr.u.meta.dfile_count = + sm_p->u.create.datafile_count; + + if(sm_p->u.create.stuffed) + { + gossip_debug(GOSSIP_CLIENT_DEBUG, "created stuffed file\n"); + sm_p->u.create.attr.u.meta.stuffed_size = 0; + } + else + { + gossip_debug(GOSSIP_CLIENT_DEBUG, "created un-stuffed file\n"); + sm_p->u.create.attr.mask |= PVFS_ATTR_META_UNSTUFFED; + } + + if(sm_p->u.create.dist) + { + sm_p->u.create.attr.u.meta.dist = sm_p->u.create.dist; + sm_p->u.create.attr.u.meta.dist_size = PINT_DIST_PACK_SIZE(sm_p->u.create.dist); + sm_p->u.create.attr.mask |= PVFS_ATTR_META_DIST; + } + /* we only insert a cache entry if the entire create succeeds, - * i.e. crdirent succeeded. set size to 0 */ + * set size to 0 + */ + /* Also, make sure to clear time masks. The server is responsible + * for setting that. 
+ */ + sm_p->u.create.attr.mask &= (~(PVFS_ATTR_COMMON_MTIME)); + sm_p->u.create.attr.mask &= (~(PVFS_ATTR_COMMON_CTIME)); + sm_p->u.create.attr.mask &= (~(PVFS_ATTR_COMMON_ATIME)); ret = PINT_acache_update(metafile_ref, - &sm_p->u.create.cache_attr, + &sm_p->u.create.attr, &tmp_size); if(ret < 0) { @@ -1391,7 +1181,7 @@ static PINT_sm_action create_cleanup( } } else if ((PVFS_ERROR_CLASS(-sm_p->error_code) == PVFS_ERROR_BMI) && - (sm_p->u.create.retry_count < sm_p->msgarray_params.retry_limit)) + (sm_p->u.create.retry_count < sm_p->msgarray_op.params.retry_limit)) { sm_p->u.create.stored_error_code = 0; sm_p->u.create.retry_count++; @@ -1403,42 +1193,25 @@ static PINT_sm_action create_cleanup( return SM_ACTION_COMPLETE; } - PINT_free_object_attr(&sm_p->u.create.cache_attr); - - if (sm_p->u.create.io_handle_extent_array) - { - free(sm_p->u.create.io_handle_extent_array); - sm_p->u.create.io_handle_extent_array = NULL; - } - - if (sm_p->u.create.data_server_addrs) - { - free(sm_p->u.create.data_server_addrs); - sm_p->u.create.data_server_addrs = NULL; - } - - if (sm_p->u.create.datafile_handles) + if(sm_p->u.create.layout.algorithm == PVFS_SYS_LAYOUT_LIST) { - free(sm_p->u.create.datafile_handles); - sm_p->u.create.datafile_handles = NULL; + free(sm_p->u.create.layout.server_list.servers); + sm_p->u.create.layout.server_list.servers = NULL; } - if (sm_p->u.create.dist) + if(sm_p->u.create.dist) { PINT_dist_free(sm_p->u.create.dist); sm_p->u.create.dist = NULL; } - if(sm_p->u.create.layout.algorithm == PVFS_SYS_LAYOUT_LIST) + if(sm_p->u.create.datafile_handles) { - free(sm_p->u.create.layout.server_list.servers); + free(sm_p->u.create.datafile_handles); + sm_p->u.create.datafile_handles = NULL; } - if (sm_p->msgarray != &(sm_p->msgpair)) - { - free(sm_p->msgarray); - sm_p->msgarray = NULL; - } + PINT_msgpairarray_destroy(&sm_p->msgarray_op); PINT_SET_OP_COMPLETE; return SM_ACTION_TERMINATE; @@ -1452,9 +1225,9 @@ static PINT_sm_action create_parent_getattr_inspect( { 
struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); PVFS_object_attr *attr = NULL; - int num_dfiles_requested_override = 0; PINT_dist *current_dist; int ret = 0; + int num_dfiles_requested = 0; gossip_debug(GOSSIP_CLIENT_DEBUG, "create state: parent_getattr_inspect\n"); @@ -1470,7 +1243,7 @@ static PINT_sm_action create_parent_getattr_inspect( gossip_debug(GOSSIP_CLIENT_DEBUG, "parent has setgid bit set.\n"); gossip_debug(GOSSIP_CLIENT_DEBUG, " - modifying requested attr " "for new file.\n"); - sm_p->u.create.sys_attr.group = attr->group; + sm_p->u.create.attr.group = attr->group; /* note that permission checking is left to server even in this case */ } gossip_debug(GOSSIP_CLIENT_DEBUG, "create_parent_getattr: [%p] " @@ -1482,18 +1255,6 @@ static PINT_sm_action create_parent_getattr_inspect( attr->u.dir.hint.dist_name_len, attr->u.dir.hint.dist_params_len); - num_dfiles_requested_override = attr->u.dir.hint.dfile_count; - /* override the # of data files for this create */ - if (num_dfiles_requested_override > 0) - { - /* Determine the number of dfiles */ - PINT_cached_config_get_num_dfiles(sm_p->object_ref.fs_id, - sm_p->u.create.dist, - num_dfiles_requested_override, - &sm_p->u.create.num_data_files); - } - gossip_debug(GOSSIP_CLIENT_DEBUG, "Setting number of datafiles to %d [requested %d]\n", - sm_p->u.create.num_data_files, num_dfiles_requested_override); current_dist = sm_p->u.create.dist; /* We have an overriding distribution name for this directory.. 
honor that */ if (attr->u.dir.hint.dist_name_len > 0) @@ -1505,7 +1266,8 @@ static PINT_sm_action create_parent_getattr_inspect( new_dist = PINT_dist_create(attr->u.dir.hint.dist_name); if (new_dist) { - gossip_debug(GOSSIP_CLIENT_DEBUG, "Overridding distribution name to %s instead of %s\n", + gossip_debug(GOSSIP_CLIENT_DEBUG, + "Overridding distribution name to %s instead of %s\n", attr->u.dir.hint.dist_name, current_dist->dist_name); PINT_dist_free(current_dist); @@ -1514,13 +1276,17 @@ static PINT_sm_action create_parent_getattr_inspect( } else { - gossip_debug(GOSSIP_CLIENT_DEBUG, "Could not override distribution name with %s instead of %s\n", + gossip_debug( + GOSSIP_CLIENT_DEBUG, + "Could not override distribution name with %s instead of %s\n", attr->u.dir.hint.dist_name, current_dist->dist_name); } } else { - gossip_debug(GOSSIP_CLIENT_DEBUG, "retaining current distribution name %s\n", + gossip_debug( + GOSSIP_CLIENT_DEBUG, + "retaining current distribution name %s\n", current_dist->dist_name); } } @@ -1533,15 +1299,44 @@ static PINT_sm_action create_parent_getattr_inspect( int64_t tmp_val; int nparams = 0; + if (strncmp(current_dist->dist_name, + PVFS_DIST_VARSTRIP_NAME, + PVFS_DIST_VARSTRIP_NAME_SIZE) == 0) + { + /* varstrip parameters are a special case; we can't use the + * normal split_keyvals function because the : separater is also + * used within paramers that only varstrip can parse + */ + + /* look for a "strips:" prefix */ + if(strstr(attr->u.dir.hint.dist_params, "strips:") + != attr->u.dir.hint.dist_params) + { + gossip_err("Error: failed to parse directory hints for varstrip distribution.\n"); + js_p->error_code = -PVFS_EINVAL; + return SM_ACTION_COMPLETE; + } + if(current_dist->methods->set_param(current_dist->dist_name, + current_dist->params, + "strips", + &attr->u.dir.hint.dist_params[strlen("strips:")])) + { + gossip_err("Error: failed to set directory hints for varstrip distribution.\n"); + js_p->error_code = -PVFS_EINVAL; + return 
SM_ACTION_COMPLETE; + } + } /* ignore parse errors! */ - if (PINT_split_keyvals(attr->u.dir.hint.dist_params, + /* TODO: why should we ignore parsing errors? */ + else if (PINT_split_keyvals(attr->u.dir.hint.dist_params, &nparams, &key, &val) == 0) { int i; for (i = 0; i < nparams; i++) { - gossip_debug(GOSSIP_CLIENT_DEBUG, "distribution parameter %s, value %s\n", - key[i], val[i]); + gossip_debug(GOSSIP_CLIENT_DEBUG, + "distribution parameter %s, value %s\n", + key[i], val[i]); /* NOTE: just as in server-config.c when parsing "Param" and * "Value" fields, we will assume that all values are 64 bit * integers. The only difference here is that we scan @@ -1551,7 +1346,9 @@ static PINT_sm_action create_parent_getattr_inspect( ret = sscanf(val[i], SCANF_lld, &tmp_val); if(ret != 1) { - gossip_err("Error: unsupported type for distribution parameter %s, value %s found in directory hints.\n", + gossip_err( + "Error: unsupported type for distribution parameter %s, " + "value %s found in directory hints.\n", key[i], val[i]); gossip_err("Error: continuing anyway.\n"); } @@ -1563,9 +1360,10 @@ static PINT_sm_action create_parent_getattr_inspect( &tmp_val)) { - gossip_err("Error: could not override hinted distribution parameter %s, value %s found in directory hints\n", - key[i], - val[i]); + gossip_err( + "Error: could not override hinted distribution " + "parameter %s, value %s found in directory hints\n", + key[i], val[i]); } } free(key[i]); @@ -1575,6 +1373,57 @@ static PINT_sm_action create_parent_getattr_inspect( free(val); } } + + /* priority for determining user's preference for number of data files: + * 1) count specified in attr's passed into sys_create + * 2) directory hints + * 3) mount options + * 4) system default + * All of the above can be overridden by the distribution itself. 
+ */ + + if(sm_p->u.create.user_requested_num_data_files > 0) + { + /* specified by sys_create caller */ + num_dfiles_requested = sm_p->u.create.user_requested_num_data_files; + } + else if(attr->u.dir.hint.dfile_count > 0) + { + num_dfiles_requested = attr->u.dir.hint.dfile_count; + } + else + { + /* Check the mount options */ + int rc; + struct PVFS_sys_mntent mntent; + + rc = PVFS_util_get_mntent_copy(sm_p->object_ref.fs_id, &mntent); + if (0 == rc) + { + num_dfiles_requested = mntent.default_num_dfiles; + PVFS_util_free_mntent(&mntent); + } + } + + /* Determine the number of dfiles. Pass in the number requested by the + * client, but will be overridden by default configuration and/or + * distribution if necessary + */ + ret = PINT_cached_config_get_num_dfiles(sm_p->object_ref.fs_id, + sm_p->u.create.dist, + num_dfiles_requested, + &sm_p->u.create.num_data_files); + + if(ret < 0) + { + gossip_err("Error: failed to get number of data servers\n"); + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + gossip_debug(GOSSIP_CLIENT_DEBUG, "Setting number of datafiles to %d [requested %d]\n", + sm_p->u.create.num_data_files, num_dfiles_requested); + return SM_ACTION_COMPLETE; } @@ -1652,6 +1501,99 @@ static PINT_dist* get_default_distribution(PVFS_fs_id fs_id) return dist; } +static int create_delete_handles_comp_fn(void *v_p, + struct PVFS_server_resp *resp_p, + int index) +{ + gossip_debug(GOSSIP_CLIENT_DEBUG, "create_delete_handles_comp_fn\n"); + + assert(resp_p->op == PVFS_SERV_REMOVE); + + if (resp_p->status != 0) + { + gossip_debug(GOSSIP_CLIENT_DEBUG, + "Failed to remove handle number %d\n", index); + } + return resp_p->status; +} + +/* delete the newly created meta and data handles */ +static PINT_sm_action create_delete_handles_setup_msgpair_array( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int ret = -PVFS_EINVAL, i = 0; + PINT_sm_msgpair_state *msg_p = NULL; + + 
gossip_debug(GOSSIP_CLIENT_DEBUG, "create state: " + "delete_handles_setup_msgpair_array\n"); + + js_p->error_code = 0; + + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, (sm_p->u.create.datafile_count+1)); + if(ret != 0) + { + gossip_err("Failed to initialize %d msgpairs\n", (sm_p->u.create.datafile_count+1)); + js_p->error_code = ret; + return(SM_ACTION_COMPLETE); + } + + /* + for the metafile and each datafile, prepare to post a remove + send/recv pair + */ + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) + { + gossip_debug(GOSSIP_CLIENT_DEBUG, + "create: posting data file remove req %d\n",i); + + /* arbitrarily handle deletion of the metafile last */ + if (i == sm_p->u.create.datafile_count) + { + PINT_SERVREQ_REMOVE_FILL( + msg_p->req, + *sm_p->cred_p, + sm_p->object_ref.fs_id, + sm_p->u.create.metafile_handle, + sm_p->hints); + + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = sm_p->u.create.metafile_handle; + msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; + msg_p->comp_fn = create_delete_handles_comp_fn; + + gossip_debug(GOSSIP_CLIENT_DEBUG, " Preparing to remove " + "metafile handle %llu\n", llu(msg_p->handle)); + } + else + { + PINT_SERVREQ_REMOVE_FILL( + msg_p->req, + *sm_p->cred_p, + sm_p->object_ref.fs_id, + sm_p->u.create.datafile_handles[i], + sm_p->hints); + + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = sm_p->u.create.datafile_handles[i]; + msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; + msg_p->comp_fn = create_delete_handles_comp_fn; + + gossip_debug(GOSSIP_CLIENT_DEBUG, " Preparing to remove " + "datafile handle %llu\n", llu(msg_p->handle)); + } + } + ret = PINT_serv_msgpairarray_resolve_addrs(&sm_p->msgarray_op); + if(ret) + { + gossip_err("Error: failed to resolve server addresses.\n"); + js_p->error_code = ret; + } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); + return SM_ACTION_COMPLETE; +} + /* * Local variables: * mode: c diff --git a/src/client/sysint/sys-del-eattr.sm b/src/client/sysint/sys-del-eattr.sm index 
ed69dba..51ba83c 100644 --- a/src/client/sysint/sys-del-eattr.sm +++ b/src/client/sysint/sys-del-eattr.sm @@ -6,7 +6,9 @@ #include #include +#ifndef WIN32 #include +#endif #include "client-state-machine.h" #include "pvfs2-debug.h" @@ -55,6 +57,7 @@ PVFS_error PVFS_isys_deleattr( const PVFS_credentials *credentials, PVFS_ds_keyval *key_p, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { int ret = -PVFS_EINVAL; @@ -81,12 +84,13 @@ PVFS_error PVFS_isys_deleattr( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, ref.fs_id); + PINT_init_msgarray_params(sm_p, ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.deleattr.key_p = key_p; sm_p->error_code = 0; sm_p->object_ref = ref; - + PVFS_hint_copy(hints, &sm_p->hints); + return PINT_client_state_machine_post( smcb, op_id, user_ptr); } @@ -94,7 +98,8 @@ PVFS_error PVFS_isys_deleattr( PVFS_error PVFS_sys_deleattr( PVFS_object_ref ref, const PVFS_credentials *credentials, - PVFS_ds_keyval *key_p) + PVFS_ds_keyval *key_p, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; @@ -102,7 +107,7 @@ PVFS_error PVFS_sys_deleattr( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_deleattr entered\n"); ret = PVFS_isys_deleattr(ref, credentials, - key_p, &op_id, NULL); + key_p, &op_id, hints, NULL); if (ret) { @@ -128,35 +133,40 @@ static PINT_sm_action del_eattr_setup_msgpair( { struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret = -PVFS_EINVAL; + PINT_sm_msgpair_state *msg_p; gossip_debug(GOSSIP_CLIENT_DEBUG, "del_eattr state: del_eattr_setup_msgpair\n"); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; + PINT_SERVREQ_DELEATTR_FILL( - sm_p->msgpair.req, + msg_p->req, (*sm_p->cred_p), sm_p->object_ref.fs_id, sm_p->object_ref.handle, - (*sm_p->u.deleattr.key_p) + (*sm_p->u.deleattr.key_p), + sm_p->hints ); - sm_p->msgarray = &(sm_p->msgpair); - sm_p->msgarray_count = 
1; - sm_p->msgpair.fs_id = sm_p->object_ref.fs_id; - sm_p->msgpair.handle = sm_p->object_ref.handle; - sm_p->msgpair.retry_flag = PVFS_MSGPAIR_RETRY; - sm_p->msgpair.comp_fn = del_eattr_comp_fn; + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = sm_p->object_ref.handle; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = del_eattr_comp_fn; ret = PINT_cached_config_map_to_server( - &sm_p->msgpair.svr_addr, - sm_p->msgpair.handle, - sm_p->msgpair.fs_id); + &msg_p->svr_addr, + msg_p->handle, + msg_p->fs_id); if (ret) { gossip_err("Failed to map meta server address\n"); js_p->error_code = 0; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -180,7 +190,8 @@ static int del_eattr_comp_fn( int j = 0; int ret = 0; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + PINT_sm_msgpair_state *msg_p; gossip_debug(GOSSIP_CLIENT_DEBUG, "del_eattr completion fn: del_eattr_comp_fn\n"); @@ -190,23 +201,23 @@ static int del_eattr_comp_fn( /* no return value from del eattrib so just check status */ - if (sm_p->msgarray[i].op_status != 0) + if (sm_p->msgarray_op.msgarray[i].op_status != 0) { - ret = sm_p->msgarray[i].op_status; + ret = sm_p->msgarray_op.msgarray[i].op_status; } /* if this is the last response, check all of the status values * and return error code if any requests failed */ - if (i == (sm_p->msgarray_count -1)) + if (i == (sm_p->msgarray_op.count -1)) { - for (j=0; j < sm_p->msgarray_count; j++) - { - if (sm_p->msgarray[j].op_status != 0) - { - return(sm_p->msgarray[j].op_status); - } - } + foreach_msgpair(&sm_p->msgarray_op, msg_p, j) + { + if (msg_p->op_status != 0) + { + return(msg_p->op_status); + } + } } return ret; } diff --git a/src/client/sysint/sys-flush.sm b/src/client/sysint/sys-flush.sm index 5cd3255..9a25c7c 100644 --- a/src/client/sysint/sys-flush.sm +++ b/src/client/sysint/sys-flush.sm @@ -23,14 
+23,6 @@ #include "pint-util.h" #include "pvfs2-internal.h" -#include "osd-util/osd-util.h" - -enum -{ - OSD_MSGPAIR = 2001, - ALL_DONE = 2002, -}; - extern job_context_id pint_client_sm_context; %% @@ -48,25 +40,9 @@ machine pvfs2_client_flush_sm { run flush_datafile_setup_msgpairarray; success => flush_datafile_xfer_msgpairarray; - OSD_MSGPAIR => flush_osd_datafile_xfer_msgpairarray; default => cleanup; } - state flush_osd_datafile_xfer_msgpairarray - { - jump pvfs2_osd_msgpairarray_sm; - success => flush_switch_to_metafile; - default => flush_datafile_flush_failure; - } - - state flush_switch_to_metafile - { - run flush_switch_to_metafile; - success => flush_datafile_xfer_msgpairarray; - ALL_DONE => cleanup; - default => flush_datafile_flush_failure; - } - state flush_datafile_xfer_msgpairarray { jump pvfs2_msgpairarray_sm; @@ -95,6 +71,7 @@ PVFS_error PVFS_isys_flush( PVFS_object_ref ref, const PVFS_credentials *credentials, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -121,9 +98,11 @@ PVFS_error PVFS_isys_flush( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, ref.fs_id); + PINT_init_msgarray_params(sm_p, ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->object_ref = ref; + PVFS_hint_copy(hints, &sm_p->hints); + PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle), &ref.handle); /* NOTE: This state machine previously multiplied the default job timeout * by five to allow for potentially long sync delays. 
We instead now set @@ -146,14 +125,15 @@ PVFS_error PVFS_isys_flush( */ PVFS_error PVFS_sys_flush( PVFS_object_ref ref, - const PVFS_credentials *credentials) + const PVFS_credentials *credentials, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_flush entered\n"); - ret = PVFS_isys_flush(ref, credentials, &op_id, NULL); + ret = PVFS_isys_flush(ref, credentials, &op_id, hints, NULL); if (ret) { PVFS_perror_gossip("PVFS_isys_flush call", ret); @@ -180,9 +160,6 @@ static int flush_datafile_setup_msgpairarray( int ret = -PVFS_EINVAL, i = 0; PVFS_object_attr *attr = NULL; PINT_sm_msgpair_state *msg_p = NULL; - int is_osd = fsid_is_osd(sm_p->object_ref.fs_id); - int is_osd_md = fsid_is_osd_md(sm_p->object_ref.fs_id); - int is_osd_meta = fsid_is_osd_meta(sm_p->object_ref.fs_id); gossip_debug(GOSSIP_CLIENT_DEBUG, "(%p) flush state: " "datafile_setup_msgpairarray\n", sm_p); @@ -191,47 +168,37 @@ static int flush_datafile_setup_msgpairarray( attr = &sm_p->getattr.attr; assert(attr); - assert(attr->mask & PVFS_ATTR_META_DFILES); - assert(attr->u.meta.dfile_count > 0); - - /* datafile count + 1 metafile */ - sm_p->msgarray_count = (attr->u.meta.dfile_count + 1); - - /* allocate memory for datafile message pairs and the metafile */ - sm_p->msgarray = (PINT_sm_msgpair_state *)malloc( - (sm_p->msgarray_count * sizeof(PINT_sm_msgpair_state))); - if (!sm_p->msgarray) + if(!(attr->mask & PVFS_ATTR_META_DFILES) || + !(attr->u.meta.dfile_count > 0)) { - js_p->error_code = -PVFS_ENOMEM; + /* this object does not have defiles - sync not supported */ + js_p->error_code = -PVFS_EINVAL; return SM_ACTION_COMPLETE; } - memset(sm_p->msgarray, 0, sm_p->msgarray_count * sizeof(*sm_p->msgarray)); - for(i = 0; i < sm_p->msgarray_count; i++) + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, (attr->u.meta.dfile_count + 1)); + if(ret != 0) { - msg_p = &sm_p->msgarray[i]; + gossip_err("Failed to initialize %d 
msgpairs\n", + (attr->u.meta.dfile_count + 1)); + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + /* datafile count + 1 metafile */ + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) + { if (i < attr->u.meta.dfile_count) { gossip_debug(GOSSIP_CLIENT_DEBUG, " datafile_flush: flushing handle %llu\n", llu(attr->u.meta.dfile_array[i])); - if (is_osd) { - ret = osd_command_set_flush(&msg_p->osd_command, - PVFS_OSD_DATA_PID, - attr->u.meta.dfile_array[i], 0); - if (ret) { - osd_error_xerrno(ret, "%s: set flush", __func__); - js_p->error_code = ret; - return SM_ACTION_COMPLETE; - } - } else { - PINT_SERVREQ_FLUSH_FILL(msg_p->req, - *sm_p->cred_p, - sm_p->object_ref.fs_id, - attr->u.meta.dfile_array[i]); - } + PINT_SERVREQ_FLUSH_FILL(msg_p->req, + *sm_p->cred_p, + sm_p->object_ref.fs_id, + attr->u.meta.dfile_array[i], + sm_p->hints); msg_p->fs_id = sm_p->object_ref.fs_id; msg_p->handle = attr->u.meta.dfile_array[i]; @@ -240,32 +207,17 @@ static int flush_datafile_setup_msgpairarray( } else { - assert(i == (sm_p->msgarray_count - 1)); + assert(i == (sm_p->msgarray_op.count - 1)); gossip_debug(GOSSIP_CLIENT_DEBUG, " metafile_flush: flushing handle %llu\n", llu(sm_p->object_ref.handle)); - if (is_osd_md) { - /* metafile is a datafile, nothing to do */ - continue; - } else if (is_osd_meta) { - /* metafile is a separate OSD, do it now */ - ret = osd_command_set_flush(&msg_p->osd_command, - PVFS_OSD_META_PID, - sm_p->object_ref.handle, 0); - if (ret) { - osd_error_xerrno(ret, "%s: meta set flush", __func__); - js_p->error_code = ret; - return SM_ACTION_COMPLETE; - } - } else { - /* stock or datafile; meta is pvfs, do it in next step */ - PINT_SERVREQ_FLUSH_FILL(msg_p->req, - *sm_p->cred_p, - sm_p->object_ref.fs_id, - sm_p->object_ref.handle); - } + PINT_SERVREQ_FLUSH_FILL(msg_p->req, + *sm_p->cred_p, + sm_p->object_ref.fs_id, + sm_p->object_ref.handle, + sm_p->hints); msg_p->fs_id = sm_p->object_ref.fs_id; msg_p->handle = sm_p->object_ref.handle; @@ -274,49 +226,14 @@ 
static int flush_datafile_setup_msgpairarray( } } - ret = PINT_serv_msgpairarray_resolve_addrs( - sm_p->msgarray_count - !!is_osd_md, sm_p->msgarray); + ret = PINT_serv_msgpairarray_resolve_addrs(&sm_p->msgarray_op); if (ret) { gossip_err("Error: failed to resolve server addresses.\n"); js_p->error_code = ret; - return SM_ACTION_COMPLETE; - } - - /* do datafiles first, then setup for metafile next */ - if (is_osd && attr->u.meta.dfile_count > 0) { - if (!is_osd_meta) - --sm_p->msgarray_count; /* hide the last one */ - js_p->error_code = OSD_MSGPAIR; - } - - return SM_ACTION_COMPLETE; -} - -/* - * Did the datafiles with OSD msgpair transfers. Now do the last one, - * the metafile, using normal pvfs transfer. The count was reduced above - * to hide it, now we copy it down into position and let it go. - */ -static int flush_switch_to_metafile(struct PINT_smcb *smcb, job_status_s *js_p) -{ - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - int last; - int is_osd_md = fsid_is_osd_md(sm_p->object_ref.fs_id); - int is_osd_meta = fsid_is_osd_meta(sm_p->object_ref.fs_id); - - gossip_debug(GOSSIP_CLIENT_DEBUG, "(%p) flush state: %s\n", sm_p, __func__); - if (is_osd_meta || is_osd_md) { - /* meta: did osd flush already, md: no separate meta */ - js_p->error_code = ALL_DONE; - return SM_ACTION_COMPLETE; } - /* only datafile */ - last = sm_p->msgarray_count; - memcpy(&sm_p->msgarray[0], &sm_p->msgarray[last], - sizeof(sm_p->msgarray[0])); - sm_p->msgarray_count = 1; + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -346,11 +263,7 @@ static int flush_cleanup( PINT_SM_GETATTR_STATE_CLEAR(sm_p->getattr); - if (sm_p->msgarray) - { - free(sm_p->msgarray); - sm_p->msgarray = NULL; - } + PINT_msgpairarray_destroy(&sm_p->msgarray_op); PINT_SET_OP_COMPLETE; return SM_ACTION_TERMINATE; diff --git a/src/client/sysint/sys-get-eattr.sm b/src/client/sysint/sys-get-eattr.sm index 64914e2..ac972f6 100644 --- 
a/src/client/sysint/sys-get-eattr.sm +++ b/src/client/sysint/sys-get-eattr.sm @@ -4,9 +4,15 @@ * See COPYING in top-level directory. */ +/** \file PVFS system calls for reading extended attributes + * \ingroup sysint + */ + #include #include +#ifndef WIN32 #include +#endif #include "client-state-machine.h" #include "pvfs2-debug.h" @@ -58,6 +64,7 @@ PVFS_error PVFS_isys_geteattr_list( PVFS_ds_keyval *key_array, PVFS_sysresp_geteattr *resp_p, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { int ret = -PVFS_EINVAL; @@ -85,13 +92,14 @@ PVFS_error PVFS_isys_geteattr_list( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, ref.fs_id); + PINT_init_msgarray_params(sm_p, ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.geteattr.nkey = nkey; sm_p->u.geteattr.key_array = key_array; sm_p->u.geteattr.resp_p = resp_p; sm_p->error_code = 0; sm_p->object_ref = ref; + PVFS_hint_copy(hints, &sm_p->hints); return PINT_client_state_machine_post( smcb, op_id, user_ptr); @@ -102,7 +110,8 @@ PVFS_error PVFS_sys_geteattr_list( const PVFS_credentials *credentials, int32_t nkey, PVFS_ds_keyval *key_array, - PVFS_sysresp_geteattr *resp_p) + PVFS_sysresp_geteattr *resp_p, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; @@ -110,7 +119,7 @@ PVFS_error PVFS_sys_geteattr_list( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_geteattr entered\n"); ret = PVFS_isys_geteattr_list(ref, credentials, - nkey, key_array, resp_p, &op_id, NULL); + nkey, key_array, resp_p, &op_id, hints, NULL); if (ret) { @@ -135,13 +144,14 @@ PVFS_error PVFS_sys_geteattr( PVFS_object_ref ref, const PVFS_credentials *credentials, PVFS_ds_keyval *key_p, - PVFS_ds_keyval *val_p) + PVFS_ds_keyval *val_p, + PVFS_hint hints) { PVFS_sysresp_geteattr resp_p; PVFS_error tmp_err; resp_p.val_array = val_p; resp_p.err_array = &tmp_err; - return PVFS_sys_geteattr_list(ref, credentials, 1, key_p, &resp_p); + return 
PVFS_sys_geteattr_list(ref, credentials, 1, key_p, &resp_p, hints); } static PINT_sm_action get_eattr_setup_msgpair( @@ -150,6 +160,7 @@ static PINT_sm_action get_eattr_setup_msgpair( struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret = -PVFS_EINVAL; int i; + PINT_sm_msgpair_state *msg_p; gossip_debug(GOSSIP_CLIENT_DEBUG, "get_eattr state: get_eattr_setup_msgpair\n"); @@ -165,32 +176,36 @@ static PINT_sm_action get_eattr_setup_msgpair( sm_p->u.geteattr.size_array[i] = sm_p->u.geteattr.resp_p->val_array[i].buffer_sz; + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; + PINT_SERVREQ_GETEATTR_FILL( - sm_p->msgpair.req, - (*sm_p->cred_p), - sm_p->object_ref.fs_id, - sm_p->object_ref.handle, - sm_p->u.geteattr.nkey, - sm_p->u.geteattr.key_array, - sm_p->u.geteattr.size_array); - - sm_p->msgarray = &(sm_p->msgpair); - sm_p->msgarray_count = 1; - sm_p->msgpair.fs_id = sm_p->object_ref.fs_id; - sm_p->msgpair.handle = sm_p->object_ref.handle; - sm_p->msgpair.retry_flag = PVFS_MSGPAIR_RETRY; - sm_p->msgpair.comp_fn = get_eattr_comp_fn; + msg_p->req, + (*sm_p->cred_p), + sm_p->object_ref.fs_id, + sm_p->object_ref.handle, + sm_p->u.geteattr.nkey, + sm_p->u.geteattr.key_array, + sm_p->u.geteattr.size_array, + sm_p->hints); + + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = sm_p->object_ref.handle; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = get_eattr_comp_fn; ret = PINT_cached_config_map_to_server( - &sm_p->msgpair.svr_addr, - sm_p->msgpair.handle, - sm_p->msgpair.fs_id); + &msg_p->svr_addr, + msg_p->handle, + msg_p->fs_id); if (ret) { gossip_err("Failed to map meta server address\n"); js_p->error_code = 0; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -221,7 +236,7 @@ static int get_eattr_comp_fn( int ret = 0; int decode_ret; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = 
PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); gossip_debug(GOSSIP_CLIENT_DEBUG, "get_eattr completion fn: get_eattr_comp_fn\n"); @@ -238,7 +253,7 @@ static int get_eattr_comp_fn( * val struct the user passed in */ - if (sm_p->msgarray[i].op_status == 0) + if (sm_p->msgarray_op.msgarray[i].op_status == 0) { int k, mink; mink = sm_p->u.geteattr.nkey; @@ -305,19 +320,19 @@ static int get_eattr_comp_fn( } else { - ret = sm_p->msgarray[i].op_status; + ret = sm_p->msgarray_op.msgarray[i].op_status; } /* if this is the last response, check all of the status values * and return error code if any requests failed */ - if (i == (sm_p->msgarray_count -1)) + if (i == (sm_p->msgarray_op.count -1)) { - for (j=0; j < sm_p->msgarray_count; j++) + for (j=0; j < sm_p->msgarray_op.count; j++) { - if (sm_p->msgarray[j].op_status != 0) + if (sm_p->msgarray_op.msgarray[j].op_status != 0) { - return(sm_p->msgarray[j].op_status); + return(sm_p->msgarray_op.msgarray[j].op_status); } } } diff --git a/src/client/sysint/sys-getattr.sm b/src/client/sysint/sys-getattr.sm index 036acd5..a02efcf 100644 --- a/src/client/sysint/sys-getattr.sm +++ b/src/client/sysint/sys-getattr.sm @@ -23,9 +23,8 @@ #include "pint-cached-config.h" #include "PINT-reqproto-encode.h" #include "pvfs2-internal.h" +#include "pvfs2-types-debug.h" #include "osd-util/osd-util.h" -#include "pvfs2-dist-simple-stripe.h" -#include "pint-distribution.h" #include "src/common/misc/extent-utils.h" /* pvfs2_client_getattr_sm @@ -43,13 +42,20 @@ * we grab these sizes. 
*/ +#ifdef WIN32 +static struct profiler getattr_prof; +#else +static struct profiler getattr_prof __attribute__((unused)); +#endif + extern job_context_id pint_client_sm_context; enum { GETATTR_ACACHE_MISS = 1, GETATTR_NEED_DATAFILE_SIZES = 2, - OSD_MSGPAIR = 2001, + GETATTR_IO_RETRY = 3, + OSD_MSGPAIR = 2001 }; /* completion function prototypes */ @@ -67,26 +73,25 @@ nested machine pvfs2_client_datafile_getattr_sizes_sm run getattr_datafile_getattr_setup_msgpairarray; OSD_MSGPAIR => datafile_getattr_xfer_osd_msgpairarray; success => datafile_getattr_xfer_msgpairarray; - default => datafile_getattr_failure; + default => datafile_getattr_cleanup; } - + state datafile_getattr_xfer_osd_msgpairarray { jump pvfs2_osd_msgpairarray_sm; - success => datafile_getattr_cleanup; - default => datafile_getattr_failure; + default => datafile_getattr_cleanup; } - + state datafile_getattr_xfer_msgpairarray { jump pvfs2_msgpairarray_sm; - success => datafile_getattr_cleanup; - default => datafile_getattr_failure; + default => datafile_getattr_retry; } - state datafile_getattr_failure + state datafile_getattr_retry { - run getattr_datafile_getattr_failure; + run getattr_datafile_getattr_retry; + GETATTR_IO_RETRY => datafile_getattr_xfer_msgpairarray; default => datafile_getattr_cleanup; } @@ -184,6 +189,7 @@ PVFS_error PVFS_isys_getattr( const PVFS_credentials *credentials, PVFS_sysresp_getattr *resp_p, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -216,17 +222,20 @@ PVFS_error PVFS_isys_getattr( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, ref.fs_id); + PINT_init_msgarray_params(sm_p, ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->error_code = 0; sm_p->object_ref = ref; sm_p->u.getattr.getattr_resp_p = resp_p; + PVFS_hint_copy(hints, &sm_p->hints); + PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle) + ,&ref.handle); 
PINT_SM_GETATTR_STATE_FILL( sm_p->getattr, ref, PVFS_util_sys_to_object_attr_mask( - attrmask), + attrmask), PVFS_TYPE_NONE, 0); @@ -240,7 +249,8 @@ PVFS_error PVFS_sys_getattr( PVFS_object_ref ref, uint32_t attrmask, const PVFS_credentials *credentials, - PVFS_sysresp_getattr *resp_p) + PVFS_sysresp_getattr *resp_p, + PVFS_hint hints) { PVFS_error ret, error; PVFS_sys_op_id op_id; @@ -248,7 +258,7 @@ PVFS_error PVFS_sys_getattr( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_getattr entered\n"); ret = PVFS_isys_getattr(ref, attrmask, credentials, - resp_p, &op_id, NULL); + resp_p, &op_id, hints, NULL); if (ret) { PVFS_perror_gossip("PVFS_isys_getattr call", ret); @@ -316,6 +326,7 @@ static PINT_sm_action getattr_acache_lookup( uint32_t trimmed_mask = 0; int missing_attrs; PVFS_object_ref object_ref; + struct server_configuration_s *server_config; int ret = -1; int attr_status = -1; int size_status = -1; @@ -324,6 +335,10 @@ static PINT_sm_action getattr_acache_lookup( object_ref = sm_p->getattr.object_ref; + server_config = PINT_get_server_config_struct( + sm_p->object_ref.fs_id); + PINT_put_server_config_struct(server_config); + assert(object_ref.handle != PVFS_HANDLE_NULL); assert(object_ref.fs_id != PVFS_FS_ID_NULL); @@ -362,7 +377,7 @@ static PINT_sm_action getattr_acache_lookup( js_p->error_code = GETATTR_ACACHE_MISS; return SM_ACTION_COMPLETE; } - + /* acache hit, check results */ /* The sys attr mask request is converted to object @@ -370,6 +385,16 @@ static PINT_sm_action getattr_acache_lookup( * entry */ trimmed_mask = sm_p->getattr.req_attrmask; + + gossip_debug(GOSSIP_GETATTR_DEBUG,"request attrmask:\n"); + PINT_attrmask_print(GOSSIP_GETATTR_DEBUG,sm_p->getattr.req_attrmask); + + gossip_debug(GOSSIP_GETATTR_DEBUG,"trimmed attrmask:\n"); + PINT_attrmask_print(GOSSIP_GETATTR_DEBUG,trimmed_mask); + + gossip_debug(GOSSIP_GETATTR_DEBUG,"returned attrmask:\n"); + PINT_attrmask_print(GOSSIP_GETATTR_DEBUG,sm_p->getattr.attr.mask); + /* the trimmed mask is used for 
making sure that we're only * checking attr bits that make sense for the object type * since the caller may have requested all attributes in @@ -379,6 +404,7 @@ static PINT_sm_action getattr_acache_lookup( if (sm_p->getattr.attr.objtype == PVFS_TYPE_METAFILE) { trimmed_mask &= (PVFS_ATTR_META_ALL | + PVFS_ATTR_META_UNSTUFFED | PVFS_ATTR_DATA_SIZE | PVFS_ATTR_COMMON_ALL); } @@ -404,10 +430,21 @@ static PINT_sm_action getattr_acache_lookup( * and-ing that result with the requested mask gives us the * bits in the requested mask but not in the cached mask. */ - gossip_debug(GOSSIP_ACACHE_DEBUG, "%s: want mask 0x%x cached mask 0x%x\n", - __func__, trimmed_mask, sm_p->getattr.attr.mask); missing_attrs = ((trimmed_mask ^ sm_p->getattr.attr.mask) & trimmed_mask); + + gossip_debug(GOSSIP_GETATTR_DEBUG,"missing attrmask BEFORE:\n"); + PINT_attrmask_print(GOSSIP_GETATTR_DEBUG,missing_attrs); + + if ( missing_attrs & PVFS_ATTR_META_MIRROR_DFILES ) + { + /*Mirroring is optional, so remove mirror-dfiles*/ + missing_attrs &= ~PVFS_ATTR_META_MIRROR_DFILES; + } + + gossip_debug(GOSSIP_GETATTR_DEBUG,"missing attrmask AFTER:\n"); + PINT_attrmask_print(GOSSIP_GETATTR_DEBUG,missing_attrs); + if((missing_attrs == PVFS_ATTR_DATA_SIZE && size_status == 0) || (missing_attrs == 0)) { @@ -427,20 +464,28 @@ static PINT_sm_action getattr_acache_lookup( */ if(missing_attrs == PVFS_ATTR_DATA_SIZE) { - /* if the file size is requested but the distribution info - * isn't and it hasn't been cached, then we need to - * get that first. - */ - PINT_SM_DATAFILE_SIZE_ARRAY_INIT( - &sm_p->getattr.size_array, - sm_p->getattr.attr.u.meta.dfile_count); - - js_p->error_code = GETATTR_NEED_DATAFILE_SIZES; - gossip_debug(GOSSIP_ACACHE_DEBUG, "acache: acache hit, need sizes" - "[%llu]\n", llu(object_ref.handle)); - return SM_ACTION_COMPLETE; + if(!(sm_p->getattr.attr.mask & PVFS_ATTR_META_UNSTUFFED)) + { + /* We are missing the size, and we don't know for sure if the + * file has been unstuffed. 
In this case, act as though we + * missed on all atributes so that we can get fresh stuffed size + * or datafile information as needed. + */ + } + else + { + /* if the file size is requested but the distribution info + * isn't and it hasn't been cached, then we need to + * get that first. + */ + + js_p->error_code = GETATTR_NEED_DATAFILE_SIZES; + gossip_debug(GOSSIP_ACACHE_DEBUG, "acache: acache hit, need sizes" + "[%llu]\n", llu(object_ref.handle)); + return SM_ACTION_COMPLETE; + } } - + /* we missed */ /* clean out the attributes we got from the cache; this will be * overwritten when we request updated information from the server @@ -461,32 +506,37 @@ static PINT_sm_action getattr_object_getattr_setup_msgpair( struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret = -PVFS_EINVAL; PVFS_object_ref object_ref; + PINT_sm_msgpair_state *msg_p; gossip_debug(GOSSIP_CLIENT_DEBUG, "(%p) %s\n", sm_p, __func__); - memset(&sm_p->msgpair, 0, sizeof(PINT_sm_msgpair_state)); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; object_ref = sm_p->getattr.object_ref; assert(object_ref.fs_id != PVFS_FS_ID_NULL); assert(object_ref.handle != PVFS_HANDLE_NULL); - sm_p->msgpair.fs_id = object_ref.fs_id; - sm_p->msgpair.handle = object_ref.handle; + msg_p->fs_id = object_ref.fs_id; + msg_p->handle = object_ref.handle; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = getattr_object_getattr_comp_fn; + ret = PINT_cached_config_map_to_server( - &sm_p->msgpair.svr_addr, sm_p->msgpair.handle, - sm_p->msgpair.fs_id); + &msg_p->svr_addr, msg_p->handle, + msg_p->fs_id); if (ret) { gossip_err("Failed to map meta server address\n"); js_p->error_code = ret; } - if (server_is_osd(sm_p->msgpair.svr_addr)) { + if (server_is_osd(msg_p->svr_addr)) { uint64_t oid, pid; int i, numattrs = 10; struct attribute_list attrl[numattrs]; - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = 
&sm_p->msgarray_op.msgpair.osd_command; int is_osd_md = fsid_is_osd_md(object_ref.fs_id); /* Set attr type, page and number */ @@ -542,6 +592,7 @@ static PINT_sm_action getattr_object_getattr_setup_msgpair( */ pid = PVFS_OSD_META_PID; /* datafile, metafile */ oid = object_ref.handle; + if (is_osd_md) { struct server_configuration_s *config; struct filesystem_configuration_s *fs; @@ -551,6 +602,7 @@ static PINT_sm_action getattr_object_getattr_setup_msgpair( pid = PVFS_OSD_DATA_PID; /* mdfile: assume data first */ config = PINT_get_server_config_struct(object_ref.fs_id); fs = PINT_config_find_fs_id(config, object_ref.fs_id); + PINT_put_server_config_struct(config); for (l = fs->meta_handle_ranges; l; l = PINT_llist_next(l)) { /* @@ -570,7 +622,7 @@ static PINT_sm_action getattr_object_getattr_setup_msgpair( } } out: - ret = osd_command_set_get_attributes(command, pid, oid); + ret = osd_command_set_get_attributes(command, pid, oid); if (ret) { osd_error_xerrno(ret, "%s: osd_command_set_get_attributes failed", __func__); @@ -594,24 +646,16 @@ out: } else { /* setup the msgpair to do a getattr operation */ PINT_SERVREQ_GETATTR_FILL( - sm_p->msgpair.req, + msg_p->req, *sm_p->cred_p, object_ref.fs_id, object_ref.handle, - sm_p->getattr.req_attrmask); + sm_p->getattr.req_attrmask, + sm_p->hints); js_p->error_code = 0; } - sm_p->msgpair.retry_flag = PVFS_MSGPAIR_RETRY; - sm_p->msgpair.comp_fn = getattr_object_getattr_comp_fn; - - /* point msgarray to the statically allocated msgpair */ - if (sm_p->msgarray != NULL && sm_p->msgarray != &(sm_p->msgpair)) - { - free(sm_p->msgarray); - } - sm_p->msgarray = &(sm_p->msgpair); - sm_p->msgarray_count = 1; + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -628,15 +672,12 @@ static int getattr_object_getattr_comp_fn( PVFS_object_attr *attr = NULL; int ret, status; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - - gossip_debug(GOSSIP_GETATTR_DEBUG, - 
"getattr_object_getattr_comp_fn called\n"); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); attr = &sm_p->getattr.attr; - if (server_is_osd(sm_p->msgpair.svr_addr)) { - struct osd_command *command = &sm_p->msgpair.osd_command; + if (server_is_osd(sm_p->msgarray_op.msgpair.svr_addr)) { + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; status = osd_errno_from_status(command->status); if (status != 0) return status; @@ -663,20 +704,26 @@ static int getattr_object_getattr_comp_fn( PINT_copy_osd_object_attr(attr, command); } - sm_p->msgpair.req.u.getattr.attrmask = attr->mask; + sm_p->msgarray_op.msgpair.req.u.getattr.attrmask = attr->mask; osd_command_attr_free(command); - } else { assert(resp_p->op == PVFS_SERV_GETATTR); + + gossip_debug(GOSSIP_GETATTR_DEBUG, + "getattr_object_getattr_comp_fn called\n"); + if (resp_p->status != 0) + { return resp_p->status; - + } + /* * If we've reached the callback for the getattr msgpair tranfer, * then we can make a copy of the retrieved attribute for later * caching. 
*/ - PINT_copy_object_attr(&sm_p->getattr.attr, &resp_p->u.getattr.attr); + PINT_copy_object_attr(&sm_p->getattr.attr, + &resp_p->u.getattr.attr); } /* if the ref_type mask is set to a non-zero value (!PVFS_TYPE_NONE) @@ -715,7 +762,7 @@ static int getattr_object_getattr_comp_fn( switch (attr->objtype) { case PVFS_TYPE_METAFILE: - if (sm_p->msgpair.req.u.getattr.attrmask & + if (sm_p->msgarray_op.msgpair.req.u.getattr.attrmask & PVFS_ATTR_META_DIST) { /* if we requested distribution attrs, did the distribution @@ -724,8 +771,26 @@ static int getattr_object_getattr_comp_fn( assert(attr->mask & PVFS_ATTR_META_DIST); assert(attr->u.meta.dist && (attr->u.meta.dist_size > 0)); } - - if (sm_p->msgpair.req.u.getattr.attrmask & + if (sm_p->msgarray_op.msgpair.req.u.getattr.attrmask & + PVFS_ATTR_META_MIRROR_DFILES) + { + if (attr->mask & PVFS_ATTR_META_MIRROR_DFILES) + { assert(attr->u.meta.mirror_dfile_array && + (attr->u.meta.mirror_copies_count > 0)); + gossip_debug(GOSSIP_GETATTR_DEBUG,"%s: Mirror handles and " + "copy count retrieved.\n" + ,__func__); + } + else + { + gossip_debug(GOSSIP_GETATTR_DEBUG,"%s: request attribute " + "mask says to get the mirror dfiles, if they exist, \nbut " + "the response attribute mask says they were not retrieved. " + "This is okay.\n" + ,__func__); + } + } + if (sm_p->msgarray_op.msgpair.req.u.getattr.attrmask & PVFS_ATTR_META_DFILES) { /* if we requested the datafile handles for the file, did @@ -744,13 +809,21 @@ static int getattr_object_getattr_comp_fn( */ if (sm_p->getattr.req_attrmask & PVFS_ATTR_DATA_SIZE) { + /* is the file stuffed? */ + if(!(attr->mask & PVFS_ATTR_META_UNSTUFFED)) + { + /* we can compute the size without doing any more + * getattr requests + */ + gossip_debug(GOSSIP_GETATTR_DEBUG, + "getattr_object_getattr_comp_fn: " + "detected stuffed file.\n"); + return(0); + } /* if caller asked for the size, then we need * to jump to the datafile_getattr state, which * will retrieve the datafile sizes for us. 
*/ - PINT_SM_DATAFILE_SIZE_ARRAY_INIT( - &sm_p->getattr.size_array, - attr->u.meta.dfile_count); return GETATTR_NEED_DATAFILE_SIZES; } } @@ -774,6 +847,8 @@ static int getattr_object_getattr_comp_fn( return 0; case PVFS_TYPE_DIRDATA: return 0; + case PVFS_TYPE_INTERNAL: + return 0; default: gossip_err("error: getattr_object_getattr_comp_fn: " "handle refers to invalid object type\n"); @@ -785,10 +860,15 @@ static int getattr_object_getattr_comp_fn( static PINT_sm_action getattr_object_getattr_failure( struct PINT_smcb *smcb, job_status_s *js_p) { +#ifdef WIN32 struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - gossip_debug( - GOSSIP_CLIENT_DEBUG, - "(%p) getattr state: getattr_object_getattr_failure\n", sm_p); +#else + struct PINT_client_sm *sm_p __attribute__((unused)) = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); +#endif + + gossip_debug(GOSSIP_CLIENT_DEBUG + ,"(%p) getattr state: getattr_object_getattr_failure\n" + ,sm_p); if ((js_p->error_code != -PVFS_ENOENT) && (js_p->error_code != -PVFS_EINVAL)) @@ -800,43 +880,93 @@ static PINT_sm_action getattr_object_getattr_failure( return SM_ACTION_COMPLETE; } +/* NOTE: This nested state machine allocates and stores the results in getattr.size_array. So, + * if you call this state machine directly, do not allocate space prior to calling it to avoid a + * nasty memory leak. 
+*/ static PINT_sm_action getattr_datafile_getattr_setup_msgpairarray( struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - int ret = -PVFS_EINVAL, i = 0; + PINT_sm_getattr_state *getattr = &(sm_p->getattr); + int ret = -PVFS_EINVAL; + int i = 0; PVFS_object_attr *attr = NULL; + PINT_sm_msgpair_state *msg_p = NULL; + uint64_t mirror_retry = (sm_p->getattr.attr.mask & PVFS_ATTR_META_MIRROR_DFILES); + PVFS_handle *handles; int is_osd = fsid_is_osd(sm_p->object_ref.fs_id); + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing getattr_datafile_getattr_setup_msgpairarray...\n"); + gossip_debug(GOSSIP_MIRROR_DEBUG,"%s: Are we mirroring? %s\n",__func__ + ,(mirror_retry ? "YES" : "NO")); + gossip_debug(GOSSIP_MIRROR_DEBUG,"%s: attr.mask:0x%08x \tmirror_retry:0x%08x\n" + ,__func__ + ,sm_p->getattr.attr.mask + ,(unsigned int)mirror_retry); + js_p->error_code = is_osd ? OSD_MSGPAIR : 0; attr = &sm_p->getattr.attr; assert(attr); - sm_p->msgarray = (PINT_sm_msgpair_state *)malloc( - attr->u.meta.dfile_count * sizeof(PINT_sm_msgpair_state)); - if (!sm_p->msgarray) + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; + + /*initialize the size_array, which will hold the size of each datahandle*/ + PINT_SM_DATAFILE_SIZE_ARRAY_INIT(&getattr->size_array,attr->u.meta.dfile_count); + + /* initialize mir_ctx_array: one context for each handle in the file */ + getattr->mir_ctx_array = malloc(attr->u.meta.dfile_count * + sizeof(*getattr->mir_ctx_array)); + if (!getattr->mir_ctx_array) { + gossip_lerr("Unable to allocate mirror context array.\n"); js_p->error_code = -PVFS_ENOMEM; return SM_ACTION_COMPLETE; } - memset(sm_p->msgarray, 0, - attr->u.meta.dfile_count * sizeof(*sm_p->msgarray)); + memset(getattr->mir_ctx_array,0,attr->u.meta.dfile_count * + sizeof(*getattr->mir_ctx_array)); + getattr->mir_ctx_count = attr->u.meta.dfile_count; - sm_p->msgarray_count = attr->u.meta.dfile_count; + for (i=0; 
imir_ctx_count; i++) + { + getattr->mir_ctx_array[i].original_datahandle = attr->u.meta.dfile_array[i]; + getattr->mir_ctx_array[i].original_server_nr = i; + } - /* for each datafile, post a send/recv pair to obtain the size */ - for(i = 0; i < sm_p->msgarray_count; i++) + /* allocate handle array and populate */ + handles = malloc(sizeof(PVFS_handle) * attr->u.meta.dfile_count); + if (!handles) { - PINT_sm_msgpair_state *msg_p = &sm_p->msgarray[i]; - assert(msg_p); + gossip_lerr("Unable to allocation local handles array.\n"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(handles,0,sizeof(PVFS_handle) * attr->u.meta.dfile_count); + memcpy(handles,attr->u.meta.dfile_array,attr->u.meta.dfile_count * + sizeof(PVFS_handle)); - gossip_debug(GOSSIP_GETATTR_DEBUG, - " datafile_getattr: getting size for handle %llu\n", - llu(attr->u.meta.dfile_array[i])); + /*allocate index-to-server array and populate*/ + getattr->index_to_server = malloc(sizeof(uint32_t)*attr->u.meta.dfile_count); + if (!getattr->index_to_server) + { + gossip_lerr("Unable to allocate index-to-server array.\n"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(getattr->index_to_server,0,sizeof(uint32_t)*attr->u.meta.dfile_count); + for (i=0; iu.meta.dfile_count; i++) + { + getattr->index_to_server[i] = (uint32_t)i; + } - if (is_osd) { - struct osd_command *command = &sm_p->msgarray[i].osd_command; + if (is_osd) + { + /* for each datafile, post a send/recv pair to obtain the size */ + for(i = 0; i < attr->u.meta.dfile_count; i++) + { + struct osd_command *command = &sm_p->msgarray_op.msgarray[i].osd_command; struct attribute_list id = { .type = ATTR_GET, .page = 0x1, @@ -847,41 +977,63 @@ static PINT_sm_action getattr_datafile_getattr_setup_msgpairarray( ret = osd_command_set_get_attributes(command, PVFS_OSD_DATA_PID, attr->u.meta.dfile_array[i]); if (ret) { - osd_error_xerrno(ret, + osd_error_xerrno(ret, "%s: osd_command_set_get_attributes 
failed", __func__); - js_p->error_code = ret; - return 1; + js_p->error_code = ret; + return 1; } ret = osd_command_attr_build(command, &id, 1); if (ret) { osd_error_xerrno(ret, "%s: osd_command_attr_build failed", - __func__); + __func__); js_p->error_code = ret; return 1; } - } else { - PINT_SERVREQ_GETATTR_FILL( + } + } else { + //START_PROFILER(getattr_prof); + PINT_SERVREQ_TREE_GET_FILE_SIZE_FILL( msg_p->req, *sm_p->cred_p, sm_p->getattr.object_ref.fs_id, - attr->u.meta.dfile_array[i], - PVFS_ATTR_DATA_SIZE); - } - - msg_p->fs_id = sm_p->getattr.object_ref.fs_id; - msg_p->handle = attr->u.meta.dfile_array[i]; - msg_p->retry_flag = PVFS_MSGPAIR_RETRY; - msg_p->comp_fn = getattr_datafile_getattr_comp_fn; + 0, + attr->u.meta.dfile_count, + handles, + (mirror_retry ? 1 : 0), + sm_p->hints); } - ret = PINT_serv_msgpairarray_resolve_addrs( - sm_p->msgarray_count, sm_p->msgarray); - if (ret < 0) + msg_p->fs_id = sm_p->getattr.object_ref.fs_id; + msg_p->handle = handles[0]; + msg_p->comp_fn = getattr_datafile_getattr_comp_fn; + + ret = PINT_cached_config_map_to_server( + &msg_p->svr_addr, msg_p->handle, msg_p->fs_id); + if (ret) { - gossip_lerr("Error: failed to resolve meta server addresses.\n"); + gossip_lerr("Unable to map server address for this handle(%llu) and " + "filesystem(%d)\n" + ,llu(msg_p->handle) + ,msg_p->fs_id); js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + /* set retry flag based on mirroring option...if mirroring, we will handle + * retries from this machine; if not, msgpairarray will handle retries. 
+ */ + if (mirror_retry) + { + msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; + } + else + { + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; } + + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -889,17 +1041,29 @@ static int getattr_datafile_getattr_comp_fn( void *v_p, struct PVFS_server_resp *resp_p, int index) { PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + PINT_sm_msgpair_state *msg = &(sm_p->msgarray_op.msgarray[index]); + struct PVFS_servreq_tree_get_file_size *tree = + &(msg->req.u.tree_get_file_size); int is_osd = fsid_is_osd(sm_p->object_ref.fs_id); PVFS_error status; int64_t size; + PINT_sm_getattr_state *getattr = &(sm_p->getattr); + PINT_client_getattr_mirror_ctx *ctx = NULL; + uint32_t server_nr = 0; + int i = 0; + if (is_osd) { status = osd_errno_from_status( - sm_p->msgarray[index].osd_command.status); + sm_p->msgarray_op.msgarray[index].osd_command.status); } else { - assert(resp_p->op == PVFS_SERV_GETATTR); - status = resp_p->status; + if (resp_p->status) + { /* tree request had a problem */ + return resp_p->status; + } + + assert(resp_p->op == PVFS_SERV_TREE_GET_FILE_SIZE); } if (status != 0) @@ -908,7 +1072,7 @@ static int getattr_datafile_getattr_comp_fn( } if (is_osd) { - struct osd_command *command = &sm_p->msgarray[index].osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgarray[index].osd_command; int ret = osd_command_attr_resolve(command); if (ret) { osd_error_xerrno(ret, "%s: osd_command_attr_resolve failed", @@ -923,39 +1087,295 @@ static int getattr_datafile_getattr_comp_fn( size = get_ntohll(command->attr->val); osd_command_attr_free(command); } else { - size = resp_p->u.getattr.attr.u.data.size; - } - - sm_p->getattr.size_array[index] = size; + /* if we are mirroring, then we need to check the error code returned from + * each server. 
If an error is found, mirroring will try to get the size + * from a different server. Below, we are marking which handles completed + * successfully, which tells mirroring NOT to retry them. + */ + if ( getattr->attr.mask & PVFS_ATTR_META_MIRROR_DFILES) + { + for (i=0; iu.tree_get_file_size.handle_count; i++) + { + if (resp_p->u.tree_get_file_size.error[i] != 0) + { /* error retrieving size for this handle..we will retry it. */ + continue; + } + server_nr = getattr->index_to_server[i]; + sm_p->getattr.size_array[server_nr] = resp_p->u.tree_get_file_size.size[i]; + ctx = &(getattr->mir_ctx_array[server_nr]); + ctx->msg_completed = 1; + + /*For completed messages, update the size array with the file size + *just retrieved. + */ + getattr->size_array[server_nr] = resp_p->u.tree_get_file_size.size[i]; + gossip_debug(GOSSIP_GETATTR_DEBUG,"%s: size[%d]:%lld \thandle:%llu\n" + ,__func__ + ,i + ,llu(getattr->size_array[i]) + ,llu(tree->handle_array[i])); + }/*end for*/ + } + else + { + /* if we are NOT mirroring and an error is found for an individual handle, + * then we must invalidate the size array and return an error code. 
+ */ + for (i=0; iu.tree_get_file_size.handle_count; i++) + { + gossip_debug(GOSSIP_GETATTR_DEBUG,"%s: error[%d]:%d" + "\tsize[%d]:%d\n" + ,__func__ + ,i + ,(int)resp_p->u.tree_get_file_size.error[i] + ,i + ,(int)resp_p->u.tree_get_file_size.size[i]); + + if (resp_p->u.tree_get_file_size.error[i] != 0) + { + gossip_debug(GOSSIP_GETATTR_DEBUG,"%s: error[%d] is %d\n" + ,__func__ + ,i + ,resp_p->u.tree_get_file_size.error[i]); + memset(getattr->size_array,0,sizeof(*getattr->size_array)); + return (resp_p->u.tree_get_file_size.error[i]); + } + + getattr->size_array[i] = resp_p->u.tree_get_file_size.size[i]; + }/*end for*/ - gossip_debug(GOSSIP_GETATTR_DEBUG, - "datafile_getattr: size of datafile %d is %lld\n", - index, lld(size)); + }/*end if*/ + } - return 0; -} + return(0); +}/*end getattr_datafile_getattr_comp_fn*/ -static PINT_sm_action getattr_datafile_getattr_failure( +static PINT_sm_action getattr_datafile_getattr_retry( struct PINT_smcb *smcb, job_status_s *js_p) { - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - gossip_debug(GOSSIP_CLIENT_DEBUG, "(%p) getattr state: " - "getattr_datafile_getattr_failure\n", sm_p); + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PVFS_object_attr *attr = &(sm_p->getattr.attr); + PVFS_metafile_attr *meta = &(attr->u.meta); + PINT_sm_getattr_state *getattr = &(sm_p->getattr); + PINT_client_getattr_mirror_ctx *ctx = NULL; + PINT_sm_msgarray_op *mop = &(sm_p->msgarray_op); + PINT_sm_msgpair_state *msg = &(mop->msgarray[0]); + char *enc_req_bytes = NULL; + struct PVFS_servreq_tree_get_file_size *tree = + &(msg->req.u.tree_get_file_size); + uint32_t retry_msg_count = 0; + uint32_t index = 0; + uint32_t copies = 0; + uint32_t server_nr = 0; + int i = 0; + int j = 0; + int k = 0; + int ret = 0; + uint32_t *tmp_server_nr; + PVFS_handle *tmp_handles; + + gossip_debug(GOSSIP_CLIENT_DEBUG, + "(%p) getattr state: " + "getattr_datafile_getattr_retry\n", sm_p); + + /*We only need to 
retry if we have mirrors; otherwise, msgpairarray + *has already handled the retries. + */ + if (!(attr->mask & PVFS_ATTR_META_MIRROR_DFILES)) + { + /*we are NOT mirroring.*/ + return SM_ACTION_COMPLETE; + } + + /* How many handles need to be retried? */ + for (i=0; inum_data_files; i++) + { + server_nr = getattr->index_to_server[i]; + ctx = &(getattr->mir_ctx_array[server_nr]); + if (ctx->msg_completed == 0) + retry_msg_count++; + } + + /* no retries needed */ + if (retry_msg_count == 0) + { + return SM_ACTION_COMPLETE; + } + + /* do we have any retries available? */ + if (getattr->retry_count >= mop->params.retry_limit) + { + /* at this point, we have msgpairs that need to be retried, but we + * we have met our retry limit. so, we must invalidate the size array, + * since we don't have all of the necessary sizes AND return an error. + */ + memset(getattr->size_array,0,sizeof(*getattr->size_array) * getattr->size); + js_p->error_code = (js_p->error_code ? js_p->error_code : -PVFS_ETIME); + gossip_err("%s: Ran out of retries(%d)\n",__func__ + ,getattr->retry_count); + return SM_ACTION_COMPLETE; + } + + /*allocate temporary index-to-server array*/ + tmp_server_nr = malloc(sizeof(uint32_t) * retry_msg_count); + if (!tmp_server_nr) + { + gossip_lerr("Unable to allocate temporary index-to-server array.\n"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(tmp_server_nr,0,sizeof(uint32_t) * retry_msg_count); + + /*allocate temporary handle array*/ + tmp_handles = malloc(sizeof(PVFS_handle) * retry_msg_count); + if (!tmp_handles) + { + gossip_lerr("Unable to allocate temporary handle array.\n"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(tmp_handles,0,sizeof(PVFS_handle) * retry_msg_count); + + /* okay. let's setup new handles to retry. 
+ */ + for (i=0,j=0; inum_data_files; i++) + { + server_nr = getattr->index_to_server[i]; + ctx = &(getattr->mir_ctx_array[server_nr]); + + /* don't process completed messages */ + if (ctx->msg_completed) + continue; + + /* for incomplete messages, cleanup memory, if necessary */ + enc_req_bytes = (char *)&(msg->encoded_req); + for (k=0; kencoded_req); k++) + { + if (enc_req_bytes[k] != '\0') + { + PINT_encode_release(&(msg->encoded_req),PINT_ENCODE_REQ); + break; + } + }/*end for*/ + + if (msg->encoded_resp_p) + { + BMI_memfree(msg->svr_addr + ,msg->encoded_resp_p + ,msg->max_resp_sz + ,BMI_RECV); + } + + /* Should we use the original datahandle? */ + if (ctx->retry_original) + { + ctx->retry_original = 0; + tmp_handles[j] = ctx->original_datahandle; + tmp_server_nr[j] = ctx->original_server_nr; + j++; + continue; + }/*end retry_original*/ + + /* otherwise, get next mirrored handle. note: if a mirrored handle is + * zero, then this means that the creation of this mirrored object + * failed for its particular server. in this case, get the next valid + * handle. as a last resort, retry the original handle. + */ + copies = ctx->current_copies_count; + for (;copies < meta->mirror_copies_count; copies++) + { + index = (copies*meta->dfile_count) + server_nr; + if (meta->mirror_dfile_array[index] != 0) + { /* we have found a valid mirrored handle */ + tmp_handles[j] = meta->mirror_dfile_array[index]; + tmp_server_nr[j] = server_nr; + j++; + break; + } + } + + /* if we haven't found a valid mirrored handle, retry the original + * datahandle. 
+ */ + if ( copies == meta->mirror_copies_count ) + { + tmp_handles[j] = ctx->original_datahandle; + tmp_server_nr[j] = ctx->original_server_nr; + j++; + ctx->retry_original = 0; + ctx->current_copies_count = 0; + getattr->retry_count++; + continue; + }/*end if we have to use the original*/ + + /* otherwise, setup for the next retry event */ + ctx->current_copies_count++; + if (ctx->current_copies_count == meta->mirror_copies_count) + { + ctx->current_copies_count = 0; + ctx->retry_original = 1; + getattr->retry_count++; + } + }/*end for each handle in the old request*/ + + /*replace values in old tree request*/ + free(tree->handle_array); + tree->handle_array = tmp_handles; + tree->num_data_files = retry_msg_count; + + /*replace values in old message request*/ + msg->handle = tmp_handles[0]; + msg->svr_addr=0; + ret = PINT_cached_config_map_to_server(&msg->svr_addr + ,msg->handle + ,msg->fs_id); + if (ret) + { + gossip_lerr("Unable to determine server address for handle(%llu) and " + "file system(%d).\n" + ,llu(msg->handle) + ,msg->fs_id); + js_p->error_code = -PVFS_EINVAL; + return SM_ACTION_COMPLETE; + } + + /*save index-to-server array*/ + free(getattr->index_to_server); + getattr->index_to_server = tmp_server_nr; + + /* Push the msgarray_op and jump to msgpairarray.sm */ + PINT_sm_push_frame(smcb,0,mop); + js_p->error_code=GETATTR_IO_RETRY; + return SM_ACTION_COMPLETE; -} +} /*end datafile_getattr_retry*/ + + + + + static PINT_sm_action getattr_datafile_getattr_cleanup( struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - if(js_p->error_code == 0) - { - if (sm_p->msgarray) - { - free(sm_p->msgarray); - } - sm_p->msgarray = NULL; - } +//FINISH_PROFILER("getattr", getattr_prof, 1); + PINT_sm_getattr_state *getattr = &(sm_p->getattr); + + /* cleanup handle_array created for the one tree request */ + PINT_sm_msgpair_state *msg_p = &(sm_p->msgarray_op.msgarray[0]); + if 
(msg_p->req.u.tree_get_file_size.handle_array) + free(msg_p->req.u.tree_get_file_size.handle_array); + + /* cleanup tree request */ + PINT_msgpairarray_destroy(&sm_p->msgarray_op); + + /* cleanup memory that may have been used for mirrored retries.*/ + if (getattr->mir_ctx_array) + free(getattr->mir_ctx_array); + if (getattr->index_to_server) + free(getattr->index_to_server); + return SM_ACTION_COMPLETE; } @@ -974,18 +1394,32 @@ static PINT_sm_action getattr_acache_insert( if (sm_p->getattr.attr.objtype == PVFS_TYPE_METAFILE && sm_p->getattr.req_attrmask & PVFS_ATTR_DATA_SIZE) { - /* compute size as requested */ - assert(sm_p->getattr.attr.u.meta.dist); - assert(sm_p->getattr.attr.u.meta.dist->methods && - sm_p->getattr.attr.u.meta.dist->methods->logical_file_size); - - sm_p->getattr.size = - (sm_p->getattr.attr.u.meta.dist->methods->logical_file_size)( - sm_p->getattr.attr.u.meta.dist->params, - sm_p->getattr.attr.u.meta.dfile_count, - sm_p->getattr.size_array); - - tmp_size = &sm_p->getattr.size; + if(!(sm_p->getattr.attr.mask & PVFS_ATTR_META_UNSTUFFED)) + { + /* stuffed file case */ + sm_p->getattr.size = sm_p->getattr.attr.u.meta.stuffed_size; + tmp_size = &sm_p->getattr.size; + gossip_debug(GOSSIP_GETATTR_DEBUG, "getattr_acache_insert " + "calculated stuffed logical size of %lld\n", lld(*tmp_size)); + } + else + { + /* compute size as requested */ + assert(sm_p->getattr.attr.u.meta.dist); + assert(sm_p->getattr.attr.u.meta.dist->methods && + sm_p->getattr.attr.u.meta.dist->methods->logical_file_size); + + sm_p->getattr.size = + (sm_p->getattr.attr.u.meta.dist->methods->logical_file_size)( + sm_p->getattr.attr.u.meta.dist->params, + sm_p->getattr.attr.u.meta.dfile_count, + sm_p->getattr.size_array); + + tmp_size = &sm_p->getattr.size; + gossip_debug(GOSSIP_GETATTR_DEBUG,"getattr_acache_insert calculated" + " unstuffed logical size of %lld\n" + , lld(*tmp_size)); + } } PINT_acache_update(sm_p->getattr.object_ref, @@ -1003,12 +1437,43 @@ static PINT_sm_action 
getattr_cleanup( struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_sm_getattr_state *getattr = &(sm_p->getattr); + gossip_debug(GOSSIP_CLIENT_DEBUG, "(%p) getattr state: getattr_cleanup\n", sm_p); + gossip_debug(GOSSIP_GETATTR_DEBUG + ,"%s: js_p->error_code:%d \tgetattr->attr.mask:0x%08x\n" + ,__func__ + ,js_p->error_code + ,getattr->attr.mask); + sm_p->error_code = js_p->error_code; - PINT_SM_DATAFILE_SIZE_ARRAY_DESTROY(&sm_p->getattr.size_array); + /* cleanup size array; is only allocated if datafile sizes are retrieved */ + if (getattr->size_array) + free(getattr->size_array); + + /* cleanup getattr when an error occurs */ + if (js_p->error_code) + { + if (getattr->attr.mask & PVFS_ATTR_META_DFILES) + { + if (getattr->attr.u.meta.dfile_array) + free(getattr->attr.u.meta.dfile_array); + } + + if (getattr->attr.mask & PVFS_ATTR_META_MIRROR_DFILES) + { + if (getattr->attr.u.meta.mirror_dfile_array) + free(getattr->attr.u.meta.mirror_dfile_array); + } + + if (getattr->attr.mask & PVFS_ATTR_META_DIST) + { + PINT_dist_free(getattr->attr.u.meta.dist); + } + }/*end if error*/ return SM_ACTION_COMPLETE; } @@ -1068,12 +1533,17 @@ static PINT_sm_action getattr_set_sys_response( { sysresp->attr.dfile_count = attr->u.meta.dfile_count; } + if (sm_p->getattr.req_attrmask & PVFS_ATTR_META_MIRROR_DFILES) + { + sysresp->attr.mirror_copies_count = attr->u.meta.mirror_copies_count; + } } if (attr->objtype == PVFS_TYPE_DIRECTORY) { gossip_debug(GOSSIP_CLIENT_DEBUG, "dfile_count: %d\n", attr->u.dir.hint.dfile_count); - gossip_debug(GOSSIP_CLIENT_DEBUG, "dist_name_len = %d, dist_params_len = %d\n", + gossip_debug(GOSSIP_CLIENT_DEBUG, "dist_name_len = %d, " + "dist_params_len = %d\n", attr->u.dir.hint.dist_name_len, attr->u.dir.hint.dist_params_len); sysresp->attr.dfile_count = attr->u.dir.hint.dfile_count; /* @@ -1090,7 +1560,8 @@ static PINT_sm_action getattr_set_sys_response( PINT_SET_OP_COMPLETE; return 
SM_ACTION_TERMINATE; } - gossip_debug(GOSSIP_CLIENT_DEBUG, "dist_name_hint: %s\n", sysresp->attr.dist_name); + gossip_debug(GOSSIP_CLIENT_DEBUG, "dist_name_hint: %s\n" + , sysresp->attr.dist_name); } if (attr->u.dir.hint.dist_params_len > 0 && (sm_p->getattr.req_attrmask & PVFS_ATTR_DIR_HINT)) @@ -1104,7 +1575,8 @@ static PINT_sm_action getattr_set_sys_response( PINT_SET_OP_COMPLETE; return SM_ACTION_TERMINATE; } - gossip_debug(GOSSIP_CLIENT_DEBUG, "dist_name_params: %s\n", sysresp->attr.dist_params); + gossip_debug(GOSSIP_CLIENT_DEBUG, "dist_name_params: %s\n" + , sysresp->attr.dist_params); } } @@ -1118,6 +1590,7 @@ static PINT_sm_action getattr_set_sys_response( sysresp->attr.mask = PVFS_util_object_to_sys_attr_mask(attr->mask); sysresp->attr.size = 0; sysresp->attr.objtype = attr->objtype; + sysresp->attr.cid = attr->cid; if (js_p->error_code == 0) { @@ -1139,6 +1612,14 @@ static PINT_sm_action getattr_set_sys_response( sysresp->attr.mask |= PVFS_ATTR_SYS_SIZE; } + if(attr->mask & PVFS_ATTR_META_DIST) + { + /* we have enough information to set a block size */ + sysresp->attr.blksize = attr->u.meta.dist->methods->get_blksize( + attr->u.meta.dist->params); + sysresp->attr.mask |= PVFS_ATTR_SYS_BLKSIZE; + } + /* if this is a symlink, add the link target */ if (sm_p->getattr.req_attrmask & PVFS_ATTR_SYMLNK_TARGET) { diff --git a/src/client/sysint/sys-io.sm b/src/client/sysint/sys-io.sm index dd4f39d..c6132da 100644 --- a/src/client/sysint/sys-io.sm +++ b/src/client/sysint/sys-io.sm @@ -2,6 +2,7 @@ * (C) 2003 Clemson University and The University of Chicago * * See COPYING in top-level directory. 
+ * */ /** \file @@ -12,6 +13,9 @@ #include #include +#ifndef WIN32 +#include +#endif #include "client-state-machine.h" #include "pvfs2-debug.h" @@ -22,13 +26,14 @@ #include "PINT-reqproto-encode.h" #include "pint-util.h" #include "pvfs2-internal.h" +#include "osd-util/osd-util.h" #define IO_MAX_SEGMENT_NUM 50 +#define IO_ATTR_MASKS (PVFS_ATTR_META_ALL|PVFS_ATTR_COMMON_TYPE) extern job_context_id pint_client_sm_context; -enum -{ +enum { IO_DO_OSD_IO = 2001 }; @@ -40,7 +45,12 @@ enum IO_RETRY_NODELAY, IO_GET_DATAFILE_SIZE, IO_ANALYZE_SIZE_RESULTS, - IO_DO_SMALL_IO + IO_DO_SMALL_IO, + IO_UNSTUFF, + IO_GETATTR_SERVER, + IO_MIRRORING, + IO_NO_MIRRORING, + IO_FATAL_ERROR, }; /* Helper functions local to sys-io.sm. */ @@ -109,6 +119,18 @@ static int io_contexts_init(PINT_client_sm *sm_p, int count, static void io_contexts_destroy(PINT_client_sm *sm_p); +static int unstuff_needed( + PVFS_Request mem_req, + PVFS_offset file_req_offset, + PINT_dist *dist_p, + uint32_t mask, + enum PVFS_io_type io_type); + +static int unstuff_comp_fn( + void *v_p, + struct PVFS_server_resp *resp_p, + int i); + /* misc constants and helper macros */ #define IO_RECV_COMPLETED 1 @@ -137,6 +159,7 @@ static void io_datafile_index_array_destroy( %% machine pvfs2_client_io_sm + { state init { @@ -145,12 +168,35 @@ machine pvfs2_client_io_sm } state io_getattr - { + { jump pvfs2_client_getattr_sm; + success => inspect_attr; + default => io_cleanup; + } + + state inspect_attr + { + run io_inspect_attr; + IO_UNSTUFF => unstuff_setup_msgpair; + IO_GETATTR_SERVER => unstuff_setup_msgpair; success => io_datafile_setup_msgpairs; default => io_cleanup; } + state unstuff_setup_msgpair + { + run io_unstuff_setup_msgpair; + success => unstuff_xfer_msgpair; + default => io_cleanup; + } + + state unstuff_xfer_msgpair + { + jump pvfs2_msgpairarray_sm; + success => io_datafile_setup_msgpairs; + default => io_cleanup; + } + state io_datafile_setup_msgpairs { run io_datafile_setup_msgpairs; @@ -175,15 +221,31 @@ 
machine pvfs2_client_io_sm } state io_datafile_post_msgpairs - { + { run io_datafile_post_msgpairs; IO_RETRY => io_datafile_post_msgpairs_retry; + IO_FATAL_ERROR => io_cleanup; default => io_datafile_complete_operations; } state io_datafile_post_msgpairs_retry { run io_datafile_post_msgpairs_retry; + IO_MIRRORING => io_datafile_mirror_retry; + IO_NO_MIRRORING => io_datafile_no_mirror_retry; + default => io_datafile_no_mirror_retry; + } + + state io_datafile_no_mirror_retry + { + run io_datafile_no_mirror_retry; + IO_DATAFILE_TRANSFERS_COMPLETE => io_analyze_results; + default => io_datafile_post_msgpairs; + } + + state io_datafile_mirror_retry + { + run io_datafile_mirror_retry; IO_DATAFILE_TRANSFERS_COMPLETE => io_analyze_results; default => io_datafile_post_msgpairs; } @@ -200,7 +262,7 @@ machine pvfs2_client_io_sm { run io_analyze_results; IO_RETRY => init; - IO_ANALYZE_SIZE_RESULTS => io_analyze_size_results; + /*IO_ANALYZE_SIZE_RESULTS => io_analyze_size_results;*/ IO_GET_DATAFILE_SIZE => io_datafile_size; default => io_cleanup; } @@ -241,6 +303,7 @@ PVFS_error PVFS_isys_io( PVFS_sysresp_io *resp_p, enum PVFS_io_type io_type, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -252,11 +315,17 @@ PVFS_error PVFS_isys_io( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_isys_io entered [%llu]\n", llu(ref.handle)); + server_config = PINT_get_server_config_struct(ref.fs_id); + cur_fs = PINT_config_find_fs_id(server_config, ref.fs_id); + PINT_put_server_config_struct(server_config); + if ((ref.handle == PVFS_HANDLE_NULL) || (ref.fs_id == PVFS_FS_ID_NULL) || (resp_p == NULL)) { - gossip_err("invalid (NULL) required argument\n"); - return ret; + if(!server_config->post_create) { + gossip_err("invalid (NULL) required argument\n"); + return ret; + } } if ((io_type != PVFS_IO_READ) && (io_type != PVFS_IO_WRITE)) @@ -265,10 +334,6 @@ PVFS_error PVFS_isys_io( return ret; } - server_config = PINT_get_server_config_struct(ref.fs_id); - 
cur_fs = PINT_config_find_fs_id(server_config, ref.fs_id); - PINT_put_server_config_struct(server_config); - if (!cur_fs) { gossip_err("invalid (unknown) fs id specified\n"); @@ -296,7 +361,7 @@ PVFS_error PVFS_isys_io( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, ref.fs_id); + PINT_init_msgarray_params(sm_p, ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.io.io_type = io_type; sm_p->u.io.file_req = file_req; @@ -308,12 +373,15 @@ PVFS_error PVFS_isys_io( sm_p->u.io.encoding = cur_fs->encoding; sm_p->u.io.stored_error_code = 0; sm_p->u.io.retry_count = 0; - sm_p->msgarray = NULL; + sm_p->msgarray_op.msgarray = NULL; + sm_p->msgarray_op.count = 0; sm_p->u.io.datafile_index_array = NULL; sm_p->u.io.datafile_count = 0; sm_p->u.io.total_size = 0; sm_p->u.io.small_io = 0; sm_p->object_ref = ref; + PVFS_hint_copy(hints, &sm_p->hints); + PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle), &ref.handle); return PINT_client_state_machine_post( smcb, op_id, user_ptr); @@ -331,7 +399,8 @@ PVFS_error PVFS_sys_io( PVFS_Request mem_req, const PVFS_credentials *credentials, PVFS_sysresp_io *resp_p, - enum PVFS_io_type io_type) + enum PVFS_io_type io_type, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; @@ -339,7 +408,7 @@ PVFS_error PVFS_sys_io( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_io entered\n"); ret = PVFS_isys_io(ref, file_req, file_req_offset, buffer, mem_req, - credentials, resp_p, io_type, &op_id, NULL); + credentials, resp_p, io_type, &op_id, hints, NULL); if (ret == 1) return 0; else if (ret < 0) @@ -377,7 +446,7 @@ static PINT_sm_action io_init( PINT_SM_GETATTR_STATE_FILL( sm_p->getattr, sm_p->object_ref, - PVFS_ATTR_META_ALL|PVFS_ATTR_COMMON_TYPE, + IO_ATTR_MASKS, PVFS_TYPE_METAFILE, 0); @@ -403,12 +472,100 @@ static PINT_sm_action io_init( } gossip_debug(GOSSIP_IO_DEBUG, " sys-io retrying with delay.\n"); return 
job_req_sched_post_timer( - sm_p->msgarray_params.retry_delay, smcb, 0, js_p, &tmp_id, + sm_p->msgarray_op.params.retry_delay, smcb, 0, js_p, &tmp_id, pint_client_sm_context); } return SM_ACTION_COMPLETE; } +static PINT_sm_action io_inspect_attr( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int is_osd_meta = fsid_is_osd_meta(sm_p->getattr.object_ref.fs_id); + int is_osd_md = fsid_is_osd_md(sm_p->getattr.object_ref.fs_id); + + if (PINT_smcb_cancelled(smcb)) + { + js_p->error_code = -PVFS_ECANCEL; + return SM_ACTION_COMPLETE; + } + + /* determine if we need to unstuff or not to service this request */ + js_p->error_code = unstuff_needed( + sm_p->u.io.mem_req, + sm_p->u.io.file_req_offset, + sm_p->getattr.attr.u.meta.dist, + sm_p->getattr.attr.mask, + sm_p->u.io.io_type); + + if (is_osd_meta || is_osd_md) + { + js_p->error_code = 0; + } + + return(SM_ACTION_COMPLETE); +} + +static PINT_sm_action io_unstuff_setup_msgpair( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int ret = -PVFS_EINVAL; + PINT_sm_msgpair_state *msg_p = NULL; + + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; + + if(js_p->error_code == IO_UNSTUFF) + { + /* note that unstuff must request the same attr mask that we requested + * earlier. If the file has already been unstuffed then we need an + * updated authoritative copy of all of the attrs relevant to I/O. 
+ */ + PINT_SERVREQ_UNSTUFF_FILL( + msg_p->req, + (*sm_p->cred_p), + sm_p->object_ref.fs_id, + sm_p->object_ref.handle, + IO_ATTR_MASKS); + } + else if(js_p->error_code == IO_GETATTR_SERVER) + { + PINT_SERVREQ_GETATTR_FILL( + msg_p->req, + (*sm_p->cred_p), + sm_p->object_ref.fs_id, + sm_p->object_ref.handle, + IO_ATTR_MASKS, + sm_p->hints); + } + else + { + assert(0); + } + js_p->error_code = 0; + + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = sm_p->object_ref.handle; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = unstuff_comp_fn; + + ret = PINT_cached_config_map_to_server( + &msg_p->svr_addr, + msg_p->handle, + msg_p->fs_id); + if (ret) + { + gossip_err("Failed to map meta server address\n"); + js_p->error_code = ret; + } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); + return SM_ACTION_COMPLETE; +} + + static PINT_sm_action io_datafile_setup_msgpairs( struct PINT_smcb *smcb, job_status_s *js_p) { @@ -439,11 +596,17 @@ static PINT_sm_action io_datafile_setup_msgpairs( switch(attr->objtype) { case PVFS_TYPE_METAFILE: + assert(attr->mask & PVFS_ATTR_META_DFILES); assert(attr->mask & PVFS_ATTR_META_DIST); assert(attr->u.meta.dist_size > 0); assert(attr->u.meta.dfile_array); assert(attr->u.meta.dfile_count > 0); + if (attr->mask & PVFS_ATTR_META_MIRROR_DFILES && !is_osd) + { + assert(attr->u.meta.mirror_dfile_array); + assert(attr->u.meta.mirror_copies_count); + } break; case PVFS_TYPE_DIRECTORY: js_p->error_code = -PVFS_EISDIR; @@ -577,7 +740,8 @@ static PINT_sm_action io_datafile_setup_msgpairs( attr->u.meta.dist, sm_p->u.io.file_req, sm_p->u.io.file_req_offset, - PINT_REQUEST_TOTAL_BYTES(sm_p->u.io.mem_req)); + PINT_REQUEST_TOTAL_BYTES(sm_p->u.io.mem_req), + sm_p->hints); } js_p->error_code = 0; @@ -669,13 +833,17 @@ static PINT_sm_action io_datafile_post_msgpairs( { goto try_next_encoding; } - } + } /* FIXME: make this a clean error transition by adjusting the completion count and/or (not) exiting */ + /* If one of the msgpairs gets 
this type of error, then the entire + * request should be aborted. Becky Ligon. + */ PVFS_perror_gossip("PINT_encode failed", ret); - js_p->error_code = ret; + sm_p->u.io.stored_error_code = ret; + js_p->error_code = IO_FATAL_ERROR; return SM_ACTION_COMPLETE; } @@ -687,7 +855,8 @@ static PINT_sm_action io_datafile_post_msgpairs( if (!msg->encoded_resp_p) { /* FIXME: see above FIXME */ - js_p->error_code = -PVFS_ENOMEM; + sm_p->u.io.stored_error_code = -PVFS_ENOMEM; + js_p->error_code = IO_FATAL_ERROR; return SM_ACTION_COMPLETE; } @@ -704,7 +873,6 @@ static PINT_sm_action io_datafile_post_msgpairs( status_user_tag, msg->max_resp_sz); cur_ctx->session_tag = PINT_util_get_next_tag(); - cur_ctx->msg_recv_has_been_posted = 0; cur_ctx->msg_recv_in_progress = 0; @@ -713,7 +881,7 @@ static PINT_sm_action io_datafile_post_msgpairs( msg->svr_addr, msg->encoded_resp_p, msg->max_resp_sz, cur_ctx->session_tag, BMI_PRE_ALLOC, smcb, status_user_tag, &msg->recv_status, &msg->recv_id, pint_client_sm_context, - server_config->client_job_bmi_timeout); + server_config->client_job_bmi_timeout, sm_p->hints); PINT_put_server_config_struct(server_config); /* ret -1: problem, do not look at msg recv_status */ @@ -753,8 +921,8 @@ static PINT_sm_action io_datafile_post_msgpairs( PVFS_perror_gossip("Receive immediately failed", msg->recv_status.error_code); - ret = msg->recv_status.error_code; - js_p->error_code = ret; + //ret = msg->recv_status.error_code; + js_p->error_code = IO_RETRY; continue; } @@ -788,7 +956,7 @@ static PINT_sm_action io_datafile_post_msgpairs( msg->encoded_req.total_size, cur_ctx->session_tag, msg->encoded_req.buffer_type, 1, smcb, status_user_tag, &msg->send_status, &msg->send_id, pint_client_sm_context, - server_config->client_job_bmi_timeout); + server_config->client_job_bmi_timeout, sm_p->hints); PINT_put_server_config_struct(server_config); if (ret < 0) { @@ -813,7 +981,6 @@ static PINT_sm_action io_datafile_post_msgpairs( } else { PVFS_perror_gossip("Send 
immediately failed, cancelling recv", msg->recv_status.error_code); - msg->op_status = msg->send_status.error_code; msg->send_id = 0; @@ -829,12 +996,13 @@ static PINT_sm_action io_datafile_post_msgpairs( cur_ctx->msg_send_has_been_posted = 1; ++sm_p->u.io.msgpair_completion_count; } - } + }/*end for*/ gossip_debug(GOSSIP_IO_DEBUG, "io_datafile_post_msgpairs: " "completion count is %d\n", sm_p->u.io.msgpair_completion_count); + /* if anything posted, just wait for that to complete, else * go sleep then try the remaining msgpairs again */ if (sm_p->u.io.msgpair_completion_count @@ -843,35 +1011,219 @@ static PINT_sm_action io_datafile_post_msgpairs( return SM_ACTION_DEFERRED; /* means go find another machine to run */ else { js_p->error_code = IO_RETRY; - return SM_ACTION_COMPLETE; /* means look at error_code and run my machine again */ + return SM_ACTION_COMPLETE; /* means look at error_code and run my */ + /* machine again. */ } } /* * For IO retry, come here to sleep a bit then go back and post - * some more msgpairs. + * some more msgpairs. If mirroring, then we have more setup before a + * retry can happen. Also, the retry-count is calculated differently. */ -static int -io_datafile_post_msgpairs_retry( - struct PINT_smcb *smcb, job_status_s *js_p) +static int io_datafile_post_msgpairs_retry (struct PINT_smcb *smcb + ,job_status_s *js_p) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PVFS_object_attr *attr = &(sm_p->getattr.attr); + struct PINT_client_io_sm *io = &(sm_p->u.io); + + gossip_debug(GOSSIP_IO_DEBUG,"Executing io_datafile_post_msgpairs_retry...\n"); + + /* Are we mirroring on a READ request? 
*/ + if ( (attr->mask & PVFS_ATTR_META_MIRROR_DFILES) && + io->io_type == PVFS_IO_READ ) + { + js_p->error_code = IO_MIRRORING; + return SM_ACTION_COMPLETE; + } + + js_p->error_code = IO_NO_MIRRORING; + return SM_ACTION_COMPLETE; +} + + +static int io_datafile_mirror_retry(struct PINT_smcb *smcb + ,job_status_s *js_p ) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PVFS_object_attr *attr = &(sm_p->getattr.attr); + PVFS_metafile_attr *meta = &(attr->u.meta); + struct PINT_client_io_sm *io = &(sm_p->u.io); + PINT_sm_msgpair_state *msg = NULL; + PINT_client_io_ctx *ctx = NULL; + uint32_t index = 0; + uint32_t copies = 0; + int i,j,ret; + char *enc_req_bytes = NULL; + + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing %s...\n",__func__); + gossip_debug(GOSSIP_IO_DEBUG,"Executing io_datafile_mirror_retry...\n"); + + /* Have we exhausted the number of retries */ + if (io->retry_count >= sm_p->msgarray_op.params.retry_limit) + { + js_p->error_code = IO_DATAFILE_TRANSFERS_COMPLETE; + return SM_ACTION_COMPLETE; + } + + /* Find failed contexts and prepare them for retry. + */ + for (i=0; icontext_count; i++) + { + ctx = &(io->contexts[i]); + msg = &(ctx->msg); + if (ctx->msg_recv_has_been_posted && ctx->msg_send_has_been_posted) + /* this context has not failed. 
*/ + continue; + + /* cleanup the failed context */ + enc_req_bytes = (char *)&(msg->encoded_req); + for (j=0; jencoded_req); j++) + { + if (enc_req_bytes[j] != '\0') + { + PINT_encode_release(&(msg->encoded_req),PINT_ENCODE_REQ); + break; + } + }/*end for*/ + if (msg->encoded_resp_p) + { + BMI_memfree(msg->svr_addr + ,msg->encoded_resp_p + ,msg->max_resp_sz + ,BMI_RECV); + } + memset(&(msg->encoded_req),0,sizeof(msg->encoded_req)); + memset(&(msg->svr_addr),0,sizeof(msg->svr_addr)); + msg->encoded_resp_p = NULL; + + /* use the primary data handle */ + if (ctx->retry_original) + { + /* setup context to retry the original */ + ctx->data_handle = meta->dfile_array[ctx->server_nr]; + ctx->retry_original = 0; + msg->handle = ctx->data_handle; + msg->req.u.io.handle = ctx->data_handle; + ret=PINT_cached_config_map_to_server(&(msg->svr_addr) + ,msg->handle + ,msg->fs_id); + if (ret) + { + gossip_lerr("Unable to determine the server address " + "for this handle (%llu)" + ,llu(msg->handle)); + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + PINT_flow_reset(&(ctx->flow_desc)); + continue; + } + + /* get next mirrored handle. note: if a mirrored handle is zero, then + * this means that the creation of this mirrored object failed for its + * particular server. if so, then get the next valid handle. as a + * last resort, retry the original handle. 
+ */ + copies = ctx->current_copies_count; + for (;copies < meta->mirror_copies_count; copies++) + { + index = (copies*meta->dfile_count) + ctx->server_nr; + if (meta->mirror_dfile_array[index] != 0) + { /* we have found a valid mirrored handle */ + ctx->data_handle = meta->mirror_dfile_array[index]; + break; + } + } + + /* we have NOT found a valid mirrored handle, so retry the primary */ + if ( copies == meta->mirror_copies_count ) + { + ctx->data_handle = meta->dfile_array[ctx->server_nr]; + ctx->retry_original = 0; + ctx->current_copies_count = 0; + io->retry_count++; + /* setup context to retry original */ + msg->handle = ctx->data_handle; + msg->req.u.io.handle = ctx->data_handle; + ret=PINT_cached_config_map_to_server(&(msg->svr_addr) + ,msg->handle + ,msg->fs_id); + if (ret) + { + gossip_lerr("Unable to determine the server address " + "for this handle (%llu)" + ,llu(msg->handle)); + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + PINT_flow_reset(&(ctx->flow_desc)); + continue; + } + + /* setup the context for the discovered mirrored handle */ + msg->handle = ctx->data_handle; + msg->req.u.io.handle = ctx->data_handle; + ret=PINT_cached_config_map_to_server(&(msg->svr_addr) + ,msg->handle + ,msg->fs_id); + if (ret) + { + gossip_lerr("Unable to determine the server address " + "for this handle (%llu)" + ,llu(msg->handle)); + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + PINT_flow_reset(&(ctx->flow_desc)); + + + /* setup for the NEXT io-retry event for this context */ + ctx->current_copies_count++; + if ( ctx->current_copies_count == meta->mirror_copies_count ) + {/* we have gone through all of the mirrored handles, after this + * iteration executes; so, indicate original for the next retry event. 
+ */ + ctx->current_copies_count = 0; + ctx->retry_original = 1; + io->retry_count++; + } + }/*end for each context*/ + + /* sleep a small while before starting the next round of retries */ + return (job_req_sched_post_timer(sm_p->msgarray_op.params.retry_delay + ,smcb + ,0 + ,js_p + ,NULL + ,pint_client_sm_context)); + +}/*end io_datafile_mirror_retry*/ + +static int io_datafile_no_mirror_retry(struct PINT_smcb *smcb + ,job_status_s *js_p) { struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); /* give up if beyond retry limit */ ++sm_p->u.io.retry_count; - if (sm_p->u.io.retry_count > sm_p->msgarray_params.retry_limit) { - gossip_debug(GOSSIP_CLIENT_DEBUG, "%s: retry %d exceeds limit %d\n", - __func__, sm_p->u.io.retry_count, sm_p->msgarray_params.retry_delay); + if (sm_p->u.io.retry_count > sm_p->msgarray_op.params.retry_limit) { + gossip_debug(GOSSIP_CLIENT_DEBUG, "%s: retry %d exceeds limit %d\n" + ,__func__ + ,sm_p->u.io.retry_count + ,sm_p->msgarray_op.params.retry_delay); js_p->error_code = IO_DATAFILE_TRANSFERS_COMPLETE; return SM_ACTION_COMPLETE; } gossip_debug(GOSSIP_CLIENT_DEBUG, "%s: retry %d, wait %d ms\n", __func__, - sm_p->u.io.retry_count, sm_p->msgarray_params.retry_delay); + sm_p->u.io.retry_count, sm_p->msgarray_op.params.retry_delay); - return job_req_sched_post_timer(sm_p->msgarray_params.retry_delay, + return job_req_sched_post_timer(sm_p->msgarray_op.params.retry_delay, smcb, 0, js_p, NULL, pint_client_sm_context); } + /* This state allows us to make sure all posted operations complete and are accounted for. since this handles ALL operation completions, @@ -957,7 +1309,21 @@ static PINT_sm_action io_datafile_complete_operations( cur_ctx->msg_recv_has_been_posted = 0; goto check_next_step; } - + + if(sm_p->u.io.io_type == PVFS_IO_WRITE) + { + /* we expect this write to _not_ succeed immediately, because we + * have not posted the flow yet. 
+ */ + ret = io_post_write_ack_recv(smcb, cur_ctx); + if(ret < 0) + { + PVFS_perror_gossip("Post of write-ack recv failed", ret); + js_p->error_code = ret; + goto check_next_step; + } + } + /* for now we wait to post the flow until we get back * the response from the server for both reads and writes */ @@ -980,17 +1346,6 @@ static PINT_sm_action io_datafile_complete_operations( js_p->error_code = ret; goto check_next_step; } - - if(sm_p->u.io.io_type == PVFS_IO_WRITE) - { - ret = io_post_write_ack_recv(smcb, cur_ctx); - if(ret < 0) - { - PVFS_perror_gossip("Post of write-ack recv failed", ret); - js_p->error_code = ret; - goto check_next_step; - } - } } /* check if we've completed all msgpairs and posted all flows */ @@ -1042,7 +1397,8 @@ static PINT_sm_action io_datafile_complete_operations( gossip_debug(GOSSIP_IO_DEBUG, " matched completed flow for " "context %p%s\n", cur_ctx, ((cur_ctx->write_ack_in_progress ? - " and reset write_recv timeout" : ""))); + + " and reset write_recv timeout" : ""))); cur_ctx->flow_in_progress = 0; sm_p->u.io.flow_completion_count--; @@ -1051,14 +1407,15 @@ static PINT_sm_action io_datafile_complete_operations( /* look for flow error when no write ack is in progress (usually a * read case) */ - if (js_p->error_code < 0 && !cur_ctx->write_ack_in_progress) { + if (js_p->error_code < 0 && !cur_ctx->write_ack_in_progress) + { if ((PVFS_ERROR_CLASS(-js_p->error_code) == PVFS_ERROR_BMI) || (PVFS_ERROR_CLASS(-js_p->error_code) == PVFS_ERROR_FLOW) || (js_p->error_code == -ECONNRESET) || (js_p->error_code == -PVFS_EPROTO)) { /* if this is a an error that we can retry */ - gossip_debug(GOSSIP_IO_DEBUG, + gossip_err( "%s: flow failed, retrying from msgpair\n", __func__); cur_ctx->msg_send_has_been_posted = 0; cur_ctx->msg_recv_has_been_posted = 0; @@ -1066,13 +1423,25 @@ static PINT_sm_action io_datafile_complete_operations( else { /* do not retry on remaining error codes */ - gossip_debug(GOSSIP_IO_DEBUG, + gossip_err( "%s: flow failed, not 
retrying\n", __func__); /* forcing the count high insures that the sm won't restart */ - sm_p->u.io.retry_count = sm_p->msgarray_params.retry_limit; + sm_p->u.io.retry_count = sm_p->msgarray_op.params.retry_limit; } + } + + /*To test fail-over uncomment the following. This will allow the code + * to go through the retry state at least one time on a read operation. + */ + //if (!cur_ctx->write_ack_in_progress && sm_p->u.io.retry_count==0) + //{ + // cur_ctx->msg_send_has_been_posted = 0; + // cur_ctx->msg_recv_has_been_posted = 0; + //} + + } else if (STATUS_USER_TAG_TYPE(status_user_tag, IO_SM_PHASE_FINAL_ACK)) { @@ -1115,7 +1484,7 @@ static PINT_sm_action io_datafile_complete_operations( /* bump up the retry count to prevent the state machine from * restarting after this error propigates */ - sm_p->u.io.retry_count = sm_p->msgarray_params.retry_limit; + sm_p->u.io.retry_count = sm_p->msgarray_op.params.retry_limit; } } } @@ -1123,23 +1492,27 @@ static PINT_sm_action io_datafile_complete_operations( check_next_step: /* - * If something is pending, return SM_ACTION_DEFERRED to let SM find the next thing - * to do. + * If something is pending, return SM_ACTION_DEFERRED to let SM find the + * next thing to do. 
*/ if (sm_p->u.io.msgpair_completion_count || sm_p->u.io.flow_completion_count || sm_p->u.io.write_ack_completion_count) { if (PINT_smcb_cancelled(smcb)) + { gossip_debug(GOSSIP_IO_DEBUG, "detected I/O cancellation with " "%d flows and %d write acks pending\n", sm_p->u.io.flow_completion_count, sm_p->u.io.write_ack_completion_count); + } else + { gossip_debug(GOSSIP_IO_DEBUG, " %d flows pending, %d write acks " "pending, %d msgpair\n", sm_p->u.io.flow_completion_count, sm_p->u.io.write_ack_completion_count, sm_p->u.io.msgpair_completion_count); + } return SM_ACTION_DEFERRED; } @@ -1236,8 +1609,7 @@ static PINT_sm_action io_analyze_results( (i + 1), sm_p->u.io.datafile_count, lld(sm_p->u.io.total_size)); } - gossip_debug( - GOSSIP_IO_DEBUG, "[%d/%d] running size is %lld\n", + gossip_debug(GOSSIP_IO_DEBUG, "[%d/%d] running size is %lld\n", (i + 1), sm_p->u.io.datafile_count, lld(sm_p->u.io.total_size)); } @@ -1278,7 +1650,7 @@ static PINT_sm_action io_analyze_results( if (((PVFS_ERROR_CLASS(-ret) == PVFS_ERROR_BMI) || (PVFS_ERROR_CLASS(-ret) == PVFS_ERROR_FLOW) || (ret == -ECONNRESET) || (ret == -PVFS_EPROTO)) && - (sm_p->u.io.retry_count < sm_p->msgarray_params.retry_limit)) + (sm_p->u.io.retry_count < sm_p->msgarray_op.params.retry_limit)) { assert(!PINT_smcb_cancelled(smcb)); @@ -1471,10 +1843,8 @@ static PINT_sm_action io_analyze_results( * we need to go and get them */ - /* this sets the array that gets filled in by the - * getattr_datafile_sizes states machine we jump to - */ - sm_p->getattr.size_array = sm_p->u.io.dfile_size_array; + /* NOTE: when jumping to getattr_datafile_sizes, results will be */ + /* allocated and stored in sm_p->getattr.size_array. 
*/ /* setting this state result will cause the state machine to * jump to getattr_datafile_sizes and get all the @@ -1537,7 +1907,7 @@ static PINT_sm_action io_analyze_size_results( logical_file_size( attr->u.meta.dist->params, attr->u.meta.dfile_count, - sm_p->u.io.dfile_size_array); + sm_p->getattr.size_array); if(eof > eor) { eor = eof; @@ -1550,7 +1920,7 @@ static PINT_sm_action io_analyze_size_results( ret = io_zero_fill_holes(sm_p, eor, attr->u.meta.dfile_count, - sm_p->u.io.dfile_size_array, + sm_p->getattr.size_array, NULL); if(ret < 0) { @@ -1573,7 +1943,7 @@ static PINT_sm_action io_analyze_size_results( ret = io_zero_fill_holes(sm_p, eof, attr->u.meta.dfile_count, - sm_p->u.io.dfile_size_array, + sm_p->getattr.size_array, NULL); if(ret < 0) { @@ -1607,6 +1977,12 @@ static PINT_sm_action io_cleanup( PINT_SM_DATAFILE_SIZE_ARRAY_DESTROY(&sm_p->u.io.dfile_size_array); } + /*these errors occur only within THIS machine and indicate an error that + *occurred BEFORE starting msgpairs or small-io. 
+ */ + if (js_p->error_code == IO_FATAL_ERROR) + js_p->error_code = sm_p->u.io.stored_error_code; + sm_p->error_code = js_p->error_code; if (sm_p->error_code) @@ -1667,6 +2043,8 @@ static inline int io_decode_ack_response( { PVFS_perror_gossip("io_process_context_recv (op_status)", cur_ctx->msg.op_status); + gossip_err("server: %s\n" + , BMI_addr_rev_lookup(cur_ctx->msg.svr_addr)); ret = cur_ctx->msg.op_status; } @@ -1674,6 +2052,8 @@ static inline int io_decode_ack_response( &cur_ctx->msg.encoded_req, cur_ctx->msg.encoded_resp_p, decoded_resp, &cur_ctx->msg.svr_addr, cur_ctx->msg.max_resp_sz); + memset(&cur_ctx->msg.encoded_req,0,sizeof(cur_ctx->msg.encoded_req)); + cur_ctx->msg.encoded_resp_p = NULL; } return ret; } @@ -1788,7 +2168,7 @@ static inline int io_post_flow( &cur_ctx->flow_desc, smcb, status_user_tag, &cur_ctx->flow_status, &cur_ctx->flow_job_id, pint_client_sm_context, - server_config->client_job_flow_timeout); + server_config->client_job_flow_timeout, sm_p->hints); PINT_put_server_config_struct(server_config); /* if the flow fails immediately, then we have to do some special @@ -1878,7 +2258,7 @@ static inline int io_post_write_ack_recv( cur_ctx->write_ack.max_resp_sz, cur_ctx->session_tag, BMI_PRE_ALLOC, smcb, status_user_tag, &cur_ctx->write_ack.recv_status, &cur_ctx->write_ack.recv_id, - pint_client_sm_context, JOB_TIMEOUT_INF); + pint_client_sm_context, JOB_TIMEOUT_INF, sm_p->hints); if (ret < 0) { @@ -2033,6 +2413,8 @@ static inline int io_process_context_recv( &cur_ctx->msg.encoded_req, cur_ctx->msg.encoded_resp_p, &decoded_resp, &cur_ctx->msg.svr_addr, cur_ctx->msg.max_resp_sz); + memset(&cur_ctx->msg.encoded_req,0,sizeof(cur_ctx->msg.encoded_req)); + cur_ctx->msg.encoded_resp_p = NULL; if (ret) { @@ -2784,6 +3166,142 @@ static void io_contexts_destroy(PINT_client_sm *sm_p) sm_p->u.io.context_count = 0; } +/* unstuff_needed() + * + * looks at the I/O pattern requested and compares against the distribution + * to determine if a stuffed file 
would have to be "unstuffed" in order to + * service the request + * + * returns IO_UNSTUFF if unstuff is needed + * returns IO_GETATTR_SERVER if current stuffed status needs to be confirmed + * returns 0 otherwise + */ +static int unstuff_needed( + PVFS_Request mem_req, + PVFS_offset file_req_offset, + PINT_dist *dist_p, + uint32_t mask, + enum PVFS_io_type io_type) +{ + PVFS_offset max_offset = 0; + PVFS_offset first_unstuffed_offset = 0; + PINT_request_file_data fake_file_data; + + gossip_debug(GOSSIP_IO_DEBUG, "sys-io checking to see if file should be unstuffed.\n"); + + /* check the flag first to see if file is already explicitly marked as + * unstuffed + */ + if(mask & PVFS_ATTR_META_UNSTUFFED) + { + gossip_debug(GOSSIP_IO_DEBUG, "sys-io detected file is already unstuffed.\n"); + return(0); + } + + /* calculate maximum logical file offset from the callers's parameters */ + /* file request is tiled, so we only need to know the beginning file + * offset and size of the memory offset */ + max_offset = file_req_offset + PINT_REQUEST_TOTAL_BYTES(mem_req); + + gossip_debug(GOSSIP_IO_DEBUG, "sys-io calculated max offset of I/O request as %lld.\n", lld(max_offset)); + + + /* we need to query the distribution to determine what the first offset + * is that does not belong to the first server/datafile. 
We construct a + * fake server data struct for 2 servers and find out what the first + * offset (above zero) is that hits the second server */ + fake_file_data.dist = dist_p; + fake_file_data.server_ct = 2; + fake_file_data.extend_flag = 1; + fake_file_data.fsize = 0; + fake_file_data.server_nr = 1; + + /* call next mapped offset to find the next logical offset that appears + * on the 2nd server + */ + first_unstuffed_offset = dist_p->methods->next_mapped_offset( + dist_p->params, + &fake_file_data, + 0); + + gossip_debug(GOSSIP_IO_DEBUG, "sys-io calculated first unstuffed offset as %lld.\n", lld(first_unstuffed_offset)); + + /* compare to see if the file needs to be unstuffed yet */ + if(max_offset > first_unstuffed_offset) + { + if(io_type == PVFS_IO_READ) + { + /* reads should not unstuff, but we do need to confirm that the + * attributes are up to date before proceeding + */ + gossip_debug(GOSSIP_IO_DEBUG, "sys-io will perform an extra getattr to confirm file is still stuffed.\n"); + return(IO_GETATTR_SERVER); + } + else + { + gossip_debug(GOSSIP_IO_DEBUG, "sys-io will unstuff the file.\n"); + return(IO_UNSTUFF); + } + } + + gossip_debug(GOSSIP_IO_DEBUG, "sys-io will not unstuff the file.\n"); + return(0); +} + +/* unstuff_comp_fn() + * + * completion function for unstuff msgpair array + */ +static int unstuff_comp_fn( + void *v_p, + struct PVFS_server_resp *resp_p, + int i) +{ + PINT_smcb *smcb = v_p; + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + + gossip_debug(GOSSIP_IO_DEBUG, + "unstuff/getattr completion fn: unstuff_comp_fn\n"); + + /* only posted one msgpair */ + assert(i==0); + + if (resp_p->status != 0) + { + gossip_debug(GOSSIP_IO_DEBUG, + "unstuff negative response with error code: %d\n", + resp_p->status); + return resp_p->status; + } + + assert(resp_p->op == PVFS_SERV_UNSTUFF || resp_p->op == + PVFS_SERV_GETATTR); + + if(resp_p->op == PVFS_SERV_UNSTUFF) + { + PINT_acache_update(sm_p->object_ref, + &resp_p->u.unstuff.attr, + 
NULL); + + /* replace attrs found by getattr */ + /* PINT_copy_object_attr() takes care of releasing old memory */ + PINT_copy_object_attr(&sm_p->getattr.attr, &resp_p->u.unstuff.attr); + } + else + { + gossip_debug(GOSSIP_CLIENT_DEBUG, "Updating attributes before reading beyond stuffing boundary.\n"); + PINT_acache_update(sm_p->object_ref, + &resp_p->u.getattr.attr, + NULL); + + /* replace attrs found by getattr */ + /* PINT_copy_object_attr() takes care of releasing old memory */ + PINT_copy_object_attr(&sm_p->getattr.attr, &resp_p->u.getattr.attr); + } + + return(0); +} + /* * Local variables: * mode: c diff --git a/src/client/sysint/sys-list-eattr.sm b/src/client/sysint/sys-list-eattr.sm index 6945af2..1510e9d 100644 --- a/src/client/sysint/sys-list-eattr.sm +++ b/src/client/sysint/sys-list-eattr.sm @@ -6,7 +6,9 @@ #include #include +#ifndef WIN32 #include +#endif #include "client-state-machine.h" #include "pvfs2-debug.h" @@ -57,9 +59,11 @@ PVFS_error PVFS_isys_listeattr( const PVFS_credentials *credentials, PVFS_sysresp_listeattr *resp_p, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { int ret = -PVFS_EINVAL; + int i = 0; PINT_smcb *smcb = NULL; PINT_client_sm *sm_p = NULL; @@ -73,6 +77,30 @@ PVFS_error PVFS_isys_listeattr( return ret; } + /* the resp key array is used to determine how much we allocate, + * let's enfore the max key buffer of PVFS_MAX_XATTR_NAMELEN + */ + + if( nkey > PVFS_REQ_LIMIT_EATTR_LIST ) + { + gossip_debug(GOSSIP_CLIENT_DEBUG, "%s: requested number of keys, %d, " + " is larger than the maximum of %d\n", __func__, + resp_p->nkey, PVFS_REQ_LIMIT_EATTR_LIST ); + return ret; + } + + for( i=0; i < nkey; i++ ) + { + if( resp_p->key_array[i].buffer_sz > PVFS_MAX_XATTR_NAMELEN ) + { + gossip_debug(GOSSIP_CLIENT_DEBUG, "%s: key_array item %d " + "buffer_sz of %d is too big (max value %d)\n", + __func__, i, resp_p->key_array[i].buffer_sz, + PVFS_MAX_XATTR_NAMELEN); + return ret; + } + } + PINT_smcb_alloc(&smcb, PVFS_SYS_LISTEATTR, 
sizeof(struct PINT_client_sm), client_op_state_get_machine, @@ -84,13 +112,14 @@ PVFS_error PVFS_isys_listeattr( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, ref.fs_id); + PINT_init_msgarray_params(sm_p, ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.listeattr.nkey = nkey; sm_p->u.listeattr.resp_p = resp_p; sm_p->u.listeattr.pos_token = token; sm_p->error_code = 0; sm_p->object_ref = ref; + PVFS_hint_copy(hints, &sm_p->hints); return PINT_client_state_machine_post( smcb, op_id, user_ptr); @@ -101,7 +130,8 @@ PVFS_error PVFS_sys_listeattr( PVFS_ds_position token, int32_t nkey, const PVFS_credentials *credentials, - PVFS_sysresp_listeattr *resp_p) + PVFS_sysresp_listeattr *resp_p, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; @@ -109,7 +139,7 @@ PVFS_error PVFS_sys_listeattr( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_listeattr entered\n"); ret = PVFS_isys_listeattr(ref, token, nkey, credentials, - resp_p, &op_id, NULL); + resp_p, &op_id, hints, NULL); if (ret) { @@ -136,6 +166,7 @@ static PINT_sm_action list_eattr_setup_msgpair( struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret = -PVFS_EINVAL; int i; + PINT_sm_msgpair_state *msg_p; gossip_debug(GOSSIP_CLIENT_DEBUG, "list_eattr state: list_eattr_setup_msgpair\n"); @@ -149,33 +180,37 @@ static PINT_sm_action list_eattr_setup_msgpair( sm_p->u.listeattr.size_array[i] = sm_p->u.listeattr.resp_p->key_array[i].buffer_sz; + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; + PINT_SERVREQ_LISTEATTR_FILL( - sm_p->msgpair.req, + msg_p->req, (*sm_p->cred_p), sm_p->object_ref.fs_id, sm_p->object_ref.handle, sm_p->u.listeattr.pos_token, sm_p->u.listeattr.nkey, - sm_p->u.listeattr.size_array + sm_p->u.listeattr.size_array, + sm_p->hints ); - sm_p->msgarray = &(sm_p->msgpair); - sm_p->msgarray_count = 1; - sm_p->msgpair.fs_id = sm_p->object_ref.fs_id; 
- sm_p->msgpair.handle = sm_p->object_ref.handle; - sm_p->msgpair.retry_flag = PVFS_MSGPAIR_RETRY; - sm_p->msgpair.comp_fn = list_eattr_comp_fn; + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = sm_p->object_ref.handle; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = list_eattr_comp_fn; ret = PINT_cached_config_map_to_server( - &sm_p->msgpair.svr_addr, - sm_p->msgpair.handle, - sm_p->msgpair.fs_id); + &msg_p->svr_addr, + msg_p->handle, + msg_p->fs_id); if (ret) { gossip_err("Failed to map meta server address\n"); js_p->error_code = 0; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -200,7 +235,7 @@ static int list_eattr_comp_fn( { int ret = 0; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); gossip_debug(GOSSIP_CLIENT_DEBUG, "list_eattr completion fn: list_eattr_comp_fn\n"); @@ -212,9 +247,9 @@ static int list_eattr_comp_fn( { return resp_p->status; } - if (sm_p->msgarray[i].op_status != 0) + if (sm_p->msgarray_op.msgarray[i].op_status != 0) { - return sm_p->msgarray[i].op_status; + return sm_p->msgarray_op.msgarray[i].op_status; } sm_p->u.listeattr.resp_p->token = resp_p->u.listeattr.token; sm_p->u.listeattr.resp_p->nkey = resp_p->u.listeattr.nkey; diff --git a/src/client/sysint/sys-lookup.sm b/src/client/sysint/sys-lookup.sm index 6815684..9ba50a4 100644 --- a/src/client/sysint/sys-lookup.sm +++ b/src/client/sysint/sys-lookup.sm @@ -23,7 +23,6 @@ #include "pint-cached-config.h" #include "PINT-reqproto-encode.h" #include "pvfs2-internal.h" -#include "osd-util/osd-util.h" extern job_context_id pint_client_sm_context; @@ -36,10 +35,6 @@ extern job_context_id pint_client_sm_context; #define GET_CURRENT_SEGMENT(__sm_p) \ (GET_SEGMENT_AT(__sm_p, (GET_CURRENT_CONTEXT(__sm_p))->current_segment)) -enum { - OSD_MSGPAIR = 2001 -}; - enum { LOOKUP_CONTINUE = 2, @@ -84,17 +79,9 @@ machine 
pvfs2_client_lookup_sm { run lookup_segment_setup_msgpair; success => lookup_segment_lookup_xfer_msgpair; - OSD_MSGPAIR => lookup_segment_lookup_xfer_osd_msgpair; default => lookup_segment_lookup_failure; } - state lookup_segment_lookup_xfer_osd_msgpair - { - jump pvfs2_osd_msgpairarray_sm; - success => lookup_segment_verify_attr_present; - default => lookup_segment_lookup_failure; - } - state lookup_segment_lookup_xfer_msgpair { jump pvfs2_msgpairarray_sm; @@ -184,9 +171,11 @@ static int initialize_context( if(lookup_sm->current_context == lookup_sm->context_count) { + /* we have usee the last available context */ lookup_sm->context_count++; if(lookup_sm->context_count == 1) { + /* first context, so allocate one */ lookup_sm->contexts = malloc(sizeof(PINT_client_lookup_sm_ctx)); if(!lookup_sm->contexts) { @@ -195,6 +184,7 @@ static int initialize_context( } else { + /* no the first one, so realloc to get one more */ lookup_sm->contexts = realloc(lookup_sm->contexts, sizeof(PINT_client_lookup_sm_ctx) * lookup_sm->context_count); @@ -205,203 +195,211 @@ static int initialize_context( } } - if (pathname && (lookup_sm->current_context > -1)) + if (!pathname) { - ctx = &lookup_sm->contexts[lookup_sm->current_context]; - assert(ctx); + return ret; + } + pathlen = strlen(pathname); + num_segments = PINT_string_count_segments(pathname); - prev_ctx_index = (lookup_sm->current_context - 1); + if ((pathlen == 0) || (num_segments == 0)) + { + return ret; + } - pathlen = strlen(pathname); - num_segments = PINT_string_count_segments(pathname); + if ((pathlen > (PVFS_REQ_LIMIT_PATH_NAME_BYTES - 1)) || + (num_segments > PVFS_REQ_LIMIT_PATH_SEGMENT_COUNT)) + { + gossip_err("Filename %s is too long\n", pathname); + return -PVFS_ENAMETOOLONG; + } - if ((pathlen == 0) || (num_segments == 0)) - { - return ret; - } + /* initialize new context */ + if (lookup_sm->current_context < 0) + { + return ret; + } + ctx = &lookup_sm->contexts[lookup_sm->current_context]; + assert(ctx); - if 
((pathlen > (PVFS_REQ_LIMIT_PATH_NAME_BYTES - 1)) || - (num_segments > PVFS_REQ_LIMIT_PATH_SEGMENT_COUNT)) - { - gossip_err("Filename %s is too long\n", pathname); - return -PVFS_ENAMETOOLONG; - } + prev_ctx_index = (lookup_sm->current_context - 1); - memset(ctx, 0, sizeof(PINT_client_lookup_sm_ctx)); + memset(ctx, 0, sizeof(PINT_client_lookup_sm_ctx)); - ctx->current_segment = 0; - ctx->total_segments = 0; - ctx->ctx_starting_refn = ctx_starting_refn; + ctx->current_segment = 0; + ctx->total_segments = 0; + ctx->ctx_starting_refn = ctx_starting_refn; - /* initialize all segments within the context */ - orig_pathname = strdup(pathname); - gossip_debug(GOSSIP_LOOKUP_DEBUG, " original pathname is: %s\n", + /* initialize all segments within the context */ + orig_pathname = strdup(pathname); + gossip_debug(GOSSIP_LOOKUP_DEBUG, " original pathname is: %s\n", orig_pathname); - while(!PINT_string_next_segment(pathname,&cur_seg_name,&state)) - { - /* grab the next segment in the context to fill in */ - cur_seg = &(ctx->segments[cur_seg_index]); - assert(cur_seg); - memset(cur_seg, 0, sizeof(PINT_client_lookup_sm_segment)); + while(!PINT_string_next_segment(pathname, &cur_seg_name, &state)) + { + /* grab the next segment in the context to fill in */ + cur_seg = &(ctx->segments[cur_seg_index]); + assert(cur_seg); + memset(cur_seg, 0, sizeof(PINT_client_lookup_sm_segment)); - gossip_debug(GOSSIP_LOOKUP_DEBUG, " cur_seg_name[%d]: %s\n", - cur_seg_index, cur_seg_name); - gossip_debug(GOSSIP_LOOKUP_DEBUG, " pathname is: %s\n", - pathname); + gossip_debug(GOSSIP_LOOKUP_DEBUG, " cur_seg_name[%d]: %s\n", + cur_seg_index, cur_seg_name); + gossip_debug(GOSSIP_LOOKUP_DEBUG, " pathname is: %s\n", + pathname); - if (strcmp(cur_seg_name,".") == 0) - { - /* reset the count of consecutive dot dot segments */ - num_consecutive_prev_ctx_dot_dots = 0; + if (strcmp(cur_seg_name,".") == 0) + { + /* reset the count of consecutive dot dot segments */ + num_consecutive_prev_ctx_dot_dots = 0; - 
gossip_debug(GOSSIP_LOOKUP_DEBUG, + gossip_debug(GOSSIP_LOOKUP_DEBUG, " ignoring useless segment\n"); - continue; - } - else if (strcmp(cur_seg_name,"..") == 0) + continue; + } + else if (strcmp(cur_seg_name,"..") == 0) + { + /* + if this isn't true, we need to + grab the previous context's previous segment + + if this weren't true, we'd normally: + assert(cur_seg_index > 0); + */ + if ((cur_seg_index < 1) || + (num_consecutive_prev_ctx_dot_dots > 0)) { - /* - if this isn't true, we need to - grab the previous context's previous segment + PINT_client_lookup_sm_segment *prev_ctx_prev_seg; - if this weren't true, we'd normally: - assert(cur_seg_index > 0); + gossip_debug( + GOSSIP_LOOKUP_DEBUG, " got a '..' segment that " + "requires attention of the previous context\n"); + + init_next_prev_segment: + /* + grab the previous context to access the segments + within it, assuming a previous context is available */ - if ((cur_seg_index < 1) || - (num_consecutive_prev_ctx_dot_dots > 0)) + if (prev_ctx_index < 0) { - PINT_client_lookup_sm_segment *prev_ctx_prev_seg; + gossip_debug(GOSSIP_LOOKUP_DEBUG, "there are no " + "more previous contexts available: " + "failing lookup\n"); + free(orig_pathname); + return -PVFS_ENOENT; + } - gossip_debug( - GOSSIP_LOOKUP_DEBUG, " got a '..' 
segment that " - "requires attention of the previous context\n"); - - init_next_prev_segment: - /* - grab the previous context to access the segments - within it, assuming a previous context is - available - */ - if (prev_ctx_index < 0) - { - gossip_debug(GOSSIP_LOOKUP_DEBUG, "there are no " - "more previous contexts available: " - "failing lookup\n"); - free(orig_pathname); - return -PVFS_ENOENT; - } + prev_ctx = &lookup_sm->contexts[prev_ctx_index]; + assert(prev_ctx); + assert(prev_ctx->current_segment > 0); - prev_ctx = &lookup_sm->contexts[prev_ctx_index]; - assert(prev_ctx); - assert(prev_ctx->current_segment > 0); + num_consecutive_prev_ctx_dot_dots++; + gossip_debug( + GOSSIP_LOOKUP_DEBUG, "num consecutive '..' " + "segments requiring the previous segment " + "is now %d\n", num_consecutive_prev_ctx_dot_dots); - num_consecutive_prev_ctx_dot_dots++; - gossip_debug( - GOSSIP_LOOKUP_DEBUG, "num consecutive '..' " - "segments requiring the previous segment " - "is now %d\n", num_consecutive_prev_ctx_dot_dots); - - /* - further, if we have a number of consecutive '..' - segments, we may need to keep backing up into - the previous contexts' space - */ - if (prev_ctx->current_segment - - num_consecutive_prev_ctx_dot_dots < 0) + /* + further, if we have a number of consecutive '..' 
+ segments, we may need to keep backing up into + the previous contexts' space + */ + if (prev_ctx->current_segment - + num_consecutive_prev_ctx_dot_dots < 0) + { + /* skip to next previous context, if any */ + if (prev_ctx_index > -1) { - /* skip to next previous context, if any */ - if (prev_ctx_index > -1) - { - /* - bump down dot dot count since it wasn't - used yet if we got here - */ - num_consecutive_prev_ctx_dot_dots--; - prev_ctx_index--; - goto init_next_prev_segment; - } - gossip_debug( - GOSSIP_LOOKUP_DEBUG, "there are no more segments " - "in the previous context: failing lookup\n"); - free(orig_pathname); - return -PVFS_ENOENT; + /* + bump down dot dot count since it wasn't + used yet if we got here + */ + num_consecutive_prev_ctx_dot_dots--; + prev_ctx_index--; + goto init_next_prev_segment; } - - prev_ctx_prev_seg = &prev_ctx->segments[ - prev_ctx->current_segment - - num_consecutive_prev_ctx_dot_dots]; - assert(prev_ctx_prev_seg); - - /* - instead of decrementing the seg index and - continuing, we need to replace the last segment - copied from the last context in this case. (so - we drop through to segment init) - */ - ctx_starting_refn = - prev_ctx_prev_seg->seg_starting_refn; - cur_seg_name = prev_ctx_prev_seg->seg_name; gossip_debug( - GOSSIP_LOOKUP_DEBUG, - "using previous segment: %s\n", cur_seg_name); - - cur_seg_index--; - } - else - { - gossip_debug(GOSSIP_LOOKUP_DEBUG, - " got a '..' segment\n"); - cur_seg_index--; - continue; + GOSSIP_LOOKUP_DEBUG, "there are no more segments " + "in the previous context: failing lookup\n"); + free(orig_pathname); + return -PVFS_ENOENT; } + + prev_ctx_prev_seg = &prev_ctx->segments[ + prev_ctx->current_segment - + num_consecutive_prev_ctx_dot_dots]; + assert(prev_ctx_prev_seg); + + /* + instead of decrementing the seg index and + continuing, we need to replace the last segment + copied from the last context in this case. 
(so + we drop through to segment init) + */ + ctx_starting_refn = + prev_ctx_prev_seg->seg_starting_refn; + cur_seg_name = prev_ctx_prev_seg->seg_name; + gossip_debug( + GOSSIP_LOOKUP_DEBUG, + "using previous segment: %s\n", cur_seg_name); + + cur_seg_index--; } else { - /* reset the count of consecutive dot dot segments */ - num_consecutive_prev_ctx_dot_dots = 0; + gossip_debug(GOSSIP_LOOKUP_DEBUG, + " got a '..' segment\n"); + cur_seg_index--; + continue; } + } + else + { + /* reset the count of consecutive dot dot segments */ + num_consecutive_prev_ctx_dot_dots = 0; + } - /* - fill in the current segment now. the first segment - MUST have the same starting refn as the context - */ - if (cur_seg_index == 0) - { - cur_seg->seg_starting_refn = ctx_starting_refn; - } - if (cur_seg->seg_name) - { - free(cur_seg->seg_name); - } - cur_seg->seg_name = strdup(cur_seg_name); - assert(cur_seg->seg_name); - - slash_str = orig_pathname; - for (i = 0; i < cur_seg_index; i++) { - slash_str = strchr(slash_str, '/'); - if (slash_str == NULL) { - break; - } - slash_str++; - } - /* seg_remaining = strstr(orig_pathname, cur_seg_name); */ - seg_remaining = slash_str; - if (seg_remaining) - { - gossip_debug(GOSSIP_LOOKUP_DEBUG, - " *seg_remaining is: %s\n", seg_remaining); + /* + fill in the current segment now. 
the first segment + MUST have the same starting refn as the context + */ + if (cur_seg_index == 0) + { + cur_seg->seg_starting_refn = ctx_starting_refn; + } + if (cur_seg->seg_name) + { + free(cur_seg->seg_name); + } + cur_seg->seg_name = strdup(cur_seg_name); + assert(cur_seg->seg_name); - cur_seg->seg_remaining = strdup(seg_remaining); - assert(cur_seg->seg_remaining); - } - else + slash_str = orig_pathname; + for (i = 0; i < cur_seg_index; i++) + { + slash_str = strchr(slash_str, '/'); + if (slash_str == NULL) { - cur_seg->seg_remaining = NULL; + break; } + slash_str++; + } + /* seg_remaining = strstr(orig_pathname, cur_seg_name); */ + seg_remaining = slash_str; + if (seg_remaining) + { + gossip_debug(GOSSIP_LOOKUP_DEBUG, + " *seg_remaining is: %s\n", seg_remaining); - cur_seg_index++; + cur_seg->seg_remaining = strdup(seg_remaining); + assert(cur_seg->seg_remaining); } - free(orig_pathname); + else + { + cur_seg->seg_remaining = NULL; + } + + cur_seg_index++; + } + free(orig_pathname); #if 0 /* DEBUGGING ONLY */ @@ -418,10 +416,9 @@ static int initialize_context( } #endif - ctx->total_segments = cur_seg_index; - assert(ctx->current_segment == 0); - ret = 0; - } + ctx->total_segments = cur_seg_index; + assert(ctx->current_segment == 0); + ret = 0; return ret; } @@ -468,6 +465,7 @@ PVFS_error PVFS_isys_ref_lookup( PVFS_sysresp_lookup *resp, int32_t follow_link, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -492,14 +490,15 @@ PVFS_error PVFS_isys_ref_lookup( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, fs_id); + PINT_init_msgarray_params(sm_p, fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.lookup.orig_pathname = relative_pathname; sm_p->u.lookup.starting_refn = parent; sm_p->u.lookup.lookup_resp = resp; sm_p->u.lookup.follow_link = follow_link; sm_p->u.lookup.current_context = 0; - sm_p->u.lookup.context_count = 0; + 
PVFS_hint_copy(hints, &sm_p->hints); + PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle), &parent.handle); ret = initialize_context(&sm_p->u.lookup, relative_pathname, parent); @@ -508,6 +507,13 @@ PVFS_error PVFS_isys_ref_lookup( gossip_err("%s: failed to init context (path = %s)\n", __func__, relative_pathname); + if (ret == -PVFS_ENOENT && sm_p->u.lookup.lookup_resp->error_path) + { + /* copy out error path */ + strncpy(sm_p->u.lookup.lookup_resp->error_path, + relative_pathname, + sm_p->u.lookup.lookup_resp->error_path_size); + } PVFS_util_release_credentials(sm_p->cred_p); PINT_smcb_free(smcb); return ret; @@ -528,7 +534,8 @@ PVFS_error PVFS_sys_ref_lookup( PVFS_object_ref parent, const PVFS_credentials *credentials, PVFS_sysresp_lookup *resp, - int32_t follow_link) + int32_t follow_link, + PVFS_hint hints) { PVFS_error ret, error; PVFS_sys_op_id op_id; @@ -537,7 +544,7 @@ PVFS_error PVFS_sys_ref_lookup( ret = PVFS_isys_ref_lookup( fs_id, relative_pathname, parent, credentials, resp, - follow_link, &op_id, resp); + follow_link, &op_id, hints, resp); if (ret) { @@ -569,7 +576,8 @@ PVFS_error PVFS_sys_lookup( PVFS_fs_id fs_id, char *name, const PVFS_credentials *credentials, PVFS_sysresp_lookup *resp, - int32_t follow_link) + int32_t follow_link, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL; PVFS_object_ref parent; @@ -604,7 +612,7 @@ PVFS_error PVFS_sys_lookup( */ char *path = ((name[0] == '/') ? 
&name[1] : name); ret = PVFS_sys_ref_lookup( - fs_id, path, parent, credentials, resp, follow_link); + fs_id, path, parent, credentials, resp, follow_link, hints); } } return ret; @@ -675,82 +683,38 @@ static PINT_sm_action lookup_segment_setup_msgpair(struct PINT_smcb *smcb, char *seg_to_lookup = NULL; gossip_debug(GOSSIP_CLIENT_DEBUG, "%s\n", __func__); + js_p->error_code = 0; /* do a lookup on the current segment of the current context */ cur_seg = GET_CURRENT_SEGMENT(sm_p); assert(cur_seg); - PINT_init_msgpair(sm_p, msg_p); - - ret = PINT_cached_config_map_to_server(&msg_p->svr_addr, - cur_seg->seg_starting_refn.handle, - cur_seg->seg_starting_refn.fs_id); - if (ret) { - gossip_err("Failed to map meta server address\n"); - js_p->error_code = ret; - return ret; - } - - if (server_is_osd(msg_p->svr_addr)) { - uint64_t oid; - struct attribute_list attr; - struct osd_command *command = &sm_p->msgpair.osd_command; - - /* Disable multi-segment resolution optimization */ - seg_to_lookup = cur_seg->seg_name; - - attr.type = ATTR_GET; - attr.page = PVFS_USEROBJECT_DIR_PG; - attr.number = jenkins_one_at_a_time_hash((uint8_t *)seg_to_lookup, - strlen(seg_to_lookup) + 1); - attr.len = 1024; - - oid = cur_seg->seg_starting_refn.handle; - ret = osd_command_set_get_attributes(command, PVFS_OSD_META_PID, oid); - if (ret) { - osd_error_xerrno(ret, "%s: osd_command_set_get_attributes failed", - __func__); - js_p->error_code = ret; - return SM_ACTION_COMPLETE; - } - - ret = osd_command_attr_build(command, &attr, 1); - if (ret) { - osd_error_xerrno(ret, "%s: osd_command_attr_build failed", - __func__); - js_p->error_code = ret; - return SM_ACTION_COMPLETE; - } - - js_p->error_code = OSD_MSGPAIR; - - } else { - - /* - the pvfs2 lookup_path server operation has an optimization that - allows several path components to be resolved at once. 
this - code here resolves on a segment-by-segment basis, but we can - handle the multi-segment resolution by advancing through all - handles/attrs returned and populating the segments on - completion. the lookup_comp_fn does just this. - - one way to disable use of this optimization is to make sure - seg_to_lookup is set to seg_name, and never seg_remaining. that - guarantees we're issuing a lookup on a single path segment - */ - seg_to_lookup = cur_seg->seg_remaining ? cur_seg->seg_remaining : - cur_seg->seg_name; + /* + the pvfs2 lookup_path server operation has an optimization that + allows several path components to be resolved at once. this + code here resolves on a segment-by-segment basis, but we can + handle the multi-segment resolution by advancing through all + handles/attrs returned and populating the segments on + completion. the lookup_comp_fn does just this. + + one way to disable use of this optimization is to make sure + seg_to_lookup is set to seg_name, and never seg_remaining. that + guarantees we're issuing a lookup on a single path segment + */ + seg_to_lookup = cur_seg->seg_remaining ? 
cur_seg->seg_remaining : + cur_seg->seg_name; - PINT_SERVREQ_LOOKUP_PATH_FILL( - msg_p->req, - *sm_p->cred_p, - seg_to_lookup, - cur_seg->seg_starting_refn.fs_id, - cur_seg->seg_starting_refn.handle, - PVFS_ATTR_COMMON_ALL); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; - js_p->error_code = 0; - } + PINT_SERVREQ_LOOKUP_PATH_FILL( + msg_p->req, + *sm_p->cred_p, + seg_to_lookup, + cur_seg->seg_starting_refn.fs_id, + cur_seg->seg_starting_refn.handle, + PVFS_ATTR_COMMON_ALL, + sm_p->hints); gossip_debug( GOSSIP_LOOKUP_DEBUG, "Looking up segment %s under handle %llu\n", @@ -761,6 +725,16 @@ static PINT_sm_action lookup_segment_setup_msgpair(struct PINT_smcb *smcb, msg_p->retry_flag = PVFS_MSGPAIR_RETRY; msg_p->comp_fn = lookup_segment_lookup_comp_fn; + ret = PINT_cached_config_map_to_server( + &msg_p->svr_addr, msg_p->handle, msg_p->fs_id); + + if (ret) + { + gossip_err("Failed to map meta server address\n"); + js_p->error_code = ret; + } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -949,6 +923,13 @@ static PINT_sm_action lookup_segment_handle_relative_link( cur_seg->seg_starting_refn); if (ret) { + if (ret == -PVFS_ENOENT && sm_p->u.lookup.lookup_resp->error_path) + { + /* copy out error path */ + strncpy(sm_p->u.lookup.lookup_resp->error_path, + relative_symlink_target, + sm_p->u.lookup.lookup_resp->error_path_size); + } js_p->error_code = ret; } return SM_ACTION_COMPLETE; @@ -960,8 +941,9 @@ static PINT_sm_action lookup_segment_handle_absolute_link( struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret = -PVFS_EINVAL; PINT_client_lookup_sm_segment *cur_seg = NULL; - char *absolute_symlink_target = NULL; + char absolute_symlink_target[PVFS_NAME_MAX] = {0}; PVFS_object_ref root_refn; + PVFS_fs_id fs_id; /* NOTE: if we have an absolute link, we need to essentially @@ -978,7 +960,28 @@ static PINT_sm_action lookup_segment_handle_absolute_link( cur_seg = 
GET_CURRENT_SEGMENT(sm_p); assert(cur_seg); - absolute_symlink_target = cur_seg->seg_attr.u.sym.target_path; + ret = PVFS_util_resolve(cur_seg->seg_attr.u.sym.target_path, + &fs_id, + absolute_symlink_target, + PVFS_NAME_MAX); + if (ret < 0) + { + gossip_debug(GOSSIP_CLIENT_DEBUG, + "PVFS_util_resolve returned %d on %s\n", + ret, cur_seg->seg_attr.u.sym.target_path); + if (ret == -PVFS_ENOENT && sm_p->u.lookup.lookup_resp->error_path) + { + /* copy out error path */ + strncpy(sm_p->u.lookup.lookup_resp->error_path, + cur_seg->seg_attr.u.sym.target_path, + /*absolute_symlink_target,*/ + sm_p->u.lookup.lookup_resp->error_path_size); + /* set special error code */ + ret = -PVFS_ENOTPVFS; + } + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } sm_p->u.lookup.current_context++; @@ -995,10 +998,17 @@ static PINT_sm_action lookup_segment_handle_absolute_link( assert(ret == 0); ret = initialize_context(&sm_p->u.lookup, - absolute_symlink_target, + absolute_symlink_target + 1, root_refn); if (ret) { + if (ret == -PVFS_ENOENT && sm_p->u.lookup.lookup_resp->error_path) + { + /* copy out error path */ + strncpy(sm_p->u.lookup.lookup_resp->error_path, + absolute_symlink_target + 1, + sm_p->u.lookup.lookup_resp->error_path_size); + } js_p->error_code = ret; } return SM_ACTION_COMPLETE; @@ -1150,7 +1160,7 @@ static int lookup_segment_lookup_comp_fn( { int i = 0; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); PINT_client_lookup_sm_segment *cur_seg = NULL; int current_seg_index = sm_p->u.lookup.contexts[ sm_p->u.lookup.current_context].current_segment; @@ -1158,45 +1168,6 @@ static int lookup_segment_lookup_comp_fn( gossip_debug(GOSSIP_CLIENT_DEBUG, "lookup_segment_lookup_comp_fn\n"); - if (server_is_osd(sm_p->msgpair.svr_addr)) { - struct osd_command *command = &sm_p->msgpair.osd_command; - int status = osd_errno_from_status(command->status); - int ret; - - if 
(status != 0) { - gossip_debug(GOSSIP_CLIENT_DEBUG, - "osd_command_set_get_attributes failed %d\n", index); - return status; - } - - ret = osd_command_attr_resolve(command); - if (ret) { - osd_error_xerrno(ret, "%s: attr_resolve failed", __func__); - return ret; - } - - /* dirent with this handle not present on server */ - if (command->attr->outlen == 0) - return -PVFS_ENOENT; - - cur_seg = GET_SEGMENT_AT(sm_p, current_seg_index); - cur_seg->seg_resolved_refn.handle = get_ntohll(command->attr->val); - cur_seg->seg_resolved_refn.fs_id = cur_seg->seg_starting_refn.fs_id; - - gossip_debug(GOSSIP_NCACHE_DEBUG, "*** ncache update on %s " - "target (%llu|%d) parent (%llu|%d)\n", - cur_seg->seg_name, llu(cur_seg->seg_resolved_refn.handle), - cur_seg->seg_resolved_refn.fs_id, - llu(cur_seg->seg_starting_refn.handle), - cur_seg->seg_starting_refn.fs_id); - - PINT_ncache_update(cur_seg->seg_name, &cur_seg->seg_resolved_refn, - &cur_seg->seg_starting_refn); - - osd_command_attr_free(command); - return 0; - } - assert(resp_p->op == PVFS_SERV_LOOKUP_PATH); if (resp_p->status != 0) diff --git a/src/client/sysint/sys-mkdir.sm b/src/client/sysint/sys-mkdir.sm index 2c4e246..4dad827 100644 --- a/src/client/sysint/sys-mkdir.sm +++ b/src/client/sysint/sys-mkdir.sm @@ -34,8 +34,9 @@ enum { MKDIR_RETRY = 180, MKDIR_SKIP_EATTR = 181, - OSD_MSGPAIR = 2001, - NO_OSD_HANDLES = 2002, + OSD_MKDIR_MSGPAIR = 2001, + CREATE_COLLECTION = 2002, + SKIP_COLLECTION_CREATE = 2003 }; static int mkdir_msg_comp_fn( @@ -44,6 +45,8 @@ static int mkdir_crdirent_comp_fn( void *v_p, struct PVFS_server_resp *resp_p, int index); static int mkdir_delete_handle_comp_fn( void *v_p, struct PVFS_server_resp *resp_p, int index); +static int create_collection_comp_fn( + void *v_p, struct PVFS_server_resp *resp_p, int index); %% @@ -72,22 +75,14 @@ machine pvfs2_client_mkdir_sm state mkdir_msg_setup_msgpair { run mkdir_msg_setup_msgpair; - OSD_MSGPAIR => mkdir_msg_xfer_osd_msgpair; success => mkdir_msg_xfer_msgpair; 
default => mkdir_msg_failure; } - state mkdir_msg_xfer_osd_msgpair - { - jump pvfs2_osd_msgpairarray_sm; - success => mkdir_seteattr_setup_msgpair; - default => mkdir_msg_failure; - } - state mkdir_msg_xfer_msgpair { jump pvfs2_msgpairarray_sm; - success => mkdir_seteattr_setup_msgpair; + success => create_collection; default => mkdir_msg_failure; } @@ -97,6 +92,21 @@ machine pvfs2_client_mkdir_sm default => cleanup; } + state create_collection + { + run create_collection_setup_msgpair; + success => create_collection_xfer_msgpair; + SKIP_COLLECTION_CREATE => mkdir_seteattr_setup_msgpair; + default => mkdir_crdirent_failure; + } + + state create_collection_xfer_msgpair + { + jump pvfs2_osd_msgpairarray_sm; + success => mkdir_seteattr_setup_msgpair; + default => mkdir_crdirent_failure; + } + state mkdir_seteattr_setup_msgpair { run mkdir_seteattr_setup_msgpair; @@ -121,18 +131,10 @@ machine pvfs2_client_mkdir_sm state mkdir_crdirent_setup_msgpair { run mkdir_crdirent_setup_msgpair; - OSD_MSGPAIR => mkdir_crdirent_osd_msgpair; success => mkdir_crdirent_xfer_msgpair; default => mkdir_crdirent_failure; } - state mkdir_crdirent_osd_msgpair - { - jump pvfs2_client_osd_dirops_sm; - success => cleanup; - default => mkdir_crdirent_failure; - } - state mkdir_crdirent_xfer_msgpair { jump pvfs2_msgpairarray_sm; @@ -149,17 +151,10 @@ machine pvfs2_client_mkdir_sm state delete_handle_setup_msgpair { run mkdir_delete_handle_setup_msgpair; - OSD_MSGPAIR => delete_handle_xfer_osd_msgpair; success => delete_handle_xfer_msgpair; default => cleanup; } - state delete_handle_xfer_osd_msgpair - { - jump pvfs2_osd_msgpairarray_sm; - default => cleanup; - } - state delete_handle_xfer_msgpair { jump pvfs2_msgpairarray_sm; @@ -185,6 +180,7 @@ PVFS_error PVFS_isys_mkdir( const PVFS_credentials *credentials, PVFS_sysresp_mkdir *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -225,13 +221,15 @@ PVFS_error PVFS_isys_mkdir( } sm_p = 
PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, parent_ref.fs_id); + PINT_init_msgarray_params(sm_p, parent_ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.mkdir.object_name = object_name; PVFS_util_copy_sys_attr(&sm_p->u.mkdir.sys_attr, &attr); sm_p->u.mkdir.mkdir_resp = resp; sm_p->u.mkdir.stored_error_code = 0; sm_p->object_ref = parent_ref; + PVFS_hint_copy(hints, &sm_p->hints); + PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle), &parent_ref.handle); gossip_debug(GOSSIP_CLIENT_DEBUG, "Creating directory named %s " "under parent handle %llu on fs %d\n", object_name, @@ -248,7 +246,8 @@ PVFS_error PVFS_sys_mkdir( PVFS_object_ref parent_ref, PVFS_sys_attr attr, const PVFS_credentials *credentials, - PVFS_sysresp_mkdir *resp) + PVFS_sysresp_mkdir *resp, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; @@ -256,7 +255,7 @@ PVFS_error PVFS_sys_mkdir( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_mkdir entered\n"); ret = PVFS_isys_mkdir(object_name, parent_ref, attr, - credentials, resp, &op_id, NULL); + credentials, resp, &op_id, hints, NULL); if (ret) { PVFS_perror_gossip("PVFS_isys_mkdir call", ret); @@ -287,24 +286,24 @@ static PINT_sm_action mkdir_init( gossip_debug(GOSSIP_CLIENT_DEBUG, "mkdir state: init\n"); assert((js_p->error_code == 0) || - (js_p->error_code == MKDIR_RETRY)); + (js_p->error_code == MKDIR_RETRY) || (js_p->error_code == CREATE_COLLECTION)); + + PINT_SM_GETATTR_STATE_FILL( + sm_p->getattr, + sm_p->object_ref, + (PVFS_ATTR_COMMON_ALL|PVFS_ATTR_DIR_HINT), + PVFS_TYPE_DIRECTORY, + 0); if (js_p->error_code == MKDIR_RETRY) { js_p->error_code = 0; return job_req_sched_post_timer( - sm_p->msgarray_params.retry_delay, smcb, 0, js_p, &tmp_id, + sm_p->msgarray_op.params.retry_delay, smcb, 0, js_p, &tmp_id, pint_client_sm_context); } - PINT_SM_GETATTR_STATE_FILL( - sm_p->getattr, - sm_p->object_ref, - 
(PVFS_ATTR_COMMON_ALL|PVFS_ATTR_DIR_HINT), - PVFS_TYPE_DIRECTORY, - 0); - return SM_ACTION_COMPLETE; } @@ -312,44 +311,23 @@ static int mkdir_msg_comp_fn(void *v_p, struct PVFS_server_resp *resp_p, int index) { - int ret; - uint64_t oid; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); PVFS_object_attr attr; - PINT_sm_msgpair_state *msg_p = &sm_p->msgpair; gossip_debug(GOSSIP_CLIENT_DEBUG, "mkdir_msg_comp_fn\n"); - if (server_is_osd(msg_p->svr_addr)) { + assert(resp_p->op == PVFS_SERV_MKDIR); - struct osd_command *command = &sm_p->msgpair.osd_command; - - if (command->status == 0) { - /* Stash the newly created meta handle */ - ret = osd_command_attr_resolve(command); - if (ret) { - osd_error_xerrno(ret, "%s: attr_resolve failed", __func__); - return ret; - } - oid = get_ntohll(command->attr[5].val); - - sm_p->u.mkdir.metafile_handle = oid; - } - - osd_command_attr_free(command); - return osd_errno_from_status(command->status); - - } else { - assert(resp_p->op == PVFS_SERV_MKDIR); - - if (resp_p->status != 0) { - return resp_p->status; - } - /* stash the newly created meta handle */ - sm_p->u.mkdir.metafile_handle = resp_p->u.mkdir.handle; + if (resp_p->status != 0) + { + return resp_p->status; } + /* otherwise, just stash the newly created meta handle */ + sm_p->u.mkdir.metafile_handle = resp_p->u.mkdir.handle; + sm_p->u.mkdir.cid = resp_p->u.mkdir.cid; + /* also insert entry into attr cache */ PINT_CONVERT_ATTR(&attr, &sm_p->u.mkdir.sys_attr, 0); PINT_acache_update(sm_p->object_ref, &attr, NULL); @@ -365,36 +343,20 @@ static int mkdir_crdirent_comp_fn(void *v_p, struct PVFS_server_resp *resp_p, int index) { - PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_sm_msgpair_state *msg_p = &sm_p->msgpair; - gossip_debug(GOSSIP_CLIENT_DEBUG, "mkdir_crdirent_comp_fn\n"); - if (server_is_osd(msg_p->svr_addr)) { - 
return osd_errno_from_status(sm_p->msgpair.osd_command.status); - } else { - assert(resp_p->op == PVFS_SERV_CRDIRENT); - return resp_p->status; - } + assert(resp_p->op == PVFS_SERV_CRDIRENT); + return resp_p->status; } static int mkdir_delete_handle_comp_fn(void *v_p, struct PVFS_server_resp *resp_p, int index) { - PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_sm_msgpair_state *msg_p = &sm_p->msgpair; - gossip_debug(GOSSIP_CLIENT_DEBUG, "mkdir_delete_handle_comp_fn\n"); - if (server_is_osd(msg_p->svr_addr)) { - return osd_errno_from_status(sm_p->msgpair.osd_command.status); - } else { - assert(resp_p->op == PVFS_SERV_REMOVE); - return resp_p->status; - } + assert(resp_p->op == PVFS_SERV_REMOVE); + return resp_p->status; } static PINT_sm_action mkdir_msg_setup_msgpair( @@ -408,9 +370,12 @@ static PINT_sm_action mkdir_msg_setup_msgpair( gossip_debug(GOSSIP_CLIENT_DEBUG, "mkdir state: mkdir_msg_setup_msgpair\n"); + js_p->error_code = 0; + gossip_debug(GOSSIP_CLIENT_DEBUG," mkdir: posting mkdir req\n"); - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; ret = PINT_cached_config_get_next_meta( sm_p->object_ref.fs_id, @@ -423,89 +388,20 @@ static PINT_sm_action mkdir_msg_setup_msgpair( return SM_ACTION_COMPLETE; } - if (server_is_osd(msg_p->svr_addr)) { - /* Create a directory object and fill in the default attributes */ - int i, numattrs = 6; - struct attribute_list attr[numattrs]; - struct osd_command *command = &sm_p->msgpair.osd_command; - - /* Set attr type, page and number */ - for (i = 0; i < numattrs - 1; i++) { - attr[i].type = ATTR_SET; - attr[i].page = PVFS_USEROBJECT_ATTR_PG; - attr[i].number = i; - } - - /* uid */ - attr[0].val = &sm_p->u.mkdir.sys_attr.owner; - attr[0].len = sizeof(PVFS_uid); - - /* gid */ - attr[1].val = &sm_p->u.mkdir.sys_attr.group; - attr[1].len = sizeof(PVFS_gid); - - /* XXX Default to PVFS_PERM_VALID till we can figure out 
the umask */ - sm_p->u.mkdir.sys_attr.perms = PVFS_PERM_VALID; - attr[2].val = &sm_p->u.mkdir.sys_attr.perms; - attr[2].len = sizeof(PVFS_permissions); - - /* mask */ - sm_p->u.mkdir.sys_attr.mask = PVFS_ATTR_COMMON_UID | - PVFS_ATTR_COMMON_GID | - PVFS_ATTR_COMMON_PERM | - PVFS_ATTR_COMMON_ATIME | - PVFS_ATTR_COMMON_CTIME | - PVFS_ATTR_COMMON_MTIME | - PVFS_ATTR_META_DIST | - PVFS_ATTR_META_DFILES | - PVFS_ATTR_COMMON_TYPE; - attr[3].val = &sm_p->u.mkdir.sys_attr.mask; - attr[3].len = sizeof(uint32_t); - - /* object type */ - sm_p->u.mkdir.sys_attr.objtype = PVFS_TYPE_DIRECTORY; - attr[4].val = &sm_p->u.mkdir.sys_attr.objtype; - attr[4].len = sizeof(PVFS_ds_type); - - /* retrieve oid */ - attr[5].type = ATTR_GET; - attr[5].page = CUR_CMD_ATTR_PG; - attr[5].number = CCAP_OID; - attr[5].val = NULL; - attr[5].len = CCAP_OID_LEN; - - ret = osd_command_set_create(command, PVFS_OSD_META_PID, 0, 1); - if (ret) { - osd_error_xerrno(ret, "%s: osd_command_set_create failed", - __func__); - js_p->error_code = ret; - return SM_ACTION_COMPLETE; - } - - /* Set/Retrieve the dir. 
attributes */ - ret = osd_command_attr_build(command, attr, numattrs); - if (ret) { - osd_error_xerrno(ret, "%s: osd_command_attr_build failed", - __func__); - js_p->error_code = ret; - return SM_ACTION_COMPLETE; - } - js_p->error_code = OSD_MSGPAIR; - } else { - PINT_SERVREQ_MKDIR_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->object_ref.fs_id, - meta_handle_extent_array, - sm_p->u.mkdir.sys_attr); - js_p->error_code = 0; - } + PINT_SERVREQ_MKDIR_FILL( + msg_p->req, + *sm_p->cred_p, + sm_p->object_ref.fs_id, + meta_handle_extent_array, + sm_p->u.mkdir.sys_attr, + sm_p->hints); msg_p->fs_id = sm_p->object_ref.fs_id; msg_p->handle = meta_handle_extent_array.extent_array[0].first; msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; msg_p->comp_fn = mkdir_msg_comp_fn; + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -519,6 +415,119 @@ static PINT_sm_action mkdir_msg_failure( return SM_ACTION_COMPLETE; } +static PINT_sm_action create_collection_setup_msgpair( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int ret = -PVFS_EINVAL; + PINT_sm_msgpair_state *msg_p = NULL; + struct server_configuration_s *server_config; + PINT_llist *cur = NULL; + struct host_alias_s *cur_alias; + PVFS_BMI_addr_t addr; + int is_osd = fsid_is_osd(sm_p->object_ref.fs_id); + + gossip_debug(GOSSIP_CLIENT_DEBUG, + "mkdir state: create_collection_setup_msgpair\n"); + + js_p->error_code = 0; + + if (!is_osd) { + js_p->error_code = SKIP_COLLECTION_CREATE; + return SM_ACTION_COMPLETE; + } + + server_config = PINT_get_server_config_struct( + sm_p->object_ref.fs_id); + PINT_put_server_config_struct(server_config); + + cur = server_config->host_aliases; + while(cur) + { + cur_alias = PINT_llist_head(cur); + if (!cur_alias) + { + break; + } + if(!strncmp(cur_alias->bmi_address, "osd", 3)) { + BMI_addr_lookup(&addr,cur_alias->bmi_address); + } + + cur = PINT_llist_next(cur); + } + + 
PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; + + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->svr_addr = addr; + msg_p->comp_fn = create_collection_comp_fn; + + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; + +/* struct attribute_list attr = { ATTR_GET, CUR_CMD_ATTR_PG,*/ +/* CCAP_OID, NULL, CCAP_OID_LEN };*/ + + ret = osd_command_set_create_collection(command, PVFS_OSD_DATA_PID, sm_p->u.mkdir.cid); + + if (ret) { + osd_error_xerrno(ret, "%s: osd_command_set_create_collection failed", + __func__); + js_p->error_code = ret; + return 1; + } + +/* ret = osd_command_attr_build(command, &attr, 1);*/ +/* if (ret) {*/ +/* osd_error_xerrno(ret, "%s: osd_command_attr_build failed",*/ +/* __func__);*/ +/* js_p->error_code = ret;*/ +/* return SM_ACTION_COMPLETE;*/ +/* }*/ + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); + return SM_ACTION_COMPLETE; +} + +static int create_collection_comp_fn(void *v_p, + struct PVFS_server_resp *resp_p, + int index) +{ + PINT_smcb *smcb = v_p; + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + PVFS_error status; + int ret; + int is_osd = fsid_is_osd(sm_p->object_ref.fs_id); + + gossip_debug(GOSSIP_CLIENT_DEBUG, "create_collection_comp_fn\n"); + + if (is_osd) { + status = osd_errno_from_status( + sm_p->msgarray_op.msgarray[index].osd_command.status); + } else { + assert(resp_p->op == PVFS_SERV_CRDIRENT); + status = resp_p->status; + } + + if (status != 0) + { + return status; + } + +/* ret = osd_command_attr_resolve(&sm_p->msgarray_op.msgpair.osd_command);*/ +/* if (ret) {*/ +/* osd_error_xerrno(ret, "%s: attr_resolve failed", __func__);*/ +/* }*/ + +/* sm_p->object_ref.cid = get_ntohll(sm_p->msgarray_op.msgpair.osd_command.attr[0].val);*/ + + sm_p->object_ref.cid = sm_p->u.mkdir.cid; + +/* osd_command_attr_free(&sm_p->msgarray_op.msgpair.osd_command);*/ + return 0; +} + static PINT_sm_action 
mkdir_crdirent_setup_msgpair( struct PINT_smcb *smcb, job_status_s *js_p) { @@ -529,6 +538,8 @@ static PINT_sm_action mkdir_crdirent_setup_msgpair( gossip_debug(GOSSIP_CLIENT_DEBUG, "mkdir state: crdirent_setup_msgpair\n"); + js_p->error_code = 0; + gossip_debug(GOSSIP_CLIENT_DEBUG," mkdir: posting crdirent req\n"); gossip_debug(GOSSIP_CLIENT_DEBUG, "hooking dirent %s (%llu) under " @@ -536,43 +547,34 @@ static PINT_sm_action mkdir_crdirent_setup_msgpair( llu(sm_p->u.mkdir.metafile_handle), llu(sm_p->object_ref.handle)); - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; + + PINT_SERVREQ_CRDIRENT_FILL( + msg_p->req, + *sm_p->cred_p, + sm_p->u.mkdir.object_name, + sm_p->u.mkdir.metafile_handle, + sm_p->object_ref.handle, + sm_p->object_ref.fs_id, + sm_p->hints); + + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = sm_p->object_ref.handle; + msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; + msg_p->comp_fn = mkdir_crdirent_comp_fn; ret = PINT_cached_config_map_to_server( &msg_p->svr_addr, sm_p->object_ref.handle, sm_p->object_ref.fs_id); + if (ret) { gossip_err("Failed to map meta server address\n"); js_p->error_code = ret; - goto out; - } - - if (server_is_osd(msg_p->svr_addr)) { - /* - * Directory operations for metafile and mdfile. We don't do anything - * here because we'll handle the individual directory operations in a - * different state machine. 
- */ - js_p->error_code = OSD_MSGPAIR; - } else { - /* If the parent directory is /pvfs, let regular pvfs handle dirent */ - PINT_SERVREQ_CRDIRENT_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->u.mkdir.object_name, - sm_p->u.mkdir.metafile_handle, - sm_p->object_ref.handle, - sm_p->object_ref.fs_id); - js_p->error_code = 0; } - msg_p->fs_id = sm_p->object_ref.fs_id; - msg_p->handle = sm_p->object_ref.handle; - msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; - msg_p->comp_fn = mkdir_crdirent_comp_fn; - -out: + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -596,9 +598,13 @@ static PINT_sm_action mkdir_delete_handle_setup_msgpair( PVFS_BMI_addr_t metafile_server_addr; PINT_sm_msgpair_state *msg_p = NULL; - gossip_debug(GOSSIP_CLIENT_DEBUG, "%s\n", __func__); + gossip_debug(GOSSIP_CLIENT_DEBUG, "mkdir state: " + "delete_handle_setup_msgpair_array\n"); + + js_p->error_code = 0; - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; ret = PINT_cached_config_map_to_server( &metafile_server_addr, sm_p->u.mkdir.metafile_handle, @@ -611,25 +617,12 @@ static PINT_sm_action mkdir_delete_handle_setup_msgpair( return SM_ACTION_COMPLETE; } - if (server_is_osd(metafile_server_addr)) { - ret = osd_command_set_remove(&sm_p->msgpair.osd_command, - PVFS_OSD_META_PID, - sm_p->u.mkdir.metafile_handle); - if (ret) { - osd_error_xerrno(ret, "%s: osd_command_set_remove failed", - __func__); - js_p->error_code = ret; - return SM_ACTION_COMPLETE; - } - js_p->error_code = OSD_MSGPAIR; - } else { - PINT_SERVREQ_REMOVE_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->object_ref.fs_id, - sm_p->u.mkdir.metafile_handle); - js_p->error_code = 0; - } + PINT_SERVREQ_REMOVE_FILL( + msg_p->req, + *sm_p->cred_p, + sm_p->object_ref.fs_id, + sm_p->u.mkdir.metafile_handle, + sm_p->hints); msg_p->fs_id = sm_p->object_ref.fs_id; msg_p->handle = sm_p->u.mkdir.metafile_handle; @@ -639,6 +632,8 @@ static PINT_sm_action 
mkdir_delete_handle_setup_msgpair( gossip_debug(GOSSIP_CLIENT_DEBUG, " Preparing to remove " "directory handle %llu\n", llu(msg_p->handle)); + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -686,7 +681,7 @@ static PINT_sm_action mkdir_cleanup( (const PVFS_object_ref*) &(sm_p->object_ref)); } else if ((PVFS_ERROR_CLASS(-sm_p->error_code) == PVFS_ERROR_BMI) && - (sm_p->u.mkdir.retry_count < sm_p->msgarray_params.retry_limit)) + (sm_p->u.mkdir.retry_count < sm_p->msgarray_op.params.retry_limit)) { sm_p->u.mkdir.stored_error_code = 0; sm_p->u.mkdir.retry_count++; @@ -802,8 +797,13 @@ static PINT_sm_action mkdir_seteattr_setup_msgpair( js_p->error_code = -PVFS_ENOMEM; return SM_ACTION_COMPLETE; } +#ifdef WIN32 + _snprintf((char*)sm_p->u.mkdir.val_array[cur_index].buffer, + 16, "%d", sm_p->getattr.attr.u.dir.hint.dfile_count); +#else snprintf((char*)sm_p->u.mkdir.val_array[cur_index].buffer, 16, "%d", sm_p->getattr.attr.u.dir.hint.dfile_count); +#endif sm_p->u.mkdir.val_array[cur_index].buffer_sz = strlen((char*)sm_p->u.mkdir.val_array[cur_index].buffer) + 1; @@ -837,30 +837,29 @@ static PINT_sm_action mkdir_seteattr_setup_msgpair( cur_index++; } - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; PINT_SERVREQ_SETEATTR_FILL( - sm_p->msgpair.req, + msg_p->req, (*sm_p->cred_p), sm_p->object_ref.fs_id, sm_p->u.mkdir.metafile_handle, 0, eattr_count, sm_p->u.mkdir.key_array, - sm_p->u.mkdir.val_array - ); - - sm_p->msgarray = &(sm_p->msgpair); - sm_p->msgarray_count = 1; - sm_p->msgpair.fs_id = sm_p->object_ref.fs_id; - sm_p->msgpair.handle = sm_p->u.mkdir.metafile_handle; - sm_p->msgpair.retry_flag = PVFS_MSGPAIR_RETRY; + sm_p->u.mkdir.val_array, + sm_p->hints); + + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = sm_p->u.mkdir.metafile_handle; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; /* NOTE: no comp_fn needed. 
*/ ret = PINT_cached_config_map_to_server( - &sm_p->msgpair.svr_addr, - sm_p->msgpair.handle, - sm_p->msgpair.fs_id); + &msg_p->svr_addr, + msg_p->handle, + msg_p->fs_id); if (ret) { gossip_err("Failed to map meta server address\n"); @@ -870,6 +869,8 @@ static PINT_sm_action mkdir_seteattr_setup_msgpair( { js_p->error_code = 0; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } diff --git a/src/client/sysint/sys-osd-dir.sm b/src/client/sysint/sys-osd-dir.sm index e2612a0..ab93b9c 100644 --- a/src/client/sysint/sys-osd-dir.sm +++ b/src/client/sysint/sys-osd-dir.sm @@ -259,7 +259,7 @@ static PINT_sm_action osd_dirops_attr1_init( if (js_p->error_code == OSD_DIROPS_RETRY) { return job_req_sched_post_timer( - sm_p->msgarray_params.retry_delay, smcb, 0, js_p, &tmp_id, + sm_p->msgarray_op.params.retry_delay, smcb, 0, js_p, &tmp_id, pint_client_sm_context); } @@ -276,14 +276,15 @@ static PINT_sm_action osd_dirops_attr1_remove_setup_msgpair( struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); struct attribute_list attr; - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_dirops_attr1: osd_dirops_attr1_remove_setup_msgpair\n"); js_p->error_code = 0; - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; oid = sm_p->parent_ref.handle; ret = osd_command_set_get_attributes(command, PVFS_OSD_META_PID, oid); @@ -332,10 +333,10 @@ static int osd_dirops_attr1_remove_comp_fn(void *v_p, PINT_smcb *smcb = v_p; PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_dirops_attr1_remove_comp_fn\n"); - status = osd_errno_from_status(sm_p->msgpair.osd_command.status); + status = 
osd_errno_from_status(sm_p->msgarray_op.msgpair.osd_command.status); if (status != 0) { gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_command_set_get_attributes failed %d\n", index); @@ -354,7 +355,7 @@ static int osd_dirops_attr1_remove_comp_fn(void *v_p, sm_p->object_ref.handle = get_ntohll(command->attr->val); sm_p->object_ref.fs_id = sm_p->parent_ref.fs_id; } - osd_command_attr_free(&sm_p->msgpair.osd_command); + osd_command_attr_free(&sm_p->msgarray_op.msgpair.osd_command); return status; } @@ -370,14 +371,15 @@ static PINT_sm_action osd_dirops_attr1_setup_msgpair( struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); struct attribute_list attr[3]; - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_dirops_attr1: osd_dirops_attr1_setup_msgpair\n"); js_p->error_code = 0; - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; if (smcb->op == PVFS_SYS_CREATE) { /* insert dirent */ @@ -534,10 +536,10 @@ static int osd_dirops_attr1_comp_fn(void *v_p, PINT_smcb *smcb = v_p; PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_dirops_attr1_comp_fn\n"); - ret = osd_errno_from_status(sm_p->msgpair.osd_command.status); + ret = osd_errno_from_status(sm_p->msgarray_op.msgpair.osd_command.status); if (ret != 0) { gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_command_set_gen_cas failed %d\n", index); @@ -584,10 +586,10 @@ static int osd_dirops_attr1_comp_fn(void *v_p, out: /* free data alloced for the create, if this was an insert */ - if (sm_p->msgpair.osd_command.attr->len) { - free(sm_p->msgpair.osd_command.attr->val); + if (sm_p->msgarray_op.msgpair.osd_command.attr->len) { + 
free(sm_p->msgarray_op.msgpair.osd_command.attr->val); } - osd_command_attr_free(&sm_p->msgpair.osd_command); + osd_command_attr_free(&sm_p->msgarray_op.msgpair.osd_command); return ret; } @@ -630,7 +632,7 @@ static PINT_sm_action osd_dirops_attr4_init( if (js_p->error_code == OSD_DIROPS_RETRY) { return job_req_sched_post_timer( - sm_p->msgarray_params.retry_delay, smcb, 0, js_p, &tmp_id, + sm_p->msgarray_op.params.retry_delay, smcb, 0, js_p, &tmp_id, pint_client_sm_context); } @@ -647,14 +649,15 @@ static PINT_sm_action osd_dirops_attr4_setup_lock_msgpair( PINT_sm_msgpair_state *msg_p = NULL; struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_dirops_attr4: osd_dirops_attr4_setup_lock_msgpair\n"); js_p->error_code = 0; - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; /* * Lock directory. A directory is represented by a single object. 
The @@ -740,7 +743,7 @@ static int osd_dirops_attr4_lock_comp_fn(void *v_p, gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_dirops_attr4_lock_comp_fn\n"); - status = osd_errno_from_status(sm_p->msgpair.osd_command.status); + status = osd_errno_from_status(sm_p->msgarray_op.msgpair.osd_command.status); if (status != 0) { gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_command_set_cas failed %d\n", index); @@ -748,8 +751,8 @@ static int osd_dirops_attr4_lock_comp_fn(void *v_p, } /* osd_command.indata should contain the previous value of the lock */ - inbuf = (uint8_t *)sm_p->msgpair.osd_command.indata; - outbuf = (uint8_t *)sm_p->msgpair.osd_command.outdata; + inbuf = (uint8_t *)sm_p->msgarray_op.msgpair.osd_command.indata; + outbuf = (uint8_t *)sm_p->msgarray_op.msgpair.osd_command.outdata; assert(inbuf); assert(outbuf); @@ -768,8 +771,8 @@ static int osd_dirops_attr4_lock_comp_fn(void *v_p, out: /* free the 16 bytes alloced for the CAS operation above */ - free((void *)sm_p->msgpair.osd_command.indata); - free((void *)sm_p->msgpair.osd_command.outdata); + free((void *)sm_p->msgarray_op.msgpair.osd_command.indata); + free((void *)sm_p->msgarray_op.msgpair.osd_command.outdata); return ret; } @@ -784,14 +787,15 @@ static PINT_sm_action osd_dirops_attr4_remove_setup_msgpair( struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); struct attribute_list attr; - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_dirops_attr4: osd_dirops_attr4_remove_setup_msgpair\n"); js_p->error_code = 0; - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; oid = sm_p->parent_ref.handle; ret = osd_command_set_get_attributes(command, PVFS_OSD_META_PID, oid); @@ -840,10 +844,10 @@ static int osd_dirops_attr4_remove_comp_fn(void *v_p, PINT_smcb *smcb = v_p; PINT_client_sm *sm_p = PINT_sm_frame(smcb, 
PINT_FRAME_CURRENT); - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_dirops_attr4_remove_comp_fn\n"); - status = osd_errno_from_status(sm_p->msgpair.osd_command.status); + status = osd_errno_from_status(sm_p->msgarray_op.msgpair.osd_command.status); if (status != 0) { gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_command_set_get_attributes failed %d\n", index); @@ -858,7 +862,7 @@ static int osd_dirops_attr4_remove_comp_fn(void *v_p, sm_p->object_ref.handle = get_ntohll(command->attr->val); sm_p->object_ref.fs_id = sm_p->parent_ref.fs_id; - osd_command_attr_free(&sm_p->msgpair.osd_command); + osd_command_attr_free(&sm_p->msgarray_op.msgpair.osd_command); return status; } @@ -873,14 +877,15 @@ static PINT_sm_action osd_dirops_attr4_setup_lookup_msgpair( struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); struct attribute_list attr; - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_dirops_attr4: osd_dirops_attr4_setup_lookup_msgpair\n"); js_p->error_code = 0; - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; if (smcb->op == PVFS_SYS_CREATE) { /* For a create operation, object_ref refers to the dir. 
object */ @@ -950,14 +955,14 @@ static int osd_dirops_attr4_lookup_comp_fn(void *v_p, gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_dirops_attr4_lookup_comp_fn\n"); - status = osd_errno_from_status(sm_p->msgpair.osd_command.status); + status = osd_errno_from_status(sm_p->msgarray_op.msgpair.osd_command.status); if (status != 0) { gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_command_set_get_attributes failed %d\n", index); return status; } - ret = osd_command_attr_resolve(&sm_p->msgpair.osd_command); + ret = osd_command_attr_resolve(&sm_p->msgarray_op.msgpair.osd_command); if (ret) { osd_error_xerrno(ret, "%s: attr_resolve failed", __func__); goto out; @@ -971,14 +976,14 @@ static int osd_dirops_attr4_lookup_comp_fn(void *v_p, * return 1. */ if ((smcb->op == PVFS_SYS_CREATE || smcb->op == PVFS_SYS_MKDIR) && - sm_p->msgpair.osd_command.attr->outlen) { + sm_p->msgarray_op.msgpair.osd_command.attr->outlen) { ret = 1; gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_dirops_attr4_lookup_comp_fn: dirent exists\n"); } out: - osd_command_attr_free(&sm_p->msgpair.osd_command); + osd_command_attr_free(&sm_p->msgarray_op.msgpair.osd_command); return ret; } @@ -993,14 +998,15 @@ static PINT_sm_action osd_dirops_attr4_setup_insert_remove_msgpair( struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); struct attribute_list attr; - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_dirops_attr4: osd_dirops_attr4_setup_insert_remove_msgpair\n"); js_p->error_code = 0; - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; /* Insert dirent as attribute of directory object */ attr.type = ATTR_SET; @@ -1097,17 +1103,17 @@ static int osd_dirops_attr4_insert_remove_comp_fn(void *v_p, gossip_debug(GOSSIP_CLIENT_DEBUG, "%s\n", __func__); - status = osd_errno_from_status(sm_p->msgpair.osd_command.status); + status = 
osd_errno_from_status(sm_p->msgarray_op.msgpair.osd_command.status); if (status != 0) gossip_debug(GOSSIP_CLIENT_DEBUG, "%s: failed to insert/remove dirent %d\n", __func__, index); /* free data alloced for the create, if this was an insert */ - if (sm_p->msgpair.osd_command.attr->len) - free(sm_p->msgpair.osd_command.attr->val); + if (sm_p->msgarray_op.msgpair.osd_command.attr->len) + free(sm_p->msgarray_op.msgpair.osd_command.attr->val); - osd_command_attr_free(&sm_p->msgpair.osd_command); + osd_command_attr_free(&sm_p->msgarray_op.msgpair.osd_command); return status; } @@ -1126,7 +1132,8 @@ static PINT_sm_action osd_dirops_attr4_setup_unlock_msgpair( js_p->error_code = 0; - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; /* * Unlock dir. oid should correspond to the directory we're trying to @@ -1146,7 +1153,7 @@ static PINT_sm_action osd_dirops_attr4_setup_unlock_msgpair( return 1; } - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; ret = osd_command_set_cas(command, PVFS_OSD_META_PID, oid, 8, 0); if (ret) { @@ -1202,15 +1209,15 @@ static int osd_dirops_attr4_unlock_comp_fn(void *v_p, gossip_debug(GOSSIP_CLIENT_DEBUG, "%s\n", __func__); - ret = osd_errno_from_status(sm_p->msgpair.osd_command.status); + ret = osd_errno_from_status(sm_p->msgarray_op.msgpair.osd_command.status); if (ret != 0) { gossip_debug(GOSSIP_CLIENT_DEBUG, "osd_command_set_cas failed %d\n", index); } /* osd_command.indata should contain the previous value of the lock */ - inbuf = (uint8_t *)sm_p->msgpair.osd_command.indata; - outbuf = (uint8_t *)sm_p->msgpair.osd_command.outdata; + inbuf = (uint8_t *)sm_p->msgarray_op.msgpair.osd_command.indata; + outbuf = (uint8_t *)sm_p->msgarray_op.msgpair.osd_command.outdata; assert(inbuf); assert(outbuf); @@ -1228,8 +1235,8 @@ static int osd_dirops_attr4_unlock_comp_fn(void *v_p, out: /* free the 16 bytes alloced 
for the CAS operation above */ - free((void *)sm_p->msgpair.osd_command.indata); - free((void *)sm_p->msgpair.osd_command.outdata); + free((void *)sm_p->msgarray_op.msgpair.osd_command.indata); + free((void *)sm_p->msgarray_op.msgpair.osd_command.outdata); return ret; } diff --git a/src/client/sysint/sys-osd-io.sm b/src/client/sysint/sys-osd-io.sm index 7e09ced..1109221 100644 --- a/src/client/sysint/sys-osd-io.sm +++ b/src/client/sysint/sys-osd-io.sm @@ -35,12 +35,12 @@ #define KERNEL_BUFSIZE (400*1024) enum { - LOOP_NEXT_CHUNK = 1012, + LOOP_NEXT_CHUNK = 1012 }; static int osd_io_completion_fn(void *user_args, struct PVFS_server_resp *resp_p, int index); - + %% nested machine pvfs2_client_osd_io_sm @@ -95,11 +95,12 @@ static int osd_io_init(struct PINT_smcb *smcb, job_status_s *js_p) struct PINT_client_io_sm *io = &sm_p->u.io; PVFS_object_attr *attr = &sm_p->getattr.attr; int i, ret; - - ret = PINT_msgpairarray_init(&sm_p->msgarray, io->datafile_count); + + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, io->datafile_count); if (ret) goto out; - sm_p->msgarray_count = io->datafile_count; + + sm_p->msgarray_op.count = io->datafile_count; /* * Build req states, one for each server, and lookup addresses. 
@@ -109,6 +110,7 @@ static int osd_io_init(struct PINT_smcb *smcb, job_status_s *js_p) io->datafile_count); if (!io->file_req_state) goto out; + io->mem_req_state = PINT_new_request_states(io->mem_req, io->datafile_count); if (!io->mem_req_state) @@ -128,6 +130,7 @@ static int osd_io_init(struct PINT_smcb *smcb, job_status_s *js_p) io->file_data = malloc(io->datafile_count * sizeof(*io->file_data)); if (!io->file_data) goto out; + io->file_data[0].fsize = 0; io->file_data[0].server_ct = attr->u.meta.dfile_count; io->file_data[0].dist = attr->u.meta.dist; @@ -141,7 +144,7 @@ static int osd_io_init(struct PINT_smcb *smcb, job_status_s *js_p) llu(datafile_handle)); ret = PINT_cached_config_map_to_server( - &sm_p->msgarray[i].svr_addr, datafile_handle, + &sm_p->msgarray_op.msgarray[i].svr_addr, datafile_handle, sm_p->object_ref.fs_id); if (ret) goto out; @@ -155,10 +158,10 @@ static int osd_io_init(struct PINT_smcb *smcb, job_status_s *js_p) io->file_data[i].server_nr = io->datafile_index_array[i]; /* invariants */ - sm_p->msgarray[i].fs_id = sm_p->object_ref.fs_id; - sm_p->msgarray[i].handle = sm_p->object_ref.handle; - sm_p->msgarray[i].retry_flag = PVFS_MSGPAIR_RETRY; - sm_p->msgarray[i].comp_fn = osd_io_completion_fn; + sm_p->msgarray_op.msgarray[i].fs_id = sm_p->object_ref.fs_id; + sm_p->msgarray_op.msgarray[i].handle = sm_p->object_ref.handle; + sm_p->msgarray_op.msgarray[i].retry_flag = PVFS_MSGPAIR_RETRY; + sm_p->msgarray_op.msgarray[i].comp_fn = osd_io_completion_fn; } out: @@ -178,6 +181,11 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p) struct osd_command *command; struct bsg_iovec *iov; int dfile_count = sm_p->u.io.datafile_count; + struct server_configuration_s *server_config; + + server_config = PINT_get_server_config_struct( + sm_p->object_ref.fs_id); + PINT_put_server_config_struct(server_config); //~ printf("MAX IOVEC is %d\n", OSD_INIT_MAX_IOVEC); /* clients offset and length pairs */ @@ -206,7 +214,6 @@ static int 
osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p) gossip_debug(GOSSIP_IO_DEBUG, "%s: SCSI buffer %lld cant handle %lld\n", __func__, lld(KERNEL_BUFSIZE), lld(sbytemax)); sbytemax = KERNEL_BUFSIZE; - } /* for each datafile/io server/osd write ALL of its data */ @@ -219,11 +226,10 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p) datafile_handle = attr->u.meta.dfile_array[io->datafile_index_array[i]]; - if (PINT_REQUEST_DONE(&io->file_req_state[i]) || io->short_read[i]) { gossip_debug(GOSSIP_IO_DEBUG, "%s: Nothing to do for server %d\n", __func__, i); - sm_p->msgarray[i].suppress = 1; /* disable this entry */ + sm_p->msgarray_op.msgarray[i].suppress = 1; /* disable this entry */ continue; } @@ -328,14 +334,14 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p) __func__, i, csegs_count, lld(cagg_len)); - command = &sm_p->msgarray[i].osd_command; + command = &sm_p->msgarray_op.msgarray[i].osd_command; /* must be initalized to NULL for later check */ - sm_p->msgarray[i].osd_iov = NULL; - sm_p->msgarray[i].osd_sgl = NULL; + sm_p->msgarray_op.msgarray[i].osd_iov = NULL; + sm_p->msgarray_op.msgarray[i].osd_sgl = NULL; p = io->buffer; - + if (sresult.segs == 1) { /* contiguous server buff to write to */ if (csegs_count == 1) { p += offseta[0]; @@ -355,7 +361,7 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p) len += sizea[j]; } p = (void *) iov; - sm_p->msgarray[i].osd_iov = p; /* free IOV later */ + sm_p->msgarray_op.msgarray[i].osd_iov = p; /* free IOV later */ } else { ret = -EINVAL; goto out; @@ -373,8 +379,23 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p) //~ printf("Contig READ %d\n", len); } else if (io->io_type == PVFS_IO_WRITE) { - osd_command_set_write(command, PVFS_OSD_DATA_PID, datafile_handle, + if (server_config->post_create && !target_offset[0]) + { + uint64_t attrval; + struct attribute_list attr = {ATTR_SET, USER_COLL_PG, 
1, &attrval, 8}; + + if(!sm_p->getattr.attr.cid) { + sm_p->getattr.attr.cid = COLLECTION_OID_LB; /* root directory */ + } + set_htonll(&attrval, sm_p->getattr.attr.cid); + + osd_command_set_create_and_write(command, PVFS_OSD_DATA_PID, datafile_handle, len, target_offset[0]); + + osd_command_attr_build(command, &attr, 1); + } else { + osd_command_set_write(command, PVFS_OSD_DATA_PID, datafile_handle, len, target_offset[0]); + } command->outdata = p; command->outlen = len; command->iov_outlen = csegs_count; @@ -427,7 +448,7 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p) ret = -ENOMEM; goto out; } - sm_p->msgarray[i].osd_iov = iov; /* free iov later */ + sm_p->msgarray_op.msgarray[i].osd_iov = iov; /* free iov later */ len = 0; @@ -442,7 +463,7 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p) ret = -ENOMEM; goto out; } - sm_p->msgarray[i].osd_sgl = sgl; /* free sgl later */ + sm_p->msgarray_op.msgarray[i].osd_sgl = sgl; /* free sgl later */ hdr_offset = 0; @@ -510,7 +531,7 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p) goto out; } len = 0; - sm_p->msgarray[i].osd_iov = iov; /* free sgl later */ + sm_p->msgarray_op.msgarray[i].osd_iov = iov; /* free sgl later */ for (i=0; imsgarray[i].osd_sgl = sgl; /* free sgl later */ + sm_p->msgarray_op.msgarray[i].osd_sgl = sgl; /* free sgl later */ hdr_offset = 0; if (flag) { @@ -587,7 +608,7 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p) gossip_debug(GOSSIP_IO_DEBUG, "%s: Nothing to do for server [%d]", __func__, i); - sm_p->msgarray[i].suppress = 1; /* disable this entry */ + sm_p->msgarray_op.msgarray[i].suppress = 1; /* disable this entry */ continue; } } @@ -597,6 +618,7 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p) out: js_p->error_code = ret; + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return 1; } @@ -611,9 +633,9 @@ static int osd_io_completion_fn( int 
index) { struct PINT_smcb *smcb = user_args; - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); struct PINT_client_io_sm *io = &sm_p->u.io; - struct osd_command *command = &sm_p->msgarray[index].osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgarray[index].osd_command; PVFS_size result_size; @@ -687,11 +709,11 @@ static int osd_io_completion_fn( } } - if (sm_p->msgarray[index].osd_sgl != NULL) - free(sm_p->msgarray[index].osd_sgl); + if (sm_p->msgarray_op.msgarray[index].osd_sgl != NULL) + free(sm_p->msgarray_op.msgarray[index].osd_sgl); - if (sm_p->msgarray[index].osd_iov != NULL) - free(sm_p->msgarray[index].osd_iov); + if (sm_p->msgarray_op.msgarray[index].osd_iov != NULL) + free(sm_p->msgarray_op.msgarray[index].osd_iov); io->dfile_size_array[index] += result_size; io->total_size += result_size; @@ -713,7 +735,6 @@ static int osd_io_maybe_xfer_more(struct PINT_smcb *smcb, job_status_s *js_p) if (io->total_size == io->mem_req->aggregate_size) more = 0; - /* * If all short read, done. 
* @@ -816,12 +837,17 @@ out: } static int osd_io_cleanup(struct PINT_smcb *smcb, job_status_s *js_p) -{ +{ struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); struct PINT_client_io_sm *io = &sm_p->u.io; - free(sm_p->msgarray); - sm_p->msgarray = NULL; - sm_p->msgarray_count = 0; + struct server_configuration_s *server_config; + + server_config = PINT_get_server_config_struct( + sm_p->object_ref.fs_id); + PINT_put_server_config_struct(server_config); + + sm_p->msgarray_op.msgarray = NULL; + sm_p->msgarray_op.count = 0; PINT_free_request_states(io->file_req_state); PINT_free_request_states(io->mem_req_state); PINT_free_request_state(io->temp_req_state); @@ -830,6 +856,7 @@ static int osd_io_cleanup(struct PINT_smcb *smcb, job_status_s *js_p) /* return this to PVFS_sys_io */ io->io_resp_p->total_completed = sm_p->u.io.total_size; + return 1; } diff --git a/src/client/sysint/sys-readdir.sm b/src/client/sysint/sys-readdir.sm index 43ec4b8..41bcbd2 100644 --- a/src/client/sysint/sys-readdir.sm +++ b/src/client/sysint/sys-readdir.sm @@ -22,14 +22,9 @@ #include "ncache.h" #include "pint-util.h" #include "pvfs2-internal.h" -#include "osd-util/osd-util.h" extern job_context_id pint_client_sm_context; -enum { - OSD_MSGPAIR = 2001 -}; - static int readdir_msg_comp_fn( void *v_p, struct PVFS_server_resp *resp_p, int index); @@ -53,18 +48,10 @@ nested machine pvfs2_client_readdir_sm state readdir_msg_setup_msgpair { run readdir_msg_setup_msgpair; - OSD_MSGPAIR => readdir_msg_xfer_osd_msgpair; success => readdir_msg_xfer_msgpair; default => readdir_msg_failure; } - state readdir_msg_xfer_osd_msgpair - { - jump pvfs2_osd_msgpairarray_sm; - success => cleanup; - default => readdir_msg_failure; - } - state readdir_msg_xfer_msgpair { jump pvfs2_msgpairarray_sm; @@ -115,6 +102,7 @@ PVFS_error PVFS_isys_readdir( const PVFS_credentials *credentials, PVFS_sysresp_readdir *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; 
@@ -149,10 +137,13 @@ PVFS_error PVFS_isys_readdir( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, ref.fs_id); + PINT_init_msgarray_params(sm_p, ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.readdir.readdir_resp = resp; sm_p->object_ref = ref; + PVFS_hint_copy(hints, &sm_p->hints); + PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle), &ref.handle); + /* point the sm dirent array and outcount to the readdir response field */ sm_p->readdir.dirent_array = &resp->dirent_array; sm_p->readdir.dirent_outcount = &resp->pvfs_dirent_outcount; @@ -181,7 +172,8 @@ PVFS_error PVFS_sys_readdir( PVFS_ds_position token, int32_t pvfs_dirent_incount, const PVFS_credentials *credentials, - PVFS_sysresp_readdir *resp) + PVFS_sysresp_readdir *resp, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; @@ -189,7 +181,7 @@ PVFS_error PVFS_sys_readdir( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_readdir entered\n"); ret = PVFS_isys_readdir(ref, token, pvfs_dirent_incount, - credentials, resp, &op_id, NULL); + credentials, resp, &op_id, hints, NULL); if (ret) { PVFS_perror_gossip("PVFS_isys_readdir call", ret); @@ -245,6 +237,12 @@ static PINT_sm_action readdir_msg_setup_msgpair( } js_p->error_code = 0; + if(!sm_p->getattr.attr.cid) { + sm_p->object_ref.cid = COLLECTION_OID_LB; /* root directory */ + } else { + sm_p->object_ref.cid = sm_p->getattr.attr.cid; + } + gossip_debug(GOSSIP_READDIR_DEBUG," readdir: posting readdir req\n"); gossip_debug( @@ -254,55 +252,34 @@ static PINT_sm_action readdir_msg_setup_msgpair( llu(sm_p->readdir.pos_token), sm_p->readdir.dirent_limit); - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; + + PINT_SERVREQ_READDIR_FILL( + msg_p->req, + *sm_p->cred_p, + sm_p->object_ref.fs_id, + sm_p->object_ref.handle, + sm_p->u.readdir.pos_token, + 
sm_p->u.readdir.dirent_limit, + sm_p->hints); msg_p->fs_id = sm_p->object_ref.fs_id; msg_p->handle = sm_p->object_ref.handle; - ret = PINT_cached_config_map_to_server(&msg_p->svr_addr, msg_p->handle, - msg_p->fs_id); + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = readdir_msg_comp_fn; + + ret = PINT_cached_config_map_to_server( + &msg_p->svr_addr, sm_p->object_ref.handle, + sm_p->object_ref.fs_id); + if (ret) { gossip_err("Failed to map meta server address\n"); js_p->error_code = ret; } - if (server_is_osd(msg_p->svr_addr)) { - uint64_t oid; - struct osd_command *command = &sm_p->msgpair.osd_command; - - js_p->error_code = OSD_MSGPAIR; - - /* Retrieve all the directory entries */ - oid = sm_p->object_ref.handle; - ret = osd_command_set_get_attributes(command, PVFS_OSD_META_PID, oid); - if (ret) { - osd_error_xerrno(ret, "%s: osd_command_set_get_attributes failed", - __func__); - js_p->error_code = ret; - return 1; - } - - ret = osd_command_attr_all_build(command, PVFS_USEROBJECT_DIR_PG); - if (ret) { - osd_error_xerrno(ret, "%s: osd_command_attr_all_build failed", - __func__); - js_p->error_code = ret; - return 1; - } - } else { - js_p->error_code = 0; - PINT_SERVREQ_READDIR_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->object_ref.fs_id, - sm_p->object_ref.handle, - sm_p->readdir.pos_token, - sm_p->readdir.dirent_limit); - } - - msg_p->retry_flag = PVFS_MSGPAIR_RETRY; - msg_p->comp_fn = readdir_msg_comp_fn; - + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -310,88 +287,37 @@ static int readdir_msg_comp_fn(void *v_p, struct PVFS_server_resp *resp_p, int index) { - int ret = 0; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_sm_msgpair_state *msg_p = &sm_p->msgpair; + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); gossip_debug(GOSSIP_CLIENT_DEBUG, "readdir_msg_comp_fn\n"); - if (server_is_osd(msg_p->svr_addr)) { - int i = 0, dirent_array_len = 0; - 
struct osd_command *command = &sm_p->msgpair.osd_command; + assert(resp_p->op == PVFS_SERV_READDIR); - ret = osd_errno_from_status(command->status); - if (ret != 0) { - return ret; - } - - ret = osd_command_attr_all_resolve(command); - if (ret) { - osd_error_xerrno(ret, "%s: osd_command_attr_all_resolve failed", - __func__); - return ret; - } + if (resp_p->status != 0) + { + return resp_p->status; + } - /* - * XXX readdir.token and readdir.directory_version are used when we - * use multiple passes to retrieve the dirents. For now, we retrieve - * all the dirents in a single pass. So we don't really need token and - * directory_version. - */ - *(sm_p->readdir.token) = 0; - *(sm_p->readdir.directory_version) = 0; - *(sm_p->readdir.dirent_outcount) = command->numattr; + /* convert servresp_readdir response to a sysresp_readdir obj */ - dirent_array_len = command->numattr * sizeof(PVFS_dirent); + *(sm_p->readdir.token) = resp_p->u.readdir.token; + *(sm_p->readdir.directory_version) = + resp_p->u.readdir.directory_version; + *(sm_p->readdir.dirent_outcount) = + resp_p->u.readdir.dirent_count; + if (*(sm_p->readdir.dirent_outcount) > 0) + { + int dirent_array_len = + (sizeof(PVFS_dirent) * *(sm_p->readdir.dirent_outcount)); - /* The dirent_array must be freed by caller */ - *(sm_p->readdir.dirent_array) = malloc(dirent_array_len); + /* this dirent_array MUST be freed by caller */ + *(sm_p->readdir.dirent_array) = + (PVFS_dirent *) malloc(dirent_array_len); assert(*(sm_p->readdir.dirent_array)); - /* populate dirent_array */ - for (i = 0; i < command->numattr; i++) { - PVFS_dirent *dirent = &(*sm_p->readdir.dirent_array)[i]; - - if (command->attr[i].outlen < 9) { - gossip_err("%s: short dirent %d/%d len %d\n", __func__, - i, command->numattr, command->attr[i].outlen); - *(sm_p->readdir.dirent_outcount) = 0; - return -EINVAL; - } - dirent->handle = get_ntohll(command->attr[i].val); - memcpy(dirent->d_name, (uint8_t *)command->attr[i].val + 8, - command->attr[i].outlen - 
8); - dirent->d_name[command->attr[i].outlen - 8] = '\0'; - } - - /* free the returned attributes */ - osd_command_attr_all_free(command); - } else { - assert(resp_p->op == PVFS_SERV_READDIR); - - if (resp_p->status != 0) { - return resp_p->status; - } - - /* convert servresp_readdir response to a sysresp_readdir obj */ - *(sm_p->readdir.token) = resp_p->u.readdir.token; - *(sm_p->readdir.directory_version) = - resp_p->u.readdir.directory_version; - *(sm_p->readdir.dirent_outcount) = resp_p->u.readdir.dirent_count; - if (*(sm_p->readdir.dirent_outcount) > 0) - { - int dirent_array_len = - (sizeof(PVFS_dirent) * *(sm_p->readdir.dirent_outcount)); - - /* this dirent_array MUST be freed by caller */ - *(sm_p->readdir.dirent_array) = - (PVFS_dirent *) malloc(dirent_array_len); - assert(*(sm_p->readdir.dirent_array)); - - memcpy(*(sm_p->readdir.dirent_array), - resp_p->u.readdir.dirent_array, dirent_array_len); - } + memcpy(*(sm_p->readdir.dirent_array), + resp_p->u.readdir.dirent_array, dirent_array_len); } gossip_debug(GOSSIP_READDIR_DEBUG, "*** Got %d directory entries " diff --git a/src/client/sysint/sys-readdirplus.sm b/src/client/sysint/sys-readdirplus.sm index b471618..1fee7e0 100644 --- a/src/client/sysint/sys-readdirplus.sm +++ b/src/client/sysint/sys-readdirplus.sm @@ -28,9 +28,12 @@ #include "ncache.h" #include "pint-util.h" #include "pvfs2-internal.h" +#include "osd-util/osd-util.h" +#define MAXNAMELEN (256UL) enum { - NO_WORK = 1 + NO_WORK = 1, + OSD_MSGPAIR = 2001 }; extern job_context_id pint_client_sm_context; @@ -43,6 +46,8 @@ static int readdirplus_fetch_sizes_comp_fn(void *v_p, struct PVFS_server_resp *resp_p, int index); +static uint64_t find_min_handle(struct handle_to_index *input_handle_array, int nhandles); + %% machine pvfs2_client_readdirplus_sm @@ -57,11 +62,19 @@ machine pvfs2_client_readdirplus_sm state readdirplus_fetch_attrs_setup_msgpair { run readdirplus_fetch_attrs_setup_msgpair; + OSD_MSGPAIR => 
readdirplus_fetch_attrs_xfer_osd_msgpair; NO_WORK => cleanup; success => readdirplus_fetch_attrs_xfer_msgpair; default => readdirplus_msg_failure; } + state readdirplus_fetch_attrs_xfer_osd_msgpair + { + jump pvfs2_osd_msgpairarray_sm; + success => readdirplus_fetch_sizes_setup_msgpair; + default => readdirplus_msg_failure; + } + state readdirplus_fetch_attrs_xfer_msgpair { jump pvfs2_msgpairarray_sm; @@ -72,11 +85,19 @@ machine pvfs2_client_readdirplus_sm state readdirplus_fetch_sizes_setup_msgpair { run readdirplus_fetch_sizes_setup_msgpair; + OSD_MSGPAIR => readdirplus_fetch_sizes_xfer_osd_msgpair; NO_WORK => cleanup; success => readdirplus_fetch_sizes_xfer_msgpair; default => readdirplus_msg_failure; } + state readdirplus_fetch_sizes_xfer_osd_msgpair + { + jump pvfs2_osd_msgpairarray_sm; + success => cleanup; + default => readdirplus_msg_failure; + } + state readdirplus_fetch_sizes_xfer_msgpair { jump pvfs2_msgpairarray_sm; @@ -114,6 +135,7 @@ PVFS_error PVFS_isys_readdirplus( uint32_t attrmask, PVFS_sysresp_readdirplus *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -148,9 +170,10 @@ PVFS_error PVFS_isys_readdirplus( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, ref.fs_id); + PINT_init_msgarray_params(sm_p, ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->object_ref = ref; + PVFS_hint_copy(hints, &sm_p->hints); /* point the sm dirent array and outcount to the readdirplus response field */ sm_p->readdir.dirent_array = &resp->dirent_array; sm_p->readdir.dirent_outcount = &resp->pvfs_dirent_outcount; @@ -191,7 +214,8 @@ PVFS_error PVFS_sys_readdirplus( int32_t pvfs_dirent_incount, const PVFS_credentials *credentials, uint32_t attrmask, - PVFS_sysresp_readdirplus *resp) + PVFS_sysresp_readdirplus *resp, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; @@ -199,7 +223,7 @@ PVFS_error 
PVFS_sys_readdirplus( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_readdirplus entered\n"); ret = PVFS_isys_readdirplus(ref, token, pvfs_dirent_incount, - credentials, attrmask, resp, &op_id, NULL); + credentials, attrmask, resp, &op_id, NULL, hints); if (ret) { PVFS_perror_gossip("PVFS_isys_readdirplus call", ret); @@ -358,6 +382,14 @@ static int create_partition_handles(PVFS_fs_id fsid, int input_handle_count, int ret; ret = PINT_cached_config_map_to_server(&tmp_svr_addr, input_handle_array[i].handle, fsid); + + if( ret != 0 ) + { + gossip_debug(GOSSIP_CLIENT_DEBUG, "%s: failure mapping " + "handle to server\n", __func__); + return ret; + } + /* unique server address to find index */ is_unique_server(tmp_svr_addr, *svr_count, *svr_addr_array, &svr_index); @@ -466,6 +498,7 @@ static PINT_sm_action readdirplus_fetch_attrs_setup_msgpair(struct PINT_smcb *sm { int i, ret; struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_sm_msgpair_state *msg_p = NULL; gossip_debug(GOSSIP_CLIENT_DEBUG, "readdirplus state: fetch_attrs_setup\n"); /* if there are no dirents then return NO_WORK */ @@ -491,37 +524,64 @@ static PINT_sm_action readdirplus_fetch_attrs_setup_msgpair(struct PINT_smcb *sm js_p->error_code = -PVFS_EINVAL; return SM_ACTION_COMPLETE; } - sm_p->msgarray_count = sm_p->u.readdirplus.svr_count; - sm_p->msgarray = (PINT_sm_msgpair_state *) - calloc(sm_p->msgarray_count, sizeof(PINT_sm_msgpair_state)); - if (sm_p->msgarray == NULL) + ret = PINT_msgpairarray_init( + &sm_p->msgarray_op, sm_p->u.readdirplus.svr_count); + if(ret != 0) { - sm_p->msgarray_count = 0; - js_p->error_code = -PVFS_ENOMEM; - gossip_err("Could not allocate memory %d\n", -PVFS_ENOMEM); + gossip_err("Failed to initialize %d msgpairs\n", + sm_p->u.readdirplus.svr_count); + js_p->error_code = ret; return SM_ACTION_COMPLETE; } - for (i = 0; i < sm_p->msgarray_count; i++) + + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) { - PINT_sm_msgpair_state *msg_p = NULL; - - msg_p = 
&sm_p->msgarray[i]; - PINT_SERVREQ_LISTATTR_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->object_ref.fs_id, - sm_p->u.readdirplus.attrmask, - sm_p->u.readdirplus.handle_count[i], - sm_p->u.readdirplus.handles[i]); msg_p->fs_id = sm_p->object_ref.fs_id; msg_p->handle = PVFS_HANDLE_NULL; msg_p->retry_flag = PVFS_MSGPAIR_RETRY; msg_p->comp_fn = readdirplus_fetch_attrs_comp_fn; msg_p->svr_addr = sm_p->u.readdirplus.server_addresses[i]; - } - /* immediate return. next state jumps to msgpairarray machine */ - js_p->error_code = 0; - return SM_ACTION_COMPLETE; + + if (server_is_osd(msg_p->svr_addr)) + { + uint64_t oid; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; + + /* Retrieve all the directory entries */ + oid = sm_p->object_ref.handle; + ret = osd_command_set_get_attributes(command, PVFS_OSD_META_PID, oid); + if (ret) { + osd_error_xerrno(ret, "%s: osd_command_set_get_attributes failed", + __func__); + js_p->error_code = ret; + return 1; + } + + ret = osd_command_attr_all_build(command, PVFS_USEROBJECT_DIR_PG); + if (ret) { + osd_error_xerrno(ret, "%s: osd_command_attr_all_build failed", + __func__); + js_p->error_code = ret; + return 1; + } + js_p->error_code = OSD_MSGPAIR; + + } else { + PINT_SERVREQ_LISTATTR_FILL( + msg_p->req, + *sm_p->cred_p, + sm_p->object_ref.fs_id, + sm_p->u.readdirplus.attrmask, + sm_p->u.readdirplus.handle_count[i], + sm_p->u.readdirplus.handles[i], + sm_p->hints); + /* immediate return. 
next state jumps to msgpairarray machine */ + js_p->error_code = 0; + } + } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); + return SM_ACTION_COMPLETE; } /* Phase 1 completion callback */ @@ -530,14 +590,14 @@ static int readdirplus_fetch_attrs_comp_fn(void *v_p, int index) { PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); gossip_debug(GOSSIP_LISTATTR_DEBUG, "readdirplus_fetch_attrs_comp_fn called\n"); assert(resp_p->op == PVFS_SERV_LISTATTR); - + /* Mark all handles in this server range as having failed a stat */ - if (sm_p->msgarray[index].op_status != 0) { + if (sm_p->msgarray_op.msgarray[index].op_status != 0) { int i, handle_index; for (i = 0; i < sm_p->u.readdirplus.handle_count[index]; i++) { get_handle_index(sm_p->u.readdirplus.input_handle_array, @@ -547,10 +607,10 @@ static int readdirplus_fetch_attrs_comp_fn(void *v_p, NULL); assert(handle_index >= 0); sm_p->u.readdirplus.readdirplus_resp->stat_err_array[handle_index] = - sm_p->msgarray[index].op_status; + sm_p->msgarray_op.msgarray[index].op_status; } } - else if (sm_p->msgarray[index].op_status == 0) + else if (sm_p->msgarray_op.msgarray[index].op_status == 0) { /* fetch all errors from the servresp structure and copy the object attributes */ int i, handle_index; @@ -576,18 +636,18 @@ static int readdirplus_fetch_attrs_comp_fn(void *v_p, } } } - + /* if this is the last response, check all the status values and return error codes if any requests failed */ - if (index == (sm_p->msgarray_count - 1)) + if (index == (sm_p->msgarray_op.count - 1)) { int i; - for (i = 0; i < sm_p->msgarray_count; i++) + for (i = 0; i < sm_p->msgarray_op.count; i++) { - if (sm_p->msgarray[i].op_status != 0) + if (sm_p->msgarray_op.msgarray[i].op_status != 0) { - return sm_p->msgarray[i].op_status; + return sm_p->msgarray_op.msgarray[i].op_status; } } /* destroy scratch space.. 
we need to reuse them in phase 2 */ @@ -617,7 +677,8 @@ static int list_of_data_servers(PINT_client_sm *sm_p) nhandles = 0; for (i = 0; i < sm_p->u.readdirplus.readdirplus_resp->pvfs_dirent_outcount; i++) { - if (sm_p->u.readdirplus.obj_attr_array[i].objtype == PVFS_TYPE_METAFILE) + /* skip if the file is stuffed */ + if (sm_p->u.readdirplus.obj_attr_array[i].objtype == PVFS_TYPE_METAFILE && (sm_p->u.readdirplus.obj_attr_array[i].mask & PVFS_ATTR_META_UNSTUFFED)) { if (sm_p->u.readdirplus.attrmask & PVFS_ATTR_META_ALL) { @@ -646,7 +707,8 @@ static int list_of_data_servers(PINT_client_sm *sm_p) nhandles = 0; for (i = 0; i < sm_p->u.readdirplus.readdirplus_resp->pvfs_dirent_outcount; i++) { - if (sm_p->u.readdirplus.obj_attr_array[i].objtype == PVFS_TYPE_METAFILE) + /* skip if the file is stuffed */ + if (sm_p->u.readdirplus.obj_attr_array[i].objtype == PVFS_TYPE_METAFILE && (sm_p->u.readdirplus.obj_attr_array[i].mask & PVFS_ATTR_META_UNSTUFFED)) { int j; if (sm_p->u.readdirplus.attrmask & PVFS_ATTR_META_DIST) @@ -671,6 +733,7 @@ static int list_of_data_servers(PINT_client_sm *sm_p) } } } + ret = create_partition_handles(sm_p->object_ref.fs_id, sm_p->u.readdirplus.nhandles, sm_p->u.readdirplus.input_handle_array, @@ -688,19 +751,23 @@ static PINT_sm_action readdirplus_fetch_sizes_setup_msgpair( { int i, ret; struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_sm_msgpair_state *msg_p; + int is_osd = fsid_is_osd(sm_p->object_ref.fs_id); + struct server_configuration_s *server_config; - if (sm_p->msgarray && (sm_p->msgarray != &sm_p->msgpair)) - { - free(sm_p->msgarray); - sm_p->msgarray = NULL; - } + server_config = PINT_get_server_config_struct( + sm_p->object_ref.fs_id); + PINT_put_server_config_struct(server_config); + + PINT_msgpairarray_destroy(&sm_p->msgarray_op); + /* don't need sizes */ if (!(sm_p->u.readdirplus.attrmask & PVFS_ATTR_META_ALL) && !(sm_p->u.readdirplus.attrmask & PVFS_ATTR_DATA_SIZE)) { js_p->error_code = NO_WORK; return 
SM_ACTION_COMPLETE; } - + /* ok, now we have all the data files. split it on a per-server basis */ if ((ret = list_of_data_servers(sm_p)) < 0) { @@ -713,35 +780,128 @@ static PINT_sm_action readdirplus_fetch_sizes_setup_msgpair( js_p->error_code = NO_WORK; return SM_ACTION_COMPLETE; } - sm_p->msgarray_count = sm_p->u.readdirplus.svr_count; - sm_p->msgarray = (PINT_sm_msgpair_state *) - calloc(sm_p->msgarray_count, sizeof(PINT_sm_msgpair_state)); - if (sm_p->msgarray == NULL) - { - js_p->error_code = -PVFS_ENOMEM; - return SM_ACTION_COMPLETE; - } - for (i = 0; i < sm_p->msgarray_count; i++) - { - PINT_sm_msgpair_state *msg_p = NULL; - - msg_p = &sm_p->msgarray[i]; - PINT_SERVREQ_LISTATTR_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->object_ref.fs_id, - sm_p->u.readdirplus.attrmask, - sm_p->u.readdirplus.handle_count[i], - sm_p->u.readdirplus.handles[i]); - msg_p->fs_id = sm_p->object_ref.fs_id; - msg_p->handle = PVFS_HANDLE_NULL; - msg_p->retry_flag = PVFS_MSGPAIR_RETRY; - msg_p->comp_fn = readdirplus_fetch_sizes_comp_fn; - msg_p->svr_addr = sm_p->u.readdirplus.server_addresses[i]; + if (is_osd) + { + js_p->error_code = OSD_MSGPAIR; + + if(server_config->member_attr) { + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; + + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->svr_addr = sm_p->u.readdirplus.server_addresses[0]; + msg_p->comp_fn = readdirplus_fetch_sizes_comp_fn; + + msg_p->handle = find_min_handle(sm_p->u.readdirplus.input_handle_array, sm_p->u.readdirplus.nhandles); + struct attribute_list attr[sm_p->u.readdirplus.nhandles]; + /* Set attr type, page and number */ + for (i = 0; i < sm_p->u.readdirplus.nhandles; i++) { + if (sm_p->u.readdirplus.nhandles == 1) { + attr[i].type = ATTR_GET; + } else { + attr[i].type = ATTR_GET_MULTI; + } + attr[i].page = 0x1; + attr[i].number = 0x82; /* logical length (not used capacity) */ + attr[i].len = sizeof(uint64_t); + } + + struct osd_command *command 
= &sm_p->msgarray_op.msgarray[0].osd_command; + + ret = osd_command_set_get_member_attributes(command, PVFS_OSD_DATA_PID, sm_p->object_ref.cid, msg_p->handle); + if (ret) { + osd_error_xerrno(ret, "%s: osd_command_set_get_member_attributes failed", + __func__); + js_p->error_code = ret; + return 1; + } + + ret = osd_command_multi_attr_build(command, attr, sm_p->u.readdirplus.nhandles, sm_p->u.readdirplus.nhandles); + if (ret) { + osd_error_xerrno(ret, "%s: osd_command_attr_build failed", + __func__); + js_p->error_code = ret; + return 1; + } + } else { + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, sm_p->u.readdirplus.nhandles); + if(ret != 0) + { + gossip_err("Failed to initialize %d msgpairs\n", + sm_p->u.readdirplus.svr_count); + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) + { + struct osd_command *command = &sm_p->msgarray_op.msgarray[i].osd_command; + + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = readdirplus_fetch_sizes_comp_fn; + msg_p->svr_addr = sm_p->u.readdirplus.server_addresses[0]; + msg_p->handle = sm_p->u.readdirplus.input_handle_array[i].handle; + + struct attribute_list id = { + .type = ATTR_GET, + .page = 0x1, + .number = 0x82, /* logical length (not used capacity) */ + .len = sizeof(uint64_t), + }; + + ret = osd_command_set_get_attributes(command, PVFS_OSD_DATA_PID, + sm_p->u.readdirplus.input_handle_array[i].handle); + + if (ret) { + osd_error_xerrno(ret, + "%s: osd_command_set_get_attributes failed", + __func__); + js_p->error_code = ret; + return 1; + } + + ret = osd_command_attr_build(command, &id, 1); + if (ret) { + osd_error_xerrno(ret, "%s: osd_command_attr_build failed", + __func__); + js_p->error_code = ret; + return 1; + } + } + } + } else { + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, sm_p->u.readdirplus.svr_count); + if(ret != 0) + { + gossip_err("Failed to initialize %d msgpairs\n", + 
sm_p->u.readdirplus.svr_count); + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) + { + PINT_SERVREQ_LISTATTR_FILL( + msg_p->req, + *sm_p->cred_p, + sm_p->object_ref.fs_id, + PVFS_ATTR_DATA_SIZE, + sm_p->u.readdirplus.handle_count[i], + sm_p->u.readdirplus.handles[i], + sm_p->hints); + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = PVFS_HANDLE_NULL; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = readdirplus_fetch_sizes_comp_fn; + msg_p->svr_addr = sm_p->u.readdirplus.server_addresses[i]; + } + /* immediate return. next state jumps to msgpairarray machine */ + js_p->error_code = 0; } - /* immediate return. next state jumps to msgpairarray machine */ - js_p->error_code = 0; + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -751,14 +911,34 @@ static int readdirplus_fetch_sizes_comp_fn(void *v_p, int index) { PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + PVFS_error status; + int64_t size[sm_p->u.readdirplus.nhandles]; + int is_osd = fsid_is_osd(sm_p->object_ref.fs_id); + struct server_configuration_s *server_config; + + server_config = PINT_get_server_config_struct( + sm_p->object_ref.fs_id); + PINT_put_server_config_struct(server_config); gossip_debug(GOSSIP_LISTATTR_DEBUG, - "readdirplus_fetch_attrs_comp_fn called\n"); - assert(resp_p->op == PVFS_SERV_LISTATTR); + "readdirplus_fetch_sizes_comp_fn called\n"); + + if (is_osd) { + status = osd_errno_from_status( + sm_p->msgarray_op.msgarray[index].osd_command.status); + } else { + assert(resp_p->op == PVFS_SERV_LISTATTR); + status = resp_p->status; + } + + if (status != 0) + { + return status; + } /* Mark all handles in this server range as having failed a stat */ - if (sm_p->msgarray[index].op_status != 0) { + if (sm_p->msgarray_op.msgarray[index].op_status != 0) { int i, handle_index, 
aux_index; for (i = 0; i < sm_p->u.readdirplus.handle_count[index]; i++) { get_handle_index(sm_p->u.readdirplus.input_handle_array, @@ -768,48 +948,123 @@ static int readdirplus_fetch_sizes_comp_fn(void *v_p, &aux_index); assert(handle_index >= 0 && aux_index >= 0); sm_p->u.readdirplus.readdirplus_resp->stat_err_array[handle_index] = - sm_p->msgarray[index].op_status; + sm_p->msgarray_op.msgarray[index].op_status; } } - else if (sm_p->msgarray[index].op_status == 0) + else if (sm_p->msgarray_op.msgarray[index].op_status == 0) { /* fetch all errors from the servresp structure and copy the object attributes */ int i, handle_index, aux_index; - /* make sure that we get back responses for all handles that we sent out */ - assert(resp_p->u.listattr.nhandles == sm_p->u.readdirplus.handle_count[index]); - for (i = 0; i < sm_p->u.readdirplus.handle_count[index]; i++) - { - get_handle_index(sm_p->u.readdirplus.input_handle_array, - sm_p->u.readdirplus.nhandles, - sm_p->u.readdirplus.handles[index][i], - &handle_index, - &aux_index); - /* Copy any errors */ - sm_p->u.readdirplus.readdirplus_resp->stat_err_array[handle_index] = - resp_p->u.listattr.error[i]; - if (resp_p->u.listattr.error[i] == 0) + if (is_osd) { + struct osd_command *command = &sm_p->msgarray_op.msgarray[index].osd_command; + + int ret = osd_command_attr_resolve(command); + if (ret) { + osd_error_xerrno(ret, "%s: osd_command_attr_resolve failed", + __func__); + return ret; + } + + if(server_config->member_attr) { + struct attribute_list *attr = command->attr; + + for (i=0; iu.readdirplus.nhandles; i++) { + if(sm_p->u.readdirplus.nhandles == 1) { + if (command->attr->outlen != sizeof(uint64_t)) { + gossip_err("%s: expecting 8 bytes returned, got %u.\n", __func__, + command->attr->outlen); + return ret; + } + } else { + if (((struct attribute_get_multi_results *)attr[0].val)->outlen[((struct attribute_get_multi_results *)attr[0].val)->numoid-(sm_p->u.readdirplus.nhandles-i)] != sizeof(uint64_t)) { + 
gossip_err("%s: expecting 8 bytes returned, got %u.\n", __func__, + command->attr->outlen); + return ret; + } + } + } + + for (i=0; iu.readdirplus.nhandles; i++) { + if(sm_p->u.readdirplus.nhandles == 1) { + size[i] = get_ntohll(command->attr[i].val); + } else { + size[i] = get_ntohll(((struct attribute_get_multi_results *)attr[0].val)->val[((struct attribute_get_multi_results *)attr[0].val)->numoid-(sm_p->u.readdirplus.nhandles-i)]); + } + } + + for (i = 0; i < sm_p->u.readdirplus.nhandles; i++) + { + if(sm_p->u.readdirplus.nhandles == 1) { + if(sm_p->msgarray_op.msgarray[index].handle == sm_p->u.readdirplus.input_handle_array[i].handle) + { + handle_index = sm_p->u.readdirplus.input_handle_array[0].handle_index; + aux_index = sm_p->u.readdirplus.input_handle_array[0].aux_index; + sm_p->u.readdirplus.size_array[handle_index][aux_index] = size[0]; + } + } else { + for (i = 0; i < sm_p->u.readdirplus.handle_count[index]; i++) { + //gossip_err("%d\n", sm_p->u.readdirplus.input_handle_array[i]); + //gossip_err("%d\n", ((struct attribute_get_multi_results *)attr[0].val)->oid[((struct attribute_get_multi_results *)attr[0].val)->numoid-(sm_p->u.readdirplus.nhandles-i)]); + } + + for (i = 0; i < sm_p->u.readdirplus.handle_count[index]; i++) { + get_handle_index(sm_p->u.readdirplus.input_handle_array, + sm_p->u.readdirplus.nhandles, + ((struct attribute_get_multi_results *)attr[0].val)->oid[((struct attribute_get_multi_results *)attr[0].val)->numoid-(sm_p->u.readdirplus.nhandles-i)], + &handle_index, + &aux_index); + sm_p->u.readdirplus.size_array[handle_index][aux_index] = size[i]; + } + + } + } + } else { + if (command->attr->outlen != sizeof(uint64_t)) { + gossip_err("%s: expecting 8 bytes returned, got %u.\n", __func__, + command->attr->outlen); + return ret; + } + + get_handle_index(sm_p->u.readdirplus.input_handle_array, + sm_p->u.readdirplus.nhandles, + sm_p->msgarray_op.msgarray[index].handle, + &handle_index, + &aux_index); + 
sm_p->u.readdirplus.size_array[handle_index][aux_index] = get_ntohll(command->attr->val); + } + + osd_command_attr_free(command); + + } else { + /* make sure that we get back responses for all handles that we sent out */ + assert(resp_p->u.listattr.nhandles == sm_p->u.readdirplus.handle_count[index]); + for (i = 0; i < sm_p->u.readdirplus.handle_count[index]; i++) { - /* if no errors, stash the object sizes */ - assert(resp_p->u.listattr.attr[i].objtype == PVFS_TYPE_DATAFILE); - sm_p->u.readdirplus.size_array[handle_index][aux_index] = - resp_p->u.listattr.attr[i].u.data.size; + get_handle_index(sm_p->u.readdirplus.input_handle_array, + sm_p->u.readdirplus.nhandles, + sm_p->u.readdirplus.handles[index][i], + &handle_index, + &aux_index); + /* Copy any errors */ + sm_p->u.readdirplus.readdirplus_resp->stat_err_array[handle_index] = + resp_p->u.listattr.error[i]; + if (resp_p->u.listattr.error[i] == 0) + { + /* if no errors, stash the object sizes */ + assert(resp_p->u.listattr.attr[i].objtype == PVFS_TYPE_DATAFILE); + sm_p->u.readdirplus.size_array[handle_index][aux_index] = + resp_p->u.listattr.attr[i].u.data.size; + } } } } /* If this is the last server response, check all the status values * and stash any error codes if any of them failed */ - if (index == (sm_p->msgarray_count - 1)) + if (index == (sm_p->msgarray_op.count - 1)) { - int i; - for (i = 0; i < sm_p->msgarray_count; i++) - { - if (sm_p->msgarray[i].op_status != 0) - { - return sm_p->msgarray[i].op_status; - } - } + return PINT_msgarray_status(&sm_p->msgarray_op); } return 0; } @@ -847,48 +1102,76 @@ static PINT_sm_action readdirplus_cleanup( if (readdirplus_resp->stat_err_array[i] == 0) { /* convert into sys attributes */ - readdirplus_resp->attr_array[i].owner = sm_p->u.readdirplus.obj_attr_array[i].owner; - readdirplus_resp->attr_array[i].group = sm_p->u.readdirplus.obj_attr_array[i].group; - readdirplus_resp->attr_array[i].perms = sm_p->u.readdirplus.obj_attr_array[i].perms; - 
readdirplus_resp->attr_array[i].atime = sm_p->u.readdirplus.obj_attr_array[i].atime; - readdirplus_resp->attr_array[i].mtime = sm_p->u.readdirplus.obj_attr_array[i].mtime; - readdirplus_resp->attr_array[i].ctime = sm_p->u.readdirplus.obj_attr_array[i].ctime; - readdirplus_resp->attr_array[i].objtype = sm_p->u.readdirplus.obj_attr_array[i].objtype; - readdirplus_resp->attr_array[i].mask = - PVFS_util_object_to_sys_attr_mask(sm_p->u.readdirplus.obj_attr_array[i].mask); + readdirplus_resp->attr_array[i].owner + = sm_p->u.readdirplus.obj_attr_array[i].owner; + readdirplus_resp->attr_array[i].group + = sm_p->u.readdirplus.obj_attr_array[i].group; + readdirplus_resp->attr_array[i].perms + = sm_p->u.readdirplus.obj_attr_array[i].perms; + readdirplus_resp->attr_array[i].atime + = sm_p->u.readdirplus.obj_attr_array[i].atime; + readdirplus_resp->attr_array[i].mtime + = sm_p->u.readdirplus.obj_attr_array[i].mtime; + readdirplus_resp->attr_array[i].ctime + = sm_p->u.readdirplus.obj_attr_array[i].ctime; + readdirplus_resp->attr_array[i].objtype + = sm_p->u.readdirplus.obj_attr_array[i].objtype; + readdirplus_resp->attr_array[i].mask + = PVFS_util_object_to_sys_attr_mask + (sm_p->u.readdirplus.obj_attr_array[i].mask); if (readdirplus_resp->attr_array[i].objtype == PVFS_TYPE_METAFILE) { - if (sm_p->u.readdirplus.attrmask & PVFS_ATTR_META_DIST) - { - PVFS_size (*logical_file_size)(void* params, - uint32_t num_handles, - PVFS_size *psizes) = NULL; - /* compute the file size */ - assert(sm_p->u.readdirplus.size_array[i]); - logical_file_size = + if (sm_p->u.readdirplus.attrmask & PVFS_ATTR_META_DIST ) + { + if(sm_p->u.readdirplus.obj_attr_array[i].mask + & PVFS_ATTR_META_UNSTUFFED) + { + PVFS_size (*logical_file_size)(void* params, + uint32_t num_handles, + PVFS_size *psizes) = NULL; + /* compute the file size */ + assert(sm_p->u.readdirplus.size_array[i]); + logical_file_size = sm_p->u.readdirplus.obj_attr_array[i].u.meta.dist->methods->logical_file_size; - 
assert(logical_file_size); - assert(sm_p->u.readdirplus.obj_attr_array[i].u.meta.dist->params); - - readdirplus_resp->attr_array[i].size = logical_file_size( - sm_p->u.readdirplus.obj_attr_array[i].u.meta.dist->params, - sm_p->u.readdirplus.obj_attr_array[i].u.meta.dfile_count, - sm_p->u.readdirplus.size_array[i]); - + assert(logical_file_size); + assert(sm_p->u.readdirplus.obj_attr_array[i].u.meta.dist->params); + + readdirplus_resp->attr_array[i].size = logical_file_size( + sm_p->u.readdirplus.obj_attr_array[i].u.meta.dist->params, + sm_p->u.readdirplus.obj_attr_array[i].u.meta.dfile_count, + sm_p->u.readdirplus.size_array[i]); + + } + else + { + /* size for stuffed case */ + readdirplus_resp->attr_array[i].size = + sm_p->u.readdirplus.obj_attr_array[i].u.meta.stuffed_size; + } readdirplus_resp->attr_array[i].mask |= PVFS_ATTR_SYS_SIZE; - } - if (sm_p->u.readdirplus.attrmask & PVFS_ATTR_META_DFILES) - { - readdirplus_resp->attr_array[i].dfile_count = - sm_p->u.readdirplus.obj_attr_array[i].u.meta.dfile_count; - readdirplus_resp->attr_array[i].mask |= PVFS_ATTR_SYS_DFILE_COUNT; - } + } + if (sm_p->u.readdirplus.attrmask & PVFS_ATTR_META_DFILES) + { + readdirplus_resp->attr_array[i].dfile_count = + sm_p->u.readdirplus.obj_attr_array[i].u.meta.dfile_count; + readdirplus_resp->attr_array[i].mask + |= PVFS_ATTR_SYS_DFILE_COUNT; + } + if (sm_p->u.readdirplus.attrmask & PVFS_ATTR_META_MIRROR_DFILES) + { + readdirplus_resp->attr_array[i].mirror_copies_count = + sm_p->u.readdirplus.obj_attr_array[i].u.meta.mirror_copies_count; + readdirplus_resp->attr_array[i].mask + |= PVFS_ATTR_SYS_MIRROR_COPIES_COUNT; + } } - else if (readdirplus_resp->attr_array[i].objtype == PVFS_TYPE_DIRECTORY) + else if (readdirplus_resp->attr_array[i].objtype == + PVFS_TYPE_DIRECTORY) { readdirplus_resp->attr_array[i].dirent_count = sm_p->u.readdirplus.obj_attr_array[i].u.dir.dirent_count; - readdirplus_resp->attr_array[i].mask |= PVFS_ATTR_SYS_DIRENT_COUNT; + readdirplus_resp->attr_array[i].mask 
+ |= PVFS_ATTR_SYS_DIRENT_COUNT; } else if (readdirplus_resp->attr_array[i].objtype == PVFS_TYPE_SYMLINK && sm_p->u.readdirplus.attrmask & PVFS_ATTR_SYMLNK_TARGET) @@ -926,15 +1209,25 @@ static PINT_sm_action readdirplus_cleanup( } free(sm_p->u.readdirplus.obj_attr_array); sm_p->u.readdirplus.obj_attr_array = NULL; - if (sm_p->msgarray && (sm_p->msgarray != &sm_p->msgpair)) - { - free(sm_p->msgarray); - sm_p->msgarray = NULL; - } + PINT_msgpairarray_destroy(&sm_p->msgarray_op); PINT_SET_OP_COMPLETE; return SM_ACTION_TERMINATE; } +static uint64_t find_min_handle(struct handle_to_index *input_handle_array, int nhandles) +{ + int i; + uint64_t ret = input_handle_array[0].handle; + + for(i=1; i rmdirent_osd_msgpair; success => rmdirent_xfer_msgpair; default => rmdirent_retry_or_fail; } - state rmdirent_osd_msgpair - { - jump pvfs2_client_osd_dirops_sm; - success => do_remove; - default => rmdirent_retry_or_fail; - } - state rmdirent_xfer_msgpair { jump pvfs2_msgpairarray_sm; @@ -129,18 +120,10 @@ machine pvfs2_client_remove_sm state crdirent_setup_msgpair { run remove_crdirent_setup_msgpair; - OSD_MSGPAIR => crdirent_osd_msgpair; success => crdirent_xfer_msgpair; default => crdirent_retry_or_fail; } - state crdirent_osd_msgpair - { - jump pvfs2_client_osd_dirops_sm; - success => cleanup; - default => crdirent_retry_or_fail; - } - state crdirent_xfer_msgpair { jump pvfs2_msgpairarray_sm; @@ -171,6 +154,7 @@ PVFS_error PVFS_isys_remove( PVFS_object_ref parent_ref, const PVFS_credentials *credentials, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -198,11 +182,13 @@ PVFS_error PVFS_isys_remove( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, parent_ref.fs_id); + PINT_init_msgarray_params(sm_p, parent_ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.remove.object_name = object_name; sm_p->parent_ref = parent_ref; sm_p->u.remove.stored_error_code 
= 0; + PVFS_hint_copy(hints, &sm_p->hints); + PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle), &parent_ref.handle); gossip_debug( GOSSIP_CLIENT_DEBUG, "Trying to remove entry %s under %llu,%d\n", @@ -223,7 +209,8 @@ PVFS_error PVFS_isys_remove( PVFS_error PVFS_sys_remove( char *object_name, PVFS_object_ref parent_ref, - const PVFS_credentials *credentials) + const PVFS_credentials *credentials, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; @@ -231,7 +218,7 @@ PVFS_error PVFS_sys_remove( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_remove entered\n"); ret = PVFS_isys_remove(object_name, parent_ref, - credentials, &op_id, NULL); + credentials, &op_id, hints, NULL); if (ret) { PVFS_perror_gossip("PVFS_isys_remove call", ret); @@ -276,43 +263,39 @@ static PINT_sm_action remove_rmdirent_setup_msgpair( gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: rmdirent_setup_msgpair\n"); - PINT_init_msgpair(sm_p, msg_p); - - msg_p->fs_id = sm_p->parent_ref.fs_id; - msg_p->handle = sm_p->parent_ref.handle; - ret = PINT_cached_config_map_to_server(&msg_p->svr_addr, msg_p->handle, - msg_p->fs_id); - if (ret) { - gossip_err("Failed to map meta server address\n"); - js_p->error_code = ret; - } + js_p->error_code = 0; - if (server_is_osd(msg_p->svr_addr)) { - /* - * Directory operations for metafile and mdfile. We don't do anything - * here because we'll handle the individual directory operations in a - * different state machine. 
- */ - js_p->error_code = OSD_MSGPAIR; - } else { - /* If the parent directory is /pvfs, let regular pvfs handle dirent */ - js_p->error_code = 0; - PINT_SERVREQ_RMDIRENT_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->parent_ref.fs_id, - sm_p->parent_ref.handle, - sm_p->u.remove.object_name); - } + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; + + PINT_SERVREQ_RMDIRENT_FILL( + msg_p->req, + *sm_p->cred_p, + sm_p->parent_ref.fs_id, + sm_p->parent_ref.handle, + sm_p->u.remove.object_name, + sm_p->hints); gossip_debug(GOSSIP_REMOVE_DEBUG, "- doing RMDIRENT on %s " "under %llu,%d\n", sm_p->u.remove.object_name, llu(sm_p->parent_ref.handle), sm_p->parent_ref.fs_id); + msg_p->fs_id = sm_p->parent_ref.fs_id; + msg_p->handle = sm_p->parent_ref.handle; msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; msg_p->comp_fn = remove_rmdirent_comp_fn; + ret = PINT_cached_config_map_to_server( + &msg_p->svr_addr, msg_p->handle, msg_p->fs_id); + + if (ret) + { + gossip_err("Failed to map meta server address\n"); + js_p->error_code = ret; + } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -328,47 +311,19 @@ static PINT_sm_action remove_crdirent_setup_msgpair( sm_p->u.remove.stored_error_code = js_p->error_code; - PINT_init_msgpair(sm_p, msg_p); - - msg_p->fs_id = sm_p->parent_ref.fs_id; - msg_p->handle = sm_p->parent_ref.handle; - ret = PINT_cached_config_map_to_server(&msg_p->svr_addr, msg_p->handle, - msg_p->fs_id); - if (ret) { - gossip_err("Failed to map meta server address\n"); - js_p->error_code = ret; - } + js_p->error_code = 0; - /* - * XXX Things get a little hairy here. The OSD directory state machine - * looks at the smcb->op variable to insert/remove dirents. In this case, - * since smcb->op == PVFS_SYS_REMOVE, it'll try to remove this dirent - * instead of creating it. A hack/workaround is to set - * smcb->op = PVFS_SYS_MKDIR here and after we're done with this state, set - * it back to PVFS_SYS_REMOVE. 
- * - * On second thoughts, the above hack might not work. The object name is - * stored in the sm_p->u.mkdir member in the case of PVFS_SYS_MKDIR and in - * sm_p->u.remove for the PVFS_SYS_REMOVE state machine. - */ - if (server_is_osd(msg_p->svr_addr)) { - /* - * Directory operations for metafile and mdfile. We don't do anything - * here because we'll handle the individual directory operations in a - * different state machine. - */ - js_p->error_code = OSD_MSGPAIR; - } else { - /* If the parent directory is /pvfs, let regular pvfs handle dirent */ - js_p->error_code = 0; - PINT_SERVREQ_CRDIRENT_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->u.remove.object_name, - sm_p->object_ref.handle, - sm_p->parent_ref.handle, - sm_p->parent_ref.fs_id); - } + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; + + PINT_SERVREQ_CRDIRENT_FILL( + msg_p->req, + *sm_p->cred_p, + sm_p->u.remove.object_name, + sm_p->object_ref.handle, + sm_p->parent_ref.handle, + sm_p->parent_ref.fs_id, + sm_p->hints); gossip_debug(GOSSIP_REMOVE_DEBUG, "- doing CRDIRENT of %s (%llu,%d) " "under %llu,%d\n", sm_p->u.remove.object_name, @@ -377,9 +332,21 @@ static PINT_sm_action remove_crdirent_setup_msgpair( llu(sm_p->parent_ref.handle), sm_p->parent_ref.fs_id); + msg_p->fs_id = sm_p->parent_ref.fs_id; + msg_p->handle = sm_p->parent_ref.handle; msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; msg_p->comp_fn = remove_crdirent_comp_fn; + ret = PINT_cached_config_map_to_server( + &msg_p->svr_addr, msg_p->handle, msg_p->fs_id); + + if (ret) + { + gossip_err("Failed to map meta server address\n"); + js_p->error_code = ret; + } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -388,32 +355,26 @@ static int remove_rmdirent_comp_fn( struct PVFS_server_resp *resp_p, int index) { - int status; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_sm_msgpair_state *msg_p = &sm_p->msgpair; - - if 
(server_is_osd(msg_p->svr_addr)) { - status = osd_errno_from_status(sm_p->msgpair.osd_command.status); - } else { - assert(resp_p->op == PVFS_SERV_RMDIRENT); - status = resp_p->status; - - if (status == 0) { - assert(resp_p->u.rmdirent.entry_handle != PVFS_HANDLE_NULL); - assert(sm_p->parent_ref.fs_id != PVFS_FS_ID_NULL); - - /* XXX Do we do something similar for the OSD case? */ - /* pull handle out of response */ - sm_p->object_ref.handle = resp_p->u.rmdirent.entry_handle; - } - } + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); - sm_p->object_ref.fs_id = sm_p->parent_ref.fs_id; - gossip_debug(GOSSIP_CLIENT_DEBUG, - " remove_rmdirent_comp_fn: metafile handle = %llu\n", - llu(sm_p->object_ref.handle)); - return status; + assert(resp_p->op == PVFS_SERV_RMDIRENT); + + if (resp_p->status == 0) + { + assert(resp_p->u.rmdirent.entry_handle != PVFS_HANDLE_NULL); + assert(sm_p->parent_ref.fs_id != PVFS_FS_ID_NULL); + + /* pull handle out of response, also copy in fs_id from before */ + sm_p->object_ref.handle = resp_p->u.rmdirent.entry_handle; + sm_p->object_ref.fs_id = sm_p->parent_ref.fs_id; + + gossip_debug( + GOSSIP_CLIENT_DEBUG, + " remove_rmdirent_comp_fn: metafile handle = %llu\n", + llu(sm_p->object_ref.handle)); + } + return resp_p->status; } static int remove_crdirent_comp_fn( @@ -421,18 +382,14 @@ static int remove_crdirent_comp_fn( struct PVFS_server_resp *resp_p, int index) { - PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_sm_msgpair_state *msg_p = &sm_p->msgpair; - - gossip_debug(GOSSIP_CLIENT_DEBUG, "remove_crdirent_comp_fn\n"); + assert(resp_p->op == PVFS_SERV_CRDIRENT); - if (server_is_osd(msg_p->svr_addr)) { - return osd_errno_from_status(sm_p->msgpair.osd_command.status); - } else { - assert(resp_p->op == PVFS_SERV_CRDIRENT); - return resp_p->status; + if (resp_p->status == 0) + { + gossip_debug(GOSSIP_CLIENT_DEBUG, + " remove_crdirent_comp_fn: OK\n"); } + return resp_p->status; } 
static PINT_sm_action remove_check_error_code( @@ -482,11 +439,7 @@ static PINT_sm_action remove_cleanup( PINT_ncache_invalidate((const char*) sm_p->u.remove.object_name, (const PVFS_object_ref*) &(sm_p->parent_ref)); - if (sm_p->msgarray && (sm_p->msgarray != &sm_p->msgpair)) - { - free(sm_p->msgarray); - sm_p->msgarray = NULL; - } + PINT_msgpairarray_destroy(&sm_p->msgarray_op); PINT_SET_OP_COMPLETE; return SM_ACTION_TERMINATE; @@ -502,7 +455,7 @@ static PINT_sm_action remove_generic_timer( gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: generic_timer\n"); ret = job_req_sched_post_timer( - sm_p->msgarray_params.retry_delay, smcb, 0, js_p, &tmp_id, + sm_p->msgarray_op.params.retry_delay, smcb, 0, js_p, &tmp_id, pint_client_sm_context); return ret; @@ -517,7 +470,7 @@ static PINT_sm_action remove_crdirent_retry_or_fail( /* try again (up to a point) if we get a comm. failure. */ if ((PVFS_ERROR_CLASS(-js_p->error_code) == PVFS_ERROR_BMI) && - (sm_p->u.remove.retry_count < sm_p->msgarray_params.retry_limit)) + (sm_p->u.remove.retry_count < sm_p->msgarray_op.params.retry_limit)) { sm_p->u.remove.retry_count++; js_p->error_code = CRDIRENT_RETRY; @@ -556,7 +509,7 @@ static PINT_sm_action remove_rmdirent_retry_or_fail( /* try again (up to a point) if we get a comm. failure. 
*/ if ((PVFS_ERROR_CLASS(-js_p->error_code) == PVFS_ERROR_BMI) && - (sm_p->u.remove.retry_count < sm_p->msgarray_params.retry_limit)) + (sm_p->u.remove.retry_count < sm_p->msgarray_op.params.retry_limit)) { sm_p->u.remove.retry_count++; js_p->error_code = RMDIRENT_RETRY; diff --git a/src/client/sysint/sys-rename.sm b/src/client/sysint/sys-rename.sm index 1ac3e13..191b042 100644 --- a/src/client/sysint/sys-rename.sm +++ b/src/client/sysint/sys-rename.sm @@ -224,6 +224,7 @@ PVFS_error PVFS_isys_rename( PVFS_object_ref new_parent_ref, const PVFS_credentials *credentials, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -266,7 +267,7 @@ PVFS_error PVFS_isys_rename( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, old_parent_ref.fs_id); + PINT_init_msgarray_params(sm_p, old_parent_ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); /* this state machine has several hardcoded 2's around because @@ -287,6 +288,8 @@ PVFS_error PVFS_isys_rename( sm_p->u.rename.rmdirent_index = 0; sm_p->u.rename.target_dirent_exists = 0; sm_p->u.rename.stored_error_code = 0; + PVFS_hint_copy(hints, &sm_p->hints); + PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle), &old_parent_ref.handle); gossip_debug( GOSSIP_CLIENT_DEBUG, "Renaming file named %s (under [%llu,%d]\n\t" @@ -312,7 +315,8 @@ PVFS_error PVFS_sys_rename( PVFS_object_ref old_parent_ref, char *new_entry, PVFS_object_ref new_parent_ref, - const PVFS_credentials *credentials) + const PVFS_credentials *credentials, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; @@ -320,7 +324,7 @@ PVFS_error PVFS_sys_rename( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_rename entered\n"); ret = PVFS_isys_rename(old_entry, old_parent_ref, new_entry, - new_parent_ref, credentials, &op_id, NULL); + new_parent_ref, credentials, &op_id, hints, NULL); if (ret) { 
PVFS_perror_gossip("PVFS_isys_rename call", ret); @@ -366,7 +370,7 @@ static int rename_lookups_comp_fn( int index) { PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); gossip_debug(GOSSIP_CLIENT_DEBUG, "rename_lookups_comp_fn\n"); @@ -427,7 +431,7 @@ static int rename_lookups_comp_fn( /* set fs_id and handle for getattr nested sm */ sm_p->object_ref = sm_p->u.rename.refns[0]; - if(sm_p->msgarray[0].op_status == 0) + if(sm_p->msgarray_op.msgarray[0].op_status == 0) { PINT_SM_GETATTR_STATE_FILL( sm_p->getattr, @@ -442,7 +446,7 @@ static int rename_lookups_comp_fn( else { /* if the first one failed, maintain its error code */ - return(sm_p->msgarray[0].op_status); + return(sm_p->msgarray_op.msgarray[0].op_status); } } return 0; @@ -463,8 +467,13 @@ static int rename_rmdirent_comp_fn(void *v_p, int index) { PINT_smcb *smcb = v_p; +#ifdef WIN32 + PINT_client_sm *sm_p = + PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); +#else PINT_client_sm *sm_p __attribute__((unused)) = - PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); +#endif gossip_debug(GOSSIP_CLIENT_DEBUG, "rename_rmdirent_comp_fn\n"); assert(resp_p->op == PVFS_SERV_RMDIRENT); @@ -488,7 +497,7 @@ static int rename_chdirent_comp_fn( void *v_p, struct PVFS_server_resp *resp_p, int index) { PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); gossip_debug(GOSSIP_CLIENT_DEBUG, "rename_chdirent_comp_fn\n"); @@ -517,22 +526,20 @@ static PINT_sm_action rename_lookups_setup_msgpair_array( { struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret = -PVFS_EINVAL, i = 0; + PINT_sm_msgpair_state *msg_p; js_p->error_code = 0; - sm_p->msgarray_count = 2; - sm_p->msgarray = (PINT_sm_msgpair_state *)malloc( - sm_p->msgarray_count * 
sizeof(PINT_sm_msgpair_state)); - if (sm_p->msgarray == NULL) + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, 2); + if(ret != 0) { - gossip_err("rename: failed to allocate msgarray\n"); - return -PVFS_ENOMEM; + js_p->error_code = ret; + return SM_ACTION_COMPLETE; } /* prepare to post the lookup send/recv pairs */ - for(i = 0; i < 2; i++) + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) { - PINT_sm_msgpair_state *msg_p = &sm_p->msgarray[i]; gossip_debug(GOSSIP_CLIENT_DEBUG, "rename: posting lookup[%d] (%lld,%d)\n", @@ -545,7 +552,8 @@ static PINT_sm_action rename_lookups_setup_msgpair_array( sm_p->u.rename.entries[i], sm_p->u.rename.parent_refns[i].fs_id, sm_p->u.rename.parent_refns[i].handle, - PVFS_ATTR_COMMON_ALL); + PVFS_ATTR_COMMON_ALL, + sm_p->hints); /* fill in msgpair structure components */ msg_p->fs_id = sm_p->u.rename.parent_refns[i].fs_id; @@ -563,6 +571,8 @@ static PINT_sm_action rename_lookups_setup_msgpair_array( break; } } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -570,11 +580,7 @@ static PINT_sm_action rename_lookups_failure( struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - if (sm_p->msgarray) - { - free(sm_p->msgarray); - sm_p->msgarray = NULL; - } + PINT_msgpairarray_destroy(&sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -589,8 +595,9 @@ static PINT_sm_action rename_crdirent_setup_msgpair( gossip_debug(GOSSIP_CLIENT_DEBUG," rename: posting crdirent req\n"); - assert(sm_p->msgarray); - PINT_init_msgpair(sm_p, msg_p); + assert(sm_p->msgarray_op.msgarray); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; /* hook the 'old' handle up to the new parent with the 'new_entry' @@ -603,7 +610,8 @@ static PINT_sm_action rename_crdirent_setup_msgpair( sm_p->u.rename.entries[1], sm_p->u.rename.refns[0].handle, sm_p->u.rename.parent_refns[1].handle, - sm_p->u.rename.parent_refns[1].fs_id); + 
sm_p->u.rename.parent_refns[1].fs_id, + sm_p->hints); msg_p->fs_id = sm_p->u.rename.parent_refns[1].fs_id; msg_p->handle = sm_p->u.rename.parent_refns[1].handle; @@ -618,6 +626,8 @@ static PINT_sm_action rename_crdirent_setup_msgpair( gossip_err("Failed to map meta server address\n"); js_p->error_code = ret; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -632,7 +642,8 @@ static PINT_sm_action rename_rmdirent_setup_msgpair( gossip_debug(GOSSIP_CLIENT_DEBUG," rename: posting rmdirent req\n"); - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; /* in the normal case, the state machine will lead us here and we @@ -651,7 +662,8 @@ static PINT_sm_action rename_rmdirent_setup_msgpair( *sm_p->cred_p, sm_p->u.rename.parent_refns[sm_p->u.rename.rmdirent_index].fs_id, sm_p->u.rename.parent_refns[sm_p->u.rename.rmdirent_index].handle, - sm_p->u.rename.entries[sm_p->u.rename.rmdirent_index]); + sm_p->u.rename.entries[sm_p->u.rename.rmdirent_index], + sm_p->hints); msg_p->fs_id = sm_p->u.rename.parent_refns[ sm_p->u.rename.rmdirent_index].fs_id; @@ -666,8 +678,10 @@ static PINT_sm_action rename_rmdirent_setup_msgpair( if (ret) { gossip_err("Failed to map meta server address\n"); - js_p->error_code = ret; + js_p->error_code = ret; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -764,7 +778,8 @@ static PINT_sm_action rename_chdirent_setup_msgpair( gossip_debug(GOSSIP_CLIENT_DEBUG," rename: posting chdirent req\n"); - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; /* here, we're exhanging the existing dirent under the 'new' parent @@ -777,7 +792,8 @@ static PINT_sm_action rename_chdirent_setup_msgpair( sm_p->u.rename.parent_refns[1].fs_id, sm_p->u.rename.parent_refns[1].handle, sm_p->u.rename.refns[0].handle, - sm_p->u.rename.entries[1]); + sm_p->u.rename.entries[1], + 
sm_p->hints); msg_p->fs_id = sm_p->u.rename.parent_refns[1].fs_id; msg_p->handle = sm_p->u.rename.parent_refns[1].handle; @@ -792,6 +808,8 @@ static PINT_sm_action rename_chdirent_setup_msgpair( gossip_err("Failed to map meta server address\n"); js_p->error_code = ret; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -849,21 +867,16 @@ static PINT_sm_action rename_cleanup( "ref.handle=%llu ref.fsid=%d\n", sm_p->u.rename.entries[1], llu(sm_p->u.rename.refns[0].handle), - sm_p->u.rename.parent_refns[0].fs_id); + sm_p->u.rename.parent_refns[1].fs_id); PINT_ncache_update((const char*) sm_p->u.rename.entries[1], (const PVFS_object_ref*) &(sm_p->u.rename.refns[0]), - (const PVFS_object_ref*) &(sm_p->u.rename.parent_refns[0])); + (const PVFS_object_ref*) &(sm_p->u.rename.parent_refns[1])); } PINT_SM_GETATTR_STATE_CLEAR(sm_p->getattr); - if (sm_p->msgarray && (sm_p->msgarray != &(sm_p->msgpair))) - { - free(sm_p->msgarray); - sm_p->msgarray = NULL; - } - + PINT_msgpairarray_destroy(&sm_p->msgarray_op); PINT_SET_OP_COMPLETE; return SM_ACTION_TERMINATE; } @@ -876,7 +889,7 @@ static PINT_sm_action rename_generic_timer( job_id_t tmp_id; ret = job_req_sched_post_timer( - sm_p->msgarray_params.retry_delay, smcb, 0, js_p, &tmp_id, + sm_p->msgarray_op.params.retry_delay, smcb, 0, js_p, &tmp_id, pint_client_sm_context); return ret; @@ -889,7 +902,7 @@ static PINT_sm_action rename_crdirent_retry_or_fail( /* try again (up to a point) if we get a comm. failure. */ if ((PVFS_ERROR_CLASS(-js_p->error_code) == PVFS_ERROR_BMI) && - (sm_p->u.rename.retry_count < sm_p->msgarray_params.retry_limit)) + (sm_p->u.rename.retry_count < sm_p->msgarray_op.params.retry_limit)) { sm_p->u.rename.retry_count++; js_p->error_code = RENAME_CRDIRENT_RETRY; @@ -918,7 +931,7 @@ static PINT_sm_action rename_rmdirent_retry_or_fail( /* try again (up to a point) if we get a comm. failure. 
*/ if ((PVFS_ERROR_CLASS(-js_p->error_code) == PVFS_ERROR_BMI) && - (sm_p->u.rename.retry_count < sm_p->msgarray_params.retry_limit)) + (sm_p->u.rename.retry_count < sm_p->msgarray_op.params.retry_limit)) { sm_p->u.rename.retry_count++; js_p->error_code = RENAME_RMDIRENT_RETRY; diff --git a/src/client/sysint/sys-set-eattr.sm b/src/client/sysint/sys-set-eattr.sm index 21b70e1..a6c5d15 100644 --- a/src/client/sysint/sys-set-eattr.sm +++ b/src/client/sysint/sys-set-eattr.sm @@ -4,9 +4,15 @@ * See COPYING in top-level directory. */ +/** \file PVFS system calls for setting extended attributes + * \ingroup sysint + */ + #include #include +#ifndef WIN32 #include +#endif #include "client-state-machine.h" #include "pvfs2-debug.h" @@ -58,6 +64,7 @@ PVFS_error PVFS_isys_seteattr_list( PVFS_ds_keyval *val_array, int32_t flags, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { int ret = -PVFS_EINVAL; @@ -70,16 +77,19 @@ PVFS_error PVFS_isys_seteattr_list( if(gossip_debug_enabled(GOSSIP_SETEATTR_DEBUG)) { int i = 0; - gossip_debug(GOSSIP_SETEATTR_DEBUG, - "Setting extended attributes:\n"); + int j; char *valBuf = (char *)val_array[i].buffer; + gossip_debug(GOSSIP_SETEATTR_DEBUG,"Setting extended attributes:\n"); for(; i < nkey; ++i) { - gossip_debug(GOSSIP_SETEATTR_DEBUG, - "eattr[%d]: key=%s val=%.*s\n", - i, (char *)key_array[i].buffer, - val_array[i].buffer_sz, (char *)val_array[i].buffer); - } - } + gossip_debug(GOSSIP_SETEATTR_DEBUG,"eattr[%d]: key=%s\n" + , i + ,(char *)key_array[i].buffer); + for (j=0; jmsgarray_params, ref.fs_id); + PINT_init_msgarray_params(sm_p, ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.seteattr.nkey = nkey; sm_p->u.seteattr.key_array = key_array; @@ -107,18 +117,34 @@ PVFS_error PVFS_isys_seteattr_list( sm_p->u.seteattr.flags = flags; sm_p->error_code = 0; sm_p->object_ref = ref; + PVFS_hint_copy(hints, &sm_p->hints); return PINT_client_state_machine_post( smcb, op_id, user_ptr); } +PVFS_error 
PVFS_isys_seteattr( + PVFS_object_ref ref, + const PVFS_credentials *credentials, + PVFS_ds_keyval *key_p, + PVFS_ds_keyval *val_p, + int32_t flags, + PVFS_sys_op_id *op_id, + PVFS_hint hints, + void *user_ptr) +{ + return PVFS_isys_seteattr_list(ref, credentials, 1, key_p, val_p, flags, + op_id, hints, user_ptr); +} + PVFS_error PVFS_sys_seteattr_list( PVFS_object_ref ref, const PVFS_credentials *credentials, int32_t nkey, PVFS_ds_keyval *key_array, PVFS_ds_keyval *val_array, - int32_t flags) + int32_t flags, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; @@ -126,7 +152,7 @@ PVFS_error PVFS_sys_seteattr_list( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_seteattr entered\n"); ret = PVFS_isys_seteattr_list(ref, credentials, - nkey, key_array, val_array, flags, &op_id, NULL); + nkey, key_array, val_array, flags, &op_id, hints, NULL); if (ret) { @@ -152,9 +178,10 @@ PVFS_error PVFS_sys_seteattr( const PVFS_credentials *credentials, PVFS_ds_keyval *key_p, PVFS_ds_keyval *val_p, - int32_t flags) + int32_t flags, + PVFS_hint hints) { - return PVFS_sys_seteattr_list(ref, credentials, 1, key_p, val_p, flags); + return PVFS_sys_seteattr_list(ref, credentials, 1, key_p, val_p, flags, hints); } @@ -163,35 +190,38 @@ static PINT_sm_action set_eattr_setup_msgpair( { struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret = -PVFS_EINVAL; + PINT_sm_msgpair_state *msg_p; + + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; PINT_SERVREQ_SETEATTR_FILL( - sm_p->msgpair.req, + msg_p->req, (*sm_p->cred_p), sm_p->object_ref.fs_id, sm_p->object_ref.handle, sm_p->u.seteattr.flags, sm_p->u.seteattr.nkey, sm_p->u.seteattr.key_array, - sm_p->u.seteattr.val_array + sm_p->u.seteattr.val_array, + sm_p->hints ); - sm_p->msgarray = &(sm_p->msgpair); - sm_p->msgarray_count = 1; - sm_p->msgpair.fs_id = sm_p->object_ref.fs_id; - sm_p->msgpair.handle = sm_p->object_ref.handle; - sm_p->msgpair.retry_flag = 
PVFS_MSGPAIR_RETRY; - sm_p->msgpair.comp_fn = set_eattr_comp_fn; + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = sm_p->object_ref.handle; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = set_eattr_comp_fn; ret = PINT_cached_config_map_to_server( - &sm_p->msgpair.svr_addr, - sm_p->msgpair.handle, - sm_p->msgpair.fs_id); + &msg_p->svr_addr, msg_p->handle, msg_p->fs_id); if (ret) { gossip_err("Failed to map meta server address\n"); js_p->error_code = 0; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -213,7 +243,8 @@ static int set_eattr_comp_fn( int j = 0; int ret = 0; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + PINT_sm_msgpair_state *msg_p; gossip_debug(GOSSIP_CLIENT_DEBUG, "get_eattr completion fn: set_eattr_comp_fn\n"); @@ -223,23 +254,23 @@ static int set_eattr_comp_fn( /* no return value from set eattrib so just check status */ - if (sm_p->msgarray[i].op_status != 0) + if (sm_p->msgarray_op.msgarray[i].op_status != 0) { - ret = sm_p->msgarray[i].op_status; + ret = sm_p->msgarray_op.msgarray[i].op_status; } /* if this is the last response, check all of the status values * and return error code if any requests failed */ - if (i == (sm_p->msgarray_count -1)) + if (i == (sm_p->msgarray_op.count -1)) { - for (j=0; j < sm_p->msgarray_count; j++) - { - if (sm_p->msgarray[j].op_status != 0) - { - return(sm_p->msgarray[j].op_status); - } - } + foreach_msgpair(&sm_p->msgarray_op, msg_p, j) + { + if (msg_p->op_status != 0) + { + return(msg_p->op_status); + } + } } return ret; } diff --git a/src/client/sysint/sys-setattr.sm b/src/client/sysint/sys-setattr.sm index 6877fc7..b369bd5 100644 --- a/src/client/sysint/sys-setattr.sm +++ b/src/client/sysint/sys-setattr.sm @@ -88,6 +88,7 @@ PVFS_error PVFS_isys_setattr( PVFS_sys_attr attr, const PVFS_credentials *credentials, PVFS_sys_op_id *op_id, + 
PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -117,7 +118,7 @@ PVFS_error PVFS_isys_setattr( /* make sure that the permission bits are acceptable */ if ((attr.mask & PVFS_ATTR_SYS_PERM) && (attr.perms & ~PVFS_PERM_VALID) != 0) { - gossip_lerr("PVFS_isys_setattr() failure: invalid or unsupported" + gossip_lerr("PVFS_isys_setattr() failure: invalid or unsupported " "permission bits\n"); return(-PVFS_EINVAL); } @@ -133,10 +134,12 @@ PVFS_error PVFS_isys_setattr( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, ref.fs_id); + PINT_init_msgarray_params(sm_p, ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->object_ref = ref; - + PVFS_hint_copy(hints, &sm_p->hints); + PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle), &ref.handle); + ret = PVFS_util_copy_sys_attr(&sm_p->u.setattr.sys_attr, &attr); if(ret < 0) { @@ -158,14 +161,15 @@ PVFS_error PVFS_isys_setattr( PVFS_error PVFS_sys_setattr( PVFS_object_ref ref, PVFS_sys_attr attr, - const PVFS_credentials *credentials) + const PVFS_credentials *credentials, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_setattr entered\n"); - ret = PVFS_isys_setattr(ref, attr, credentials, &op_id, NULL); + ret = PVFS_isys_setattr(ref, attr, credentials, &op_id, hints, NULL); if (ret) { PVFS_perror_gossip("PVFS_isys_setattr call", ret); @@ -200,7 +204,7 @@ static int setattr_msg_comp_fn(void *v_p, { int status; PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); int is_osd_md = fsid_is_osd_md(sm_p->object_ref.fs_id); int is_osd_meta = fsid_is_osd_meta(sm_p->object_ref.fs_id); @@ -208,8 +212,8 @@ static int setattr_msg_comp_fn(void *v_p, gossip_debug(GOSSIP_CLIENT_DEBUG, "setattr_msg_comp_fn\n"); if (is_osd_meta || is_osd_md) { - 
status = osd_errno_from_status(sm_p->msgpair.osd_command.status); - osd_command_attr_free(&sm_p->msgpair.osd_command); + status = osd_errno_from_status(sm_p->msgarray_op.msgpair.osd_command.status); + osd_command_attr_free(&sm_p->msgarray_op.msgpair.osd_command); return status; } else { assert(resp_p->op == PVFS_SERV_SETATTR); @@ -232,10 +236,11 @@ static PINT_sm_action setattr_msg_setup_msgpair( gossip_debug(GOSSIP_CLIENT_DEBUG," setattr: posting setattr req\n"); - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; - objtype = ((sm_p->u.setattr.sys_attr.mask & PVFS_ATTR_SYS_TYPE) ? - sm_p->u.setattr.sys_attr.objtype : PVFS_TYPE_NONE); + objtype = ((sm_p->u.setattr.sys_attr.mask & PVFS_ATTR_SYS_TYPE) ? + sm_p->u.setattr.sys_attr.objtype : PVFS_TYPE_NONE); if (is_osd_meta || is_osd_md) { /* @@ -246,7 +251,7 @@ static PINT_sm_action setattr_msg_setup_msgpair( uint64_t oid; int i, numattrs = 5; struct attribute_list attr[numattrs]; - struct osd_command *command = &sm_p->msgpair.osd_command; + struct osd_command *command = &sm_p->msgarray_op.msgpair.osd_command; /* Set attr type, page and number */ for (i = 0; i < numattrs; i++) { @@ -300,7 +305,6 @@ static PINT_sm_action setattr_msg_setup_msgpair( js_p->error_code = ret; return 1; } - } else { PINT_SERVREQ_SETATTR_FILL( msg_p->req, @@ -309,7 +313,8 @@ static PINT_sm_action setattr_msg_setup_msgpair( sm_p->object_ref.handle, objtype, sm_p->u.setattr.sys_attr, - 0); + 0, + sm_p->hints); } /* clients should not be able to mess with dfile and distribution @@ -334,6 +339,8 @@ static PINT_sm_action setattr_msg_setup_msgpair( gossip_err("Failed to map meta server address\n"); js_p->error_code = ret; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } diff --git a/src/client/sysint/sys-small-io.sm b/src/client/sysint/sys-small-io.sm index 360c456..de7eded 100644 --- a/src/client/sysint/sys-small-io.sm +++ 
b/src/client/sysint/sys-small-io.sm @@ -32,6 +32,10 @@ static int small_io_completion_fn(void * user_args, struct PVFS_server_resp * resp_p, int index); +enum { + MIRROR_RETRY = 132 +}; + %% nested machine pvfs2_client_small_io_sm @@ -46,7 +50,14 @@ nested machine pvfs2_client_small_io_sm state xfer_msgpairs { jump pvfs2_msgpairarray_sm; - default => cleanup; + default => check_for_retries; + } + + state check_for_retries + { + run small_io_check_for_retries; + MIRROR_RETRY => xfer_msgpairs; + default => cleanup; /*no mirroring, done, or out of retries*/ } state cleanup @@ -76,22 +87,36 @@ static PINT_sm_action small_io_setup_msgpairs( int ret; PVFS_handle datafile_handle; int regions = 0; + PINT_sm_msgpair_state *msg_p; + uint32_t server_nr; js_p->error_code = 0; attr = &sm_p->getattr.attr; assert(attr); - - /* initialize msgarray */ - ret = PINT_msgpairarray_init(&sm_p->msgarray, sm_p->u.io.datafile_count); + + /* initialize msgarray. one msgpair for each handle with data. */ + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, sm_p->u.io.datafile_count); if(ret < 0) { js_p->error_code = ret; return SM_ACTION_COMPLETE; } - sm_p->msgarray_count = sm_p->u.io.datafile_count; - for(i = 0; i < sm_p->u.io.datafile_count; ++i) + /*initialize small_io_ctx array. 
one context for each handle in the file.*/ + sm_p->u.io.small_io_ctx = malloc(attr->u.meta.dfile_count * + sizeof(*sm_p->u.io.small_io_ctx)); + if (!sm_p->u.io.small_io_ctx) + { + PINT_msgpairarray_destroy(&sm_p->msgarray_op); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(sm_p->u.io.small_io_ctx,0,sizeof(*sm_p->u.io.small_io_ctx) * + attr->u.meta.dfile_count); + + + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) { datafile_handle = attr->u.meta.dfile_array[ sm_p->u.io.datafile_index_array[i]]; @@ -111,8 +136,8 @@ static PINT_sm_action small_io_setup_msgpairs( file_data.fsize = 0; file_data.dist = attr->u.meta.dist; file_data.extend_flag = 1; - result.segmax = IO_MAX_REGIONS; + result.bytemax = PINT_REQUEST_TOTAL_BYTES(sm_p->u.io.mem_req); file_req_state = PINT_new_request_state(sm_p->u.io.file_req); mem_req_state = PINT_new_request_state(sm_p->u.io.mem_req); @@ -124,13 +149,13 @@ static PINT_sm_action small_io_setup_msgpairs( sm_p->u.io.file_req_offset + result.bytemax); - file_data.server_nr = sm_p->u.io.datafile_index_array[i]; + file_data.server_nr = sm_p->u.io.datafile_index_array[i]; result.segs = 0; result.bytes = 0; - result.offset_array = sm_p->msgarray[i].req.u.small_io.offsets; - result.size_array = sm_p->msgarray[i].req.u.small_io.sizes; - sm_p->msgarray[i].req.u.small_io.buffer = sm_p->u.io.buffer; + result.offset_array = msg_p->req.u.small_io.offsets; + result.size_array = msg_p->req.u.small_io.sizes; + msg_p->req.u.small_io.buffer = sm_p->u.io.buffer; ret = PINT_process_request( file_req_state, mem_req_state, @@ -157,36 +182,56 @@ static PINT_sm_action small_io_setup_msgpairs( * the response. 
*/ PINT_SERVREQ_SMALL_IO_FILL( - sm_p->msgarray[i].req, + msg_p->req, *sm_p->cred_p, sm_p->object_ref.fs_id, datafile_handle, sm_p->u.io.io_type, - sm_p->u.io.datafile_index_array[i], + sm_p->u.io.datafile_index_array[i], attr->u.meta.dfile_count, attr->u.meta.dist, sm_p->u.io.file_req, sm_p->u.io.file_req_offset, regions, - PINT_REQUEST_TOTAL_BYTES(sm_p->u.io.mem_req)); - - sm_p->msgarray[i].fs_id = sm_p->object_ref.fs_id; - sm_p->msgarray[i].handle = sm_p->object_ref.handle; - sm_p->msgarray[i].retry_flag = PVFS_MSGPAIR_RETRY; - sm_p->msgarray[i].comp_fn = small_io_completion_fn; + PINT_REQUEST_TOTAL_BYTES(sm_p->u.io.mem_req), + sm_p->hints); + + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = datafile_handle; + + /*if we are processing a read request and the source file has mirrored + *handles, then bypass msgpairarray's retry mechanism. SMALL-IO will + *prepare another set of msgpairs using the mirrors and then retry. + */ + if (sm_p->u.io.io_type == PVFS_IO_READ && + attr->mask & PVFS_ATTR_META_MIRROR_DFILES) + { + msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; + } + else + { + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + } + msg_p->comp_fn = small_io_completion_fn; ret = PINT_cached_config_map_to_server( - &sm_p->msgarray[i].svr_addr, datafile_handle, + &msg_p->svr_addr, datafile_handle, sm_p->object_ref.fs_id); if(ret < 0) { - gossip_err("Failed to map meta server address\n"); + gossip_lerr("Failed to map data server address\n"); js_p->error_code = ret; return SM_ACTION_COMPLETE; } + + /*store the original datahandle for later use.*/ + server_nr = msg_p->req.u.small_io.server_nr; + sm_p->u.io.small_io_ctx[server_nr].original_datahandle = msg_p->handle; } js_p->error_code = 0; + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -201,7 +246,20 @@ static int small_io_completion_fn(void * user_args, int index) { struct PINT_smcb *smcb = (struct PINT_smcb *)user_args; - struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, 
PINT_FRAME_CURRENT); + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + PINT_sm_msgarray_op *mop = &(sm_p->msgarray_op); + PINT_sm_msgpair_state *msg_p = &(mop->msgarray[index]); + PINT_client_small_io_ctx *ctx = + &(sm_p->u.io.small_io_ctx[msg_p->req.u.small_io.server_nr]); + uint32_t server_nr = msg_p->req.u.small_io.server_nr; +#ifdef WIN32 + PVFS_object_attr *attr = &(sm_p->getattr.attr); + PVFS_metafile_attr *meta = &(attr->u.meta); +#else + PVFS_object_attr *attr __attribute__((unused)) = &(sm_p->getattr.attr); + PVFS_metafile_attr *meta __attribute__((unused)) = &(attr->u.meta); +#endif + int ret = 0; assert(resp_p->op == PVFS_SERV_SMALL_IO); @@ -230,7 +288,8 @@ static int small_io_completion_fn(void * user_args, { memset(&fdata, 0, sizeof(PINT_request_file_data)); fdata.server_ct = attr->u.meta.dfile_count; - fdata.server_nr = sm_p->u.io.datafile_index_array[index]; + + fdata.server_nr = server_nr; fdata.dist = attr->u.meta.dist; fdata.fsize = resp_p->u.small_io.bstream_size; @@ -303,21 +362,290 @@ static int small_io_completion_fn(void * user_args, PINT_free_request_state(file_req_state); PINT_free_request_state(mem_req_state); } - } + } /*if PVFS_IO_READ*/ + + sm_p->u.io.dfile_size_array[server_nr] = resp_p->u.small_io.bstream_size; + //sm_p->u.io.dfile_size_array[index] = resp_p->u.small_io.bstream_size; - sm_p->u.io.dfile_size_array[index] = resp_p->u.small_io.bstream_size; sm_p->u.io.total_size += resp_p->u.small_io.result_size; - + + /* Let's SMALL-IO know that the msg completed. */ + ctx->msg_completed = 1; + + /* To test fail-over with small-io, uncomment the following code. This + * will force each primary handle to have each of its mirrored handles + * tried on a READ io. If you are testing a file with many primary + * handles and/or many copies and don't want to wade through such a large + * test, then don't set the retry-limit. By default, it is normally set + * at 5. 
You can tweak this value in the pvfs2-fs.conf file, if you + * want. + */ + + gossip_debug(GOSSIP_IO_DEBUG,"handle=%llu \toperation=%d \toffset=%ld " + "\taggregate_size=%ld\n", + llu(msg_p->req.u.small_io.handle), + msg_p->req.u.small_io.io_type, + ((long int)msg_p->req.u.small_io.file_req_offset), + ((long int)msg_p->req.u.small_io.aggregate_size)); + +/* + if ( (sm_p->u.io.io_type == PVFS_IO_READ) + && (attr->mask & PVFS_ATTR_META_MIRROR_DFILES) + == PVFS_ATTR_META_MIRROR_DFILES + && (sm_p->u.io.retry_count < mop->params.retry_limit)) + { + mop->params.retry_limit = meta->mirror_copies_count; + ctx->msg_completed = 0; + } +*/ + return 0; } + +static int small_io_check_for_retries( struct PINT_smcb *smcb + , job_status_s *js_p) +{ +#ifndef WIN32 + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing %s..\n",__func__); +#endif + + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PVFS_object_attr *attr = &(sm_p->getattr.attr); + PVFS_metafile_attr *meta = &(attr->u.meta); + PINT_client_small_io_ctx *ctx = NULL; + PINT_sm_msgarray_op *mop = &sm_p->msgarray_op; + PINT_sm_msgpair_state *msgarray = mop->msgarray; + PINT_sm_msgpair_state *msg = NULL; + PINT_sm_msgpair_state *new_msg = NULL; + PINT_sm_msgarray_op new_mop = {{0},0,0,{0}}; + char *enc_req_bytes = NULL; + + uint32_t retry_msg_count = 0; + uint32_t index = 0; + uint32_t copies = 0; + uint32_t server_nr = 0; + int i = 0; + int j = 0; + int k = 0; + int ret = 0; + +#ifdef WIN32 + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing %s..\n",__func__); +#endif + /*if we are processing a write request, then msgpairarray handles retries. + *if we are processing a read and the source file is mirrored, then + *SMALL-IO handles the retries. + */ + if (sm_p->u.io.io_type == PVFS_IO_WRITE || + ((attr->mask & PVFS_ATTR_META_MIRROR_DFILES) != + PVFS_ATTR_META_MIRROR_DFILES)) + { + return SM_ACTION_COMPLETE; + } + + /* Do any messages need to be retried? 
*/ + for (i=0; icount; i++) + { + server_nr = msgarray[i].req.u.small_io.server_nr; + ctx = &sm_p->u.io.small_io_ctx[server_nr]; + if (!ctx->msg_completed) + retry_msg_count++; + } + + /* no retries needed */ + if (!retry_msg_count) + { + return SM_ACTION_COMPLETE; + } + + /* do we have any retries available? */ + if (sm_p->u.io.retry_count >= mop->params.retry_limit) + { + return SM_ACTION_COMPLETE; + } + + /* okay. let's setup new msgpairs to retry. we will modify the incomplete + * msg pairs stored in msgarray and then copy them into a new msgarray + * before calling msgpairarray.sm. + */ + for (i=0; icount; i++) + { + msg = &msgarray[i]; + server_nr = msg->req.u.small_io.server_nr; + ctx = &sm_p->u.io.small_io_ctx[server_nr]; + + /* don't process completed messages */ + if (ctx->msg_completed) + continue; + + /* for incomplete messages, cleanup memory, if necessary */ + enc_req_bytes = (char *)&(msg->encoded_req); + for (k=0; kencoded_req); k++) + { + if (enc_req_bytes[k] != '\0') + { + PINT_encode_release(&(msg->encoded_req),PINT_ENCODE_REQ); + break; + } + }/*end for*/ + + if (msg->encoded_resp_p) + { + BMI_memfree(msg->svr_addr + ,msg->encoded_resp_p + ,msg->max_resp_sz + ,BMI_RECV); + } + + /* Should we use the original datahandle? */ + if (ctx->retry_original) + { + ctx->retry_original = 0; + msg->handle = ctx->original_datahandle; + msg->req.u.small_io.handle = ctx->original_datahandle; + msg->svr_addr = 0; + ret = PINT_cached_config_map_to_server(&msg->svr_addr + ,msg->handle + ,msg->fs_id); + if (ret) + { + gossip_lerr("Unable to determine the server address " + "for this handle (%llu)" + ,llu(msg->handle)); + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + continue; + }/*end retry_original*/ + + /* get next mirrored handle. note: if a mirrored handle is zero, then + * this means that the creation of this mirrored object failed for its + * particular server. if so, then get the next valid handle. 
as a + * last resort, retry the original handle. + */ + copies = ctx->current_copies_count; + for (;copies < meta->mirror_copies_count; copies++) + { + index = (copies*meta->dfile_count) + server_nr; + if (meta->mirror_dfile_array[index] != 0) + { /* we have found a valid mirrored handle */ + msg->handle = meta->mirror_dfile_array[index]; + break; + } + } + + /* if we haven't found a valid mirrored handle, retry the original + * datahandle. + */ + if ( copies == meta->mirror_copies_count ) + { + msg->handle = ctx->original_datahandle; + ctx->retry_original = 0; + ctx->current_copies_count = 0; + sm_p->u.io.retry_count++; + msg->req.u.small_io.handle = ctx->original_datahandle; + msg->svr_addr = 0; + ret=PINT_cached_config_map_to_server(&(msg->svr_addr) + ,msg->handle + ,msg->fs_id); + if (ret) + { + gossip_lerr("Unable to determine the server address " + "for this handle (%llu)" + ,llu(msg->handle)); + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + continue; + }/*end if we have to use the original*/ + + /* Otherwise, use the discovered mirrored handle */ + msg->req.u.small_io.handle = msg->handle; + msg->svr_addr = 0; + ret=PINT_cached_config_map_to_server(&(msg->svr_addr) + ,msg->handle + ,msg->fs_id); + if (ret) + { + gossip_lerr("Unable to determine the server address " + "for this handle (%llu)" + ,llu(msg->handle)); + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + /* and setup for the next retry event */ + ctx->current_copies_count++; + if (ctx->current_copies_count == meta->mirror_copies_count) + { + ctx->current_copies_count = 0; + ctx->retry_original = 1; + sm_p->u.io.retry_count++; + } + }/*end for each msgpair*/ + + /* Now, create a new msgpair array and populate from the above modified + * messages. 
+ */ + ret = PINT_msgpairarray_init(&new_mop,retry_msg_count); + if (ret) + { + gossip_lerr("Unable to initialize msgarray_op:new_op\n"); + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + /* populate the new msgarray with the modified messages */ + for (i=0, j=0; icount && jreq.u.small_io.server_nr; + ctx = &sm_p->u.io.small_io_ctx[server_nr]; + + /* don't populate with completed messages */ + if (ctx->msg_completed) + continue; + + new_msg = &new_mop.msgarray[j]; + j++; + + new_msg->fs_id = msg->fs_id; + new_msg->handle = msg->handle; + new_msg->comp_fn = msg->comp_fn; + new_msg->svr_addr = msg->svr_addr; + new_msg->req = msg->req; + new_msg->enc_type = msg->enc_type; + new_msg->retry_flag = msg->retry_flag; + }/*end for*/ + + /* Destroy the old msgarray and substitute with the new. Params are left + * in tact. + */ + PINT_msgpairarray_destroy(mop); + mop->count = new_mop.count; + mop->msgarray = new_mop.msgarray; + mop->msgpair = new_mop.msgpair; + + /* Push the msgarray_op and jump to msgpairarray.sm */ + PINT_sm_push_frame(smcb,0,mop); + js_p->error_code=MIRROR_RETRY; + return SM_ACTION_COMPLETE; +}/*end small_io_check_for_retries*/ + + + static int small_io_cleanup( struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_msgpairarray_destroy(sm_p->msgarray); - sm_p->msgarray = NULL; - sm_p->msgarray_count = 0; + + PINT_msgpairarray_destroy(&sm_p->msgarray_op); + + /*release the ctx array; this array is allocated whether or not the + *file to read is mirrored. 
+ */ + free(sm_p->u.io.small_io_ctx); return SM_ACTION_COMPLETE; } diff --git a/src/client/sysint/sys-statfs.sm b/src/client/sysint/sys-statfs.sm index 9d1b49f..826ac64 100644 --- a/src/client/sysint/sys-statfs.sm +++ b/src/client/sysint/sys-statfs.sm @@ -53,6 +53,7 @@ PVFS_error PVFS_isys_statfs( const PVFS_credentials *credentials, PVFS_sysresp_statfs *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PINT_smcb *smcb = NULL; @@ -113,7 +114,7 @@ PVFS_error PVFS_isys_statfs( return ret; } - PINT_init_msgarray_params(&sm_p->msgarray_params, fs_id); + PINT_init_msgarray_params(sm_p, fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.statfs_list.fs_id = fs_id; sm_p->u.statfs_list.details = NULL; @@ -122,20 +123,17 @@ PVFS_error PVFS_isys_statfs( memset(sm_p->u.statfs_list.stat_array, 0, (sm_p->u.statfs_list.count * sizeof(struct PVFS_mgmt_server_stat))); - sm_p->msgarray_count = sm_p->u.statfs_list.count; - sm_p->msgarray = (PINT_sm_msgpair_state *)malloc( - (sm_p->msgarray_count * sizeof(PINT_sm_msgpair_state))); - if (sm_p->msgarray == NULL) + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, sm_p->u.statfs_list.count); + if(ret != 0) { - PVFS_util_release_credentials(sm_p->cred_p); - free(sm_p->u.statfs_list.addr_array); - free(sm_p->u.statfs_list.stat_array); - PINT_smcb_free(smcb); - return -PVFS_ENOMEM; + gossip_err("Failed to initialize %d msgpairs\n", sm_p->u.statfs_list.count); + return ret; } + + PVFS_hint_copy(hints, &sm_p->hints); return PINT_client_state_machine_post( - smcb, op_id, user_ptr); + smcb, op_id, user_ptr); } /** Obtain file system statistics. 
@@ -143,7 +141,8 @@ PVFS_error PVFS_isys_statfs( PVFS_error PVFS_sys_statfs( PVFS_fs_id fs_id, const PVFS_credentials *credentials, - PVFS_sysresp_statfs* resp) + PVFS_sysresp_statfs* resp, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; @@ -151,7 +150,7 @@ PVFS_error PVFS_sys_statfs( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_statfs entered\n"); ret = PVFS_isys_statfs( - fs_id, credentials, resp, &op_id, NULL); + fs_id, credentials, resp, &op_id, hints, NULL); if (ret) { diff --git a/src/client/sysint/sys-symlink.sm b/src/client/sysint/sys-symlink.sm index 896f1b5..dbdf595 100644 --- a/src/client/sysint/sys-symlink.sm +++ b/src/client/sysint/sys-symlink.sm @@ -1,5 +1,5 @@ -/* - * (C) 2003 Clemson University and The University of Chicago +/* + * (C) 2003 Clemson University and The University of Chicago * * See COPYING in top-level directory. */ @@ -150,6 +150,7 @@ PVFS_error PVFS_isys_symlink( const PVFS_credentials *credentials, PVFS_sysresp_symlink *resp, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -191,7 +192,7 @@ PVFS_error PVFS_isys_symlink( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, parent_ref.fs_id); + PINT_init_msgarray_params(sm_p, parent_ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.sym.link_name = entry_name; sm_p->u.sym.link_target = target; @@ -200,6 +201,7 @@ PVFS_error PVFS_isys_symlink( sm_p->u.sym.stored_error_code = 0; sm_p->u.sym.retry_count = 0; sm_p->object_ref = parent_ref; + PVFS_hint_copy(hints, &sm_p->hints); gossip_debug( GOSSIP_CLIENT_DEBUG, "Symlinking %s under parent handle %llu " @@ -218,7 +220,8 @@ PVFS_error PVFS_sys_symlink( char *target, PVFS_sys_attr attr, const PVFS_credentials *credentials, - PVFS_sysresp_symlink *resp) + PVFS_sysresp_symlink *resp, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; @@ -226,7 +229,7 @@ 
PVFS_error PVFS_sys_symlink( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_symlink entered\n"); ret = PVFS_isys_symlink(entry_name, parent_ref, target, attr, - credentials, resp, &op_id, NULL); + credentials, resp, &op_id, hints, NULL); if (ret) { PVFS_perror_gossip("PVFS_isys_symlink call", ret); @@ -263,7 +266,7 @@ static PINT_sm_action symlink_init( js_p->error_code = 0; ret = job_req_sched_post_timer( - sm_p->msgarray_params.retry_delay, smcb, 0, js_p, &tmp_id, + sm_p->msgarray_op.params.retry_delay, smcb, 0, js_p, &tmp_id, pint_client_sm_context); } @@ -273,7 +276,7 @@ static PINT_sm_action symlink_init( PVFS_ATTR_COMMON_ALL, PVFS_TYPE_DIRECTORY, 0); - + return ret; } @@ -282,19 +285,28 @@ static int symlink_create_comp_fn(void *v_p, int index) { PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + gossip_debug(GOSSIP_CLIENT_DEBUG, "symlink_create_comp_fn\n"); - assert(resp_p->op == PVFS_SERV_CREATE); + assert(resp_p->op == PVFS_SERV_BATCH_CREATE); if (resp_p->status != 0) { - return resp_p->status; + return resp_p->status; } /* otherwise, just store the newly symlink'd meta handle */ - sm_p->u.sym.symlink_handle = resp_p->u.create.handle; + + if(resp_p->u.batch_create.handle_count != 1) + { + gossip_err("We requested one metafile handle for the symlink, " + "but we got %d.\n", + resp_p->u.batch_create.handle_count); + return -PVFS_EINVAL; + } + + sm_p->u.sym.symlink_handle = resp_p->u.batch_create.handle_array[0]; gossip_debug(GOSSIP_CLIENT_DEBUG, "*** Got newly created symlink " "handle %llu\n", llu(sm_p->u.sym.symlink_handle)); @@ -306,7 +318,7 @@ static int symlink_setattr_comp_fn(void *v_p, int index) { PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); PVFS_object_attr attr; PVFS_object_ref tmp_ref; @@ -357,7 +369,8 @@ static PINT_sm_action 
symlink_dspace_create_setup_msgpair( gossip_debug(GOSSIP_CLIENT_DEBUG," create: posting create req\n"); - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; ret = PINT_cached_config_get_next_meta( sm_p->object_ref.fs_id, @@ -370,18 +383,21 @@ static PINT_sm_action symlink_dspace_create_setup_msgpair( return SM_ACTION_COMPLETE; } - PINT_SERVREQ_CREATE_FILL( + PINT_SERVREQ_BATCH_CREATE_FILL( msg_p->req, *sm_p->cred_p, sm_p->object_ref.fs_id, PVFS_TYPE_SYMLINK, - meta_handle_extent_array); + 1, + meta_handle_extent_array, + sm_p->hints); msg_p->fs_id = sm_p->object_ref.fs_id; msg_p->handle = meta_handle_extent_array.extent_array[0].first; msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; msg_p->comp_fn = symlink_create_comp_fn; + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -405,7 +421,8 @@ static PINT_sm_action symlink_crdirent_setup_msgpair( gossip_debug(GOSSIP_CLIENT_DEBUG," symlink: posting crdirent req\n"); - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; PINT_SERVREQ_CRDIRENT_FILL( msg_p->req, @@ -413,7 +430,8 @@ static PINT_sm_action symlink_crdirent_setup_msgpair( sm_p->u.sym.link_name, sm_p->u.sym.symlink_handle, sm_p->object_ref.handle, - sm_p->object_ref.fs_id); + sm_p->object_ref.fs_id, + sm_p->hints); msg_p->fs_id = sm_p->object_ref.fs_id; msg_p->handle = sm_p->object_ref.handle; @@ -429,6 +447,8 @@ static PINT_sm_action symlink_crdirent_setup_msgpair( gossip_err("Failed to map meta server address\n"); js_p->error_code = ret; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -453,7 +473,8 @@ static PINT_sm_action symlink_setattr_setup_msgpair( gossip_debug(GOSSIP_CLIENT_DEBUG," symlink: posting setattr req\n"); - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; /* force permissions to 777 for symbolic links 
*/ sm_p->u.sym.sys_attr.perms = 0777; @@ -465,7 +486,8 @@ static PINT_sm_action symlink_setattr_setup_msgpair( sm_p->u.sym.symlink_handle, PVFS_TYPE_SYMLINK, sm_p->u.sym.sys_attr, - PVFS_ATTR_SYMLNK_ALL); + PVFS_ATTR_SYMLNK_ALL, + sm_p->hints); /* fill in symlink specific attributes */ msg_p->req.u.setattr.attr.u.sym.target_path = @@ -488,6 +510,8 @@ static PINT_sm_action symlink_setattr_setup_msgpair( gossip_err("Failed to map meta server address\n"); js_p->error_code = ret; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -509,13 +533,15 @@ static PINT_sm_action symlink_delete_handle_setup_msgpair( js_p->error_code = 0; - PINT_init_msgpair(sm_p, msg_p); + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; PINT_SERVREQ_REMOVE_FILL( msg_p->req, *sm_p->cred_p, sm_p->object_ref.fs_id, - sm_p->u.sym.symlink_handle); + sm_p->u.sym.symlink_handle, + sm_p->hints); msg_p->fs_id = sm_p->object_ref.fs_id; msg_p->handle = sm_p->u.sym.symlink_handle; @@ -534,6 +560,8 @@ static PINT_sm_action symlink_delete_handle_setup_msgpair( gossip_err("Failed to map meta server address\n"); js_p->error_code = ret; } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); return SM_ACTION_COMPLETE; } @@ -560,12 +588,12 @@ static PINT_sm_action symlink_cleanup( sm_p->u.sym.sym_resp->ref = symlink_ref; /* insert newly created symlink into the ncache */ - PINT_ncache_update((const char*) sm_p->u.sym.link_name, - (const PVFS_object_ref*) &symlink_ref, + PINT_ncache_update((const char*) sm_p->u.sym.link_name, + (const PVFS_object_ref*) &symlink_ref, (const PVFS_object_ref*) &(sm_p->object_ref)); } else if ((PVFS_ERROR_CLASS(-sm_p->error_code) == PVFS_ERROR_BMI) && - (sm_p->u.sym.retry_count < sm_p->msgarray_params.retry_limit)) + (sm_p->u.sym.retry_count < sm_p->msgarray_op.params.retry_limit)) { sm_p->u.sym.stored_error_code = 0; sm_p->u.sym.retry_count++; @@ -581,11 +609,7 @@ static PINT_sm_action symlink_cleanup( 
PINT_acache_invalidate(sm_p->object_ref); } - if (sm_p->msgarray && (sm_p->msgarray != &(sm_p->msgpair))) - { - free(sm_p->msgarray); - sm_p->msgarray = NULL; - } + PINT_msgpairarray_destroy(&sm_p->msgarray_op); PINT_SET_OP_COMPLETE; return SM_ACTION_TERMINATE; diff --git a/src/client/sysint/sys-truncate.sm b/src/client/sysint/sys-truncate.sm index b240b92..440e0a8 100644 --- a/src/client/sysint/sys-truncate.sm +++ b/src/client/sysint/sys-truncate.sm @@ -25,18 +25,19 @@ #include "acache.h" #include "pvfs2-internal.h" -#include "osd-util/osd-util.h" -#include "osd-util/osd-defs.h" - -enum -{ - OSD_MSGPAIR = 2001 -}; +#define TRUNCATE_UNSTUFF 100 extern job_context_id pint_client_sm_context; -static int truncate_osd_comp_fn(void *v_p, struct PVFS_server_resp *resp_p, - int index); +static int unstuff_needed( + PVFS_size size, + PINT_dist *dist_p, + uint32_t mask); + +static int unstuff_comp_fn( + void *v_p, + struct PVFS_server_resp *resp_p, + int i); %% @@ -45,23 +46,37 @@ machine pvfs2_client_truncate_sm state truncate_getattr { jump pvfs2_client_getattr_sm; + success => inspect_attr; + default => cleanup; + } + + state inspect_attr + { + run truncate_inspect_attr; + TRUNCATE_UNSTUFF => unstuff_setup_msgpair; success => truncate_datafile_setup_msgpairarray; default => cleanup; } - state truncate_datafile_setup_msgpairarray + state unstuff_setup_msgpair { - run truncate_datafile_setup_msgpairarray; - success => truncate_datafile_xfer_msgpairarray; - OSD_MSGPAIR => truncate_osd_datafile_xfer_msgpairarray; + run truncate_unstuff_setup_msgpair; + success => unstuff_xfer_msgpair; default => cleanup; } - state truncate_osd_datafile_xfer_msgpairarray + state unstuff_xfer_msgpair { - jump pvfs2_osd_msgpairarray_sm; - success => cleanup; - default => truncate_datafile_failure; + jump pvfs2_msgpairarray_sm; + success => truncate_datafile_setup_msgpairarray; + default => cleanup; + } + + state truncate_datafile_setup_msgpairarray + { + run 
truncate_datafile_setup_msgpairarray; + success => truncate_datafile_xfer_msgpairarray; + default => cleanup; } state truncate_datafile_xfer_msgpairarray @@ -93,6 +108,7 @@ PVFS_error PVFS_isys_truncate( PVFS_size size, const PVFS_credentials *credentials, PVFS_sys_op_id *op_id, + PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; @@ -126,17 +142,18 @@ PVFS_error PVFS_isys_truncate( } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_init_msgarray_params(&sm_p->msgarray_params, ref.fs_id); + PINT_init_msgarray_params(sm_p, ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.truncate.size = size; sm_p->object_ref = ref; + PVFS_hint_copy(hints, &sm_p->hints); PINT_SM_GETATTR_STATE_FILL( sm_p->getattr, sm_p->object_ref, PVFS_ATTR_META_ALL|PVFS_ATTR_COMMON_TYPE, PVFS_TYPE_METAFILE, - 0); + PINT_SM_GETATTR_BYPASS_CACHE); return PINT_client_state_machine_post( smcb, op_id, user_ptr); @@ -145,7 +162,8 @@ PVFS_error PVFS_isys_truncate( PVFS_error PVFS_sys_truncate( PVFS_object_ref ref, PVFS_size size, - const PVFS_credentials *credentials) + const PVFS_credentials *credentials, + PVFS_hint hints) { PVFS_error ret = -PVFS_EINVAL, error = 0; PVFS_sys_op_id op_id; @@ -153,7 +171,7 @@ PVFS_error PVFS_sys_truncate( gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_truncate entered with %lld\n", lld(size)); - ret = PVFS_isys_truncate(ref, size, credentials, &op_id, NULL); + ret = PVFS_isys_truncate(ref, size, credentials, &op_id, hints, NULL); if (ret) { PVFS_perror_gossip("PVFS_isys_truncate call", ret); @@ -184,7 +202,6 @@ static PINT_sm_action truncate_datafile_setup_msgpairarray( PINT_sm_msgpair_state *msg_p = NULL; PVFS_size new_dfile_size = 0; PINT_request_file_data file_data; - int is_osd = fsid_is_osd(sm_p->object_ref.fs_id); js_p->error_code = 0; @@ -203,16 +220,13 @@ static PINT_sm_action truncate_datafile_setup_msgpairarray( return 1; } - sm_p->msgarray_count = attr->u.meta.dfile_count; - - sm_p->msgarray = (PINT_sm_msgpair_state 
*)malloc( - sm_p->msgarray_count * sizeof(PINT_sm_msgpair_state)); - if (!sm_p->msgarray) + ret = PINT_msgpairarray_init(&sm_p->msgarray_op, attr->u.meta.dfile_count); + if(ret != 0) { - js_p->error_code = -PVFS_ENOMEM; + gossip_err("Failed to initialize %d msgpairs\n", attr->u.meta.dfile_count); + js_p->error_code = ret; return SM_ACTION_COMPLETE; } - memset(sm_p->msgarray, 0, sm_p->msgarray_count * sizeof(*sm_p->msgarray)); /* Initialize the file data struct */ memset(&file_data, 0, sizeof(file_data)); @@ -221,10 +235,8 @@ static PINT_sm_action truncate_datafile_setup_msgpairarray( file_data.extend_flag = 1; /* Construct truncate messages */ - for (i = 0; i < attr->u.meta.dfile_count; i++) + foreach_msgpair(&sm_p->msgarray_op, msg_p, i) { - msg_p = &sm_p->msgarray[i]; - file_data.server_nr = i; new_dfile_size = attr->u.meta.dist->methods->logical_to_physical_offset( @@ -237,85 +249,34 @@ static PINT_sm_action truncate_datafile_setup_msgpairarray( __func__, lld(sm_p->u.truncate.size), llu(attr->u.meta.dfile_array[i]), lld(new_dfile_size)); - if (is_osd) { - struct attribute_list setlenattr = { - .type = ATTR_SET, - .page = USER_INFO_PG, - .number = UIAP_LOGICAL_LEN, - .len = 8, - }; - ret = osd_command_set_set_attributes(&msg_p->osd_command, - PVFS_OSD_DATA_PID, - attr->u.meta.dfile_array[i]); - if (ret) { - osd_error_xerrno(ret, "%s: set set_attributes", __func__); - goto fail; - } - setlenattr.val = Malloc(8); - if (!setlenattr.val) { - ret = -ENOMEM; - osd_error_xerrno(ret, "%s: alloc logical len", __func__); - goto fail; - } - set_htonll(setlenattr.val, new_dfile_size); - ret = osd_command_attr_build(&msg_p->osd_command, &setlenattr, 1); - if (ret) { - osd_error_xerrno(ret, "%s: attr build", __func__); - goto fail; - } - msg_p->comp_fn = truncate_osd_comp_fn; - js_p->error_code = OSD_MSGPAIR; - } else { - PINT_SERVREQ_TRUNCATE_FILL( - msg_p->req, - *sm_p->cred_p, - sm_p->object_ref.fs_id, - new_dfile_size, - attr->u.meta.dfile_array[i]); - /* - no callback. 
the status will be in the generic response - structure - */ - msg_p->comp_fn = NULL; - } + PINT_SERVREQ_TRUNCATE_FILL( + msg_p->req, + *sm_p->cred_p, + sm_p->object_ref.fs_id, + new_dfile_size, + attr->u.meta.dfile_array[i], + sm_p->hints); + /* + no callback. the status will be in the generic response + structure + */ msg_p->fs_id = sm_p->object_ref.fs_id; msg_p->handle = attr->u.meta.dfile_array[i]; msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = NULL; } sm_p->getattr.size = sm_p->u.truncate.size; - ret = PINT_serv_msgpairarray_resolve_addrs( - sm_p->msgarray_count, sm_p->msgarray); + ret = PINT_serv_msgpairarray_resolve_addrs(&sm_p->msgarray_op); -fail: if (ret) { gossip_err("Error: failed to resolve server addresses.\n"); js_p->error_code = ret; } - return SM_ACTION_COMPLETE; -} -/* - * Check status, and free small attribute value. - */ -static int truncate_osd_comp_fn( - void *v_p, - struct PVFS_server_resp *resp_p __attribute__((unused)), - int index) -{ - PINT_smcb *smcb = v_p; - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PINT_sm_msgpair_state *msg_p = &sm_p->msgarray[index]; - - if (msg_p->osd_command.status != 0) { - gossip_err("%s: truncate command failed\n", __func__); - return osd_errno_from_status(msg_p->osd_command.status); - } - - free(msg_p->osd_command.attr->val); - osd_command_attr_free(&msg_p->osd_command); - return 0; + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); + return SM_ACTION_COMPLETE; } static PINT_sm_action truncate_datafile_failure( @@ -330,11 +291,7 @@ static PINT_sm_action truncate_cleanup( struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); sm_p->error_code = js_p->error_code; - if (sm_p->msgarray && (sm_p->msgarray != &sm_p->msgpair)) - { - free(sm_p->msgarray); - sm_p->msgarray = NULL; - } + PINT_msgpairarray_destroy(&sm_p->msgarray_op); if(sm_p->error_code == 0) { @@ -351,6 +308,172 @@ static PINT_sm_action truncate_cleanup( return SM_ACTION_TERMINATE; } +static 
PINT_sm_action truncate_inspect_attr( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + /* check for write access */ + js_p->error_code = PINT_check_mode( + &sm_p->getattr.attr, sm_p->cred_p->uid, sm_p->cred_p->gid, PINT_ACCESS_WRITABLE); + if(js_p->error_code) + { + return 1; + } + + /* determine if we need to unstuff or not to service this request */ + if(unstuff_needed( + sm_p->u.truncate.size, + sm_p->getattr.attr.u.meta.dist, + sm_p->getattr.attr.mask)) + { + js_p->error_code = TRUNCATE_UNSTUFF; + return SM_ACTION_COMPLETE; + } + + js_p->error_code = 0; + return SM_ACTION_COMPLETE; +} + +/* unstuff_needed() + * + * looks at the I/O pattern requested and compares against the distribution + * to determine if a stuffed file would have to be "unstuffed" in order to + * service the request + * + * returns 1 if unstuff is needed, 0 otherwise. + */ +static int unstuff_needed( + PVFS_size size, + PINT_dist *dist_p, + uint32_t mask) +{ + PVFS_offset first_unstuffed_offset = 0; + PINT_request_file_data fake_file_data; + + gossip_debug(GOSSIP_CLIENT_DEBUG, "sys-truncate checking to see if file should be unstuffed.\n"); + + /* check the flag first to see if file is already explicitly marked as + * unstuffed + */ + if(mask & PVFS_ATTR_META_UNSTUFFED) + { + gossip_debug(GOSSIP_CLIENT_DEBUG, "sys-truncate detected file is already unstuffed.\n"); + return(0); + } + + /* we need to query the distribution to determine what the first offset + * is that does not belong to the first server/datafile. 
We construct a + * fake server data struct for 2 servers and find out what the first + * offset (above zero) is that hits the second server */ + fake_file_data.dist = dist_p; + fake_file_data.server_ct = 2; + fake_file_data.extend_flag = 1; + fake_file_data.fsize = 0; + fake_file_data.server_nr = 1; + + /* call next mapped offset to find the next logical offset that appears + * on the 2nd server + */ + first_unstuffed_offset = dist_p->methods->next_mapped_offset( + dist_p->params, + &fake_file_data, + 0); + + gossip_debug(GOSSIP_CLIENT_DEBUG, "sys-truncate calculated first unstuffed offset as %lld.\n", lld(first_unstuffed_offset)); + + /* compare to see if the file needs to be unstuffed yet */ + if(size > first_unstuffed_offset) + { + gossip_debug(GOSSIP_CLIENT_DEBUG, "sys-truncate will unstuff the file.\n"); + return(1); + } + + gossip_debug(GOSSIP_CLIENT_DEBUG, "sys-truncate will not unstuff the file.\n"); + return(0); +} + +static PINT_sm_action truncate_unstuff_setup_msgpair( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int ret = -PVFS_EINVAL; + PINT_sm_msgpair_state *msg_p = NULL; + + js_p->error_code = 0; + + PINT_msgpair_init(&sm_p->msgarray_op); + msg_p = &sm_p->msgarray_op.msgpair; + + /* note that unstuff must request the same attr mask that we requested + * earlier. If the file has already been unstuffed then we need an + * updated authoritative copy of all of the attrs relevant to I/O. 
+ */ + PINT_SERVREQ_UNSTUFF_FILL( + msg_p->req, + (*sm_p->cred_p), + sm_p->object_ref.fs_id, + sm_p->object_ref.handle, + PVFS_ATTR_META_ALL|PVFS_ATTR_COMMON_TYPE); + + msg_p->fs_id = sm_p->object_ref.fs_id; + msg_p->handle = sm_p->object_ref.handle; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = unstuff_comp_fn; + + ret = PINT_cached_config_map_to_server( + &msg_p->svr_addr, + msg_p->handle, + msg_p->fs_id); + if (ret) + { + gossip_err("Failed to map meta server address\n"); + js_p->error_code = ret; + } + + PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op); + return SM_ACTION_COMPLETE; +} + +/* unstuff_comp_fn() + * + * completion function for unstuff msgpair array + */ +static int unstuff_comp_fn( + void *v_p, + struct PVFS_server_resp *resp_p, + int i) +{ + PINT_smcb *smcb = v_p; + PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + + gossip_debug(GOSSIP_CLIENT_DEBUG, + "unstuff completion fn: unstuff_comp_fn\n"); + + /* only posted one msgpair */ + assert(i==0); + + if (resp_p->status != 0) + { + gossip_debug(GOSSIP_CLIENT_DEBUG, + "unstuff negative response with error code: %d\n", + resp_p->status); + return resp_p->status; + } + + assert(resp_p->op == PVFS_SERV_UNSTUFF); + + PINT_acache_update(sm_p->object_ref, + &resp_p->u.unstuff.attr, + NULL); + + /* replace attrs found by getattr */ + /* PINT_copy_object_attr() takes care of releasing old memory */ + PINT_copy_object_attr(&sm_p->getattr.attr, &resp_p->u.unstuff.attr); + + return(0); +} + /* * Local variables: * mode: c diff --git a/src/client/usrint/Makefile b/src/client/usrint/Makefile new file mode 100644 index 0000000..fcdc019 --- /dev/null +++ b/src/client/usrint/Makefile @@ -0,0 +1,21 @@ + +PVFSDIR=../../../ +INCDIR=-I. 
-I$(PVFSDIR)/include +LIBDIR= + +CFLAGS=$(INCDIR) $(LIBDIR) + +OBJS=iocommon.o openfile-util.o posix-pvfs.o posix.o stdio.o + +INCS=usrint.h iocommon.h stdio-pvfs.h posix-pvfs.h openfile-util.h + +all: $(OBJS) + +clean: + rm *.o + +iocommon.o:$(INCS) +openfile-util.o:$(INCS) +posix-pvfs.o:$(INCS) +posix.o:$(INCS) +stdio.o:$(INCS) diff --git a/src/client/usrint/README b/src/client/usrint/README new file mode 100644 index 0000000..d992677 --- /dev/null +++ b/src/client/usrint/README @@ -0,0 +1,42 @@ + +These codes stack up as follows: + +stdio +posix +posix-pvfs +iocommon/openfile-util +libpvfs + +posix also calls out to glibc for non-pvfs file systems. +user code is expected to call in a stdio, posix, and/or posix-pvfs + +stdio + +This is an actual implementation of stdio based on the joined file +descriptor/pointer implemented in openfile-util - this is here mostly +so calls to libc don't bypass our system calls. + +posix + +These are wrappers that either call the glibc or the pvfs version + +posix-pvfs + +These implement the system calls for pvfs using the iocommon calls + +iocommon + +These are based on the codes in the pvfs apps and other implementations +that access pvfs via the sysint calls + +openfile-util + +These are data structures used in the stdio/posix implementation. Mostly +an open file table and method tables, various other functions and structures +needed for those implementations + +libpvfs + +This is the client sysint code - not in this directory + +WBL diff --git a/src/client/usrint/iocommon.c b/src/client/usrint/iocommon.c new file mode 100644 index 0000000..4060b06 --- /dev/null +++ b/src/client/usrint/iocommon.c @@ -0,0 +1,2850 @@ +/* we will keep a copy and keep one in the environment */ +/* (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines - low level calls to system interface + */ +#define USRINT_SOURCE 1 +#include "usrint.h" +#include "posix-ops.h" +#include "openfile-util.h" +#include "iocommon.h" +#if PVFS_UCACHE_ENABLE +#include "ucache.h" +#endif +#include +#include + +static int iocommon_parse_serverlist(char *serverlist, + struct PVFS_sys_server_list *slist, + PVFS_fs_id fsid); + +/** this is a global analog of errno for pvfs specific + * errors errno is set to EIO and this is set to the + * original code + */ +int pvfs_errno; + +void iocommon_cred(PVFS_credentials **credentials) +{ + static PVFS_credentials creds_buf; + static int cred_init = 0; + + if(!cred_init) + { + memset(&creds_buf, 0, sizeof(creds_buf)); + creds_buf.uid = getuid(); + creds_buf.gid = getgid(); + cred_init = 1; + } + + *credentials = &creds_buf; +} + +int iocommon_fsync(pvfs_descriptor *pd) +{ + int rc = 0; + int orig_errno = errno; + PVFS_credentials *credentials; + + pvfs_sys_init(); + if (!pd || pd->is_in_use != PVFS_FS) + { + errno = EBADF; + return -1; + } + iocommon_cred(&credentials); +#if PVFS_UCACHE_ENABLE + if (ucache_enabled) + { + rc = ucache_flush_file(pd->s->fent); + if(rc != 0) + { + goto errorout; + } + } +#endif + errno = 0; + rc = PVFS_sys_flush(pd->s->pvfs_ref, credentials, PVFS_HINT_NULL); + IOCOMMON_CHECK_ERR(rc); + +errorout: + return rc; +} + +/** + * Find the PVFS handle to an object (file, dir sym) + * assumes an absoluate path + */ +int iocommon_lookup_absolute(const char *abs_path, + PVFS_object_ref *ref, + char *error_path, + int error_path_size) +{ + int rc = 0; + int orig_errno = errno; + char pvfs_path[PVFS_PATH_MAX]; + PVFS_fs_id lookup_fs_id; + PVFS_credentials *credentials; + PVFS_sysresp_lookup resp_lookup; + + /* Initialize any variables */ + memset(&resp_lookup, 0, sizeof(resp_lookup)); + + pvfs_sys_init(); + iocommon_cred(&credentials); + + /* Determine the fs_id and pvfs_path */ + errno = 0; + rc = 
PVFS_util_resolve(abs_path, &lookup_fs_id, pvfs_path, PVFS_PATH_MAX); + if (rc < 0) + { + if (rc == -PVFS_ENOENT) + { + errno = ESTALE; /* this signals open that resolve failed */ + rc = -1; + goto errorout; + } + IOCOMMON_CHECK_ERR(rc); + } + + /* set up buffer to return partially looked up path */ + /* in failure. This is most likely a non-PVFS path */ + + /* Set up error path + TODO: orange-security + if (error_path) + { + memset(error_path, 0, error_path_size); + resp_lookup.error_path = error_path; + resp_lookup.error_path_size = error_path_size; + } + else + { + resp_lookup.error_path = NULL; + resp_lookup.error_path_size = 0; + } + */ + + errno = 0; + rc = PVFS_sys_lookup(lookup_fs_id, pvfs_path, + credentials, &resp_lookup, + PVFS2_LOOKUP_LINK_FOLLOW, NULL); + IOCOMMON_CHECK_ERR(rc); + *ref = resp_lookup.ref; + +errorout: + return rc; +} + +/** + * Lookup a file via the PVFS system interface + * + * Assumes we have already looked up part of the path + * POSIX assumes we can handle at least 1024 char paths + * and potentially 4096 char paths (depending on which + * include file you look at). 
PVFS cannot deal with more + * than 255 chars at a time so we must break long paths + * into pieces and do multiple relative lookups + */ +int iocommon_lookup_relative(const char *rel_path, + PVFS_object_ref parent_ref, /* by value */ + int follow_links, + PVFS_object_ref *ref, + char *error_path, + int error_path_size) +{ + int rc = 0; + int orig_errno = errno; + PVFS_object_ref current_seg_ref; + char current_seg_path[PVFS_NAME_MAX]; + char *cur, *last, *start; + PVFS_credentials *credentials; + PVFS_sysresp_lookup resp_lookup; + + /* Initialize any variables */ + pvfs_sys_init(); + memset(&resp_lookup, 0, sizeof(resp_lookup)); + + /* Set credentials */ + iocommon_cred(&credentials); + + /* Set up error path + TODO: orange-security + if (error_path) + { + memset(error_path, 0, error_path_size); + resp_lookup.error_path = error_path; + resp_lookup.error_path_size = error_path_size; + } + else + { + resp_lookup.error_path = NULL; + resp_lookup.error_path_size = 0; + } + */ + + current_seg_ref = parent_ref; + cur = (char *)rel_path; + last = (char *)rel_path; + start = (char *)rel_path; + + /* loop over segments of the path with max PVFS_NAME_MAX chars */ + while(*cur) + { + /* loop over chars to find a complete path segment */ + /* that is no longer than PVFS_NAME_MAX chars */ + while(*cur) + { + /* find next path seperator / */ + /* cur either points to a slash */ + /* or the first char of the path */ + /* there must be at least one */ + /* so n either case increment it first */ + for(cur++; *cur && *cur != '/'; cur++); + if (cur - start > PVFS_NAME_MAX-1) + { + /* we over-shot the limit go back to last */ + cur = last; + if (cur == start) + { + /* single segment larger than PVFS_NAME_MAX */ + errno = ENAMETOOLONG; + rc = -1; + goto errorout; + } + break; + } + else + { + /* set up to add the next path segment */ + last = cur; + } + } + memset(current_seg_path, 0, PVFS_NAME_MAX); + strncpy(current_seg_path, start, (cur - start) + 1); + start = cur; + last = cur; + 
+ /* Contact server */ + errno = 0; + rc = PVFS_sys_ref_lookup(parent_ref.fs_id, + current_seg_path, + current_seg_ref, + credentials, + &resp_lookup, + follow_links, + PVFS_HINT_NULL); + IOCOMMON_CHECK_ERR(rc); + if (*cur) + { + current_seg_ref = resp_lookup.ref; + } + else + { + *ref = resp_lookup.ref; + } + } + +errorout: + return rc; +} + +/** + * Parses a simple string to find the number and select of servers + * for the LIST layout method + */ +static int iocommon_parse_serverlist(char *serverlist, + struct PVFS_sys_server_list *slist, + PVFS_fs_id fsid) +{ + PVFS_BMI_addr_t *server_array; + int count; + char *tok, *save_ptr; + int i; + + /* expects slist->servers to be NULL */ + if (!slist || slist->servers) + { + errno = EINVAL; + return -1; + } + tok = strtok_r(serverlist, ":", &save_ptr); + if (!tok) + { + errno = EINVAL; + return -1; + } + slist->count = atoi(tok); + PINT_cached_config_count_servers(fsid, PINT_SERVER_TYPE_IO, &count); + if (slist->count < 1 || slist->count > count) + { + errno = EINVAL; + return -1; + } + slist->servers = (PVFS_BMI_addr_t *)malloc(sizeof(PVFS_BMI_addr_t) * + slist->count); + if (!slist->servers) + { + errno = ENOMEM; + return -1; + } + server_array = (PVFS_BMI_addr_t *)malloc(sizeof(PVFS_BMI_addr_t)*count); + if (!server_array) + { + free(slist->servers); + slist->servers = NULL; + errno = ENOMEM; + return -1; + } + PINT_cached_config_get_server_array(fsid, PINT_SERVER_TYPE_IO, + server_array, &count); + for (i = 0; i < slist->count; i++) + { + tok = strtok_r(NULL, ":", &save_ptr); + if (!tok || atoi(tok) < 0 || atoi(tok) >= count) + { + free(slist->servers); + slist->servers = NULL; + free(server_array); + errno = EINVAL; + return -1; + } + slist->servers[i] = server_array[atoi(tok)]; + } + free(server_array); + return 0; +} + + +/** + * Create a file via the PVFS system interface + */ +int iocommon_create_file(const char *filename, + mode_t mode, + PVFS_hint file_creation_param, + PVFS_object_ref parent_ref, + 
PVFS_object_ref *ref ) +{ + int rc = 0; + int orig_errno = errno; + mode_t mode_mask; + mode_t user_mode; + PVFS_sys_attr attr; + PVFS_credentials *credentials; + PVFS_sysresp_create resp_create; + PVFS_sys_dist *dist = NULL; + PVFS_sys_layout *layout = NULL; + PVFS_hint hints = NULL; + + /* Initialize */ + pvfs_sys_init(); + memset(&attr, 0, sizeof(attr)); + memset(&resp_create, 0, sizeof(resp_create)); + + attr.owner = geteuid(); + attr.group = getegid(); + attr.atime = time(NULL); + attr.mtime = attr.atime; + attr.ctime = attr.atime; + attr.mask = PVFS_ATTR_SYS_ALL_SETABLE; + + if (file_creation_param) /* these are hints */ + { + int length; + void *value; + /* check for distribution */ + value = PINT_hint_get_value_by_type(file_creation_param, + PINT_HINT_DISTRIBUTION, + &length); + if (value) + { + dist = PVFS_sys_dist_lookup((char *)value); + if (!dist) /* distribution not found */ + { + rc = EINVAL; + goto errorout; + } + } + /* check for dfile count */ + value = PINT_hint_get_value_by_type(file_creation_param, + PINT_HINT_DFILE_COUNT, + &length); + if (value) + { + attr.dfile_count = *(int *)value; + attr.mask |= PVFS_ATTR_SYS_DFILE_COUNT; + } + /* check for layout */ + value = PINT_hint_get_value_by_type(file_creation_param, + PINT_HINT_LAYOUT, + &length); + if (value) + { + layout = (PVFS_sys_layout *)malloc(sizeof(PVFS_sys_layout)); + layout->algorithm = *(int *)value; + layout->server_list.count = 0; + layout->server_list.servers = NULL; + } + /* check for server list */ + value = PINT_hint_get_value_by_type(file_creation_param, + PINT_HINT_SERVERLIST, + &length); + if (value) + { + if(!layout) + { + /* serverlist makes no sense without a layout */ + rc = EINVAL; + goto errorout; + } + layout->server_list.count = 0; + layout->server_list.servers = NULL; + rc = iocommon_parse_serverlist(value, &layout->server_list, + parent_ref.fs_id); + if (rc < 0) + { + return rc; + } + } + /* check for nocache flag */ + value = 
PINT_hint_get_value_by_type(file_creation_param, + PINT_HINT_NOCACHE, + &length); + if (value) + { + /* this should probably move into the open routine */ + } + /* look for hints handled on the server */ + if (PVFS_hint_check_transfer(&file_creation_param)) + { + hints = file_creation_param; + } + } + + /* Extract the users umask (and restore it to the original value) */ + mode_mask = umask(0); + umask(mode_mask); + user_mode = mode & ~mode_mask; + + /* Set file permissions */ + if (user_mode & S_IXOTH) + { + attr.perms |= PVFS_O_EXECUTE; + } + if (user_mode & S_IWOTH) + { + attr.perms |= PVFS_O_WRITE; + } + if (user_mode & S_IROTH) + { + attr.perms |= PVFS_O_READ; + } + if (user_mode & S_IXGRP) + { + attr.perms |= PVFS_G_EXECUTE; + } + if (user_mode & S_IWGRP) + { + attr.perms |= PVFS_G_WRITE; + } + if (user_mode & S_IRGRP) + { + attr.perms |= PVFS_G_READ; + } + if (user_mode & S_IXUSR) + { + attr.perms |= PVFS_U_EXECUTE; + } + if (user_mode & S_IWUSR) + { + attr.perms |= PVFS_U_WRITE; + } + if (user_mode & S_IRUSR) + { + attr.perms |= PVFS_U_READ; + } + + /* Set credentials */ + iocommon_cred(&credentials); + + /* Contact server */ + errno = 0; + rc = PVFS_sys_create((char*)filename, + parent_ref, + attr, + credentials, + dist, + &resp_create, + layout, + hints); + IOCOMMON_CHECK_ERR(rc); + *ref = resp_create.ref; + +errorout: + if (dist) + { + PVFS_sys_dist_free(dist); + } + if (layout) + { + free(layout); + } + return rc; +} + + +/** pvfs_open implementation, return file info in fd + * assumes path is fully qualified + * if pdir is not NULL, it is the parent directory + */ +pvfs_descriptor *iocommon_open(const char *path, + int flags, + PVFS_hint file_creation_param, + mode_t mode, + pvfs_descriptor *pdir) +{ + int rc = 0; + int orig_errno = errno; + int follow_link; + char *directory = NULL; + char *filename = NULL; + char error_path[256]; + PVFS_object_ref file_ref; + PVFS_object_ref parent_ref; + pvfs_descriptor *pd = NULL; /* invalid pd until file is opened 
*/ + PVFS_sysresp_getattr attributes_resp; + PVFS_credentials *credentials; + + /* Initialize */ + memset(&file_ref, 0, sizeof(file_ref)); + memset(&parent_ref, 0, sizeof(parent_ref)); + memset(&attributes_resp, 0, sizeof(attributes_resp)); + memset(error_path, 0, sizeof(error_path)); + + pvfs_sys_init(); + iocommon_cred(&credentials); + + /* Split the path into a directory and file */ + rc = split_pathname(path, 0, &directory, &filename); + IOCOMMON_RETURN_ERR(rc); + + /* Check the flags to determine if links are followed */ + if (flags & O_NOFOLLOW) + { + follow_link = PVFS2_LOOKUP_LINK_NO_FOLLOW; + } + else + { + follow_link = PVFS2_LOOKUP_LINK_FOLLOW; + } + + /* Get reference for the parent directory */ + if (pdir == NULL) + { + errno = 0; + rc = iocommon_lookup_absolute(directory, &parent_ref, NULL, 0); + if (rc < 0) + { + if (errno == ESTALE) + { + /* special case we are opening the root dir of PVFS */ + errno = 0; + rc = iocommon_lookup_absolute(path, &file_ref, NULL, 0); + /* in this case we don't need to look up anything else */ + /* jump right to found the file code */ + goto foundfile; + } + IOCOMMON_RETURN_ERR(rc); + } + } + else + { + if (directory) + { + errno = 0; + rc = iocommon_lookup_relative(directory, + pdir->s->pvfs_ref, + follow_link, + &parent_ref, + NULL, + 0); + IOCOMMON_RETURN_ERR(rc); + } + else + { + parent_ref = pdir->s->pvfs_ref; + } + } + + /* An open procedure safe for multiprocessing */ + + /* Attempt to find file */ + errno = 0; + rc = iocommon_lookup_relative(filename, + parent_ref, + follow_link, + &file_ref, + error_path, + sizeof(error_path)); +foundfile: + if ((rc == 0) && (flags & O_EXCL) && (flags & O_CREAT)) + { + /* File was found but EXCLUSIVE so fail */ + rc = -1; + errno = EEXIST; + goto errorout; + } + if (rc < 0) + { + /* if an error code was returned */ + if (errno == EIO && pvfs_errno == PVFS_ENOTPVFS && + flags & O_NOTPVFS) + { + struct stat sbuf; + /* try to open using glibc */ + rc = (*glibc_ops.open)(error_path, 
flags & 01777777, mode); + IOCOMMON_RETURN_ERR(rc); + pd = pvfs_alloc_descriptor(&glibc_ops, -1, NULL, 0); + pd->is_in_use = PVFS_FS; /* indicate fd is valid! */ + pd->true_fd = rc; + pd->s->flags = flags; /* open flags */ + fstat(rc, &sbuf); + pd->s->mode = sbuf.st_mode; + gen_mutex_unlock(&pd->s->lock); + gen_mutex_unlock(&pd->lock); + goto errorout; /* not really an error, but bailing out */ + } + if (errno != ENOENT || !(flags & O_CREAT)) + { + /* either file not found and no create flag */ + /* or some other error */ + goto errorout; + } + /* file not found but create flag */ + /* clear errno, it was not an error */ + errno = orig_errno; + errno = 0; + rc = iocommon_create_file(filename, + mode, + file_creation_param, + parent_ref, + &file_ref); + if (rc < 0) + { + /* error on the create */ + if (errno != EEXIST) + { + goto errorout; + } + /* the file exists so must have been + * created by a different process + * just open it + */ + errno = 0; + rc = iocommon_lookup_relative(filename, + parent_ref, + follow_link, + &file_ref, + NULL, + 0); + IOCOMMON_RETURN_ERR(rc); + } + } + + /* If we get here the file was created and/or opened */ + /* Translate the pvfs reference into a file descriptor */ + /* Set the file information */ + /* create fd object */ + pd = pvfs_alloc_descriptor(&pvfs_ops, -1, &file_ref, 0); + if (!pd) + { + rc = -1; + goto errorout; + } + pd->s->flags = flags; /* open flags */ + pd->is_in_use = PVFS_FS; /* indicate fd is valid! 
*/
+
+    /* Get the file's type information from its attributes */
+    errno = 0;
+    rc = PVFS_sys_getattr(pd->s->pvfs_ref,
+                          PVFS_ATTR_SYS_ALL_NOHINT,
+                          credentials,
+                          &attributes_resp,
+                          NULL);
+    IOCOMMON_CHECK_ERR(rc);
+    pd->s->mode = attributes_resp.attr.perms; /* this may change */
+
+    if (attributes_resp.attr.objtype == PVFS_TYPE_METAFILE)
+    {
+        pd->s->mode |= S_IFREG;
+    }
+    if (attributes_resp.attr.objtype == PVFS_TYPE_DIRECTORY)
+    {
+        pd->s->mode |= S_IFDIR;
+        if (pdir)
+        {
+            pd->s->dpath = (char *)malloc(strlen(pdir->s->dpath) + strlen(path) + 2);
+            strcpy(pd->s->dpath, pdir->s->dpath);
+            strcat(pd->s->dpath, "/");
+            strcat(pd->s->dpath, path);
+        }
+        else
+        {
+            pd->s->dpath = (char *)malloc(strlen(path) + 1);
+            strcpy(pd->s->dpath, path);
+        }
+    }
+    if (attributes_resp.attr.objtype == PVFS_TYPE_SYMLINK)
+    {
+        pd->s->mode |= S_IFLNK;
+    }
+    gen_mutex_unlock(&pd->s->lock);
+    gen_mutex_unlock(&pd->lock);
+
+    /* Truncate the file if neccesary */
+    if (flags & O_TRUNC)
+    {
+        errno = 0;
+        rc = PVFS_sys_truncate(file_ref, 0, credentials, NULL);
+        IOCOMMON_CHECK_ERR(rc);
+    }
+
+    /* Move to the end of file if necessary */
+    if (flags & O_APPEND)
+    {
+        rc = iocommon_lseek(pd, 0, 0, SEEK_END);
+    }
+
+errorout:
+
+    /* Free directory and filename memory */
+    if (directory)
+    {
+        free(directory);
+    }
+    if (filename)
+    {
+        free(filename);
+    }
+    if (rc < 0)
+    {
+        return NULL;
+    }
+    else
+    {
+        return pd;
+    }
+}
+
+/**
+ * Implementation of truncate via PVFS
+ *
+ * Asks the server to set the size of the object named by file_ref to
+ * length bytes, using the credentials obtained from iocommon_cred().
+ *
+ * Returns rc as left by IOCOMMON_CHECK_ERR(): 0 on success.
+ * NOTE(review): the macro is defined outside this file section; it is
+ * presumed to translate PVFS errors into errno, set rc to -1 and jump to
+ * errorout on failure - confirm against its definition.
+ */
+int iocommon_truncate(PVFS_object_ref file_ref, off64_t length)
+{
+    int rc = 0;
+    int orig_errno = errno;
+    PVFS_credentials *credentials;
+
+    pvfs_sys_init();
+    iocommon_cred(&credentials);
+    errno = 0;
+    rc = PVFS_sys_truncate(file_ref, length, credentials, NULL);
+    IOCOMMON_CHECK_ERR(rc);
+
+errorout:
+    return rc;
+}
+
+/**
+ * Implementation of lseek for file and directory via PVFS
+ *
+ * The displacement is offset * unit_size, applied relative to the start of
+ * the file (SEEK_SET), the current file pointer (SEEK_CUR) or the size
+ * reported by PVFS_sys_getattr() (SEEK_END).
+ *
+ * For a directory the resulting position is rounded down to a whole number
+ * of PVFS_dirent records and the readdir token is rebuilt by reading that
+ * many entries from the start of the directory.
+ *
+ * Returns the new file pointer, or -1 with errno set (EBADF for an unusable
+ * descriptor, EINVAL for an unknown whence).  pd->s->lock is released on
+ * every path out of the function.
+ */
+off64_t iocommon_lseek(pvfs_descriptor *pd, off64_t offset,
+            PVFS_size unit_size, int whence)
+{
+    int rc = 0;
+    int orig_errno = errno;
+    off64_t new_pos = -1;
+
+    if (!pd || pd->is_in_use != PVFS_FS)
+    {
+        errno = EBADF;
+        return -1;
+    }
+    gen_mutex_lock(&pd->s->lock);
+    switch(whence)
+    {
+        case SEEK_SET:
+        {
+            pd->s->file_pointer = (offset * unit_size);
+            break;
+        }
+        case SEEK_CUR:
+        {
+            pd->s->file_pointer += (offset * unit_size);
+            break;
+        }
+        case SEEK_END:
+        {
+            PVFS_credentials *credentials;
+            PVFS_sysresp_getattr attributes_resp;
+
+            memset(&attributes_resp, 0, sizeof(attributes_resp));
+            iocommon_cred(&credentials);
+            /* Get the file's size in bytes as the ending offset */
+            errno = 0;
+            rc = PVFS_sys_getattr(pd->s->pvfs_ref,
+                                  PVFS_ATTR_SYS_SIZE,
+                                  credentials,
+                                  &attributes_resp,
+                                  NULL);
+            IOCOMMON_CHECK_ERR(rc);
+            pd->s->file_pointer = attributes_resp.attr.size + (offset * unit_size);
+            break;
+        }
+        default:
+        {
+            errno = EINVAL;
+            goto errorout;
+        }
+    }
+    /* if this is a directory adjust token, the hard way */
+    if (S_ISDIR(pd->s->mode))
+    {
+        int dirent_no;
+        PVFS_credentials *credentials;
+        PVFS_sysresp_readdir readdir_resp;
+
+        memset(&readdir_resp, 0, sizeof(readdir_resp));
+        iocommon_cred(&credentials);
+        /* snap the position to a record boundary */
+        dirent_no = pd->s->file_pointer / sizeof(PVFS_dirent);
+        pd->s->file_pointer = dirent_no * sizeof(PVFS_dirent);
+        pd->s->token = PVFS_READDIR_START;
+        if(dirent_no)
+        {
+            /* the token is opaque, so read up to the target entry to get it */
+            errno = 0;
+            rc = PVFS_sys_readdir(pd->s->pvfs_ref,
+                                  pd->s->token,
+                                  dirent_no,
+                                  credentials,
+                                  &readdir_resp,
+                                  NULL);
+            IOCOMMON_CHECK_ERR(rc);
+            pd->s->token = readdir_resp.token;
+            free(readdir_resp.dirent_array);
+        }
+    }
+    /* sample the result while the lock is still held */
+    new_pos = pd->s->file_pointer;
+    gen_mutex_unlock(&pd->s->lock);
+    return new_pos;
+
+errorout:
+    /* every jump to this label happens after the lock was taken */
+    gen_mutex_unlock(&pd->s->lock);
+    return -1;
+}
+
+/**
+ * implements unlink and rmdir
+ *
+ * path is looked up relative to pdir when pdir is given, otherwise as an
+ * absolute path.
+ * dirflag indicates trying to remove a dir (rmdir)
+ *
+ * Returns 0 on success and -1 on failure.  errno is EISDIR when a
+ * directory is named without dirflag and ENOTDIR when dirflag is set but
+ * the object is not a directory.
+ */
+int iocommon_remove (const char *path,
+                     PVFS_object_ref *pdir,
+                     int dirflag)
+{
+    int rc = 0;
+    int orig_errno = errno;
+    char *parentdir = NULL;
+    char *file = NULL;
+    PVFS_object_ref parent_ref, file_ref;
+    PVFS_credentials *credentials;
+    PVFS_sys_attr attr;
+
+    /* Initialize */
+    memset(&parent_ref, 0, sizeof(parent_ref));
+    memset(&file_ref, 0, sizeof(file_ref));
+    memset(&attr, 0, sizeof(attr));
+
+    /* Initialize the system interface for this process */
+    pvfs_sys_init();
+    iocommon_cred(&credentials);
+
+    rc = split_pathname(path, dirflag, &parentdir, &file);
+    IOCOMMON_RETURN_ERR(rc);
+
+    if (!pdir)
+    {
+        errno = 0;
+        rc = iocommon_lookup_absolute(parentdir, &parent_ref, NULL, 0);
+        IOCOMMON_RETURN_ERR(rc);
+    }
+    else
+    {
+        if (parentdir)
+        {
+            errno = 0;
+            rc = iocommon_lookup_relative(parentdir, *pdir,
+                            PVFS2_LOOKUP_LINK_FOLLOW, &parent_ref, NULL, 0);
+            IOCOMMON_RETURN_ERR(rc);
+        }
+        else
+        {
+            parent_ref = *pdir;
+        }
+    }
+
+    /* need to verify this is a file or symlink */
+    errno = 0;
+    rc = iocommon_lookup_relative(file, parent_ref,
+                    PVFS2_LOOKUP_LINK_NO_FOLLOW, &file_ref, NULL, 0);
+    IOCOMMON_RETURN_ERR(rc);
+
+    errno = 0;
+    rc = iocommon_getattr(file_ref, &attr, PVFS_ATTR_SYS_TYPE);
+    IOCOMMON_RETURN_ERR(rc);
+
+    if ((attr.objtype == PVFS_TYPE_DIRECTORY) && !dirflag)
+    {
+        /* rc is still 0 from the getattr above; mark the failure */
+        errno = EISDIR;
+        rc = -1;
+        goto errorout;
+    }
+    else if ((attr.objtype != PVFS_TYPE_DIRECTORY) && dirflag)
+    {
+        errno = ENOTDIR;
+        rc = -1;
+        goto errorout;
+    }
+
+    /* should check to see if any process has file open */
+    /* but at the moment we don't have a way to do that */
+    errno = 0;
+    rc = PVFS_sys_remove(file, parent_ref, credentials, PVFS_HINT_NULL);
+    IOCOMMON_CHECK_ERR(rc);
+
+errorout:
+    /* free(NULL) is a no-op, so no guards are needed */
+    free(parentdir);
+    free(file);
+    if (rc < 0)
+    {
+        return -1;
+    }
+    else
+    {
+        return 0;
+    }
+}
+
+/**
+ * wrapper for unlink
+ */
+int iocommon_unlink(const char *path,
+                    PVFS_object_ref *pdir)
+{
+    return iocommon_remove(path, pdir, 0);
+}
+
+/**
+ * wrapper for rmdir
+ */
+int iocommon_rmdir(const char *path,
+                   PVFS_object_ref *pdir)
+{
+    return iocommon_remove(path, pdir, 1);
+}
+
+/** if dir(s) are NULL, assume name is absolute
+ *
+ * Resolves the parent directory of oldpath and of newpath (relative to
+ * oldpdir / newpdir when given) and asks the server to rename the final
+ * component.  Returns rc as left by the error macros: 0 on success,
+ * negative on failure.
+ */
+int iocommon_rename(PVFS_object_ref *oldpdir, const char *oldpath,
+                    PVFS_object_ref *newpdir, const char *newpath)
+{
+    int rc = 0;
+    int orig_errno = errno;
+    char *olddir = NULL, *newdir = NULL, *oldname = NULL, *newname = NULL;
+    PVFS_object_ref oldref, newref;
+    PVFS_credentials *creds;
+    PVFS_hint hints = PVFS_HINT_NULL;
+
+    /* Initialize */
+    memset(&oldref, 0, sizeof(oldref));
+    memset(&newref, 0, sizeof(newref));
+
+    iocommon_cred(&creds);
+    rc = split_pathname(oldpath, 0, &olddir, &oldname);
+    IOCOMMON_RETURN_ERR(rc);
+
+    if (!oldpdir)
+    {
+        errno = 0;
+        rc = iocommon_lookup_absolute(olddir, &oldref, NULL, 0);
+        IOCOMMON_RETURN_ERR(rc);
+    }
+    else
+    {
+        if (olddir)
+        {
+            errno = 0;
+            rc = iocommon_lookup_relative(olddir, *oldpdir,
+                            PVFS2_LOOKUP_LINK_FOLLOW, &oldref, NULL, 0);
+            IOCOMMON_RETURN_ERR(rc);
+        }
+        else
+        {
+            oldref = *oldpdir;
+        }
+    }
+    rc = split_pathname(newpath, 0, &newdir, &newname);
+    IOCOMMON_RETURN_ERR(rc);
+    if (!newpdir)
+    {
+        errno = 0;
+        rc = iocommon_lookup_absolute(newdir, &newref, NULL, 0);
+        IOCOMMON_RETURN_ERR(rc);
+    }
+    else
+    {
+        if (newdir)
+        {
+            errno = 0;
+            rc = iocommon_lookup_relative(newdir, *newpdir,
+                            PVFS2_LOOKUP_LINK_FOLLOW, &newref, NULL, 0);
+            IOCOMMON_RETURN_ERR(rc);
+        }
+        else
+        {
+            newref = *newpdir;
+        }
+    }
+    errno = 0;
+    rc = PVFS_sys_rename(oldname, oldref, newname, newref, creds, hints);
+    IOCOMMON_CHECK_ERR(rc);
+
+errorout:
+    /* free(NULL) is a no-op, so no guards are needed */
+    free(olddir);
+    free(oldname);
+    free(newdir);
+    free(newname);
+    return rc;
+}
+
+#if PVFS_UCACHE_ENABLE
+/* Returns how many copy operations to/from the ucache will need to be
+ * completed.
+ */
+/* The request is walked block by block; within a block, one copy operation
+ * is counted each time either the current iovec or the current block is
+ * used up.  The first block is limited both by the distance from offset to
+ * the end of its cache block and by req_size, so a request that fits inside
+ * one block never counts bytes that are not part of the request.
+ * Returns 0 for an empty request or an empty iovec array.
+ */
+static int calc_copy_op_cnt(
+    off64_t offset,     /* offset into file where transfer should begin */
+    size_t req_size,    /* total Request Size */
+    int req_blk_cnt,    /* requested Block Count */
+    size_t iovec_count, /* number of iovecs in vector */
+    const struct iovec *vector /* pointer to array of iovecs */
+)
+{
+    int copy_count = 0; /* The number of memcpy operations to be completed */
+    size_t size_left = req_size; /* Bytes left to convert to copy ops */
+    size_t iovec_left = 0; /* bytes left in this iovec */
+    size_t vec_ndx = 0; /* Index into iovec array */
+    size_t block_size_to_transfer = 0; /* bytes of the request in this block */
+    int i;
+
+    /* Nothing to copy; also avoids reading vector[0] of an empty array */
+    if(req_size == 0 || iovec_count == 0 || vector == NULL)
+    {
+        return 0;
+    }
+    iovec_left = vector[0].iov_len;
+
+    /* Compute the size of the first block to be transferred.  It cannot
+     * exceed the request itself, otherwise size_left would wrap below. */
+    block_size_to_transfer = CACHE_BLOCK_SIZE - (offset % CACHE_BLOCK_SIZE);
+    if(block_size_to_transfer > size_left)
+    {
+        block_size_to_transfer = size_left;
+    }
+
+    /* For every block identify source and destination locations in memory and
+     * size of transfer between the ucache and memory buffer while maintaining:
+     * the size left in the request, the size left in the current iovec
+     * segment, the size left in the current block, and which iovec segment is
+     * currently being considered.
+     */
+    for(i = 0; i < req_blk_cnt; i++)
+    {
+        size_t block_left = block_size_to_transfer;
+        while(block_left != 0)
+        {
+            if(iovec_left > block_left)
+            {
+                /* block_left is the limiting factor */
+                size_left -= block_left;
+                iovec_left -= block_left;
+                block_left = 0; /* Done with this block */
+            }
+            else
+            {
+                /* iovec_left is the limiting factor, or this transfer
+                 * completes the iovec and the block together */
+                size_left -= iovec_left;
+                block_left -= iovec_left;
+                vec_ndx++; /* Done with this iovec */
+                if(vec_ndx < iovec_count)
+                {
+                    iovec_left = vector[vec_ndx].iov_len;
+                }
+                else
+                {
+                    /* No iovec remains.  For a consistent request both
+                     * counters are already zero here; forcing them keeps a
+                     * req_size larger than the iovec total from looping on
+                     * a stale iovec_left. */
+                    iovec_left = 0;
+                    block_left = 0;
+                    size_left = 0;
+                }
+            }
+            /* Increment the number of the memcpy calls that will need to be
+             * performed
+             */
+            copy_count++;
+        }
+
+        /* Break when there are no more bytes to be read/written so that the
+         * following if/else code block won't be run unless there is another
+         * block of data to be transferred. */
+        if(size_left == 0)
+        {
+            break;
+        }
+
+        if(size_left >= CACHE_BLOCK_SIZE)
+        {
+            /* Must transfer full block */
+            block_size_to_transfer = CACHE_BLOCK_SIZE;
+        }
+        else
+        {
+            /* size_left is less than a full block's size, so size_left is all
+             * that needs to be transferred to/from this block
+             */
+            block_size_to_transfer = size_left;
+        }
+    }
+    /* Finally, return the number of memcpy operations that must be completed
+     * to satisfy this request.
+     */
+    return copy_count;
+}
+
+/**
+ * Provided two ucache related structures ureq and ucop, determine the
+ * reads/writes to be completed between the ucache and user memory (vector).
+ */
+/* Fills ucop[0..copy_count-1].  Each entry records where in the cache
+ * block the copy starts (cache_pos), where in user memory it starts
+ * (buff_pos), how many bytes it moves (size), and the hit flag and block
+ * index taken from the matching ureq entry.  copy_count must be the number
+ * of operations the request needs; the iovec lengths in vector are expected
+ * to add up to req_size.
+ */
+void calc_copy_ops(
+    off64_t offset,
+    size_t req_size,
+    struct ucache_req_s *ureq,
+    struct ucache_copy_s *ucop,
+    int copy_count,
+    const struct iovec *vector
+)
+{
+    int ureq_ndx = 0;
+    int vec_ndx = 0;
+    size_t size_left = req_size;
+    /* where the request's data begins inside the current cache block:
+     * offset % CACHE_BLOCK_SIZE for the first block, 0 for the others */
+    size_t blk_start = (offset % CACHE_BLOCK_SIZE);
+    size_t blk_tfer_size = CACHE_BLOCK_SIZE - blk_start;
+    size_t blk_left = blk_tfer_size;
+    size_t vec_left = 0;
+    int i;
+
+    if(copy_count <= 0)
+    {
+        return; /* nothing to describe; do not touch vector[0] */
+    }
+    vec_left = vector[0].iov_len;
+
+    for(i = 0; i < copy_count; i++)
+    {
+        int block_done = 0;
+        int iovec_done = 0;
+
+        /* Record necessary info for the future memcpy operation.  The
+         * in-block start is added for every copy into the first block, not
+         * only the first one. */
+        ucop[i].cache_pos = ureq[ureq_ndx].ublk_ptr +
+                            blk_start + (blk_tfer_size - blk_left);
+        ucop[i].buff_pos = vector[vec_ndx].iov_base +
+                           (vector[vec_ndx].iov_len - vec_left);
+        ucop[i].hit = ureq[ureq_ndx].ublk_hit;
+        ucop[i].blk_index = ureq[ureq_ndx].ublk_index;
+
+        if(vec_left > blk_left) /* Finish block */
+        {
+            ucop[i].size = blk_left;
+            vec_left -= blk_left;
+            size_left -= blk_left;
+            block_done = 1;
+        }
+        else if(vec_left < blk_left) /* Finish iovec */
+        {
+            ucop[i].size = vec_left;
+            blk_left -= vec_left;
+            size_left -= vec_left;
+            iovec_done = 1;
+        }
+        else /* They must be equal - finish both */
+        {
+            ucop[i].size = blk_left;
+            size_left -= blk_left;
+            block_done = 1;
+            iovec_done = 1;
+        }
+
+        if(block_done)
+        {
+            /* later blocks start at their beginning and carry at most one
+             * full block of the remaining request */
+            blk_start = 0;
+            if(size_left >= CACHE_BLOCK_SIZE)
+            {
+                blk_tfer_size = CACHE_BLOCK_SIZE;
+            }
+            else
+            {
+                blk_tfer_size = size_left;
+            }
+            blk_left = blk_tfer_size;
+            ureq_ndx++;
+        }
+        if(iovec_done)
+        {
+            vec_ndx++;
+            /* only load the next iovec when another operation will use it;
+             * after the last operation vector[vec_ndx] may not exist */
+            if(i + 1 < copy_count)
+            {
+                vec_left = vector[vec_ndx].iov_len;
+            }
+        }
+    }
+}
+
+/* Performs one copy described by ucop: cache to user memory for
+ * PVFS_IO_READ, user memory to cache otherwise.  Returns the number of
+ * bytes copied.
+ */
+static int cache_readorwrite(
+    enum PVFS_io_type which,
+    struct ucache_copy_s * ucop
+)
+{
+    int rc = 0;
+    if(which == PVFS_IO_READ)
+    {
+        /* Copy from cache to user mem */
+        memcpy(ucop->buff_pos, ucop->cache_pos, ucop->size);
+        rc = (int)ucop->size;
+    }
+    else
+    {
+        /* Copy from user mem to cache */
+        memcpy(ucop->cache_pos, ucop->buff_pos, ucop->size);
+        rc = (int)ucop->size;
+    }
+    return rc;
+}
+
+/* Returns the number of cache blocks touched by req_size bytes starting at
+ * offset: 0 for an empty request, otherwise the first (possibly partial)
+ * block plus every further block the request reaches into.
+ */
+int calc_req_blk_cnt(uint64_t offset, size_t req_size)
+{
+    /* room between offset and the end of the cache block it falls in */
+    size_t first_blk_room = CACHE_BLOCK_SIZE - (offset % CACHE_BLOCK_SIZE);
+    size_t req_left = 0;
+    int blk_cnt = 0;
+
+    /* Check for zero sized request */
+    if(req_size == 0)
+    {
+        return 0;
+    }
+    /* The whole request fits in the first block, aligned or not.  Checking
+     * this first keeps the subtraction below from wrapping around. */
+    if(req_size <= first_blk_room)
+    {
+        return 1;
+    }
+    /* Count next blocks */
+    req_left = req_size - first_blk_room;
+    blk_cnt = req_left / CACHE_BLOCK_SIZE;
+
+    /* Account for last block if necessary */
+    if((req_left - (blk_cnt * CACHE_BLOCK_SIZE)) != 0)
+    {
+        blk_cnt++;
+    }
+
+    return (blk_cnt + 1); /* Add one to account for first block */
+}
+
+/* Reads one whole cache block from the file system into the ucache block
+ * described by blk, holding that block's lock for the duration.
+ * Returns the result of iocommon_vreadorwrite().
+ */
+static int iocommon_fill_ucache_block(pvfs_descriptor *pd,
+                                      struct ucache_req_s *blk)
+{
+    int rc = 0;
+    struct iovec cache_vec = {blk->ublk_ptr, CACHE_BLOCK_SIZE};
+
+    lock_lock(get_lock(blk->ublk_index));
+    rc = iocommon_vreadorwrite(PVFS_IO_READ,
+                               &pd->s->pvfs_ref,
+                               blk->ublk_tag,
+                               1,
+                               &cache_vec);
+    lock_unlock(get_lock(blk->ublk_index));
+    return rc;
+}
+
+#endif /* PVFS_UCACHE_ENABLE */
+
+/** Do a blocking read or write, possibly utilizing the user cache.
+ *  Returns -1 on error, some positive value on success;
+ *
+ *  The cache is bypassed when it is compiled out, disabled, the file has
+ *  no cache entry, or the request would exceed UCACHE_MAX_REQ.  Otherwise
+ *  every cache block covered by the request is looked up (and inserted on
+ *  a miss), missing data is read from the file system where needed, and
+ *  the bytes are copied between the cache blocks and the caller's iovecs.
+ */
+int iocommon_readorwrite(enum PVFS_io_type which,
+                         pvfs_descriptor *pd,
+                         PVFS_size offset,
+                         size_t iovec_count,
+                         const struct iovec *vector)
+{
+    int rc = 0;
+#if PVFS_UCACHE_ENABLE
+    if(ucache_enabled)
+    {
+        if(!pd->s->fent)
+        {
+            lock_lock(ucache_lock);
+            ucache_stats->pseudo_misses++; /* could overflow */
+            these_stats.pseudo_misses++;
+            lock_unlock(ucache_lock);
+        }
+    }
+
+    if(!ucache_enabled || !pd->s->fent)
+    {
+#endif /* PVFS_UCACHE_ENABLE */
+
+        /* Bypass the ucache */
+        errno = 0;
+        rc = iocommon_vreadorwrite(which, &pd->s->pvfs_ref, offset,
+                                   iovec_count, vector);
+        return rc;
+#if PVFS_UCACHE_ENABLE
+    }
+
+
+    /* define all the values we'll need to fill the ucache_req_s struct */
+    int i; /* index used for 'for loops' */
+    int req_blk_cnt = 0; /* how many blocks to r/w */
+    size_t req_size = 0; /* size in bytes of r/w */
+
+    /* How many bytes is the request? */
+    /* These will be contiguous in file starting at offset. */
+    /* Also, they may be spread out in memory */
+    /* Sum the individual segment sizes */
+    for (i = 0; i < iovec_count; i++)
+    {
+        req_size += vector[i].iov_len;
+    }
+
+    if(req_size == 0)
+    {
+        return 0;
+    }
+
+    struct file_ent_s *fent = pd->s->fent;
+    struct mem_table_s *mtbl = get_mtbl(fent->mtbl_blk, fent->mtbl_ent);
+    int mtbl_data_size = 0;
+    if(which == PVFS_IO_WRITE)
+    {
+        mtbl_data_size = CACHE_BLOCK_SIZE * mtbl->num_blocks;
+    }
+
+    /* If the ucache request threshold is exceeded, flush and evict file, then
+     * perform nocache version of readorwrite */
+    if((mtbl_data_size + req_size) > UCACHE_MAX_REQ)
+    {
+        /* Flush dirty blocks */
+        /* NOTE(review): a flush failure is not reported to the caller; the
+         * result returned is that of the uncached transfer below.  Confirm
+         * whether the request should fail instead. */
+        (void)ucache_flush_file(pd->s->fent);
+
+        /*TODO Possibly remove the file - bad idea? What if it's referenced? */
+
+        /* Bypass the ucache */
+        rc = iocommon_vreadorwrite(which, &pd->s->pvfs_ref, offset,
+                                   iovec_count, vector);
+        return rc;
+    }
+
+    /* Keep a running count of the bytes transferred */
+    int transfered = 0;
+
+    /* How many tags? */
+    uint64_t start_tag = offset - (offset % CACHE_BLOCK_SIZE);
+    req_blk_cnt = calc_req_blk_cnt(offset, req_size);
+
+    /* Now that we know the req_blk_cnt, allocate the required
+     * space for tags, hits boolean, and ptr to block in ucache shared memory.
+     */
+    struct ucache_req_s ureq[req_blk_cnt];
+    ureq[0].ublk_tag = start_tag;
+    /* Loop over positions storing tags (ment identifiers) */
+    for(i = 1; i < req_blk_cnt; i++)
+    {
+        ureq[i].ublk_tag = ureq[ (i - 1) ].ublk_tag + CACHE_BLOCK_SIZE;
+    }
+
+    /* Now that tags are set fill in array of lookup responses */
+    for(i = 0; i < req_blk_cnt; i++)
+    {
+        /* if lookup returns nil set char to 0, otherwise 1 */
+        ureq[i].ublk_ptr = ucache_lookup(pd->s->fent, ureq[i].ublk_tag,
+                                         &(ureq[i].ublk_index));
+        if(ureq[i].ublk_ptr == (void *)NIL)
+        {
+            lock_lock(ucache_lock);
+            ucache_stats->misses++; /* could overflow */
+            these_stats.misses++;
+            lock_unlock(ucache_lock);
+            ureq[i].ublk_hit = 0; /* miss */
+            /* Find a place for the block */
+            ureq[i].ublk_ptr = ucache_insert(pd->s->fent, ureq[i].ublk_tag,
+                                             &(ureq[i].ublk_index));
+            if((uint64_t)ureq[i].ublk_ptr == -1)
+            {
+                /* Cannot cache the rest of this file */
+                /* NOTE(review): this case is detected but not handled */
+            }
+            assert(ureq[i].ublk_ptr != (void *)NILP);
+        }
+        else
+        {
+            lock_lock(ucache_lock);
+            ucache_stats->hits++; /* could overflow */
+            these_stats.hits++;
+            lock_unlock(ucache_lock);
+            ureq[i].ublk_hit = 1; /* hit */
+        }
+    }
+
+    if(which == PVFS_IO_READ)
+    {
+        /* Loop over ureq structure and perform reads on misses */
+        for(i = 0; i < req_blk_cnt; i++)
+        {
+            if(ureq[i].ublk_hit == 0)
+            {
+                /* read single block from fs and write into ucache */
+                rc = iocommon_fill_ucache_block(pd, &ureq[i]);
+                if(rc < 0)
+                {
+                    /* do not hand unfilled cache memory to the caller */
+                    return rc;
+                }
+            }
+        }
+    }
+
+    /* Read beginning and end blks into cache before writing if
+     * either end of the request is unaligned.
+     */
+    if(which == PVFS_IO_WRITE) /* Write */
+    {
+        int last = req_blk_cnt - 1;
+        int head_partial = (offset != ureq[0].ublk_tag);
+        int tail_partial = (((offset + req_size) % CACHE_BLOCK_SIZE) != 0);
+
+        /* Consult ureq to see if block was hit or missed */
+        /* Also see if block was aligned or not.  When the request lives in
+         * a single block, a partial tail needs the pre-read as well. */
+        if((ureq[0].ublk_hit == 0) &&
+           (head_partial || (last == 0 && tail_partial)))
+        {
+            /* Read first block from fs into ucache */
+            rc = iocommon_fill_ucache_block(pd, &ureq[0]);
+            if(rc < 0)
+            {
+                return rc;
+            }
+        }
+        if(last > 0 && (ureq[last].ublk_hit == 0) && tail_partial)
+        {
+            /* Read last block from fs into ucache */
+            rc = iocommon_fill_ucache_block(pd, &ureq[last]);
+            if(rc < 0)
+            {
+                return rc;
+            }
+        }
+    }
+
+    /* At this point we know how many blocks the request will cover, the tags
+     * (indexes into file) of the blocks, whether the corresponding block was
+     * hit, and the ptr to the corresponding blk in memory.  Each block is
+     * locked only around the individual copy below.
+     */
+
+    /* If only one iovec then we can assume there will be req_blk_cnt
+     * memcpy operations, otherwise we need to determine how many
+     * memcpy operations will be required so we can create the ucache_copy_s
+     * struct array of proper length.
+     */
+    int copy_count = 0;
+    if(iovec_count == 1)
+    {
+        copy_count = req_blk_cnt;
+    }
+    else
+    {
+        copy_count = calc_copy_op_cnt(offset, req_size, req_blk_cnt,
+                                      iovec_count, vector);
+    }
+
+    /* Create copy structure and fill with appropriate values */
+    struct ucache_copy_s ucop[copy_count];
+    calc_copy_ops(offset, req_size, &ureq[0], &ucop[0], copy_count, vector);
+
+    /* The ucache copy structure should now be filled and we can proceed with
+     * the necessary memcpy operations.
+     */
+    for(i = 0; i < copy_count; i++)
+    {
+        /* Lock the block this copy touches.  The index comes from the copy
+         * descriptor: there can be more copies than blocks, so ureq[] must
+         * not be indexed with i here. */
+        lock_lock(get_lock(ucop[i].blk_index));
+        transfered += cache_readorwrite(which, &ucop[i]);
+        /* Unlock the block */
+        lock_unlock(get_lock(ucop[i].blk_index));
+    }
+    rc = transfered;
+
+#endif /* PVFS_UCACHE_ENABLE */
+    return rc;
+}
+
+/** do a blocking read or write from an iovec
+ *  this just converts to PVFS Request notation
+ *  other interfaces can still do direct reads to
+ *  RorW_nocache below
+ *
+ *  Returns 0 for an empty iovec set, a negative value if either request
+ *  description cannot be built, otherwise the result of
+ *  iocommon_readorwrite_nocache().
+ */
+int iocommon_vreadorwrite(enum PVFS_io_type which,
+                          PVFS_object_ref *por,
+                          PVFS_size offset,
+                          size_t count,
+                          const struct iovec *vector)
+{
+    int rc = 0;
+    int i, size = 0;
+    void *buf;
+    PVFS_Request mem_req;
+    PVFS_Request file_req;
+
+    for(i = 0; i < count; i++)
+    {
+        size += vector[i].iov_len;
+    }
+
+    if(size == 0)
+    {
+        return 0;
+    }
+
+    /* the file side is one contiguous run of size bytes */
+    rc = PVFS_Request_contiguous(size, PVFS_BYTE, &file_req);
+    if(rc < 0)
+    {
+        /* NOTE(review): errno is not set on this path - confirm whether
+         * callers expect the -1/errno convention here */
+        return rc;
+    }
+    /* the memory side mirrors the caller's iovec layout */
+    rc = pvfs_convert_iovec(vector, count, &mem_req, &buf);
+    if(rc < 0)
+    {
+        PVFS_Request_free(&file_req);
+        return rc;
+    }
+    rc = iocommon_readorwrite_nocache(which,
+                                      por,
+                                      offset,
+                                      buf,
+                                      mem_req,
+                                      file_req);
+    PVFS_Request_free(&mem_req);
+    PVFS_Request_free(&file_req);
+
+    return rc;
+}
+
+/** do a blocking read or write
+ *  all sync reads or writes to disk come here
+ */
+int iocommon_readorwrite_nocache(enum PVFS_io_type which,
+                        PVFS_object_ref *por,
+                        PVFS_size offset,
+                        void *buf,
+                        PVFS_Request mem_req,
+                        PVFS_Request file_req)
+{
+    int rc = 0;
+    int orig_errno = errno;
+    PVFS_credentials *creds;
+    PVFS_sysresp_io io_resp;
+
+    if (!por)
+    {
+        errno = EBADF;
+        
return -1;
+    }
+    /* Initialize */
+    memset(&io_resp, 0, sizeof(io_resp));
+
+    iocommon_cred(&creds);
+
+    errno = 0;
+    rc = PVFS_sys_io(*por,
+                     file_req,
+                     offset,
+                     buf,
+                     mem_req,
+                     creds,
+                     &io_resp,
+                     which,
+                     PVFS_HINT_NULL);
+    IOCOMMON_CHECK_ERR(rc);
+    return io_resp.total_completed;
+
+errorout:
+    return rc;
+}
+
+/** Do a nonblocking read or write, possibly utilizing the user cache.
+ *
+ * extra_offset = extra padding to the pd's offset,
+ * independent of the pd's offset
+ * Returns an op_id, response, and ret_mem_request
+ * (which represents an etype_req*count region)
+ * Note that the none of the PVFS_Requests are freed
+ *
+int iocommon_ireadorwrite(enum PVFS_io_type which,
+        pvfs_descriptor *pd,
+        PVFS_size extra_offset,
+        void *buf,
+        PVFS_Request etype_req,
+        PVFS_Request file_req,
+        size_t count,
+        PVFS_sys_op_id *ret_op_id,
+        PVFS_sysresp_io *ret_resp,
+        PVFS_Request *ret_memory_req)
+{
+    #ifndef UCACHE_ENABLED
+    // No cache
+    return iocommon_ireadorwrite_nocache(which, pd, extra_offset, buf,
+        etype_req, file_req, count, ret_op_id, ret_resp, ret_memory_req);
+    #endif // UCACHE_ENABLED
+
+    //if read then check cache..if not there..then read from i/o node and store into correct location
+    //Possibly Data Transfer
+    //Possibly More Cache Routines
+}
+*/
+
+/** Do a nonblocking read or write
+ *
+ * extra_offset = extra padding to the pd's offset,
+ * independent of the pd's offset
+ * Returns an op_id, response, and ret_mem_request
+ * (which represents an etype_req*count region)
+ *
+ * On success (return 0) the memory request handed back through
+ * ret_memory_req belongs to the caller, who frees it once the operation
+ * has completed; etype_req and file_req are never freed here.  If the
+ * operation cannot be posted, the memory request built here is released
+ * before returning and *ret_memory_req is left untouched.
+ *
+ * Returns -1 with errno = EBADF for an unusable descriptor,
+ * PVFS_FD_FAILURE with errno = EBADF when the descriptor's access mode
+ * does not allow the requested direction, and rc as left by
+ * IOCOMMON_CHECK_ERR() when a PVFS call fails.
+ */
+int iocommon_ireadorwrite(enum PVFS_io_type which,
+                          pvfs_descriptor *pd,
+                          PVFS_size extra_offset,
+                          void *buf,
+                          PVFS_Request etype_req,
+                          PVFS_Request file_req,
+                          size_t count,
+                          PVFS_sys_op_id *ret_op_id,
+                          PVFS_sysresp_io *ret_resp,
+                          PVFS_Request *ret_memory_req)
+{
+    int rc = 0;
+    int orig_errno = errno;
+    int have_memory_req = 0; /* set once contig_memory_req must be freed */
+    PVFS_Request contig_memory_req;
+    PVFS_credentials *credentials;
+    PVFS_size req_size;
+
+    if (!pd || pd->is_in_use != PVFS_FS)
+    {
+        errno = EBADF;
+        return -1;
+    }
+    /* Ensure descriptor is used for the correct type of access */
+    if ((which==PVFS_IO_READ && (O_WRONLY == (pd->s->flags & O_ACCMODE))) ||
+        (which==PVFS_IO_WRITE && (O_RDONLY == (pd->s->flags & O_ACCMODE))))
+    {
+        errno = EBADF;
+        return PVFS_FD_FAILURE;
+    }
+
+    /* Create the memory request of a contiguous region: 'mem_req' x count */
+    rc = PVFS_Request_contiguous(count, etype_req, &contig_memory_req);
+    IOCOMMON_CHECK_ERR(rc);
+    have_memory_req = 1;
+
+    iocommon_cred(&credentials);
+
+    errno = 0;
+    rc = PVFS_isys_io(pd->s->pvfs_ref,
+                      file_req,
+                      pd->s->file_pointer+extra_offset,
+                      buf,
+                      contig_memory_req,
+                      credentials,
+                      ret_resp,
+                      which,
+                      ret_op_id,
+                      PVFS_HINT_NULL,
+                      NULL);
+    IOCOMMON_CHECK_ERR(rc);
+
+    assert(*ret_op_id!=-1);/* TODO: handle this */
+
+    /* advance the file pointer by the size of the posted request */
+    PVFS_Request_size(contig_memory_req, &req_size);
+    gen_mutex_lock(&pd->s->lock);
+    pd->s->file_pointer += req_size;
+    gen_mutex_unlock(&pd->s->lock);
+    *ret_memory_req = contig_memory_req;
+    return 0;
+
+errorout:
+    /* nothing was posted, so the memory request is not referenced */
+    if (have_memory_req)
+    {
+        PVFS_Request_free(&contig_memory_req);
+    }
+    return rc;
+}
+
+/** Implements an object attribute get or read
+ *
+ * Fetches the attributes selected by mask for obj and copies them into
+ * *attr.  *attr is only written on success.  Returns rc as left by
+ * IOCOMMON_CHECK_ERR(): 0 on success.
+ */
+int iocommon_getattr(PVFS_object_ref obj, PVFS_sys_attr *attr, uint32_t mask)
+{
+    int rc = 0;
+    int orig_errno = errno;
+    PVFS_credentials *credentials;
+    PVFS_sysresp_getattr getattr_response;
+
+    /* Initialize */
+    memset(&getattr_response, 0, sizeof(getattr_response));
+
+    /* check credentials */
+    iocommon_cred(&credentials);
+
+    /* now get attributes */
+    errno = 0;
+    rc = PVFS_sys_getattr(obj,
+                          mask,
+                          credentials,
+                          &getattr_response, NULL);
+    IOCOMMON_CHECK_ERR(rc);
+    *attr = getattr_response.attr;
+
+errorout:
+    return rc;
+}
+
+/** Implements an object attribute set or write
+ *
+ */
+int iocommon_setattr(PVFS_object_ref obj, PVFS_sys_attr *attr)
+{
+    int rc = 0;
+    int orig_errno = errno;
+    PVFS_credentials *credentials;
+
+    /* check credentials */
+    iocommon_cred(&credentials);
+
+    /* now set attributes */
+    rc = PVFS_sys_setattr(obj, *attr, credentials, NULL);
+    IOCOMMON_CHECK_ERR(rc);
+
+errorout:
+    
return rc;
+}
+
+/**
+ * Fills a struct stat from the PVFS attributes of an open descriptor.
+ *
+ * mask selects which attributes are requested from the server.
+ * Returns -1 with errno = EBADF for an unusable descriptor, otherwise the
+ * result of iocommon_getattr(); buf is only written on success.
+ */
+int iocommon_stat(pvfs_descriptor *pd, struct stat *buf, uint32_t mask)
+{
+    int rc = 0;
+    PVFS_sys_attr attr;
+
+    if (!pd || pd->is_in_use != PVFS_FS)
+    {
+        errno = EBADF;
+        return -1;
+    }
+    /* Initialize */
+    memset(&attr, 0, sizeof(attr));
+
+    errno = 0;
+    rc = iocommon_getattr(pd->s->pvfs_ref, &attr, mask);
+    IOCOMMON_RETURN_ERR(rc);
+
+    /* copy attributes into standard stat struct */
+    buf->st_dev = pd->s->pvfs_ref.fs_id;
+    buf->st_ino = pd->s->pvfs_ref.handle;
+    buf->st_mode = attr.perms;
+    if (attr.objtype == PVFS_TYPE_METAFILE)
+    {
+        buf->st_mode |= S_IFREG;
+    }
+    if (attr.objtype == PVFS_TYPE_DIRECTORY)
+    {
+        buf->st_mode |= S_IFDIR;
+    }
+    if (attr.objtype == PVFS_TYPE_SYMLINK)
+    {
+        buf->st_mode |= S_IFLNK;
+    }
+    buf->st_nlink = 1; /* PVFS does not allow hard links */
+    buf->st_uid = attr.owner;
+    buf->st_gid = attr.group;
+    buf->st_rdev = 0; /* no dev special files */
+    buf->st_size = attr.size;
+    buf->st_blksize = attr.blksize;
+    /* always give st_blocks a defined value, even without a block size */
+    buf->st_blocks = 0;
+    if (attr.blksize)
+    {
+        /* size rounded up to whole blksize blocks */
+        buf->st_blocks = (attr.size + (attr.blksize - 1)) / attr.blksize;
+    }
+    buf->st_atime = attr.atime;
+    buf->st_mtime = attr.mtime;
+    buf->st_ctime = attr.ctime;
+
+errorout:
+    return rc;
+}
+
+/*
+ * The only difference here is that buf is stat64 which
+ * means some of its fields are defined as different types
+ */
+int iocommon_stat64(pvfs_descriptor *pd, struct stat64 *buf, uint32_t mask)
+{
+    int rc = 0;
+    PVFS_sys_attr attr;
+
+    if (!pd || pd->is_in_use != PVFS_FS)
+    {
+        errno = EBADF;
+        return -1;
+    }
+    /* Initialize */
+    memset(&attr, 0, sizeof(attr));
+
+    errno = 0;
+    rc = iocommon_getattr(pd->s->pvfs_ref, &attr, mask);
+    IOCOMMON_RETURN_ERR(rc);
+
+    /* copy attributes into standard stat struct */
+    buf->st_dev = pd->s->pvfs_ref.fs_id;
+    buf->st_ino = pd->s->pvfs_ref.handle;
+    buf->st_mode = attr.perms;
+    if (attr.objtype == PVFS_TYPE_METAFILE)
+    {
+        buf->st_mode |= S_IFREG;
+    }
+    if (attr.objtype == PVFS_TYPE_DIRECTORY)
+    {
+        buf->st_mode |= S_IFDIR;
+    }
+    if (attr.objtype == PVFS_TYPE_SYMLINK)
+    {
+        buf->st_mode |= S_IFLNK;
+    }
+    buf->st_nlink = 1; /* PVFS does not allow hard links */
+    buf->st_uid = attr.owner;
+    buf->st_gid = attr.group;
+    buf->st_rdev = 0; /* no dev special files */
+    buf->st_size = attr.size;
+    buf->st_blksize = attr.blksize;
+    /* always give st_blocks a defined value, even without a block size */
+    buf->st_blocks = 0;
+    if (attr.blksize)
+    {
+        /* size rounded up to whole blksize blocks */
+        buf->st_blocks = (attr.size + (attr.blksize - 1)) / attr.blksize;
+    }
+    buf->st_atime = attr.atime;
+    buf->st_mtime = attr.mtime;
+    buf->st_ctime = attr.ctime;
+
+errorout:
+    return rc;
+}
+
+/**
+ * Changes the owner and/or group of an open descriptor.
+ *
+ * An owner or group of -1 leaves that id unchanged, as with chown(2).
+ * Returns -1 with errno = EBADF for an unusable descriptor, otherwise the
+ * result of iocommon_setattr().
+ */
+int iocommon_chown(pvfs_descriptor *pd, uid_t owner, gid_t group)
+{
+    int rc = 0;
+    PVFS_sys_attr attr;
+
+    if (!pd || pd->is_in_use != PVFS_FS)
+    {
+        errno = EBADF;
+        return -1;
+    }
+    /* Initialize */
+    memset(&attr, 0, sizeof(attr));
+
+    /* only the ids actually supplied are placed in the attribute mask */
+    if (owner != (uid_t)-1)
+    {
+        attr.owner = owner;
+        attr.mask |= PVFS_ATTR_SYS_UID;
+    }
+    if (group != (gid_t)-1)
+    {
+        attr.group = group;
+        attr.mask |= PVFS_ATTR_SYS_GID;
+    }
+
+    errno = 0;
+    rc = iocommon_setattr(pd->s->pvfs_ref, &attr);
+    return rc;
+}
+
+/**
+ * Changes the permission bits of an open descriptor.
+ *
+ * Only the low 12 bits of mode (07777) are sent to the server.
+ * Returns -1 with errno = EBADF for an unusable descriptor, otherwise the
+ * result of iocommon_setattr().
+ */
+int iocommon_chmod(pvfs_descriptor *pd, mode_t mode)
+{
+    int rc = 0;
+    PVFS_sys_attr attr;
+
+    if (!pd || pd->is_in_use != PVFS_FS)
+    {
+        errno = EBADF;
+        return -1;
+    }
+    /* Initialize */
+    memset(&attr, 0, sizeof(attr));
+
+    attr.perms = mode & 07777; /* mask off any stray bits */
+    attr.mask = PVFS_ATTR_SYS_PERM;
+
+    errno = 0;
+    rc = iocommon_setattr(pd->s->pvfs_ref, &attr);
+    return rc;
+}
+
+/**
+ * Creates a directory.
+ *
+ * pvfs_path is resolved relative to pdir when pdir is given, otherwise as
+ * an absolute path.  The new directory is owned by the effective uid/gid
+ * of the caller and gets the permission bits of mode (masked with 07777).
+ * Returns rc as left by the error macros: 0 on success.
+ */
+int iocommon_make_directory(const char *pvfs_path,
+                        const int mode,
+                        PVFS_object_ref *pdir)
+{
+    int rc = 0;
+    int orig_errno = errno;
+    char *parentdir = NULL;
+    char *filename = NULL;
+    PVFS_sys_attr attr;
+    PVFS_object_ref parent_ref;
+    PVFS_sysresp_mkdir resp_mkdir;
+    PVFS_credentials *credentials;
+
+    /* Initialize any variables */
+    memset(&attr, 0, sizeof(attr));
+    memset(&parent_ref, 0, sizeof(parent_ref));
+    memset(&resp_mkdir, 0, sizeof(resp_mkdir));
+
+    pvfs_sys_init();
+    iocommon_cred(&credentials);
+
+    rc = split_pathname(pvfs_path, 1, &parentdir, &filename);
+    IOCOMMON_RETURN_ERR(rc);
+
+    /* Make sure we don't try and create the root or current directory */
+
+    /* lookup parent */
+    if (!pdir)
+    {
+        errno = 0;
+        rc = iocommon_lookup_absolute(parentdir, &parent_ref, NULL, 0);
+        IOCOMMON_RETURN_ERR(rc);
+    }
+    else
+    {
+        if (parentdir)
+        {
+            errno = 0;
+            rc = iocommon_lookup_relative(parentdir, *pdir,
+                            PVFS2_LOOKUP_LINK_FOLLOW, &parent_ref, NULL, 0);
+            IOCOMMON_RETURN_ERR(rc);
+        }
+        else
+        {
+            parent_ref = *pdir;
+        }
+    }
+
+    /* Set the attributes for the new directory */
+    attr.owner = geteuid();
+    attr.group = getegid();
+    attr.perms = mode & 07777; /* mask off stray bits */
+    attr.mask = (PVFS_ATTR_SYS_ALL_SETABLE);
+
+    errno = 0;
+    rc = PVFS_sys_mkdir(filename,
+                        parent_ref,
+                        attr,
+                        credentials,
+                        &resp_mkdir, NULL);
+    IOCOMMON_CHECK_ERR(rc);
+
+errorout:
+    /* free(NULL) is a no-op, so no guards are needed */
+    free(parentdir);
+    free(filename);
+    return(rc);
+}
+
+/**
+ * Copies the target of a symbolic link into buf.
+ *
+ * At most size bytes are written; as with readlink(2) the result is not
+ * NUL terminated when the target is size bytes or longer.
+ * Returns -1 with errno = EBADF for an unusable descriptor and -1 with
+ * errno = EINVAL when buf is NULL, size is not positive, or the object is
+ * not a symbolic link; otherwise the result of iocommon_getattr().
+ * NOTE(review): readlink(2) returns the number of bytes placed in buf,
+ * while this returns 0 on success - confirm what the callers expect.
+ */
+int iocommon_readlink(pvfs_descriptor *pd, char *buf, int size)
+{
+    int rc = 0;
+    PVFS_sys_attr attr;
+
+    if (!pd || pd->is_in_use != PVFS_FS)
+    {
+        errno = EBADF;
+        return -1;
+    }
+    /* a non-positive size would turn into a huge length for strncpy */
+    if (!buf || size <= 0)
+    {
+        errno = EINVAL;
+        return -1;
+    }
+    /* Initialize any variables */
+    memset(&attr, 0, sizeof(attr));
+
+    errno = 0;
+    rc = iocommon_getattr(pd->s->pvfs_ref, &attr, PVFS_ATTR_SYS_TYPE |
+                                                  PVFS_ATTR_SYS_LNK_TARGET);
+    IOCOMMON_RETURN_ERR(rc);
+
+    /* copy the link target out of the attributes */
+    if (attr.objtype == PVFS_TYPE_SYMLINK)
+    {
+        strncpy(buf, attr.link_target, size);
+    }
+    else
+    {
+        errno = EINVAL;
+        return -1;
+    }
+
+errorout:
+    return rc;
+}
+
+int iocommon_symlink(const char *pvfs_path,   /* where new link is created */
+                     const char *link_target, /* contents of the link */
+                     PVFS_object_ref *pdir)   /* supports symlinkat */
+{
+    int rc = 0;
+    int orig_errno = errno;
+    char *parentdir = NULL;
+    char *filename = NULL;
+    PVFS_sys_attr attr;
+    PVFS_object_ref parent_ref;
+    PVFS_sysresp_symlink resp_symlink;
+    PVFS_credentials *credentials;
+
+    /* Initialize any
variables */
+    memset(&attr, 0, sizeof(attr));
+    memset(&parent_ref, 0, sizeof(parent_ref));
+    memset(&resp_symlink,0, sizeof(resp_symlink));
+
+    pvfs_sys_init();
+    iocommon_cred(&credentials);
+
+
+    rc = split_pathname(pvfs_path, 0, &parentdir, &filename);
+    IOCOMMON_RETURN_ERR(rc);
+
+    /* Make sure we don't try and create the root or current directory */
+
+    /* lookup parent */
+    if (!pdir)
+    {
+        errno = 0;
+        rc = iocommon_lookup_absolute(parentdir, &parent_ref, NULL, 0);
+        IOCOMMON_RETURN_ERR(rc);
+    }
+    else
+    {
+        if (parentdir)
+        {
+            errno = 0;
+            rc = iocommon_lookup_relative(parentdir, *pdir,
+                            PVFS2_LOOKUP_LINK_FOLLOW, &parent_ref, NULL, 0);
+            IOCOMMON_RETURN_ERR(rc);
+        }
+        else
+        {
+            parent_ref = *pdir;
+        }
+    }
+
+    /* Set the attributes for the new symbolic link */
+    /* NOTE(review): the real uid/gid are used here - confirm whether the
+     * effective ids are intended instead */
+    attr.owner = getuid();
+    attr.group = getgid();
+    attr.perms = 0777;
+    attr.mask = (PVFS_ATTR_SYS_ALL_SETABLE);
+
+    errno = 0;
+    rc = PVFS_sys_symlink(filename,
+                          parent_ref,
+                          (char *)link_target,
+                          attr,
+                          credentials,
+                          &resp_symlink,
+                          NULL);
+    IOCOMMON_CHECK_ERR(rc);
+
+errorout:
+    if (parentdir)
+    {
+        free(parentdir);
+    }
+    if (filename)
+    {
+        free(filename);
+    }
+    return(rc);
+}
+
+/**
+ * Reads directory entries into an array of struct dirent.
+ *
+ * As many whole records as fit in size bytes (capped at
+ * PVFS_REQ_LIMIT_DIRENT_COUNT) are requested, starting at the position
+ * remembered in the descriptor's token.  The token and file pointer are
+ * advanced past the entries returned.
+ *
+ * Returns the number of bytes stored in dirp, 0 at end of directory, or a
+ * negative value with errno set: EBADF for an unusable descriptor, ENOENT
+ * when the descriptor is not a directory, EINVAL when the buffer cannot
+ * hold a single record.
+ * NOTE(review): getdents(2) documents ENOTDIR for a non-directory; ENOENT
+ * is kept here because callers may test for it - confirm.
+ */
+int iocommon_getdents(pvfs_descriptor *pd, /**< pvfs file descriptor */
+                      struct dirent *dirp, /**< pointer to buffer */
+                      unsigned int size)   /**< number of bytes in buffer */
+{
+    int rc = 0;
+    int orig_errno = errno;
+    int name_max;
+    int count;  /* number of records to read */
+    PVFS_credentials *credentials;
+    PVFS_sysresp_readdir readdir_resp;
+    PVFS_ds_position token;
+    int bytes = 0, i = 0;
+
+    if (!pd || pd->is_in_use != PVFS_FS)
+    {
+        errno = EBADF;
+        return -1;
+    }
+    if (pd->s->token == PVFS_READDIR_END)
+    {
+        return 0;  /* EOF */
+    }
+
+    if (!S_ISDIR(pd->s->mode))
+    {
+        errno = ENOENT;
+        return -1;
+    }
+
+    /* posix deals in bytes in buffer and bytes read */
+    /* PVFS deals in number of records to read or were read */
+    count = size / sizeof(struct dirent);
+    if (count < 1)
+    {
+        /* the buffer cannot hold even one record */
+        errno = EINVAL;
+        return -1;
+    }
+    if (count > PVFS_REQ_LIMIT_DIRENT_COUNT)
+    {
+        count = PVFS_REQ_LIMIT_DIRENT_COUNT;
+    }
+
+    /* Initialize */
+    memset(&readdir_resp, 0, sizeof(readdir_resp));
+
+    iocommon_cred(&credentials);
+
+    gen_mutex_lock(&pd->s->lock);
+    token = pd->s->token == 0 ? PVFS_READDIR_START : pd->s->token;
+
+    errno = 0;
+    rc = PVFS_sys_readdir(pd->s->pvfs_ref,
+                          token,
+                          count,
+                          credentials,
+                          &readdir_resp,
+                          NULL);
+    IOCOMMON_CHECK_ERR(rc);
+
+    pd->s->token = readdir_resp.token;
+    name_max = PVFS_util_min(NAME_MAX, PVFS_NAME_MAX);
+    for(i = 0; i < readdir_resp.pvfs_dirent_outcount; i++)
+    {
+        /* copy a PVFS_dirent to a struct dirent */
+        dirp->d_ino = (long)readdir_resp.dirent_array[i].handle;
+        dirp->d_off = pd->s->file_pointer;
+        /* the record length describes the struct dirent being written */
+        dirp->d_reclen = sizeof(struct dirent);
+        memcpy(dirp->d_name, readdir_resp.dirent_array[i].d_name, name_max);
+        dirp->d_name[name_max] = 0;
+        pd->s->file_pointer += sizeof(struct dirent);
+        bytes += sizeof(struct dirent);
+        dirp++;
+    }
+    gen_mutex_unlock(&pd->s->lock);
+    free(readdir_resp.dirent_array);
+    return bytes;
+
+errorout:
+    gen_mutex_unlock(&pd->s->lock);
+    return rc;
+}
+
+/**
+ * Reads directory entries into an array of struct dirent64.
+ *
+ * Same contract as iocommon_getdents(), with struct dirent64 records.
+ */
+int iocommon_getdents64(pvfs_descriptor *pd,
+                      struct dirent64 *dirp,
+                      unsigned int size)
+{
+    int rc = 0;
+    int orig_errno = errno;
+    int name_max;
+    int count;
+    PVFS_credentials *credentials;
+    PVFS_sysresp_readdir readdir_resp;
+    PVFS_ds_position token;
+    int bytes = 0, i = 0;
+
+    if (!pd || pd->is_in_use != PVFS_FS)
+    {
+        errno = EBADF;
+        return -1;
+    }
+    if (pd->s->token == PVFS_READDIR_END)
+    {
+        return 0;  /* EOF */
+    }
+
+    if (!S_ISDIR(pd->s->mode))
+    {
+        errno = ENOENT;
+        return -1;
+    }
+
+    /* number of whole records the caller's buffer can hold */
+    count = size / sizeof(struct dirent64);
+    if (count < 1)
+    {
+        /* the buffer cannot hold even one record */
+        errno = EINVAL;
+        return -1;
+    }
+    if (count > PVFS_REQ_LIMIT_DIRENT_COUNT)
+    {
+        count = PVFS_REQ_LIMIT_DIRENT_COUNT;
+    }
+
+    /* Initialize */
+    memset(&readdir_resp, 0, sizeof(readdir_resp));
+
+    iocommon_cred(&credentials);
+
+    gen_mutex_lock(&pd->s->lock);
+    token = pd->s->token == 0 ? PVFS_READDIR_START : pd->s->token;
+
+    errno = 0;
+    rc = PVFS_sys_readdir(pd->s->pvfs_ref,
+                          token,
+                          count,
+                          credentials,
+                          &readdir_resp,
+                          NULL);
+    IOCOMMON_CHECK_ERR(rc);
+
+    pd->s->token = readdir_resp.token;
+    name_max = PVFS_util_min(NAME_MAX, PVFS_NAME_MAX);
+    for(i = 0; i < readdir_resp.pvfs_dirent_outcount; i++)
+    {
+        /* copy a PVFS_dirent to a struct dirent64 */
+        dirp->d_ino = (uint64_t)readdir_resp.dirent_array[i].handle;
+        dirp->d_off = (off64_t)pd->s->file_pointer;
+        dirp->d_reclen = sizeof(struct dirent64);
+        memcpy(dirp->d_name, readdir_resp.dirent_array[i].d_name, name_max);
+        dirp->d_name[name_max] = 0;
+        pd->s->file_pointer += sizeof(struct dirent64);
+        bytes += sizeof(struct dirent64);
+        dirp++;
+    }
+    gen_mutex_unlock(&pd->s->lock);
+    free(readdir_resp.dirent_array);
+    return bytes;
+
+errorout:
+    gen_mutex_unlock(&pd->s->lock);
+    return rc;
+}
+
+/* Read entries from a directory.
+ *
+ * \param token opaque value used to track position in directory
+ * when more than one read is required.
+ * \param pvfs_dirent_incount maximum number of entries to read, if
+ * available, starting from token.
+PVFS_error PVFS_sys_readdir(
+    PVFS_object_ref ref,
+    PVFS_ds_position token,
+    int32_t pvfs_dirent_incount,
+    const PVFS_credentials *credentials,
+    PVFS_sysresp_readdir *resp,
+    PVFS_hint hints)
+ */
+/* Read entries from a directory and their associated attributes
+ * in an efficient manner.
+ *
+ * \param token opaque value used to track position in directory
+ * when more than one read is required.
+ * \param pvfs_dirent_incount maximum number of entries to read, if
+ * available, starting from token.
+PVFS_error PVFS_sys_readdirplus( + PVFS_object_ref ref, + PVFS_ds_position token, + int32_t pvfs_dirent_incount, + const PVFS_credentials *credentials, + uint32_t attrmask, + PVFS_sysresp_readdirplus *resp, + PVFS_hint hints) + */ + +/** Checks to see if caller has the requested permissions + * + */ +int iocommon_access(const char *pvfs_path, + const int mode, + const int flags, + PVFS_object_ref *pdir) +{ + int rc = 0; + int orig_errno = errno; + char *parentdir = NULL; + char *file = NULL; + int followflag = PVFS2_LOOKUP_LINK_FOLLOW; + int uid = -1, gid = -1; + PVFS_object_ref parent_ref, file_ref; + PVFS_credentials *credentials; + PVFS_sys_attr attr; + + /* Initialize */ + memset(&parent_ref, 0, sizeof(parent_ref)); + memset(&file_ref, 0, sizeof(file_ref)); + memset(&attr, 0, sizeof(attr)); + + /* Initialize the system interface for this process */ + pvfs_sys_init(); + iocommon_cred(&credentials); + + rc = split_pathname(pvfs_path, 0, &parentdir, &file); + IOCOMMON_RETURN_ERR(rc); + + if (!pdir) + { + errno = 0; + rc = iocommon_lookup_absolute(parentdir, &parent_ref, NULL, 0); + IOCOMMON_RETURN_ERR(rc); + } + else + { + if (parentdir) + { + errno = 0; + rc = iocommon_lookup_relative(parentdir, *pdir, + PVFS2_LOOKUP_LINK_FOLLOW, &parent_ref, NULL, 0); + IOCOMMON_RETURN_ERR(rc); + } + else + { + parent_ref = *pdir; + } + } + /* Attempt to find file */ + if (flags & AT_SYMLINK_NOFOLLOW) + { + followflag = PVFS2_LOOKUP_LINK_NO_FOLLOW; + } + errno = 0; + rc = iocommon_lookup_relative(file, + parent_ref, + followflag, + &file_ref, + NULL, + 0); + IOCOMMON_CHECK_ERR(rc); + /* Get file atributes */ + errno = 0; + rc = iocommon_getattr(file_ref, &attr, PVFS_ATTR_SYS_COMMON_ALL); + IOCOMMON_RETURN_ERR(rc); + + if (flags & AT_EACCESS) + { + uid = getuid(); + gid = getgid(); + } + else + { + uid = geteuid(); + gid = getegid(); + } + if (uid == attr.owner) + { + if (((R_OK & mode) && !(S_IRUSR & attr.perms)) || + ((W_OK & mode) && !(S_IWUSR & attr.perms)) || + ((X_OK & 
mode) && !(S_IXUSR & attr.perms))) + { + errno = EACCES; + rc = -1; + goto errorout; + } + } + else if (gid == attr.group) + { + if (((R_OK & mode) && !(S_IRGRP & attr.perms)) || + ((W_OK & mode) && !(S_IWGRP & attr.perms)) || + ((X_OK & mode) && !(S_IXGRP & attr.perms))) + { + errno = EACCES; + rc = -1; + goto errorout; + } + } + else + { + if (((R_OK & mode) && !(S_IROTH & attr.perms)) || + ((W_OK & mode) && !(S_IWOTH & attr.perms)) || + ((X_OK & mode) && !(S_IXOTH & attr.perms))) + { + errno = EACCES; + rc = -1; + goto errorout; + } + } + /* Access is allowed, rc should be 0 */ + +errorout: + if (parentdir) + { + free(parentdir); + } + if (file) + { + free(file); + } + return rc; +} + +int iocommon_statfs(pvfs_descriptor *pd, struct statfs *buf) +{ + int rc = 0; + int orig_errno = errno; + int block_size = 2*1024*1024; /* optimal transfer size 2M */ + PVFS_credentials *credentials; + PVFS_sysresp_statfs statfs_resp; + + if (!pd || pd->is_in_use != PVFS_FS) + { + errno = EBADF; + return -1; + } + /* Initialize the system interface for this process */ + pvfs_sys_init(); + iocommon_cred(&credentials); + memset(&statfs_resp, 0, sizeof(statfs_resp)); + + errno = 0; + rc = PVFS_sys_statfs(pd->s->pvfs_ref.fs_id, + credentials, + &statfs_resp, + NULL); + IOCOMMON_CHECK_ERR(rc); + /* assign fields for statfs struct */ + /* this is a fudge because they don't line up */ + buf->f_type = PVFS2_SUPER_MAGIC; + buf->f_bsize = block_size; + buf->f_blocks = statfs_resp.statfs_buf.bytes_total/1024; + buf->f_bfree = statfs_resp.statfs_buf.bytes_available/1024; + buf->f_bavail = statfs_resp.statfs_buf.bytes_available/1024; + buf->f_files = statfs_resp.statfs_buf.handles_total_count; + buf->f_ffree = statfs_resp.statfs_buf.handles_available_count; + buf->f_fsid.__val[0] = statfs_resp.statfs_buf.fs_id; + buf->f_fsid.__val[1] = 0; + buf->f_namelen = PVFS_NAME_MAX; + +errorout: + return rc; +} + +int iocommon_statfs64(pvfs_descriptor *pd, struct statfs64 *buf) +{ + int rc = 0; + int 
orig_errno = errno; + int block_size = 2*1024*1024; /* optimal transfer size 2M */ + PVFS_credentials *credentials; + PVFS_sysresp_statfs statfs_resp; + + if (!pd || pd->is_in_use != PVFS_FS) + { + errno = EBADF; + return -1; + } + /* Initialize the system interface for this process */ + pvfs_sys_init(); + iocommon_cred(&credentials); + memset(&statfs_resp, 0, sizeof(statfs_resp)); + + errno = 0; + rc = PVFS_sys_statfs(pd->s->pvfs_ref.fs_id, + credentials, + &statfs_resp, + NULL); + IOCOMMON_CHECK_ERR(rc); + /* assign fields for statfs struct */ + /* this is a fudge because they don't line up */ + buf->f_type = PVFS2_SUPER_MAGIC; + buf->f_bsize = block_size; + buf->f_blocks = statfs_resp.statfs_buf.bytes_total/1024; + buf->f_bfree = statfs_resp.statfs_buf.bytes_available/1024; + buf->f_bavail = statfs_resp.statfs_buf.bytes_available/1024; + buf->f_files = statfs_resp.statfs_buf.handles_total_count; + buf->f_ffree = statfs_resp.statfs_buf.handles_available_count; + buf->f_fsid.__val[0] = statfs_resp.statfs_buf.fs_id; + buf->f_fsid.__val[1] = 0; + buf->f_namelen = PVFS_NAME_MAX; + +errorout: + return rc; +} + +int iocommon_sendfile(int sockfd, pvfs_descriptor *pd, + off64_t *offset, size_t count) +{ + int rc = 0, bytes_read = 0; + PVFS_Request mem_req, file_req; + char *buffer; + int buffer_size = (8*1024*1024); + + if (!pd || pd->is_in_use != PVFS_FS) + { + errno = EBADF; + return -1; + } + buffer = (char *)malloc(buffer_size); + + PVFS_Request_contiguous(buffer_size, PVFS_BYTE, &mem_req); + file_req = PVFS_BYTE; + + errno = 0; + rc = iocommon_readorwrite_nocache(PVFS_IO_READ, + &pd->s->pvfs_ref, + *offset + bytes_read, + buffer, + mem_req, + file_req); + while(rc > 0) + { + int flags = 0; + bytes_read += rc; + if (bytes_read + buffer_size < count) + { + flags = MSG_MORE; + } + rc = glibc_ops.send(sockfd, buffer, rc, flags); + if (rc < 0) + { + break; + } + errno = 0; + rc = iocommon_readorwrite_nocache(PVFS_IO_READ, + &pd->s->pvfs_ref, + *offset + bytes_read, + 
buffer, + mem_req, + file_req); + } + PVFS_Request_free(&mem_req); + free(buffer); + if (rc < 0) + { + return -1; + } + else + { + *offset += bytes_read; + return bytes_read; + } +} + +/** Implelments an extended attribute get or read + * + * The PVFS server enforces namespaces as prefixes on the + * attribute keys. Thus they are not checked here. + * Probably would be more efficient to do so. + */ +int iocommon_geteattr(pvfs_descriptor *pd, + const char *key_p, + void *val_p, + int size) +{ + int rc = 0; + int orig_errno = errno; + PVFS_credentials *credentials; + PVFS_ds_keyval key, val; + + if (!pd || pd->is_in_use != PVFS_FS) + { + errno = EBADF; + return -1; + } + /* Initialize */ + memset(&key, 0, sizeof(key)); + memset(&val, 0, sizeof(val)); + + /* check credentials */ + iocommon_cred(&credentials); + + key.buffer = (char *)key_p; + key.buffer_sz = strlen(key_p) + 1; + val.buffer = val_p; + val.buffer_sz = size; + + /* now get attributes */ + errno = 0; + rc = PVFS_sys_geteattr(pd->s->pvfs_ref, + credentials, + &key, + &val, + NULL); + switch (rc) + { + case -PVFS_ENOENT: + /* file exists if we have a pd */ + /* either attr does not exist or */ + /* we do not have access to it */ + rc = -PVFS_ENODATA; + break; + case -PVFS_EMSGSIZE: + /* buffer was too small for the attribute value */ + rc = -PVFS_ERANGE; + } + IOCOMMON_CHECK_ERR(rc); + rc = val.read_sz; + +errorout: + return rc; +} + +/** Implelments an extended attribute set or write + * + * The flag can be used to control whether to overwrite + * or create a new attribute. The default is to create + * if needed and overwrite if a previus value exists. + * The PVFS server enforces namespaces as prefixes on the + * attribute keys. Thus they are not checked here. + * Probably would be more efficient to do so. 
+ */ +int iocommon_seteattr(pvfs_descriptor *pd, + const char *key_p, + const void *val_p, + int size, + int flag) +{ + int rc = 0; + int pvfs_flag = 0; + int orig_errno = errno; + PVFS_credentials *credentials; + PVFS_ds_keyval key, val; + + if (!pd || pd->is_in_use != PVFS_FS) + { + errno = EBADF; + return -1; + } + /* Initialize */ + memset(&key, 0, sizeof(key)); + memset(&val, 0, sizeof(val)); + + /* check credentials */ + iocommon_cred(&credentials); + + key.buffer = (char *)key_p; + key.buffer_sz = strlen(key_p) + 1; + val.buffer = (void *)val_p; + val.buffer_sz = size; + + if (flag & XATTR_CREATE)//TODO + { + pvfs_flag |= PVFS_XATTR_CREATE; + } + if (flag & XATTR_REPLACE)//TODO + { + pvfs_flag |= PVFS_XATTR_REPLACE; + } + + /* now set attributes */ + errno = 0; + rc = PVFS_sys_seteattr(pd->s->pvfs_ref, + credentials, + &key, + &val, + pvfs_flag, + NULL); + if (rc == -PVFS_ENOENT) + { + /* file exists if we have a pd */ + /* either attr does not exist or */ + /* we do not have access to it */ + rc = -PVFS_ENODATA; + } + IOCOMMON_CHECK_ERR(rc); + +errorout: + return rc; +} + +/** Implements an extended attribute delete or remove + * + * The PVFS server enforces namespaces as prefixes on the + * attribute keys. Thus they are not checked here. + * Probably would be more efficient to do so. 
+ */ +int iocommon_deleattr(pvfs_descriptor *pd, + const char *key_p) +{ + int rc = 0; + int orig_errno = errno; + PVFS_credentials *credentials; + PVFS_ds_keyval key; + + if (!pd || pd->is_in_use != PVFS_FS) + { + errno = EBADF; + return -1; + } + /* Initialize */ + memset(&key, 0, sizeof(key)); + + /* check credentials */ + iocommon_cred(&credentials); + + key.buffer = (char *)key_p; + key.buffer_sz = strlen(key_p) + 1; + + /* now set attributes */ + errno = 0; + rc = PVFS_sys_deleattr(pd->s->pvfs_ref, + credentials, + &key, + NULL); + if (rc == -PVFS_ENOENT) + { + /* file exists if we have a pd */ + /* either attr does not exist or */ + /* we do not have access to it */ + rc = -PVFS_ENODATA; + } + IOCOMMON_CHECK_ERR(rc); + +errorout: + return rc; +} + +/** Implelments an extended attribute key list + * + * All of the keys for athe specified object are returned + * in the specified buffer, NULL delimited. The number + * of keys is returned. If the size passed in is 0, then + * only the number of keys available is returned. + * The PVFS server enforces namespaces as prefixes on the + * attribute keys. Thus they are not checked here. + * Probably would be more efficient to do so. 
+ */ +int iocommon_listeattr(pvfs_descriptor *pd, + char *list, + int size, + int *retsize) +{ + int rc = 0; + int orig_errno = errno; + int k, total_size, total_keys, max_keys; + int32_t nkey; + PVFS_ds_position token; + PVFS_credentials *credentials; + PVFS_sysresp_listeattr listeattr_resp; + + if (!pd || pd->is_in_use != PVFS_FS) + { + errno = EBADF; + return -1; + } + /* Initialize */ + memset(&listeattr_resp, 0, sizeof(listeattr_resp)); + token = PVFS_ITERATE_START; + total_size = 0; + total_keys = 0; + nkey = 0; + + /* check credentials */ + iocommon_cred(&credentials); + + /* find number of attributes */ + errno = 0; + rc = PVFS_sys_listeattr(pd->s->pvfs_ref, + token, + nkey, + credentials, + &listeattr_resp, + NULL); + if (rc == -PVFS_ENOENT) + { + /* file exists if we have a pd */ + /* either attr does not exist or */ + /* we do not have access to it */ + rc = -PVFS_ENODATA; + } + IOCOMMON_CHECK_ERR(rc); + + /* get available keys */ + nkey = listeattr_resp.nkey; + + /* allocate key_array */ + if (nkey > PVFS_MAX_XATTR_LISTLEN) + { + max_keys = PVFS_MAX_XATTR_LISTLEN; + } + else + { + max_keys = nkey; + } + listeattr_resp.key_array = (PVFS_ds_keyval *)malloc(max_keys * + sizeof(PVFS_ds_keyval)); + for (k = 0; k < max_keys; k++) + { + listeattr_resp.key_array[k].buffer_sz = PVFS_MAX_XATTR_NAMELEN; + listeattr_resp.key_array[k].buffer = + (char *)malloc(PVFS_MAX_XATTR_NAMELEN); + } + + /* now list attributes */ + do + { + token = listeattr_resp.token; + listeattr_resp.nkey = max_keys; + errno = 0; + rc = PVFS_sys_listeattr(pd->s->pvfs_ref, + token, + nkey, + credentials, + &listeattr_resp, + NULL); + if (rc == -PVFS_ENOENT) + { + /* file exists if we have a pd */ + /* either attr does not exist or */ + /* we do not have access to it */ + rc = -PVFS_ENODATA; + } + IOCOMMON_CHECK_ERR(rc); + + /* copy keys out to caller */ + for (k = 0; k < listeattr_resp.nkey; k++) + { + if (size > 0) + { + if (total_size + listeattr_resp.key_array[k].read_sz > size) + { + 
total_size = size; + errno = ERANGE; + rc = -1; + break; /* ran out of buffer space */ + } + strncpy(list, listeattr_resp.key_array[k].buffer, + listeattr_resp.key_array[k].read_sz); + list += listeattr_resp.key_array[k].read_sz; + } + total_size += listeattr_resp.key_array[k].read_sz; + } + total_keys += listeattr_resp.nkey; + } while (total_keys < nkey && listeattr_resp.nkey > 0 && + total_size < size); + *retsize = total_size; + /* free key_array */ + for (k = 0; k < max_keys; k++) + { + free(listeattr_resp.key_array[k].buffer); + } + free(listeattr_resp.key_array); + +errorout: + return rc; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=4 sts=4 sw=4 expandtab + */ + diff --git a/src/client/usrint/iocommon.h b/src/client/usrint/iocommon.h new file mode 100644 index 0000000..296e804 --- /dev/null +++ b/src/client/usrint/iocommon.h @@ -0,0 +1,293 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines + */ +#ifndef IOCOMMON_H +#define IOCOMMON_H 1 + +#include +#include +#include +#include + +/* Define GNU's O_NOFOLLOW flag to be false if its not set */ +#ifndef O_NOFOLLOW +#define O_NOFOLLOW 0 +#endif + +/* Base pvfs file handle info: + * native-lib and mpi-io both wrap around + * pvfs_descriptor for their file table entries + */ + +#define PVFS_NULL_OBJ ((PVFS_object_ref *)NULL) + +/** A structure used in the cache enabled version of iocommon_readorwrite. 
+ */ +struct ucache_req_s +{ + uint64_t ublk_tag; /* ucache block tag (byte index into file) */ + unsigned char ublk_hit; /* boolean indicating 'block found in cache' */ + void *ublk_ptr; /* where in ucache memory to read block from or write to */ + uint16_t ublk_index; /* index of ucache block in shared memory segment */ +}; + +struct ucache_copy_s +{ + void * cache_pos; + void * buff_pos; + size_t size; + unsigned char hit; + uint16_t blk_index; +}; + + +/* this global is set when a pvfs specific error is returned + * and errno is set to EIO + */ +extern int pvfs_errno; + +/* prototypes */ + +/* Perform PVFS initialization if not already finished */ +void iocommon_ensure_init(void); + +void iocommon_cred(PVFS_credentials **credentials); + +extern int iocommon_fsync(pvfs_descriptor *pvfs_info); + +/* + * Find the PVFS handle to an object (file, dir sym) + * assumes an absoluate path + */ +extern int iocommon_lookup_absolute(const char *abs_path, + PVFS_object_ref *ref, + char *error_path, + int error_path_size); + +/* + * Lookup a file via the PVFS system interface + */ +extern int iocommon_lookup_relative(const char *rel_path, + PVFS_object_ref parent_ref, + int follow_links, + PVFS_object_ref *ref, + char *error_path, + int error_path_size); + +/* + * Create a file via the PVFS system interface + */ +extern int iocommon_create_file(const char *filename, + mode_t file_permission, + PVFS_hint file_creation_param, + PVFS_object_ref parent_ref, + PVFS_object_ref *ref); + + +/* pvfs_open implementation, return file info in fd */ +/* assumes path is fully qualified */ +/* if pdir is not NULL, it is the parent directory */ +extern pvfs_descriptor *iocommon_open(const char *pathname, int flag, + PVFS_hint file_creation_param, + mode_t file_permission, + pvfs_descriptor *pdir); + +extern int iocommon_truncate(PVFS_object_ref file_ref, + off64_t length); + +extern off64_t iocommon_lseek(pvfs_descriptor *pd, + off64_t offset, + PVFS_size unit_size, + int whence); + +/* + * 
pvfs_unlink implementation + * need to verify this is a file or symlink + * use rmdir for directory + */ +extern int iocommon_remove (const char *pathname, PVFS_object_ref *pdir, int dirflag); + +extern int iocommon_unlink(const char *pathname, PVFS_object_ref *pdir); + +extern int iocommon_rmdir(const char *pathname, PVFS_object_ref *pdir); + +/* if dir(s) are NULL, assume name is absolute */ +extern int iocommon_rename(PVFS_object_ref *oldpdir, + const char *oldname, + PVFS_object_ref *newpdir, + const char *newname); + +/* R/W Wrapper Functions, possibly utilizing user cache + * do a blocking read or write + */ +extern int iocommon_readorwrite(enum PVFS_io_type which, + pvfs_descriptor *pd, + PVFS_size offset, + size_t iovec_count, + const struct iovec *vector); + +extern int iocommon_vreadorwrite(enum PVFS_io_type which, + PVFS_object_ref *por, + PVFS_size offset, + size_t count, + const struct iovec *vector); + +extern int iocommon_ireadorwrite(enum PVFS_io_type which, + pvfs_descriptor *pd, + PVFS_size extra_offset, + void *buf, + PVFS_Request etype_req, + PVFS_Request file_req, + size_t count, + PVFS_sys_op_id *ret_op_id, + PVFS_sysresp_io *ret_resp, + PVFS_Request *ret_memory_req); + +/* do a blocking read or write + */ +extern int iocommon_readorwrite_nocache(enum PVFS_io_type which, + PVFS_object_ref *por, + PVFS_size offset, + void *buf, + PVFS_Request mem_req, + PVFS_Request file_req); + +/* + * Do a nonblocking read or write + * extra_offset = extra padding to the pd's offset, + * independent of the pd's offset + * Returns an op_id, response, and ret_mem_request + * (which represents an etype_req*count region) + * Note that the none of the PVFS_Requests are freed + */ +extern int iocommon_ireadorwrite_nocache(enum PVFS_io_type which, + pvfs_descriptor *pd, + PVFS_size extra_offset, + void *buf, + PVFS_Request etype_req, + PVFS_Request file_req, + size_t count, + PVFS_sys_op_id *ret_op_id, + PVFS_sysresp_io *ret_resp, + PVFS_Request *ret_memory_req); + 
+extern int iocommon_getattr(PVFS_object_ref obj, PVFS_sys_attr *attr, uint32_t mask); + +extern int iocommon_setattr(PVFS_object_ref obj, PVFS_sys_attr *attr); + +extern int iocommon_stat(pvfs_descriptor *pd, struct stat *buf, uint32_t mask); + +/* + * The only difference here is that buf is stat64 which + * means some of its fields are defined as different types + */ +extern int iocommon_stat64(pvfs_descriptor *pd, struct stat64 *buf, uint32_t mask); + +extern int iocommon_statfs(pvfs_descriptor *pd, struct statfs *buf); + +extern int iocommon_statfs64(pvfs_descriptor *pd, struct statfs64 *buf); + +extern int iocommon_seteattr(pvfs_descriptor *pd, const char *key, const void *val, int size, int flag); + +extern int iocommon_geteattr(pvfs_descriptor *pd, const char *key, void *val, int size); + +extern int iocommon_listeattr(pvfs_descriptor *pd, char *list, int size, int *numkeys); + +extern int iocommon_deleattr(pvfs_descriptor *pd, const char * key); + +extern int iocommon_chown(pvfs_descriptor *pd, uid_t owner, gid_t group); + +extern int iocommon_chmod(pvfs_descriptor *pd, mode_t mode); + +extern int iocommon_make_directory(const char *pvfs_path, + int mode, + PVFS_object_ref *pdir); + +extern int iocommon_readlink(pvfs_descriptor *pd, char *buf, int size); + +extern int iocommon_symlink(const char *pvfs_path, + const char *link_target, + PVFS_object_ref *pdir); + +extern int iocommon_getdents(pvfs_descriptor *pd, + struct dirent *dirp, + unsigned int count); + +extern int iocommon_getdents64(pvfs_descriptor *pd, + struct dirent64 *dirp, + unsigned int count); + +extern int iocommon_access(const char *pvfs_path, + int mode, + int flags, + PVFS_object_ref *pdir); + +extern int iocommon_sendfile(int sockfd, + pvfs_descriptor *pd, + off64_t *offset, + size_t count); + + +/* Functions in this file generally define a label errorout + * for cleanup before exit and return an int rc which is -1 + * on error with the error code in errno, 0 on success. 
+ * IOCOMMON_RETURN_ERR checks a return code from a function + * returns the same protocol and goto errorout: if less than 0 + * IOCOMMON_CHECK_ERR assumes the return code contains the + * negative of the error code as encoded by PVFS sysint + * functions and decodes these before jumping to errorout. + * PVFS sysint calls always return error codes in the return + * value, but system calls inside them might set errno to + * a value that may or may not have meaning for the programmer + * calling this library. Steps are taken to ensure errno + * is not modified unless the code in this lib wants to + * modify it. CHECK_ERR should be called after each sysint + * call to correctly pass error codes. + */ +extern PVFS_error PINT_errno_mapping[]; +#define IOCOMMON_RETURN_ERR(rc) \ +do { \ + if ((rc) < 0) \ + { \ + goto errorout; \ + } \ +} while (0) + +#define IOCOMMON_CHECK_ERR(rc) \ +do { \ + errno = orig_errno; \ + if ((rc) < 0) \ + { \ + if (IS_PVFS_NON_ERRNO_ERROR(-(rc))) \ + { \ + pvfs_errno = -rc; \ + errno = EIO; \ + } \ + else if (IS_PVFS_ERROR(-(rc))) \ + { \ + errno = PINT_errno_mapping[(-(rc)) & 0x7f]; \ + } \ + rc = -1; \ + goto errorout; \ + } \ +} while (0) + + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + +#endif diff --git a/src/client/usrint/openfile-util.c b/src/client/usrint/openfile-util.c new file mode 100644 index 0000000..705086d --- /dev/null +++ b/src/client/usrint/openfile-util.c @@ -0,0 +1,1205 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines - routines to manage open files + */ +#define USRINT_SOURCE 1 +#include "usrint.h" +#include +#ifndef SYS_readdir +#define SYS_readdir 89 +#endif +#include "posix-ops.h" +#include "openfile-util.h" +#include "posix-pvfs.h" +#ifdef PVFS_AIO_ENABLE +#include "aiocommon.h" +#endif + +#if PVFS_UCACHE_ENABLE +#include "ucache.h" +#endif + +static struct glibc_redirect_s +{ + int (*stat)(int ver, const char *path, struct stat *buf); + int (*stat64)(int ver, const char *path, struct stat64 *buf); + int (*fstat)(int ver, int fd, struct stat *buf); + int (*fstat64)(int ver, int fd, struct stat64 *buf); + int (*fstatat)(int ver, int fd, const char *path, struct stat *buf, int flag); + int (*fstatat64)(int ver, int fd, const char *path, struct stat64 *buf, int flag); + int (*lstat)(int ver, const char *path, struct stat *buf); + int (*lstat64)(int ver, const char *path, struct stat64 *buf); + int (*mknod)(int ver, const char *path, mode_t mode, dev_t dev); + int (*mknodat)(int ver, int dirfd, const char *path, mode_t mode, dev_t dev); +} glibc_redirect; + +#define PREALLOC 3 +static char logfilepath[30]; +static int logfile; +static int descriptor_table_count = 0; +static int descriptor_table_size = 0; +static pvfs_descriptor **descriptor_table; +static char rstate[256]; /* used for random number generation */ + +posix_ops glibc_ops; + +pvfs_descriptor_status pvfs_stdin_status = +{ + .dup_cnt = 1, + .fsops = &glibc_ops, + .pvfs_ref.fs_id = 0, + .pvfs_ref.handle = 0, + .flags = O_RDONLY, + .mode = 0, + .file_pointer = 0, + .token = 0, + .dpath = NULL, + .fent = NULL +}; + +pvfs_descriptor pvfs_stdin = +{ + .is_in_use = PVFS_FS, + .fd = 0, + .true_fd = STDIN_FILENO, + .fdflags = 0, + .s = &pvfs_stdin_status +}; + +pvfs_descriptor_status pvfs_stdout_status = +{ + .dup_cnt = 1, + .fsops = &glibc_ops, + .pvfs_ref.fs_id = 0, + .pvfs_ref.handle = 0, + .flags = O_WRONLY | O_APPEND, + .mode = 0, + 
.file_pointer = 0, + .token = 0, + .dpath = NULL, + .fent = NULL +}; + +pvfs_descriptor pvfs_stdout = +{ + .is_in_use = PVFS_FS, + .fd = 1, + .true_fd = STDOUT_FILENO, + .fdflags = 0, + .s = &pvfs_stdout_status +}; + +pvfs_descriptor_status pvfs_stderr_status = +{ + .dup_cnt = 1, + .fsops = &glibc_ops, + .pvfs_ref.fs_id = 0, + .pvfs_ref.handle = 0, + .flags = O_WRONLY | O_APPEND, + .mode = 0, + .file_pointer = 0, + .token = 0, + .dpath = NULL, + .fent = NULL +}; + +pvfs_descriptor pvfs_stderr = +{ + .is_in_use = PVFS_FS, + .fd = 2, + .true_fd = STDERR_FILENO, + .fdflags = 0, + .s = &pvfs_stderr_status +}; + +static int my_glibc_stat(const char *path, struct stat *buf) +{ + int rc = glibc_redirect.stat(_STAT_VER, path, buf); + return rc; +} + +static int my_glibc_stat64(const char *path, struct stat64 *buf) +{ + int rc = glibc_redirect.stat64(_STAT_VER, path, buf); + return rc; +} + +static int my_glibc_fstat(int fd, struct stat *buf) +{ + return glibc_redirect.fstat(_STAT_VER, fd, buf); +} + +static int my_glibc_fstat64(int fd, struct stat64 *buf) +{ + return glibc_redirect.fstat64(_STAT_VER, fd, buf); +} + +static int my_glibc_fstatat(int fd, const char *path, struct stat *buf, int flag) +{ + return glibc_redirect.fstatat(_STAT_VER, fd, path, buf, flag); +} + +static int my_glibc_fstatat64(int fd, const char *path, struct stat64 *buf, int flag) +{ + return glibc_redirect.fstatat64(_STAT_VER, fd, path, buf, flag); +} + +static int my_glibc_lstat(const char *path, struct stat *buf) +{ + return glibc_redirect.lstat(_STAT_VER, path, buf); +} + +static int my_glibc_lstat64(const char *path, struct stat64 *buf) +{ + return glibc_redirect.lstat64(_STAT_VER, path, buf); +} + +static int my_glibc_mknod(const char *path, mode_t mode, dev_t dev) +{ + return glibc_redirect.mknod(_MKNOD_VER, path, mode, dev); +} + +static int my_glibc_mknodat(int dirfd, const char *path, mode_t mode, dev_t dev) +{ + return glibc_redirect.mknodat(_MKNOD_VER, dirfd, path, mode, dev); +} + 
+static int my_glibc_getdents(u_int fd, struct dirent *dirp, u_int count) +{ + return syscall(SYS_getdents, fd, dirp, count); +} + +static int my_glibc_getdents64(u_int fd, struct dirent64 *dirp, u_int count) +{ + return syscall(SYS_getdents64, fd, dirp, count); +} + +static int my_glibc_fadvise64(int fd, off64_t offset, off64_t len, int advice) +{ + return syscall(SYS_fadvise64, fd, offset, len, advice); +} + +static int my_glibc_fadvise(int fd, off_t offset, off_t len, int advice) +{ + return my_glibc_fadvise64(fd, (off64_t)offset, (off64_t)len, advice); +} + +static int my_glibc_readdir(u_int fd, struct dirent *dirp, u_int count) +{ + return syscall(SYS_readdir, fd, dirp, count); +} + +static int my_glibc_getcwd(char *buf, unsigned long size) +{ + return syscall(SYS_getcwd, buf, size); +} + +void load_glibc(void) +{ + void *libc_handle; + libc_handle = dlopen("libc.so.6", RTLD_LAZY|RTLD_GLOBAL); + if (!libc_handle) + { + fprintf(stderr,"Failed to open libc.so\n"); + libc_handle = RTLD_NEXT; + } + memset((void *)&glibc_ops, 0, sizeof(glibc_ops)); + glibc_ops.open = dlsym(libc_handle, "open"); + glibc_ops.open64 = dlsym(libc_handle, "open64"); + glibc_ops.openat = dlsym(libc_handle, "openat"); + glibc_ops.openat64 = dlsym(libc_handle, "openat64"); + glibc_ops.creat = dlsym(libc_handle, "creat"); + glibc_ops.creat64 = dlsym(libc_handle, "creat64"); + glibc_ops.unlink = dlsym(libc_handle, "unlink"); + glibc_ops.unlinkat = dlsym(libc_handle, "unlinkat"); + glibc_ops.rename = dlsym(libc_handle, "rename"); + glibc_ops.renameat = dlsym(libc_handle, "renameat"); + glibc_ops.read = dlsym(libc_handle, "read"); + glibc_ops.pread = dlsym(libc_handle, "pread"); + glibc_ops.readv = dlsym(libc_handle, "readv"); + glibc_ops.pread64 = dlsym(libc_handle, "pread64"); + glibc_ops.write = dlsym(libc_handle, "write"); + glibc_ops.pwrite = dlsym(libc_handle, "pwrite"); + glibc_ops.writev = dlsym(libc_handle, "writev"); + glibc_ops.pwrite64 = dlsym(libc_handle, "pwrite64"); + 
glibc_ops.lseek = dlsym(libc_handle, "lseek"); + glibc_ops.lseek64 = dlsym(libc_handle, "lseek64"); + glibc_ops.truncate = dlsym(libc_handle, "truncate"); + glibc_ops.truncate64 = dlsym(libc_handle, "truncate64"); + glibc_ops.ftruncate = dlsym(libc_handle, "ftruncate"); + glibc_ops.ftruncate64 = dlsym(libc_handle, "ftruncate64"); + glibc_ops.fallocate = dlsym(libc_handle, "posix_fallocate"); + glibc_ops.close = dlsym(libc_handle, "close"); + glibc_ops.stat = my_glibc_stat; + glibc_redirect.stat = dlsym(libc_handle, "__xstat"); + glibc_ops.stat64 = my_glibc_stat64; + glibc_redirect.stat64 = dlsym(libc_handle, "__xstat64"); + glibc_ops.fstat = my_glibc_fstat; + glibc_redirect.fstat = dlsym(libc_handle, "__fxstat"); + glibc_ops.fstat64 = my_glibc_fstat64; + glibc_redirect.fstat64 = dlsym(libc_handle, "__fxstat64"); + glibc_ops.fstatat = my_glibc_fstatat; + glibc_redirect.fstatat = dlsym(libc_handle, "__fxstatat"); + glibc_ops.fstatat64 = my_glibc_fstatat64; + glibc_redirect.fstatat64 = dlsym(libc_handle, "__fxstatat64"); + glibc_ops.lstat = my_glibc_lstat; + glibc_redirect.lstat = dlsym(libc_handle, "__lxstat"); + glibc_ops.lstat64 = my_glibc_lstat64; + glibc_redirect.lstat64 = dlsym(libc_handle, "__lxstat64"); + glibc_ops.futimesat = dlsym(libc_handle, "futimesat"); + glibc_ops.utimes = dlsym(libc_handle, "utimes"); + glibc_ops.utime = dlsym(libc_handle, "utime"); + glibc_ops.futimes = dlsym(libc_handle, "futimes"); + glibc_ops.dup = dlsym(libc_handle, "dup"); + glibc_ops.dup2 = dlsym(libc_handle, "dup2"); + glibc_ops.chown = dlsym(libc_handle, "chown"); + glibc_ops.fchown = dlsym(libc_handle, "fchown"); + glibc_ops.fchownat = dlsym(libc_handle, "fchownat"); + glibc_ops.lchown = dlsym(libc_handle, "lchown"); + glibc_ops.chmod = dlsym(libc_handle, "chmod"); + glibc_ops.fchmod = dlsym(libc_handle, "fchmod"); + glibc_ops.fchmodat = dlsym(libc_handle, "fchmodat"); + glibc_ops.mkdir = dlsym(libc_handle, "mkdir"); + glibc_ops.mkdirat = dlsym(libc_handle, "mkdirat"); + 
glibc_ops.rmdir = dlsym(libc_handle, "rmdir"); + glibc_ops.readlink = dlsym(libc_handle, "readlink"); + glibc_ops.readlinkat = dlsym(libc_handle, "readlinkat"); + glibc_ops.symlink = dlsym(libc_handle, "symlink"); + glibc_ops.symlinkat = dlsym(libc_handle, "symlinkat"); + glibc_ops.link = dlsym(libc_handle, "link"); + glibc_ops.linkat = dlsym(libc_handle, "linkat"); + glibc_ops.readdir = my_glibc_readdir; + glibc_ops.getdents = my_glibc_getdents; + glibc_ops.getdents64 = my_glibc_getdents64; + glibc_ops.access = dlsym(libc_handle, "access"); + glibc_ops.faccessat = dlsym(libc_handle, "faccessat"); + glibc_ops.flock = dlsym(libc_handle, "flock"); + glibc_ops.fcntl = dlsym(libc_handle, "fcntl"); + glibc_ops.sync = dlsym(libc_handle, "sync"); + glibc_ops.fsync = dlsym(libc_handle, "fsync"); + glibc_ops.fdatasync = dlsym(libc_handle, "fdatasync"); + glibc_ops.fadvise = my_glibc_fadvise; + glibc_ops.fadvise64 = my_glibc_fadvise64; + glibc_ops.statfs = dlsym(libc_handle, "statfs"); + glibc_ops.statfs64 = dlsym(libc_handle, "statfs64"); + glibc_ops.fstatfs = dlsym(libc_handle, "fstatfs"); + glibc_ops.fstatfs64 = dlsym(libc_handle, "fstatfs64"); + glibc_ops.statvfs = dlsym(libc_handle, "statvfs"); + glibc_ops.fstatvfs = dlsym(libc_handle, "fstatvfs"); + glibc_ops.mknod = my_glibc_mknod; + glibc_redirect.mknod = dlsym(libc_handle, "__xmknod"); + glibc_ops.mknodat = my_glibc_mknodat; + glibc_redirect.mknodat = dlsym(libc_handle, "__xmknodat"); + glibc_ops.sendfile = dlsym(libc_handle, "sendfile"); + glibc_ops.sendfile64 = dlsym(libc_handle, "sendfile64"); +#ifdef HAVE_ATTR_XATTR_H + glibc_ops.setxattr = dlsym(libc_handle, "setxattr"); + glibc_ops.lsetxattr = dlsym(libc_handle, "lsetxattr"); + glibc_ops.fsetxattr = dlsym(libc_handle, "fsetxattr"); + glibc_ops.getxattr = dlsym(libc_handle, "getxattr"); + glibc_ops.lgetxattr = dlsym(libc_handle, "lgetxattr"); + glibc_ops.fgetxattr = dlsym(libc_handle, "fgetxattr"); + glibc_ops.listxattr = dlsym(libc_handle, "listxattr"); + 
glibc_ops.llistxattr = dlsym(libc_handle, "llistxattr"); + glibc_ops.flistxattr = dlsym(libc_handle, "flistxattr"); + glibc_ops.removexattr = dlsym(libc_handle, "removexattr"); + glibc_ops.lremovexattr = dlsym(libc_handle, "lremovexattr"); + glibc_ops.fremovexattr = dlsym(libc_handle, "fremovexattr"); +#endif + glibc_ops.socket = dlsym(libc_handle, "socket"); + glibc_ops.accept = dlsym(libc_handle, "accept"); + glibc_ops.bind = dlsym(libc_handle, "bind"); + glibc_ops.connect = dlsym(libc_handle, "connect"); + glibc_ops.getpeername = dlsym(libc_handle, "getpeername"); + glibc_ops.getsockname = dlsym(libc_handle, "getsockname"); + glibc_ops.getsockopt = dlsym(libc_handle, "getsockopt"); + glibc_ops.setsockopt = dlsym(libc_handle, "setsockopt"); + glibc_ops.ioctl = dlsym(libc_handle, "ioctl"); + glibc_ops.listen = dlsym(libc_handle, "listen"); + glibc_ops.recv = dlsym(libc_handle, "recv"); + glibc_ops.recvfrom = dlsym(libc_handle, "recvfrom"); + glibc_ops.recvmsg = dlsym(libc_handle, "recvmsg"); + //glibc_ops.select = dlsym(libc_handle, "select"); + //glibc_ops.FD_CLR = dlsym(libc_handle, "FD_CLR"); + //glibc_ops.FD_ISSET = dlsym(libc_handle, "FD_ISSET"); + //glibc_ops.FD_SET = dlsym(libc_handle, "FD_SET"); + //glibc_ops.FD_ZERO = dlsym(libc_handle, "FD_ZERO"); + //glibc_ops.pselect = dlsym(libc_handle, "pselect"); + glibc_ops.send = dlsym(libc_handle, "send"); + glibc_ops.sendto = dlsym(libc_handle, "sendto"); + glibc_ops.sendmsg = dlsym(libc_handle, "sendmsg"); + glibc_ops.shutdown = dlsym(libc_handle, "shutdown"); + glibc_ops.socketpair = dlsym(libc_handle, "socketpair"); + glibc_ops.pipe = dlsym(libc_handle, "pipe"); + glibc_ops.umask = dlsym(libc_handle, "umask"); + glibc_ops.getumask = dlsym(libc_handle, "getumask"); + glibc_ops.getdtablesize = dlsym(libc_handle, "getdtablesize"); + glibc_ops.mmap = dlsym(libc_handle, "mmap"); + glibc_ops.munmap = dlsym(libc_handle, "munmap"); + glibc_ops.msync = dlsym(libc_handle, "msync"); +#if 0 + 
glibc_ops.acl_delete_def_file = dlsym(libc_handle, "acl_delete_def_file"); + glibc_ops.acl_get_fd = dlsym(libc_handle, "acl_get_fd"); + glibc_ops.acl_get_file = dlsym(libc_handle, "acl_get_file"); + glibc_ops.acl_set_fd = dlsym(libc_handle, "acl_set_fd"); + glibc_ops.acl_set_file = dlsym(libc_handle, "acl_set_file"); +#endif + +/* PVFS does not implement socket ops */ + pvfs_ops.socket = dlsym(libc_handle, "socket"); + pvfs_ops.accept = dlsym(libc_handle, "accept"); + pvfs_ops.bind = dlsym(libc_handle, "bind"); + pvfs_ops.connect = dlsym(libc_handle, "connect"); + pvfs_ops.getpeername = dlsym(libc_handle, "getpeername"); + pvfs_ops.getsockname = dlsym(libc_handle, "getsockname"); + pvfs_ops.getsockopt = dlsym(libc_handle, "getsockopt"); + pvfs_ops.setsockopt = dlsym(libc_handle, "setsockopt"); + pvfs_ops.ioctl = dlsym(libc_handle, "ioctl"); + pvfs_ops.listen = dlsym(libc_handle, "listen"); + pvfs_ops.recv = dlsym(libc_handle, "recv"); + pvfs_ops.recvfrom = dlsym(libc_handle, "recvfrom"); + pvfs_ops.recvmsg = dlsym(libc_handle, "recvmsg"); + //pvfs_ops.select = dlsym(libc_handle, "select"); + //pvfs_ops.FD_CLR = dlsym(libc_handle, "FD_CLR"); + //pvfs_ops.FD_ISSET = dlsym(libc_handle, "FD_ISSET"); + //pvfs_ops.FD_SET = dlsym(libc_handle, "FD_SET"); + //pvfs_ops.FD_ZERO = dlsym(libc_handle, "FD_ZERO"); + //pvfs_ops.pselect = dlsym(libc_handle, "pselect"); + pvfs_ops.send = dlsym(libc_handle, "send"); + pvfs_ops.sendto = dlsym(libc_handle, "sendto"); + pvfs_ops.sendmsg = dlsym(libc_handle, "sendmsg"); + pvfs_ops.shutdown = dlsym(libc_handle, "shutdown"); + pvfs_ops.socketpair = dlsym(libc_handle, "socketpair"); + pvfs_ops.pipe = dlsym(libc_handle, "pipe"); + + /* should have been previously opened */ + /* this decrements the reference count */ + if (libc_handle != RTLD_NEXT) + { + dlclose(libc_handle); + } +} + +/* + * runs on exit to do any cleanup + */ +static void usrint_cleanup(void) +{ + /* later check for an error that might want us */ + /* to keep this - for now 
it is empty */ + glibc_ops.unlink(logfilepath); + /* cache cleanup? */ +#if 0 + if (ucache_enabled) + { + ucache_finalize(); + } +#endif + PVFS_sys_finalize(); +} + +#if PVFS_UCACHE_ENABLE +/* + * access function to see if cache is currently enabled + * only used by code ouside of this module + */ +int pvfs_ucache_enabled(void) +{ + return ucache_enabled; +} +#endif + +void pvfs_sys_init_doit(void); + +int pvfs_sys_init(void) +{ + static int pvfs_initializing_flag = 0; + static int pvfs_lib_lock_initialized = 0; /* recursive lock init flag */ + static int pvfs_lib_init_flag = 0; + + int rc = 0; + + /* Mutex protecting initialization of recursive mutex */ + static gen_mutex_t mutex_mutex = GEN_MUTEX_INITIALIZER; + /* The recursive mutex */ + static pthread_mutex_t rec_mutex; + + if(pvfs_lib_init_flag) + return 0; + + if(!pvfs_lib_lock_initialized) + { + rc = gen_mutex_lock(&mutex_mutex); + if(!pvfs_lib_lock_initialized) + { + //init recursive mutex + pthread_mutexattr_t rec_attr; + rc = pthread_mutexattr_init(&rec_attr); + rc = pthread_mutexattr_settype(&rec_attr, PTHREAD_MUTEX_RECURSIVE); + rc = pthread_mutex_init(&rec_mutex, &rec_attr); + rc = pthread_mutexattr_destroy(&rec_attr); + pvfs_lib_lock_initialized = 1; + } + rc = gen_mutex_unlock(&mutex_mutex); + } + + rc = pthread_mutex_lock(&rec_mutex); + if(pvfs_lib_init_flag || pvfs_initializing_flag) + { + rc = pthread_mutex_unlock(&rec_mutex); + return 1; + } + + /* set this to prevent pvfs_sys_init from running recursively (indirect) */ + pvfs_initializing_flag = 1; + + //Perform Init + pvfs_sys_init_doit(); + pvfs_initializing_flag = 0; + pvfs_lib_init_flag = 1; + rc = pthread_mutex_unlock(&rec_mutex); + return 0; +} + +/* + * Perform PVFS initialization tasks + */ +void pvfs_sys_init_doit(void) { + struct rlimit rl; + int rc; + char curdir[PVFS_PATH_MAX]; + + /* this allows system calls to run */ + load_glibc(); + PINT_initrand(); + + /* if this fails not much we can do about it */ + atexit(usrint_cleanup); + + 
/* set up current working dir */ + memset(curdir, 0, sizeof(curdir)); + rc = my_glibc_getcwd(curdir, PVFS_PATH_MAX); + if (rc < 0) + { + perror("failed to get CWD"); + exit(-1); + } + pvfs_cwd_init(curdir, PVFS_PATH_MAX); + + rc = getrlimit(RLIMIT_NOFILE, &rl); + /* need to check for "INFINITY" */ + + /* set up descriptor table */ + descriptor_table_size = rl.rlim_max; + descriptor_table = + (pvfs_descriptor **)malloc(sizeof(pvfs_descriptor *) * + descriptor_table_size); + if (!descriptor_table) + { + perror("failed to malloc descriptor table"); + exit(-1); + } + memset(descriptor_table, 0, + (sizeof(pvfs_descriptor *) * descriptor_table_size)); + descriptor_table[0] = &pvfs_stdin; + gen_mutex_init(&pvfs_stdin.lock); + gen_mutex_init(&pvfs_stdin.s->lock); + descriptor_table[1] = &pvfs_stdout; + gen_mutex_init(&pvfs_stdout.lock); + gen_mutex_init(&pvfs_stdin.s->lock); + descriptor_table[2] = &pvfs_stderr; + gen_mutex_init(&pvfs_stderr.lock); + gen_mutex_init(&pvfs_stdin.s->lock); + descriptor_table_count = PREALLOC; + + /* open log file */ + /* we dupe this FD to get FD's for pvfs files */ + memset(logfilepath, 0, sizeof(logfilepath)); + snprintf(logfilepath, 25, "/tmp/pvfsuid-%05d.log", (int)(getuid())); + logfile = glibc_ops.open(logfilepath, O_RDWR|O_CREAT, 0600); + if (logfile < 0) + { + perror("failed in pvfs_sys_init"); + exit(-1); + } + + /* initalize PVFS */ + /* this is very complex so most stuff needs to work + * before we do this + */ + PVFS_util_init_defaults(); + if (errno == EINPROGRESS) + { + errno = 0; + } + + /* call other initialization routines */ + +#if PVFS_UCACHE_ENABLE + //gossip_enable_file(UCACHE_LOG_FILE, "a"); + //gossip_enable_stderr(); + + /* ucache initialization - assumes shared memory previously + * aquired (using ucache daemon) + */ + rc = ucache_initialize(); + if (rc < 0) + { + /* ucache failed to initialize, so continue without it */ + /* Write a warning message in the ucache.log letting programmer know */ + ucache_enabled = 0; + 
+ /* Enable the writing of the error message and write the message to file. */ + //gossip_set_debug_mask(1, GOSSIP_UCACHE_DEBUG); + //gossip_debug(GOSSIP_UCACHE_DEBUG, + // "WARNING: client caching configured enabled but couldn't inizialize\n"); + } +#endif + +#ifdef PVFS_AIO_ENABLE + /* initialize aio interface */ + aiocommon_init(); +#endif +} + +int pvfs_descriptor_table_size(void) +{ + return descriptor_table_size; +} + +/* + * Allocate a new pvfs_descriptor + * initialize fsops to the given set + */ + pvfs_descriptor *pvfs_alloc_descriptor(posix_ops *fsops, + int fd, + PVFS_object_ref *file_ref, + int use_cache) + { + int newfd, flags = 0; + pvfs_descriptor *pd; + + pvfs_sys_init(); + if (fsops == NULL) + { + errno = EINVAL; + return NULL; + } + if (fd == -1) + { + /* PVFS file allocate a real descriptor for it */ + newfd = glibc_ops.dup(logfile); + } + else + { + /* opened by glibc, make sure this is a valid fd */ + newfd = fd; + flags = glibc_ops.fcntl(newfd, F_GETFL); + if (flags < 0) + { + return NULL; + } + if (descriptor_table[newfd] != NULL) + { + errno = EINVAL; + return NULL; + } + } + + /* allocate new descriptor */ + descriptor_table_count++; + pd = (pvfs_descriptor *)malloc(sizeof(pvfs_descriptor)); + + if (!pd) + { + return NULL; + } + memset(pd, 0, sizeof(pvfs_descriptor)); + + gen_mutex_init(&pd->lock); + gen_mutex_lock(&pd->lock); + descriptor_table[newfd] = pd; + + pd->s = (pvfs_descriptor_status *)malloc(sizeof(pvfs_descriptor_status)); + if (!pd->s) + { + free(pd); + return NULL; + } + memset(pd->s, 0, sizeof(pvfs_descriptor_status)); + + gen_mutex_init(&pd->s->lock); + gen_mutex_lock(&pd->s->lock); + + /* fill in descriptor */ + pd->is_in_use = PVFS_FS; + pd->fd = newfd; + pd->true_fd = newfd; + pd->fdflags = 0; + + /* + if (!use_cache) + { + pd->fdflags |= PVFS_FD_NOCACHE; + } + */ + pd->s->dup_cnt = 1; + pd->s->fsops = fsops; + if (file_ref) + { + pd->s->pvfs_ref.fs_id = file_ref->fs_id; + pd->s->pvfs_ref.handle = file_ref->handle; + } + 
else + { + /* if this is not a PVFS file then the file_ref will be NULL */ + pd->s->pvfs_ref.fs_id = 0; + pd->s->pvfs_ref.handle = 0LL; + } + + pd->s->flags = flags; + pd->s->mode = 0; /* this should be filled in by caller */ + pd->s->file_pointer = 0; + pd->s->token = 0; + pd->s->dpath = NULL; + pd->s->fent = NULL; /* not caching if left NULL */ + +#if PVFS_UCACHE_ENABLE + if (ucache_enabled /* && use_cache*/ ) + { + /* File reference won't always be passed in */ + if(file_ref != NULL) + { + /* We have the file identifiers + * so insert file info into ucache + * this fills in mtbl + */ + ucache_open_file(&(file_ref->fs_id), + &(file_ref->handle), + &(pd->s->fent)); + } + } +#endif /* PVFS_UCACHE_ENABLE */ + + /* NEW PD IS STILL LOCKED */ + return pd; +} + +/* + * Function for duplicating a descriptor - used in dup and dup2 calls + */ +int pvfs_dup_descriptor(int oldfd, int newfd) +{ + int rc = 0; + pvfs_descriptor *pd; + + pvfs_sys_init(); + if (oldfd < 0 || oldfd >= descriptor_table_size) + { + errno = EBADF; + return -1; + } + if (newfd == -1) /* dup */ + { + newfd = glibc_ops.dup(logfile); + if (newfd < 0) + { + return newfd; + } + } + else /* dup2 */ + { + /* see if requested fd is in use */ + if (descriptor_table[newfd] != NULL) + { + /* check for special case */ + if (newfd == oldfd) + { + return oldfd; + } + /* close old file in new slot */ + rc = pvfs_free_descriptor(newfd); + if (rc < 0) + { + return rc; + } + } + /* continuing with dup2 */ + rc = glibc_ops.dup2(oldfd, newfd); + if (rc < 0) + { + return rc; + } + } + /* new set up new pvfs_descfriptor */ + descriptor_table_count++; + pd = (pvfs_descriptor *)malloc(sizeof(pvfs_descriptor)); + if (!pd) + { + return -1; + } + memset(pd, 0, sizeof(pvfs_descriptor)); + gen_mutex_init(&pd->lock); + gen_mutex_lock(&pd->lock); + descriptor_table[newfd] = pd; + + pd->is_in_use = PVFS_FS; + pd->fd = newfd; + pd->true_fd = newfd; + pd->fdflags = 0; + /* share the pvfs_desdriptor_status info */ + pd->s = 
descriptor_table[oldfd]->s; + gen_mutex_lock(&pd->s->lock); + pd->s->dup_cnt++; + gen_mutex_unlock(&pd->s->lock); + gen_mutex_unlock(&pd->lock); + return 0; +} + +/* + * Return a pointer to the pvfs_descriptor for the file descriptor or null + * if there is no entry for the given file descriptor + * should probably be inline if we can get at static table that way + */ +pvfs_descriptor *pvfs_find_descriptor(int fd) +{ + pvfs_descriptor *pd = NULL; + + pvfs_sys_init(); + if (fd < 0 || fd >= descriptor_table_size) + { + errno = EBADF; + return NULL; + } + pd = descriptor_table[fd]; + if (!pd) + { + int flags = 0; + /* see if glibc opened this file without our knowing */ + flags = glibc_ops.fcntl(fd, F_GETFL); + if (flags == -1) + { + /* apparently not */ + return NULL; + } + /* allocate a descriptor */ + descriptor_table_count++; + pd = (pvfs_descriptor *)malloc(sizeof(pvfs_descriptor)); + if (!pd) + { + return NULL; + } + memset(pd, 0, sizeof(pvfs_descriptor)); + gen_mutex_init(&pd->lock); + gen_mutex_lock(&pd->lock); + descriptor_table[fd] = pd; + + pd->s = + (pvfs_descriptor_status *)malloc(sizeof(pvfs_descriptor_status)); + if (!pd->s) + { + free(pd); + return NULL; + } + memset(pd->s, 0, sizeof(pvfs_descriptor_status)); + gen_mutex_init(&pd->s->lock); + + /* fill in descriptor */ + pd->is_in_use = PVFS_FS; + pd->fd = fd; + pd->true_fd = fd; + pd->fdflags = 0; + pd->s->dup_cnt = 1; + pd->s->fsops = &glibc_ops; + pd->s->pvfs_ref.fs_id = 0; + pd->s->pvfs_ref.handle = 0LL; + pd->s->flags = flags; + pd->s->mode = 0; + pd->s->file_pointer = 0; + pd->s->token = 0; + pd->s->dpath = NULL; + pd->s->fent = NULL; /* not caching if left NULL */ + } + else + { + /* locks here prevent a thread from getting */ + /* a pd that is not finish being allocated yet */ + gen_mutex_lock(&pd->lock); + if (pd->is_in_use != PVFS_FS) + { + errno = EBADF; + gen_mutex_unlock(&pd->lock); + return NULL; + } + } + gen_mutex_unlock(&pd->lock); + return pd; +} + +int pvfs_free_descriptor(int fd) +{ 
+ int dup_cnt; + pvfs_descriptor *pd = NULL; + debug("pvfs_free_descriptor called with %d\n", fd); + + pd = pvfs_find_descriptor(fd); + if (pd == NULL) + { + return -1; + } + + /* clear out table entry */ + descriptor_table[fd] = NULL; + glibc_ops.close(fd); + + /* keep up with used descriptors */ + descriptor_table_count--; + + /* check if last copy */ + gen_mutex_lock(&pd->s->lock); + dup_cnt = --(pd->s->dup_cnt); + gen_mutex_unlock(&pd->s->lock); + if (dup_cnt <= 0) + { + if (pd->s->dpath) + { + free(pd->s->dpath); + } + +#if PVFS_UCACHE_ENABLE + if (pd->s->fent) + { + int rc = 0; + rc = ucache_close_file(pd->s->fent); + if(rc == -1) + { + return rc; + } + } +#endif /* PVFS_UCACHE_ENABLE */ + + /* free descriptor status - wipe memory first */ + memset(pd->s, 0, sizeof(pvfs_descriptor_status)); + + /* first 3 descriptors not malloc'd */ + if (fd > 2) + { + free(pd->s); + } + } + /* free descriptor - wipe memory first */ + memset(pd, 0, sizeof(pvfs_descriptor)); + /* first 3 descriptors not malloc'd */ + if (fd > 2) + { + free(pd); + } + + debug("pvfs_free_descriptor returns %d\n", 0); + return 0; +} + +/* + * takes a path that is relative to the working dir and + * expands it all the way to the root + */ +char *pvfs_qualify_path(const char *path) +{ + int cdsz, psz, msz; + char *rc; + char *newpath = NULL; + char curdir[PVFS_PATH_MAX]; + + if(path[0] != '/') + { + memset(curdir, 0, PVFS_PATH_MAX); + rc = getcwd(curdir, PVFS_PATH_MAX); + if (curdir == NULL) + { + /* ERANGE if need a larger buffer */ + /* error, bail out */ + return NULL; + } + cdsz = strlen(curdir); + psz = strlen(path); + msz = cdsz + psz + 2; + if (msz < 2) + { + errno = EINVAL; + return NULL; + } + /* allocate buffer for whole path and copy */ + newpath = (char *)malloc(msz); + if (!newpath) + { + return NULL; + } + memset(newpath, 0, msz); + if (cdsz >= 0) /* zero size copy is bad */ + { + strncpy(newpath, curdir, cdsz); + } + /* free(curdir); */ + strncat(newpath, "/", 1); + if (psz >= 0) /* 
zero size copy is bad */ + { + strncat(newpath, path, psz); + } + } + else + { + newpath = (char *)path; + } + return newpath; +} + +/** + * Determines if a path is part of a PVFS Filesystem + * + * returns 1 if PVFS 0 otherwise + */ + +int is_pvfs_path(const char *path) +{ + int rc = 0; + char *newpath = NULL ; +#if PVFS_USRINT_KMOUNT + int npsize; + struct stat sbuf; + struct statfs fsbuf; +#else + PVFS_fs_id fs_id; + char pvfs_path[PVFS_PATH_MAX]; +#endif + + if(pvfs_sys_init()) + { + return 0; + } + + if (!path) + { + errno = EINVAL; + return 0; /* let glibc sort out the error */ + } +#if PVFS_USRINT_KMOUNT + memset(&sbuf, 0, sizeof(sbuf)); + memset(&fsbuf, 0, sizeof(fsbuf)); + npsize = strnlen(path, PVFS_PATH_MAX) + 1; + newpath = (char *)malloc(npsize); + if (!newpath) + { + return 0; /* let glibc sort out the error */ + } + strncpy(newpath, path, npsize); + + /* first try to stat the path */ + /* this must call standard glibc stat */ + rc = glibc_ops.stat(newpath, &sbuf); + if (rc < 0) + { + int count; + /* path doesn't exist, try removing last segment */ + for(count = strlen(newpath) - 2; count > 0; count--) + { + if(newpath[count] == '/') + { + newpath[count] = '\0'; + break; + } + } + /* this must call standard glibc stat */ + rc = glibc_ops.stat(newpath, &sbuf); + if (rc < 0) + { + /* can't find the path must be an error */ + free(newpath); + return 0; /* let glibc sort out the error */ + } + } + /* this must call standard glibc statfs */ + rc = glibc_ops.statfs(newpath, &fsbuf); + free(newpath); + if(fsbuf.f_type == PVFS_FS) + { + return 1; /* PVFS */ + } + else + { + return 0; /* not PVFS assume the kernel can handle it */ + } +/***************************************************************/ +#else /* PVFS_USRINT_KMOUNT */ +/***************************************************************/ + /* we might not be able to stat the file direcly + * so we will use our resolver to look up the path + * prefix in the mount tab files + */ + memset(pvfs_path, 0 , 
PVFS_PATH_MAX); + newpath = pvfs_qualify_path(path); + rc = PVFS_util_resolve(newpath, &fs_id, pvfs_path, PVFS_PATH_MAX); + if (newpath != path) + { + free(newpath); + } + if (rc < 0) + { + if (rc == -PVFS_ENOENT) + { + return 0; /* not a PVFS path */ + } + errno = rc; + return 0; /* an error returned - let glibc deal with it */ + // return -1; /* an error returned */ + } + return 1; /* a PVFS path */ +#endif /* PVFS_USRINT_KMOUNT */ +} + +/** + * Split a pathname into a directory and a filename. + * If non-null is passed as the directory or filename, + * the field will be allocated and filled with the correct value + * + * A slash at the end of the path is interpreted as no filename + * and is an error. To parse the last dir in a path, remove this + * trailing slash. No filename with no directory is OK. + */ +int split_pathname( const char *path, + int dirflag, + char **directory, + char **filename) +{ + int i, fnlen, slashes = 0; + int length = strlen("pvfs2"); + + if (!path || !directory || !filename) + { + errno = EINVAL; + return -1; + } + /* chop off pvfs2 prefix */ + if (strncmp(path, "pvfs2:", length) == 0) + { + path = &path[length]; + } + /* Split path into a directory and filename */ + length = strnlen(path, PVFS_PATH_MAX); + if (length == PVFS_PATH_MAX) + { + errno = ENAMETOOLONG; + return -1; + } + i = length - 1; + if (dirflag) + { + /* skip any trailing slashes */ + for(; i >= 0 && path[i] == '/'; i--) + { + slashes++; + } + } + for (; i >= 0; i--) + { + if (path[i] == '/') + { + /* parse the directory */ + *directory = malloc(i + 1); + if (!*directory) + { + return -1; + } + strncpy(*directory, path, i); + (*directory)[i] = '\0'; + break; + } + } + if (i == -1) + { + /* found no '/' path is all filename */ + *directory = NULL; + } + i++; + /* copy the filename */ + fnlen = length - i - slashes; + if (fnlen == 0) + { + filename = NULL; + if (!directory) + { + errno = EISDIR; + } + else + { + errno = ENOENT; + } + return -1; + } + /* check flag to see 
if there are slashes to skip */ + *filename = malloc(fnlen + 1); + if (!*filename) + { + if (*directory) + { + free(*directory); + } + *directory = NULL; + *filename = NULL; + return -1; + } + strncpy(*filename, path + i, length - i); + (*filename)[length - i] = '\0'; + return 0; +} + +void PINT_initrand(void) +{ + static int init_called = 0; + pid_t pid; + uid_t uid; + gid_t gid; + struct timeval time; + char *oldstate; + unsigned int seed; + + if (init_called) + { + return; + } + init_called = 1; + pid = getpid(); + uid = getuid(); + gid = getgid(); + gettimeofday(&time, NULL); + seed = (((pid << 16) ^ uid) ^ (gid << 8)) ^ time.tv_usec; + oldstate = initstate(seed, rstate, 256); + setstate(oldstate); +} + +long int PINT_random(void) +{ + char *oldstate; + long int rndval; + + PINT_initrand(); + oldstate = setstate(rstate); + rndval = random(); + setstate(oldstate); + return rndval; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=4 sts=4 sw=4 expandtab + */ diff --git a/src/client/usrint/openfile-util.h b/src/client/usrint/openfile-util.h new file mode 100644 index 0000000..b3c8a05 --- /dev/null +++ b/src/client/usrint/openfile-util.h @@ -0,0 +1,69 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#include "posix-ops.h" + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines + */ +#ifndef OPENFILE_UTIL_H +#define OPENFILE_UTIL_H 1 + +//Define success and error return values +#define PVFS_FD_SUCCESS 0 +#define PVFS_FD_FAILURE -1 + +extern int split_pathname(const char *path, + int dirflag, + char **directory, + char **filename); + +int pvfs_ucache_enabled(void); + +extern int pvfs_sys_init(void); + +extern char *pvfs_qualify_path(const char *path); + +extern int is_pvfs_path(const char *path); + +extern void pvfs_debug(char *fmt, ...); + +extern void load_glibc(void); + +extern int pvfs_lookup_dir(const char *directory, + PVFS_object_ref *ref, + int *fs_id); + +extern int pvfs_lookup_file(const char *filename, + int fs_id, + PVFS_object_ref parent_ref, + int follow_links, + PVFS_object_ref *ref); + +extern pvfs_descriptor *pvfs_alloc_descriptor(posix_ops *fsops, + int fd, + PVFS_object_ref *file_ref, + int use_cache); + +extern pvfs_descriptor *pvfs_find_descriptor(int fd); + +extern int pvfs_dup_descriptor(int oldfd, int newfd); + +extern int pvfs_free_descriptor(int fd); + +extern int pvfs_descriptor_table_size(void); + +extern int pvfs_create_file(const char *filename, + mode_t mode, + PVFS_object_ref parent_ref, + PVFS_object_ref *ref); + +extern void PINT_initrand(void); + +extern long int PINT_random(void); +#endif diff --git a/src/client/usrint/posix-pvfs.c b/src/client/usrint/posix-pvfs.c new file mode 100644 index 0000000..2d68311 --- /dev/null +++ b/src/client/usrint/posix-pvfs.c @@ -0,0 +1,2540 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines - pvfs version of posix system calls + */ +#define USRINT_SOURCE 1 +#include "usrint.h" +#include "posix-ops.h" +#include "posix-pvfs.h" +#include "openfile-util.h" +#include "iocommon.h" + +#define PVFS_ATTR_DEFAULT_MASK \ +(PVFS_ATTR_SYS_COMMON_ALL | PVFS_ATTR_SYS_SIZE | PVFS_ATTR_SYS_BLKSIZE) + +static mode_t mask_val = 0022; /* implements umask for pvfs library */ +static char pvfs_cwd[PVFS_PATH_MAX]; + +/* actual implementation of read and write are in these static funcs */ + +static ssize_t pvfs_prdwr64(int fd, + void *buf, + size_t count, + off64_t offset, + int which); + +static ssize_t pvfs_rdwrv(int fd, + const struct iovec *vector, + size_t count, + int which); + +/** + * pvfs_open + */ +int pvfs_open(const char *path, int flags, ...) +{ + va_list ap; + int mode; + PVFS_hint hints; + char *newpath; + pvfs_descriptor *pd; + debug("pvfs_open: called with %s\n", path); + + if (!path) + { + errno = EINVAL; + return -1; + } + va_start(ap, flags); + if (flags & O_CREAT) + mode = va_arg(ap, int); + else + mode = 0777; + if (flags & O_HINTS) + hints = va_arg(ap, PVFS_hint); + else + hints = PVFS_HINT_NULL; + va_end(ap); + + /* fully qualify pathname */ + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + pd = iocommon_open(newpath, flags, hints, mode, NULL); + if (newpath != path) + { + free(newpath); + } + if (!pd) + { + return -1; + } + else + { + return pd->fd; + } +} + +/** + * pvfs_open64 + */ +int pvfs_open64(const char *path, int flags, ...) 
+{ + va_list ap; + int mode; + PVFS_hint hints; + + if (!path) + { + errno = EINVAL; + return -1; + } + va_start(ap, flags); + if (flags & O_CREAT) + { + mode = va_arg(ap, int); + } + else + { + mode = 0777; + } + if (flags & O_HINTS) + { + hints = va_arg(ap, PVFS_hint); + } + else + { + hints = PVFS_HINT_NULL; + } + va_end(ap); + flags |= O_LARGEFILE; + return pvfs_open(path, flags, mode); +} + +/** + * pvfs_openat + */ +int pvfs_openat(int dirfd, const char *path, int flags, ...) +{ + va_list ap; + int mode; + PVFS_hint hints; + pvfs_descriptor *dpd, *fpd; + + if (!path) + { + errno = EINVAL; + return -1; + } + va_start(ap, flags); + if (flags & O_CREAT) + { + mode = va_arg(ap, int); + } + else + { + mode = 0777; + } + if (flags & O_HINTS) + { + hints = va_arg(ap, PVFS_hint); + } + else + { + hints = PVFS_HINT_NULL; + } + va_end(ap); + if (path[0] == '/' || dirfd == AT_FDCWD) + { + return pvfs_open(path, flags, mode); + } + else + { + if (dirfd < 0) + { + errno = EBADF; + return -1; + } + dpd = pvfs_find_descriptor(dirfd); + if (!dpd) + { + return -1; + } + fpd = iocommon_open(path, flags, hints, mode, dpd); + if (!fpd) + { + return -1; + } + return fpd->fd; + } +} + +/** + * pvfs_openat64 + */ +int pvfs_openat64(int dirfd, const char *path, int flags, ...) +{ + va_list ap; + int mode; + PVFS_hint hints; + + if (dirfd < 0) + { + errno = EBADF; + return -1; + } + va_start(ap, flags); + if (flags & O_CREAT) + { + mode = va_arg(ap, int); + } + else + { + mode = 0777; + } + if (flags & O_HINTS) + { + hints = va_arg(ap, PVFS_hint); + } + else + { + hints = PVFS_HINT_NULL; + } + va_end(ap); + flags |= O_LARGEFILE; + return pvfs_openat(dirfd, path, flags, mode); +} + +/** + * pvfs_creat wrapper + */ +int pvfs_creat(const char *path, mode_t mode, ...) +{ + return pvfs_open(path, O_RDWR | O_CREAT | O_EXCL, mode); +} + +/** + * pvfs_creat64 wrapper + */ +int pvfs_creat64(const char *path, mode_t mode, ...) 
/**
 * pvfs_unlink
 *
 * Removes the file named by path.  A relative path is qualified against
 * the working directory and the qualified form is what gets unlinked.
 *
 * Returns the result of iocommon_unlink, or -1 when path is NULL (errno
 * EINVAL) or cannot be qualified.
 */
int pvfs_unlink(const char *path)
{
    int rc = 0;
    char *newpath;

    if (!path)
    {
        errno = EINVAL;
        return -1;
    }
    newpath = pvfs_qualify_path(path);
    if (!newpath)
    {
        return -1;
    }
    /* unlink the qualified path, not the possibly relative original */
    rc = iocommon_unlink(newpath, NULL);
    /* the qualified path is a private copy only when it differs */
    if (newpath != path)
    {
        free(newpath);
    }
    return rc;
}
+ olddirref = &pd->s->pvfs_ref; + absoldpath = (char *)oldpath; + } + if (oldpath[0] == '/' || newdirfd == AT_FDCWD) + { + newdirref = NULL; + absnewpath = pvfs_qualify_path(newpath); + if (!absnewpath) + { + return -1; + } + } + else + { + if (newdirfd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(newdirfd); + if (!pd) + { + errno = EBADF; + return -1; + } + newdirref = &pd->s->pvfs_ref; + absnewpath = (char *)newpath; + } + rc = iocommon_rename(olddirref, absoldpath, newdirref, absnewpath); + if (oldpath != absoldpath) + { + free(absoldpath); + } + if (newpath != absnewpath) + { + free(absnewpath); + } + return rc; +} + +/** + * pvfs_read wrapper + */ +ssize_t pvfs_read(int fd, void *buf, size_t count) +{ + int rc; + + if (fd < 0) + { + errno = EBADF; + return -1; + } + pvfs_descriptor *pd = pvfs_find_descriptor(fd); + if (!pd) + { + return -1; + } + rc = pvfs_prdwr64(fd, buf, count, pd->s->file_pointer, PVFS_IO_READ); + if (rc < 0) + { + return -1; + } + gen_mutex_lock(&pd->s->lock); + pd->s->file_pointer += rc; + gen_mutex_unlock(&pd->s->lock); + return rc; +} + +/** + * pvfs_pread wrapper + */ +ssize_t pvfs_pread(int fd, void *buf, size_t count, off_t offset) +{ + return pvfs_prdwr64(fd, buf, count, (off64_t) offset, PVFS_IO_READ); +} + +/** + * pvfs_readv wrapper + */ +ssize_t pvfs_readv(int fd, const struct iovec *vector, int count) +{ + return pvfs_rdwrv(fd, vector, count, PVFS_IO_READ); +} + +/** + * pvfs_pread64 wrapper + */ +ssize_t pvfs_pread64( int fd, void *buf, size_t count, off64_t offset ) +{ + return pvfs_prdwr64(fd, buf, count, offset, PVFS_IO_READ); +} + +/** + * pvfs_write wrapper + */ +ssize_t pvfs_write(int fd, const void *buf, size_t count) +{ + int rc; + off64_t offset; + + if (fd < 0) + { + errno = EBADF; + return -1; + } + pvfs_descriptor *pd = pvfs_find_descriptor(fd); + if (!pd) + { + return -1; + } + /* check for append mode */ + if (pd->s->flags & O_APPEND) + { + struct stat sbuf; + pvfs_fstat(fd, &sbuf); + 
offset = sbuf.st_size; + } + else + { + offset = pd->s->file_pointer; + } + rc = pvfs_prdwr64(fd, (void *)buf, count, offset, PVFS_IO_WRITE); + if (rc < 0) + { + return -1; + } + gen_mutex_lock(&pd->s->lock); + pd->s->file_pointer += rc; + gen_mutex_unlock(&pd->s->lock); + return rc; +} + +/** + * pvfs_pwrite wrapper + */ +ssize_t pvfs_pwrite(int fd, const void *buf, size_t count, off_t offset) +{ + return pvfs_prdwr64(fd, (void *)buf, count, (off64_t)offset, PVFS_IO_WRITE); +} + +/** + * pvfs_writev wrapper + */ +ssize_t pvfs_writev(int fd, const struct iovec *vector, int count) +{ + return pvfs_rdwrv(fd, vector, count, PVFS_IO_WRITE); +} + +/** + * pvfs_pwrite64 wrapper + */ +ssize_t pvfs_pwrite64(int fd, const void *buf, size_t count, off64_t offset) +{ + return pvfs_prdwr64(fd, (void *)buf, count, offset, PVFS_IO_WRITE); +} + +/** + * implements pread and pwrite with 64-bit file pointers + */ +static ssize_t pvfs_prdwr64(int fd, + void *buf, + size_t size, + off64_t offset, + int which) +{ + int rc; + pvfs_descriptor* pd; + struct iovec vector[1]; + + /* Find the descriptor */ + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + + /* Ensure descriptor is used for the correct type of access */ + if ((which == PVFS_IO_READ && + (O_WRONLY == (pd->s->flags & O_ACCMODE))) || + (which == PVFS_IO_WRITE && + (O_RDONLY == (pd->s->flags & O_ACCMODE)))) + { + errno = EBADF; + return -1; + } + + /* place contiguous buff and count into an iovec array of length 1 */ + vector[0].iov_base = buf; + vector[0].iov_len = size; + + rc = iocommon_readorwrite(which, pd, offset, 1, vector); + + return rc; +} + +/** + * implements readv and writev + */ +static ssize_t pvfs_rdwrv(int fd, + const struct iovec *vector, + size_t count, + int which) +{ + int rc = 0; + pvfs_descriptor* pd; + off64_t offset; + + /* Find the descriptor */ + pd = pvfs_find_descriptor(fd); + if(!pd) + { + return -1; + } + offset = pd->s->file_pointer; + + /* Ensure descriptor is 
used for the correct type of access */ + if ((which == PVFS_IO_READ && + (O_WRONLY == (pd->s->flags & O_ACCMODE))) || + (which == PVFS_IO_WRITE && + (O_RDONLY == (pd->s->flags & O_ACCMODE)))) + { + errno = EBADF; + return -1; + } + + rc = iocommon_readorwrite(which, pd, offset, count, vector); + + if (rc >= 0) + { + gen_mutex_lock(&pd->s->lock); + pd->s->file_pointer += rc; + gen_mutex_unlock(&pd->s->lock); + } + + return rc; +} + +/** + * pvfs_lseek wrapper + */ +off_t pvfs_lseek(int fd, off_t offset, int whence) +{ + return (off_t) pvfs_lseek64(fd, (off64_t)offset, whence); +} + +/** + * pvfs_lseek64 + */ +off64_t pvfs_lseek64(int fd, off64_t offset, int whence) +{ + pvfs_descriptor* pd; + + if (fd < 0) + { + errno = EBADF; + return -1; + } + /* Find the descriptor */ + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + + iocommon_lseek(pd, offset, 1, whence); + + return pd->s->file_pointer; +} + +/** + * pvfs_truncate wrapper + */ +int pvfs_truncate(const char *path, off_t length) +{ + return pvfs_truncate64(path, (off64_t) length); +} + +/** + * pvfs_truncate64 + */ +int pvfs_truncate64(const char *path, off64_t length) +{ + int rc; + pvfs_descriptor *pd; + + if (!path) + { + errno = EINVAL; + return -1; + } + pd = iocommon_open(path, O_WRONLY, PVFS_HINT_NULL, 0 , NULL); + if (!pd) + { + return -1; + } + rc = iocommon_truncate(pd->s->pvfs_ref, length); + pvfs_close(pd->fd); + return rc; +} + +/** + * pvfs_allocate wrapper + * + * This isn't right but we dont' have a syscall to match this. + * Best effort is to tuncate to thex size, which should guarantee + * spaceis available starting at beginning (let alone offset) + * extending to offset+length. + * + * Our truncate doesn't always allocate blocks either, since + * the underlying FS may have a sparse implementation. 
+ */ +int pvfs_fallocate(int fd, off_t offset, off_t length) +{ + if (offset < 0 || length < 0) + { + errno = EINVAL; + return -1; + } + /* if (file_size < offset + length) + * { + */ + return pvfs_ftruncate64(fd, (off64_t)(offset) + (off64_t)(length)); +} + +/** + * pvfs_ftruncate wrapper + */ +int pvfs_ftruncate(int fd, off_t length) +{ + return pvfs_ftruncate64(fd, (off64_t) length); +} + +/** + * pvfs_ftruncate64 + */ +int pvfs_ftruncate64(int fd, off64_t length) +{ + pvfs_descriptor *pd; + + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + return -1; + } + return iocommon_truncate(pd->s->pvfs_ref, length); +} + +/** + * pvfs_close + * + * TODO: add open/close count to minimize metadata ops + * this may only work if we have multi-user caching + * which we don't for now + */ +int pvfs_close(int fd) +{ + int rc = 0; + pvfs_descriptor* pd; + debug("pvfs_close: called with %d\n", fd); + + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return PVFS_FD_FAILURE; + } + + /* flush buffers */ + if (S_ISREG(pd->s->mode)) + { + rc = iocommon_fsync(pd); + if (rc < 0) + { + return -1; + } + } + + /* free descriptor */ + rc = pvfs_free_descriptor(pd->fd); + if (rc < 0) + { + return -1; + } + + debug("pvfs_close: returns %d\n", rc); + return rc; +} + +/* various flavors of stat */ +/** + * pvfs_stat + */ +int pvfs_stat(const char *path, struct stat *buf) +{ + return pvfs_stat_mask(path, buf, PVFS_ATTR_DEFAULT_MASK); +} + +int pvfs_stat_mask(const char *path, struct stat *buf, uint32_t mask) +{ + int rc; + char *newpath; + pvfs_descriptor *pd; + + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + pd = iocommon_open(newpath, O_RDONLY, PVFS_HINT_NULL, 0, NULL); + if (!pd) + { + rc = -1; + goto errorout; + } + mask &= PVFS_ATTR_DEFAULT_MASK; + rc = iocommon_stat(pd, buf, mask); + pvfs_close(pd->fd); + +errorout: + if (newpath != path) + { 
+ free(newpath); + } + return rc; +} + +/** + * pvfs_stat64 + */ +int pvfs_stat64(const char *path, struct stat64 *buf) +{ + int rc; + char *newpath; + pvfs_descriptor *pd; + + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + pd = iocommon_open(newpath, O_RDONLY, PVFS_HINT_NULL, 0, NULL); + if (!pd) + { + rc = -1; + goto errorout; + } + rc = iocommon_stat64(pd, buf, PVFS_ATTR_DEFAULT_MASK); + pvfs_close(pd->fd); + +errorout: + if (newpath != path) + { + free(newpath); + } + return rc; +} + +/** + * pvfs_fstat + */ +int pvfs_fstat(int fd, struct stat *buf) +{ + return pvfs_fstat_mask(fd, buf, PVFS_ATTR_DEFAULT_MASK); +} + +int pvfs_fstat_mask(int fd, struct stat *buf, uint32_t mask) +{ + pvfs_descriptor *pd; + + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + mask &= PVFS_ATTR_DEFAULT_MASK; + return iocommon_stat(pd, buf, mask); +} + +/** + * pvfs_fstat64 + */ +int pvfs_fstat64(int fd, struct stat64 *buf) +{ + pvfs_descriptor *pd; + + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + return iocommon_stat64(pd, buf, PVFS_ATTR_DEFAULT_MASK); +} + +/** + * pvfs_fstatat + */ +int pvfs_fstatat(int fd, const char *path, struct stat *buf, int flag) +{ + int rc; + pvfs_descriptor *pd, *pd2; + + if (path[0] == '/' || fd == AT_FDCWD) + { + if (flag & AT_SYMLINK_NOFOLLOW) + { + rc = pvfs_lstat(path, buf); + } + else + { + rc = pvfs_stat(path, buf); + } + } + else + { + int flags = O_RDONLY; + if (flag & AT_SYMLINK_NOFOLLOW) + { + flags |= O_NOFOLLOW; + } + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + return -1; + } + pd2 = iocommon_open(path, flags, PVFS_HINT_NULL, 0, pd); + if (!pd2) + { + return -1; + } + rc = iocommon_stat(pd2, buf, PVFS_ATTR_DEFAULT_MASK); + pvfs_close(pd2->fd); + } + return rc; +} + +/** + * pvfs_fstatat64 + 
*/ +int pvfs_fstatat64(int fd, const char *path, struct stat64 *buf, int flag) +{ + int rc; + pvfs_descriptor *pd, *pd2; + + /* absolute path or AT_FDCWD: fd is not consulted */ + if (path[0] == '/' || fd == AT_FDCWD) + { + if (flag & AT_SYMLINK_NOFOLLOW) + { + rc = pvfs_lstat64(path, buf); + } + else + { + rc = pvfs_stat64(path, buf); + } + } + else + { + int flags = O_RDONLY; + if (flag & AT_SYMLINK_NOFOLLOW) + { + flags |= O_NOFOLLOW; + } + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + /* open path relative to the directory descriptor */ + pd2 = iocommon_open(path, flags, PVFS_HINT_NULL, 0, pd); + if (!pd2) + { + return -1; + } + rc = iocommon_stat64(pd2, buf, PVFS_ATTR_DEFAULT_MASK); + pvfs_close(pd2->fd); + } + return rc; +} + +/** + * pvfs_lstat + */ +int pvfs_lstat(const char *path, struct stat *buf) +{ + return pvfs_lstat_mask(path, buf, PVFS_ATTR_DEFAULT_MASK); +} + +/* lstat restricted to the attributes selected by mask; opens with O_NOFOLLOW */ +int pvfs_lstat_mask(const char *path, struct stat *buf, uint32_t mask) +{ + int rc; + char *newpath; + pvfs_descriptor *pd; + + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + pd = iocommon_open(newpath, O_RDONLY|O_NOFOLLOW, PVFS_HINT_NULL, 0, NULL); + if (!pd) + { + rc = -1; + goto errorout; + } + /* bits outside PVFS_ATTR_DEFAULT_MASK are dropped */ + mask &= PVFS_ATTR_DEFAULT_MASK; + rc = iocommon_stat(pd, buf, mask); + pvfs_close(pd->fd); + +errorout: + /* newpath is freed only when it differs from the caller's pointer */ + if (newpath != path) + { + free(newpath); + } + return rc; +} + +/** + * pvfs_lstat64 + */ +int pvfs_lstat64(const char *path, struct stat64 *buf) +{ + int rc; + char *newpath; + pvfs_descriptor *pd; + + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + pd = iocommon_open(newpath, O_RDONLY|O_NOFOLLOW, PVFS_HINT_NULL, 0, NULL); + if (!pd) + { + rc = -1; + goto errorout; + } + rc = iocommon_stat64(pd, buf, PVFS_ATTR_DEFAULT_MASK); + pvfs_close(pd->fd); + +errorout: + if (newpath != path) + { + free(newpath); + } + return rc; +} + +/** + * pvfs_futimesat + */ +int pvfs_futimesat(int dirfd, + const char *path, + const struct timeval times[2]) +{ + int rc = 0; + 
pvfs_descriptor *pd=NULL, *pd2=NULL; + PVFS_sys_attr attr; + char *newpath = NULL; + + if (path && (path[0] == '/' || dirfd == AT_FDCWD)) + { + /* absolute path or AT_FDCWD: dirfd is not a descriptor to look + * up (AT_FDCWD is negative), resolve the path like pvfs_stat does + */ + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + pd2 = iocommon_open(newpath, O_RDONLY, PVFS_HINT_NULL, 0, NULL); + if (newpath != path) + { + free(newpath); + } + } + else + { + if (dirfd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(dirfd); + if (!pd) + { + errno = EBADF; + return -1; + } + if (path) + { + pd2 = iocommon_open(path, O_RDONLY, PVFS_HINT_NULL, 0, pd); + } + else + { + pd2 = pd; /* allow null path to work */ + } + } + if (!pd2) + { + return -1; + } + memset(&attr, 0, sizeof(attr)); + if (!times) + { + /* no times given: use the current time for both */ + struct timeval curtime; + gettimeofday(&curtime, NULL); + attr.atime = curtime.tv_sec; + attr.mtime = curtime.tv_sec; + } + else + { + attr.atime = times[0].tv_sec; + attr.mtime = times[1].tv_sec; + } + attr.mask = PVFS_ATTR_SYS_ATIME | PVFS_ATTR_SYS_MTIME; + rc = iocommon_setattr(pd2->s->pvfs_ref, &attr); + if (path) + { + /* only close what was opened here, never the caller's dirfd */ + pvfs_close(pd2->fd); + } + return rc; +} + +int pvfs_utimes(const char *path, const struct timeval times[2]) +{ + return pvfs_futimesat(AT_FDCWD, path, times); +} + +/* utime: a NULL buf means set both times to the current time */ +int pvfs_utime(const char *path, const struct utimbuf *buf) +{ + struct timeval times[2]; + if (!buf) + { + return pvfs_futimesat(AT_FDCWD, path, NULL); + } + times[0].tv_sec = buf->actime; + times[0].tv_usec = 0; + times[1].tv_sec = buf->modtime; + times[1].tv_usec = 0; + return pvfs_futimesat(AT_FDCWD, path, times); +} + +/* futimes: set access and modification time on an open descriptor; + * the descriptor belongs to the caller and is left open + */ +int pvfs_futimes(int fd, const struct timeval times[2]) +{ + int rc = 0; + pvfs_descriptor *pd=NULL; + PVFS_sys_attr attr; + + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + memset(&attr, 0, sizeof(attr)); + if (!times) + { + struct timeval curtime; + gettimeofday(&curtime, NULL); + attr.atime = curtime.tv_sec; + attr.mtime = curtime.tv_sec; + } + else + { + attr.atime = times[0].tv_sec; + attr.mtime = times[1].tv_sec; + } + attr.mask = PVFS_ATTR_SYS_ATIME | PVFS_ATTR_SYS_MTIME; + rc = iocommon_setattr(pd->s->pvfs_ref, &attr); + return rc; +} + +/** + * pvfs_dup + */ +int pvfs_dup(int oldfd) +{ + return pvfs_dup_descriptor(oldfd, -1); +} + +/** + * pvfs_dup2 + */ +int pvfs_dup2(int oldfd, int newfd) +{ + return 
pvfs_dup_descriptor(oldfd, newfd); +} + +/** + * pvfs_chown + */ +int pvfs_chown(const char *path, uid_t owner, gid_t group) +{ + int rc; + char *newpath; + pvfs_descriptor *pd; + + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + pd = iocommon_open(newpath, O_RDONLY, PVFS_HINT_NULL, 0, NULL); + if (!pd) + { + rc = -1; + goto errorout; + } + rc = iocommon_chown(pd, owner, group); + pvfs_close(pd->fd); + +errorout: + if (newpath != path) + { + free(newpath); + } + return rc; +} + +/** + * pvfs_fchown + */ +int pvfs_fchown(int fd, uid_t owner, gid_t group) +{ + pvfs_descriptor *pd; + + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + return iocommon_chown(pd, owner, group); +} + +/** + * pvfs_fchownat + * + * Absolute paths and AT_FDCWD are handed to pvfs_chown / pvfs_lchown; + * otherwise path is opened relative to the directory descriptor fd. + * AT_SYMLINK_NOFOLLOW in flag selects the no-follow variant. + */ +int pvfs_fchownat(int fd, const char *path, uid_t owner, gid_t group, int flag) +{ + int rc; + pvfs_descriptor *pd, *pd2; + + if (path[0] == '/' || fd == AT_FDCWD) + { + if (flag & AT_SYMLINK_NOFOLLOW) + { + rc = pvfs_lchown(path, owner, group); + } + else + { + rc = pvfs_chown(path, owner, group); + } + } + else + { + int flags = O_RDONLY; + if (flag & AT_SYMLINK_NOFOLLOW) + { + flags |= O_NOFOLLOW; + } + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + pd2 = iocommon_open(path, flags, PVFS_HINT_NULL, 0, pd); + /* check the descriptor just opened, not the directory one */ + if (!pd2) + { + return -1; + } + rc = iocommon_chown(pd2, owner, group); + pvfs_close(pd2->fd); + } + return rc; +} + +/** + * pvfs_lchown + */ +int pvfs_lchown(const char *path, uid_t owner, gid_t group) +{ + int rc; + char *newpath; + pvfs_descriptor *pd; + + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + pd = iocommon_open(newpath, O_RDONLY|O_NOFOLLOW, PVFS_HINT_NULL, 0, NULL); + if (!pd) + { + rc = -1; + goto errorout; + } + rc = iocommon_chown(pd, owner, group); + pvfs_close(pd->fd); + +errorout: + if (newpath != path) + { + free(newpath); + } + 
return rc; +} + +/** + * pvfs_chmod + */ +int pvfs_chmod(const char *path, mode_t mode) +{ + int rc; + char *newpath; + pvfs_descriptor *pd; + + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + pd = iocommon_open(newpath, O_RDONLY, PVFS_HINT_NULL, 0, NULL); + if (!pd) + { + rc = -1; + goto errorout; + } + rc = iocommon_chmod(pd, mode); + pvfs_close(pd->fd); + +errorout: + if (newpath != path) + { + free(newpath); + } + return rc; +} + +/** + * pvfs_fchmod + */ +int pvfs_fchmod(int fd, mode_t mode) +{ + pvfs_descriptor *pd; + + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + return iocommon_chmod(pd, mode); +} + +/** + * pvfs_fchmodat + */ +int pvfs_fchmodat(int fd, const char *path, mode_t mode, int flag) +{ + int rc; + pvfs_descriptor *pd, *pd2; + + if (path[0] == '/' || fd == AT_FDCWD) + { + rc = pvfs_chmod(path, mode); + } + else + { + int flags = O_RDONLY; + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + return -1; + } + pd2 = iocommon_open(path, flags, PVFS_HINT_NULL, 0, pd); + if (!pd2) + { + return -1; + } + rc = iocommon_chmod(pd2, mode); + pvfs_close(pd2->fd); + } + return rc; +} + +/** + * pvfs_mkdir + */ +int pvfs_mkdir(const char *path, mode_t mode) +{ + int rc; + char *newpath; + + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + rc = iocommon_make_directory(newpath, (mode & ~mask_val & 0777), NULL); + if (newpath != path) + { + free(newpath); + } + return rc; +} + +/** + * pvfs_mkdirat + */ +int pvfs_mkdirat(int dirfd, const char *path, mode_t mode) +{ + int rc; + pvfs_descriptor *pd; + + if (path[0] == '/' || dirfd == AT_FDCWD) + { + rc = pvfs_mkdir(path, mode); + } + else + { + if (dirfd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(dirfd); + if (!pd) + { + errno = EBADF; + return -1; + } + rc = iocommon_make_directory(path, + (mode & 
~mask_val & 0777), + &pd->s->pvfs_ref); + } + return rc; +} + +/** + * pvfs_rmdir + */ +int pvfs_rmdir(const char *path) +{ + int rc; + char *newpath; + + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + rc = iocommon_rmdir(newpath, NULL); + if (newpath != path) + { + free(newpath); + } + return rc; +} + +/** + * readlink fills buffer with contents of a symbolic link + * + */ +ssize_t pvfs_readlink(const char *path, char *buf, size_t bufsiz) +{ + int rc; + char *newpath; + pvfs_descriptor *pd; + + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + pd = iocommon_open(newpath, O_RDONLY | O_NOFOLLOW, PVFS_HINT_NULL, 0, NULL); + if (!pd) + { + rc = -1; + goto errorout; + } + debug("pvfs_readlink mode is %o\n", pd->s->mode); + /* this checks that it is a valid symlink and sets errno if not */ + rc = iocommon_readlink(pd, buf, bufsiz); + /* need to close if readlink succeeds or not */ + pvfs_close(pd->fd); + +errorout: + if (newpath != path) + { + free(newpath); + } + return rc; +} + +ssize_t pvfs_readlinkat(int fd, const char *path, char *buf, size_t bufsiz) +{ + int rc; + pvfs_descriptor *pd, *pd2; + + if (path[0] == '/' || fd == AT_FDCWD) + { + rc = pvfs_readlink(path, buf, bufsiz); + } + else + { + int flags = O_RDONLY | O_NOFOLLOW; + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + return -1; + } + pd2 = iocommon_open(path, flags, PVFS_HINT_NULL, 0, pd); + if(!pd2) + { + return -1; + } + rc = iocommon_readlink(pd2, buf, bufsiz); + pvfs_close(pd2->fd); + } + return rc; +} + +int pvfs_symlink(const char *oldpath, const char *newpath) +{ + int rc = 0; + char *abspath; + abspath = pvfs_qualify_path(newpath); + if (!abspath) + { + return -1; + } + rc = iocommon_symlink(abspath, oldpath, NULL); + if (abspath != newpath) + { + free(abspath); + } + return rc; +} + +int pvfs_symlinkat(const char *oldpath, int newdirfd, const char *newpath) +{ + pvfs_descriptor *pd; + + if 
(newpath[0] == '/' || newdirfd == AT_FDCWD) + { + /* absolute path or AT_FDCWD: newdirfd is not consulted */ + return pvfs_symlink(oldpath, newpath); + } + else + { + if (newdirfd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(newdirfd); + if (!pd) + { + errno = EBADF; + return -1; + } + } + /* create the link relative to the directory descriptor */ + return iocommon_symlink(newpath, oldpath, &pd->s->pvfs_ref); +} + +/** + * PVFS does not have hard links + * + * Always fails with ENOSYS. + */ +int pvfs_link(const char *oldpath, const char *newpath) +{ + fprintf(stderr, "pvfs_link not implemented\n"); + errno = ENOSYS; + return -1; +} + +/** + * PVFS does not have hard links + * + * Always fails with ENOSYS. + */ +int pvfs_linkat(int olddirfd, const char *oldpath, + int newdirfd, const char *newpath, int flags) +{ + fprintf(stderr, "pvfs_linkat not implemented\n"); + errno = ENOSYS; + return -1; +} + +/** + * this reads exactly one dirent, count is ignored + * + * NOTE(review): passes 1 as the size argument of pvfs_getdents, which + * its comment describes as a byte count - confirm this reads one entry. + */ +int pvfs_readdir(unsigned int fd, struct dirent *dirp, unsigned int count) +{ + return pvfs_getdents(fd, dirp, 1); +} + +/** + * this reads multiple dirents, man pages calls last arg count but + * is ambiguous if it is number of records or number of bytes. latter + * appears to be true so we rename it size. Returns bytes read. 
+ */ +int pvfs_getdents(unsigned int fd, struct dirent *dirp, unsigned int size) +{ + pvfs_descriptor *pd; + + /* fd is unsigned here, so test it as the int the caller passed */ + if ((int)fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + return iocommon_getdents(pd, dirp, size); +} + +/* 64-bit dirent variant of pvfs_getdents */ +int pvfs_getdents64(unsigned int fd, struct dirent64 *dirp, unsigned int size) +{ + pvfs_descriptor *pd; + + if ((int)fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + return iocommon_getdents64(pd, dirp, size); +} + +/* access: checks mode against the qualified form of path */ +int pvfs_access(const char *path, int mode) +{ + int rc = 0; + char *newpath; + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + /* use the qualified path, not the caller's original string */ + rc = iocommon_access(newpath, mode, 0, NULL); + if (newpath != path) + { + free(newpath); + } + return rc; +} + +/* faccessat: absolute paths and AT_FDCWD go to pvfs_access, otherwise + * path is checked relative to the directory descriptor fd + */ +int pvfs_faccessat(int fd, const char *path, int mode, int flags) +{ + pvfs_descriptor *pd; + + if (path[0] == '/' || fd == AT_FDCWD) + { + return pvfs_access(path, mode); + } + else + { + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if(!pd) + { + errno = EBADF; + return -1; + } + } + return iocommon_access(path, mode, flags, &pd->s->pvfs_ref); +} + +/* file locking is not implemented: always fails with ENOSYS */ +int pvfs_flock(int fd, int op) +{ + errno = ENOSYS; + fprintf(stderr, "pvfs_flock not implemented\n"); + return -1; +} + +int pvfs_fcntl(int fd, int cmd, ...) 
+{ + int rc = 0; + va_list ap; + /* long arg; */ + struct flock *lock; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + rc = -1; + goto errorout; + } + va_start(ap, cmd); + switch (cmd) + { + case F_GETFD : + rc = pd->fdflags; + break; + case F_SETFD : + pd->fdflags = va_arg(ap, int); + break; + case F_GETFL : + rc = pd->s->flags; + break; + case F_SETFL : + /* the access mode chosen at open time cannot be changed here */ + pd->s->flags = (pd->s->flags & O_ACCMODE) | + (va_arg(ap, int) & ~O_ACCMODE); + break; + case F_GETLK : + case F_SETLK : + case F_SETLKW : + lock = va_arg(ap, struct flock *); + (void)lock; + /* fall through: locking is not implemented */ + case F_DUPFD : + /* not implemented: reported as ENOSYS rather than returning the + * descriptor flags as if they were a new descriptor + */ + case F_GETOWN : + case F_SETOWN : + case F_GETSIG : + case F_SETSIG : + case F_GETLEASE : + case F_SETLEASE : + case F_NOTIFY : + default : + errno = ENOSYS; + fprintf(stderr, "pvfs_fcntl command not implemented\n"); + rc = -1; + break; + } + va_end(ap); + +errorout : + return rc; +} + +/* sync all disk data */ +void pvfs_sync(void ) +{ + return; +} + +/** + * pvfs_fsync + * sync file, but not dir it is in + * as close as we have for now + */ +int pvfs_fsync(int fd) +{ + int rc = 0; + pvfs_descriptor* pd; + + debug("pvfs_fsync: called with %d\n", fd); + + if (fd < 0) + { + errno = EBADF; + return -1; + } + /* Find the descriptor */ + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + + /* tell the server to flush data to disk */ + rc = iocommon_fsync(pd); + debug("pvfs_fsync: returns %d\n", rc); + return rc; +} + +/* does not sync file metadata */ +int pvfs_fdatasync(int fd) +{ + int rc = 0; + + rc = pvfs_fsync(fd); /* as close as we have for now */ + return rc; +} + +int pvfs_fadvise(int fd, off_t offset, off_t len, int advice) +{ + return pvfs_fadvise64(fd, (off64_t) offset, (off64_t)len, advice); +} + +/** fadvise implementation + * + * technically this is a hint, so doing nothing is still success + */ +int pvfs_fadvise64(int fd, off64_t offset, off64_t len, int advice) +{ + switch (advice) + { + case POSIX_FADV_NORMAL: + case POSIX_FADV_RANDOM: + case POSIX_FADV_SEQUENTIAL: + case 
POSIX_FADV_WILLNEED: + case POSIX_FADV_DONTNEED: + case POSIX_FADV_NOREUSE: + break; + default: + errno = EINVAL; + return -1; + } + return 0; +} + +/* statfs: opens the qualified path read-only and queries it */ +int pvfs_statfs(const char *path, struct statfs *buf) +{ + int rc; + char *newpath; + pvfs_descriptor *pd; + + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + pd = iocommon_open(newpath, O_RDONLY, PVFS_HINT_NULL, 0, NULL); + if (!pd) + { + rc = -1; + goto errorout; + } + rc = iocommon_statfs(pd, buf); + pvfs_close(pd->fd); + +errorout: + /* newpath is freed only when it differs from the caller's pointer */ + if (newpath != path) + { + free(newpath); + } + return rc; +} + +/* 64-bit variant of pvfs_statfs */ +int pvfs_statfs64(const char *path, struct statfs64 *buf) +{ + int rc; + char *newpath; + pvfs_descriptor *pd; + + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + pd = iocommon_open(newpath, O_RDONLY, PVFS_HINT_NULL, 0, NULL); + if (!pd) + { + rc = -1; + goto errorout; + } + rc = iocommon_statfs64(pd, buf); + pvfs_close(pd->fd); + +errorout: + if (newpath != path) + { + free(newpath); + } + return rc; +} + +/* fstatfs: same query on an already open descriptor */ +int pvfs_fstatfs(int fd, struct statfs *buf) +{ + pvfs_descriptor *pd; + + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + return iocommon_statfs(pd, buf); +} + +/* 64-bit variant of pvfs_fstatfs */ +int pvfs_fstatfs64(int fd, struct statfs64 *buf) +{ + pvfs_descriptor *pd; + + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + return iocommon_statfs64(pd, buf); +} + +/* statvfs: runs the statfs query and copies the matching fields over; + * f_frsize, f_favail and f_flag are left untouched + */ +int pvfs_statvfs(const char *path, struct statvfs *buf) +{ + int rc = 0; + pvfs_descriptor *pd; + struct statfs buf2; + char *newpath; + + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + pd = iocommon_open(newpath, O_RDONLY, PVFS_HINT_NULL, 0, NULL); + if (!pd) + { + rc = -1; + goto errorout; + } + rc = iocommon_statfs(pd, &buf2); + pvfs_close(pd->fd); + if (rc < 0) + { + goto errorout; + } + buf->f_bsize = buf2.f_bsize; + /* buf->f_rsize */ + 
buf->f_blocks = buf2.f_blocks; + buf->f_bfree = buf2.f_bfree; + buf->f_bavail = buf2.f_bavail; + buf->f_files = buf2.f_files; + buf->f_ffree = buf2.f_ffree; + /* buf->f_favail */ + buf->f_fsid = (unsigned long)buf2.f_fsid.__val[0]; + /* buf->f_flag */ + buf->f_namemax = buf2.f_namelen; + +errorout: + if (newpath != path) + { + free(newpath); + } + return rc; +} + +/* fstatvfs: statfs query on an open descriptor, copied field by field */ +int pvfs_fstatvfs(int fd, struct statvfs *buf) +{ + int rc = 0; + pvfs_descriptor *pd; + struct statfs buf2; + + if (fd < 0) + { + errno = EBADF; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + rc = iocommon_statfs(pd, &buf2); + if (rc < 0) + { + return -1; + } + buf->f_bsize = buf2.f_bsize; + /* buf->f_rsize */ + buf->f_blocks = buf2.f_blocks; + buf->f_bfree = buf2.f_bfree; + buf->f_bavail = buf2.f_bavail; + buf->f_files = buf2.f_files; + buf->f_ffree = buf2.f_ffree; + /* buf->f_favail */ + buf->f_fsid = (unsigned long)buf2.f_fsid.__val[0]; + /* buf->f_flag */ + buf->f_namemax = buf2.f_namelen; + return rc; +} + +int pvfs_mknod(const char *path, mode_t mode, dev_t dev) +{ + return pvfs_mknodat(AT_FDCWD, path, mode, dev); +} + +/* mknodat: only regular files can be created; the file type is taken + * from the S_IFMT bits of mode (zero is treated as a regular file). + * dev only matters for device nodes, which are rejected with EINVAL. + */ +int pvfs_mknodat(int dirfd, const char *path, mode_t mode, dev_t dev) +{ + int fd; + int s_type = mode & S_IFMT; + + (void)dev; + switch (s_type) + { + case 0: + case S_IFREG: + /* permission bits are octal 0777 */ + fd = pvfs_openat(dirfd, path, O_CREAT|O_EXCL|O_RDONLY, mode & 0777); + if (fd < 0) + { + return -1; + } + pvfs_close(fd); + break; + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + default: + errno = EINVAL; + return -1; + } + return 0; +} + +/* sendfile: off_t wrapper around pvfs_sendfile64; the offset goes + * through a real off64_t so the two types may differ in size + */ +ssize_t pvfs_sendfile(int outfd, int infd, off_t *offset, size_t count) +{ + off64_t offset64; + ssize_t rc; + + if (!offset) + { + return pvfs_sendfile64(outfd, infd, NULL, count); + } + offset64 = (off64_t)*offset; + rc = pvfs_sendfile64(outfd, infd, &offset64, count); + *offset = (off_t)offset64; + return rc; +} + +ssize_t pvfs_sendfile64(int outfd, int infd, off64_t *offset, size_t count) +{ + pvfs_descriptor *inpd, *outpd; + + inpd = pvfs_find_descriptor(infd); + outpd = pvfs_find_descriptor(outfd); /* this should be a socket */ + if (!inpd || !outpd) + { + errno = EBADF; + return -1; 
+ } + return iocommon_sendfile(outpd->true_fd, inpd, offset, count); +} + +/* setxattr: opens path, sets the attribute through pvfs_fsetxattr */ +int pvfs_setxattr(const char *path, + const char *name, + const void *value, + size_t size, + int flags) +{ + int fd, rc = 0; + + fd = pvfs_open(path, O_RDWR); + if (fd < 0) + { + return fd; + } + rc = pvfs_fsetxattr(fd, name, value, size, flags); + pvfs_close(fd); + return rc; +} + +/* lsetxattr: as pvfs_setxattr but does not follow a final symlink */ +int pvfs_lsetxattr(const char *path, + const char *name, + const void *value, + size_t size, + int flags) +{ + int fd, rc = 0; + + fd = pvfs_open(path, O_RDWR | O_NOFOLLOW); + if (fd < 0) + { + return fd; + } + rc = pvfs_fsetxattr(fd, name, value, size, flags); + pvfs_close(fd); + return rc; +} + +int pvfs_fsetxattr(int fd, + const char *name, + const void *value, + size_t size, + int flags) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + rc = iocommon_seteattr(pd, name, value, size, flags); + return rc; +} + +/* getxattr: reading an attribute only needs read access, so the file + * is opened read-only + */ +ssize_t pvfs_getxattr(const char *path, + const char *name, + void *value, + size_t size) +{ + int fd; + ssize_t rc = 0; + + fd = pvfs_open(path, O_RDONLY); + if (fd < 0) + { + return fd; + } + rc = pvfs_fgetxattr(fd, name, value, size); + pvfs_close(fd); + return rc; +} + +/* lgetxattr: as pvfs_getxattr but does not follow a final symlink */ +ssize_t pvfs_lgetxattr(const char *path, + const char *name, + void *value, + size_t size) +{ + int fd; + ssize_t rc = 0; + + fd = pvfs_open(path, O_RDONLY | O_NOFOLLOW); + if (fd < 0) + { + return fd; + } + rc = pvfs_fgetxattr(fd, name, value, size); + pvfs_close(fd); + return rc; +} + +ssize_t pvfs_fgetxattr(int fd, + const char *name, + void *value, + size_t size) +{ + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + return iocommon_geteattr(pd, name, value, size); +} + +/* listxattr: listing attributes only needs read access */ +ssize_t pvfs_listxattr(const char *path, + char *list, + size_t size) +{ + int fd; + ssize_t rc = 0; + + fd = pvfs_open(path, O_RDONLY); + if (fd < 0) + { + return fd; + } + rc = pvfs_flistxattr(fd, list, size); + pvfs_close(fd); + return rc; +} + +ssize_t 
pvfs_llistxattr(const char *path, + char *list, + size_t size) +{ + int fd; + ssize_t rc = 0; + + /* listing attributes only needs read access */ + fd = pvfs_open(path, O_RDONLY | O_NOFOLLOW); + if (fd < 0) + { + return fd; + } + rc = pvfs_flistxattr(fd, list, size); + pvfs_close(fd); + return rc; +} + +/* flistxattr: returns the size reported by iocommon_listeattr */ +ssize_t pvfs_flistxattr(int fd, + char *list, + size_t size) +{ + int retsize, rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + rc = iocommon_listeattr(pd, list, size, &retsize); + if (rc < 0) + { + return -1; + } + return retsize; +} + +int pvfs_removexattr(const char *path, + const char *name) +{ + int fd, rc = 0; + + fd = pvfs_open(path, O_RDWR); + if (fd < 0) + { + return fd; + } + rc = pvfs_fremovexattr(fd, name); + pvfs_close(fd); + return rc; +} + +int pvfs_lremovexattr(const char *path, + const char *name) +{ + int fd, rc = 0; + + fd = pvfs_open(path, O_RDWR | O_NOFOLLOW); + if (fd < 0) + { + return fd; + } + rc = pvfs_fremovexattr(fd, name); + pvfs_close(fd); + return rc; +} + +int pvfs_fremovexattr(int fd, + const char *name) +{ + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (!pd) + { + errno = EBADF; + return -1; + } + return iocommon_deleattr(pd, name); +} + +/* These functions simulate management of the current + * working directory given than the kernel may not + * be aware of PVFS virtual mounts + */ +/* Copies at most PVFS_PATH_MAX - 1 bytes of buf into pvfs_cwd so the + * stored path always ends in a NUL; a NULL buf fails with EINVAL + */ +int pvfs_cwd_init(const char *buf, size_t size) +{ + if (!buf) + { + errno = EINVAL; + return -1; + } + memset(pvfs_cwd, 0, PVFS_PATH_MAX); + strncpy(pvfs_cwd, buf, PVFS_util_min(size, PVFS_PATH_MAX - 1)); + return 0; +} + +/** + * pvfs chdir + */ +int pvfs_chdir(const char *path) +{ + int rc = 0, plen = 0; + struct stat sbuf; + char *newpath = NULL; + + if (!path) + { + errno = EINVAL; + return -1; + } + /* we really need to resolve this to a canonical path */ + newpath = pvfs_qualify_path(path); + if (!newpath) + { + return -1; + } + /* basic path length check, leaving room for the terminating NUL */ + plen = strlen(newpath); + if (plen >= PVFS_PATH_MAX) + { + errno = 
ENAMETOOLONG; + rc = -1; + goto errout; + } + /* if it is a valid path we can stat it and see what it is */ + rc = stat(newpath, &sbuf); /* this will get most errors */ + if (rc < 0) + { + rc = -1; + goto errout; + } + /* path must be a directory */ + if (!S_ISDIR(sbuf.st_mode)) + { + errno = ENOTDIR; + rc = -1; + goto errout; + } + /* we will keep a copy and keep one in the environment */ + strncpy(pvfs_cwd, newpath, PVFS_PATH_MAX); + setenv("PWD", newpath, 1); + +errout: + if (newpath != path) + { + free(newpath); + } + return rc; +} + +/* fchdir: makes the directory behind an open descriptor the current + * working directory. Fails with EBADF for an unknown descriptor or + * one without a stored path, ENOTDIR when it is not a directory. + */ +int pvfs_fchdir(int fd) +{ + int plen; + pvfs_descriptor *pd; + + /* path is already opened, make sure it is a dir */ + pd = pvfs_find_descriptor(fd); + if (!pd || !pd->s->dpath) + { + errno = EBADF; + return -1; + } + if (!S_ISDIR(pd->s->mode)) + { + errno = ENOTDIR; + return -1; + } + /* basic check for overflow, leaving room for the terminating NUL */ + plen = strlen(pd->s->dpath); + if (plen >= PVFS_PATH_MAX) + { + errno = ENAMETOOLONG; + return -1; + } + /* we will keep a copy and keep one in the environment */ + strncpy(pvfs_cwd, pd->s->dpath, PVFS_PATH_MAX); + setenv("PWD", pd->s->dpath, 1); + return 0; +} + +char *pvfs_getcwd(char *buf, size_t size) +{ + int plen; + plen = strnlen(pvfs_cwd, PVFS_PATH_MAX); + /* implement Linux variation */ + if (!buf) + { + int bsize = size ? 
size : plen + 1; + /* a caller-given size must still hold the path and its NUL */ + if (bsize < plen + 1) + { + errno = ERANGE; + return NULL; + } + /* malloc space */ + buf = (char *)malloc(bsize); + if (!buf) + { + errno = ENOMEM; + return NULL; + } + } + else + { + if (size == 0) + { + errno = EINVAL; + return NULL; + } + if (size < plen + 1) + { + errno = ERANGE; + return NULL; + } + } + strcpy(buf, pvfs_cwd); + return buf; +} + +/* returns a freshly malloc'ed copy of the stored working directory */ +char *pvfs_get_current_dir_name(void) +{ + int plen; + char *buf; + plen = strnlen(pvfs_cwd, PVFS_PATH_MAX); + buf = (char *)malloc(plen + 1); + if (!buf) + { + errno = ENOMEM; + return NULL; + } + strcpy(buf, pvfs_cwd); + return buf; +} +/* + * This is the no-frills old-fashioned version + * Use at own risk + * + * Copies up to PVFS_PATH_MAX bytes into buf without knowing its size. + */ +char *pvfs_getwd(char *buf) +{ + if (!buf) + { + errno = EINVAL; + return NULL; + } + strncpy(buf, pvfs_cwd, PVFS_PATH_MAX); + return buf; +} + + +/** + * pvfs_umask + * + * Manage a umask ourselves just in case we need to + * Probably the standard version works fine but + * In case we get a problem we have it + * + * Stores the low nine permission bits of mask and returns the + * previous value. + */ +mode_t pvfs_umask(mode_t mask) +{ + mode_t old_mask = mask_val; + mask_val = mask & 0777; + return old_mask; +} + +/* returns the umask kept by pvfs_umask without changing it */ +mode_t pvfs_getumask(void) +{ + return mask_val; +} + +int pvfs_getdtablesize(void) +{ + return pvfs_descriptor_table_size(); +} + +/* + * Table of PVFS system call versions for use by posix.c + */ +posix_ops pvfs_ops = +{ + .open = pvfs_open, + .open64 = pvfs_open64, + .openat = pvfs_openat, + .openat64 = pvfs_openat64, + .creat = pvfs_creat, + .creat64 = pvfs_creat64, + .unlink = pvfs_unlink, + .unlinkat = pvfs_unlinkat, + .rename = pvfs_rename, + .renameat = pvfs_renameat, + .read = pvfs_read, + .pread = pvfs_pread, + .readv = pvfs_readv, + .pread64 = pvfs_pread64, + .write = pvfs_write, + .pwrite = pvfs_pwrite, + .writev = pvfs_writev, + .pwrite64 = pvfs_pwrite64, + .lseek = pvfs_lseek, + .lseek64 = pvfs_lseek64, + .truncate = pvfs_truncate, + .truncate64 = pvfs_truncate64, + .ftruncate = pvfs_ftruncate, + .ftruncate64 = pvfs_ftruncate64, + .fallocate = 
pvfs_fallocate, + .close = pvfs_close, + .stat = pvfs_stat, + .stat64 = pvfs_stat64, + .fstat = pvfs_fstat, + .fstat64 = pvfs_fstat64, + .fstatat = pvfs_fstatat, + .fstatat64 = pvfs_fstatat64, + .lstat = pvfs_lstat, + .lstat64 = pvfs_lstat64, + .futimesat = pvfs_futimesat, + .utimes = pvfs_utimes, + .utime = pvfs_utime, + .futimes = pvfs_futimes, + .dup = pvfs_dup, + .dup2 = pvfs_dup2, + .chown = pvfs_chown, + .fchown = pvfs_fchown, + .fchownat = pvfs_fchownat, + .lchown = pvfs_lchown, + .chmod = pvfs_chmod, + .fchmod = pvfs_fchmod, + .fchmodat = pvfs_fchmodat, + .mkdir = pvfs_mkdir, + .mkdirat = pvfs_mkdirat, + .rmdir = pvfs_rmdir, + .readlink = pvfs_readlink, + .readlinkat = pvfs_readlinkat, + .symlink = pvfs_symlink, + .symlinkat = pvfs_symlinkat, + .link = pvfs_link, + .linkat = pvfs_linkat, + .readdir = pvfs_readdir, + .getdents = pvfs_getdents, + .getdents64 = pvfs_getdents64, + .access = pvfs_access, + .faccessat = pvfs_faccessat, + .flock = pvfs_flock, + .fcntl = pvfs_fcntl, + .sync = pvfs_sync, + .fsync = pvfs_fsync, + .fdatasync = pvfs_fdatasync, + .fadvise = pvfs_fadvise, + .fadvise64 = pvfs_fadvise64, + /* the field is named like the libc function; the table must still + * point at the PVFS version defined in this file + */ + .statfs = pvfs_statfs, + .statfs64 = pvfs_statfs64, + .fstatfs = pvfs_fstatfs, + .fstatfs64 = pvfs_fstatfs64, + .statvfs = pvfs_statvfs, /* PVFS version, as for .statfs */ + .fstatvfs = pvfs_fstatvfs, + .mknod = pvfs_mknod, + .mknodat = pvfs_mknodat, + .sendfile = pvfs_sendfile, + .sendfile64 = pvfs_sendfile64, + .setxattr = pvfs_setxattr, + .lsetxattr = pvfs_lsetxattr, + .fsetxattr = pvfs_fsetxattr, + .getxattr = pvfs_getxattr, + .lgetxattr = pvfs_lgetxattr, + .fgetxattr = pvfs_fgetxattr, + .listxattr = pvfs_listxattr, + .llistxattr = pvfs_llistxattr, + .flistxattr = pvfs_flistxattr, + .removexattr = pvfs_removexattr, + .lremovexattr = pvfs_lremovexattr, + .fremovexattr = pvfs_fremovexattr, + .getdtablesize = pvfs_getdtablesize, + .umask = pvfs_umask, + .getumask = pvfs_getumask, + .mmap = pvfs_mmap, + .munmap = pvfs_munmap, + 
.msync = pvfs_msync, +/* these are defined in acl.c and do not really need */ +/* a PVFS specific implementation */ +#if 0 + .acl_delete_def_file = pvfs_acl_delete_def_file, + .acl_get_fd = pvfs_acl_get_fd, + .acl_get_file = pvfs_acl_get_file, + .acl_set_fd = pvfs_acl_set_fd, + .acl_set_file = pvfs_acl_set_file, +#endif +}; + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/client/usrint/posix-pvfs.h b/src/client/usrint/posix-pvfs.h new file mode 100644 index 0000000..b33e4d1 --- /dev/null +++ b/src/client/usrint/posix-pvfs.h @@ -0,0 +1,287 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines + */ + +#ifndef POSIX_PVFS_H +#define POSIX_PVFS_H 1 + +/* define FD flags unique to PVFS here */ +#define PVFS_FD_NOCACHE 0x10000 + +/* pvfs_open */ +extern int pvfs_open(const char *path, int flags, ...); + +/* pvfs_open64 */ +extern int pvfs_open64(const char *path, int flags, ...); + +/* pvfs_openat */ +extern int pvfs_openat(int dirfd, const char *path, int flags, ...); + +/* pvfs_openat64 */ +extern int pvfs_openat64(int dirfd, const char *path, int flags, ...); + +extern int pvfs_creat(const char *path, mode_t mode, ...); + +extern int pvfs_creat64(const char *path, mode_t mode, ...); + +/* pvfs_unlink */ +extern int pvfs_unlink (const char *path); + +extern int pvfs_unlinkat (int dirfd, const char *path, int flags); + +extern int pvfs_rename(const char *oldpath, const char *newpath); + +extern int pvfs_renameat(int olddirfd, const char *oldpath, + int newdirfd, const char *newpath); + +/* pvfs_read */ +extern ssize_t pvfs_read( int fd, void *buf, size_t count ); + +/* pvfs_pread */ +extern ssize_t pvfs_pread( int fd, void *buf, size_t count, off_t offset ); + +extern ssize_t pvfs_readv(int fd, const struct iovec *vector, int count); + +/* 
pvfs_pread64 */ +extern ssize_t pvfs_pread64( int fd, void *buf, size_t count, off64_t offset ); + +/* pvfs_write */ +extern ssize_t pvfs_write( int fd, const void *buf, size_t count ); + +/* pvfs_pwrite */ +extern ssize_t pvfs_pwrite( int fd, const void *buf, size_t count, off_t offset ); + +extern ssize_t pvfs_writev( int fd, const struct iovec *vector, int count ); + +/* pvfs_pwrite64 */ +extern ssize_t pvfs_pwrite64( int fd, const void *buf, size_t count, off64_t offset ); + +/* pvfs_lseek */ +extern off_t pvfs_lseek(int fd, off_t offset, int whence); + +/* pvfs_lseek64 */ +extern off64_t pvfs_lseek64(int fd, off64_t offset, int whence); + +extern int pvfs_truncate(const char *path, off_t length); + +extern int pvfs_truncate64 (const char *path, off64_t length); + +extern int pvfs_fallocate(int fd, off_t offset, off_t length); + +extern int pvfs_ftruncate (int fd, off_t length); + +extern int pvfs_ftruncate64 (int fd, off64_t length); + +/* pvfs_close */ +extern int pvfs_close( int fd ); + +extern int pvfs_flush(int fd); + +/* various flavors of stat */ +extern int pvfs_stat(const char *path, struct stat *buf); + +extern int pvfs_stat64(const char *path, struct stat64 *buf); + +extern int pvfs_stat_mask(const char *path, struct stat *buf, uint32_t mask); + +extern int pvfs_fstat(int fd, struct stat *buf); + +extern int pvfs_fstat64(int fd, struct stat64 *buf); + +extern int pvfs_fstatat(int fd, const char *path, struct stat *buf, int flag); + +extern int pvfs_fstatat64(int fd, const char *path, struct stat64 *buf, int flag); + +extern int pvfs_fstat_mask(int fd, struct stat *buf, uint32_t mask); + +extern int pvfs_lstat(const char *path, struct stat *buf); + +extern int pvfs_lstat64(const char *path, struct stat64 *buf); + +extern int pvfs_lstat_mask(const char *path, struct stat *buf, uint32_t mask); + +extern int pvfs_futimesat(int dirfd, const char *path, const struct timeval times[2]); + +extern int pvfs_utimes(const char *path, const struct timeval 
times[2]); + +extern int pvfs_utime(const char *path, const struct utimbuf *buf); + +extern int pvfs_futimes(int fd, const struct timeval times[2]); + +extern int pvfs_dup(int oldfd); + +extern int pvfs_dup2(int oldfd, int newfd); + +extern int pvfs_chown (const char *path, uid_t owner, gid_t group); + +extern int pvfs_fchown (int fd, uid_t owner, gid_t group); + +extern int pvfs_fchownat(int fd, const char *path, uid_t owner, gid_t group, int flag); + +extern int pvfs_lchown (const char *path, uid_t owner, gid_t group); + +extern int pvfs_chmod (const char *path, mode_t mode); + +extern int pvfs_fchmod (int fd, mode_t mode); + +extern int pvfs_fchmodat(int fd, const char *path, mode_t mode, int flag); + +extern int pvfs_mkdir (const char *path, mode_t mode); + +extern int pvfs_mkdirat (int dirfd, const char *path, mode_t mode); + +extern int pvfs_rmdir (const char *path); + +extern ssize_t pvfs_readlink (const char *path, char *buf, size_t bufsiz); + +extern ssize_t pvfs_readlinkat (int dirfd, const char *path, char *buf, size_t bufsiz); + +extern int pvfs_symlink (const char *oldpath, const char *newpath); + +extern int pvfs_symlinkat (const char *oldpath, int newdirfd, const char *newpath); + +/* PVFS does not have hard links */ +extern int pvfs_link (const char *oldpath, const char *newpath); + +/* PVFS does not have hard links */ +extern int pvfs_linkat (int olddirfd, const char *oldpath, + int newdirfd, const char *newpath, int flags); + +/* this reads exactly one dirent, count is ignored */ +extern int pvfs_readdir(unsigned int fd, struct dirent *dirp, unsigned int count); + +/* this reads multiple dirents, up to count */ +extern int pvfs_getdents(unsigned int fd, struct dirent *dirp, unsigned int count); + +extern int pvfs_getdents64(unsigned int fd, struct dirent64 *dirp, unsigned int count); + +extern int pvfs_access (const char * path, int mode); + +extern int pvfs_faccessat (int dirfd, const char * path, int mode, int flags); + +extern int 
pvfs_flock(int fd, int op); + +extern int pvfs_fcntl(int fd, int cmd, ...); + +/* sync all disk data */ +extern void pvfs_sync(void ); + +/* sync file, but not dir it is in */ +extern int pvfs_fsync(int fd); + +/* does not sync file metadata */ +extern int pvfs_fdatasync(int fd); + +extern int pvfs_fadvise(int fd, off_t offset, off_t len, int advice); + +extern int pvfs_fadvise64(int fd, off64_t offset, off64_t len, int advice); + +extern int pvfs_statfs(const char *path, struct statfs *buf); + +extern int pvfs_statfs64(const char *path, struct statfs64 *buf); + +extern int pvfs_fstatfs(int fd, struct statfs *buf); + +extern int pvfs_fstatfs64(int fd, struct statfs64 *buf); + +extern int pvfs_statvfs(const char *path, struct statvfs *buf); + +extern int pvfs_fstatvfs(int fd, struct statvfs *buf); + +extern int pvfs_mknod(const char *path, mode_t mode, dev_t dev); + +extern int pvfs_mknodat(int dirfd, const char *path, mode_t mode, dev_t dev); + +extern ssize_t pvfs_sendfile(int outfd, int infd, off_t *offset, size_t count); + +extern ssize_t pvfs_sendfile64(int outfd, int infd, off64_t *offset, size_t count); + +extern int pvfs_setxattr(const char *path, const char *name, + const void *value, size_t size, int flags); + +extern int pvfs_lsetxattr(const char *path, const char *name, + const void *value, size_t size, int flags); + +extern int pvfs_fsetxattr(int fd, const char *name, + const void *value, size_t size, int flags); + +extern ssize_t pvfs_getxattr(const char *path, const char *name, + void *value, size_t size); + +extern ssize_t pvfs_lgetxattr(const char *path, const char *name, + void *value, size_t size); + +extern ssize_t pvfs_fgetxattr(int fd, const char *name, + void *value, size_t size); + +extern ssize_t pvfs_listxattr(const char *path, char *list, size_t size); + +extern ssize_t pvfs_llistxattr(const char *path, char *list, size_t size); + +extern ssize_t pvfs_flistxattr(int fd, char *list, size_t size); + +extern int pvfs_removexattr(const char 
*path, const char *name); + +extern int pvfs_lremovexattr(const char *path, const char *name); + +extern int pvfs_fremovexattr(int fd, const char *name); + +extern int pvfs_chdir(const char *path); + +extern int pvfs_fchdir(int fd); + +extern int pvfs_cwd_init(const char *buf, size_t size); + +extern char *pvfs_getcwd(char *buf, size_t size); + +extern char *pvfs_get_current_dir_name(void); + +extern char *pvfs_getwd(char *buf); + +extern mode_t pvfs_umask(mode_t mask); + +extern mode_t pvfs_getumask(void); + +extern int pvfs_getdtablesize(void); + +extern void *pvfs_mmap(void *start, size_t length, int prot, int flags, + int fd, off_t offset); + +extern int pvfs_munmap(void *start, size_t length); + +extern int pvfs_msync(void *start, size_t length, int flags); + +/* these are defined in acl.c and don't really need */ +/* a PVFS implementation */ +#if 0 +extern int pvfs_acl_delete_def_file(const char *path_p); + +extern acl_t pvfs_acl_get_fd(int fd); + +extern acl_t pvfs_acl_get_file(const char *path_p, acl_type_t type); + +extern int pvfs_acl_set_fd(int fd, acl_t acl); + +extern int pvfs_acl_set_file(const char *path_p, acl_type_t type, acl_t acl); +#endif + + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/client/usrint/posix.c b/src/client/usrint/posix.c new file mode 100644 index 0000000..418c8bd --- /dev/null +++ b/src/client/usrint/posix.c @@ -0,0 +1,2141 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines - wrappers for posix system calls + */ + +/* this prevents headers from using inlines for 64 bit calls */ + +#define USRINT_SOURCE 1 +#include "usrint.h" +#include "posix-ops.h" +#include "posix-pvfs.h" +#include "openfile-util.h" + +/** + * function prototypes not defined in libc, though it is a linux + * system call and we define it in the usr lib + */ + +int getdents(unsigned int, struct dirent *, unsigned int); +int getdents64(unsigned int, struct dirent64 *, unsigned int); +int flock(int, int); +int fadvise64(int, off64_t, off64_t, int); + +/* + * SYSTEM CALLS + */ + +/* + * open wrapper + */ +int open(const char *path, int flags, ...) +{ + va_list ap; + mode_t mode = 0; + PVFS_hint hints; /* need to figure out how to set default */ + pvfs_descriptor *pd; + + va_start(ap, flags); + if (flags & O_CREAT) + mode = va_arg(ap, mode_t); + else + mode = 0777; + if (flags & O_HINTS) + hints = va_arg(ap, PVFS_hint); + else + hints = PVFS_HINT_NULL; + va_end(ap); + + + if (!path) + { + errno = EFAULT; + return -1; + } + if (is_pvfs_path(path)) + { + /* this handles setup of the descriptor */ + flags |= O_NOTPVFS; /* try to open non-pvfs files too */ + return pvfs_open(path, flags, mode, hints); + } + else + { + int rc; + struct stat sbuf; + /* path unknown to FS so open with glibc */ + rc = glibc_ops.open(path, flags & 01777777, mode); + if (rc < 0) + { + return rc; + } + /* set up the descriptor manually */ + pd = pvfs_alloc_descriptor(&glibc_ops, rc, NULL, 0); + if (!pd) + { + return -1; + } + pd->is_in_use = PVFS_FS; + pd->s->flags = flags; + glibc_ops.fstat(rc, &sbuf); + pd->s->mode = sbuf.st_mode; + gen_mutex_unlock(&pd->s->lock); + gen_mutex_unlock(&pd->lock); + return pd->fd; + } +} + +/* + * open64 wrapper + */ +int open64(const char *path, int flags, ...) 
+{ + int fd; + int mode = 0; + va_list ap; + va_start(ap, flags); + if (flags & O_CREAT) + { + mode = va_arg(ap, int); + } + fd = open(path, flags|O_LARGEFILE, mode); + va_end(ap); + return fd; +} + +int openat(int dirfd, const char *path, int flags, ...) +{ + int fd; + int mode = 0; + pvfs_descriptor *pd; + va_list ap; + + if (!path) + { + errno = EFAULT; + return -1; + } + va_start(ap, flags); + if (flags & O_CREAT) + { + mode = va_arg(ap, int); + } + if (dirfd == AT_FDCWD || (path && path[0] == '/')) + { + fd = open(path, flags, mode); + } + else + { + pd = pvfs_find_descriptor(dirfd); + if (pd) + { + fd = pd->s->fsops->openat(pd->true_fd, path, flags, mode); + } + else + { + errno = EBADF; + fd = -1; + } + } + va_end(ap); + return fd; +} + +int openat64(int dirfd, const char *path, int flags, ...) +{ + int fd; + int mode = 0; + va_list ap; + va_start(ap, flags); + if (flags & O_CREAT) + { + mode = va_arg(ap, int); + } + fd = openat(dirfd, path, flags|O_LARGEFILE, mode); + va_end(ap); + return fd; +} + +/* + * creat wrapper + */ +int creat(const char *path, mode_t mode) +{ + return open(path, O_CREAT|O_WRONLY|O_TRUNC, mode); +} + +/* + * creat64 wrapper + */ +int creat64(const char *path, mode_t mode) +{ + return open64(path, O_CREAT|O_WRONLY|O_TRUNC, mode); +} + +/* + * unlink wrapper + */ +int unlink(const char *path) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if (is_pvfs_path(path)) + { + return pvfs_ops.unlink(path); + } + else + { + return glibc_ops.unlink(path); + } +} + +int unlinkat(int dirfd, const char *path, int flag) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (!path) + { + errno = EFAULT; + return -1; + } + if (dirfd == AT_FDCWD || (path && path[0] == '/')) + { + unlink(path); + } + else + { + pd = pvfs_find_descriptor(dirfd); + if (pd) + { + rc = pd->s->fsops->unlinkat(pd->true_fd, path, flag); + } + else + { + errno = EBADF; + rc = -1; + } + } + return rc; +} + +/* + * rename wrapper + */ +int rename (const char *old, const char 
*new) +{ + int oldp, newp; + if (!old || !new) + { + errno = EFAULT; + return -1; + } + oldp = is_pvfs_path(old); + newp = is_pvfs_path(new); + if(oldp && newp) + { + return pvfs_rename(old, new); + } + else if (!oldp && !newp) + { + return glibc_ops.rename(old, new); + } + else + { + errno = EXDEV; + return -1; + } +} + +int renameat (int oldfd, const char *old, int newfd, const char *new) +{ + pvfs_descriptor *oldpd, *newpd; + oldpd = pvfs_find_descriptor(oldfd); + newpd = pvfs_find_descriptor(newfd); + if (!old || !new) + { + errno = EFAULT; + return -1; + } + if (!oldpd || !newpd) + { + errno = EBADF; + return -1; + } + if (oldpd->s->fsops == newpd->s->fsops) + { + return oldpd->s->fsops->renameat(oldpd->true_fd, old, newpd->true_fd, new); + } + else + { + errno = EXDEV; + return -1; + } +} + +/* READING and WRITING SYSTEM CALL */ + +/* + * read wrapper + */ +ssize_t read(int fd, void *buf, size_t count) +{ + ssize_t rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->read(pd->true_fd, buf, count); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +/* + * pread wrapper + */ +ssize_t pread(int fd, void *buf, size_t nbytes, off_t offset) +{ + ssize_t rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->pread(pd->true_fd, (void *)buf, nbytes, offset); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +/* + * readv wrapper + */ +ssize_t readv(int fd, const struct iovec *iov, int iovcnt) +{ + ssize_t rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->readv(pd->true_fd, iov, iovcnt); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +/* + * pread64 wrapper + */ +ssize_t pread64(int fd, void *buf, size_t nbytes, off64_t offset) +{ + ssize_t rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->pread64(pd->true_fd, (void *)buf, 
nbytes, offset); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +/* + * write wrapper + */ +ssize_t write(int fd, const void *buf, size_t count) +{ + ssize_t rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->write(pd->true_fd, (void *)buf, count); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +/* + * pwrite wrapper + */ +ssize_t pwrite(int fd, const void *buf, size_t nbytes, off_t offset) +{ + ssize_t rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->pwrite(pd->true_fd, buf, nbytes, offset); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +/* + * write wrapper + */ +ssize_t writev(int fd, const struct iovec *iov, int iovcnt) +{ + ssize_t rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->writev(fd, iov, iovcnt); + if (rc > 0) + { + gen_mutex_lock(&pd->s->lock); + pd->s->file_pointer += rc; + gen_mutex_unlock(&pd->s->lock); + } + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +/* + * pwrite64 wrapper + */ +ssize_t pwrite64(int fd, const void *buf, size_t nbytes, off64_t offset) +{ + ssize_t rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->pwrite64(pd->true_fd, buf, nbytes, offset); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +/* SEEK system calls */ + +/* + * lseek wrapper + */ +off_t lseek(int fd, off_t offset, int whence) +{ + off64_t rc = lseek64(fd, (off64_t)offset, whence); + if (rc & 0xffffffff00000000LLU) + { + errno = EFAULT; + rc = -1; + } + return (off_t)rc; +} + +/* + * lseek64 wrapper + */ +off64_t lseek64(int fd, off64_t offset, int whence) +{ + off64_t rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->lseek64(pd->true_fd, offset, whence); + } + else + { + errno = EBADF; + rc = (off64_t)-1; + } + return rc; +} + 
+int truncate(const char *path, off_t length) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_truncate(path, length); + } + else + { + return glibc_ops.truncate(path, length); + } +} + +int truncate64(const char *path, off64_t length) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_truncate64(path, length); + } + else + { + return glibc_ops.truncate64(path, length); + } +} + +int ftruncate(int fd, off_t length) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->ftruncate(pd->true_fd, length); + } + else + { + errno = EBADF; + rc = (off64_t)-1; + } + return rc; +} + +int ftruncate64(int fd, off64_t length) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->ftruncate64(pd->true_fd, length); + } + else + { + errno = EBADF; + rc = (off64_t)-1; + } + return rc; +} + +#ifdef _XOPEN_SOURCE +int posix_fallocate(int fd, off_t offset, off_t length) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->fallocate(pd->true_fd, offset, length); + } + else + { + errno = EBADF; + rc = (off64_t)-1; + } + return rc; +} +#endif + +/* + * close wrapper + */ +int close(int fd) +{ + int rc = 0; + + rc = pvfs_free_descriptor(fd); + return rc; +} + +#if 0 +int flush(int fd) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->flush(pd->true_fd); + } + else + { + errno = EBADF; + rc = (off64_t)-1; + } + return rc; +} +#endif + +/* various flavors of stat */ +int stat(const char *path, struct stat *buf) +{ + if (!path || !buf) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_stat(path, buf); + } + else + { + return glibc_ops.stat(path, buf); + } +} + +int __xstat(int ver, const char *path, struct stat *buf) +{ + return stat(path, 
buf); +} + +int stat64(const char *path, struct stat64 *buf) +{ + if (!path || !buf) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_stat64(path, buf); + } + else + { + return glibc_ops.stat64(path, buf); + } +} + +int __xstat64(int ver, const char *path, struct stat64 *buf) +{ + return stat64(path, buf); +} + +int fstat(int fd, struct stat *buf) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->fstat(pd->true_fd, buf); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int __fxstat(int ver, int fd, struct stat *buf) +{ + return fstat(fd, buf); +} + +int fstat64(int fd, struct stat64 *buf) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->fstat64(pd->true_fd, buf); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int __fxstat64(int ver, int fd, struct stat64 *buf) +{ + return fstat64(fd, buf); +} + +int fstatat(int fd, const char *path, struct stat *buf, int flag) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (fd == AT_FDCWD || (path && path[0] == '/')) + { + if (flag & AT_SYMLINK_NOFOLLOW) + { + rc = pvfs_lstat(path, buf); + } + else + { + rc = pvfs_stat(path, buf); + } + } + else + { + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->fstatat(pd->true_fd, path, buf, flag); + } + else + { + errno = EBADF; + rc = -1; + } + } + return rc; +} + +int __fxstatat(int ver, int fd, const char *path, struct stat *buf, int flag) +{ + return fstatat(fd, path, buf, flag); +} + +int fstatat64(int fd, const char *path, struct stat64 *buf, int flag) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (fd == AT_FDCWD || (path && path[0] == '/')) + { + if (flag & AT_SYMLINK_NOFOLLOW) + { + rc = pvfs_lstat64(path, buf); + } + else + { + rc = pvfs_stat64(path, buf); + } + } + else + { + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->fstatat64(pd->true_fd, path, buf, 
flag); + } + else + { + errno = EBADF; + rc = -1; + } + } + return rc; +} + +int __fxstatat64(int ver, int fd, const char *path, struct stat64 *buf, int flag) +{ + return fstatat64(fd, path, buf, flag); +} + +int lstat(const char *path, struct stat *buf) +{ + if (!path || !buf) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_lstat(path, buf); + } + else + { + return glibc_ops.lstat(path, buf); + } +} + +int __lxstat(int ver, const char *path, struct stat *buf) +{ + return lstat(path, buf); +} + +int lstat64(const char *path, struct stat64 *buf) +{ + if (!path || !buf) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_lstat64(path, buf); + } + else + { + return glibc_ops.lstat64(path, buf); + } +} + +int __lxstat64(int ver, const char *path, struct stat64 *buf) +{ + return lstat64(path, buf); +} + +int futimesat(int dirfd, const char *path, const struct timeval times[2]) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (dirfd == AT_FDCWD || (path && path[0] == '/')) + { + utimes(path, times); + } + else + { + pd = pvfs_find_descriptor(dirfd); + if (pd) + { + rc = pd->s->fsops->futimesat(pd->true_fd, path, times); + } + else + { + errno = EBADF; + rc = -1; + } + } + return rc; +} + +int utimes(const char *path, const struct timeval times[2]) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_utimes(path, times); + } + else + { + return glibc_ops.utimes(path, times); + } +} + +int utime(const char *path, const struct utimbuf *buf) +{ + if (!path || !buf) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_utime(path, buf); + } + else + { + return glibc_ops.utime(path, buf); + } +} + +int futimes(int fd, const struct timeval times[2]) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->futimes(pd->true_fd, times); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; 
+} + +int dup(int oldfd) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(oldfd); + if (pd) + { + rc = pd->s->fsops->dup(pd->true_fd); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int dup2(int oldfd, int newfd) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(oldfd); + if (pd) + { + rc = pd->s->fsops->dup2(pd->true_fd, newfd); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int chown(const char *path, uid_t owner, gid_t group) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_chown(path, owner, group); + } + else + { + return glibc_ops.chown(path, owner, group); + } +} + +int fchown(int fd, uid_t owner, gid_t group) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->fchown(pd->true_fd, owner, group); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int fchownat(int dirfd, const char *path, uid_t owner, gid_t group, int flag) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (dirfd == AT_FDCWD || (path && path[0] == '/')) + { + chown(path, owner, group); + } + else + { + pd = pvfs_find_descriptor(dirfd); + if (pd) + { + rc = pd->s->fsops->fchownat(pd->true_fd, path, owner, group, flag); + } + else + { + errno = EBADF; + rc = -1; + } + } + return rc; +} + +int lchown(const char *path, uid_t owner, gid_t group) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_lchown(path, owner, group); + } + else + { + return glibc_ops.lchown(path, owner, group); + } +} + +int chmod(const char *path, mode_t mode) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_chmod(path, mode); + } + else + { + return glibc_ops.chmod(path, mode); + } +} + +int fchmod(int fd, mode_t mode) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = 
pd->s->fsops->fchmod(pd->true_fd, mode); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int fchmodat(int dirfd, const char *path, mode_t mode, int flag) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (!path) + { + errno = EFAULT; + return -1; + } + if (dirfd == AT_FDCWD || (path && path[0] == '/')) + { + chmod(path, mode); + } + else + { + pd = pvfs_find_descriptor(dirfd); + if (pd) + { + rc = pd->s->fsops->fchmodat(pd->true_fd, path, mode, flag); + } + else + { + errno = EBADF; + rc = -1; + } + } + return rc; +} + +int mkdir(const char *path, mode_t mode) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_mkdir(path, mode); + } + else + { + return glibc_ops.mkdir(path, mode); + } +} + +int mkdirat(int dirfd, const char *path, mode_t mode) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (!path) + { + errno = EFAULT; + return -1; + } + if (dirfd == AT_FDCWD || (path && path[0] == '/')) + { + mkdir(path, mode); + } + else + { + pd = pvfs_find_descriptor(dirfd); + if (pd) + { + rc = pd->s->fsops->mkdirat(pd->true_fd, path, mode); + } + else + { + errno = EBADF; + rc = -1; + } + } + return rc; +} + +int rmdir(const char *path) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_rmdir(path); + } + else + { + return glibc_ops.rmdir(path); + } +} + +#if __GLIBC_PREREQ (2,5) +ssize_t readlink(const char *path, char *buf, size_t bufsiz) +#else +int readlink(const char *path, char *buf, size_t bufsiz) +#endif +{ + if (!path || !buf) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_readlink(path, buf, bufsiz); + } + else + { + return glibc_ops.readlink(path, buf, bufsiz); + } +} + +ssize_t readlinkat(int dirfd, const char *path, char *buf, size_t bufsiz) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (!path || !buf) + { + errno = EFAULT; + return -1; + } + if (dirfd == AT_FDCWD || (path && path[0] == '/')) + { + readlink(path, buf, 
bufsiz); + } + else + { + pd = pvfs_find_descriptor(dirfd); + if (pd) + { + rc = pd->s->fsops->readlinkat(pd->true_fd, path, buf, bufsiz); + } + else + { + errno = EBADF; + rc = -1; + } + } + return rc; +} + +int symlink(const char *oldpath, const char *newpath) +{ + if (!oldpath || !newpath) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(newpath)) + { + return pvfs_symlink(oldpath, newpath); + } + else + { + return glibc_ops.symlink(oldpath, newpath); + } +} + +int symlinkat(const char *oldpath, int newdirfd, const char *newpath) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (!oldpath || !newpath) + { + errno = EFAULT; + return -1; + } + if (newdirfd == AT_FDCWD || (newpath && newpath[0] == '/')) + { + symlink(oldpath, newpath); + } + else + { + pd = pvfs_find_descriptor(newdirfd); + if (pd) + { + rc = pd->s->fsops->symlinkat(oldpath, pd->true_fd, newpath); + } + else + { + errno = EBADF; + rc = -1; + } + } + return rc; +} + +int link(const char *oldpath, const char *newpath) +{ + if (!oldpath || !newpath) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(oldpath)) + { + return pvfs_link(oldpath, newpath); + } + else + { + return glibc_ops.link(oldpath, newpath); + } + return -1; +} + +int linkat(int olddirfd, const char *old, + int newdirfd, const char *new, int flags) +{ + pvfs_descriptor *oldpd, *newpd; + oldpd = pvfs_find_descriptor(olddirfd); + newpd = pvfs_find_descriptor(newdirfd); + if (!old || !new) + { + errno = EFAULT; + return -1; + } + if (!oldpd || !newpd) + { + errno = EBADF; + return -1; + } + if (oldpd->s->fsops == newpd->s->fsops) + { + return oldpd->s->fsops->linkat(oldpd->true_fd, old, + newpd->true_fd, new, flags); + } + else + { + errno = EXDEV; + return -1; + } +} + +/** + * According to man page count is ignored + */ +int posix_readdir(unsigned int fd, struct dirent *dirp, unsigned int count) +{ + int rc; + pvfs_descriptor *pd; + + if (!dirp) + { + errno = EFAULT; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (pd) 
+ { + rc = pd->s->fsops->readdir(pd->true_fd, dirp, count); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + + +/** + * man page calls last arg count but is ambiguous if it is number + * of bytes or number of records to read. The former appears to be + * true thus we rename the argument + */ +int getdents(unsigned int fd, struct dirent *dirp, unsigned int size) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (!dirp) + { + errno = EFAULT; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->getdents(pd->true_fd, dirp, size); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int getdents64(unsigned int fd, struct dirent64 *dirp, unsigned int size) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (!dirp) + { + errno = EFAULT; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->getdents64(pd->true_fd, dirp, size); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +/* linux discourages using readdir system calls, so for now + * we will leave them out - there are stdio calls that can + * be used + */ + +int access(const char *path, int mode) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_access(path, mode); + } + else + { + return glibc_ops.access(path, mode); + } +} + +int faccessat(int dirfd, const char *path, int mode, int flags) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (!path) + { + errno = EFAULT; + return -1; + } + if (dirfd == AT_FDCWD || (path && path[0] == '/')) + { + access(path, mode); + } + else + { + pd = pvfs_find_descriptor(dirfd); + if (pd) + { + rc = pd->s->fsops->faccessat(pd->true_fd, path, mode, flags); + } + else + { + errno = EBADF; + rc = -1; + } + } + return rc; +} + + +int flock(int fd, int op) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->flock(pd->true_fd, op); + } + else + { + errno = EBADF; + rc = -1; + } + return 
rc; +} + +int fcntl(int fd, int cmd, ...) +{ + int rc = 0; + long arg; + struct flock *lock; + pvfs_descriptor *pd; + va_list ap; + + va_start(ap, cmd); + pd = pvfs_find_descriptor(fd); + if (pd) + { + switch (cmd) + { + case F_GETLK: + case F_SETLK: + case F_SETLKW: + lock = va_arg(ap, struct flock *); + rc = pd->s->fsops->fcntl(pd->true_fd, cmd, lock); + break; + default: + arg = va_arg(ap, long); + rc = pd->s->fsops->fcntl(pd->true_fd, cmd, arg); + break; + } + } + else + { + errno = EBADF; + rc = -1; + } + va_end(ap); + return rc; +} + +void sync(void) +{ + pvfs_sync(); + glibc_ops.sync(); +} + +int fsync(int fd) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->fsync(pd->true_fd); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int fdatasync(int fd) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->fdatasync(pd->true_fd); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int posix_fadvise(int fd, off_t offset, off_t length, int advice) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->fadvise(pd->true_fd, offset, length, advice); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +/** GlibC doesn't seem to have fadvise or fadvise64 + * It does have posix_fadvise Linux has system calls + * for fadvise and fadvise64. 
Coreutils defines its + * own fadvise as operating on a file pointer so this + * is commented out here - seems rather arbitrary though + */ +#if 0 +int fadvise(int fd, off_t offset, off_t len, int advice) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->fadvise(pd->true_fd, offset, len, advice); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} +#endif + +int fadvise64(int fd, off64_t offset, off64_t len, int advice) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->fadvise64(pd->true_fd, offset, len, advice); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int statfs(const char *path, struct statfs *buf) +{ + if (!path || !buf) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_statfs(path, buf); + } + else + { + return glibc_ops.statfs(path, buf); + } +} + +int statfs64(const char *path, struct statfs64 *buf) +{ + if (!path || !buf) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_statfs64(path, buf); + } + else + { + return glibc_ops.statfs64(path, buf); + } +} + +int fstatfs(int fd, struct statfs *buf) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (!buf) + { + errno = EFAULT; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->fstatfs(pd->true_fd, buf); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int fstatfs64(int fd, struct statfs64 *buf) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (!buf) + { + errno = EFAULT; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->fstatfs64(pd->true_fd, buf); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int statvfs(const char *path, struct statvfs *buf) +{ + if (!path || !buf) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_statvfs(path, buf); + } + else + { + return 
glibc_ops.statvfs(path, buf); + } +} + +int fstatvfs(int fd, struct statvfs *buf) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (!buf) + { + errno = EFAULT; + return -1; + } + pd = pvfs_find_descriptor(fd); + if (pd) + { + rc = pd->s->fsops->fstatvfs(pd->true_fd, buf); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int mknod(const char *path, mode_t mode, dev_t dev) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_mknod(path, mode, dev); + } + else + { + return glibc_ops.mknod(path, mode, dev); + } +} + +int mknodat(int dirfd, const char *path, mode_t mode, dev_t dev) +{ + int rc = 0; + pvfs_descriptor *pd; + + if (!path) + { + errno = EFAULT; + return -1; + } + if (dirfd == AT_FDCWD || path[0] == '/') + { + mknod(path, mode, dev); + } + else + { + pd = pvfs_find_descriptor(dirfd); + if (pd) + { + rc = pd->s->fsops->mknodat(pd->true_fd, path, mode, dev); + } + else + { + errno = EBADF; + rc = -1; + } + } + return rc; +} + +ssize_t sendfile(int outfd, int infd, off_t *offset, size_t count) +{ + return sendfile64(outfd, infd, (off64_t *)offset, count); +} + +ssize_t sendfile64(int outfd, int infd, off64_t *offset, size_t count) +{ + int rc = 0; + pvfs_descriptor *inpd, *outpd; + + inpd = pvfs_find_descriptor(infd); + outpd = pvfs_find_descriptor(outfd); + if (inpd && outpd) + { + rc = inpd->s->fsops->sendfile64(outpd->true_fd, inpd->true_fd, + offset, count); + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int setxattr(const char *path, const char *name, + const void *value, size_t size, int flags) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_setxattr(path, name, value, size, flags); + } + else + { + if (glibc_ops.setxattr) + { + return glibc_ops.setxattr(path, name, value, size, flags); + } + else + { + errno = ENOPKG; + return -1; + } + } +} + +int lsetxattr(const char *path, const char *name, + const void *value, size_t size, 
int flags) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_lsetxattr(path, name, value, size, flags); + } + else + { + if (glibc_ops.lsetxattr) + { + return glibc_ops.lsetxattr(path, name, value, size, flags); + } + else + { + errno = ENOPKG; + return -1; + } + } +} + +int fsetxattr(int fd, const char *name, + const void *value, size_t size, int flags) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + if (pd->s->fsops->fsetxattr) + { + rc = pd->s->fsops->fsetxattr(pd->true_fd, name, value, size, flags); + } + else + { + errno = ENOPKG; + rc = -1; + } + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +ssize_t getxattr(const char *path, const char *name, + void *value, size_t size) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if (is_pvfs_path(path)) + { + return pvfs_getxattr(path, name, value, size); + } + else + { + if (glibc_ops.getxattr) + { + return glibc_ops.getxattr(path, name, value, size); + } + else + { + errno = ENOPKG; + return -1; + } + } +} + +ssize_t lgetxattr(const char *path, const char *name, + void *value, size_t size) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_lgetxattr(path, name, value, size); + } + else + { + if (glibc_ops.lgetxattr) + { + return glibc_ops.lgetxattr(path, name, value, size); + } + else + { + errno = ENOPKG; + return -1; + } + } +} + +ssize_t fgetxattr(int fd, const char *name, void *value, + size_t size) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + if (pd->s->fsops->fgetxattr) + { + rc = pd->s->fsops->fgetxattr(pd->true_fd, name, value, size); + } + else + { + errno = ENOPKG; + rc = -1; + } + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +ssize_t listxattr(const char *path, char *list, size_t size) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return 
pvfs_listxattr(path, list, size); + } + else + { + if (glibc_ops.listxattr) + { + return glibc_ops.listxattr(path, list, size); + } + else + { + errno = ENOPKG; + return -1; + } + } +} + +ssize_t llistxattr(const char *path, char *list, size_t size) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_llistxattr(path, list, size); + } + else + { + if (glibc_ops.llistxattr) + { + return glibc_ops.llistxattr(path, list, size); + } + else + { + errno = ENOPKG; + return -1; + } + } +} + +ssize_t flistxattr(int fd, char *list, size_t size) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + if (pd->s->fsops->flistxattr) + { + rc = pd->s->fsops->flistxattr(pd->true_fd, list, size); + } + else + { + errno = ENOPKG; + rc = -1; + } + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +int removexattr(const char *path, const char *name) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_removexattr(path, name); + } + else + { + if (glibc_ops.removexattr) + { + return glibc_ops.removexattr(path, name); + } + else + { + errno = ENOPKG; + return -1; + } + } +} + +int lremovexattr(const char *path, const char *name) +{ + if (!path) + { + errno = EFAULT; + return -1; + } + if(is_pvfs_path(path)) + { + return pvfs_lremovexattr(path, name); + } + else + { + if (glibc_ops.lremovexattr) + { + return glibc_ops.lremovexattr(path, name); + } + else + { + errno = ENOPKG; + return -1; + } + } +} + +int fremovexattr(int fd, const char *name) +{ + int rc = 0; + pvfs_descriptor *pd; + + pd = pvfs_find_descriptor(fd); + if (pd) + { + if (pd->s->fsops->fremovexattr) + { + rc = pd->s->fsops->fremovexattr(pd->true_fd, name); + } + else + { + errno = ENOPKG; + rc = -1; + } + } + else + { + errno = EBADF; + rc = -1; + } + return rc; +} + +/* these functions allow the library to take over +.* management of the currrent working directory + * all of the actual 
code is in the pvfs versions + * of these functions - these are only wrappers + * to catch calls to libc + * + * if the kernel module is used to mount the FS + * then there is no need for these + */ +#if PVFS_USRINT_CWD + +int chdir(const char *path) +{ + return pvfs_chdir(path); +} + +int fchdir(int fd) +{ + return pvfs_fchdir(fd); +} + +char *getcwd(char *buf, size_t size) +{ + return pvfs_getcwd(buf, size); +} + +char *get_current_dir_name(void) +{ + return pvfs_get_current_dir_name(); +} + +char *getwd(char *buf) +{ + return pvfs_getwd(buf); +} + +mode_t umask(mode_t mask) +{ + return pvfs_umask(mask); +} + +mode_t getumask(void) +{ + return pvfs_getumask(); +} + +int getdtablesize(void) +{ + return pvfs_getdtablesize(); +} + +#endif /* PVFS_USRINT_CWD */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/client/usrint/stdio-pvfs.h b/src/client/usrint/stdio-pvfs.h new file mode 100644 index 0000000..da650e4 --- /dev/null +++ b/src/client/usrint/stdio-pvfs.h @@ -0,0 +1,26 @@ + +#ifdef REDEF_STD_STREAMS +#ifdef stdin +#undef stdin +#endif + +#define stdin (&pvfs_stdin) /* this is wrong!!! */ + +#ifdef stdout +#undef stdout +#endif + +#define stdout (&pvfs_stdout) /* this is wrong!!! */ + +#ifdef stderr +#undef stderr +#endif + +#define stderr (&pvfs_stderr) /* this is wrong!!! */ +#endif + +#ifdef FILE +#undef FILE +#endif + +#define FILE pvfs_descriptor diff --git a/src/client/usrint/stdio.c b/src/client/usrint/stdio.c new file mode 100644 index 0000000..55419eb --- /dev/null +++ b/src/client/usrint/stdio.c @@ -0,0 +1,2787 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines - implementation of stdio for pvfs + */ +/* this prevents headers from using inlines for 64 bit calls */ +#define USRINT_SOURCE 1 +#include "usrint.h" +#include "openfile-util.h" +#include "stdio-ops.h" + +/* fdopendir not present until glibc2.5 */ +#if __GLIBC_PREREQ (2,5) +#else +extern DIR *fdopendir (int __fd); +#endif + + +#ifdef gossip_debug +#undef gossip_debug +#endif + +#ifdef GOSSIP_USRINT_DEBUG +#undef GOSSIP_USRINT_DEBUG +#endif + +#ifdef USRINT_DEBUG +#define GOSSIP_USRINT_DEBUG stderr +#define gossip_debug fprintf +#else +#define gossip_debug(__m, __f, ...) +#endif /* USRINT_DEBUG */ + +#define PVFS_STDIO_DEBUG 0 + +static void init_stdio(void); +static struct stdio_ops_s stdio_ops; +static FILE open_files = {._chain = NULL}; + +#define _P_IO_MAGIC 0xF0BD0000 +#define SETMAGIC(s,m) do{(s)->_flags = (m) & _IO_MAGIC_MASK;}while(0) +#define ISMAGICSET(s,m) (((s)->_flags & _IO_MAGIC_MASK) == (m)) +#define SETFLAG(s,f) do{(s)->_flags |= ((f) & ~_IO_MAGIC_MASK);}while(0) +#define CLEARFLAG(s,f) do{(s)->_flags &= ~((f) & ~_IO_MAGIC_MASK);}while(0) +#define ISFLAGSET(s,f) (((s)->_flags & (f)) == (f)) + +/* STDIO implementation - this gives users something to link to + * that will call back to the PVFS lib - also lets us optimize + * in a few spots like buffer sizes and stuff + */ + +/** struct representing a directory stream for buffered dir io + * + * this struct type is undefined in /usr/include as it is opaque + * it is defined in this file only. This design is based loosely + * on the buffered IO scheme used in Linux for files. 
/** Try to take the stream lock without blocking
 *
 * When _IO_MTSAFE_IO is defined and the stream has _IO_USER_LOCK set
 * the result of _IO_lock_try() is returned.  In every other case no
 * lock is in use and 0 is returned.
 */
static inline int trylock_stream(FILE *stream)
{
#ifdef _IO_MTSAFE_IO
    if (ISFLAGSET(stream, _IO_USER_LOCK))
    {
        return _IO_lock_try(stream->_lock);
    }
#endif
    /* this return used to sit in the #else branch only, so an MT-safe
     * build with the flag clear fell off the end of the function
     */
    return 0;
}
/** Convert an fopen() style mode string into open() style flags
 *
 * Exactly one of 'r', 'w' or 'a' selects the base mode (repeating the
 * same letter is accepted, mixing two is not).  '+' and 'x' are only
 * accepted after the base letter.  'b', 'c', 'e' and 'm' are ignored.
 * Any other character, or a string with no base letter, fails.
 *
 * Returns the flags, or -1 with errno set to EINVAL.
 */
static int mode2flags(const char *mode)
{
    const char *p;
    char base = '\0';   /* base letter seen so far, '\0' if none */
    int plus = 0;       /* '+' seen */
    int excl = 0;       /* 'x' seen */
    int oflags = 0;

    for (p = mode; *p != '\0'; p++)
    {
        char ch = *p;

        if (ch == 'r' || ch == 'w' || ch == 'a')
        {
            /* a second, different base letter is a conflict */
            if (base != '\0' && base != ch)
            {
                errno = EINVAL;
                return -1;
            }
            base = ch;
        }
        else if (ch == '+' || ch == 'x')
        {
            /* modifiers must follow the base letter */
            if (base == '\0')
            {
                errno = EINVAL;
                return -1;
            }
            if (ch == '+')
            {
                plus = 1;
            }
            else
            {
                excl = 1;
            }
        }
        else if (ch == 'b' || ch == 'c' || ch == 'e' || ch == 'm')
        {
            /* accepted but ignored */
            continue;
        }
        else
        {
            errno = EINVAL;
            return -1;
        }
    }

    switch (base)
    {
    case 'r':
        oflags = plus ? O_RDWR : O_RDONLY;
        break;
    case 'w':
        oflags = (plus ? O_RDWR : O_WRONLY) | O_CREAT | O_TRUNC;
        break;
    case 'a':
        oflags = (plus ? O_RDWR : O_WRONLY) | O_APPEND | O_CREAT;
        break;
    default:
        /* empty mode, or only ignored letters */
        errno = EINVAL;
        return -1;
    }

    if (excl)
    {
        oflags |= O_EXCL;
    }
    return oflags;
}
*path, const char *mode) +{ + int fd = 0; + int flags = 0; + FILE *newfile = NULL; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fopen %s %s\n", path, mode); + flags = mode2flags(mode); + if (flags == -1) + { + return NULL; + } + + fd = open(path, flags, 0666); + if (fd == -1) + { + return NULL; + } + + newfile = fdopen(fd, mode); + + return newfile; +} + +/** + * fopen64 - not clear why there is an fopen64 but there is + */ +FILE *fopen64(const char *path, const char *modes) +{ + return fopen(path, modes); +} + +/** this function sets up a new stream's buffer area + * + */ +static int init_stream (FILE *stream, int flags, int bufsize) +{ + /* set up stream here */ + SETMAGIC(stream, _P_IO_MAGIC); + if (!(flags & O_WRONLY)) + SETFLAG(stream, _IO_NO_READS); + if (!(flags & O_RDONLY)) + SETFLAG(stream, _IO_NO_WRITES); + /* set up default buffering here */ + stream->_IO_buf_base = (char *)malloc(bufsize); + if (!stream->_IO_buf_base) + { + return -1; + } + stream->_IO_buf_end = stream->_IO_buf_base + bufsize; + stream->_IO_read_base = stream->_IO_buf_base; + stream->_IO_read_ptr = stream->_IO_buf_base; + stream->_IO_read_end = stream->_IO_buf_base; + stream->_IO_write_base = stream->_IO_buf_base; + stream->_IO_write_ptr = stream->_IO_buf_base; + stream->_IO_write_end = stream->_IO_buf_end; + lock_stream(&open_files); + stream->_chain = open_files._chain; + open_files._chain = stream; + unlock_stream(&open_files); + return 0; +} + +/** + * fdopen adds a stream to an existing open file + */ +FILE *fdopen(int fd, const char *mode) +{ + int rc = 0; + FILE *newfile = NULL; + int flags; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fdopen %d %s\n", fd, mode); + /* need to check for valid mode here */ + /* it must be compatible with the existing mode */ + flags = mode2flags(mode); + + newfile = (FILE *)malloc(sizeof(FILE)); + if (!newfile) + { + errno = ENOMEM; + return NULL; + } + memset(newfile, 0, sizeof(FILE)); + + /* initize lock for this stream */ + SETFLAG(newfile, _IO_USER_LOCK); 
+ lock_init_stream(newfile); + + newfile->_fileno = fd; + rc = init_stream(newfile, flags, PVFS_BUFSIZE); + if(rc) + { + free(newfile); + return NULL; + } + return newfile; +} + +/** + * freopen closes the file and opens another one for the stream + */ +FILE *freopen(const char *path, const char *mode, FILE *stream) +{ + int fd = 0; + int flags = 0; + + gossip_debug(GOSSIP_USRINT_DEBUG, "freopen %s %s %p\n", path, mode, stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.freopen(path, mode, stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.freopen(path, mode, stream); + } + errno = EINVAL; + return NULL; + } + lock_stream(stream); + /* see if stream is in use - if so close the file */ + if (stream->_fileno > -1) + { + int rc; + rc = close(stream->_fileno); + if (rc == -1) + { + unlock_stream(stream); + return NULL; + } + } + + /* translate mode to flags */ + flags = mode2flags(mode); + + /* open the file */ + fd = open(path, flags, 0666); + if (fd == -1) + { + unlock_stream(stream); + return NULL; + } + stream->_fileno = fd; + + /* reset buffering here */ + if (stream->_IO_buf_base) + free (stream->_IO_buf_base); + init_stream(stream, flags, PVFS_BUFSIZE); + + unlock_stream(stream); + return stream; +} + +/** + * freopen64 - again this appears useless but nevertheless ... 
+ */ +FILE *freopen64 (const char *path, const char *modes, FILE *stream) +{ + return freopen(path, modes, stream); +} + +/** + * These functions do not need PVFS versions and thus + * are not implemented here + */ +#if 0 +FILE *fopencookie(void *cookie, const char *modes, + _IO_cookie_io_function_t funcs); +FILE *fmemopen(void *buf, size_t size, const char *mode); +FILE *open_memstream(char **ptr, size_t *sizeloc); +#endif + +/** Implements buffered write using Linux pointer model + * + * Two sets of pointers, one for reading one for writing + * flag determins which mode we are in. start always + * points to beginning of buffer, end points to end + * In read, end points to end of actual data read and + * coincides with the file pointer. In write the start + * coincides with file pointer. In either case ptr is + * where user stream pointer is. + * + * The FILE struct is struct _IO_FILE defined in /usr/include/libio.h + */ +size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + int rc = 0; + + /* causing loops */ + /* gossip_debug(GOSSIP_USRINT_DEBUG, "fwrite %p %d %d %p\n", + ptr, size, nmemb, stream); */ +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.fwrite(ptr, size, nmemb, stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.fwrite(ptr, size, nmemb, stream); + } + errno = EINVAL; + return 0; + } + lock_stream(stream); + rc = fwrite_unlocked(ptr, size, nmemb, stream); + unlock_stream(stream); + return rc; +} + +size_t fwrite_unlocked(const void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + off64_t rsz, rsz_buf, rsz_extra; + int rc; + + /* causing loops */ + /* gossip_debug(GOSSIP_USRINT_DEBUG, "fwrite_unlocked %p %d %d %p\n", + ptr, size, nmemb, stream); */ +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == 
stderr) + { + init_stdio(); + return stdio_ops.fwrite_unlocked(ptr, size, nmemb, stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.fwrite_unlocked(ptr, size, nmemb, stream); + } + errno = EINVAL; + return 0; + } + if (!ptr || size <= 0 || nmemb <= 0) + { + errno = EINVAL; + return 0; + } + + /* Check to see if switching from read to write */ + if (!ISFLAGSET(stream, _IO_CURRENTLY_PUTTING)) + { + /* reset read pointer */ + stream->_IO_read_ptr = stream->_IO_read_end; + /* set flag */ + SETFLAG(stream, _IO_CURRENTLY_PUTTING); + /* indicate read buffer empty */ + stream->_IO_read_end = stream->_IO_read_base; + stream->_IO_read_ptr = stream->_IO_read_end; + /* indicate write buffer empty */ + stream->_IO_write_end = stream->_IO_buf_end; + stream->_IO_write_ptr = stream->_IO_write_base; + } + + rsz = size * nmemb; + rsz_buf = PVFS_util_min(rsz, stream->_IO_write_end - stream->_IO_write_ptr); + rsz_extra = rsz - rsz_buf; + + if (rsz_buf) /* is only zero if buffer is full */ + { + memcpy(stream->_IO_write_ptr, ptr, rsz_buf); + stream->_IO_write_ptr += rsz_buf; + } + + /* if there is more to write */ + if (rsz_extra) + { + /* buffer is full - write the current buffer */ +#if PVFS_STDIO_DEBUG + fprintf(stderr,"fwrite writing %d bytes to offset %d\n", + (int)(stream->_IO_write_ptr - stream->_IO_write_base), + (int)lseek(stream->_fileno, 0, SEEK_CUR)); +#endif + rc = write(stream->_fileno, stream->_IO_write_base, + stream->_IO_write_ptr - stream->_IO_write_base); + if (rc == -1) + { + SETFLAG(stream, _IO_ERR_SEEN); + return 0; + } + /* TODO: check for a short write */ + /* reset buffer */ + stream->_IO_write_ptr = stream->_IO_write_base; + /* if there more data left in request than fits in a buffer */ + if(rsz_extra > stream->_IO_buf_end - stream->_IO_buf_base) + { +#if PVFS_STDIO_DEBUG + fprintf(stderr,"fwrite writing %d bytes to offset %d\n", + (int)rsz_extra, + 
(int)lseek(stream->_fileno, 0, SEEK_CUR)); +#endif + /* write data directly */ + rc = write(stream->_fileno, (char *)ptr + rsz_buf, rsz_extra); + if (rc == -1) + { + SETFLAG(stream, _IO_ERR_SEEN); + return 0; + } + /* TODO: check for a short write */ + } + else + { + memcpy(stream->_IO_write_ptr, (char *)ptr + rsz_buf, rsz_extra); + stream->_IO_write_ptr += rsz_extra; + } + } + + return rsz / size; /* num items written */ +} + +/* + * fread implements the same buffer scheme as in fwrite + */ +size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + int rc = 0; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fread %p %d %d %p\n", + ptr, size, nmemb, stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.fread(ptr, size, nmemb, stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.fread(ptr, size, nmemb, stream); + } + errno = EINVAL; + return 0; + } + lock_stream(stream); + rc = fread_unlocked(ptr, size, nmemb, stream); + unlock_stream(stream); + return rc; +} + +size_t fread_unlocked(void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + int rsz, rsz_buf, rsz_extra; + int bytes_read; + int rc; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fread_unlocked %p %d %d %p\n", + ptr, size, nmemb, stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.fread_unlocked(ptr, size, nmemb, stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.fread_unlocked(ptr, size, nmemb, stream); + } + errno = EINVAL; + return 0; + } + if (!ptr || size < 0 || nmemb < 0) + { + errno = EINVAL; + return 0; + } + + /* Check to see if switching from write to read */ + if (ISFLAGSET(stream, _IO_CURRENTLY_PUTTING)) 
+ { + /* write buffer back */ +#if PVFS_STDIO_DEBUG + fprintf(stderr,"fread writing %d bytes to offset %d\n", + (int)(stream->_IO_write_ptr - stream->_IO_write_base), + (int)lseek(stream->_fileno, 0, SEEK_CUR)); +#endif + rc = write(stream->_fileno, stream->_IO_write_base, + stream->_IO_write_ptr - stream->_IO_write_base); + if (rc == -1) + { + SETFLAG(stream, _IO_ERR_SEEN); + return 0; + } + else if (rc < stream->_IO_write_ptr - stream->_IO_write_base) + { + /* short write but no error ??? */ + SETFLAG(stream, _IO_ERR_SEEN); + } + /* reset write pointer */ + stream->_IO_write_ptr = stream->_IO_write_base; + /* clear flag */ + CLEARFLAG(stream, _IO_CURRENTLY_PUTTING); + /* indicate read buffer empty */ + stream->_IO_read_end = stream->_IO_read_base; + stream->_IO_read_ptr = stream->_IO_read_end; + } + + /* see if anything is in read buffer */ + if (stream->_IO_read_end == stream->_IO_read_base || + stream->_IO_read_ptr == stream->_IO_read_end) + { + /* buffer empty so read new buffer */ + bytes_read = read(stream->_fileno, stream->_IO_read_base, + stream->_IO_buf_end - stream->_IO_buf_base); + if (bytes_read == -1) + { + SETFLAG(stream, _IO_ERR_SEEN); + return 0; + } + else if (bytes_read == 0) + { + SETFLAG(stream, _IO_EOF_SEEN); + } + /* indicate end of read area */ + stream->_IO_read_end = stream->_IO_read_base + bytes_read; + /* reset read pointer */ + stream->_IO_read_ptr = stream->_IO_read_base; + } + + /* + * we assume there is a block in the buffer now + * and that the current file pointer corresponds + * to the end of the read buffer. The user has + * only seen up to the read pointer. 
+ */ + rsz = size * nmemb; /* total bytes requested */ + rsz_buf = PVFS_util_min(rsz, stream->_IO_read_end - stream->_IO_read_ptr); + rsz_extra = rsz - rsz_buf; /* bytes beyond the buffer */ + + /* copy rz_buf bytes from buffer */ + if (rsz_buf) /* zero if at EOF */ + { + memcpy(ptr, stream->_IO_read_ptr, rsz_buf); + stream->_IO_read_ptr += rsz_buf; + } + + /* if more bytes requested */ + if (rsz_extra) + { + /* if current buffer not at EOF */ + if (stream->_IO_read_end == stream->_IO_buf_end) + { + /* if more data requested than fits in buffer */ + if (rsz_extra > (stream->_IO_buf_end - stream->_IO_buf_base)) + { + /* read directly from file for remainder of request */ + bytes_read = read(stream->_fileno, (char *)ptr+rsz_buf, rsz_extra); + if (bytes_read == -1) + { + SETFLAG(stream, _IO_ERR_SEEN); + return 0; + } + else if (bytes_read == 0) + { + SETFLAG(stream, _IO_EOF_SEEN); + } + if (bytes_read == rsz_extra) + { + /* then read next buffer */ + bytes_read = read(stream->_fileno, stream->_IO_buf_base, + stream->_IO_buf_end - stream->_IO_buf_base); + if (bytes_read == -1) + { + SETFLAG(stream, _IO_ERR_SEEN); + return 0; + } + else if (bytes_read == 0) + { + SETFLAG(stream, _IO_EOF_SEEN); + } + stream->_IO_read_end = stream->_IO_read_base + bytes_read; + stream->_IO_read_ptr = stream->_IO_read_base; + return rsz / size; /* num items read */ + } + /* MIGHT have read to EOF - check for pipe, tty */ + SETFLAG(stream, _IO_EOF_SEEN); + return (rsz_buf + bytes_read) / size; /* num items read */ + } + /* rest of request fits in a buffer - read next buffer */ + bytes_read = read(stream->_fileno, stream->_IO_buf_base, + stream->_IO_buf_end - stream->_IO_buf_base); + if (bytes_read == -1) + { + SETFLAG(stream, _IO_ERR_SEEN); + return 0; + } + else if (bytes_read == 0) + { + SETFLAG(stream, _IO_EOF_SEEN); + } + stream->_IO_read_end = stream->_IO_read_base + bytes_read; + stream->_IO_read_ptr = stream->_IO_read_base; + /* transfer remainder */ + rsz_extra = 
PVFS_util_min(rsz_extra, + stream->_IO_read_end - stream->_IO_read_ptr); + if (rsz_extra) /* zero if at EOF */ + { + memcpy(ptr, stream->_IO_read_ptr, rsz_extra); + stream->_IO_read_ptr += rsz_extra; + } + /* MIGHT have read to EOF - check for pipe, tty */ + if (rsz_buf + rsz_extra < rsz) + { + SETFLAG(stream, _IO_EOF_SEEN); + } + return (rsz_buf + rsz_extra) / size; /* num items read */ + } + else + { + /* at EOF so return bytes read */ + SETFLAG(stream, _IO_EOF_SEEN); + return rsz_buf / size; /* num items read */ + } + } + /* request totally within current buffer */ + return rsz / size; /* num items read */ +} + +/** + * fcloseall closes all open streams + */ +int fcloseall(void) +{ + int rc = 0; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fcloseall\n"); + while (open_files._chain) + { + rc = fclose(open_files._chain); + } + return rc; +} + +/** + * fclose first writes any dirty data in the buffer + */ +int fclose(FILE *stream) +{ + int rc = 0; + FILE *f; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fclose %p\n", stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.fclose(stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.fclose(stream); + } + errno = EINVAL; + return -1; + } + lock_stream(stream); + /* write any pending data */ + if (ISFLAGSET(stream, _IO_CURRENTLY_PUTTING)) + { + if (stream->_IO_write_ptr > stream->_IO_write_base) + { +#if PVFS_STDIO_DEBUG + fprintf(stderr,"fclose writing %d bytes to offset %d\n", + (int)(stream->_IO_write_ptr - stream->_IO_write_base), + (int)lseek(stream->_fileno, 0, SEEK_CUR)); +#endif + rc = write(stream->_fileno, stream->_IO_write_base, + stream->_IO_write_ptr - stream->_IO_write_base); + if (rc == -1) + { + SETFLAG(stream, _IO_ERR_SEEN); + return -1; + } + } + } + if (!ISFLAGSET(stream, _IO_USER_BUF)) + { + /* free the buffer */ + 
free(stream->_IO_buf_base); + } + if (!ISFLAGSET(stream, _IO_DELETE_DONT_CLOSE)) + { + rc = close(stream->_fileno); + } + /* remove from chain */ + lock_stream(&open_files); + if (open_files._chain == stream) + { + open_files._chain = stream->_chain; + } + else + { + for (f = open_files._chain; f && f->_chain != stream; f = f->_chain) + if (f && f->_chain) + { + f->_chain = f->_chain->_chain; + } + /* was not on chain */ + } + unlock_stream(&open_files); + stream->_flags = 0; + /* can stream be locked here ? */ + lock_fini_stream(stream); + free(stream); + return rc; +} + +/** + * fseek wrapper + */ +int fseek(FILE *stream, long offset, int whence) +{ + return fseek64(stream, (off64_t)offset, whence); +} + +int fseeko(FILE *stream, off_t offset, int whence) +{ + return fseek64(stream, (off64_t)offset, whence); +} + +int fseeko64(FILE *stream, off64_t offset, int whence) +{ + return fseek64(stream, (off64_t)offset, whence); +} + +/** This is the main code for seeking on a stream + * + * If we seek a short distance within the current buffer + * we can just move the stream pointer. Otherwise we + * have to clear the buffer, seek, and start fresh + */ +int fseek64(FILE *stream, const off64_t offset, int whence) +{ + int rc = 0; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fseek64 %p %llx %d\n", + stream, offset, whence); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.fseek64(stream, offset, whence); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.fseek64(stream, offset, whence); + } + errno = EINVAL; + return -1; + } + lock_stream(stream); + /* if actually changing the position */ + if ((offset != 0L) || (whence != SEEK_CUR)) + { + int64_t filepos, fileend; + struct stat64 sbuf; + filepos = lseek64(stream->_fileno, 0, SEEK_CUR); + /* should fileend include stuff in write buffer ??? 
*/ + rc = fstat64(stream->_fileno, &sbuf); + if (rc < 0) + { + SETFLAG(stream, _IO_ERR_SEEN); + rc = -1; + goto exitout; + } + fileend = sbuf.st_size; + /* figure out if we are only seeking within the */ + /* bounds of the current buffer to minimize */ + /* unneccessary reads/writes */ + if (whence == SEEK_CUR && ISFLAGSET(stream, _IO_CURRENTLY_PUTTING) && + (offset < stream->_IO_write_end - stream->_IO_write_ptr) && + (offset > stream->_IO_write_base - stream->_IO_write_ptr)) + { + stream->_IO_write_ptr += offset; + /* should we zero out buffer if past eof ??? */ + rc = 0; + goto exitout; + } + if (whence == SEEK_CUR && !ISFLAGSET(stream, _IO_CURRENTLY_PUTTING) && + (offset < stream->_IO_read_end - stream->_IO_read_ptr) && + (offset > stream->_IO_read_base - stream->_IO_read_ptr)) + { + stream->_IO_read_ptr += offset; + rc = 0; + goto exitout; + } + if (whence == SEEK_SET && ISFLAGSET(stream, _IO_CURRENTLY_PUTTING) && + (offset > filepos) && (offset < filepos + + (stream->_IO_write_end - stream->_IO_write_base))) + { + stream->_IO_write_ptr += offset - filepos; + /* should we zero out buffer if past eof ??? */ + rc = 0; + goto exitout; + } + if (whence == SEEK_SET && !ISFLAGSET(stream, _IO_CURRENTLY_PUTTING) && + (offset < filepos) && (offset > filepos - + (stream->_IO_read_end - stream->_IO_read_base))) + { + stream->_IO_read_ptr += offset - filepos; + rc = 0; + goto exitout; + } + if (whence == SEEK_END && ISFLAGSET(stream, _IO_CURRENTLY_PUTTING) && + ((fileend - offset) > filepos) && + ((fileend - offset) < filepos + + (stream->_IO_write_end - stream->_IO_write_base))) + { + stream->_IO_write_ptr += (fileend - offset) - filepos; + /* should we zero out buffer if past eof ??? 
*/ + rc = 0; + goto exitout; + } + if (whence == SEEK_END && !ISFLAGSET(stream, _IO_CURRENTLY_PUTTING) && + ((fileend - offset) < filepos) && + ((fileend - offset) > filepos - + (stream->_IO_read_end - stream->_IO_read_base))) + { + stream->_IO_read_ptr += (fileend - offset) - filepos; + rc = 0; + goto exitout; + } + /* at this point the seek is beyond the current buffer */ + /* if we are in write mode write back the buffer */ + if (ISFLAGSET(stream, _IO_CURRENTLY_PUTTING) && + stream->_IO_write_ptr > stream->_IO_write_base) + { + /* write buffer back */ +#if PVFS_STDIO_DEBUG + fprintf(stderr,"fseek writing %d bytes to offset %d\n", + (int)(stream->_IO_write_ptr - stream->_IO_write_base), + (int)lseek(stream->_fileno, 0, SEEK_CUR)); +#endif + rc = write(stream->_fileno, stream->_IO_write_base, + stream->_IO_write_ptr - stream->_IO_write_base); + if (rc < 0) + { + SETFLAG(stream, _IO_ERR_SEEN); + rc = -1; + goto exitout; + } + /* reset write pointer */ + stream->_IO_write_ptr = stream->_IO_write_base; + } + else + { + /* in read mode simply clear the buffer */ + /* will force a read at next fread call */ + stream->_IO_read_end = stream->_IO_read_base; + stream->_IO_read_ptr = stream->_IO_read_end; + } + rc = lseek64(stream->_fileno, offset, whence); +#if PVFS_STDIO_DEBUG + fprintf(stderr,"fseek seeks to offset %d\n", + (int)lseek(stream->_fileno, 0, SEEK_CUR)); +#endif + if (rc < 0) + { + SETFLAG(stream, _IO_ERR_SEEN); + rc = -1; + goto exitout; + } + /* fseek returns 0 on success */ + rc = 0; + } +exitout: + /* successful call */ + unlock_stream(stream); + CLEARFLAG(stream, _IO_EOF_SEEN); + return rc; +} + +/** + * fsetpos wrapper + */ +int fsetpos(FILE *stream, const fpos_t *pos) +{ + fseek64(stream, (off64_t)(pos->__pos), SEEK_SET); + return 0; +} + +int fsetpos64(FILE *stream, const fpos64_t *pos) +{ + fseek64(stream, (off64_t)(pos->__pos), SEEK_SET); + return 0; +} + +/** + * rewind wrapper + */ +void rewind(FILE *stream) +{ + fseek64(stream, 0L, SEEK_SET); + 
CLEARFLAG(stream, _IO_ERR_SEEN); +} + +/** + * ftell wrapper + */ +long int ftell(FILE *stream) +{ + return (long)ftell64(stream); +} + +off_t ftello(FILE *stream) +{ + return (off_t)ftell64(stream); +} + +off64_t ftello64(FILE *stream) +{ + return (off64_t)ftell64(stream); +} + +off64_t ftell64(FILE* stream) +{ + int64_t filepos; + + gossip_debug(GOSSIP_USRINT_DEBUG, "ftell64 %p\n", stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.ftell64(stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.ftell64(stream); + } + errno = EINVAL; + return -1; + } + filepos = lseek64(stream->_fileno, 0, SEEK_CUR); + if (ISFLAGSET(stream, _IO_CURRENTLY_PUTTING)) + { + return filepos + (stream->_IO_write_ptr - stream->_IO_write_base); + } + else + { + return filepos - (stream->_IO_read_end - stream->_IO_read_ptr); + } +} + +/** + * fgetpos wrapper + */ +int fgetpos(FILE *stream, fpos_t *pos) +{ + pos->__pos = ftell64(stream); + return 0; +} + +int fgetpos64(FILE *stream, fpos64_t *pos) +{ + pos->__pos = ftell64(stream); + return 0; +} + +/** forces a write back of potentially dirty buffer + * + * we don't have a dirty flag, so if user seeks + * ahead within the buffer then does a flush + * we will do an uncessary write + */ +int fflush(FILE *stream) +{ + int rc = 0; + +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.fflush(stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.fflush(stream); + } + errno = EINVAL; + return -1; + } + lock_stream(stream); + rc = fflush_unlocked(stream); + unlock_stream(stream); + return rc; +} + +int fflush_unlocked(FILE *stream) +{ + int rc; + + 
gossip_debug(GOSSIP_USRINT_DEBUG, "fflush_unlocked %p\n", stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.fflush_unlocked(stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.fflush_unlocked(stream); + } + errno = EINVAL; + return -1; + } + /* if we are in write mode write back the buffer */ + if (ISFLAGSET(stream, _IO_CURRENTLY_PUTTING) && + stream->_IO_write_ptr > stream->_IO_write_base) + { + /* write buffer back */ +#if PVFS_STDIO_DEBUG + fprintf(stderr,"fflush writing %d bytes to offset %d\n", + (int)(stream->_IO_write_ptr - stream->_IO_write_base), + (int)lseek(stream->_fileno, 0, SEEK_CUR)); +#endif + rc = write(stream->_fileno, stream->_IO_write_base, + stream->_IO_write_ptr - stream->_IO_write_base); + if (rc < 0) + { + SETFLAG(stream, _IO_ERR_SEEN); + return rc; + } + /* reset write pointer */ + stream->_IO_write_ptr = stream->_IO_write_base; + } + return 0; +} + +/* + * fputc wrapper + */ +int fputc(int c, FILE *stream) +{ + int rc = 0; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fputc %c %p\n", c, stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.fputc(c, stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.fputc(c, stream); + } + errno = EINVAL; + return -1; + } + lock_stream(stream); + rc = fputc_unlocked(c, stream); + unlock_stream(stream); + return rc; +} + +int fputc_unlocked(int c, FILE *stream) +{ + int rc; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fputc_unlocked %c %p\n", c, stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.fputc_unlocked(c, stream); + } +#endif + 
rc = fwrite_unlocked(&c, 1, 1, stream); + if (ferror(stream)) + { + return EOF; + } + return c; +} + +/** + * fputs writes up to a null char + */ +int fputs(const char *s, FILE *stream) +{ + int rc = 0; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fputs %s %p\n", s, stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.fputs(s, stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.fputs(s, stream); + } + errno = EINVAL; + return -1; + } + lock_stream(stream); + rc = fputs_unlocked(s, stream); + unlock_stream(stream); + return rc; +} + +int fputs_unlocked(const char *s, FILE *stream) +{ + size_t len; + int rc; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fputs_unlocked %s %p\n", s, stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.fputs_unlocked(s, stream); + } +#endif + if (!s) + { + errno = EINVAL; + return EOF; + } + len = strlen(s); + rc = fwrite_unlocked(s, len, 1, stream); + if (ferror(stream)) + { + return EOF; + } + return rc; +} + +/** + * putc wrapper + */ +int putc(int c, FILE *stream) +{ + return fputc(c, stream); +} + +int putc_unlocked(int c, FILE *stream) +{ + return fputc_unlocked(c, stream); +} + +/** + * putchar wrapper + */ +int putchar(int c) +{ + return fputc(c, stdout); +} + +int putchar_unlocked(int c) +{ + return fputc_unlocked(c, stdout); +} + +/** + * puts wrapper + */ +int puts(const char *s) +{ + int rc; + + gossip_debug(GOSSIP_USRINT_DEBUG, "puts %s\n", s); +#ifndef PVFS_STDIO_REDEFSTREAM + init_stdio(); + rc = stdio_ops.puts(s); + return rc; +#else + rc = fputs(s, stdout); + if (rc == EOF) + { + return EOF; + } + return fputs("\n", stdout); +#endif +} + +/** + * putw wrapper + */ +int putw(int wd, FILE *stream) +{ + int rc; + + gossip_debug(GOSSIP_USRINT_DEBUG, 
"putw %d %p\n", wd, stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.putw(wd, stream); + } +#endif + rc = fwrite(&wd, sizeof(int), 1, stream); + if (ferror(stream)) + { + return EOF; + } + return rc; +} + +/** + * fgets reads up to size or a newline + */ +char *fgets(char *s, int size, FILE *stream) +{ + char *rc = NULL; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fgets %p %d %p\n", s, size, stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + rc = stdio_ops.fgets(s, size, stream); + gossip_debug(GOSSIP_USRINT_DEBUG, "fgets returns %s\n", s); + return rc; + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.fgets(s, size, stream); + } + errno = EINVAL; + gossip_debug(GOSSIP_USRINT_DEBUG, "fgets returns NULL\n"); + return NULL; + } + lock_stream(stream); + rc = fgets_unlocked(s, size, stream); + unlock_stream(stream); gossip_debug(GOSSIP_USRINT_DEBUG, "fgets returns %s\n", rc); + return rc; +} + +char *fgets_unlocked(char *s, int size, FILE *stream) +{ + char c, *p; + int feo, fer; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fgets_unlocked %p %d %p\n", + s, size, stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + s = stdio_ops.fgets_unlocked(s, size, stream); + gossip_debug(GOSSIP_USRINT_DEBUG, "fgets_unlocked returns %s\n", s); + return s; + } +#endif + if (!stream || !s || size < 1) + { + errno = EINVAL; + gossip_debug(GOSSIP_USRINT_DEBUG, "fgets_unlocked returns NULL\n"); + return NULL; + } + if (size == 1) + { + gossip_debug(GOSSIP_USRINT_DEBUG, "fgets_unlocked returns \"\"\n"); + return s; + } + p = s; + size--; /* for the trailing NULL char */ + do { + *p++ = c = fgetc_unlocked(stream); + /* reduce multiple func calls */ + feo = 
feof_unlocked(stream);
+        fer = ferror_unlocked(stream);
+    } while (--size && c != '\n' && !(feo || fer));
+    /* if error or eof and read no chars */
+    if (fer || (feo && p - s == 1))
+    {
+        gossip_debug(GOSSIP_USRINT_DEBUG, "fgets_unlocked returns NULL\n");
+        return NULL;
+    }
+    *p = 0; /* add null terminating char */
+    gossip_debug(GOSSIP_USRINT_DEBUG, "fgets_unlocked returns %s\n", s);
+    return s;
+}
+
+/**
+ * fgetc wrapper
+ *
+ * Reads one byte from stream with fread.  The three standard streams
+ * are handed to the saved libc implementation unless
+ * PVFS_STDIO_REDEFSTREAM is defined.
+ *
+ * Returns the byte as an unsigned char converted to int, or EOF when
+ * the stream's error or end-of-file indicator is set after the read.
+ */
+int fgetc(FILE *stream)
+{
+    int rc;
+    unsigned char ch;
+
+    gossip_debug(GOSSIP_USRINT_DEBUG, "fgetc %p\n", stream);
+#ifndef PVFS_STDIO_REDEFSTREAM
+    if (stream == stdin || stream == stdout || stream == stderr)
+    {
+        init_stdio();
+        rc = stdio_ops.fgetc(stream);
+        gossip_debug(GOSSIP_USRINT_DEBUG, "fgetc returns %d(%c)\n",
+                     rc, (char)rc);
+        return rc;
+    }
+#endif
+    rc = fread(&ch, 1, 1, stream);
+    if (ferror(stream) || feof(stream))
+    {
+        gossip_debug(GOSSIP_USRINT_DEBUG, "fgetc returns %d\n", EOF);
+        return EOF;
+    }
+    gossip_debug(GOSSIP_USRINT_DEBUG, "fgetc returns %c\n", ch);
+    return (int)ch;
+}
+
+/**
+ * fgetc_unlocked wrapper
+ *
+ * Same contract as fgetc but uses the _unlocked read and status calls.
+ * The byte is held in an unsigned char so that 0xFF is returned as 255
+ * and cannot be mistaken for EOF (-1) by the caller.
+ */
+int fgetc_unlocked(FILE *stream)
+{
+    int rc;
+    unsigned char ch;
+
+    gossip_debug(GOSSIP_USRINT_DEBUG, "fgetc_unlocked %p\n", stream);
+#ifndef PVFS_STDIO_REDEFSTREAM
+    if (stream == stdin || stream == stdout || stream == stderr)
+    {
+        init_stdio();
+        rc = stdio_ops.fgetc_unlocked(stream);
+        gossip_debug(GOSSIP_USRINT_DEBUG, "fgetc_unlocked returns %d(%c)\n",
+                     rc, (char)rc);
+        return rc;
+    }
+#endif
+    rc = fread_unlocked(&ch, 1, 1, stream);
+    if (ferror_unlocked(stream) || feof_unlocked(stream))
+    {
+        gossip_debug(GOSSIP_USRINT_DEBUG, "fgetc_unlocked returns %d\n", EOF);
+        return EOF;
+    }
+    gossip_debug(GOSSIP_USRINT_DEBUG, "fgetc_unlocked returns %c\n", ch);
+    return (int)ch;
+}
+
+/**
+ * getc wrapper - identical to fgetc
+ */
+int getc(FILE *stream)
+{
+    return fgetc(stream);
+}
+
+/** getc_unlocked wrapper - identical to fgetc_unlocked */
+int getc_unlocked(FILE *stream)
+{
+    return fgetc_unlocked(stream);
+}
+
+/**
+ * getchar wrapper - fgetc on stdin
+ */
+int getchar(void)
+{
+    return fgetc(stdin);
+}
+
+int getchar_unlocked(void)
+{
+    return fgetc_unlocked(stdin);
+}
+
+/**
+ * getw wrapper
+ *
+ * Reads one int-sized word with fread.  Returns the word, or EOF when
+ * the error or end-of-file indicator is set afterwards; a stored word
+ * equal to EOF is indistinguishable from failure, so callers should
+ * consult ferror/feof.
+ *
+ * not sure if feof should return an EOF or not
+ */
+int getw(FILE *stream)
+{
+    int rc, wd;
+
+    gossip_debug(GOSSIP_USRINT_DEBUG, "getw %p\n", stream);
+#ifndef PVFS_STDIO_REDEFSTREAM
+    if (stream == stdin || stream == stdout || stream == stderr)
+    {
+        init_stdio();
+        return stdio_ops.getw(stream);
+    }
+#endif
+    rc = fread(&wd, sizeof(int), 1, stream);
+    if (ferror(stream) || feof(stream))
+    {
+        return EOF;
+    }
+    return wd;
+}
+
+/**
+ * gets
+ *
+ * Reads stdin into s up to a newline or end of file.  The newline is
+ * consumed but not stored and the result is always null terminated.
+ * Returns s, or NULL on a read error or when end of file is reached
+ * before any character was read.  No bound on s can be enforced.
+ */
+char *gets(char * s)
+{
+#ifdef PVFS_STDIO_REDEFSTREAM
+    int c; /* int so EOF stays distinct from every byte value */
+    char *p;
+#endif
+
+    gossip_debug(GOSSIP_USRINT_DEBUG, "gets %p\n", s);
+#ifndef PVFS_STDIO_REDEFSTREAM
+    init_stdio();
+    return stdio_ops.gets(s);
+#else
+    if (!s)
+    {
+        errno = EINVAL;
+        return NULL;
+    }
+    p = s;
+    while ((c = fgetc(stdin)) != EOF && c != '\n')
+    {
+        *p++ = (char)c;
+    }
+    if (ferror(stdin) || (c == EOF && p == s))
+    {
+        return NULL;
+    }
+    *p = 0; /* terminate in place of the newline */
+    return s;
+#endif
+}
+
+/**
+ * getline - getdelim with a newline delimiter
+ */
+ssize_t getline(char **lnptr, size_t *n, FILE *stream)
+{
+    return __getdelim(lnptr, n, '\n', stream);
+}
+
+/** getdelim - public name for __getdelim */
+ssize_t getdelim(char **lnptr, size_t *n, int delim, FILE *stream)
+{
+    return __getdelim(lnptr, n, delim, stream);
+}
+
+/**
+ * Reads from stream up to and including the first delim byte, or to
+ * end of file, into the malloc'd buffer *lnptr of capacity *n.  The
+ * buffer is allocated when *lnptr is NULL and grown in 256 byte steps;
+ * both *lnptr and *n are updated.  The result is null terminated.
+ *
+ * Returns the number of bytes stored (terminator excluded), or -1 on
+ * invalid arguments, allocation failure, read error, or end of file
+ * with nothing read.  On allocation failure the old buffer is kept.
+ */
+ssize_t __getdelim(char **lnptr, size_t *n, int delim, FILE *stream)
+{
+    size_t i = 0;
+    int c;
+
+    gossip_debug(GOSSIP_USRINT_DEBUG, "getdelim %p, %d, %d, %p\n",
+                 lnptr, n ? (int)*n : 0, delim, stream);
+#ifndef PVFS_STDIO_REDEFSTREAM
+    if (stream == stdin || stream == stdout || stream == stderr)
+    {
+        init_stdio();
+        return stdio_ops.getdelim(lnptr, n, delim, stream);
+    }
+#endif
+    if (!stream || !lnptr || !n)
+    {
+        errno = EINVAL;
+        return -1;
+    }
+    if (!*lnptr)
+    {
+        *lnptr = (char *)malloc(256);
+        if (!*lnptr)
+        {
+            *n = 0;
+            return -1;
+        }
+        *n = 256;
+    }
+    for (;;)
+    {
+        c = fgetc(stream);
+        if (c == EOF)
+        {
+            break;
+        }
+        if (i + 2 > *n) /* need space for this char and null terminator */
+        {
+            /* spec gives no guidance on fit of allocated space */
+            char *grown = (char *)realloc(*lnptr, *n + 256);
+            if (!grown)
+            {
+                return -1; /* *lnptr is still valid for the caller to free */
+            }
+            *lnptr = grown;
+            *n += 256;
+        }
+        (*lnptr)[i++] = (char)c;
+        if (c == delim)
+        {
+            break;
+        }
+    }
+    if (i == 0 || (c == EOF && ferror(stream)))
+    {
+        return -1;
+    }
+    (*lnptr)[i] = 0; /* null terminator */
+    return (ssize_t)i;
+}
+
+/**
+ * ungetc wrapper
+ *
+ * Steps the stream back one byte with fseek64; the byte c itself is
+ * not stored, so only the byte just read can be "pushed back".
+ * Returns c, or EOF if c is EOF or the seek fails.
+ *
+ * TODO: at the moment this will not unget beyond the current
+ * buffer - needs a better implementation using the backup
+ * buffer area _IO_save_base, _IO_save_end, _IO_backup_base
+ */
+int ungetc(int c, FILE *stream)
+{
+    int64_t rc;
+
+    gossip_debug(GOSSIP_USRINT_DEBUG, "ungetc %d, %p\n",
+                 c, stream);
+#ifndef PVFS_STDIO_REDEFSTREAM
+    if (stream == stdin || stream == stdout || stream == stderr)
+    {
+        init_stdio();
+        return stdio_ops.ungetc(c, stream);
+    }
+#endif
+    if (c == EOF)
+    {
+        return EOF; /* pushing back EOF must leave the stream unchanged */
+    }
+    rc = fseek64(stream, -1L, SEEK_CUR);
+    if (rc < 0)
+    {
+        return EOF;
+    }
+    return c;
+}
+
+/**
+ * We don't need any flavor of sprintf or sscanf
+ * they don't do IO on a stream
+ */
+#if 0
+sprintf, snprintf, vsprintf, vsnprintf, asprintf, vasprintfm
+sscanf, vsscanf
+#endif
+
+/**
+ * dprintf wrapper
+ */
+int dprintf(int fd, const char *format, ...)
+{
+    size_t len;
+    va_list ap;
+
+    va_start(ap, format);
+    len = vdprintf(fd, format, ap);
+    va_end(ap);
+    return len;
+}
+
+/**
+ * vdprintf
+ *
+ * Formats into a temporary buffer with vasprintf and writes it to fd,
+ * retrying after short writes.  Returns the number of bytes written,
+ * or a negative value if formatting or the write fails.
+ */
+int vdprintf(int fd, const char *format, va_list ap)
+{
+    char *buf = NULL;
+    int len, done = 0;
+
+    len = vasprintf(&buf, format, ap);
+    if (len < 0)
+    {
+        return len;
+    }
+    while (done < len)
+    {
+        int rc = write(fd, buf + done, len - done);
+        if (rc < 0)
+        {
+            int saved = errno; /* keep write's errno across free */
+            free(buf);
+            errno = saved;
+            return rc;
+        }
+        if (rc == 0)
+        {
+            break; /* no progress - report what was written */
+        }
+        done += rc;
+    }
+    free(buf);
+    return done;
+}
+
+/**
+ * vfprintf using a var arg list
+ *
+ * Formats into a temporary buffer with vasprintf and writes it with
+ * fwrite.  Returns the number of characters written, or a negative
+ * value if formatting fails or the output is short.
+ */
+int vfprintf(FILE *stream, const char *format, va_list ap)
+{
+    char *buf = NULL;
+    int len, rc = 0;
+
+#ifndef PVFS_STDIO_REDEFSTREAM
+    if (stream == stdin || stream == stdout || stream == stderr)
+    {
+        init_stdio();
+        return stdio_ops.vfprintf(stream, format, ap);
+    }
+#endif
+    len = vasprintf(&buf, format, ap);
+    if (len < 0)
+    {
+        return len;
+    }
+    if (len > 0)
+    {
+        /* item size 1 so the count returned is in characters */
+        size_t wrote = fwrite(buf, 1, len, stream);
+        rc = (wrote == (size_t)len) ? len : -1;
+    }
+    free(buf);
+    return rc;
+}
+
+/**
+ * vprintf wrapper - vfprintf on stdout
+ */
+int vprintf(const char *format, va_list ap)
+{
+    return vfprintf(stdout, format, ap);
+}
+
+/**
+ * fprintf wrapper
+ *
+ * Returns whatever vfprintf returns: characters written or a negative
+ * value on failure.
+ */
+int fprintf(FILE *stream, const char *format, ...)
+{
+    int len; /* signed so a negative error result is preserved */
+    va_list ap;
+
+    va_start(ap, format);
+    len = vfprintf(stream, format, ap);
+    va_end(ap);
+    return len;
+}
+
+/**
+ * printf wrapper
+ */
+int printf(const char *format, ...)
+{ + size_t len; + va_list ap; + + va_start(ap, format); + len = vfprintf(stdout, format, ap); + va_end(ap); + return len; +} + +/** + * perror + */ +void perror(const char *s) +{ +#ifndef PVFS_STDIO_REDEFSTREAM + init_stdio(); + stdio_ops.perror(s); + return; +#else + char *msg; + if (s && *s) + { + fwrite(s, strlen(s), 1, stderr); + } + msg = strerror(errno); + fwrite(msg, strlen(msg), 1, stderr); + fwrite("\n", 1, 1, stderr); +#endif +} + +#if 0 +/* TODO: These are not implemented yet */ + +scanf() +{ +} + +fscanf() +{ +} + +vfscanf() +{ +} + +#endif + +/** + * Stdio utilitie to clear error and eof for a stream + */ +void clearerr (FILE *stream) +{ + gossip_debug(GOSSIP_USRINT_DEBUG, "clearerr %p\n", stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.clearerr(stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + stdio_ops.clearerr(stream); + return; + } + return; + } + lock_stream(stream); + CLEARFLAG(stream, _IO_ERR_SEEN); + CLEARFLAG(stream, _IO_EOF_SEEN); + unlock_stream(stream); +} + +void clearerr_unlocked (FILE *stream) +{ + gossip_debug(GOSSIP_USRINT_DEBUG, "clearerr_unlocked %p\n", stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.clearerr_unlocked(stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + stdio_ops.clearerr_unlocked(stream); + return; + } + return; + } + CLEARFLAG(stream, _IO_ERR_SEEN); + CLEARFLAG(stream, _IO_EOF_SEEN); +} + +/** + * Stdio utilitie to check if a stream is at EOF + */ +int feof (FILE *stream) +{ + int rc = 0; + + gossip_debug(GOSSIP_USRINT_DEBUG, "feof %p\n", stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { 
+ init_stdio(); + rc = stdio_ops.feof(stream); + gossip_debug(GOSSIP_USRINT_DEBUG, "feof returns %d\n", rc); + return rc; + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.feof(stream); + } + errno = EINVAL; + gossip_debug(GOSSIP_USRINT_DEBUG, "feof returns %d\n", -1); + return -1; + } + lock_stream(stream); + rc = ISFLAGSET(stream, _IO_EOF_SEEN); + unlock_stream(stream); + gossip_debug(GOSSIP_USRINT_DEBUG, "feof returns %d\n", rc); + return rc; +} + +int feof_unlocked (FILE *stream) +{ + int rc = 0; + + gossip_debug(GOSSIP_USRINT_DEBUG, "feof_unlocked %p\n", stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + rc = stdio_ops.feof_unlocked(stream); + gossip_debug(GOSSIP_USRINT_DEBUG, "feof_unlocked returns %d\n", rc); + return rc; + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.feof_unlocked(stream); + } + errno = EBADF; + gossip_debug(GOSSIP_USRINT_DEBUG, "feof_unlocked returns %d\n", -1); + return -1; + } + rc = ISFLAGSET(stream, _IO_EOF_SEEN); + gossip_debug(GOSSIP_USRINT_DEBUG, "feof_unlocked returns %d\n", rc); + return rc; +} + +/** + * Stdio utilitie to check for error on a stream + */ +int ferror (FILE *stream) +{ + int rc = 0; + + gossip_debug(GOSSIP_USRINT_DEBUG, "ferror %p\n", stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + rc = stdio_ops.ferror(stream); + gossip_debug(GOSSIP_USRINT_DEBUG, "ferror returns %d\n", rc); + return rc; + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.ferror(stream); + } + errno = EINVAL; + gossip_debug(GOSSIP_USRINT_DEBUG, "ferror returns %d\n", -1); + return -1; + } + 
lock_stream(stream); + rc = ISFLAGSET(stream, _IO_ERR_SEEN); + unlock_stream(stream); + gossip_debug(GOSSIP_USRINT_DEBUG, "ferror returns %d\n", rc); + return rc; +} + +int ferror_unlocked (FILE *stream) +{ + int rc = 0; + gossip_debug(GOSSIP_USRINT_DEBUG, "ferror_unlocked %p\n", stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + rc =stdio_ops.ferror_unlocked(stream); + gossip_debug(GOSSIP_USRINT_DEBUG, "ferror_unlocked returns %d\n", rc); + return rc; + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.ferror_unlocked(stream); + } + errno = EBADF; + gossip_debug(GOSSIP_USRINT_DEBUG, "ferror_unlocked returns %d\n", -1); + return -1; + } + gossip_debug(GOSSIP_USRINT_DEBUG, "ferror_unlocked returns %d\n", rc); + return ISFLAGSET(stream, _IO_ERR_SEEN); +} + +/** + * Stdio utilitie to get file descriptor from a stream + */ +int fileno (FILE *stream) +{ + int rc = 0; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fileno %p\n", stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.fileno(stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return stdio_ops.fileno(stream); + } + errno = EINVAL; + return -1; + } + lock_stream(stream); + rc = stream->_fileno; + unlock_stream(stream); + return rc; +} + +int fileno_unlocked (FILE *stream) +{ + gossip_debug(GOSSIP_USRINT_DEBUG, "fileno_unlocked %p\n", stream); +#ifndef PVFS_STDIO_REDEFSTREAM + if (stream == stdin || stream == stdout || stream == stderr) + { + init_stdio(); + return stdio_ops.fileno_unlocked(stream); + } +#endif + if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC)) + { + if (stream && ISMAGICSET(stream, _IO_MAGIC)) + { + init_stdio(); + return 
stdio_ops.fileno_unlocked(stream);
+        }
+        errno = EBADF;
+        return -1;
+    }
+    return stream->_fileno;
+}
+
+/** stdio function to delete a file
+ *
+ * Directories are removed with rmdir, everything else with unlink.
+ * When stat fails (for example on a dangling symlink) the path is
+ * still handed to unlink, which then reports the error.
+ * Returns 0 on success, -1 with errno set on failure.
+ */
+int remove (const char *path)
+{
+    struct stat buf;
+
+    if (!path)
+    {
+        errno = EINVAL;
+        return -1;
+    }
+    gossip_debug(GOSSIP_USRINT_DEBUG, "remove %s\n", path);
+    /* only trust st_mode when stat actually filled it in */
+    if (stat(path, &buf) == 0 && S_ISDIR(buf.st_mode))
+    {
+        return rmdir (path);
+    }
+    return unlink (path);
+}
+
+/**
+ * setbuf wrapper - full buffering in buf, or unbuffered if buf is NULL
+ */
+void setbuf (FILE *stream, char *buf)
+{
+    setvbuf(stream, buf, buf ? _IOFBF : _IONBF, BUFSIZ);
+}
+
+/**
+ * setbuffer wrapper - as setbuf with a caller supplied buffer size
+ */
+void setbuffer (FILE *stream, char *buf, size_t size)
+{
+    setvbuf(stream, buf, buf ? _IOFBF : _IONBF, size);
+}
+
+/**
+ * setlinebuf wrapper - requests line buffering
+ */
+void setlinebuf (FILE *stream)
+{
+    setvbuf(stream, (char *)NULL, _IOLBF, 0);
+}
+
+/**
+ *
+ * This should only be called on a stream that has been opened
+ * but not used so we can assume any exiting buff is not dirty
+ */
+int setvbuf (FILE *stream, char *buf, int mode, size_t size)
+{
+    gossip_debug(GOSSIP_USRINT_DEBUG, "setvbuf %p %p %d %d\n",
+                 stream, buf, mode, size);
+#ifndef PVFS_STDIO_REDEFSTREAM
+    if (stream == stdin || stream == stdout || stream == stderr)
+    {
+        init_stdio();
+        return stdio_ops.setvbuf(stream, buf, mode, size);
+    }
+#endif
+    if (!stream || !ISMAGICSET(stream, _P_IO_MAGIC))
+    {
+        if (stream && ISMAGICSET(stream, _IO_MAGIC))
+        {
+            init_stdio();
+            return stdio_ops.setvbuf(stream, buf, mode, size);
+        }
+        errno = EINVAL;
+        return -1;
+    }
+    if ((stream->_IO_read_end != stream->_IO_buf_base) ||
+        (stream->_IO_write_ptr != stream->_IO_buf_base))
+    {
+        /* fread or fwrite has been called */
+        errno = EINVAL;
+        return -1;
+    }
+    lock_stream(stream);
+    switch (mode)
+    {
+    case _IOFBF : /* full buffered */
+        /* this is the default */
+        break;
+    case _IOLBF : /* line buffered */
+        SETFLAG(stream, _IO_LINE_BUF); /* TODO: This is not implemented */
+        break;
+    case _IONBF : /* not buffered */
+        SETFLAG(stream, _IO_UNBUFFERED); /* TODO: This is not implemented */
+        break;
+
default :
+        errno = EINVAL;
+        unlock_stream(stream);
+        return -1;
+    }
+    if (buf && size > 0)
+    {
+        SETFLAG(stream, _IO_USER_BUF);
+        free(stream->_IO_buf_base);
+        stream->_IO_buf_base = buf;
+        stream->_IO_buf_end = stream->_IO_buf_base + size;
+        stream->_IO_read_base = stream->_IO_buf_base;
+        stream->_IO_read_ptr = stream->_IO_buf_base;
+        stream->_IO_read_end = stream->_IO_buf_base;
+        stream->_IO_write_base = stream->_IO_buf_base;
+        stream->_IO_write_ptr = stream->_IO_buf_base;
+        stream->_IO_write_end = stream->_IO_buf_end;
+    }
+    unlock_stream(stream);
+    return 0;
+}
+
+/**
+ * mkdtemp makes a temp dir and returns its name
+ *
+ * template must be writable and end in "XXXXXX"; that suffix is
+ * overwritten with six random digits.  Up to MAXTRIES names are tried
+ * and the first one mkdir accepts (mode 0700) wins.
+ *
+ * Returns template on success.  Returns NULL with errno EINVAL for a
+ * bad template, EEXIST when every candidate already existed, or the
+ * errno left by mkdir for any other failure.
+ */
+char *mkdtemp(char *template)
+{
+    size_t len;
+    int try;
+
+    if (!template)
+    {
+        errno = EINVAL;
+        return NULL;
+    }
+    len = strlen(template);
+    if (len < 6 || strncmp(&template[len-6],"XXXXXX",6) != 0)
+    {
+        errno = EINVAL;
+        return NULL;
+    }
+    for(try = 0; try < MAXTRIES; try++)
+    {
+        /* unsigned keeps the suffix at exactly six digits, no sign */
+        unsigned int rnum = (unsigned int)PINT_random() % 1000000U;
+        sprintf(&template[len-6],"%06u", rnum);
+        if (mkdir(template, 0700) == 0)
+        {
+            return template;
+        }
+        if (errno != EEXIST)
+        {
+            return NULL;
+        }
+    }
+    errno = EEXIST;
+    return NULL;
+}
+
+/**
+ * mkstemp makes a temp file and returns an fd
+ *
+ * Same template rules as mkdtemp.  The file is created with
+ * O_RDWR|O_EXCL|O_CREAT and mode 0600, so an existing name is never
+ * reused.
+ *
+ * Returns the open descriptor, or -1 with errno set (EINVAL for a bad
+ * template, EEXIST after MAXTRIES collisions, otherwise from open).
+ */
+int mkstemp(char *template)
+{
+    size_t len;
+    int fd;
+    int try;
+
+    if (!template)
+    {
+        errno = EINVAL;
+        return -1;
+    }
+    len = strlen(template);
+    if (len < 6 || strncmp(&template[len-6],"XXXXXX",6) != 0)
+    {
+        errno = EINVAL;
+        return -1;
+    }
+    for(try = 0; try < MAXTRIES; try++)
+    {
+        unsigned int rnum = (unsigned int)PINT_random() % 1000000U;
+        sprintf(&template[len-6],"%06u", rnum);
+        fd = open(template, O_RDWR|O_EXCL|O_CREAT, 0600);
+        if (fd >= 0)
+        {
+            return fd;
+        }
+        if (errno != EEXIST)
+        {
+            return -1;
+        }
+    }
+    errno = EEXIST;
+    return -1;
+}
+
+/**
+ * tmpfile makes a temp file and returns a stream
+ *
+ * The file is created under /tmp by mkstemp and unlinked right away so
+ * it disappears once the stream is closed.  Returns NULL on failure.
+ */
+FILE *tmpfile(void)
+{
+    /* array, not a literal: mkstemp rewrites the suffix in place */
+    char template[] = "/tmp/tmpfileXXXXXX";
+    FILE *stream;
+    int fd;
+
+    fd = mkstemp(template);
+    if (fd < 0)
+    {
+        return NULL;
+    }
+    unlink(template);
+    stream = fdopen(fd, "r+");
+    if (!stream)
+    {
+        close(fd); /* do not leak the descriptor */
+    }
+    return stream;
+}
+
+/**
+ * opendir opens a directory as a stream
+ */
+DIR
*opendir (const char *name) +{ + int fd; + + gossip_debug(GOSSIP_USRINT_DEBUG, "opendir %s\n", name); + if(!name) + { + errno = EINVAL; + return NULL; + } + fd = open(name, O_RDONLY|O_DIRECTORY); + if (fd < 0) + { + return NULL; + } + return fdopendir(fd); +} + +/** + * creates a stream for an already open directory + */ +DIR *fdopendir (int fd) +{ + DIR *dstr; + + gossip_debug(GOSSIP_USRINT_DEBUG, "fdopendir %d\n", fd); + dstr = (DIR *)malloc(sizeof(DIR)); + if (dstr == NULL) + { + return NULL; + } + memset(dstr, 0, sizeof(DIR)); + SETMAGIC(dstr, DIRSTREAM_MAGIC); + dstr->fileno = fd; + dstr->buf_base = (char *)malloc(DIRBUFSIZE); + if (dstr->buf_base == NULL) + { + dstr->_flags = 0; + free(dstr); + return NULL; + } + dstr->buf_end = dstr->buf_base + DIRBUFSIZE; + dstr->buf_act = dstr->buf_base; + dstr->buf_ptr = dstr->buf_base; + return dstr; +} + +/** + * returns the file descriptor for a directory stream + */ +int dirfd (DIR *dir) +{ + gossip_debug(GOSSIP_USRINT_DEBUG, "dirfd %p\n", dir); + if (!dir || !ISMAGICSET(dir, DIRSTREAM_MAGIC)) + { + errno = EBADF; + return -1; + } + return dir->fileno; +} + +/** + * readdir wrapper + */ +struct dirent *readdir (DIR *dir) +{ + struct dirent64 *de64; + + gossip_debug(GOSSIP_USRINT_DEBUG, "readdir %p\n", dir); + de64 = readdir64(dir); + if (de64 == NULL) + { + return NULL; + } + /* linux hard defines d_name to 256 bytes */ + /* if others don't should replace with a define */ + memset(&dir->de, 0, sizeof(dir->de)); + memcpy(dir->de.d_name, de64->d_name, 256); + dir->de.d_ino = de64->d_ino; + /* these are system specific fields from the dirent */ +#ifdef _DIRENT_HAVE_D_NAMELEN + dir->de.d_namelen = strnlen(de64->d_name, 256); +#endif +#ifdef _DIRENT_HAVE_D_OFF + dir->de.d_off = de64->d_off; +#endif +#ifdef _DIRENT_HAVE_D_RECLEN + dir->de.d_reclen = de64->d_reclen; +#endif +#ifdef _DIRENT_HAVE_D_TYPE + dir->de.d_type = de64->d_type; +#endif + return &dir->de; +} + +/** + * reads a single dirent64 in buffered mode from a 
stream + * + * getdents is not defined in libc, though it is a linux + * system call and we define it in the usr lib + */ + +int getdents(int fd, struct dirent *buf, size_t size); +int getdents64(int fd, struct dirent64 *buf, size_t size); + +struct dirent64 *readdir64 (DIR *dir) +{ + struct dirent64 *rval; + + gossip_debug(GOSSIP_USRINT_DEBUG, "readdir64 %p\n", dir); + if (!dir || !ISMAGICSET(dir, DIRSTREAM_MAGIC)) + { + errno = EBADF; + return NULL; + } + if (dir->buf_ptr >= dir->buf_act) + { + int bytes_read; + /* read a block of dirent64s into the buffer */ + bytes_read = getdents64(dir->fileno, (struct dirent64 *)dir->buf_base, + (dir->buf_end - dir->buf_base)); + if (bytes_read <= 0) + { + return NULL; /* EOF if errno == 0 */ + } + dir->buf_act = dir->buf_base + bytes_read; + dir->buf_ptr = dir->buf_base; + } + rval = (struct dirent64 *)dir->buf_ptr; +#ifdef _DIRENT_HAVE_D_RECLEN + dir->buf_ptr += rval->d_reclen; +#else + dir->buf_ptr += sizeof(struct dirent64); +#endif + return rval; +} + +/** + * rewinds a directory stream + */ +void rewinddir (DIR *dir) +{ + off64_t filepos; + + gossip_debug(GOSSIP_USRINT_DEBUG, "rewinddir %p\n", dir); + if (!dir || !ISMAGICSET(dir, DIRSTREAM_MAGIC)) + { + errno = EBADF; + return; + } + filepos = lseek64(dir->fileno, 0, SEEK_CUR); + if ((filepos - (dir->buf_act - dir->buf_base)) == 0) + { + dir->buf_ptr = dir->buf_base; + } + else + { + dir->buf_act = dir->buf_base; + dir->buf_ptr = dir->buf_base; + lseek64(dir->fileno, 0, SEEK_SET); + } +} + +/** + * seeks in a directory stream + */ +void seekdir (DIR *dir, off_t offset) +{ + off64_t filepos; + + gossip_debug(GOSSIP_USRINT_DEBUG, "seekdir %p\n", dir); + if (!dir || !ISMAGICSET(dir, DIRSTREAM_MAGIC)) + { + errno = EBADF; + return; + } + filepos = lseek64(dir->fileno, 0, SEEK_CUR); + if ((filepos - (dir->buf_act - dir->buf_base)) <= offset && + filepos >= offset) + { + dir->buf_ptr = dir->buf_act - (filepos - offset); + } + else + { + dir->buf_act = dir->buf_base; + 
dir->buf_ptr = dir->buf_base;
+        lseek64(dir->fileno, offset, SEEK_SET);
+    }
+}
+
+/**
+ * returns current position in a directory stream
+ *
+ * The position is the descriptor's offset less the bytes still
+ * unread in the buffer.  Returns -1 with errno set on failure.
+ */
+off_t telldir (DIR *dir)
+{
+    off64_t filepos;
+
+    gossip_debug(GOSSIP_USRINT_DEBUG, "telldir %p\n", dir);
+    if (!dir || !ISMAGICSET(dir, DIRSTREAM_MAGIC))
+    {
+        errno = EBADF;
+        return -1;
+    }
+    filepos = lseek64(dir->fileno, 0, SEEK_CUR);
+    if (filepos == -1)
+    {
+        return -1;
+    }
+    return filepos - (dir->buf_act - dir->buf_ptr);
+}
+
+/**
+ * closes a directory stream
+ *
+ * Frees the buffer and the stream and closes the underlying
+ * descriptor.  Returns the result of close, or -1 with errno EBADF
+ * when dir is not a valid directory stream.
+ */
+int closedir (DIR *dir)
+{
+    int fd;
+
+    gossip_debug(GOSSIP_USRINT_DEBUG, "closedir %p\n", dir);
+    if (!dir || !ISMAGICSET(dir, DIRSTREAM_MAGIC))
+    {
+        errno = EBADF;
+        return -1;
+    }
+    fd = dir->fileno;
+    free(dir->buf_base);
+    dir->_flags = 0; /* clear the magic so a stale pointer is rejected */
+    free(dir);
+    return close(fd);
+}
+
+/* frees a partly built scandir list, closes dp, keeps errno, returns -1 */
+static int scandir_abort(struct dirent **list, int count, DIR *dp)
+{
+    int saved = errno;
+
+    while (count > 0)
+    {
+        free(list[--count]);
+    }
+    free(list);
+    closedir(dp);
+    errno = saved;
+    return -1;
+}
+
+/**
+ * scandir
+ *
+ * Reads every entry of directory dir, keeps those for which filter
+ * returns nonzero (all of them when filter is NULL), copies each into
+ * its own malloc'd struct dirent and sorts the array with compar
+ * (left unsorted when compar is NULL).
+ *
+ * On success *namelist receives the malloc'd array and the number of
+ * entries is returned; the caller frees each entry and the array.
+ * On failure -1 is returned and *namelist is not modified.
+ */
+#ifdef PVFS_SCANDIR_VOID
+int scandir (const char *dir,
+             struct dirent ***namelist,
+             int(*filter)(const struct dirent *),
+             int(*compar)(const void *,
+                          const void *))
+#else
+int scandir (const char *dir,
+             struct dirent ***namelist,
+             int(*filter)(const struct dirent *),
+             int(*compar)(const struct dirent **,
+                          const struct dirent **))
+#endif
+{
+    struct dirent *de;
+    struct dirent **list;
+    DIR *dp;
+    int count = 0;
+    int asz = ASIZE;
+
+    gossip_debug(GOSSIP_USRINT_DEBUG, "scandir %p\n", dir);
+    if (!namelist)
+    {
+        errno = EINVAL;
+        return -1;
+    }
+    /* open directory */
+    dp = opendir(dir);
+    if (!dp)
+    {
+        return -1;
+    }
+    /* allocate namelist */
+    list = (struct dirent **)malloc(asz * sizeof(struct dirent *));
+    if (!list)
+    {
+        return scandir_abort(NULL, 0, dp);
+    }
+    /* loop through the dirents */
+    for(de = readdir(dp); de; de = readdir(dp))
+    {
+        struct dirent *entry;
+
+        if (filter && !filter(de))
+        {
+            continue;
+        }
+        if (count >= asz)
+        {
+            /* ran out of space, realloc - size is in bytes */
+            struct dirent **darray = (struct dirent **)
+                realloc(list, (asz + ASIZE) * sizeof(struct dirent *));
+            if (!darray)
+            {
+                return scandir_abort(list, count, dp);
+            }
+            list = darray;
+            asz += ASIZE;
+        }
+        /* copy exactly one struct dirent, never past the source */
+        entry = (struct dirent *)malloc(sizeof(struct dirent));
+        if (!entry)
+        {
+            return scandir_abort(list, count, dp);
+        }
+        memcpy(entry, de, sizeof(struct dirent));
+        /* add to namelist */
+        list[count++] = entry;
+    }
+    /* now sort entries */
+    if (compar)
+    {
+        qsort(list, count, sizeof(struct dirent *), (__compar_fn_t)compar);
+    }
+    if (closedir(dp) == -1)
+    {
+        int saved = errno;
+        while (count > 0)
+        {
+            free(list[--count]);
+        }
+        free(list);
+        errno = saved;
+        return -1;
+    }
+    *namelist = list;
+    return count;
+}
+
+/* scandir_abort for the dirent64 flavour */
+static int scandir64_abort(struct dirent64 **list, int count, DIR *dp)
+{
+    int saved = errno;
+
+    while (count > 0)
+    {
+        free(list[--count]);
+    }
+    free(list);
+    closedir(dp);
+    errno = saved;
+    return -1;
+}
+
+/**
+ * 64 bit version of scandir
+ *
+ * Same contract as scandir with struct dirent64 entries.  Each entry
+ * is copied into a zeroed struct dirent64; where d_reclen exists no
+ * more than d_reclen bytes are read from the source record.
+ *
+ * TODO: Would prefer not to copy code - modify to a generic version
+ * and then call from two wrapper versions would be better
+ * pass in a flag to control the copy of the dirent into the array
+ */
+#ifdef PVFS_SCANDIR_VOID
+int scandir64 (const char *dir,
+               struct dirent64 ***namelist,
+               int(*filter)(const struct dirent64 *),
+               int(*compar)(const void *,
+                            const void *))
+#else
+int scandir64 (const char *dir,
+               struct dirent64 ***namelist,
+               int(*filter)(const struct dirent64 *),
+               int(*compar)(const struct dirent64 **,
+                            const struct dirent64 **))
+#endif
+{
+    struct dirent64 *de;
+    struct dirent64 **list;
+    DIR *dp;
+    int i = 0, rc; /* i counts the entries stored so far */
+    int asz = ASIZE;
+
+    gossip_debug(GOSSIP_USRINT_DEBUG, "scandir64 %p\n", dir);
+    if (!namelist)
+    {
+        errno = EINVAL;
+        return -1;
+    }
+    /* open directory */
+    dp = opendir(dir);
+    if (!dp)
+    {
+        return -1;
+    }
+    /* allocate namelist */
+    list = (struct dirent64 **)malloc(asz * sizeof(struct dirent64 *));
+    if (!list)
+    {
+        return scandir64_abort(NULL, 0, dp);
+    }
+    /* loop through the dirents */
+    for(de = readdir64(dp); de; de = readdir64(dp))
+    {
+        struct dirent64 *entry;
+        size_t len = sizeof(struct dirent64);
+
+        if (filter && !filter(de))
+        {
+            continue;
+        }
+        if (i >= asz)
+        {
+            /* ran out of space, realloc - size is in bytes */
+            struct dirent64 **darray = (struct dirent64 **)
+                realloc(list, (asz + ASIZE) * sizeof(struct dirent64 *));
+            if (!darray)
+            {
+                return scandir64_abort(list, i, dp);
+            }
+            list = darray;
+            asz += ASIZE;
+        }
+#ifdef _DIRENT_HAVE_D_RECLEN
+        /* the source record may be shorter than the full struct */
+        if (de->d_reclen < len)
+        {
+            len = de->d_reclen;
+        }
+#endif
+        entry = (struct dirent64 *)malloc(sizeof(struct dirent64));
+        if (!entry)
+        {
+            return scandir64_abort(list, i, dp);
+        }
+        memset(entry, 0, sizeof(struct dirent64));
+        memcpy(entry, de, len);
+        /* add to namelist */
+        list[i++] = entry;
+    }
+    /* now sort entries */
+    if (compar)
+    {
+        qsort(list, i, sizeof(struct dirent64 *), (__compar_fn_t)compar);
+    }
+    *namelist = list;
+    rc = closedir(dp);
+    if (rc ==
-1) + { + return -1; + } + return i; +} + +static void init_stdio(void) +{ + static int init_flag = 0; + static gen_mutex_t initlock = GEN_MUTEX_INITIALIZER; + /* if we've already done this bail right away */ + if (init_flag) + { + return; + } + gen_mutex_lock(&initlock); + /* don't let more than one thread initialize */ + if (init_flag) + { + gen_mutex_unlock(&initlock); + return; + } + /* init open file chain */ + lock_init_stream(&open_files); + /* init pointers to glibc stdio calls */ + stdio_ops.fopen = dlsym(RTLD_NEXT, "fopen" ); + stdio_ops.fdopen = dlsym(RTLD_NEXT, "fdopen" ); + stdio_ops.freopen = dlsym(RTLD_NEXT, "freopen" ); + stdio_ops.fwrite = dlsym(RTLD_NEXT, "fwrite" ); + stdio_ops.fwrite_unlocked = dlsym(RTLD_NEXT, "fwrite_unlocked" ); + stdio_ops.fread = dlsym(RTLD_NEXT, "fread" ); + stdio_ops.fread_unlocked = dlsym(RTLD_NEXT, "fread_unlocked" ); + stdio_ops.fclose = dlsym(RTLD_NEXT, "fclose" ); + stdio_ops.fseek = dlsym(RTLD_NEXT, "fseek" ); + stdio_ops.fseek64 = dlsym(RTLD_NEXT, "fseek64" ); + stdio_ops.fsetpos = dlsym(RTLD_NEXT, "fsetpos" ); + stdio_ops.rewind = dlsym(RTLD_NEXT, "rewind" ); + stdio_ops.ftell = dlsym(RTLD_NEXT, "ftell" ); + stdio_ops.ftell64 = dlsym(RTLD_NEXT, "ftell64" ); + stdio_ops.fgetpos = dlsym(RTLD_NEXT, "fgetpos" ); + stdio_ops.fflush = dlsym(RTLD_NEXT, "fflush" ); + stdio_ops.fflush_unlocked = dlsym(RTLD_NEXT, "fflush_unlocked" ); + stdio_ops.fputc = dlsym(RTLD_NEXT, "fputc" ); + stdio_ops.fputc_unlocked = dlsym(RTLD_NEXT, "fputc_unlocked" ); + stdio_ops.fputs = dlsym(RTLD_NEXT, "fputs" ); + stdio_ops.fputs_unlocked = dlsym(RTLD_NEXT, "fputs_unlocked" ); + stdio_ops.putc = dlsym(RTLD_NEXT, "putc" ); + stdio_ops.putc_unlocked = dlsym(RTLD_NEXT, "putc_unlocked" ); + stdio_ops.putchar = dlsym(RTLD_NEXT, "putchar" ); + stdio_ops.putchar_unlocked = dlsym(RTLD_NEXT, "putchar_unlocked" ); + stdio_ops.puts = dlsym(RTLD_NEXT, "puts" ); + stdio_ops.putw = dlsym(RTLD_NEXT, "putw" ); + stdio_ops.fgets = dlsym(RTLD_NEXT, "fgets" ); + 
stdio_ops.fgetc = dlsym(RTLD_NEXT, "fgetc" ); + stdio_ops.getc = dlsym(RTLD_NEXT, "getc" ); + stdio_ops.getc_unlocked = dlsym(RTLD_NEXT, "getc_unlocked" ); + stdio_ops.getchar = dlsym(RTLD_NEXT, "getchar" ); + stdio_ops.getchar_unlocked = dlsym(RTLD_NEXT, "getchar_unlocked" ); + stdio_ops.getw = dlsym(RTLD_NEXT, "getw" ); + stdio_ops.gets = dlsym(RTLD_NEXT, "gets" ); + stdio_ops.getdelim = dlsym(RTLD_NEXT, "getdelim" ); + stdio_ops.ungetc = dlsym(RTLD_NEXT, "ungetc" ); + stdio_ops.vfprintf = dlsym(RTLD_NEXT, "vfprintf" ); + stdio_ops.vprintf = dlsym(RTLD_NEXT, "vprintf" ); + stdio_ops.fprintf = dlsym(RTLD_NEXT, "fprintf" ); + stdio_ops.printf = dlsym(RTLD_NEXT, "printf" ); + stdio_ops.perror = dlsym(RTLD_NEXT, "perror" ); + stdio_ops.fscanf = dlsym(RTLD_NEXT, "fscanf" ); + stdio_ops.scanf = dlsym(RTLD_NEXT, "scanf" ); + stdio_ops.clearerr = dlsym(RTLD_NEXT, "clearerr" ); + stdio_ops.clearerr_unlocked = dlsym(RTLD_NEXT, "clearerr_unlocked" ); + stdio_ops.feof = dlsym(RTLD_NEXT, "feof" ); + stdio_ops.feof_unlocked = dlsym(RTLD_NEXT, "feof_unlocked" ); + stdio_ops.ferror = dlsym(RTLD_NEXT, "ferror" ); + stdio_ops.ferror_unlocked = dlsym(RTLD_NEXT, "ferror_unlocked" ); + stdio_ops.fileno = dlsym(RTLD_NEXT, "fileno" ); + stdio_ops.fileno_unlocked = dlsym(RTLD_NEXT, "fileno_unlocked" ); + stdio_ops.remove = dlsym(RTLD_NEXT, "remove" ); + stdio_ops.setbuf = dlsym(RTLD_NEXT, "setbuf" ); + stdio_ops.setbuffer = dlsym(RTLD_NEXT, "setbuffer" ); + stdio_ops.setlinebuf = dlsym(RTLD_NEXT, "setlinebuf" ); + stdio_ops.setvbuf = dlsym(RTLD_NEXT, "setvbuf" ); + stdio_ops.mkdtemp = dlsym(RTLD_NEXT, "mkdtemp" ); + stdio_ops.mkstemp = dlsym(RTLD_NEXT, "mkstemp" ); + stdio_ops.tmpfile = dlsym(RTLD_NEXT, "tmpfile" ); + stdio_ops.opendir = dlsym(RTLD_NEXT, "opendir" ); + stdio_ops.fdopendir = dlsym(RTLD_NEXT, "fdopendir" ); + stdio_ops.dirfd = dlsym(RTLD_NEXT, "dirfd" ); + stdio_ops.readdir = dlsym(RTLD_NEXT, "readdir" ); + stdio_ops.readdir64 = dlsym(RTLD_NEXT, "readdir64" ); + 
stdio_ops.rewinddir = dlsym(RTLD_NEXT, "rewinddir" ); + stdio_ops.seekdir = dlsym(RTLD_NEXT, "seekdir" ); + stdio_ops.telldir = dlsym(RTLD_NEXT, "telldir" ); + stdio_ops.closedir = dlsym(RTLD_NEXT, "closedir" ); + stdio_ops.scandir = dlsym(RTLD_NEXT, "scandir" ); + stdio_ops.scandir64 = dlsym(RTLD_NEXT, "scandir64" ); + + /* Finish */ + init_flag = 1; + gen_mutex_unlock(&initlock); +}; + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/client/usrint/usrint.h b/src/client/usrint/usrint.h new file mode 100644 index 0000000..4a27794 --- /dev/null +++ b/src/client/usrint/usrint.h @@ -0,0 +1,325 @@ +/* + * (C) 2011 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/** \file + * \ingroup usrint + * + * PVFS2 user interface routines + */ +#ifndef USRINT_H +#define USRINT_H 1 + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif +#ifndef _ATFILE_SOURCE +#define _ATFILE_SOURCE 1 +#endif +#ifndef _LARGEFILE_SOURCE +#define _LARGEFILE_SOURCE 1 +#endif +#ifndef _LARGEFILE64_SOURCE +#define _LARGEFILE64_SOURCE 1 +#endif + +/* + * This seems to control redirect of 32-bit IO to 64-bit IO + * We want to avoid this in our source + */ +#ifdef USRINT_SOURCE +#ifdef _FILE_OFFSET_BITS +#undef _FILE_OFFSET_BITS +#endif +#else +#ifndef _FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 64 +#endif +#ifdef __OPTIMIZE__ +#undef __OPTIMIZE__ +#endif +#define __NO_INLINE__ 1 +#endif + +/* + * this defines __USE_LARGEFILE, __USE_LARGEFILE64, and + * __USE_FILE_OFFSET64 which control many of the other includes + */ +#ifdef HAVE_FEATURES_H +#include +#endif + +/* + * force this stuff off if the source requests + * the stuff controlling inlining and def'ing of + * functions in stdio is really mixed up and varies from + * one generation of the headers to another. 
+ * I hate to whack all inlining and related stuff + * but it seems the only reliable way to turn it off. + * USRINT code can get this or not with the var below + * I question that header files should be doing + * optimization in the first place. WBL + */ +#ifdef USRINT_SOURCE +#ifdef __USE_FILE_OFFSET64 +#undef __USE_FILE_OFFSET64 +#endif +/* This seems to reappear on some systems, so whack it again */ +#ifdef __OPTIMIZE__ +#undef __OPTIMIZE__ +#endif +#ifdef __REDIRECT +#undef __REDIRECT +#endif +#ifdef __USE_EXTERN_INLINES +#undef __USE_EXTERN_INLINES +#endif +#ifdef __USE_FORTIFY_LEVEL +#undef __FORTIFY_LEVEL +#define __USE_FORTIFY_LEVEL 0 +#endif +#endif + +#include +#include + +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_STDLIB_H +#include +#endif +#include +#include +#include +#include +#ifdef HAVE_STDARG_H +#include +#endif +#include +#include +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#include +#ifdef HAVE_SYS_SOCKET_H +#include +#endif +#include +#ifdef HAVE_SYS_SENDFILE_H +#include +#endif +/* #include */ /* struct statfs on OS X */ +#ifdef HAVE_SYS_VFS_H +#include /* struct statfs on Linux */ +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif +#include +#include +#ifdef PVFS_HAVE_ACL_INCLUDES +#include +#include +#endif +#include +#include + +#ifdef HAVE_LINUX_TYPES_H +#include +#endif + +#ifdef HAVE_ATTR_XATTR_H +#include +#else +#ifdef HAVE_SYS_ATTR_H +#include +#else +#define XATTR_CREATE 0x1 +#define XATTR_REPLACE 0x2 +extern int setxattr(const char *path, const char *name, + const void *value, size_t size, int flags); +extern int lsetxattr(const char *path, const char *name, + const void *value, size_t size, int flags); +extern int fsetxattr(int fd, const char *name, + const void *value, size_t size, int flags); +extern ssize_t getxattr(const char *path, const char *name, + void *value, size_t size); +extern ssize_t lgetxattr(const char *path, const char *name, + void *value, size_t size); +extern 
ssize_t fgetxattr(int fd, const char *name, void *value, size_t size); +extern ssize_t listxattr(const char *path, char *list, size_t size); +extern ssize_t llistxattr(const char *path, char *list, size_t size); +extern ssize_t flistxattr(int fd, char *list, size_t size); +extern int removexattr(const char *path, const char *name); +extern int lremovexattr(const char *path, const char *name); +extern int fremovexattr(int fd, const char *name); +#endif +#endif + +/* #include diff source need diff versions */ +#include +#include + +//#include + +/* PVFS specific includes */ +#include +#include +#include +#include +#include +#include + +/* magic numbers for PVFS filesystem */ +#define PVFS_FS 537068840 +#define LINUX_FS 61267 + +#define PVFS_FD_SUCCESS 0 +#define PVFS_FD_FAILURE -1 + +/* Defines GNU's O_NOFOLLOW flag to be false if its not set */ +#ifndef O_NOFOLLOW +#define O_NOFOLLOW 0 +#endif + +/* Define AT_FDCWD and related flags on older systems */ +#ifndef AT_FDCWD +# define AT_FDCWD -100 /* Special value used to indicate + the *at functions should use the + current working directory. */ +#endif +#ifndef AT_SYMLINK_NOFOLLOW +# define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ +#endif +#ifndef AT_REMOVEDIR +# define AT_REMOVEDIR 0x200 /* Remove directory instead of + unlinking file. */ +#endif +#ifndef AT_SYMLINK_FOLLOW +# define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ +#endif +#ifndef AT_EACCESS +# define AT_EACCESS 0x200 /* Test access permitted for + effective IDs, not real IDs.
*/ +#endif + +#define true 1 +#define false 0 +#define O_HINTS 02000000 /* PVFS hints are present */ +#define O_NOTPVFS 04000000 /* Open non-PVFS files if possible */ + +/* constants for this library */ +/* size of stdio default buffer - starting at 1Meg */ +#define PVFS_BUFSIZE (1024*1024) + +/* extra function prototypes */ + +extern int posix_readdir(unsigned int fd, struct dirent *dirp, + unsigned int count); + +extern int fseek64(FILE *stream, const off64_t offset, int whence); + +extern off64_t ftell64(FILE *stream); + +extern int pvfs_convert_iovec(const struct iovec *vector, + int count, + PVFS_Request *req, + void **buf); + +/* MPI functions */ +//int MPI_File_open(MPI_Comm comm, char *filename, +// int amode, MPI_Info info, MPI_File *mpi_fh); +//int MPI_File_write(MPI_File mpi_fh, void *buf, +// int count, MPI_Datatype datatype, MPI_Status *status); + +/* Macros */ + +/* debugging */ + +//#define USRINT_DEBUG +#ifdef PVFS_USRINT_DEBUG +#define debug(s,v) fprintf(stderr,s,v) +#else +#define debug(s,v) +#endif + +/* USRINT Configuration Defines - Defaults */ +/* These should be defined in pvfs2-config.h */ + +#ifndef PVFS_USRINT_BUILD +#define PVFS_USRINT_BUILD 1 +#endif + +#ifndef PVFS_USRINT_CWD +#define PVFS_USRINT_CWD 1 +#endif + +#ifndef PVFS_USRINT_KMOUNT +#define PVFS_USRINT_KMOUNT 0 +#endif + +#ifndef PVFS_UCACHE_ENABLE +#define PVFS_UCACHE_ENABLE 1 +#endif + + +/* FD sets */ +#if 0 +#ifdef FD_SET +#undef FD_SET +#endif +#define FD_SET(d,fdset) \ +do { \ + pvfs_descriptor *pd; \ + pd = pvfs_find_descriptor(d); \ + if (pd) \ + { \ + __FD_SET(pd->true_fd,(fdset)); \ + } \ +} while(0) + +#ifdef FD_CLR +#undef FD_CLR +#endif +#define FD_CLR(d,fdset) \ +do { \ + pvfs_descriptor *pd; \ + pd = pvfs_find_descriptor(d); \ + if (pd) \ + { \ + __FD_CLR(pd->true_fd,(fdset)); \ + } \ +} while(0) + +#ifdef FD_ISSET +#undef FD_ISSET +#endif +#define FD_ISSET(d,fdset) \ +do { \ + pvfs_descriptor *pd; \ + pd = pvfs_find_descriptor(d); \ + if (pd) \ + { \ + 
__FD_ISSET(pd->true_fd,(fdset));\ + } \ +} while(0) +#endif + +#endif /* USRINT_H */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/common/dotconf/dotconf.c b/src/common/dotconf/dotconf.c index 3e3ac38..abc57be 100644 --- a/src/common/dotconf/dotconf.c +++ b/src/common/dotconf/dotconf.c @@ -31,6 +31,11 @@ #include #include +/* Windows support added by Sam Sampson */ +#ifdef WIN32 +#include +#endif + /* Added by Stephen W. Boyer * for wildcard support in Include file paths */ @@ -54,12 +59,19 @@ #include #include +#ifndef WIN32 #include #include +#endif #include #include "./dotconf.h" +#ifdef WIN32 +#define snprintf _snprintf +#define strcasecmp stricmp +#endif + #ifndef MIN #define MIN(a,b) ((a)<(b)?(a):(b)) #endif @@ -864,7 +876,11 @@ configfile_t *PINT_dotconf_create( PINT_dotconf_register_options(new, options); if (new->flags & CASE_INSENSITIVE) +#ifdef WIN32 + new->cmp_func = strnicmp; +#else new->cmp_func = strncasecmp; +#endif else new->cmp_func = strncmp; @@ -1130,10 +1146,17 @@ int PINT_dotconf_handle_question_mark( char *ext) { configfile_t *included; +#ifdef WIN32 + HANDLE dh, hfile; + WIN32_FIND_DATA find_data; +#else DIR *dh = 0; struct dirent *dirptr = 0; +#endif int i; + char d_name[CFG_MAX_FILENAME]; + char new_pre[CFG_MAX_FILENAME]; char already_matched[CFG_MAX_FILENAME]; @@ -1152,15 +1175,30 @@ int PINT_dotconf_handle_question_mark( pre_len = strlen(pre); +#ifdef WIN32 + if ((dh = FindFirstFile(path, &find_data)) != INVALID_HANDLE_VALUE) +#else if ((dh = opendir(path)) != NULL) +#endif { +#ifdef WIN32 + do +#else while ((dirptr = readdir(dh)) != NULL) - { - match_state = PINT_dotconf_question_mark_match(dirptr->d_name, pre, ext); +#endif + { +#ifdef WIN32 + if ((find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0) + continue; + strcpy(d_name, find_data.cFileName); +#else + strcpy(d_name, dirptr->d_name); +#endif + match_state = 
PINT_dotconf_question_mark_match(d_name, pre, ext); if (match_state >= 0) { - name_len = strlen(dirptr->d_name); + name_len = strlen(d_name); new_path_len = strlen(path) + name_len + strlen(ext) + 1; if (!alloced) @@ -1189,9 +1227,8 @@ int PINT_dotconf_handle_question_mark( } if (match_state == 1) - { - - strncpy(new_pre, dirptr->d_name, + { + strncpy(new_pre, d_name, (name_len > pre_len) ? (pre_len + 1) : pre_len); new_pre[(name_len > pre_len) ? (pre_len + 1) : pre_len] = '\0'; @@ -1232,15 +1269,32 @@ int PINT_dotconf_handle_question_mark( } - sprintf(new_path, "%s%s", path, dirptr->d_name); - + sprintf(new_path, "%s%s", path, d_name); + +#ifdef WIN32 + hfile = CreateFile(new_path, + GENERIC_READ, + FILE_SHARE_READ, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + NULL); + if (hfile != INVALID_HANDLE_VALUE) + { + CloseHandle(hfile); + } + else +#else if (access(new_path, R_OK)) +#endif { PINT_dotconf_warning(cmd->configfile, DCLOG_WARNING, ERR_INCLUDE_ERROR, "Cannot open %s for inclusion.\n" "IncludePath is '%s'\n", new_path, cmd->configfile->includepath); + free(new_path); + return -1; } @@ -1263,8 +1317,13 @@ int PINT_dotconf_handle_question_mark( } } +#ifdef WIN32 + while (FindNextFile(dh, &find_data)); + FindClose(dh); +#else closedir(dh); +#endif free(new_path); } @@ -1281,8 +1340,14 @@ int PINT_dotconf_handle_star( char *ext) { configfile_t *included; +#ifdef WIN32 + HANDLE dh, hfile; + WIN32_FIND_DATA find_data; +#else DIR *dh = 0; struct dirent *dirptr = 0; +#endif + char d_name[CFG_MAX_FILENAME]; char new_pre[CFG_MAX_FILENAME]; char new_ext[CFG_MAX_FILENAME]; @@ -1326,18 +1391,34 @@ int PINT_dotconf_handle_star( strncpy(new_ext, s_ext, t_ext_count); new_ext[t_ext_count] = '\0'; +#ifdef WIN32 + if ((dh = FindFirstFile(path, &find_data)) != INVALID_HANDLE_VALUE) +#else if ((dh = opendir(path)) != NULL) +#endif { +#ifdef WIN32 + do +#else while ((dirptr = readdir(dh)) != NULL) +#endif { - sub_count = 0; +#ifdef WIN32 + if ((find_data.dwFileAttributes & 
FILE_ATTRIBUTE_DIRECTORY) == 0) + continue; + + strcpy(d_name, find_data.cFileName); +#else + strcpy(d_name, dirptr->d_name); +#endif + sub_count = 0; t_ext_count = 0; - match_state = PINT_dotconf_star_match(dirptr->d_name, pre, s_ext); + match_state = PINT_dotconf_star_match(d_name, pre, s_ext); if (match_state >= 0) { - name_len = strlen(dirptr->d_name); + name_len = strlen(d_name); new_path_len = strlen(path) + name_len + strlen(s_ext) + 1; if (!alloced) @@ -1369,12 +1450,12 @@ int PINT_dotconf_handle_star( { if ((sub = - strstr((dirptr->d_name + pre_len), new_ext)) == NULL) + strstr((d_name + pre_len), new_ext)) == NULL) { continue; } - while (sub != dirptr->d_name) + while (sub != d_name) { sub--; sub_count++; @@ -1385,7 +1466,7 @@ int PINT_dotconf_handle_star( continue; } - strncpy(new_pre, dirptr->d_name, (sub_count + t_ext_count)); + strncpy(new_pre, d_name, (sub_count + t_ext_count)); new_pre[sub_count + t_ext_count] = '\0'; strcat(new_pre, new_ext); @@ -1425,15 +1506,32 @@ int PINT_dotconf_handle_star( } - sprintf(new_path, "%s%s", path, dirptr->d_name); - + sprintf(new_path, "%s%s", path, d_name); + +#ifdef WIN32 + hfile = CreateFile(new_path, + GENERIC_READ, + FILE_SHARE_READ, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + NULL); + if (hfile != INVALID_HANDLE_VALUE) + { + CloseHandle(hfile); + } + else +#else if (access(new_path, R_OK)) +#endif { PINT_dotconf_warning(cmd->configfile, DCLOG_WARNING, ERR_INCLUDE_ERROR, "Cannot open %s for inclusion.\n" "IncludePath is '%s'\n", new_path, cmd->configfile->includepath); + free(new_path); + return -1; } @@ -1452,8 +1550,13 @@ int PINT_dotconf_handle_star( } } +#ifdef WIN32 + while (FindNextFile(dh, &find_data)); + FindClose(dh); +#else closedir(dh); +#endif free(new_path); } @@ -1467,6 +1570,10 @@ DOTCONF_CB(dotconf_cb_include) char *filename = 0; configfile_t *included; +#ifdef WIN32 + HANDLE hfile; +#endif + char wild_card; char *path = 0; char *pre = 0; @@ -1519,8 +1626,22 @@ 
DOTCONF_CB(dotconf_cb_include) free(filename); return NULL; } - +#ifdef WIN32 + hfile = CreateFile(filename, + GENERIC_READ, + FILE_SHARE_READ, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + NULL); + if (hfile != INVALID_HANDLE_VALUE) + { + CloseHandle(hfile); + } + else +#else if (access(filename, R_OK)) +#endif { PINT_dotconf_warning(cmd->configfile, DCLOG_WARNING, ERR_INCLUDE_ERROR, "Cannot open %s for inclusion.\n" diff --git a/src/common/events/debug.h b/src/common/events/debug.h new file mode 100644 index 0000000..610051d --- /dev/null +++ b/src/common/events/debug.h @@ -0,0 +1,24 @@ +/************************************* + * File : debug.h + * Version : $Id: debug.h,v 1.2 2008-11-20 01:16:52 slang Exp $ + ************************************/ + +/* Author: Aroon Nataraj */ + +#ifndef _DEBUG_H_ +#define _DEBUG_H_ + +#define PFX TAU_NAME + +#ifdef TAU_DEBUG +#define dbg(format, arg...) printf(PFX ": " format "\n" , ## arg) +#define info(format, arg...) printf(PFX ": " format "\n" , ## arg) +#else /*TAU_DEBUG*/ +#define dbg(format, arg...) do {} while (0) +#define info(format, arg...) do {} while (0) +#endif /*TAU_DEBUG*/ + +#define err(format, arg...) printf(PFX ":Error: " format "\n" , ## arg) +#define warn(format, arg...) printf(PFX ":Warn: " format "\n" , ## arg) + +#endif /*_DEBUG_H_*/ diff --git a/src/common/events/fmt_api.h b/src/common/events/fmt_api.h new file mode 100644 index 0000000..2508df3 --- /dev/null +++ b/src/common/events/fmt_api.h @@ -0,0 +1,118 @@ + +#ifndef _FMT_API_H_ +#define _FMT_API_H_ + +/* Some macros * + ***************/ +#define TP_MAX_THREADS 1024 +#define TP_MAX_EVENTS 128 +#define TP_MAX_STR 256 +#define TAU_Log_get_event_number(THR) ((THR)->tau_no++) +#define TAUPVFS_MAX TP_MAX_THREADS + + +#include +#include + + +/* Ttf_closed_event_def: + * Stuct to hold descriptions about Start/Stop events + * , in particular, the format expected of the var-arg + * calls. 
+ */ +struct Ttf_closed_event_def { + char name[TP_MAX_STR]; + ff_format fmt_start; + int start_no; + ff_format fmt_end; + int end_no; + int index; + int inited; + + //cons + Ttf_closed_event_def() { + start_no = end_no = 0; + inited = 0; + name[0] = '\0'; + index = 0; + } + + Ttf_closed_event_def& operator=(const Ttf_closed_event_def& thecopy) { + strncpy(name, thecopy.name, TP_MAX_STR); + fmt_start.init(thecopy.fmt_start); + fmt_end.init(thecopy.fmt_end); + start_no = thecopy.start_no; + end_no = thecopy.end_no; + + inited = 1; + + return *this; + } + + void init(char* _name, char* _fmt_start, char* _fmt_end, int _index, int _start_no, int _end_no) { + strncpy(name, _name, TP_MAX_STR); + fmt_start.init(_fmt_start); + fmt_start.parse(); + fmt_end.init(_fmt_end); + fmt_end.parse(); + start_no = _start_no; + end_no = _end_no; + inited = 1; + } + + Ttf_closed_event_def(char* _name, char* _fmt_start, char* _fmt_end, int _index, int _start_no, int _end_no) { + init(_name, _fmt_start, _fmt_end, _index, _start_no, _end_no); + } + +}; + + +/* Ttf_thread_bundle: + * Several things are managed on a per-thread basis. + * This allows eliminating or minimizing locking. + * The thread-bundle, for lack of a better name, is + * meant to keep track of the per-thread state. + */ +struct Ttf_thread_bundle { + Ttf_closed_event_def* events; + Ttf_FileHandleT taufile; + int tau_no; + x_uint32 event_no; + + #define TP_SCRATCH_SZ (8*50) + char scratch[TP_SCRATCH_SZ]; + int scratch_pos; + + //cons + Ttf_thread_bundle() { + events = NULL; + tau_no = 0; + taufile = NULL; + event_no = 0; + scratch_pos = 0; + } + //des? + //TODO +}; + + +/* g_t_bundles: + * For now we declare a bunch of these bundle-ptrs + * and allocate them as and when required. + * TODO: should we do better? 
+ ****************************/ +extern Ttf_thread_bundle* g_t_bundles[TP_MAX_THREADS]; + + + +/* Process-level vars (safe to be globals - since process-level) + * these names are far too generic to be externally visible - + * TODO: give them a prefix * + ****************************/ +extern int g_pid; +extern char g_traceloc[TP_MAX_STR]; +extern char g_filepfx[TP_MAX_STR]; +extern int g_defbufsz; + +#endif + diff --git a/src/common/events/fmt_fsm.c b/src/common/events/fmt_fsm.c new file mode 100644 index 0000000..a67e447 --- /dev/null +++ b/src/common/events/fmt_fsm.c @@ -0,0 +1,420 @@ +/***************************************************** + * File : fmt_fsm.cpp + * Version : $Id: fmt_fsm.c,v 1.2 2008-11-20 01:16:52 slang Exp $ + ****************************************************/ + +/* Author: Aroon Nataraj */ + +#include "fmt_fsm.h" +#include +#include +#include + +int ff_format::ff_pattern::bfprint(FILE* fp, char* src, int src_sz, int *nowrote) { //print the value stored at src as text, chosen by type and v_long; *nowrote is v_size, or 0 together with a -1 return for an unsupported pattern + *nowrote = v_size; + + switch(type) { + case FF_U: + switch(v_long) { + case 0: + fprintf(fp, "%u ", *((unsigned int*)src)); + break; + case 1: + fprintf(fp, "%lu ", *((unsigned long int*)src)); + break; + case 2: + fprintf(fp, "%llu ", *((unsigned long long int*)src)); + break; + default: + //badness + *nowrote = 0; + return -1; + } + break; + + case FF_D: + switch(v_long) { + case 0: + fprintf(fp, "%d ", *((int*)src)); + break; + case 1: + fprintf(fp, "%ld ", *((long int*)src)); + break; + case 2: + fprintf(fp, "%lld ", *((long long int*)src)); + break; + default: + //badness + *nowrote = 0; + return -1; + } + + break; + + case FF_F: + switch(v_long) { + case 0: + fprintf(fp, "%f ", *((float*)src)); + break; + case 1: + fprintf(fp, "%lf ", *((double*)src)); + break; + case 2: + fprintf(fp, "%Lf ", *((long double*)src)); //a long double needs the L length modifier, %lf expects a double + break; + default: + //badness + *nowrote = 0; + return -1; + } + + break; + + case FF_C: + fprintf(fp, "%c ", *((char*)src)); + break; + + default: + *nowrote = 0; + return -1; + + } + +
return 0; +} + +int ff_format::ff_pattern::suck(va_list ap, char* space, int *nostored) { + + *nostored = v_size; + + switch(type) { + case FF_U: + switch(v_long) { + case 0: + *((unsigned int*)space) = va_arg(ap, unsigned int); + break; + case 1: + *((unsigned long int*)space) = va_arg(ap, unsigned long int); + break; + case 2: + *((unsigned long long int*)space) = va_arg(ap, unsigned long long int); + break; + default: + //badness + *nostored = 0; + return -1; + } + break; + + case FF_D: + switch(v_long) { + case 0: + *(( int*)space) = va_arg(ap, int); + break; + case 1: + *(( long int*)space) = va_arg(ap, long int); + break; + case 2: + *(( long long int*)space) = va_arg(ap, long long int); + break; + default: + //badness + *nostored = 0; + return -1; + } + + break; + + case FF_F: + switch(v_long) { + case 0: + { + //compiler says.... + //warning: float is promoted to double when passed through ... + //warning: (so you should pass double not a float to a va_args) + //so we do as the compiler gods tell us to + double d_tmp = va_arg(ap, double); + *(( float*)space) = (float)d_tmp; + } + break; + case 1: + *(( double*)space) = va_arg(ap, double); + break; + case 2: + *(( long double*)space) = va_arg(ap, long double); + break; + default: + //badness + *nostored = 0; + return -1; + } + + break; + + case FF_C: + { + //compiler says.... + //warning: char is promoted to int when passed through ... 
+ //warning: (so you should pass int not a char to a va_args) + //so we do as the compiler gods tell us to + int i_tmp = va_arg(ap, int); + *((unsigned char*)space) = (unsigned char)i_tmp; + } + break; + + default: + *nostored = 0; + return -1; + + } + + return 0; +} + +int ff_format::ff_pattern::parse(char* fmt, int *noread) { //scan fmt for the next % pattern (l, ll, u, d, c, f); returns 0 with the pattern text in v_patn, or -1 when fmt ends or the pattern is broken; *noread is the number of chars consumed + + int pos = 0, start_pos = 0; + char *cur = fmt; + + init(); + + while(1) { + switch(next_state) { + case FF_INIT: + if(eop) { + strncpy(v_patn, fmt + start_pos, pos - start_pos); + v_patn[pos - start_pos] = '\0'; //terminate right after the copied chars; pos is an offset into fmt, not into v_patn + *noread=pos; + return 0; + } + + if(err) { + err = 0; + init(); + if(cur[pos] == ' ' || cur[pos] == '\t' || cur[pos] == '\n' || cur[pos] == '\r' || cur[pos] == '\0') { + //whitespace ends this (broken)pattern + *noread=pos; + return -1; + + } + break; + } + + if(cur[pos] == '\0') { + //term + *noread=pos; + return -1; + } + + if(cur[pos++] == '%') { + start_pos = pos-1; + next_state = FF_PATN; + break; + } + break; + + case FF_PATN: + if(cur[pos] == 'l') { + next_state = FF_L; + pos++; + break; + } else if(cur[pos] == 'u') { + next_state = FF_U; + pos++; + break; + } else if(cur[pos] == 'd') { + next_state = FF_D; + pos++; + break; + } else if(cur[pos] == 'c') { + next_state = FF_C; + pos++; + break; + } else if(cur[pos] == 'f') { + next_state = FF_F; + pos++; + break; + } else { + //some badness.. unrecognized pattern + err = 1; + next_state = FF_INIT; + break; + } + break; + + case FF_U: + //terminal...?
yes , I think so + v_unsigned += 1; + if(v_unsigned > 1) { + //badness + err = 1; + next_state = FF_INIT; + break; + } + switch(v_long) { + case 0: + v_size = sizeof(unsigned int); + break; + case 1: + v_size = sizeof(unsigned long int); + break; + case 2: + v_size = sizeof(unsigned long long int); + break; + default: + //badness + break; + } + + type = FF_U; + + eop = 1; + next_state = FF_INIT; + break; + + case FF_L: + v_long += 1; + if(v_long > 2) { + //badness + err = 1; + next_state = FF_INIT; + break; + } + + //pos++; //dont incr here - already doing in FF_PATN + next_state = FF_PATN; + break; + + case FF_D: + switch(v_long) { + case 0: + v_size = sizeof(int); + break; + case 1: + v_size = sizeof(long int); + break; + case 2: + v_size = sizeof(long long int); + break; + default: + //badness + break; + } + + type = FF_D; + + //teminal state + eop = 1; + next_state = FF_INIT; + break; + + case FF_F: + switch(v_long) { + case 0: + v_size = sizeof(float); + break; + case 1: + v_size = sizeof(double); + break; + case 2: + v_size = sizeof(long double); + break; + default: + //badness + break; + } + + type = FF_F; + + //teminal state + eop = 1; + next_state = FF_INIT; + break; + + + break; + + case FF_C: + if((v_long > 0) || (v_unsigned > 0)) { + err = 1; + next_state = FF_INIT; + break; + } + + type = FF_C; + + v_size = sizeof(unsigned char); + eop = 1; + next_state = FF_INIT; + break; + + default: + //not good + break; + }//switch + + }//while + + return -1; +} + +int ff_format::suck(va_list ap, char* space, int* upto) { + int i, totread = 0, noread, err = 0; + for(i = 0; i< no_patns; i++) { + noread = 0; + err = patns[i].suck(ap, space+totread, &noread); + if(err != 0) { + //badness - we got to get out ... 
+ if(upto) *upto = totread; + return -1; + } else { + totread += noread; + } + + } + + if(upto) *upto = totread; + + return 0; +} + +int ff_format::parse() { //split v_fmt into patns[], appending each parsed pattern to v_parsed_fmt and its size to v_tot_size; returns the number of patterns held + int totread = 0, noread = 0, err = 0; + + while(v_fmt[totread] != '\0' && no_patns < (int)(sizeof(patns)/sizeof(patns[0]))) { //stop once the patns table is full + noread = 0; + err = patns[no_patns].parse(v_fmt+totread, &noread); + if(err != 0) { + //badness + } else { + strncat(v_parsed_fmt, patns[no_patns].v_patn, sizeof(v_parsed_fmt) - strlen(v_parsed_fmt) - 1); //bound is the room left in v_parsed_fmt + v_tot_size += patns[no_patns].v_size; + no_patns++; + } + totread += noread; + } + + return no_patns; +} + + +int ff_format::bfprint(FILE* fp, char* src, int src_sz, int* upto) { //print the values packed in src, one per parsed pattern; *upto is the number of bytes consumed; returns 0, or -1 when a pattern cannot be printed + int i, totwrote = 0, nowrote = 0, err = 0; + for(i = 0; (i< no_patns) && (totwrote + patns[i].v_size <= src_sz); i++) { //only read a value that fits in what is left of src + nowrote = 0; + err = patns[i].bfprint(fp, src+totwrote, (src_sz-totwrote), &nowrote); + if(err != 0) { + //badness - we got to get out ... + if(upto) *upto = totwrote; + return -1; + } else { + totwrote += nowrote; + } + } + + if(upto) *upto = totwrote; + + return 0; +} + + diff --git a/src/common/events/fmt_fsm.h b/src/common/events/fmt_fsm.h new file mode 100644 index 0000000..6bf7555 --- /dev/null +++ b/src/common/events/fmt_fsm.h @@ -0,0 +1,225 @@ +/***************************************************** + * File : fmt_fsm.h + * Version : $Id: fmt_fsm.h,v 1.2 2008-11-20 01:16:52 slang Exp $ + ****************************************************/ + +/* Author: Aroon Nataraj */ + +#ifndef _FMT_FSM_H +#define _FMT_FSM_H + +#include +#include +#include + +#include + +//per-format info (i.e. composed of patterns) +struct ff_format { + + //the states + enum {FF_INIT=0xbeef, FF_PATN, FF_L, FF_D, FF_F, FF_C, FF_U, FF_S, FF_MAX}; //FF_S unused now + + //the struct to hold per-pattern info + struct ff_pattern { + char v_patn[8]; //the pattern itself + int v_long; //flag that specifies how many longs have been encountered + int v_size; //the sizeof stuff + int v_unsigned; //is this unsigned?
//shouldnt really matter until reader + int next_state; + int last_state; + int this_state; + int eop; // end-of-pattern flag + int err; //signals error condition + int type; + + void init(const ff_pattern& thecopy) { + strncpy(v_patn, thecopy.v_patn, 8); + v_long = thecopy.v_long; + v_size = thecopy.v_size; + v_unsigned = thecopy.v_unsigned; + next_state = thecopy.next_state; + last_state = thecopy.last_state; + this_state = thecopy.this_state; + eop = thecopy.eop; + err = thecopy.err; + type = thecopy.type; + } + + void init() { + strncpy(v_patn, "\0", 8); + v_long = 0; + v_size = 0; + v_unsigned = 0; + next_state = last_state = this_state = FF_INIT; + eop = err = 0; + } + + ff_pattern() { + init(); + } + + ff_pattern(const ff_pattern& thecopy) { + init(thecopy); + } + + + int parse(char* fmt, int *noread); + + int suck(va_list ap, char* space, int *nostored); + + int bfprint(FILE* fp, char* src, int src_sz, int *nowrote); + + template < class integralType > + int promoteIntegral(char* src, int src_sz, int *nowrote, integralType* retVal); + }; + + char v_fmt[256]; //the format itself + char v_parsed_fmt[256]; //the format parsed from v_fmt + ff_pattern patns[16]; //16 patterns + int no_patns; // the no in above array + int v_tot_size; //tot size in bytes req to read whole fmt + int inited; + + ff_format() { + init(""); + } + + ff_format(char* _fmt) { + init(_fmt); + } + + ff_format(const ff_format& o) { + init(o); + } + + void init(const ff_format& thecopy) { + strncpy(v_fmt, thecopy.v_fmt, 256); + strncpy(v_parsed_fmt, thecopy.v_parsed_fmt, 256); + no_patns = thecopy.no_patns; + v_tot_size = thecopy.v_tot_size; + inited = thecopy.inited; + int i = 0; + for(i=0; i< no_patns; i++) { + patns[i].init(thecopy.patns[i]); + } + } + + void init(char* _fmt) { + v_fmt[0] = '\0'; + v_parsed_fmt[0] = '\0'; + v_tot_size = no_patns = 0; + + if((!_fmt) || (!strcmp("", _fmt))) { + inited = 0; + return; + } + + strncpy(v_fmt, _fmt, 256); + + inited = 1; + } + + int parse(); + 
int suck(va_list ap, char* space, int* upto); + int bfprint(FILE* fp, char* src, int src_sz, int* upto); + template < class integralType > + int promoteIntegral(int patn_index, char* src, int src_sz, int *upto, integralType* retVal); +}; + + +template +int ff_format::ff_pattern::promoteIntegral(char* src, int src_sz, int *nowrote, integralType* retVal) { + *nowrote = v_size; + + switch(type) { + case FF_U: + switch(v_long) { + case 0: + *retVal = (integralType)*((unsigned int*)src); + break; + case 1: + *retVal = (integralType)*((unsigned long int*)src); + break; + case 2: + *retVal = (integralType)*((unsigned long long int*)src); + break; + default: + //badness + *nowrote = 0; + return -1; + } + break; + + case FF_D: + switch(v_long) { + case 0: + *retVal = (integralType)*((int*)src); + break; + case 1: + *retVal = (integralType)*((long int*)src); + break; + case 2: + *retVal = (integralType)*((long long int*)src); + break; + default: + //badness + *nowrote = 0; + return -1; + } + + break; + + case FF_F: /* not integralType - then dont do this */ + switch(v_long) { + case 0: + case 1: + case 2: + default: + //badness + *nowrote = 0; + return -1; + } + + break; + + case FF_C: + *retVal = (integralType)*((char*)src); + break; + + default: + *nowrote = 0; + return -1; + + } + + return 0; +} + +template +int ff_format::promoteIntegral(int patn_index, char* src, int src_sz, int *upto, integralType* retVal) { + int i, totwrote = 0, nowrote = 0, err = 0; + if(patn_index >= no_patns) { + return -1; + } + + nowrote = 0; + err = patns[patn_index].promoteIntegral(src, src_sz, &nowrote, retVal); + if(err != 0) { + //badness - we got to get out ... 
+ if(upto) *upto = 0; + return -1; + } + + if(upto) *upto = nowrote; + + return 0; +} + + +#endif //_FMT_FSM_H + +/*************************************************************************** + * $RCSfile: fmt_fsm.h,v $ $Author: slang $ + * $Revision: 1.2 $ $Date: 2008-11-20 01:16:52 $ + ***************************************************************************/ diff --git a/src/common/events/module.mk.in b/src/common/events/module.mk.in new file mode 100644 index 0000000..c856a2c --- /dev/null +++ b/src/common/events/module.mk.in @@ -0,0 +1,19 @@ + + +ifneq (,$(BUILD_TAU)) + +DIR := src/common/events +files := pvfs_tau_api.c fmt_fsm.c + +src := $(patsubst %,$(DIR)/%,$(files)) + +LIBSRC += $(src) +SERVERSRC += $(src) +LIBBMISRC += $(src) + +ifneq (,@TAU_INCS@) +MODCFLAGS_$(DIR)/fmt_fsm.c := -x c++ @TAU_INCS@ +MODCFLAGS_$(DIR)/pvfs_tau_api.c := -x c++ @TAU_INCS@ +endif + +endif # BUILD_TAU diff --git a/src/common/events/pvfs_tau_api.c b/src/common/events/pvfs_tau_api.c new file mode 100644 index 0000000..101ddce --- /dev/null +++ b/src/common/events/pvfs_tau_api.c @@ -0,0 +1,618 @@ +/***************************************************** + * File : pvfs_tau_api.cpp + * Version : $Id: pvfs_tau_api.c,v 1.2 2008-11-20 01:16:52 slang Exp $ + ****************************************************/ + +/* Author: Aroon Nataraj */ + +#include +#include +#include +#include +#include +#include +#include + +#include +using namespace std; + +#include +#include +#include "TAU_tf.h" + +#include + +#include + +/* Ttf_closed_event_def: + * Stuct to hold descriptions about Start/Stop events + * , in particular, the format expected of the var-arg + * calls. 
+ */ +struct Ttf_closed_event_def { + char name[TP_MAX_STR]; + ff_format fmt_start; + int start_no; + ff_format fmt_end; + int end_no; + int index; + int inited; + + //cons + Ttf_closed_event_def() { + start_no = end_no = 0; + inited = 0; + name[0] = '\0'; + index = 0; + } + + Ttf_closed_event_def& operator=(const Ttf_closed_event_def& thecopy) { + strncpy(name, thecopy.name, TP_MAX_STR); + fmt_start.init(thecopy.fmt_start); + fmt_end.init(thecopy.fmt_end); + start_no = thecopy.start_no; + end_no = thecopy.end_no; + index = thecopy.index; + + inited = 1; + + return *this; + } + + void init(char* _name, char* _fmt_start, char* _fmt_end, int _index, int _start_no, int _end_no) { + strncpy(name, _name, TP_MAX_STR); + fmt_start.init(_fmt_start); + fmt_start.parse(); + fmt_end.init(_fmt_end); + fmt_end.parse(); + start_no = _start_no; + end_no = _end_no; + index = _index; + inited = 1; + } + + Ttf_closed_event_def(char* _name, char* _fmt_start, char* _fmt_end, int _index, int _start_no, int _end_no) { + init(_name, _fmt_start, _fmt_end, _index, _start_no, _end_no); + } + +}; + + +/* Ttf_thread_bundle: + * Several things are managed on a per-thread basis. + * This allows eliminating or minimizing locking. + * The thread-bundle, for lack of a better name, is + * meant to keep track of the per-thread state. + */ +struct Ttf_thread_bundle { + Ttf_closed_event_def* events; + Ttf_FileHandleT taufile; + int tau_no; + x_uint32 event_no; + + #define TP_SCRATCH_SZ (8*50) + char scratch[TP_SCRATCH_SZ]; + int scratch_pos; + + //cons + Ttf_thread_bundle() { + events = NULL; + tau_no = 0; + taufile = NULL; + event_no = 0; + scratch_pos = 0; + } + //des? + //TODO +}; + + +/* g_t_bundles: + * For now we declare a bunch of these bundle-ptrs + * and allocate them as and when required. + * TODO: should we do better? 
+ ****************************/ +extern Ttf_thread_bundle* g_t_bundles[TP_MAX_THREADS]; + + +//Debugging stuff +//--------------- +//#define TAU_DEBUG 1 //comment out to unset debugging +#define TAU_NAME "pvfs_tau_api" +#include "debug.h" + + +//Declarations for some TAU routines (these arent in TauAPI.h now) +//---------------------------------------------------------------- +extern "C" { +extern double Tau_get_timestamp(int tid); +extern int Tau_get_tid(void); +} + + +//Fwd decl of utils +//----------------- +int tau_thread_init(int tid, int max_elements); +static int init_master_bundle(); +static Ttf_thread_bundle* get_master_bundle(); +static void put_master_bundle(Ttf_thread_bundle* master_bundle); +static Ttf_thread_bundle* get_t_bundle(int tid); +static int refresh_bundle(Ttf_thread_bundle *gbl_bundle, Ttf_thread_bundle *lc_bundle); + + +//We need an enclosing dummy main for instances where no such overall enclosing events are present +static int PINT_dummy_main; + + +//The Global Master bundle and locks +static Ttf_thread_bundle* g_master_bundle = NULL; +static pthread_mutex_t ttf_MasterBundleMutex; +static pthread_mutexattr_t ttf_MasterBundleMutexAttr; + +//The per-thread state management bundles +Ttf_thread_bundle* g_t_bundles[TP_MAX_THREADS]; + + +//Process-level vars (safe to be globals - since process-level) +//these names are far too generic to be externally visible - +//TODO: give them a prefix +//------------------------- +int g_pid = -1; +char g_traceloc[TP_MAX_STR]; +char g_filepfx[TP_MAX_STR]; +int g_defbufsz = -1; + + +//The API definition follows +//-------------------------- + +//////////////////////////////////////////////////////////////////////////////// +extern "C" int Ttf_init(int process_id, char* folder, char* filename_prefix, int buffer_size) { + + info("Ttf_init: pid:%d folder:%s pfx:%s bsize:%d\n", process_id, folder, filename_prefix, buffer_size); + + g_pid = process_id; + strncpy(g_traceloc, folder, TP_MAX_STR); + 
strncpy(g_filepfx, filename_prefix, TP_MAX_STR); + g_defbufsz = buffer_size; + + TAU_INIT(NULL, NULL); +#ifndef TAU_MPI + TAU_PROFILE_SET_NODE(0); +#endif /* TAU_MPI */ + + if(init_master_bundle()) { + err("Ttf_init: init_master_bundle() failed."); + return -1; + } + + int i = 0; + for(i=0; ibuffer_size; + + info("Ttf_thread_start: *info:%p\t name:%s\t max:%d\n", info, info->name, info->max); + + Tau_get_tid(); //this is sort of a hack. Internal code assumes this code path is traversed before other thread-related code. To be fixed... + RtsLayer::RegisterThreadInGroup(info->name, info->max, pisnew); + *thread_id = Tau_get_tid(); + if(tmp_sz <= 0) tmp_sz = g_defbufsz; + err = tau_thread_init(*thread_id, tmp_sz); + + Ttf_thread_bundle *bundle = get_t_bundle(*thread_id); + + //copy over master-bundle to local bundle + Ttf_thread_bundle *gbl_bundle = get_master_bundle(); + refresh_bundle(gbl_bundle, bundle); + put_master_bundle(gbl_bundle); + + info("Ttf_thread_start: Exit\n"); + + return err; +} + + +//////////////////////////////////////////////////////////////////////////////// +extern "C" int Ttf_thread_stop() { + int tid = Tau_get_tid(); + info("Ttf_thread_stop:%d\n", tid); + RtsLayer::ReleaseThreadInGroup(); + info("Ttf_thread_stop: Exit: %d\n", tid); + return 0; +} + + +//////////////////////////////////////////////////////////////////////////////// +extern "C" int Ttf_finalize(void) +{ + int i, no_t = 0; + + info("Ttf_finalize: Enter.\n"); + + for(i =0; itaufile == NULL)) { + continue; + } + Ttf_LeaveState(g_t_bundles[i]->taufile, (x_uint64) Tau_get_timestamp(i), g_pid, i, PINT_dummy_main); + Ttf_CloseOutputFile(g_t_bundles[i]->taufile); + no_t++; + } + + //TODO - finalize may be called before end of program - MUST not leak mem - clean up here + + info("Ttf_finalize: Exit (finned %d threads).\n", no_t); + + return 0; +} + + +//////////////////////////////////////////////////////////////////////////////// +extern "C" int Ttf_event_define(char* name, char* 
format_start_event_info, char* format_end_event_info, int* event_type) { + + int tid = Tau_get_tid(); + + info("Ttf_event_define: tid:%d name:%s fmt_start:%s fmt_end:%s *event_type:%p\n", tid, name, format_start_event_info, format_end_event_info, event_type); + + //1. Register event definition on global master bundle + Ttf_thread_bundle *gbl_bundle = get_master_bundle(); + + int the_type = TAU_Log_get_event_number(gbl_bundle); + *event_type = the_type; + + int start_type = 0, end_type = 0; + + start_type = TAU_Log_get_event_number(gbl_bundle); + end_type = TAU_Log_get_event_number(gbl_bundle); + + gbl_bundle->events[the_type].init(name, format_start_event_info, format_end_event_info, *event_type, start_type, end_type); + + //2. Bring local bundle up to date with master bundle + Ttf_thread_bundle *lc_bundle = get_t_bundle(tid); + + if(lc_bundle->events == NULL) { + lc_bundle->events = new Ttf_closed_event_def[TP_MAX_EVENTS]; + if(!lc_bundle->events) { + err("refresh_bundle: new lc_bundle->events returned NULL.\n"); + return -1; + } + } + + if(lc_bundle->tau_no < gbl_bundle->tau_no) { + refresh_bundle(gbl_bundle, lc_bundle); + } else { + err("Ttf_event_define: No.Evs in local bundle(%d) >= global bundle(%d).", lc_bundle->tau_no, gbl_bundle->tau_no); + } + + //3. Return master bundle + put_master_bundle(gbl_bundle); + + info("Ttf_event_define: name:%s event_type:%d\t *bundle:%p *event:%p\n", name, *event_type, lc_bundle, &(lc_bundle->events[the_type])); + + return 0; +} + + +//////////////////////////////////////////////////////////////////////////////// +extern "C" int Ttf_EnterState_info(int event_type, int process_id, int* thread_id, int* event_id, ...) 
{ + + int ret; + va_list ap; + va_start(ap, event_id); + ret = Ttf_EnterState_info_va(event_type, process_id, thread_id, event_id, ap); + va_end(ap); + return ret; +} + +//////////////////////////////////////////////////////////////////////////////// +extern "C" int Ttf_EnterState_info_va(int event_type, int process_id, int* thread_id, int* event_id, va_list ap) { + int tid = Tau_get_tid(); + + x_uint64 tstamp = (x_uint64) Tau_get_timestamp(tid); + + info("Ttf_EnterState_info: tid:%d event_type:%d pid:%d *event_id:%p\n", tid, event_type, process_id, event_id); + + Ttf_thread_bundle *bundle = get_t_bundle(tid); + + if(bundle->events == NULL) { + err("Ttf_EnterState_info: lc_bundle->events is NULL - no events defined yet? (tau_no==%d)", bundle->tau_no); + return -1; + } + + //check if "lookup" fails - requires bringing local bundle uptodate + if(event_type >= bundle->tau_no) { + Ttf_thread_bundle *gbl_bundle = get_master_bundle(); + refresh_bundle(gbl_bundle, bundle); + put_master_bundle(gbl_bundle); + } + + //if lookup continues to fail... 
+ if(event_type >= bundle->tau_no) { + err("Ttf_EnterState_info: event_type(%d) not yet defined.", event_type); + return -1; + } + + Ttf_closed_event_def *event_info = &(bundle->events[event_type]); + + Ttf_EnterState(bundle->taufile, tstamp, g_pid, tid, event_type); + + *event_id = bundle->event_no++; + + x_uint32 eid = *event_id; + + bundle->scratch_pos = 0; + + memcpy(bundle->scratch+bundle->scratch_pos, &(eid), sizeof(eid));//1st encode the event id + bundle->scratch_pos += sizeof(eid); + + //suck from var-args + if(event_info->fmt_start.v_tot_size > 0) { + int tmp_sz = 0; + event_info->fmt_start.suck(ap, bundle->scratch + bundle->scratch_pos, &tmp_sz); + bundle->scratch_pos += tmp_sz; + } + + memset(bundle->scratch + bundle->scratch_pos, 0, 8); //just to zero out any unused fragment (since record is 8bytes long - memsetting 8 bytes is enough) + for(x_uint64* ptr = (x_uint64*)bundle->scratch; (unsigned long) ptr < (unsigned long)(bundle->scratch + bundle->scratch_pos); ptr++) { + Ttf_EventTrigger(bundle->taufile, tstamp, g_pid, tid, event_info->start_no, *ptr); + } + + info("Enter(After): Encoded eid:%d into scratch as:%d\n", eid, (*((x_uint32*)bundle->scratch))); + + info("Ttf_EnterState_info: *bundle:%p\t *event_info:%p\t v_parsed_fmt:%s\t v_tot_size:%d\n", bundle, event_info, event_info->fmt_start.v_parsed_fmt, event_info->fmt_start.v_tot_size); + + info("Ttf_EnterState_info: Exit: tid:%d event_type:%d pid:%d event_id:%d\n", tid, event_type, process_id, *event_id); + + return 0; +} + + +extern "C" int Ttf_LeaveState_info(int event_type, int process_id, int* thread_id, int event_id, ...) 
{ + + int ret; + va_list ap; + + va_start(ap, event_id); + ret = Ttf_LeaveState_info_va(event_type, process_id, thread_id, event_id, ap); + va_end(ap); + return ret; +} + +//////////////////////////////////////////////////////////////////////////////// +extern "C" int Ttf_LeaveState_info_va(int event_type, int process_id, int* thread_id, int event_id, va_list ap) { + int tid = Tau_get_tid(); + + x_uint64 tstamp = (x_uint64) Tau_get_timestamp(tid); + + info("Ttf_LeaveState_info: tid:%d event_type:%d pid:%d event_id:%d\n", tid, event_type, process_id, event_id); + + Ttf_thread_bundle *bundle = get_t_bundle(tid); + + if(bundle->events == NULL) { + err("Ttf_LeaveState_info: lc_bundle->events is NULL - no events defined yet? (tau_no==%d)", bundle->tau_no); + return -1; + } + + //check if "lookup" fails - requires bringing local bundle uptodate + if(event_type >= bundle->tau_no) { + Ttf_thread_bundle *gbl_bundle = get_master_bundle(); + refresh_bundle(gbl_bundle, bundle); + put_master_bundle(gbl_bundle); + } + //if lookup continues to fail... 
+ if(event_type >= bundle->tau_no) { + err("Ttf_LeaveState_info: event_type(%d) not yet defined.", event_type); + return -1; + } + + + Ttf_closed_event_def *event_info = &(bundle->events[event_type]); + + Ttf_LeaveState(bundle->taufile, tstamp, g_pid, tid, event_type); + + x_uint32 eid = event_id; + + bundle->scratch_pos = 0; + + memcpy(bundle->scratch + bundle->scratch_pos, &(eid), sizeof(eid)); + bundle->scratch_pos += sizeof(eid); + + if(event_info->fmt_end.v_tot_size > 0) { + int tmp_sz = 0; + event_info->fmt_end.suck(ap, bundle->scratch + bundle->scratch_pos, &tmp_sz); + bundle->scratch_pos += tmp_sz; + } + + memset(bundle->scratch + bundle->scratch_pos, 0, 8); //just to zero out any unused fragment (since record is 8bytes long - memsetting 8 bytes is enough) + for(x_uint64* ptr = (x_uint64*)bundle->scratch; (unsigned long) ptr < (unsigned long)(bundle->scratch + bundle->scratch_pos); ptr++) { + Ttf_EventTrigger(bundle->taufile, tstamp, g_pid, tid, event_info->end_no, *ptr); + } + + info("Leave(After): Encoded eid:%d into scratch as:%d\n", eid, (*((x_uint32*)bundle->scratch))); + + //may be the event-no should be uniq? + //bundle->event_no--; + + info("Ttf_LeaveState_info: Exit: tid:%d event_type:%d pid:%d event_id:%d\n", tid, event_type, process_id, event_id); + + return 0; +} + + +//////////////////////////////////////////////////////////////////////////////// +extern "C" int Ttf_LogEvent_info(int event_type, int process_id, int* thread_id, int* event_id, double start_time, double end_time, ...) { + int err = 0; + + if((err = Ttf_EnterState_info(event_type, process_id, thread_id, event_id)) != 0) { + //error...!!! + err("Ttf_LogEvent_info: Error from Ttf_EnterState_info: %d. Returning.\n", err); + return err; + } + + if((err = Ttf_LeaveState_info(event_type, process_id, thread_id, *event_id)) != 0) { + //error...!!! + err("Ttf_LogEvent_info: Error from Ttf_LeaveState_info: %d. 
Returning.\n", err); + return err; + } + + return 0; +} + + +//The Utils/Helpers of the API +//---------------------------- + +//////////////////////////////////////////////////////////////////////////////// +static Ttf_thread_bundle* get_t_bundle(int tid) { + if(g_t_bundles[tid] == 0) { + g_t_bundles[tid] = new Ttf_thread_bundle(); + } + return g_t_bundles[tid]; +} + + +//////////////////////////////////////////////////////////////////////////////// +static int init_master_bundle() { + //setup the mutex + pthread_mutexattr_init(&ttf_MasterBundleMutexAttr); + pthread_mutex_init(&ttf_MasterBundleMutex, &ttf_MasterBundleMutexAttr); + + //allocate and inialize the bundle + if(g_master_bundle != NULL) { + err("WARN: init_master_bundle: g_master_bundle(%p) != NULL\n", g_master_bundle); + } + g_master_bundle = new Ttf_thread_bundle(); + if(!g_master_bundle) { + err("init_master_bundle: new g_master_bundle() returned NULL.\n"); + return -1; + } + g_master_bundle->events = new Ttf_closed_event_def[TP_MAX_EVENTS]; + if(!g_master_bundle->events) { + err("init_master_bundle: new g_master_bundle->events returned NULL.\n"); + return -1; + } + + PINT_dummy_main = TAU_Log_get_event_number(g_master_bundle); + return 0; +} + + +//////////////////////////////////////////////////////////////////////////////// +static Ttf_thread_bundle* get_master_bundle() { + //lock mutex and return master bundle + pthread_mutex_lock(&ttf_MasterBundleMutex); + return g_master_bundle; +} + + +//////////////////////////////////////////////////////////////////////////////// +static void put_master_bundle(Ttf_thread_bundle* master_bundle) { + //unlock mutex and noop + pthread_mutex_unlock(&ttf_MasterBundleMutex); +} + + +//////////////////////////////////////////////////////////////////////////////// +int tau_thread_init(int tid, int max_elements) { + + Ttf_thread_bundle *bundle = NULL; + + info("tau_thread_init: tid:%d max_elements:%d\n", tid, max_elements); + + bundle = get_t_bundle(tid); + + char 
trcname[TP_MAX_STR*3]; + char edfname[TP_MAX_STR*3]; + + if(bundle->taufile != NULL) { + info("tau_thread_init: tid:%d Exit: Already inited. returning.\n", tid); + return 0; + } + + snprintf(trcname, (TP_MAX_STR*3), "%s/%s.%d.%d.0.trc", g_traceloc, g_filepfx, g_pid, tid); + snprintf(edfname, (TP_MAX_STR*3), "%s/%s.%d.%d.edf", g_traceloc, g_filepfx, g_pid, tid); + + bundle->taufile = Ttf_OpenFileForOutput_wsize(trcname,edfname, max_elements); + + bundle->tau_no = 0; + + Ttf_DefThread(bundle->taufile, g_pid, tid, "GENTHREAD"); + + PINT_dummy_main = TAU_Log_get_event_number(bundle); + Ttf_DefStateGroup(bundle->taufile, "TAU_DEFAULT", 1); + Ttf_DefState(bundle->taufile, PINT_dummy_main, "main", 1); + Ttf_EnterState(bundle->taufile, (x_uint64) Tau_get_timestamp(tid), g_pid, tid, PINT_dummy_main); + Ttf_FlushTrace(bundle->taufile); + + info("tau_thread_init: tid:%d Exit.\n", tid); + + return 0; +} + + +//////////////////////////////////////////////////////////////////////////////// +//Assumes any required locking has been performed by caller (no locks taken/released here) +static int refresh_bundle(Ttf_thread_bundle *gbl_bundle, Ttf_thread_bundle *lc_bundle) { + int no_copied = 0; + + if(lc_bundle->events == NULL) { + lc_bundle->events = new Ttf_closed_event_def[TP_MAX_EVENTS]; + if(!lc_bundle->events) { + err("refresh_bundle: new lc_bundle->events returned NULL.\n"); + return -1; + } + } + + while(lc_bundle->tau_no < gbl_bundle->tau_no) { + int lc_no = TAU_Log_get_event_number(lc_bundle); + TAU_Log_get_event_number(lc_bundle); + TAU_Log_get_event_number(lc_bundle); + + lc_bundle->events[lc_no] = gbl_bundle->events[lc_no]; + Ttf_closed_event_def* this_ev = &(lc_bundle->events[lc_no]); + + Ttf_DefState(lc_bundle->taufile, this_ev->index, this_ev->name, 1); + + char start_name[256]; + if(this_ev->fmt_start.no_patns > 0) { + snprintf(start_name, 256, "_xxSTART_%s_FMT_%s%s", this_ev->name, "%d", this_ev->fmt_start.v_parsed_fmt); + } else { + snprintf(start_name, 256, 
"_xxSTART_%s_FMT_%s", this_ev->name, "%d"); + } + Ttf_DefUserEvent(lc_bundle->taufile, this_ev->start_no, start_name, 0); + + char end_name[256]; + if(this_ev->fmt_end.no_patns > 0) { + snprintf(end_name, 256, "_xxSTOP_%s_FMT_%s%s", this_ev->name, "%d", this_ev->fmt_end.v_parsed_fmt); + } else { + snprintf(end_name, 256, "_xxSTOP_%s_FMT_%s", this_ev->name, "%d"); + } + Ttf_DefUserEvent(lc_bundle->taufile, this_ev->end_no, end_name, 0); + + no_copied++; + }//while + + return no_copied; +} + + +//notes +// we could wrap the thread_func as in here.... +//rc = tau_pthread_group_create(thread_pointer_array[k], &thread_attr, (void *) run_tau_version, +// (void*)threaddata[k], "trialgroup", number_of_threads_in_group); +// + diff --git a/src/common/events/pvfs_tau_api.h b/src/common/events/pvfs_tau_api.h new file mode 100644 index 0000000..51564de --- /dev/null +++ b/src/common/events/pvfs_tau_api.h @@ -0,0 +1,56 @@ +/***************************************************** + * File : pvfs_tau_api.h + * Version : $Id: pvfs_tau_api.h,v 1.2 2008-11-20 01:16:52 slang Exp $ + ****************************************************/ + +/* Author: Aroon Nataraj */ +#ifndef _PVFS_TAU_API_H +#define _PVFS_TAU_API_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* Some macros * + ***************/ +#define TP_MAX_THREADS 1024 +#define TP_MAX_EVENTS 128 +#define TP_MAX_STR 256 +#define TAU_Log_get_event_number(THR) ((THR)->tau_no++) +#define TAUPVFS_MAX TP_MAX_THREADS + +#include +#include + +/* tau_thread_group_info: + * Grouping threads based on equiv-classes. Here + * this functionality is used to allow multiple + * threads (which are not alive at the same time) to + * share a single trace file. 
+ */ +struct tau_thread_group_info { + char name[20]; + int max; + int blocking; + int buffer_size; +}; + +int Ttf_init( + int process_id, char* folder, char* filename_prefix, int buffer_size); +int Ttf_finalize(void); + +int Ttf_thread_start( + struct tau_thread_group_info* info, int* thread_id, int* pisnew); +int Ttf_thread_stop(void); + +int Ttf_event_define(char* name, char* format_start_event_info, char* format_end_event_info, int* event_type); +int Ttf_EnterState_info(int event_type, int process_id, int* thread_id, int* event_id, ...); +int Ttf_EnterState_info_va(int event_type, int process_id, int* thread_id, int* event_id, va_list ap); +int Ttf_LeaveState_info(int event_type, int process_id, int* thread_id, int event_id, ...); +int Ttf_LeaveState_info_va(int event_type, int process_id, int* thread_id, int event_id, va_list ap); + +#ifdef __cplusplus +} +#endif + +#endif /* _PVFS_TAU_API_H */ diff --git a/src/common/gen-locks/gen-locks.c b/src/common/gen-locks/gen-locks.c index 1876dbb..6a49211 100644 --- a/src/common/gen-locks/gen-locks.c +++ b/src/common/gen-locks/gen-locks.c @@ -77,7 +77,7 @@ int gen_posix_mutex_trylock( /* * gen_mutex_destroy() * - * uninitializes the mutex. + * uninitializes the mutex and frees all memory associated with it. * * returns 0 on success, -errno on failure. 
*/ @@ -99,6 +99,42 @@ pthread_t gen_posix_thread_self(void) return pthread_self(); } +int gen_posix_cond_destroy(pthread_cond_t *cond) +{ + if(!cond) + { + return -EINVAL; + } + pthread_cond_destroy(cond); + return 0; +} + +int gen_posix_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mut) +{ + return pthread_cond_wait(cond, mut); +} + +int gen_posix_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mut, + const struct timespec *abstime) +{ + return pthread_cond_timedwait(cond, mut, abstime); +} + +int gen_posix_cond_signal(pthread_cond_t *cond) +{ + return pthread_cond_signal(cond); +} + +int gen_posix_cond_broadcast(pthread_cond_t *cond) +{ + return pthread_cond_broadcast(cond); +} + +int gen_posix_cond_init(pthread_cond_t *cond, pthread_condattr_t *attr) +{ + return pthread_cond_init(cond, attr); +} + #endif /* diff --git a/src/common/gen-locks/gen-locks.h b/src/common/gen-locks/gen-locks.h index 6bf0f77..ac8f98f 100644 --- a/src/common/gen-locks/gen-locks.h +++ b/src/common/gen-locks/gen-locks.h @@ -21,48 +21,162 @@ #define __GEN_LOCKS_H #include +#ifndef WIN32 +#include +#endif /* we will make posix locks the default unless turned off for now. 
*/ /* this is especially important for development in which case the locks * should really be enabled in order to verify proper operation */ -#if !defined(__GEN_NULL_LOCKING__) && !defined(__GEN_POSIX_LOCKING__) +#if !defined(__GEN_NULL_LOCKING__) +#ifdef WIN32 +#if !defined(__GEN_WIN_LOCKING__) +#define __GEN_WIN_LOCKING__ +#endif +#else +#if !defined(__GEN_POSIX_LOCKING__) #define __GEN_POSIX_LOCKING__ #endif +#endif +#endif #ifdef __GEN_POSIX_LOCKING__ #include /* function prototypes for specific locking implementations */ -int gen_posix_mutex_lock( - pthread_mutex_t * mut); -int gen_posix_mutex_unlock( - pthread_mutex_t * mut); -int gen_posix_mutex_trylock( - pthread_mutex_t * mut); -int gen_posix_mutex_destroy( - pthread_mutex_t * mut); -int gen_posix_mutex_init( - pthread_mutex_t * mut); +int gen_posix_mutex_lock(pthread_mutex_t * mut); +int gen_posix_mutex_unlock(pthread_mutex_t * mut); +int gen_posix_mutex_trylock(pthread_mutex_t * mut); +pthread_mutex_t *gen_posix_mutex_build(void); +int gen_posix_mutex_destroy(pthread_mutex_t * mut); +int gen_posix_mutex_init(pthread_mutex_t * mut); pthread_t gen_posix_thread_self(void); +int gen_posix_cond_init(pthread_cond_t *cond, pthread_condattr_t *attr); +int gen_posix_cond_destroy(pthread_cond_t *cond); +int gen_posix_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mut); +int gen_posix_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mut, + const struct timespec *abstime); +int gen_posix_cond_signal(pthread_cond_t *cond); +int gen_posix_cond_broadcast(pthread_cond_t *cond); + typedef pthread_mutex_t gen_mutex_t; typedef pthread_t gen_thread_t; +typedef pthread_cond_t gen_cond_t; #define GEN_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER; +#define GEN_COND_INITIALIZER PTHREAD_COND_INITIALIZER; + +#ifdef __USE_GNU +/* Support for custom static initializer for a processor-shared pthread mutex.*/ +# if _POSIX_THREAD_PROCESS_SHARED != -1 +# if __WORDSIZE == 64 +# define GEN_SHARED_MUTEX_INITIALIZER_NP \ + { { 0, 0, 
0, 0, 128, 0, { 0, 0 } } } +# else +# define GEN_SHARED_MUTEX_INITIALIZER_NP \ + { { 0, 0, 0, 0, 128, { 0 } } } +# endif /* __WORDSIZE */ +# endif /* _POSIX_THREAD_PROCESS_SHARED */ + +/* Support for custom static initializer for a recursive pthread mutex */ +# if __WORDSIZE == 64 +# define GEN_RECURSIVE_MUTEX_INITIALIZER_NP \ + { { 0, 0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, 0, { 0, 0 } } } +# else +# define GEN_RECURSIVE_MUTEX_INITIALIZER_NP \ + { { 0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, 0, { 0 } } } +# endif /* __WORDSIZE */ +#endif /* __USE_GNU */ + #define gen_mutex_lock(m) gen_posix_mutex_lock(m) #define gen_mutex_unlock(m) gen_posix_mutex_unlock(m) #define gen_mutex_trylock(m) gen_posix_mutex_trylock(m) #define gen_mutex_destroy(m) gen_posix_mutex_destroy(m) #define gen_mutex_init(m) gen_posix_mutex_init(m) #define gen_thread_self() gen_posix_thread_self() -#endif /* __GEN_POSIX_LOCKING__ */ +#define gen_cond_init(c) gen_posix_cond_init(c, NULL) +#define gen_cond_destroy(c) gen_posix_cond_destroy(c) +#define gen_cond_wait(c, m) gen_posix_cond_wait(c, m) +#define gen_cond_timedwait(c, m, s) gen_posix_cond_timedwait(c, m, s) +#define gen_cond_signal(c) gen_posix_cond_signal(c) +#define gen_cond_broadcast(c) gen_posix_cond_broadcast(c) + +#elif defined (__GEN_WIN_LOCKING__) + +#include + +typedef HANDLE gen_mutex_t; +typedef HANDLE gen_thread_t; + +/* Implementation based on Pthreads-win32 - POSIX Threads Library for Win32 + * Copyright (C) 1998 John E. 
Bossom + * Copyright (C) 1999,2005 Pthreads-win32 contributors + */ +struct timespec +{ + time_t tv_sec; + long int tv_nsec; +}; + +typedef struct gen_cond_t_ *gen_cond_t; +struct gen_cond_t_ +{ + long nWaitersBlocked; /* Number of threads blocked */ + long nWaitersGone; /* Number of threads timed out */ + long nWaitersToUnblock; /* Number of threads to unblock */ + HANDLE semBlockQueue; /* Queue up threads waiting for the condition to become signalled */ + HANDLE semBlockLock; /* Semaphore that guards access to waiters blocked count/block queue */ + HANDLE mtxUnblockLock; /* Mutex that guards access to waiters (to)unblock(ed) counts */ + gen_cond_t next; /* Doubly linked list */ + gen_cond_t prev; +}; + +int gen_win_mutex_lock(HANDLE *mut); +int gen_win_mutex_unlock(HANDLE *mut); +int gen_win_mutex_trylock(HANDLE *mut); +HANDLE *gen_win_mutex_build(void); +int gen_win_mutex_destroy(HANDLE *mut); +int gen_win_mutex_init(HANDLE *mut); +HANDLE gen_win_thread_self(void); + +#define GEN_MUTEX_INITIALIZER ((gen_mutex_t) -1) +#define gen_mutex_lock(m) gen_win_mutex_lock(m) +#define gen_mutex_unlock(m) gen_win_mutex_unlock(m) +#define gen_mutex_trylock(m) gen_win_mutex_trylock(m) +#define gen_mutex_destroy(m) gen_win_mutex_destroy(m) +#define gen_mutex_init(m) gen_win_mutex_init(m) + +#define gen_thread_self() gen_win_thread_self() + +int gen_win_cond_init(gen_cond_t *cond); +int gen_win_cond_destroy(gen_cond_t *cond); +int gen_win_cond_wait(gen_cond_t *cond, HANDLE *mut); +int gen_win_cond_timedwait(gen_cond_t *cond, HANDLE *mut, + const struct timespec *abstime); +int gen_win_cond_signal(gen_cond_t *cond); +int gen_win_cond_broadcast(gen_cond_t *cond); + +#define GEN_COND_INITIALIZER ((gen_cond_t) -1) +#define gen_cond_init(c) gen_win_cond_init(c) +#define gen_cond_destroy(c) gen_win_cond_destroy(c) +#define gen_cond_wait(c, m) gen_win_cond_wait(c, m) +#define gen_cond_timedwait(c, m, s) gen_win_cond_timedwait(c, m, s) +#define gen_cond_signal(c) gen_win_cond_signal(c) 
+#define gen_cond_broadcast(c) gen_win_cond_broadcast(c) -#ifdef __GEN_NULL_LOCKING__ +#elif defined (__GEN_NULL_LOCKING__) /* this stuff messes around just enough to prevent warnings */ typedef int gen_mutex_t; typedef unsigned long gen_thread_t; +typedef int gen_cond_t; + #define GEN_MUTEX_INITIALIZER 0 +#define GEN_SHARED_MUTEX_INITIALIZER_NP 0 +#define GEN_RECURSIVE_MUTEX_INITIALIZER_NP 0 +#define GEN_COND_INITIALIZER 0 + static inline int gen_mutex_lock( gen_mutex_t * mutex_p) { @@ -87,7 +201,38 @@ static inline gen_thread_t gen_thread_self(void) } #define gen_mutex_init(m) do{}while(0) #define gen_mutex_destroy(m) do{}while(0) -#endif /* __GEN_NULL_LOCKING__ */ + +#define gen_cond_init(c) do{}while(0) +#define gen_cond_destroy(c) do{}while(0) + +static inline int gen_cond_wait(gen_cond_t *cond, gen_mutex_t *mut) +{ + (void) cond; + return 0; +} + +static inline int gen_cond_timedwait(gen_cond_t *cond, gen_mutex_t *mut, + const struct timespec *abstime) +{ + (void) cond; + return 0; +} + +static inline int gen_cond_signal(gen_cond_t *cond) +{ + (void) cond; + return 0; +} + +static inline int gen_cond_broadcast(gen_cond_t *cond) +{ + (void) cond; + return 0; +} + +#else /* __GEN_NULL_LOCKING__ */ +#error "Must define either POSIX, Windows or NULL locking" +#endif #endif /* __GEN_LOCKS_H */ diff --git a/src/common/gen-locks/module.mk.in b/src/common/gen-locks/module.mk.in index 23034c5..4da05c6 100644 --- a/src/common/gen-locks/module.mk.in +++ b/src/common/gen-locks/module.mk.in @@ -1,3 +1,4 @@ DIR := src/common/gen-locks LIBSRC += $(DIR)/gen-locks.c SERVERSRC += $(DIR)/gen-locks.c +LIBBMISRC += $(DIR)/gen-locks.c diff --git a/src/common/gossip/gossip.c b/src/common/gossip/gossip.c index 8a5d36a..5a54a5f 100644 --- a/src/common/gossip/gossip.c +++ b/src/common/gossip/gossip.c @@ -15,10 +15,16 @@ #include #include #include -#include #include #include + +#ifdef WIN32 +#include "wincommon.h" +#else +#include #include +#endif + #include "pvfs2-config.h" #ifdef 
HAVE_EXECINFO_H #include @@ -49,7 +55,9 @@ int gossip_facility = GOSSIP_STDERR; static FILE *internal_log_file = NULL; /* syslog priority setting */ +#ifndef WIN32 static int internal_syslog_priority = LOG_INFO; +#endif /* what type of timestamp to put on logs */ static enum gossip_logstamp internal_logstamp = GOSSIP_LOGSTAMP_DEFAULT; @@ -83,6 +91,16 @@ static int gossip_disable_syslog( * * \return 0 on success, -errno on failure. */ +#ifdef WIN32 +/** Only a stub on Windows + * TODO: possibly add logging to Windows Event Log + */ +int gossip_enable_syslog( + int priority) +{ + return 0; +} +#else int gossip_enable_syslog( int priority) { @@ -105,6 +123,7 @@ int gossip_enable_syslog( return 0; } +#endif /** Turns on logging to stderr. * @@ -163,6 +182,23 @@ int gossip_enable_file( return 0; } +int gossip_reopen_file( + const char *filename, + const char *mode) +{ + if( gossip_facility != GOSSIP_FILE ) + { + return -EINVAL; + } + + /* close the file */ + gossip_disable_file(); + + /* open the file */ + gossip_enable_file( filename, mode ); + return 0; +} + /** Turns off any active logging facility and disables debugging. * * \return 0 on success, -errno on failure. @@ -408,6 +444,18 @@ void gossip_backtrace(void) * * returns 0 on success, -errno on failure */ +#ifdef WIN32 +/** Only a stub on Windows + * TODO: possibly add logging to Windows Event Log + */ +static int gossip_debug_syslog( + char prefix, + const char *format, + va_list ap) +{ + return 0; +} +#else static int gossip_debug_syslog( char prefix, const char *format, @@ -428,10 +476,11 @@ static int gossip_debug_syslog( return -errno; } - syslog(internal_syslog_priority, buffer); + syslog(internal_syslog_priority, "%s", buffer); return 0; } +#endif int gossip_debug_fp(FILE *fp, char prefix, enum gossip_logstamp ts, const char *format, ...) 
@@ -457,7 +506,7 @@ static int gossip_debug_fp_va(FILE *fp, char prefix, const char *format, va_list ap, enum gossip_logstamp ts) { char buffer[GOSSIP_BUF_SIZE], *bptr = buffer; - int bsize = sizeof(buffer); + int bsize = sizeof(buffer), temp_size; int ret = -EINVAL; struct timeval tv; time_t tp; @@ -479,18 +528,24 @@ static int gossip_debug_fp_va(FILE *fp, char prefix, case GOSSIP_LOGSTAMP_DATETIME: gettimeofday(&tv, 0); tp = tv.tv_sec; - strftime(bptr, 14, "%m/%d %H:%M] ", localtime(&tp)); - bptr += 13; - bsize -= 13; + strftime(bptr, 22, "%m/%d/%Y %H:%M:%S] ", localtime(&tp)); + bptr += 21; + bsize -= 21; break; case GOSSIP_LOGSTAMP_THREAD: gettimeofday(&tv, 0); tp = tv.tv_sec; strftime(bptr, 9, "%H:%M:%S", localtime(&tp)); - sprintf(bptr+8, ".%06ld (%ld)] ", (long)tv.tv_usec, - gen_thread_self()); - bptr += 30; - bsize -= 30; + bptr += 8; +#ifdef WIN32 + temp_size = sprintf(bptr, ".%03ld (%4ld)] ", (long)tv.tv_usec / 1000, + GetThreadId(GetCurrentThread())); +#else + temp_size = sprintf(bptr, ".%06ld (%ld)] ", (long)tv.tv_usec, + gen_thread_self()); +#endif + bptr += temp_size; + bsize -= temp_size; break; case GOSSIP_LOGSTAMP_NONE: @@ -509,7 +564,7 @@ static int gossip_debug_fp_va(FILE *fp, char prefix, return -errno; } - ret = fprintf(fp, buffer); + ret = fprintf(fp, "%s", buffer); if (ret < 0) { return -errno; @@ -525,6 +580,17 @@ static int gossip_debug_fp_va(FILE *fp, char prefix, * * returns 0 on success, -errno on failure */ +#ifdef WIN32 +/** just a stub on Windows + * TODO: possibly add errors to Windows Event Log + */ +static int gossip_err_syslog( + const char *format, + va_list ap) +{ + return 0; +} +#else static int gossip_err_syslog( const char *format, va_list ap) @@ -541,7 +607,7 @@ static int gossip_err_syslog( return 0; } - +#endif /* gossip_disable_stderr() * @@ -579,12 +645,23 @@ static int gossip_disable_file( * * returns 0 on success, -errno on failure */ +#ifdef WIN32 +/** just a stub on Windows + * TODO: Possibly add logging to Windows 
Event Log + */ +static int gossip_disable_syslog( + void) +{ + return 0; +} +#else static int gossip_disable_syslog( void) { closelog(); return 0; } +#endif /* * Local variables: diff --git a/src/common/gossip/gossip.h b/src/common/gossip/gossip.h index 222e0d1..5f24214 100644 --- a/src/common/gossip/gossip.h +++ b/src/common/gossip/gossip.h @@ -20,11 +20,17 @@ #ifndef __GOSSIP_H #define __GOSSIP_H +#ifdef WIN32 +#include "wincommon.h" +#endif + #ifndef __KERNEL__ #include #include +#ifndef WIN32 #include "syslog.h" #endif +#endif #include "pvfs2-config.h" /******************************************************************** @@ -49,7 +55,7 @@ enum gossip_logstamp #ifdef GOSSIP_DISABLE_DEBUG #define gossip_debug(mask, format, f...) do {} while(0) #else -extern int gossip_debug_mask; +extern uint64_t gossip_debug_mask; /* try to avoid function call overhead by checking masks in macro */ #define gossip_debug(mask, format, f...) \ @@ -85,6 +91,9 @@ int gossip_enable_stderr( int gossip_enable_file( const char *filename, const char *mode); +int gossip_reopen_file( + const char *filename, + const char *mode); int gossip_disable( void); int gossip_set_debug_mask( @@ -168,6 +177,21 @@ do { \ } while(0) #else /* ! __GNUC__ */ +extern int gossip_debug_on; +extern int gossip_facility; +extern uint64_t gossip_debug_mask; + +#define gossip_perf_log(format, ...) \ +do { \ + if ((gossip_debug_on) && \ + (gossip_debug_mask & GOSSIP_PERFCOUNTER_DEBUG) && \ + (gossip_facility)) \ + { \ + __gossip_debug(GOSSIP_PERFCOUNTER_DEBUG, 'P', \ + format, __VA_ARGS__); \ + } \ +} while(0) + int __gossip_debug( uint64_t mask, char prefix, @@ -183,15 +207,24 @@ int gossip_err( ...); #ifdef GOSSIP_DISABLE_DEBUG +#ifdef WIN32 +#define gossip_debug(__m, __f, ...) __gossip_debug_stub(__m, '?', __f, __VA_ARGS__); +#define gossip_ldebug(__m, __f, ...) __gossip_debug_stub(__m, '?', __f, __VA_ARGS__); +#else #define gossip_debug(__m, __f, f...) 
__gossip_debug_stub(__m, '?', __f, ##f); #define gossip_ldebug(__m, __f, f...) __gossip_debug_stub(__m, '?', __f, ##f); +#endif #define gossip_debug_enabled(__m) 0 #else +#ifdef WIN32 +#define gossip_debug(__m, __f, ...) __gossip_debug(__m, '?', __f, __VA_ARGS__); +#define gossip_ldebug(__m, __f, ...) __gossip_debug(__m, '?', __f, __VA_ARGS__); +#else #define gossip_debug(__m, __f, f...) __gossip_debug(__m, '?', __f, ##f); #define gossip_ldebug(__m, __f, f...) __gossip_debug(__m, '?', __f, ##f); +#endif #define gossip_debug_enabled(__m) \ ((gossip_debug_on != 0) && (__m & gossip_debug_mask)) - #endif /* GOSSIP_DISABLE_DEBUG */ #define gossip_lerr gossip_err diff --git a/src/common/gossip/module.mk.in b/src/common/gossip/module.mk.in index c824c98..aced345 100644 --- a/src/common/gossip/module.mk.in +++ b/src/common/gossip/module.mk.in @@ -3,6 +3,7 @@ GOSSIP_ENABLE_BACKTRACE = @GOSSIP_ENABLE_BACKTRACE@ DIR := src/common/gossip LIBSRC += $(DIR)/gossip.c SERVERSRC += $(DIR)/gossip.c +LIBBMISRC += $(DIR)/gossip.c ifdef GOSSIP_ENABLE_BACKTRACE MODCFLAGS_$(DIR)/gossip.c := -DGOSSIP_ENABLE_BACKTRACE endif diff --git a/src/common/id-generator/id-generator.c b/src/common/id-generator/id-generator.c index afbd155..36ddef9 100644 --- a/src/common/id-generator/id-generator.c +++ b/src/common/id-generator/id-generator.c @@ -6,6 +6,11 @@ #include #include + +#ifdef WIN32 +#include "wincommon.h" +#endif + #include "id-generator.h" #include "quickhash.h" #include "gen-locks.h" @@ -18,13 +23,13 @@ static int s_id_gen_safe_init_count = 0; static int hash_key(void *key, int table_size); static int hash_key_compare(void *key, struct qlist_head *link); -static PVFS_id_gen_t s_id_gen_safe_tag = 0; +static BMI_id_gen_t s_id_gen_safe_tag = 0; typedef struct { struct qlist_head hash_link; - PVFS_id_gen_t id; + BMI_id_gen_t id; void *item; } id_gen_safe_t; @@ -41,7 +46,7 @@ int id_gen_safe_initialize() hash_key_compare, hash_key, DEFAULT_ID_GEN_SAFE_TABLE_SIZE); if (!s_id_gen_safe_table) { 
- return -PVFS_ENOMEM; + return -ENOMEM; } } s_id_gen_safe_init_count++; @@ -62,7 +67,7 @@ int id_gen_safe_finalize() } int id_gen_safe_register( - PVFS_id_gen_t *new_id, + BMI_id_gen_t *new_id, void *item) { id_gen_safe_t *id_elem = NULL; @@ -71,7 +76,7 @@ int id_gen_safe_register( if (!item) { - return -PVFS_EINVAL; + return -EINVAL; } gen_mutex_lock(&s_id_gen_safe_mutex); @@ -79,10 +84,15 @@ int id_gen_safe_register( id_elem = (id_gen_safe_t *)malloc(sizeof(id_gen_safe_t)); if (!id_elem) { - return -PVFS_ENOMEM; + return -ENOMEM; } id_elem->id = ++s_id_gen_safe_tag; + if(id_elem->id == 0) + { + /* don't want this to land on zero */ + id_elem->id = ++s_id_gen_safe_tag; + } id_elem->item = item; qhash_add(s_id_gen_safe_table, &id_elem->id, &id_elem->hash_link); @@ -93,7 +103,7 @@ int id_gen_safe_register( return 0; } -void *id_gen_safe_lookup(PVFS_id_gen_t id) +void *id_gen_safe_lookup(BMI_id_gen_t id) { void *ret = NULL; id_gen_safe_t *id_elem = NULL; @@ -118,9 +128,9 @@ void *id_gen_safe_lookup(PVFS_id_gen_t id) return ret; } -int id_gen_safe_unregister(PVFS_id_gen_t new_id) +int id_gen_safe_unregister(BMI_id_gen_t new_id) { - int ret = -PVFS_EINVAL; + int ret = -EINVAL; id_gen_safe_t *id_elem = NULL; struct qlist_head *hash_link = NULL; @@ -147,7 +157,7 @@ int id_gen_safe_unregister(PVFS_id_gen_t new_id) static int hash_key(void *key, int table_size) { unsigned long tmp = 0; - PVFS_id_gen_t *id = (PVFS_id_gen_t *)key; + BMI_id_gen_t *id = (BMI_id_gen_t *)key; tmp += *id; tmp = tmp % table_size; @@ -158,7 +168,7 @@ static int hash_key(void *key, int table_size) static int hash_key_compare(void *key, struct qlist_head *link) { id_gen_safe_t *id_elem = NULL; - PVFS_id_gen_t id = *((PVFS_id_gen_t *)key); + BMI_id_gen_t id = *((BMI_id_gen_t *)key); id_elem = qlist_entry(link, id_gen_safe_t, hash_link); assert(id_elem); diff --git a/src/common/id-generator/id-generator.h b/src/common/id-generator/id-generator.h index e518c77..1119605 100644 --- 
a/src/common/id-generator/id-generator.h +++ b/src/common/id-generator/id-generator.h @@ -11,16 +11,23 @@ /* This will hopefully eventually be a library of mechanisms for doing * fast registration and lookups of data structures. Right now it only - * has routines that directly convert pointers into integer types and vice + * has routines that directly convert pointers into integer types and vice * versa. */ #ifndef __ID_GENERATOR_H #define __ID_GENERATOR_H -#include "pvfs2-types.h" +#include +#include #include "pvfs2-config.h" +#ifdef __PVFS2_TYPES_H +typedef PVFS_id_gen_t BMI_id_gen_t; +#else +typedef int64_t BMI_id_gen_t; +#endif + /* id_gen_fast_register() * * registers a piece of data (a pointer of some sort) and @@ -28,7 +35,7 @@ * * *new_id will be 0 if item is NULL */ -static inline void id_gen_fast_register(PVFS_id_gen_t * new_id, +static inline void id_gen_fast_register(BMI_id_gen_t * new_id, void *item) { #if SIZEOF_VOID_P == 8 @@ -47,7 +54,7 @@ static inline void id_gen_fast_register(PVFS_id_gen_t * new_id, * * returns pointer to data on success, NULL on failure */ -static inline void *id_gen_fast_lookup(PVFS_id_gen_t id) +static inline void *id_gen_fast_lookup(BMI_id_gen_t id) { #if SIZEOF_VOID_P == 8 return (void *) id; @@ -69,12 +76,12 @@ int id_gen_safe_finalize(void); * * returns 0 on success, -errno on failure */ -int id_gen_safe_register(PVFS_id_gen_t *new_id, +int id_gen_safe_register(BMI_id_gen_t *new_id, void *item); -void *id_gen_safe_lookup(PVFS_id_gen_t id); +void *id_gen_safe_lookup(BMI_id_gen_t id); -int id_gen_safe_unregister(PVFS_id_gen_t new_id); +int id_gen_safe_unregister(BMI_id_gen_t new_id); #endif /* __ID_GENERATOR_H */ diff --git a/src/common/id-generator/module.mk.in b/src/common/id-generator/module.mk.in index 0a605f1..ea12f50 100644 --- a/src/common/id-generator/module.mk.in +++ b/src/common/id-generator/module.mk.in @@ -1,4 +1,4 @@ DIR := src/common/id-generator LIBSRC += $(DIR)/id-generator.c SERVERSRC += 
$(DIR)/id-generator.c - +LIBBMISRC += $(DIR)/id-generator.c diff --git a/src/common/llist/llist.h b/src/common/llist/llist.h index c99825a..f311a6d 100755 --- a/src/common/llist/llist.h +++ b/src/common/llist/llist.h @@ -8,16 +8,11 @@ #ifndef LLIST_H #define LLIST_H -#ifdef __KERNEL__ -#include -#include -#else #include #include #ifdef HAVE_MALLOC_H #include #endif -#endif #define PINT_llist_add(__llist_p, __void_p) \ diff --git a/src/common/llist/module.mk.in b/src/common/llist/module.mk.in index 71c2187..b71ad3f 100644 --- a/src/common/llist/module.mk.in +++ b/src/common/llist/module.mk.in @@ -1,3 +1,4 @@ DIR := src/common/llist LIBSRC += $(DIR)/llist.c SERVERSRC += $(DIR)/llist.c +LIBBMISRC += $(DIR)/llist.c diff --git a/src/common/mgmt/README b/src/common/mgmt/README new file mode 100644 index 0000000..745aa4f --- /dev/null +++ b/src/common/mgmt/README @@ -0,0 +1,264 @@ + +Current event handling and thread models in PVFS consist primarily of posting +an operation (placing it on a queue), servicing operations from the queue +either with a separate thread or via the test interfaces (test_context primarily) +and pushing completed operations onto completed queues (known as a 'context'). + +Although, the model is usually the same, different components have separate +implementations for queuing and signalling. + +...some bits about parallel fs servers facing unique concurrency requirements... + +Due to the design of PVFS, where interfaces separate different components, +such as trove (disk), bmi (network), device, flow, etc. a single request +steps its way through a number of different queues and completion contexts. +In fact, in some cases, individual components have multiple queues that +an operation must step through (dbpf for example) and requests +are often broken down into a number of operations, each one following the +queued/servicing/completed path. 
+ +In some cases, a request's +operation is placed on a completion context, where it must wait for a +separate thread to pull it off the context, only to be pulled off and placed onto +another queue as the next operation in the request, +where it must wait to be serviced. Queue shifting in this manner is especially +inefficient, as adding/removing the first operation +from the completion context requires a context switch, and adding/removing +the second operation from the operation queue requires another context +switch. + +TYPES OF THREADING MODELS: + +EVENT MODELS: + +Event models consist of placing posted operations in a queue, to be serviced +at a later point. Operations can be pulled from the queue by a separate +thread or possibly multiple threads, or even the same thread that posted +the operation (in which case, the operation gets serviced in a test function). + +In general event/queuing models allow for scheduling, ordering, or grouping +operations, and so can often increase the overall throughput at the cost +of individual operation latency. + +Examples include the work queue in the linux kernel, ... + +THREADS: + +Thread models allow operations to be serviced more-or-less immediately by +either thread-creation: creating a thread that services that operation and exits, +or thread-pooling: take an idle thread off a relatively large list of +pre-created threads, that then services the operation and gets placed +back on the idle list. + +Independent threads often perform better with workloads consisting of +many short independent operations where the cost of queuing/dequeuing +is high relative to the time required to service the operation. + + +Requirements: + +* Provide a worker interface that abstracts the thread/event model from +the caller, allowing operations to be pushed or pulled into and +out of the worker using a single set of interfaces. 
+ +* Provide a completion context interface and implementation that allows +for both callback and queued types of completions. + +* Provide a generic queue implementation that abstracts list management, locking +and signalling, as well as provides entry points for monitoring, grouping, +and scheduling and rate limiting. + +* Provide basic worker implementation types, such as thread pooling, +thread creation, and queuing with single/multiple service threads. + +* Eliminate unnecessary context switching, locking, and synchronization + +TERMS: + +The work management interfaces can be divided up into three major components: + +* Completion Management. Manage completed operations. +* Queue Management. Store and manage posted and running operations. +* Thread Management. Manages the threads that do the actual work. + +The first two components, completion and queuing, provide interfaces +to the users of the API for posting (registering) and testing for +completion (polling) of operations. Thread management is used internally +by the other two components, and does the actual work of servicing operations. + +The queuing component allows different types of operations to be queued +separately, as well as provide multiple levels of servicing for an operation +(such as for sync-coalescing). + +The separation of thread management allows the queuing component to +specify different types of threading models. In general, the threading +model defines the relationship between queues and threads. For example, +there may be one thread for all queues, one thread for each queue, or +one thread created and destroyed for each operation. Separation of +thread management allows easy switching between threading models for +performance analysis or to dynamically re-configure for different workloads. + + +COMPLETION CONTEXT: + +The completion context component manages _completed_ operations.
+A context allows callers to group completed operations together, so +that they can be tested (polled) for completion, or provide a callback +for a context which gets triggered on completion of an operation associated +with that context. The basic interfaces into the context component are: + +* Open and close a context. This allows callers to get a unique id that can +be used to group completed operations. Internally, any setup and teardown +of a context happens with these calls. + +cid open_context(callback [optional]); +close_context(cid); + +* Complete an operation. This allows another component (usually the thread +component) to specify completion of operations. Based on the type of context, +the completion of an operation calls a callback for that context, or adds +the completed operation to a completion queue for that context (tested +later by a test_* function), and signals a condition variable. + +complete(cid, op_id); +complete_list(cid, op_id_list); + +* Test for completion. These functions are called to test (poll) for completion +of one, some or all of the operations for a given context. + +test_all(cid); +test_some(cid, op_id_list); +test(cid, op_id); + +QUEUE: + +The queue component provides management interfaces for operations which are posted or in-progress. The basic interfaces for queueing are as follows: + +* Create and Destroy. Users of the queue interface will create and +destroy queues as necessary. The reference to a queue returned by +create is opaque and provides a handle for all future queue operations. + +* Post and cancel. Users of the work management interface will create +operations and service callbacks, and "post" them to the queue. +An operation id is returned to provide management for the poster of the +operation(s). 
The operation ID + +post +cancel +pull +timedwait +wait + +---- + +WORKER/THREAD: + +The worker API provides encapsulation for managing the thread to queue mappings, provides convenience interfaces for posting operations to queues, and testing +for completion on particular contexts, etc. A goal of the interfaces +is to allow multiple dynamic thread models to be configured for a worker, +but still keep the posting interfaces as simple and generic as possible. + +The basic worker interfaces consist of creating a worker that +will place completed operations in a particular completion queue. + +wid worker_init(contextid); + +With the worker created, new threads can be added, with specific types and +attributes. Those threads return thread ids. By default a worker has a +default null thread id, for operations that should be queued and only +serviced within the test calls. + +thread_id worker_add_thread(worker_id, thread_attrs) + +worker_thread_set_attr(worker_id, thread_id, thread_attrs); + +WORKER_NULL_THREAD_ID + +Operations posted to that null thread id will get queued and later +serviced in calls to test/test_context. + +Threads of different types can be added to the worker: + +* thread pool: creates a bunch of threads and returns a thread id that +references that thread pool. Operations posted with that thread id will +get serviced by a thread in the pool. + +* thread per-op: same as the thread pool, except that threads are +created as operations are posted. + +* thread queue: creates a specified set of threads for servicing queued +operations. Operations posted to this thread id will first get queued, +and the worker threads will pull operations from the queue(s) and service +them. + +For the null thread and any thread of the QUEUE type, operations are +first queued and later serviced by threads. Multiple queues can be added +to a thread id. A thread of the QUEUE type that doesn't have +any queues added to it will return EINVAL if an operation is posted to it.
+ +worker_add_queue(worker_id, thread_id, qid); + +Once a queue is added to a thread (or the special null thread), it remains +part of that thread. Operations can be posted to queues, and the worker +manages which thread services that operation. + +Operations can be posted to a worker with a specified worker id, and to a +specific queue with a queue id. +Multiple sub-operations can be posted with multiple service functions and +queue ids, allowing an operation to perform several actions before +being complete, and before getting added to the completion queue for that +worker. + +worker_post(worker_id, user_ptr, hint, op_count, + op_fn1, op_ptr1, qid1, op_fn2, op_ptr2, qid2, ...); + +For example, worker_post might be called for a dspace metadata +create op, which then needs to do a sync of the db. The +post call would look like this: + +worker_post(wid, user_ptr, NULL, 2, + dspace_create_op_svc, &dspace_attr, dspace_queue_id, + dspace_sync_coalesce, &dspace_attr, dspace_sync_queue_id); + +Here, the worker takes over, and completion of the entire operation +(create and sync) is signalled through the completion context +(which can either be a callback or a queue). The dspace_queue_id +and dspace_sync_queue_id refer to queues that have already been added +to the worker, in one of its threads. + +--- + +In order to allow more dynamic queue addition/removal (this would allow +a queue per file handle), a separate set of interfaces is proposed. +There needs to be an LRU cache of queues, and a way to map operations +to queue ids. New queue ids not in the cache need to be created and +added to the cache, and then the operation can be pushed onto it. + +A queue cache is added to a particular thread id: + +worker_add_queue_cache(worker_id, thread_id, + op_mapper, queue_cache_attr, queue_attr); + +The op_mapper essentially maps operations (their hint structures actually) +to queue ids.
New queues that are created and added to the queue cache +inherit the queue_attr queue attributes specified. + +The signature for the mapper is something like: + +map_op_to_qid(worker, op_fn1, op_ptr1, hint, thread_id *, queue_id *) + +This makes the post function: + +worker_post(worker_id, user_ptr, hint, op_count, + op_fn1, op_ptr1, op_fn2, op_ptr2, ...); + +worker_test_all(worker_id, count, op_ids, user_ptrs, errors, timeout) +{ + /* anything done yet? */ + PINT_context_test_all(&count, ...); + + if(count == 0 && get_type(worker_id) == THREAD_NONE) + { + service functions + } +} diff --git a/src/common/mgmt/module.mk.in b/src/common/mgmt/module.mk.in new file mode 100644 index 0000000..d0d3b43 --- /dev/null +++ b/src/common/mgmt/module.mk.in @@ -0,0 +1,16 @@ +DIR := src/common/mgmt +MGMT_SRC = $(DIR)/pint-op.c \ + $(DIR)/pint-queue.c \ + $(DIR)/pint-mgmt.c \ + $(DIR)/pint-context.c \ + $(DIR)/pint-worker-queues.c \ + $(DIR)/pint-worker-threaded-queues.c \ + $(DIR)/pint-worker-blocking.c \ + $(DIR)/pint-worker-per-op.c \ + $(DIR)/pint-worker-pool.c \ + $(DIR)/pint-worker-external.c + +LIBSRC += $(MGMT_SRC) + +SERVERSRC += $(MGMT_SRC) + diff --git a/src/common/mgmt/pint-context.c b/src/common/mgmt/pint-context.c new file mode 100644 index 0000000..cda038c --- /dev/null +++ b/src/common/mgmt/pint-context.c @@ -0,0 +1,596 @@ + +#include +#include +#include "pint-context.h" +#include "gen-locks.h" +#include "quickhash.h" +#include "pint-util.h" +#include "pvfs2-internal.h" +#include "pvfs2-debug.h" +#include "gossip.h" + +#define PINT_CONTEXT_TABLE_SIZE 512 + +/* static hash of all contexts in this system. This allows + * callers to open contexts and keep track of them with context ids. 
+ */ +static struct qhash_table *pint_contexts = NULL; +static int pint_context_count = 0; +static gen_mutex_t pint_context_mutex = GEN_MUTEX_INITIALIZER; + +struct PINT_context_queue_entry +{ + PINT_op_id op_id; + void *user_ptr; + PVFS_error result; + + PINT_queue_entry_t qentry; +}; + +struct PINT_context +{ + PINT_context_id id; + enum PINT_context_type type; + union + { + PINT_queue_id queue; + PINT_completion_callback callback; + } u; + int refcount; + struct qlist_head link; +}; + +static struct PINT_context * PINT_context_lookup(PINT_context_id context_id); + +static int PINT_context_compare(void *key, struct qhash_head *link); +static int PINT_context_hash(void *key, int tablesize); + +/** + * PINT_context_init + * + * Initialize the context layer. This function should be called + * before any contexts are created. + */ +static int PINT_context_init(void) +{ + if(pint_contexts) + { + return 0; + } + + pint_contexts = qhash_init(PINT_context_compare, PINT_context_hash, + PINT_CONTEXT_TABLE_SIZE); + if(!pint_contexts) + { + return -PVFS_ENOMEM; + } + + return 0; +} + +/** + * PINT_context_finalize + * + * Finalize the context layer. This function should be called + * after all contexts have been finalized. + */ +static int PINT_context_finalize(void) +{ + struct PINT_context *ctx, *tmp; + assert(pint_contexts); + + if(pint_contexts) + { + int i = 0; + for(; i < pint_contexts->table_size; ++i) + { + qlist_for_each_entry_safe(ctx, tmp, &(pint_contexts->array[i]), link) + { + qhash_del(&ctx->link); + PINT_close_context(ctx->id); + + } + } + } + qhash_finalize(pint_contexts); + + return 0; +} + +/** + * PINT_open_context + * + * Open a new context. The resulting context id can be used to add + * completed operations to the completion context, or test for completed + * operations on the completion context. + * + * @param context_id The context id created + * @param callback There are two types of contexts. 
By default, if + * this parameter is NULL, a queue context is created, which will queue + * completed operations until requested for via the test functions. If + * the callback parameter is not NULL, the context becomes a callback + * context, and the parameter specifies a callback function that should be + * called once an operation is completed. + * + * @return 0 on success, -PVFS_ENOMEM if out of memory, or the error from PINT_queue_create. + */ +int PINT_open_context( + PINT_context_id *context_id, + PINT_completion_callback callback) +{ + struct PINT_context *ctx; + int ret; + + assert(context_id); + + gen_mutex_lock(&pint_context_mutex); + ret = PINT_context_init(); /* returns 0 immediately once the table exists */ + gen_mutex_unlock(&pint_context_mutex); + if(ret < 0) + { + return ret; + } + ctx = malloc(sizeof(struct PINT_context)); + if(!ctx) + { + /* the mutex was already released above; do not unlock it twice */ + return -PVFS_ENOMEM; + } + memset(ctx, 0, sizeof(struct PINT_context)); + + if(callback) + { + /* this is a callback context */ + ctx->type = PINT_CONTEXT_TYPE_CALLBACK; + ctx->u.callback = callback; + } + else + { + ctx->type = PINT_CONTEXT_TYPE_QUEUE; + ret = PINT_queue_create(&ctx->u.queue, NULL); + if(ret < 0) + { + free(ctx); + return ret; + } + PINT_queue_add_consumer(ctx->u.queue, ctx); + PINT_queue_add_producer(ctx->u.queue, ctx); + } + + id_gen_fast_register(&ctx->id, ctx); + *context_id = ctx->id; + + gen_mutex_lock(&pint_context_mutex); + qhash_add(pint_contexts, &ctx->id, &ctx->link); + gen_mutex_unlock(&pint_context_mutex); + return 0; +} + +/** + * PINT_close_context + * + * Close the context.
+ */ +int PINT_close_context(PINT_context_id context_id) +{ + struct qhash_head *entry; + struct PINT_context *ctx; + + gen_mutex_lock(&pint_context_mutex); + + entry = qhash_search_and_remove(pint_contexts, &context_id); + if(!entry) + { + gen_mutex_unlock(&pint_context_mutex); + return -PVFS_EINVAL; + } + gen_mutex_unlock(&pint_context_mutex); + + ctx = qhash_entry(entry, struct PINT_context, link); + assert(ctx->refcount == 0); + + if(ctx->type == PINT_CONTEXT_TYPE_QUEUE) + { + if(PINT_queue_count(ctx->u.queue) != 0) + { + /* completion queue isn't empty: put the context back so close can be retried */ + gen_mutex_lock(&pint_context_mutex); + qhash_add(pint_contexts, &ctx->id, &ctx->link); + gen_mutex_unlock(&pint_context_mutex); + return -PVFS_EINVAL; + } + PINT_queue_remove_producer(ctx->u.queue, ctx); + PINT_queue_remove_consumer(ctx->u.queue, ctx); + PINT_queue_destroy(ctx->u.queue); + } + free(ctx); + /* NOTE(review): pint_context_count is never incremented in this file, so the finalize branch below looks unreachable -- confirm the intended lifecycle */ + pint_context_count--; + if(pint_context_count > 0) + { + PINT_context_finalize(); + } + + return 0; +} + +int PINT_context_reference(PINT_context_id context_id) +{ + struct PINT_context *ctx; + + ctx = PINT_context_lookup(context_id); + assert(ctx); + + ctx->refcount++; + return 0; +} + +int PINT_context_dereference(PINT_context_id context_id) +{ + struct PINT_context *ctx; + + ctx = PINT_context_lookup(context_id); + assert(ctx); + + ctx->refcount--; + return 0; +} + +static struct PINT_context * PINT_context_lookup(PINT_context_id context_id) +{ + struct qhash_head *entry; + + entry = qhash_search(pint_contexts, &context_id); + if(!entry) + { + return NULL; + } + return qhash_entry(entry, struct PINT_context, link); +} + +/** + * PINT_context_complete + * + * Give the completion context a completed operation to either queue + * for later testing, or pass on to the callback for the completion context.
+ * + * @param context_id the context to give the completed operation to + * @param op_id the operation id of the completed operation + * @param user_ptr the user pointer given as part of the operation + * @param result the result of the operation + * + * @return 0 on success + */ +int PINT_context_complete(PINT_context_id context_id, + PINT_op_id op_id, + void *user_ptr, + PVFS_error result) +{ + int ret; + struct PINT_context *ctx; + + gossip_debug(GOSSIP_MGMT_DEBUG, + "%s: completing op: %llu, user_ptr: %p, result: %d\n", + __func__, llu(op_id), user_ptr, result); + + ctx = PINT_context_lookup(context_id); + if(!ctx) + { + return -PVFS_EINVAL; + } + + if(ctx->type == PINT_CONTEXT_TYPE_CALLBACK) + { + ctx->u.callback(context_id, 1, &op_id, &user_ptr, &result); + } + else + { + struct PINT_context_queue_entry *ctx_entry; + + /* If the malloc here is too expensive for every completed + * operation, we should add some logic that populates new + * entries in an unused list and pulls them off of that when + * operations are completed. 
+ */ + ctx_entry = malloc(sizeof(struct PINT_context_queue_entry)); + if(!ctx_entry) + { + return -PVFS_ENOMEM; + } + memset(ctx_entry, 0, sizeof(struct PINT_context_queue_entry)); + + ctx_entry->op_id = op_id; + ctx_entry->user_ptr = user_ptr; + ctx_entry->result = result; + ret = PINT_queue_push(ctx->u.queue, &ctx_entry->qentry); + if(ret < 0) + { + free(ctx_entry); + return ret; + } + } + + return 0; +} + +/** + * PINT_context_complete_list + * + * Complete a list of operations + */ +int PINT_context_complete_list(PINT_context_id context_id, + int count, + PINT_op_id *op_ids, + void **user_ptrs, + PVFS_error *errors) +{ + int ret; + struct PINT_context *ctx; + int i; + + assert(count > 0); + + /* get first op */ + ctx = PINT_context_lookup(context_id); + if(!ctx) + { + return -PVFS_EINVAL; + } + + /* callers can only test on queue contexts */ + assert(ctx->type != PINT_CONTEXT_TYPE_CALLBACK); + + for(i = 0; i < count; ++i) + { + struct PINT_context_queue_entry *ctx_entry; + + ctx_entry = malloc(sizeof(struct PINT_context_queue_entry)); + if(!ctx_entry) + { + return -PVFS_ENOMEM; + } + ctx_entry->op_id = op_ids[i]; + ctx_entry->user_ptr = user_ptrs[i]; + ctx_entry->result = errors[i]; + + ret = PINT_queue_push(ctx->u.queue, &ctx_entry->qentry); + if(ret < 0) + { + free(ctx_entry); + return ret; + } + } + + return 0; +} + +/** + * PINT_context_test_all + * + * Test all the operations in the completion context, returning + * as many as possible in the return parameters. + * + * @param context_id the context to test on + * @param count As an input parameter, specifies the capacity of the + * return parameters (the number of completed operations that can be returned). + * As an output parameter, specifies the number of operations returned as + * completed. 
+ * @param op_ids a return parameter - the array of completed operation ids + * @param user_ptrs a return parameter - the array of user pointers associated + * with the completed operations + * @param errors a return parameter - the array of errors associated + * with the completed operations + * @param timeout_ms The timeout handed unchanged to PINT_queue_timedwait + * (presumably milliseconds per the name; text previously said microseconds -- TODO confirm). + * + * @return 0 on success, -PVFS_EINVAL if the context id is unknown, or the + * negative error from PINT_queue_timedwait (*count is then set to 0). + * Only queue contexts can be tested: a callback context trips the + * assert in the body instead of producing a return code. + * Entries handed back here are released before returning. + */ +int PINT_context_test_all(PINT_context_id context_id, + int *count, + PINT_op_id *op_ids, + void **user_ptrs, + PVFS_error * errors, + int timeout_ms) +{ + struct PINT_context_queue_entry *ctx_entry; + PINT_queue_entry_t *qentry; + struct PINT_context *ctx; + int ret; + int i = 0; + + ctx = PINT_context_lookup(context_id); + if(!ctx) + { + return -PVFS_EINVAL; + } + + /* callers can only test on queue contexts */ + assert(ctx->type != PINT_CONTEXT_TYPE_CALLBACK); + + ret = PINT_queue_timedwait(ctx->u.queue, count, + (PINT_queue_entry_t **)user_ptrs, timeout_ms); + if(ret < 0) + { + *count = 0; + return ret; + } + + for(; i < *count; ++i) + { + struct PINT_op_entry *op_entry; + qentry = user_ptrs[i]; + ctx_entry = PINT_queue_entry_object( + qentry, struct PINT_context_queue_entry, qentry); + op_ids[i] = ctx_entry->op_id; + op_entry = id_gen_safe_lookup(ctx_entry->op_id); + if (op_entry) + { + id_gen_safe_unregister(op_entry->op.id); + free(op_entry); + } + errors[i] = ctx_entry->result; + user_ptrs[i] = ctx_entry->user_ptr; + /* the entry was malloc'ed when the op completed; free it as PINT_context_test does */ + free(ctx_entry); + } + + return 0; +} + +static int PINT_context_find_id_callback( + PINT_queue_entry_t *entry, void *user_ptr) +{ + struct PINT_context_queue_entry *ctx_entry; + + ctx_entry = PINT_queue_entry_object(entry, + struct PINT_context_queue_entry, qentry); + if(*((PINT_op_id
*)user_ptr) == ctx_entry->op_id) + { + return 1; + } + + return 0; +} + +#if 0 +static int PINT_context_find_id( + struct PINT_context *ctx, + PINT_op_id op_id, + struct PINT_context_queue_entry **entry) +{ + PINT_queue_entry_t *qentry; + int ret; + + /* if there's stuff in the queue we fill the output parameters and + * return immediately */ + ret = PINT_queue_search_and_remove( + ctx->u.queue, PINT_context_find_id_callback, + &op_id, &qentry); + if(ret < 0) + { + return ret; + } + + *entry = PINT_queue_entry_object(qentry, + struct PINT_context_queue_entry, qentry); + return 0; +} +#endif + +/** + * PINT_context_test + * + * Test the specified operation in the completion context, returning + * the parameters of the operation in the output parameters. + * + * @param context_id the context to test on + * @param op_id which completed operation to look for + * @param user_ptr a return parameter - the user pointer associated + * with the completed operation + * @param error a return paramter - the error associated + * with the completed operation + * @param timeout_ms The timeout in microseconds to wait for the completed + * operation. + * + * @return 0 on success. If the context type is a callback context, then + * this function will return -PVFS_ENOMSG. + * Otherwise, the errors returned will be -PVFS_* and should be considered + * fatal. 
+ */ +int PINT_context_test(PINT_context_id context_id, + PINT_op_id op_id, + void **user_ptr, + PVFS_error *error, + int microsecs) +{ + struct PINT_context *ctx; + struct PINT_context_queue_entry *entry; + PINT_queue_entry_t *qentry; + int ret; + + ctx = PINT_context_lookup(context_id); + if(!ctx) + { + gen_mutex_unlock(&pint_context_mutex); + return -PVFS_EINVAL; + } + + /* callers can only test on queue contexts */ + assert(ctx->type != PINT_CONTEXT_TYPE_CALLBACK); + + ret = PINT_queue_wait_for_entry(ctx->u.queue, PINT_context_find_id_callback, + &op_id, &qentry, microsecs); + if(ret == 0) + { + struct PINT_op_entry *op_entry; + entry = PINT_queue_entry_object( + qentry, + struct PINT_context_queue_entry, qentry); + op_entry = id_gen_safe_lookup(entry->op_id); + if (op_entry) + { + id_gen_safe_unregister(op_entry->op.id); + free(op_entry); + } + *user_ptr = entry->user_ptr; + *error = entry->result; + + free(entry); + } + + /* otherwise nothing to do so just return. */ + return ret; +} + +static int PINT_context_hash(void *key, int tablesize) +{ + unsigned long ret = 0; + PINT_context_id *id; + + id = (PINT_context_id *)key; + + ret += *id; + ret = ret & (tablesize - 1); + + return (int) ret; +} + +static int PINT_context_compare(void *key, struct qhash_head *link) +{ + PINT_context_id *id = (PINT_context_id *)key; + struct PINT_context *context; + + context = qhash_entry(link, struct PINT_context, link); + + if(*id == context->id) + { + return 1; + } + return 0; +} + +int PINT_context_is_callback(PINT_context_id context_id) +{ + struct PINT_context *ctx; + + ctx = PINT_context_lookup(context_id); + assert(ctx); + if(ctx->type == PINT_CONTEXT_TYPE_CALLBACK) + { + return 1; + } + return 0; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-context.h b/src/common/mgmt/pint-context.h new file mode 100644 index 0000000..147be5c --- /dev/null +++ 
b/src/common/mgmt/pint-context.h @@ -0,0 +1,94 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#ifndef PINT_CONTEXT_H +#define PINT_CONTEXT_H + +#include "pint-op.h" +#include "pint-queue.h" +#include "quickhash.h" + +enum PINT_context_type +{ + PINT_CONTEXT_TYPE_QUEUE = 1, + PINT_CONTEXT_TYPE_CALLBACK = 2 +}; + +typedef PVFS_id_gen_t PINT_context_id; + +struct PINT_op_entry +{ + void *user_ptr; + PINT_operation_t op; + PVFS_id_gen_t wq_id; + PINT_context_id ctx_id; + PVFS_error error; + + struct qhash_head link; +}; + + + +typedef int (*PINT_completion_callback)(PINT_context_id ctx_id, + int count, + PINT_op_id *op_ids, + void ** user_ptrs, + PVFS_error *errors); + +int PINT_open_context( + PINT_context_id *context_id, + PINT_completion_callback callback); + +int PINT_close_context(PINT_context_id context_id); + + +int PINT_context_complete(PINT_context_id context_id, + PINT_op_id op_id, + void * user_ptr, + PVFS_error error); + +int PINT_context_complete_list(PINT_context_id context_id, + int count, + PINT_op_id *op_ids, + void **user_ptrs, + PVFS_error *errors); + +int PINT_context_test_all(PINT_context_id context_id, + int * count, + PINT_op_id *op_ids, + void **user_ptrs, + PVFS_error * errors, + int timeout_ms); + +int PINT_context_test_some(PINT_context_id context_id, + int count, + PINT_op_id *op_ids, + void **user_ptrs, + PVFS_error *errors, + int timeout_ms); + +int PINT_context_test(PINT_context_id context_id, + PINT_op_id op_id, + void **user_ptr, + PVFS_error *error, + int timeout_ms); + +int PINT_context_is_callback(PINT_context_id context_id); + +int PINT_context_reference(PINT_context_id context_id); +int PINT_context_dereference(PINT_context_id context_id); + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/common/mgmt/pint-mgmt.c b/src/common/mgmt/pint-mgmt.c new file mode 
100644 index 0000000..120a5a1 --- /dev/null +++ b/src/common/mgmt/pint-mgmt.c @@ -0,0 +1,1461 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include +#include +#include +#include "pvfs2-types.h" +#include "pvfs2-internal.h" +#include "pint-mgmt.h" +#include "pint-util.h" +#include "quickhash.h" +#include "pvfs2-debug.h" +#include "gossip.h" + +#define DEFAULT_TIMEOUT_MICROSECS 1000 + +#define PINT_QUEUE_TO_WORKER_TABLESIZE 1024 +#define PINT_OP_ENTRY_TABLESIZE 32768 + +/* Used to specify that the management code should figure out which + * worker to use based on the op to worker mappings + */ +PINT_worker_id PINT_worker_implicit_id = 0; + +/* Every manager gets a blocking worker type. This global id allows + * us to reference the blocking worker without knowing the specific + * id for that manager. + */ +PINT_worker_id PINT_worker_blocking_id = 0xFFFFFFFFFFFFFFFFULL; + +struct PINT_worker_s +{ + PINT_worker_type_t type; + struct PINT_worker_impl *impl; + + PINT_worker_inst inst; + + PINT_worker_id id; + struct qlist_head link; +}; + +struct PINT_manager_s +{ + PINT_context_id context; + + gen_mutex_t mutex; + + /* mapping functions for mapping an operation to the + * queue or worker it should be added to. + */ + struct qlist_head op_maps; + + /* list of workers */ + struct qlist_head workers; + + /* hash of which queues are owned by which workers */ + struct qhash_table *queue_to_worker; + + /* list of operations that are being serviced. A post + * call can post a sequence of operations to be serviced, + * each of which may be handled by a different worker. + * Since C doesn't have continuations, we create a linked + * list on a post of the operations, and then add the first + * operation to the appropriate queue/worker, and the rest + * (the linked list) to this list. The manager keeps popping + * operations off the list and servicing them with the appropriate + * worker. 
+ */ + struct qhash_table *ops; + + int op_count; + + PINT_worker_id blocking_id; + + struct qlist_head event_handlers; +}; + +struct PINT_worker_map_entry_s +{ + struct PINT_worker_s *worker; + PINT_queue_id queue_id; + struct qhash_head link; +}; + +struct PINT_worker_id_mapper_entry_s +{ + PINT_worker_mapping_callout fn; + struct qlist_head link; +}; + +static void PINT_worker_destroy(PINT_manager_t manager, + struct PINT_worker_s *worker); + +static int PINT_manager_find_worker(PINT_manager_t manager, + PINT_service_callout callout, + void *op_ptr, + PVFS_hint hint, + PVFS_id_gen_t input_work_id, + struct PINT_worker_s **result_worker, + PINT_queue_id *queue_id); + +static int PINT_queue_to_worker_compare(void *key, struct qhash_head *link); +static int PINT_queue_to_worker_hash(void *key, int tablesize); +static int PINT_op_entry_compare(void *key, struct qhash_head *link); +static int PINT_op_entry_hash(void *key, int tablesize); + +struct PINT_manager_event_handler_entry_s +{ + PINT_event_callback callback; + void *event_ptr; + + struct qlist_head link; +}; + +static void PINT_manager_op_start(PINT_manager_t manager, PINT_operation_t *op); +static void PINT_manager_op_end(PINT_manager_t manager, PINT_operation_t *op); + +inline static int PINT_op_entry_create(struct PINT_op_entry **op, + void *user_ptr, + PINT_service_callout callout, + void *op_ptr, + PVFS_hint hint, + PVFS_id_gen_t wq_id, + PINT_context_id context_id); + +/** + * Create a manager that can be referenced and used for posting + * operations. Be default a new manager has a blocking worker + * (the post call blocks while servicing the operation) but nothing + * else. Other workers must be added to this manager using + * the PINT_manager_worker_add call. + * + * @param new_manager The new manager reference + * @param ctx The context id that all operations posted to this + * manager will by default get completed on. 
Operations can + * override this context by posting using the PINT_manager_ctx_post call. + * + * @return 0 on success + * -PVFS_ENOMEM if out of memory + */ +int PINT_manager_init( + PINT_manager_t *new_manager, + PINT_context_id ctx) +{ + int ret; + struct PINT_manager_s *manager; + PINT_worker_attr_t blocking_attr; + + manager = (struct PINT_manager_s *)malloc(sizeof(struct PINT_manager_s)); + if(!manager) + { + return -PVFS_ENOMEM; + } + + gen_mutex_init(&manager->mutex); + if(ctx) + { + PINT_context_reference(ctx); + manager->context = ctx; + } + + manager->queue_to_worker = qhash_init(PINT_queue_to_worker_compare, + PINT_queue_to_worker_hash, + PINT_QUEUE_TO_WORKER_TABLESIZE); + if(!manager->queue_to_worker) + { + free(manager); + return -PVFS_ENOMEM; + } + + INIT_QLIST_HEAD(&manager->op_maps); + INIT_QLIST_HEAD(&manager->workers); + INIT_QLIST_HEAD(&manager->event_handlers); + + gen_mutex_lock(&manager->mutex); + manager->ops = qhash_init(PINT_op_entry_compare, + PINT_op_entry_hash, + PINT_OP_ENTRY_TABLESIZE); + if(!manager->ops) + { + gen_mutex_unlock(&manager->mutex); + free(manager); + return -PVFS_ENOMEM; + } + + manager->op_count = 0; + gen_mutex_unlock(&manager->mutex); + + /* add the blocking worker to the manager. Every manager + * gets one of these. The blocking_id is held internally, and the global + * PINT_worker_blocking_id is used to reference the blocking worker. + */ + + blocking_attr.type = PINT_WORKER_TYPE_BLOCKING; + ret = PINT_manager_worker_add( + manager, &blocking_attr, &manager->blocking_id); + if(ret < 0) + { + qhash_finalize(manager->ops); + free(manager); + return ret; + } + + *new_manager = manager; + return 0; +} + +/** + * Destroy a manager. + * + * @param manager The manager to destroy. + * + * @return 0 on succes. + * -PVFS_EINVAL if operations are still being worked on. 
+ */
+int PINT_manager_destroy(PINT_manager_t manager)
+{
+    struct PINT_worker_s *worker, *tmp;
+    struct PINT_worker_id_mapper_entry_s *map, *tmp_map;
+
+    gen_mutex_lock(&manager->mutex);
+
+    if(manager->op_count > 0)
+    {
+        gen_mutex_unlock(&manager->mutex);
+        return -PVFS_EINVAL;
+    }
+
+    qhash_finalize(manager->ops);
+
+    qlist_for_each_entry_safe(worker, tmp, &manager->workers, link)
+    {
+        qlist_del(&worker->link);
+        PINT_worker_destroy(manager, worker);
+    }
+
+    /* release the mapping entries allocated by PINT_manager_add_map;
+     * nothing else owns them
+     */
+    qlist_for_each_entry_safe(map, tmp_map, &manager->op_maps, link)
+    {
+        qlist_del(&map->link);
+        free(map);
+    }
+
+    qhash_finalize(manager->queue_to_worker);
+
+    if(manager->context)
+    {
+        PINT_context_dereference(manager->context);
+    }
+
+    /* NOTE(review): the manager structure itself (allocated in
+     * PINT_manager_init) and any entries still in queue_to_worker are
+     * not released here -- confirm who is expected to free them.
+     */
+    gen_mutex_unlock(&manager->mutex);
+    return 0;
+}
+
+/**
+ * Add a worker to a manager.
+ *
+ * @param manager The manager to add the worker to
+ * @param attr The worker attrs.  This specifies both the type of
+ * worker, as well as the attributes defining how that worker should
+ * be initialized.
+ * @param worker_id The id of the worker created.  This id can
+ * be used to specify the worker on post calls, or via mapping from
+ * the operation type to this worker.
+ * + * @return 0 on success + * -PVFS_ENOMEM if out of memory + */ +int PINT_manager_worker_add(PINT_manager_t manager, + PINT_worker_attr_t *attr, + PINT_worker_id *worker_id) +{ + struct PINT_worker_s *worker = NULL; + struct PINT_worker_s *tmp_worker = NULL; + int ret = 0; + + assert(manager); + assert(attr); + + do + { + worker = malloc(sizeof(struct PINT_worker_s)); + if(!worker) + { + return -PVFS_ENOMEM; + } + memset(worker, 0, sizeof(struct PINT_worker_s)); + id_gen_fast_register(&worker->id, &worker->impl); + if(worker->id == PINT_worker_blocking_id) + { + /* happened to choose the id we statically defined + * for the blocking worker, so we try another */ + tmp_worker = worker; + worker = NULL; + } + } while(worker == NULL); + + if(tmp_worker) + { + free(tmp_worker); + } + + /* match the worker type to a worker implementation */ + worker->type = attr->type; + switch(worker->type) + { + case PINT_WORKER_TYPE_QUEUES: + worker->impl = &PINT_worker_queues_impl; + break; + case PINT_WORKER_TYPE_THREADED_QUEUES: + worker->impl = &PINT_worker_threaded_queues_impl; + break; + case PINT_WORKER_TYPE_PER_OP: + worker->impl = &PINT_worker_per_op_impl; + break; + case PINT_WORKER_TYPE_BLOCKING: + worker->impl = &PINT_worker_blocking_impl; + break; + case PINT_WORKER_TYPE_EXTERNAL: + worker->impl = &PINT_worker_external_impl; + break; + case PINT_WORKER_TYPE_POOL: + ret = -PVFS_ENOSYS; + goto free_worker; + break; + default: + assert(0); + } + + /* found a valid worker. initialize it! 
*/ + if(worker->impl->init) + { + ret = worker->impl->init(manager, &worker->inst, attr); + if(ret < 0) + { + return ret; + } + } + + /* add the worker to the list of workers */ + gen_mutex_lock(&manager->mutex); + qlist_add_tail(&worker->link, &manager->workers); + gen_mutex_unlock(&manager->mutex); + + /* use the worker->impl as the id so that worker impls can + * access the id as well + */ + *worker_id = worker->id; + + return 0; + +free_worker: + + free(worker); + return ret; +} + +/** + * Remove a worker from a manager. + * + * @param manager The manager + * @param id The worker id to remove from the manager + * + * @return 0 on success + */ +int PINT_manager_worker_remove(PINT_manager_t manager, PINT_worker_id wid) +{ + struct PINT_worker_s *worker; + + worker = id_gen_fast_lookup(wid); + assert(worker); + + gen_mutex_lock(&manager->mutex); + qlist_del(&worker->link); + gen_mutex_unlock(&manager->mutex); + + PINT_worker_destroy(manager, worker); + return 0; +} + +static void PINT_worker_destroy(PINT_manager_t manager, + struct PINT_worker_s *worker) +{ + int ret; + if(worker->impl->destroy) + { + ret = worker->impl->destroy(manager, &worker->inst); + if(ret < 0) + { + gossip_err("%s: worker_impl->destroy (type: %s) " + "failed with error: %d\n", + __func__, + worker->impl->name, + ret); + return; + } + } + + free(worker); + return; +} + +/** + * Add a queue to a worker. There are worker types that manage/service + * operations by first queuing them, and then servicing them one-by-one. + * These worker types require queues to be added to them. This function + * provides the interface into the individual worker type that adds + * the queue. 
+ * + * @param manager the manager holding the worker + * @param worker_id the worker to add the queue to + * @param queue_id the queue to add to the worker + * + * @return 0 on success + * -PVFS_ENOMEM if out of memory + * -PVFS_EINVAL if the worker_id doesn't match any workers held + * by the manager + */ +int PINT_manager_queue_add(PINT_manager_t manager, + PINT_worker_id worker_id, + PINT_queue_id qid) +{ + int ret = 0; + struct PINT_worker_s *worker; + struct PINT_worker_map_entry_s *entry; + + gen_mutex_lock(&manager->mutex); + + /* verify that worker has been added to manager */ + qlist_for_each_entry(worker, &manager->workers, link) + { + if(worker->id == worker_id) + { + break; + } + } + + if(worker->id != worker_id) + { + /* couldn't find a valid worker in the list that + * matches passed in id. + */ + gen_mutex_unlock(&manager->mutex); + return -PVFS_EINVAL; + } + + if(!worker->impl->queue_add) + { + gossip_err("%s: can't add queue to worker type %s\n", + __func__, + worker->impl->name); + return -PVFS_EINVAL; + } + + ret = worker->impl->queue_add(manager, &worker->inst, qid); + if(ret < 0) + { + goto done; + } + + /* map queue to worker in hashtable */ + entry = malloc(sizeof(struct PINT_worker_map_entry_s)); + if(!entry) + { + ret = -PVFS_ENOMEM; + goto done; + } + + entry->worker = worker; + entry->queue_id = qid; + qhash_add(manager->queue_to_worker, &qid, &entry->link); + +done: + gen_mutex_unlock(&manager->mutex); + return ret; +} + +/** + * Remove a queue from a worker. Internally, the manager keeps track of + * which queues were added to which workers, so only the queue id is needed. 
+ * + * @param manager The manager holding the worker holding the queue + * @param queue_id The queue_id to remove + * + * @param 0 on success + * @param -PVFS_EINVAL if the queue isn't held by any workers held + * by this manager + */ +int PINT_manager_queue_remove(PINT_manager_t manager, PINT_queue_id id) +{ + int ret; + struct PINT_worker_map_entry_s *entry; + struct qlist_head *link; + + gen_mutex_lock(&manager->mutex); + + /* find the worker where the queue lives */ + link = qhash_search_and_remove(manager->queue_to_worker, &id); + if(!link) + { + gen_mutex_unlock(&manager->mutex); + return -PVFS_EINVAL; + } + + entry = qhash_entry(link, struct PINT_worker_map_entry_s, link); + + ret = entry->worker->impl->queue_remove(manager, &entry->worker->inst, id); + + /* free the entry that was in the queue_to_worker hashtable, since + * we've just removed queue. + */ + free(entry); + + gen_mutex_unlock(&manager->mutex); + return ret; +} + +/** + * Post an operation to the manager. The operation is completed to the + * context specified when the worker was created. + * + * @param manager the manager to post to + * @param user_ptr the caller's object to pass to the completion context + * @param mid the op id returned to the caller to keep track of this + * chained operation. + * @param callout This is the actual service function for + * the operation. + * @param op_ptr The operation pointer to pass to the service function. + * @param hint A set of hints that can specify how the operation is + * to be serviced. + * @param queue_worker_id The worker id or queue id specifying that the + * operation should be added to the worker with this id, or (if no + * such worker exists) to the queue with this id. + * If the specific worker id PINT_worker_implicit is used, the + * worker or queue will be chosen dynamically using the mapping functions + * specified by calls to PINT_manager_add_map. 
+ *
+ * @return PINT_MGMT_OP_POSTED if the op was posted successfully
+ * PINT_MGMT_OP_COMPLETED if the op completed immediately, either
+ * because the blocking worker was used, or because the op was
+ * completed speculatively.
+ */
+int PINT_manager_id_post(PINT_manager_t manager,
+                         void *user_ptr,
+                         PINT_op_id *id,
+                         PINT_service_callout callout,
+                         void *op_ptr,
+                         PVFS_hint hint,
+                         PVFS_id_gen_t queue_worker_id)
+{
+    /* complete to the context the manager itself holds */
+    PINT_context_id default_context = manager->context;
+
+    return PINT_manager_ctx_post(manager, default_context, user_ptr,
+                                 id, callout, op_ptr, hint, queue_worker_id);
+}
+
+/**
+ * Post an operation to the manager with an explicit context.
+ *
+ * @param manager the manager to post to
+ * @param context_id the context to complete to.
+ * @param user_ptr the caller's object to pass to the completion context
+ * @param id the op id returned to the caller to keep track of this
+ * chained operation.  May be NULL if the caller does not need the id.
+ * @param callout This is the actual service function for
+ * the operation.
+ * @param op_ptr The operation pointer to pass to the service function.
+ * @param hint A set of hints that can specify how the operation is
+ * to be serviced.
+ * @param worker_id The worker id or queue id specifying that the
+ * operation should be added to the worker with this id, or (if no
+ * such worker exists) to the queue with this id.
+ * If the specific worker id PINT_worker_implicit_id is used, the
+ * worker or queue will be chosen dynamically using the mapping functions
+ * specified by calls to PINT_manager_add_map.
+ *
+ * @return PINT_MGMT_OP_POSTED if the op was posted successfully
+ * PINT_MGMT_OP_COMPLETED if the op completed immediately, either
+ * because the blocking worker was used, or because the op was
+ * completed speculatively.
+ */ +int PINT_manager_ctx_post(PINT_manager_t manager, + PINT_context_id context_id, + void *user_ptr, + PINT_op_id *id, + PINT_service_callout callout, + void *op_ptr, + PVFS_hint hint, + PVFS_id_gen_t worker_id) +{ + struct PINT_op_entry *op_entry; + struct PINT_worker_s *worker; + PINT_queue_id queue_id; + int ret = 0; + + ret = PINT_manager_find_worker( + manager, callout, op_ptr, hint, worker_id, &worker, &queue_id); + if(ret < 0) + { + return ret; + } + + /* special case for blocking worker - don't allocate op entry. We + * need to know in advance that the type of worker isn't going to queue + * (or reference) the operation pointer passed into the post call + */ + if(PINT_WORKER_TYPE_BLOCKING == worker->type) + { + PINT_operation_t op; + + op.operation = callout; + op.operation_ptr = op_ptr; + PVFS_hint_copy(hint, &op.hint); + + ret = worker->impl->post(manager, &worker->inst, 0, &op); + assert(ret != PINT_MGMT_OP_POSTED); + + if(id) + { + *id = -1; + } + + return ret; + } + + ret = PINT_op_entry_create( + &op_entry, user_ptr, callout, op_ptr, hint, worker_id, context_id); + if(ret < 0) + { + return ret; + } + + gossip_debug(GOSSIP_MGMT_DEBUG, + "[MGMT]: manager ops: adding op id: %llu\n", + llu(op_entry->op.id)); + gen_mutex_lock(&manager->mutex); + ret = id_gen_safe_register(&op_entry->op.id, op_entry); + if (ret < 0) + { + return ret; + } + qhash_add(manager->ops, &op_entry->op.id, &op_entry->link); + manager->op_count++; + gen_mutex_unlock(&manager->mutex); + if(id) + { + *id = op_entry->op.id; + } + + ret = worker->impl->post( + manager, &worker->inst, queue_id, &op_entry->op); + if(ret < 0 || PINT_MGMT_OP_COMPLETED == ret) + { + /* if there was an error (either from a blocking operation serviced or + * from just posting the operation), we stop all servicing for this + * operation and return the error + */ + gen_mutex_lock(&manager->mutex); + id_gen_safe_unregister(op_entry->op.id); + qhash_search_and_remove(manager->ops, &op_entry->op.id); + 
gen_mutex_unlock(&manager->mutex); + + free(op_entry); + return ret; + } + + return ret; +} + +inline static int PINT_op_entry_create(struct PINT_op_entry **op, + void *user_ptr, + PINT_service_callout callout, + void *op_ptr, + PVFS_hint hint, + PVFS_id_gen_t wq_id, + PINT_context_id context_id) +{ + struct PINT_op_entry *opentry; + + opentry = malloc(sizeof(*opentry)); + if(!opentry) + { + return -PVFS_ENOMEM; + } + memset(opentry, 0, sizeof(*opentry)); + + opentry->user_ptr = user_ptr; + + opentry->op.operation = callout; + opentry->op.operation_ptr = op_ptr; + if(hint) + { + PVFS_hint_copy(hint, &opentry->op.hint); + } + + opentry->wq_id = wq_id; + opentry->ctx_id = context_id; + + *op = opentry; + return 0; +} + +static int PINT_manager_find_worker(PINT_manager_t manager, + PINT_service_callout callout, + void *op_ptr, + PVFS_hint hint, + PVFS_id_gen_t input_worker_id, + struct PINT_worker_s **result_worker, + PINT_queue_id *queue_id) +{ + struct PINT_worker_map_entry_s *worker_entry; + struct PINT_worker_s *w; + int ret = 0; + struct qhash_head *listlink; + PVFS_id_gen_t result_worker_id; + + gen_mutex_lock(&manager->mutex); + + result_worker_id = input_worker_id; + + /* if the queue/worker id refers to the global blocking worker id, we + * set it to the correct id maintained by the manager + */ + if(input_worker_id == PINT_worker_blocking_id) + { + result_worker_id = manager->blocking_id; + } + + /* if the queue/worker id is 0, assume the queue/worker id is supposed + * to be fetched from the id mapping functions. 
(or if there's only one + * worker that isn't a queue type or only has one queue) + */ + if(input_worker_id == PINT_worker_implicit_id && callout != NULL) + { + struct PINT_worker_id_mapper_entry_s *map; + + /* get queue/worker from mapper functions */ + qlist_for_each_entry(map, &manager->op_maps, link) + { + /* try each mapping function to see if it returns + * a queue or worker id that this operation should be added to + */ + ret = map->fn(manager, + callout, + op_ptr, + hint, + &result_worker_id); + if(0 == ret && result_worker_id != PINT_worker_implicit_id) + { + /* found one! */ + break; + } + + if(ret < 0) + { + goto exit; + } + } + + if(0 == ret && PINT_worker_implicit_id == result_worker_id) + { + /* didn't find any worker/queue ids in any of the mapping + * functions, so use the blocking worker + */ + result_worker_id = manager->blocking_id; + } + } + + /* so now we should have a valid worker/queue id */ + + /* check that the queue/worker id is a worker id that the manager + * manages. Otherwise assume its a queue id and look for + * the associated worker in the queue-to-worker map. + */ + qlist_for_each_entry(w, &manager->workers, link) + { + if(w->id == result_worker_id) + { + /* Its a worker id. This should only be specified for + * worker types that don't manage queues + */ + *result_worker = w; + *queue_id = 0; + goto exit; + } + } + + /* if its not a recognized worker id, assume its a queue id + * and verify that its a queue maintained by one of the workers + */ + listlink = qhash_search(manager->queue_to_worker, &result_worker_id); + if(!listlink) + { + /* not a worker id/queue id we recognize. 
*/ + gossip_err("%s: operation posted with a queue id (%llu) that isn't " + "held by this manager\n", + __func__, llu(result_worker_id)); + ret = -PVFS_EINVAL; + goto exit; + } + + worker_entry = qhash_entry( + listlink, struct PINT_worker_map_entry_s, link); + *result_worker = worker_entry->worker; + *queue_id = result_worker_id; + +exit: + + gen_mutex_unlock(&manager->mutex); + return ret; +} + +/** + * Add an id to queue/worker callback. Operations can be posted without + * specifying explicitly which worker or queue to post the operation to. + * This allows changing the behavior of how certain operations are serviced + * (which worker handles them, etc.) dynamically via mapping functions that + * map the operation (based on its type or the hints specified) to the worker + * or queue that should eventually service it. The mapping callout functions + * are kept in a list by the manager. Once an 'implicit' operation is + * posted, the mapping functions are called in the order they were added + * to the manager until a valid worker or queue id is returned. + * + * @param manager the manager to add the mapping callout to + * @param callout the callout function to add + * + * @return 0 on success + * -PVFS_error on erro + */ +int PINT_manager_add_map(PINT_manager_t manager, + PINT_worker_mapping_callout map) +{ + struct PINT_worker_id_mapper_entry_s *entry; + + entry = malloc(sizeof(struct PINT_worker_id_mapper_entry_s)); + if(!entry) + { + return -PVFS_ENOMEM; + } + + entry->fn = map; + + qlist_add_tail(&entry->link, &manager->op_maps); + + return 0; +} + +/** + * Test for completion on a particular context. This is a wrapper + * function for the PINT_context_testall function, because the queue + * worker does not service operations in a separate thread. Instead it + * does all work in the actual test call. This call essentiall tries + * to return completed operations, or do work on queue-worker operations + * and returned those that completed. 
+ * + * @param manager the manager + * @param context_id the context to test on + * @param opcount As an input parameter, this holds the sizes of the + * arrays for the following parameters (max number of ops that + * can be returned). As an output parameter, this holds the + * actual number of completed ops returned. + * @param mids the completed operation ids + * @param user_ptrs the completed operation user pointers + * @param errors the errors returned from each serviced operation + * @param microsecs A hint as to how long to wait for some completed + * operations. There's no guarantee that this function will + * return before the the timeout, but it should be around there. + * + * @return 0 on success + * -PVFS_ETIMEOUT if the timeout was reached and no ops were completed + * -PVFS_error on error + */ +int PINT_manager_test_context(PINT_manager_t manager, + PINT_context_id context, + int *opcount, + PINT_op_id *ids, + void **user_ptrs, + PVFS_error *errors, + int microsecs) +{ + int ret; + int count; + struct PINT_worker_s *worker; + struct timeval start, now; + int timeleft = microsecs; + + gettimeofday(&start, NULL); + + count = *opcount; + + /* we don't want to wait for operations to complete here in case + * operations get serviced later in this function, so we just check + * that there aren't a bunch of operations that already completed + * that would fill up the output arrays of completed operations. + */ + ret = PINT_context_test_all( + context, opcount, ids, user_ptrs, errors, 0); + if(ret < 0 && ret != -PVFS_ETIMEDOUT) + { + gossip_debug(GOSSIP_MGMT_DEBUG, "%s: context_test_all failed: %d\n", + __func__, ret); + return ret; + } + + /* if the test_all succeeds but returns zero in opcount or times-out + * we want to try to do work on non-threaded workers and test again + */ + /* if no operations have completed, then we try to make progress + * on workers that don't do work themselves. 
+ */ + if(0 == *opcount) + { + gen_mutex_lock(&manager->mutex); + /* try to do work if the op is in that type of worker */ + qlist_for_each_entry(worker, &manager->workers, link) + { + gen_mutex_unlock(&manager->mutex); + if(worker->impl->do_work) + { + ret = worker->impl->do_work(manager, &worker->inst, + context, 0, timeleft); + if(ret < 0) + { + return ret; + } + break; + } + gen_mutex_lock(&manager->mutex); + } + gen_mutex_unlock(&manager->mutex); + + /* test again. */ + *opcount = count; + + /* If the timeout is forever, keep testing until + * we've filled in the output arrays or an error occurs + */ + if(PINT_MGMT_TIMEOUT_NONE == microsecs) + { + microsecs = DEFAULT_TIMEOUT_MICROSECS; + do + { + gossip_debug( + GOSSIP_MGMT_DEBUG, + "%s: calling context_test_all again: opcount: %d\n", + __func__, *opcount); + ret = PINT_context_test_all( + manager->context, opcount, ids, + user_ptrs, errors, microsecs); + gossip_debug(GOSSIP_MGMT_DEBUG, + "%s: context_test_all: res: %d, " + "opcount: %d, " + "microsecs: %d\n", + __func__, ret, *opcount, microsecs); + + } while(0 == ret && 0 == *opcount); + } + else + { + /* see how much time is left. If there's no timeleft (negative) + * then we zero and get any ops that completed during the callout + * to do_work + */ + gettimeofday(&now, NULL); + timeleft = microsecs - + (((now.tv_sec * 1e6) + now.tv_usec) - + ((start.tv_sec * 1e6) + start.tv_usec)); + if(timeleft < 0) + { + timeleft = 0; + } + + gossip_debug( + GOSSIP_MGMT_DEBUG, + "%s: calling context_test_all again: " + "opcount: %d, timeleft: %d\n", + __func__, *opcount, timeleft); + ret = PINT_context_test_all( + manager->context, opcount, ids, user_ptrs, errors, timeleft); + } + } + + return 0; +} + +/** + * Test for completion on the manager's context, and returns an array + * of completed operations on that context. If the manager manages workers + * that implement the do_work callback (i.e. 
they don't service operations + * in a separate thread), this function will also drive work for those workers. + * This function should only be called if the context associated with this manager + * is of the queueing type. If the context is a callback type, PINT_manager_wait + * should be used. + * + * @param manager the manager + * @param opcount As an input parameter, this holds the sizes of the + * arrays for the following parameters (max number of ops that + * can be returned). As an output parameter, this holds the + * actual number of completed ops returned. + * @param mids the completed operation ids + * @param user_ptrs the completed operation user pointers + * @param errors the errors returned from each serviced operation + * @param microsecs A hint as to how long to wait for some completed + * operations. There's no guarantee that this function will + * return before the the timeout, but it should be around there. + * + * @return 0 on success, PINT_CONTEXT_TYPE_CALLBACK if the context tested + * on is a callback context. + * -PVFS_ETIMEOUT if the timeout was reached and no ops were completed + * -PVFS_error on error + */ +int PINT_manager_test(PINT_manager_t manager, + int *count, + PINT_op_id *op_ids, + void **user_ptrs, + PVFS_error *errors, + int timeout_ms) +{ + return PINT_manager_test_context( + manager, manager->context, count, + op_ids, user_ptrs, errors, timeout_ms); +} + +/* Test for completion on an individual operation. This function tests for + * completion of the operation specified, or tries to do work if its not + * completed and the operation is in a worker that doesn't service operations + * in a separate thread. 
+ * + * @param manager the manager the operation was posted to + * @param op_id the operation id to test on + * @param user_ptr the user pointer for the completed operation + * @param error the error value returned by the service callback + * @param microsecs timeout to wait for completion of the operation + * + * @return 0 on success, -PVFS_EBUSY if the operation hasn't completed, or + * PINT_CONTEXT_TYPE_CALLBACK if the context type was a callback type. + * Otherwise, return -PVFS_error on error + */ +int PINT_manager_test_op(PINT_manager_t manager, + PINT_op_id op_id, + void **user_ptr, + PVFS_error *error, + int microsecs) +{ + int ret; + struct PINT_worker_s *worker; + struct PINT_op_entry *entry; + PINT_context_id context; + + entry = id_gen_safe_lookup(op_id); + if(!entry) + { + return -PVFS_EINVAL; + } + context = entry->ctx_id; + + /* don't use the entry for anything else since it might get freed + * by the worker calling complete_op + */ + entry = NULL; + + /* test for completion -- set the timeout to + * return immediately if not complete */ + ret = PINT_context_test(context, op_id, user_ptr, error, 0); + if(ret == 0 || ret != -PVFS_ENOENT) + { + /* if the op was completed and the user_ptr and error filled in, + * then we can return. Or if there was a fatal error we return. + */ + return ret; + } + + gen_mutex_lock(&manager->mutex); + + /* must be ENOENT or a callback context, + * so we try to service if its in a worker that + * doesn't service operations separately + */ + qlist_for_each_entry(worker, &manager->workers, link) + { + gen_mutex_unlock(&manager->mutex); + if(worker->impl->do_work) + { + ret = worker->impl->do_work( + manager, &worker->inst, context, &entry->op, microsecs); + if(ret < 0) + { + return ret; + } + } + gen_mutex_lock(&manager->mutex); + } + gen_mutex_unlock(&manager->mutex); + + assert(!PINT_context_is_callback(context)); + + /* now test again with the timeout. 
If the timeout is zero, + * wait indefinitely */ + if(PINT_MGMT_TIMEOUT_NONE == microsecs) + { + ret = -PVFS_ENOENT; + microsecs = 1000; + while(ret == -PVFS_ENOENT) + { + ret = PINT_context_test( + context, op_id, user_ptr, error, microsecs); + } + } + else + { + ret = PINT_context_test( + context, op_id, user_ptr, error, microsecs); + } + + return ret; +} + +int PINT_manager_wait(PINT_manager_t manager, + int microsecs) +{ + return PINT_manager_wait_context(manager, manager->context, microsecs); +} + +/* Cancel an individual operation. This function attempts to cancel + * a posted operation. + * + * @param manager the manager the operation was posted to + * @param op_id the operation id to test on + * + * @return 0 on successful cancellation, otherwise, return -PVFS_error on error + */ +int PINT_manager_cancel(PINT_manager_t manager, + PINT_op_id op_id) +{ + int ret; + struct PINT_worker_s *worker; + PINT_queue_id queue_id; + struct PINT_op_entry *entry; + PINT_operation_t *op; + PINT_context_id context; + int (*cancel_impl)(struct PINT_manager_s *, + PINT_worker_inst *, + PINT_queue_id, + PINT_operation_t *); + + entry = id_gen_safe_lookup(op_id); + if(!entry) + { + return -PVFS_EINVAL; + } + ret = PINT_manager_find_worker( + manager, NULL, NULL, NULL, entry->wq_id, &worker, &queue_id); + if (ret != 0) + { + return ret; + } + context = entry->ctx_id; + op = &entry->op; + + /* don't use the entry for anything else since it might get freed + * by the worker calling complete_op + */ + entry = NULL; + + gen_mutex_lock(&manager->mutex); + cancel_impl = worker->impl->cancel; + gen_mutex_unlock(&manager->mutex); + + if(cancel_impl) + { + ret = cancel_impl( + manager, &worker->inst, context, op); + if(ret < 0) + { + return ret; + } + } + return 0; +} + +int PINT_manager_wait_context(PINT_manager_t manager, + PINT_context_id context, + int microsecs) +{ + int ret = 0; + struct PINT_worker_s *worker; + + /* can only wait on callback contexts. 
queue contexts require + * test calls */ + assert(PINT_context_is_callback(context)); + + gen_mutex_lock(&manager->mutex); + qlist_for_each_entry(worker, &manager->workers, link) + { + gen_mutex_unlock(&manager->mutex); + if(worker->impl->do_work) + { + ret = worker->impl->do_work( + manager, &worker->inst, context, 0, microsecs); + if(ret < 0) + { + return ret; + } + } + gen_mutex_lock(&manager->mutex); + } + + gen_mutex_unlock(&manager->mutex); + return ret; +} + +int PINT_manager_wait_op(PINT_manager_t manager, + PINT_op_id op_id, + int microsecs) +{ + int ret = 0; + struct PINT_worker_s *worker; + struct PINT_op_entry *entry; + int msecs_remaining; + struct timeval last, now; + + entry = id_gen_safe_lookup(op_id); + if(!entry) + { + return -PVFS_EINVAL; + } + + /* can only wait on callback contexts. queue contexts require + * test calls */ + assert(PINT_context_is_callback(entry->ctx_id)); + + msecs_remaining = microsecs; + gettimeofday(&last, NULL); + + gen_mutex_lock(&manager->mutex); + qlist_for_each_entry(worker, &manager->workers, link) + { + gen_mutex_unlock(&manager->mutex); + if(worker->impl->do_work) + { + ret = worker->impl->do_work( + manager, &worker->inst, entry->ctx_id, + &entry->op, msecs_remaining); + if(ret < 0) + { + return ret; + } + gettimeofday(&now, NULL); + msecs_remaining -= + ((now.tv_sec * 1e6) + now.tv_usec) - + ((last.tv_sec * 1e6) + last.tv_usec); + last = now; + if(msecs_remaining < 0) + { + break; + } + } + gen_mutex_lock(&manager->mutex); + } + + gen_mutex_unlock(&manager->mutex); + return ret; +} + +static int PINT_queue_to_worker_compare(void *key, struct qhash_head *link) +{ + PINT_queue_id *queue_id; + struct PINT_worker_map_entry_s *entry; + + queue_id = (PINT_queue_id *) key; + entry = qhash_entry(link, struct PINT_worker_map_entry_s, link); + + if(*queue_id == entry->queue_id) + { + return 1; + } + return 0; +} + +static int PINT_queue_to_worker_hash(void *key, int tablesize) +{ + unsigned long ret = 0; + PINT_queue_id 
*queue_id; + + queue_id = (PINT_queue_id *)key; + + ret += *queue_id; + ret = ret & (tablesize - 1); + + return (int) ret; +} + +static int PINT_op_entry_compare(void *key, struct qhash_head *link) +{ + PINT_op_id *id; + struct PINT_op_entry *entry; + + id = (PINT_op_id *)key; + entry = qhash_entry(link, struct PINT_op_entry, link); + + if(entry->op.id == *id) + { + return 1; + } + return 0; +} + +static int PINT_op_entry_hash(void *key, int tablesize) +{ + unsigned long ret = 0; + PINT_op_id * id = (PINT_op_id *)key; + + ret += *id; + ret = ret & (tablesize - 1); + + return (int) ret; +} + +static void PINT_manager_op_start(PINT_manager_t manager, PINT_operation_t *op) +{ + struct PINT_manager_event_handler_entry_s *handler; + + /* invoke the event callbacks for this manager */ + qlist_for_each_entry(handler, &manager->event_handlers, link) + { + handler->callback( + PINT_OP_EVENT_START, handler->event_ptr, op->id, op->hint); + } +} + +static void PINT_manager_op_end(PINT_manager_t manager, PINT_operation_t *op) +{ + struct PINT_manager_event_handler_entry_s *handler; + + qlist_for_each_entry(handler, &manager->event_handlers, link) + { + handler->callback( + PINT_OP_EVENT_END, handler->event_ptr, op->id, op->hint); + } +} + +/** + * Add an event handler to get called on operation start/stop events. 
+ * + * @param manager the manager to add the handler to + * @param callback the event handler function + * @param event_ptr a user pointer to be passed to each event call + * + * @return 0 on success, -PVFS_ENOMEM if out of memory + */ +void PINT_manager_event_handler_add(PINT_manager_t manager, + PINT_event_callback callback, + void *event_ptr) +{ + struct PINT_manager_event_handler_entry_s *entry; + + entry = malloc(sizeof(struct PINT_manager_event_handler_entry_s)); + assert(entry); + + entry->callback = callback; + entry->event_ptr = event_ptr; + + gen_mutex_lock(&manager->mutex); + qlist_add(&entry->link, &manager->event_handlers); + gen_mutex_unlock(&manager->mutex); +} + +/** + * Service an operation. This function is a wrapper for calling the operation + * callback. It should only be called by workers. Besides servicing the operation, + * it triggers events and keeps track of service time. + * + * @param manager the manager used to service the operation + * @param op the operation to service + * @param service_time the time elapsed in microsecs to service the operation + * @param error the error value returned from the operation callback + * + * @return 0 on successful servicing. Right now this function always succeeds. + */ +int PINT_manager_service_op(PINT_manager_t manager, + PINT_operation_t *op, + int *service_time, + int *error) +{ + struct timeval after; + /* get the timestamp for when the operation began servicing */ + PINT_util_get_current_timeval(&op->timestamp); + + PINT_manager_op_start(manager, op); + + /* service */ + *error = op->operation(op->operation_ptr, op->hint); + + PINT_manager_op_end(manager, op); + + PINT_util_get_current_timeval(&after); + *service_time = PINT_util_get_timeval_diff(&op->timestamp, &after); + + return 0; +} + +/** + * Complete an operation. 
This function tells the manager to complete
+ * the operation (add the op to the completion queue), and post the next
+ * operation if this operation is part of a chained list of operations
+ * managed by this manager.  This function should only be called by the
+ * worker implementations.
+ *
+ * @param manager the manager for the operation
+ * @param op the operation to complete
+ * @param error the error returned by the operation callback
+ *
+ * @return the result of PINT_context_complete (0 on success, -PVFS_error
+ *         on error).  If the operation
+ *         isn't found in the list of operations managed by this
+ *         manager, -PVFS_EINVAL is returned.  As such, errors returned
+ *         by this function are likely programmer errors, not system
+ *         errors.
+ */
+int PINT_manager_complete_op(PINT_manager_t manager,
+                             PINT_operation_t *op,
+                             int error)
+{
+    struct qhash_head *hash_entry;
+    struct PINT_op_entry *entry;
+    int ret;
+    struct qhash_head *link;
+
+    gen_mutex_lock(&manager->mutex);
+    hash_entry = qhash_search(manager->ops, &op->id);
+    if(!hash_entry)
+    {
+        /* failed to get the managed op out of the manager operations queue */
+        gossip_err("%s: failed to get the managed op %llu out of the "
+                   "manager operations queue\n",
+                   __func__, llu(op->id));
+        gen_mutex_unlock(&manager->mutex);
+        return -PVFS_EINVAL;
+    }
+    manager->op_count--;
+    gen_mutex_unlock(&manager->mutex);
+
+    /* NOTE(review): from here until the removal below the entry is still
+     * in the table but is used without the mutex held -- confirm nothing
+     * else can remove or free it in between.
+     */
+    entry = qhash_entry(hash_entry, struct PINT_op_entry, link);
+    entry->error = error;
+
+    /* NOTE(review): completion is posted to manager->context, while
+     * PINT_manager_test_op polls entry->ctx_id -- confirm these agree for
+     * operations posted through PINT_manager_ctx_post.
+     */
+    ret = PINT_context_complete(manager->context,
+                                entry->op.id,
+                                entry->user_ptr,
+                                entry->error);
+    gen_mutex_lock(&manager->mutex);
+    link = qhash_search_and_remove(
+        manager->ops, &entry->op.id);
+    gen_mutex_unlock(&manager->mutex);
+
+    /* for now we ignore whether the op was in the table
+     * since blocking calls never add the op
+     * to the table
+     * (link is assigned only so that the result has somewhere to go)
+     */
+    return ret;
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ * End:
+ *
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/src/common/mgmt/pint-mgmt.h b/src/common/mgmt/pint-mgmt.h
new file mode 100644
index 0000000..8bce2cc
--- /dev/null
+++ b/src/common/mgmt/pint-mgmt.h
@@ -0,0 +1,190 @@
+/*
+ * (C) 2006 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#ifndef PINT_MGMT_H
+#define PINT_MGMT_H
+
+#include "id-generator.h"
+#include "pint-context.h"
+#include "pint-worker.h"
+#include "pint-queue.h"
+
+/**
+ * @defgroup pint-mgmt
+ *
+ * The PVFS management interfaces provide abstraction for posting and testing
+ * operations, and allowing them to be serviced via different methods,
+ * both blocking and asynchronous (using different models such as
+ * thread pools or queues).  An 'operation' is a unit of work defined by
+ * the caller via a callback function.  The management interfaces allow
+ * callers to hand off these units of work, ignore the details of how
+ * and when the operation is serviced, and test for completion later.
+ *
+ * Use of the API is done by creating a 'manager',
+ * which provides a common reference for all the operations that should
+ * be grouped together somehow.  To a manager are added 'workers', which
+ * specify how an operation is serviced.  Some examples of worker types are
+ * 'per-op', which creates a thread for that operation and services it,
+ * 'blocking', which blocks on the post call, services the operation and
+ * returns, or 'threaded-queues', which create a number of threads and
+ * pull operations off queues in-turn to service them.  Finally, once a
+ * manager has been setup with the appropriate workers, operations can
+ * be posted to the manager.  The worker that services the operation
+ * is chosen either explicitly in the post call, or dynamically using
+ * the type of the operation as the key.  Notification
+ * of completed operations is made through the completion context,
+ * which is specified for a particular manager, or passed in with the
+ * post call.
+ *
+ */
+
+enum
+{
+    /**
+     * The post calls will return POSTED if the operation was posted
+     * but not complete.
+     */
+    PINT_MGMT_OP_POSTED = 0,
+
+    /**
+     * The service functions will return COMPLETED if the operation was completed
+     * (either by blocking or because it was done speculatively).
+     */
+    PINT_MGMT_OP_COMPLETED = 1,
+
+    /**
+     * The service functions will return CONTINUE if the operation wasn't completed
+     * but added back to the queue, and will be completed later.
+     */
+    PINT_MGMT_OP_CONTINUE = 2
+};
+
+typedef struct PINT_manager_s *PINT_manager_t;
+
+int PINT_manager_init(PINT_manager_t *new_manager,
+                      PINT_context_id ctx);
+
+int PINT_manager_destroy(PINT_manager_t manager);
+
+int PINT_manager_worker_add(PINT_manager_t manager,
+                            PINT_worker_attr_t *attr,
+                            PINT_worker_id *worker_id);
+
+int PINT_manager_worker_remove(PINT_manager_t manager, PINT_worker_id id);
+
+int PINT_manager_queue_add(PINT_manager_t manager,
+                           PINT_worker_id worker_id,
+                           PINT_queue_id queue_id);
+
+int PINT_manager_queue_remove(PINT_manager_t manager,
+                              PINT_queue_id queue_id);
+
+/* post an operation without specifying a worker explicitly.  The management
+ * code tries to figure out the proper queue/worker to use for
+ * this operation based on the op-to-worker mappings
+ */
+#define PINT_manager_post(mgr, ptr, mid, callout, opptr, hint) \
+    PINT_manager_id_post( \
+        mgr, ptr, mid, callout, opptr, hint, PINT_worker_implicit_id)
+
+int PINT_manager_id_post(PINT_manager_t manager,
+                         void *user_ptr,
+                         PINT_op_id *id,
+                         PINT_service_callout callout,
+                         void *op_ptr,
+                         PVFS_hint hint,
+                         PVFS_id_gen_t queue_worker_id);
+
+int PINT_manager_ctx_post(PINT_manager_t manager,
+                          PINT_context_id context_id,
+                          void *user_ptr,
+                          PINT_op_id *id,
+                          PINT_service_callout callout,
+                          void *op_ptr,
+                          PVFS_hint hint,
+                          PVFS_id_gen_t queue_worker_id);
+
+int PINT_manager_cancel(PINT_manager_t manager,
+                        PINT_op_id op_id);
+
+
+/* Callback registered with PINT_manager_add_map.
+ * NOTE(review): presumably fills in *id with the queue or worker that
+ * should service the operation -- the caller is not visible here.
+ */
+typedef int (*PINT_worker_mapping_callout) (PINT_manager_t manager,
+                                            PINT_service_callout callout,
+                                            void *op_ptr,
+                                            PVFS_hint hint,
+                                            PVFS_id_gen_t *id);
+
+int PINT_manager_add_map(PINT_manager_t manager,
+                         PINT_worker_mapping_callout map);
+
+/* Timeout value that makes the test functions keep testing until an
+ * operation completes or an error occurs.  The microsecs parameters below
+ * are ints, so this compares equal to a timeout of -1.
+ */
+#define PINT_MGMT_TIMEOUT_NONE 0xFFFFFFFF
+
+int PINT_manager_test_context(PINT_manager_t manager,
+                              PINT_context_id context_id,
+                              int * opcount,
+                              PINT_op_id *ids,
+                              void **user_ptrs,
+                              PVFS_error *errors,
+                              int microsecs);
+
+int PINT_manager_test(PINT_manager_t manager,
+                      int *opcount,
+                      PINT_op_id *ids,
+                      void **user_ptrs,
+                      PVFS_error *errors,
+                      int microsecs);
+
+int PINT_manager_test_op(PINT_manager_t manager,
+                         PINT_op_id op_id,
+                         void **user_ptr,
+                         PVFS_error *error,
+                         int microsecs);
+
+int PINT_manager_wait_context(PINT_manager_t manager,
+                              PINT_context_id context_id,
+                              int microsecs);
+
+int PINT_manager_wait(PINT_manager_t manager,
+                      int microsecs);
+
+int PINT_manager_wait_op(PINT_manager_t manager,
+                         PINT_op_id op_id,
+                         int microsecs);
+
+/* The next two are meant to be called by worker implementations only. */
+int PINT_manager_service_op(PINT_manager_t manager,
+                            PINT_operation_t *op,
+                            int *service_time,
+                            int *error);
+
+int PINT_manager_complete_op(PINT_manager_t manager,
+                             PINT_operation_t *op,
+                             int error);
+
+/* Event handling. */ + +enum PINT_event_type +{ + PINT_OP_EVENT_START = 1, + PINT_OP_EVENT_END = 2 +}; + +typedef void (*PINT_event_callback) ( + enum PINT_event_type type, void *event_ptr, PINT_op_id id, PVFS_hint hint); + +void PINT_manager_event_handler_add(PINT_manager_t manager, + PINT_event_callback callback, + void *event_ptr); + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-op.c b/src/common/mgmt/pint-op.c new file mode 100644 index 0000000..14cb68d --- /dev/null +++ b/src/common/mgmt/pint-op.c @@ -0,0 +1,28 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include "pint-op.h" +#include + +int PINT_op_queue_find_op_id_callback(PINT_queue_entry_t *entry, void *user_ptr) +{ + if(*((PINT_op_id *)user_ptr) == PINT_op_from_qentry(entry)->id) + { + return 1; + } + + return 0; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/common/mgmt/pint-op.h b/src/common/mgmt/pint-op.h new file mode 100644 index 0000000..aa0b9f6 --- /dev/null +++ b/src/common/mgmt/pint-op.h @@ -0,0 +1,58 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#ifndef PINT_OP_H +#define PINT_OP_H + +#include "pint-queue.h" + +typedef PVFS_id_gen_t PINT_op_id; + +/* returns 0 on sucess or + * a negative error value + */ +typedef int (* PINT_service_callout)( + void *user_ptr, PVFS_hint hints); + +typedef struct +{ + PINT_op_id id; + PINT_service_callout operation; + PINT_service_callout cancel; + void *operation_ptr; + PVFS_hint hint; + + /* Used to manage things like time in queue, time servicing, etc. */ + struct timeval timestamp; + + /* Used by the queue this op is added to. 
Prevents extra mem allocation */ + PINT_queue_entry_t qentry; +} PINT_operation_t; + +int PINT_op_queue_find_op_id_callback(PINT_queue_entry_t *entry, void *user_ptr); + +#define PINT_operation_fill(op, id, fn, ptr, hint) \ + do { \ + op->op_id = id; \ + op->fn = operation; \ + op->operation_ptr = ptr; \ + op->hint = hint; \ + } while(0) + +#endif + +#define PINT_op_from_qentry(qe) \ + PINT_queue_entry_object((qe), PINT_operation_t, qentry) + + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-queue.c b/src/common/mgmt/pint-queue.c new file mode 100644 index 0000000..ddaa21e --- /dev/null +++ b/src/common/mgmt/pint-queue.c @@ -0,0 +1,632 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include "pint-queue.h" +#include "gossip.h" +#include +#include +#include +#include "pint-util.h" +#include "pvfs2-debug.h" + +struct PINT_queue_trigger +{ + enum PINT_queue_action action; + PINT_queue_trigger_callback trigger; + PINT_queue_trigger_destroy destroy; + void *user_ptr; + struct qlist_head link; +}; + +static int PINT_queue_update_stats(struct PINT_queue_s *queue, int microsecs); + +inline static int PINT_default_compare( + PINT_queue_entry_t *a, PINT_queue_entry_t *b) +{ + return 0; +} + +int PINT_queue_create( + PINT_queue_id *qid, PINT_queue_entry_compare_callback compare) +{ + struct PINT_queue_s *queue; + int ret = 0; + + queue = malloc(sizeof(struct PINT_queue_s)); + if(!queue) + { + return -PVFS_ENOMEM; + } + memset(queue, 0, sizeof(struct PINT_queue_s)); + + if(compare) + { + queue->compare = compare; + } + gen_mutex_init(&queue->mutex); + gen_cond_init(&queue->cond); + + INIT_QLIST_HEAD(&queue->triggers); + INIT_QLIST_HEAD(&queue->entries); + id_gen_fast_register(&queue->id, queue); + *qid = queue->id; + return ret; +} + +int PINT_queue_destroy(PINT_queue_id qid) +{ + struct 
PINT_queue_s *queue; + struct PINT_queue_trigger *trigger; + struct PINT_queue_trigger *tmp; + + queue = id_gen_fast_lookup(qid); + assert(queue); + gen_mutex_lock(&queue->mutex); + + /* the producers and consumers should have unregistered */ + assert(queue->producer_refcount == 0 && + queue->consumer_refcount == 0); + + if(!qlist_empty(&queue->entries)) + { + gossip_err("%s: can't destroy non-empty queue\n", __func__); + return -PVFS_EINVAL; + } + + qlist_for_each_entry_safe(trigger, tmp, &queue->triggers, link) + { + trigger->destroy(trigger->user_ptr); + qlist_del(&trigger->link); + free(trigger); + } + + gen_cond_destroy(&queue->cond); + gen_mutex_unlock(&queue->mutex); + + free(queue); + return 0; +} + +int PINT_queue_add_producer(PINT_queue_id queue_id, + void *producer) +{ + /* right now we just increment the ref counter. Could keep + * track of producers at some point */ + + struct PINT_queue_s *queue; + + queue = id_gen_fast_lookup(queue_id); + assert(queue); + + gen_mutex_lock(&queue->mutex); + queue->producer_refcount++; + gen_mutex_unlock(&queue->mutex); + return 0; +} + +int PINT_queue_add_consumer(PINT_queue_id queue_id, + void *consumer) +{ + /* right now we just increment the ref counter. Could keep + * track of consumers at some point */ + + struct PINT_queue_s *queue; + + queue = id_gen_fast_lookup(queue_id); + assert(queue); + + gen_mutex_lock(&queue->mutex); + queue->consumer_refcount++; + gen_mutex_unlock(&queue->mutex); + return 0; +} + +int PINT_queue_remove_producer(PINT_queue_id queue_id, + void *producer) +{ + /* right now we just decrement the ref counter. Could keep + * track of producers at some point */ + + struct PINT_queue_s *queue; + + queue = id_gen_fast_lookup(queue_id); + assert(queue); + + gen_mutex_lock(&queue->mutex); + queue->producer_refcount--; + gen_mutex_unlock(&queue->mutex); + return 0; +} + +int PINT_queue_remove_consumer(PINT_queue_id queue_id, + void *consumer) +{ + /* right now we just decrement the ref counter. 
Could keep + * track of consumers at some point */ + + struct PINT_queue_s *queue; + + queue = id_gen_fast_lookup(queue_id); + assert(queue); + + gen_mutex_lock(&queue->mutex); + queue->consumer_refcount--; + gen_mutex_unlock(&queue->mutex); + return 0; +} + +int PINT_queue_add_trigger(PINT_queue_id queue_id, + enum PINT_queue_action action, + PINT_queue_trigger_callback trigger, + PINT_queue_trigger_destroy destroy, + void *user_ptr) +{ + struct PINT_queue_s *queue; + struct PINT_queue_trigger *trigger_entry; + + queue = id_gen_fast_lookup(queue_id); + assert(queue); + + trigger_entry = malloc(sizeof(struct PINT_queue_trigger)); + if(!trigger_entry) + { + return -PVFS_ENOMEM; + } + + trigger_entry->trigger = trigger; + trigger_entry->destroy = destroy; + trigger_entry->action = action; + trigger_entry->user_ptr = user_ptr; + + gen_mutex_lock(&queue->mutex); + qlist_add(&queue->triggers, &trigger_entry->link); + gen_mutex_unlock(&queue->mutex); + return 0; +} + +inline int PINT_queue_count(PINT_queue_id qid) +{ + struct PINT_queue_s *queue; + int count; + queue = id_gen_fast_lookup(qid); + if(!queue) + { + return -PVFS_EINVAL; + } + + gen_mutex_lock(&queue->mutex); + count = queue->count; + gen_mutex_unlock(&queue->mutex); + return count; +} + +static int PINT_queue_insert(PINT_queue_id qid, + PINT_queue_entry_t *entry, + int front) +{ + struct PINT_queue_s *queue; + struct PINT_queue_trigger *trigger; + + assert(entry->link.next == NULL && entry->link.prev == NULL); + + queue = id_gen_fast_lookup(qid); + if(!queue) + { + return -PVFS_EINVAL; + } + + gen_mutex_lock(&queue->mutex); + if(front) + { + /* push it onto the front */ + gossip_debug(GOSSIP_MGMT_DEBUG, + "%s: pushing entry: %p to front of queue: %p\n", + __func__, &entry->link, queue); + qlist_add(&entry->link, &queue->entries); + } + else + { + gossip_debug(GOSSIP_MGMT_DEBUG, + "%s: pushing entry: %p to back of queue: %p\n", + __func__, &entry->link, queue); + qlist_add_tail(&entry->link, &queue->entries); 
+ } + + /* set the timestamp for when the entries enters the queue */ + PINT_util_get_current_timeval(&entry->timestamp); + + queue->count++; + + /* for now, we signal on all any addition to the queue + * (not just from an empty queue). This allows us to test + * on a particular entry + */ + gen_cond_signal(&queue->cond); + + qlist_for_each_entry(trigger, &queue->triggers, link) + { + if(trigger->action == PINT_QUEUE_ACTION_POSTED) + { + trigger->trigger(trigger->user_ptr, &queue->count); + } + } + + gen_mutex_unlock(&queue->mutex); + return 0; +} + +int PINT_queue_push(PINT_queue_id qid, PINT_queue_entry_t *entry) +{ + return PINT_queue_insert(qid, entry, 0); +} + +int PINT_queue_push_front(PINT_queue_id qid, PINT_queue_entry_t *entry) +{ + return PINT_queue_insert(qid, entry, 1); +} + +int PINT_queue_pull(PINT_queue_id qid, + int *count, + PINT_queue_entry_t **entries) +{ + int retcount = 0; + PINT_queue_entry_t *entry, *tmp; + struct PINT_queue_trigger *trigger; + struct PINT_queue_s *queue; + struct timeval now; + + queue = id_gen_fast_lookup(qid); + assert(queue); + assert(*count > 0); + + gen_mutex_lock(&queue->mutex); + qlist_for_each_entry_safe(entry, tmp, &queue->entries, link) + { + entries[retcount] = entry; + + gossip_debug(GOSSIP_MGMT_DEBUG, + "%s: removing entry: %p from queue: %p\n", + __func__, &entry->link, queue); + qlist_del(&entry->link); + memset(&entry->link, 0, sizeof(entry->link)); + queue->count--; + + /* update average queued time for this queue */ + PINT_util_get_current_timeval(&now); + PINT_queue_update_stats( + queue, PINT_util_get_timeval_diff(&entry->timestamp, &now)); + + ++retcount; + if(retcount == *count) + { + break; + } + } + + gossip_debug(GOSSIP_MGMT_DEBUG, "%s: returning %d entries\n", + __func__, *count); + *count = retcount; + + if(retcount > 0) + { + + qlist_for_each_entry(trigger, &queue->triggers, link) + { + if(trigger->action == PINT_QUEUE_ACTION_REMOVED) + { + trigger->trigger(trigger->user_ptr, &queue->count); + } + 
+ if(queue->count == 0 && + trigger->action == PINT_QUEUE_ACTION_EMPTIED) + { + trigger->trigger(trigger->user_ptr, NULL); + } + } + } + + gen_mutex_unlock(&queue->mutex); + + return 0; +} + +int PINT_queue_remove(PINT_queue_id queue_id, + PINT_queue_entry_t *entry) +{ + struct PINT_queue_s *queue; + struct PINT_queue_trigger *trigger; + PINT_queue_entry_t *e, *tmp; + struct timeval now; + int ret = -PVFS_ENOENT; + + if(entry->link.prev == NULL) + { + return -PVFS_ENOENT; + } + + queue = id_gen_fast_lookup(queue_id); + + gen_mutex_lock(&queue->mutex); + if(queue->count > 0) + { + /* make sure its actually in the queue somewhere */ + qlist_for_each_entry_safe(e, tmp, &queue->entries, link) + { + if(e == entry) + { + gossip_debug(GOSSIP_MGMT_DEBUG, + "%s: removing entry: %p from queue: %p\n", + __func__, &e->link, queue); + qlist_del(&e->link); + memset(&e->link, 0, sizeof(e->link)); + queue->count--; + + PINT_util_get_current_timeval(&now); + + PINT_queue_update_stats( + queue, + PINT_util_get_timeval_diff(&e->timestamp, &now)); + ret = 0; + break; + } + } + } + + qlist_for_each_entry(trigger, &queue->triggers, link) + { + if(trigger->action == PINT_QUEUE_ACTION_REMOVED) + { + trigger->trigger(trigger->user_ptr, &queue->count); + } + + if(queue->count == 0 && trigger->action == PINT_QUEUE_ACTION_EMPTIED) + { + trigger->trigger(trigger->user_ptr, NULL); + } + } + + gen_mutex_unlock(&queue->mutex); + + return ret; +} + +int PINT_queue_search_and_remove(PINT_queue_id queue_id, + PINT_queue_entry_find_callback compare, + void *user_ptr, + PINT_queue_entry_t **entry) +{ + struct PINT_queue_s *queue; + PINT_queue_entry_t *e, *tmp; + + queue = id_gen_fast_lookup(queue_id); + + gen_mutex_lock(&queue->mutex); + qlist_for_each_entry_safe(e, tmp, &queue->entries, link) + { + if(compare(e, user_ptr)) + { + *entry = e; + qlist_del(&e->link); + gen_mutex_unlock(&queue->mutex); + return 0; + } + } + gen_mutex_unlock(&queue->mutex); + return -PVFS_ENOENT; +} + +int 
PINT_queue_wait_for_entry(PINT_queue_id queue_id, + PINT_queue_entry_find_callback find, + void *user_ptr, + PINT_queue_entry_t **entry, + int microsecs) +{ + struct PINT_queue_s *queue; + int ret = 0, cond_ret = 0; + struct timespec timeout; + + ret = PINT_queue_search_and_remove(queue_id, find, user_ptr, entry); + if(0 == ret) + { + return ret; + } + + if(ret != -PVFS_ENOENT) + { + return ret; + } + + queue = id_gen_fast_lookup(queue_id); + + /* queue does not have entry, wait for signal of addition */ + timeout = PINT_util_get_abs_timespec(microsecs); + + do + { + gen_mutex_lock(&queue->mutex); + cond_ret = gen_cond_timedwait(&queue->cond, &queue->mutex, &timeout); + gen_mutex_unlock(&queue->mutex); + + /* either the condition variable gets signalled, we have + * a spurious wakeup, or the timeout was reached. + * If gen_cond_timedwait didn't return ETIMEDOUT, + * then we assume something was added to the queue. If the queue + * is still empty, the return from timedwait must have been spurious + * and we should try again with an updated value for microsecs. 
+ */ + ret = PINT_queue_search_and_remove( + queue_id, find, user_ptr, entry); + if(ret == 0 || ret != -PVFS_ENOENT) + { + return ret; + } + + } while(cond_ret == 0); + + gen_mutex_unlock(&queue->mutex); + if(cond_ret != 0) + { + if(cond_ret == ETIMEDOUT) + { + cond_ret = -PVFS_ETIMEDOUT; + } + if(cond_ret == EINVAL) + { + cond_ret = -PVFS_EINVAL; + } + return cond_ret; + } + + return PINT_queue_search_and_remove(queue_id, find, user_ptr, entry); +} + +int PINT_queue_timedwait(PINT_queue_id queue_id, + int *count, + PINT_queue_entry_t **entries, + int microsecs) +{ + struct PINT_queue_s *queue; + int ret = 0; + struct timespec timeout; + + queue = id_gen_fast_lookup(queue_id); + + gen_mutex_lock(&queue->mutex); + + if(queue->count > 0) + { + gen_mutex_unlock(&queue->mutex); + return PINT_queue_pull(queue_id, count, entries); + } + + /* queue is empty, wait for signal of addition */ + + timeout = PINT_util_get_abs_timespec(microsecs); + + do + { + ret = gen_cond_timedwait(&queue->cond, &queue->mutex, &timeout); + if(ret == EINVAL) + { + gossip_lerr("gen_cond_timedwait returned EINVAL!\n"); + } + + /* either the condition variable gets signalled, we have + * a spurious wakeup, or the timeout was reached. + * If gen_cond_timedwait didn't return ETIMEDOUT, + * then we assume something was added to the queue. If the queue + * is still empty, the return from timedwait must have been spurious + * and we should try again with an updated value for microsecs. 
+ */ + } while(queue->count == 0 && ret == 0); + + gen_mutex_unlock(&queue->mutex); + if(ret != 0) + { + *count = 0; + if(ret == EINVAL) + { + ret = -PVFS_EINVAL; + } + else if(ret == ETIMEDOUT) + { + ret = -PVFS_ETIMEDOUT; + } + return ret; + } + + return PINT_queue_pull(queue_id, count, entries); +} + +int PINT_queue_wait(PINT_queue_id queue_id, + int *count, + PINT_queue_entry_t **entries) +{ + struct PINT_queue_s *queue; + int ret = 0; + + queue = id_gen_fast_lookup(queue_id); + assert(queue); + + gen_mutex_lock(&queue->mutex); + if(queue->count > 0) + { + gen_mutex_unlock(&queue->mutex); + return PINT_queue_pull(queue_id, count, entries); + } + /* queue is empty, wait for signal of addition */ + + do + { + ret = gen_cond_wait(&queue->cond, &queue->mutex); + + /* either the condition variable gets signalled, or we have + * a spurious wakeup. + * If gen_cond_wait didn't return an error, + * then we assume something was added to the queue. If the queue + * is still empty, the return from cond_wait must have been spurious + * and we should try again. 
+ */ + } while(queue->count == 0 && ret == 0); + + gen_mutex_unlock(&queue->mutex); + if(ret != 0) + { + return ret; + } + + return PINT_queue_pull(queue_id, count, entries); +} + +int PINT_queue_get_stats(PINT_queue_id queue_id, struct PINT_queue_stats *stats) +{ + struct PINT_queue_s *queue; + + queue = id_gen_fast_lookup(queue_id); + stats->total_queued = queue->stats.total_queued; + stats->avg_queued_time = queue->stats.avg_queued_time; + stats->var_queued_time = queue->stats.var_queued_time / + (queue->stats.total_queued - 1); + return 0; +} + +int PINT_queue_reset_stats(PINT_queue_id queue_id) +{ + struct PINT_queue_s *queue; + + queue = id_gen_fast_lookup(queue_id); + memset(&queue->stats, 0, sizeof(struct PINT_queue_stats)); + return 0; +} + +static int PINT_queue_update_stats(struct PINT_queue_s *queue, int microsecs) +{ + int diff; + + /* note that the average and variance are estimates based on an + * algorithm due to Knuth + */ + + queue->stats.total_queued++; + + diff = microsecs - queue->stats.avg_queued_time; + queue->stats.avg_queued_time = (diff / queue->stats.total_queued); + + queue->stats.var_queued_time += + diff * (microsecs - queue->stats.avg_queued_time); + + return 0; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/common/mgmt/pint-queue.h b/src/common/mgmt/pint-queue.h new file mode 100644 index 0000000..e833290 --- /dev/null +++ b/src/common/mgmt/pint-queue.h @@ -0,0 +1,173 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#ifndef PINT_QUEUE_H +#define PINT_QUEUE_H + +#include "id-generator.h" +#include "quicklist.h" +#include "gen-locks.h" +#include "pvfs2-types.h" + +typedef PVFS_id_gen_t PINT_queue_id; + +typedef struct +{ + PVFS_id_gen_t id; + struct timeval timestamp; + struct qlist_head link; +} PINT_queue_entry_t; + +typedef int (* PINT_queue_entry_compare_callback)( + PINT_queue_entry_t *a, PINT_queue_entry_t *b); + +struct PINT_queue_stats +{ + int avg_queued_time; + int var_queued_time; + int total_queued; +}; + +struct PINT_queue_s +{ + /* The generated id for this queue */ + PINT_queue_id id; + + PINT_queue_entry_compare_callback compare; + + /* Locks the queue during multithreaded access */ + gen_mutex_t mutex; + + gen_cond_t cond; + + /* The entries list */ + struct qlist_head entries; + + /* Count of entries in the queue */ + int count; + + /* Count of maximum number of entries to accept in the queue + * before returning EAGAIN + */ + int max; + + /* Triggers to be invoked on some action */ + struct qlist_head triggers; + + /* Queue stats. Don't access this struct directly, that's + * what the get_stats function is for + * */ + struct PINT_queue_stats stats; + + int producer_refcount; + int consumer_refcount; + + /* link for adding this queue to lists of queues (used by worker) */ + struct qlist_head link; +}; + +#define PINT_queue_entry_object(_entryp, _type, _member) \ + ((_type *)((char *)(_entryp) - (unsigned long)&((_type *)0)->_member)) + +enum PINT_queue_action +{ + /* The EMPTIED action is triggered when the queue reaches empty. + * Any triggers registered for the EMPTY action on this queue will + * be called once it becomes empty. + * The value argument passed to the trigger callback is NULL for this + * action and should be ignored. */ + PINT_QUEUE_ACTION_EMPTIED, + + /* The POSTED action is triggered once a new operation is posted + * to the queue. 
Any triggers registered for the POSTED action + * on this queue will be called once a new operation has been posted. + * The value argument passed to the trigger callback is the count of + * operations in the queue after the post. + */ + PINT_QUEUE_ACTION_POSTED, + + /* The REMOVED action is triggered once operations are pulled from + * the queue. Any triggers registered for the REMOVED action + * on this queue will be called once operations have been removed. + * The value argument passed to the trigger callback is the count + * of operations in the queue after the removals. + */ + PINT_QUEUE_ACTION_REMOVED +}; + +typedef int (*PINT_queue_trigger_callback) (void *user_ptr, void *action_value); +typedef int (*PINT_queue_trigger_destroy) (void *user_ptr); + +int PINT_queue_create(PINT_queue_id *queue, + PINT_queue_entry_compare_callback compare); + +int PINT_queue_destroy(PINT_queue_id queue); + +int PINT_queue_add_producer(PINT_queue_id queue, void *producer); + +int PINT_queue_add_consumer(PINT_queue_id queue, void *consumer); + +int PINT_queue_remove_producer(PINT_queue_id queue, void *producer); + +int PINT_queue_remove_consumer(PINT_queue_id queue, void *consumer); + +int PINT_queue_add_trigger(PINT_queue_id queue, + enum PINT_queue_action action, + PINT_queue_trigger_callback trigger, + PINT_queue_trigger_destroy destroy, + void *user_ptr); + +int PINT_queue_count(PINT_queue_id queue_id); + +int PINT_queue_push(PINT_queue_id queue_id, PINT_queue_entry_t *entry); + +int PINT_queue_push_front(PINT_queue_id qid, PINT_queue_entry_t *entry); + +int PINT_queue_pull(PINT_queue_id queue_id, + int *count, + PINT_queue_entry_t **entries); + +int PINT_queue_remove(PINT_queue_id queue_id, + PINT_queue_entry_t *entry); + +typedef int (*PINT_queue_entry_find_callback)( + PINT_queue_entry_t *entry, void *user_ptr); + +int PINT_queue_search_and_remove(PINT_queue_id queue_id, + PINT_queue_entry_find_callback find, + void *user_ptr, + PINT_queue_entry_t **entry); + +int 
PINT_queue_wait_for_entry(PINT_queue_id queue_id, + PINT_queue_entry_find_callback find, + void *user_ptr, + PINT_queue_entry_t **entry, + int microsecs); + +int PINT_queue_timedwait(PINT_queue_id queue_id, + int *count, + PINT_queue_entry_t **entries, + int microsecs); + +int PINT_queue_wait(PINT_queue_id queue_id, + int *count, + PINT_queue_entry_t **entries); + +int PINT_queue_get_stats(PINT_queue_id queue, + struct PINT_queue_stats *stats); + +int PINT_queue_reset_stats(PINT_queue_id queue); + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-worker-blocking.c b/src/common/mgmt/pint-worker-blocking.c new file mode 100644 index 0000000..06d058f --- /dev/null +++ b/src/common/mgmt/pint-worker-blocking.c @@ -0,0 +1,61 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include +#include "pint-worker-blocking.h" +#include "pint-worker.h" +#include "pint-mgmt.h" + +static int blocking_post(struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_queue_id queue_id, + PINT_operation_t *operation) +{ + int ret; + int service_time, error; + + assert(queue_id == 0); + + ret = PINT_manager_service_op(manager, operation, &service_time, &error); + if(0 == ret && 0 == error) + { + return PINT_MGMT_OP_COMPLETED; + } + return error; +} + +struct PINT_worker_impl PINT_worker_blocking_impl = +{ + "BLOCKING", + + /* init and destroy are null because the blocking worker + * doesn't do anything besides service and return in the post + */ + NULL, + NULL, + + /* the blocking worker doesn't use queues, so the queue_add + * and queue_remove callbacks aren't implemented + */ + NULL, + NULL, + + blocking_post, + + /* the blocking impl doesn't implement the do_work callback + * since the work is done in the post + */ + NULL +}; + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 
+ * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-worker-blocking.h b/src/common/mgmt/pint-worker-blocking.h new file mode 100644 index 0000000..daea9cd --- /dev/null +++ b/src/common/mgmt/pint-worker-blocking.h @@ -0,0 +1,23 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#ifndef PINT_WORKER_BLOCKING_H +#define PINT_WORKER_BLOCKING_H + +#include "pint-op.h" + +struct PINT_worker_impl PINT_worker_blocking_impl; + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-worker-external.c b/src/common/mgmt/pint-worker-external.c new file mode 100644 index 0000000..34238cc --- /dev/null +++ b/src/common/mgmt/pint-worker-external.c @@ -0,0 +1,78 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include "pint-worker-external.h" +#include "pint-queue.h" +#include "pint-mgmt.h" +#include "pint-worker.h" +#include "pvfs2-internal.h" + +static int external_init(struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_worker_attr_t *attr) +{ + inst->external.attr = attr->u.external; + inst->external.posted = 0; + + gen_mutex_init(&inst->external.mutex); + return PINT_queue_create(&inst->external.wait_queue, NULL); +} + +static int external_destroy(struct PINT_manager_s *manager, + PINT_worker_inst *inst) +{ + return PINT_queue_destroy(inst->external.wait_queue); +} + +static int external_post(struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_queue_id queue_id, + PINT_operation_t *operation) +{ + int ret; + gen_mutex_lock(&inst->external.mutex); + + /* put in wait queue if we've exceeded max posts. 
If max posts is 0, + * we never queue + */ + if(inst->external.attr.max_posts > 0 && + inst->external.posted >= inst->external.attr.max_posts) + { + ret = PINT_queue_push(inst->external.wait_queue, &operation->qentry); + } + else + { + ret = inst->external.attr.post(&operation->id, + inst->external.attr.external_ptr, + operation); + inst->external.posted++; + } + gen_mutex_unlock(&inst->external.mutex); + return ret; +} + +struct PINT_worker_impl PINT_worker_external_impl = +{ + "EXTERNAL", + external_init, + external_destroy, + + /* the external worker doesn't use queues */ + NULL, + NULL, + + external_post, + NULL +}; + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-worker-external.h b/src/common/mgmt/pint-worker-external.h new file mode 100644 index 0000000..758f36c --- /dev/null +++ b/src/common/mgmt/pint-worker-external.h @@ -0,0 +1,55 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#ifndef PINT_WORKER_EXTERNAL_H +#define PINT_WORKER_EXTERNAL_H + +#include "pint-op.h" +#include "pint-queue.h" + +/* should return PINT_MGMT_OP_COMPLETE or PINT_MGMT_OP_POSTED + * or some negative value on error + */ +typedef int (*PINT_worker_external_post_callout) ( + PINT_op_id *op_id, + void *external_ptr, + PINT_operation_t *operation); + +typedef int (*PINT_worker_external_test_callout) ( + PINT_op_id *op_ids, + void *external_ptr, + int *count, + PINT_operation_t *operation); + +typedef struct +{ + PINT_worker_external_post_callout post; + PINT_worker_external_test_callout test; + void *external_ptr; + + int max_posts; +} PINT_worker_external_attr_t; + +struct PINT_worker_external_s +{ + PINT_worker_external_attr_t attr; + PINT_queue_id wait_queue; + int posted; + gen_mutex_t mutex; +}; + +struct PINT_worker_impl PINT_worker_external_impl; + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-worker-none.c b/src/common/mgmt/pint-worker-none.c new file mode 100644 index 0000000..d1488b0 --- /dev/null +++ b/src/common/mgmt/pint-worker-none.c @@ -0,0 +1,230 @@ + +#include "pint-worker-none.h" +#include "pint-queue.h" + +static int PINT_worker_queues_init(PINT_worker_inst *inst, + PINT_worker_attr_t *attr) +{ + int ret = 0; + inst->queues.attr = attr->u.queues; + INIT_QLIST_HEAD(inst->queues.queues); + gen_mutex_init(&inst->queues.mutex); + gen_cond_init(&inst->queues.cond); + + inst->queues.ops = malloc(sizeof(PINT_operation_t) * + inst->queues.attr.ops_per_queue); + if(!inst->queues.ops) + { + ret = -PVFS_ENOMEM; + goto error_exit; + } + +error_exit: + gen_cond_destroy(&inst->queues.cond); + return ret; +} + +static int PINT_worker_queues_destroy(PINT_worker_inst *inst) +{ + free(inst->queues.ops); + gen_cond_destroy(&inst->queues.cond); + return 0; +} + +static int PINT_worker_queues_queue_add(PINT_worker_inst * inst, + PINT_queue_id 
queue_id) +{ + struct PINT_queue_s *queue; + + queue = id_gen_fast_lookup(queue_id); + assert(queue); + + gen_mutex_lock(&inst->queues.mutex); + qlist_add_tail(&queue->link, &inst->queues.queues); + gen_mutex_unlock(&inst->queues.mutex); + + return 0; +} + +static int PINT_worker_queues_queue_remove(PINT_worker_inst *inst, + PINT_queue_id queue_id) +{ + struct PINT_queue_s *queue; + + gen_mutex_lock(&inst->queues.mutex); + + queue = id_gen_fast_lookup(queue_id); + assert(queue); + + /* make sure its actually in there at the moment */ + while(!qlist_exists(&inst->queues.queues, queue->link)) + { + /* assume that operations are being pulled off presently + * and it just needs to be added back to the + * list of queues, which we will wait for + */ + gen_mutex_condwait(&inst->queues.cond, + &inst->queues.mutex); + } + + /* now we're sure that its there, so pluck it off */ + qlist_del(&queue->link); + gen_mutex_unlock(&inst->queues.mutex); + + return 0; +} + +static int PINT_worker_queues_post(PINT_worker_inst *inst, + PINT_queue_id queue_id, + PINT_operation_t *operation) +{ + gen_mutex_lock(&inst->queues.mutex); + + /* the queue id is in the entry->id field */ + ret = PINT_queue_push(queue_id, operation); + if(ret < 0) + { + gossip_err("%s: failed to push op onto queue: %p\n", + __func__, queue_id); + return ret; + } + + gossip_debug(PINT_MGMT_DEBUG, + "%s: post op to worker (queues) queue: %p\n", + __func__, + queue_id); + + gen_mutex_unlock(&inst->queues.mutex); + + return PINT_MGMT_OP_POSTED; +} + +static int PINT_worker_queues_do_work(PINT_manager_t manager, + PINT_worker_inst *inst, + int microsecs) +{ + struct timeval start, now; + struct PINT_queue_s *queue; + int count; + int i, j; + + assert(microsecs > 0); + + gettimeofday(&start, NULL); + gen_mutex_lock(&inst->queues.mutex); + + if(qlist_empty(&inst->queues.queues)) + { + /* no queues! 
just return zero */ + return 0; + } + + while(1) + { + queue = qlist_entry(inst->queues.queues.next, struct PINT_queue_s, link); + + /* remove it from the list so that we can operate on the queues + * in a round-robin fashion. + */ + qlist_del(queue->link); + + count = inst->queues.attr.ops_per_queue; + + /* service as many operations as specified in the attributes + */ + if(inst->queues.attr.timeout == 0) + { + ret = PINT_queue_wait(queue->id, + &count, + inst->queues.ops); + } + else + { + ret = PINT_queue_timedwait(queue->id, + &count, + inst->queues.ops, + inst->queues.attr.timeout); + } + + if(ret < 0) + { + /* fatal error, couldn't get operations off the queue */ + goto exit; + } + + for(i = 0; i < count; ++i) + { + /* service! */ + error = inst->queues.ops[i].operation( + inst->queues.ops[i].operation_ptr, + inst->queues.ops[i].hint); + + ret = PINT_manager_serviced(manager, &inst->queues.ops[i], error); + if(ret < 0) + { + /* failed to notify the manager that this operation + * had been serviced. Put the queue back on the list + * of queues. + */ + goto exit; + } + + gettimeofday(&now, NULL); + + if(microsecs > 0 && + ((now.tv_sec * 1e6 + now.tv_usec) - + (start.tv_sec * 1e6 + start.tv_usec)) > microsecs) + { + /* went past timeout. put the remaining operations back + * on the queue and return + */ + break; + } + } + + if(i < count) + { + /* push all the un-serviced operations back onto the + * queue. We push them onto the front since they were + * removed from the front. 
+ */ + for(j = (count - 1); j > i; --j) + { + ret = PINT_queue_push_front( + queue_id, inst->queues.ops[j]); + if(ret < 0) + { + goto exit; + } + } + } + + qlist_add_tail(&queue->link, &inst->queues.queues); + } + +exit: + /* put the queue back on the list of queues */ + qlist_add_tail(&queue->link, &inst->queues.queues); + gen_mutex_unlock(&inst->queues.mutex); + return ret; +} + +struct PINT_worker_impl PINT_worker_queues_impl +{ + "NONE", + PINT_worker_queues_init, + PINT_worker_queues_destroy, + PINT_worker_queues_queue_add, + PINT_worker_queues_queue_remove, + PINT_worker_queues_post, + PINT_worker_queues_do_work +}; + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-worker-none.h b/src/common/mgmt/pint-worker-none.h new file mode 100644 index 0000000..cae2dd7 --- /dev/null +++ b/src/common/mgmt/pint-worker-none.h @@ -0,0 +1,36 @@ + +#include "pint-mgmt.h" + +typedef struct +{ + /* The number of operations that should be serviced before moving on + * to the next queue + */ + int ops_per_queue; + + /* time to wait (in microsecs) for an operation to be added to the queue. + * 0 means no timeout. + */ + int timeout; + +} PINT_worker_queues_attr_t; + +struct PINT_worker_queues_s +{ + PINT_worker_queues_attr_t attr; + PINT_op_id *ids; + PINT_service_callout *callouts; + void **service_ptrs; + PVFS_hint **hints; + struct qlist_head queues; +}; + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/common/mgmt/pint-worker-per-op.c b/src/common/mgmt/pint-worker-per-op.c new file mode 100644 index 0000000..a9857d0 --- /dev/null +++ b/src/common/mgmt/pint-worker-per-op.c @@ -0,0 +1,165 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#include +#include +#include "pvfs2.h" +#include "pvfs2-internal.h" +#include "pint-worker-per-op.h" +#include "pint-worker.h" +#include "pint-mgmt.h" +#include "gossip.h" + +struct PINT_manager_s; + +struct PINT_worker_per_op_thread_s +{ + pthread_t id; + struct PINT_manager_s *manager; + PINT_operation_t *operation; + struct PINT_worker_per_op_s *worker; +}; + +static void *PINT_worker_per_op_thread_function(void * ptr); + +static int per_op_init(struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_worker_attr_t *attr) +{ + struct PINT_worker_per_op_s *w; + w = &inst->per_op; + + w->attr = attr->u.per_op; + w->service_count = 0; + return 0; +} + +static int per_op_destroy(struct PINT_manager_s *manager, + PINT_worker_inst *inst) +{ + struct PINT_worker_per_op_s *w; + w = &inst->per_op; + + if(w->service_count) + { + return -PVFS_EBUSY; + } + + return 0; +} + +static int per_op_post(struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_queue_id queue_id, + PINT_operation_t *operation) +{ + struct PINT_worker_per_op_s *w; + int ret; + struct PINT_worker_per_op_thread_s *thread; + pthread_attr_t attr; + + w = &inst->per_op; + + /* no queues in per-op */ + assert(queue_id == 0); + + thread = malloc(sizeof(struct PINT_worker_per_op_thread_s)); + if(!thread) + { + return -PVFS_ENOMEM; + } + + thread->manager = manager; + thread->operation = operation; + thread->worker = w; + + ret = pthread_attr_init(&attr); + if(ret != 0) + { + return PVFS_get_errno_mapping(ret); + } + + /* set the thread to detached. 
Once the operation finishes + * this thread will exit + */ + ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + if(ret != 0) + { + pthread_attr_destroy(&attr); + return PVFS_get_errno_mapping(ret); + } + + /* create the thread */ + ret = pthread_create(&thread->id, + &attr, + PINT_worker_per_op_thread_function, + thread); + if(ret != 0) + { + return PVFS_errno_to_error(ret); + } + + return PINT_MGMT_OP_POSTED; +} + +static void *PINT_worker_per_op_thread_function(void * ptr) +{ + int ret, service_time, error; + struct PINT_worker_per_op_thread_s *thread; + + thread = (struct PINT_worker_per_op_thread_s *)ptr; + + thread->worker->service_count++; + + ret = PINT_manager_service_op( + thread->manager, thread->operation, &service_time, &error); + if(ret < 0) + { + gossip_err("%s: failed to service operation: %llu\n", + __func__, llu(thread->operation->id)); + } + + ret = PINT_manager_complete_op( + thread->manager, thread->operation, error); + if(ret < 0) + { + gossip_err("%s: failed to complete operation: %llu\n", + __func__, llu(thread->operation->id)); + } + + thread->worker->service_count--; + + return NULL; +} + +struct PINT_worker_impl PINT_worker_per_op_impl = +{ + "PER_OP", + per_op_init, + per_op_destroy, + + /* the per-op worker doesn't use queues, so the queue_add and + * queue_remove callbacks aren't implemented + */ + NULL, + NULL, + + per_op_post, + + /* the per-op impl doesn't implement the do_work callback + * because work is done in the threads + */ + NULL +}; + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-worker-per-op.h b/src/common/mgmt/pint-worker-per-op.h new file mode 100644 index 0000000..f473662 --- /dev/null +++ b/src/common/mgmt/pint-worker-per-op.h @@ -0,0 +1,38 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#ifndef PINT_WORKER_PER_OP_H +#define PINT_WORKER_PER_OP_H + +#include "pint-op.h" + +typedef struct +{ + /* Max number of threads that be started/servicing operations. Once + * this value is reached, calls to post new operations will return + * EAGAIN */ + int max_threads; + +} PINT_worker_per_op_attr_t; + +struct PINT_worker_per_op_s +{ + PINT_worker_per_op_attr_t attr; + int service_count; +}; + +struct PINT_worker_impl PINT_worker_per_op_impl; + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-worker-pool.c b/src/common/mgmt/pint-worker-pool.c new file mode 100644 index 0000000..2a02ed7 --- /dev/null +++ b/src/common/mgmt/pint-worker-pool.c @@ -0,0 +1,31 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include "pint-worker-pool.h" +#include "pint-worker.h" +#include "pint-mgmt.h" + +#include "pvfs2-types.h" + +struct PINT_worker_impl PINT_worker_pool_impl = +{ + "POOL", + NULL, + NULL, + NULL, + NULL, + NULL, + NULL +}; + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-worker-pool.h b/src/common/mgmt/pint-worker-pool.h new file mode 100644 index 0000000..e89b114 --- /dev/null +++ b/src/common/mgmt/pint-worker-pool.h @@ -0,0 +1,31 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#ifndef PINT_WORKER_POOL_H +#define PINT_WORKER_POOL_H + +#include "pint-op.h" + +typedef struct +{ + int max_threads; + +} PINT_worker_pool_attr_t; + +struct PINT_worker_pool_s +{ + PINT_worker_pool_attr_t attr; +}; +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-worker-queues.c b/src/common/mgmt/pint-worker-queues.c new file mode 100644 index 0000000..1abf125 --- /dev/null +++ b/src/common/mgmt/pint-worker-queues.c @@ -0,0 +1,376 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include +#include "pvfs2-internal.h" +#include "pint-worker-queues.h" +#include "pint-queue.h" +#include "pint-worker.h" +#include "pint-mgmt.h" +#include "pvfs2-debug.h" +#include "gossip.h" + +static int queues_init(struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_worker_attr_t *attr) +{ + struct PINT_worker_queues_s *w; + int ret = 0; + + w = &inst->queues; + + w->attr = attr->u.queues; + INIT_QLIST_HEAD(&w->queues); + gen_mutex_init(&w->mutex); + gen_cond_init(&w->cond); + + w->qentries = malloc(sizeof(PINT_queue_entry_t) * w->attr.ops_per_queue); + if(!w->qentries) + { + ret = -PVFS_ENOMEM; + goto error_exit; + } + + return 0; + +error_exit: + gen_cond_destroy(&w->cond); + + return ret; +} + +static int queues_destroy(struct PINT_manager_s *manager, + PINT_worker_inst *inst) +{ + struct PINT_worker_queues_s *w; + w = &inst->queues; + + free(w->qentries); + gen_cond_destroy(&w->cond); + return 0; +} + +static int queues_queue_add(struct PINT_manager_s *manager, + PINT_worker_inst * inst, + PINT_queue_id queue_id) +{ + struct PINT_worker_queues_s *w; + struct PINT_queue_s *queue; + + w = &inst->queues; + + queue = id_gen_fast_lookup(queue_id); + assert(queue); + + gen_mutex_lock(&w->mutex); + qlist_add_tail(&queue->link, &w->queues); + PINT_queue_add_producer(queue_id, w); + 
PINT_queue_add_consumer(queue_id, w); + gen_cond_signal(&w->cond); + gen_mutex_unlock(&w->mutex); + + return 0; +} + +static int queues_queue_remove(struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_queue_id queue_id) +{ + struct PINT_worker_queues_s *w; + struct PINT_queue_s *queue; + + w = &inst->queues; + + gen_mutex_lock(&w->mutex); + + queue = id_gen_fast_lookup(queue_id); + assert(queue); + + /* make sure its actually in there at the moment */ + while(!qlist_exists(&w->queues, &queue->link)) + { + /* assume that operations are being pulled off presently + * and it just needs to be added back to the + * list of queues, which we will wait for + */ + gen_cond_wait(&w->cond, &w->mutex); + } + + /* now we're sure that its there, so pluck it off */ + qlist_del(&queue->link); + PINT_queue_remove_producer(queue_id, w); + PINT_queue_remove_consumer(queue_id, w); + gen_mutex_unlock(&w->mutex); + + return 0; +} + +static int queues_post(struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_queue_id id, + PINT_operation_t *operation) +{ + struct PINT_worker_queues_s *w; + int ret; + PINT_worker_id wid; + struct PINT_queue_s *queue; + PINT_queue_id queue_id; + + w = &inst->queues; + + gen_mutex_lock(&w->mutex); + + if(qlist_empty(&w->queues)) + { + gossip_err("%s: cannot post an operation without first adding queues " + "to the queue worker\n", __func__); + gen_mutex_unlock(&w->mutex); + return -PVFS_EINVAL; + } + + id_gen_fast_register(&wid, w); + + /* if the queue_id is zero, then assume that there's + * only one queue maintained by this worker and use that + */ + if(id == 0) + { + /* a dirty hack to check that the list of queues only has one element */ + if(w->queues.next->next != &w->queues) + { + gossip_err("%s: no queue id was specified and there's more than " + "one queue being managed by this worker\n", __func__); + gen_mutex_unlock(&w->mutex); + return -PVFS_EINVAL; + } + + /* there must be only one queue, so just use that */ + queue = 
qlist_entry(w->queues.next, struct PINT_queue_s, link); + queue_id = queue->id; + + } + else + { + /* its not the worker id, so it must be an id for one of our queues */ + queue = id_gen_fast_lookup(id); + + /* verify that this is a queue id we know about */ + if(!qlist_exists(&w->queues, &queue->link)) + { + gen_mutex_unlock(&w->mutex); + gossip_err("%s: failed to find a valid queue matching the " + "queue id passed in\n", __func__); + return -PVFS_EINVAL; + } + queue_id = id; + } + + /* at this point we should have a valid queue_id that's managed + * by this worker. + */ + + ret = PINT_queue_push(queue_id, &operation->qentry); + if(ret < 0) + { + gen_mutex_unlock(&w->mutex); + gossip_err("%s: failed to push op onto queue: %llu\n", + __func__, llu(queue_id)); + return ret; + } + + gossip_debug(GOSSIP_MGMT_DEBUG, + "%s: post op to worker (queues) queue: %llu\n", + __func__, llu(queue_id)); + + gen_mutex_unlock(&w->mutex); + return PINT_MGMT_OP_POSTED; +} + +static int queues_do_work(struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_context_id context_id, + PINT_operation_t *op, + int microsecs) +{ + struct PINT_worker_queues_s *w; + struct timeval start, now; + struct PINT_queue_s *queue; + int count; + int i, j, ret; + PINT_queue_entry_t *qentry; + int service_time, error; + + w = &inst->queues; + + gettimeofday(&start, NULL); + gen_mutex_lock(&w->mutex); + + if(qlist_empty(&w->queues)) + { + /* no queues! just return zero */ + gen_mutex_unlock(&w->mutex); + return 0; + } + + if(op->id != 0) + { + /* find the op in one of the queues */ + qlist_for_each_entry(queue, &w->queues, link) + { + ret = PINT_queue_search_and_remove( + queue->id, + PINT_op_queue_find_op_id_callback, + &op->id, + &qentry); + if(ret == -PVFS_ENOENT) + { + continue; + } + else if(ret < 0) + { + gen_mutex_unlock(&w->mutex); + return 0; + } + + op = PINT_op_from_qentry(qentry); + /* must have removed it. 
*/ + break; + } + + if(!op) + { + gen_mutex_unlock(&w->mutex); + return -PVFS_EINVAL; + } + + /* service */ + ret = PINT_manager_service_op(manager, op, &service_time, &error); + if(ret < 0) + { + gen_mutex_unlock(&w->mutex); + return ret; + } + + ret = PINT_manager_complete_op(manager, op, error); + gen_mutex_unlock(&w->mutex); + return ret; + } + + /* no specific op was specified to do work on, so we just + * iterate through the queues doing work until the timeout + * expires + */ + while(1) + { + queue = qlist_entry(w->queues.next, struct PINT_queue_s, link); + + /* remove it from the list so that we can operate on the queues + * in a round-robin fashion. + */ + qlist_del(&queue->link); + + count = w->attr.ops_per_queue; + + /* service as many operations as specified in the attributes + */ + if(w->attr.timeout == 0) + { + ret = PINT_queue_wait(queue->id, &count, &w->qentries); + + } + else + { + ret = PINT_queue_timedwait( + queue->id, &count, &w->qentries, w->attr.timeout); + } + + if(ret < 0) + { + /* fatal error, couldn't get operations off the queue */ + goto exit; + } + + for(i = 0; i < count; ++i) + { + op = PINT_op_from_qentry(&w->qentries[i]); + + /* service! */ + ret = PINT_manager_service_op( + manager, op, &service_time, &error); + if(ret < 0) + { + /* failed to notify the manager that this operation + * had been serviced. Put the queue back on the list + * of queues. + */ + goto exit; + } + + ret = PINT_manager_complete_op(manager, op, error); + if(ret < 0) + { + goto exit; + } + + gettimeofday(&now, NULL); + + if(microsecs > 0 && + ((now.tv_sec * 1e6 + now.tv_usec) - + (start.tv_sec * 1e6 + start.tv_usec)) > microsecs) + { + /* went past timeout. put the remaining operations back + * on the queue and return + */ + break; + } + } + + if(i < count) + { + /* push all the un-serviced operations back onto the + * queue. We push them onto the front since they were + * removed from the front. 
+ */ + for(j = (count - 1); j > i; --j) + { + ret = PINT_queue_push_front(queue->id, &w->qentries[j]); + if(ret < 0) + { + goto exit; + } + } + } + + qlist_add_tail(&queue->link, &w->queues); + } + +exit: + /* put the queue back on the list of queues */ + qlist_add_tail(&queue->link, &w->queues); + gen_mutex_unlock(&w->mutex); + return ret; +}; + +struct PINT_worker_impl PINT_worker_queues_impl = +{ + "QUEUES", + queues_init, + queues_destroy, + queues_queue_add, + queues_queue_remove, + queues_post, + queues_do_work, + NULL +}; + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-worker-queues.h b/src/common/mgmt/pint-worker-queues.h new file mode 100644 index 0000000..1ec41d9 --- /dev/null +++ b/src/common/mgmt/pint-worker-queues.h @@ -0,0 +1,52 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#ifndef PINT_WORKER_QUEUES_H +#define PINT_WORKER_QUEUES_H + +#include "pint-op.h" +#include "pint-queue.h" + +typedef struct +{ + /* The number of operations that should be serviced before moving on + * to the next queue + */ + int ops_per_queue; + + /* The time to wait (in microsecs) for new ops to be added to a queue. + * 0 means no timeout. 
+ */ + int timeout; + +} PINT_worker_queues_attr_t; + +struct PINT_worker_queues_s +{ + PINT_worker_queues_attr_t attr; + PINT_op_id *ids; + PINT_service_callout *callouts; + void **service_ptrs; + PVFS_hint **hints; + struct qlist_head queues; + PINT_queue_entry_t *qentries; + gen_mutex_t mutex; + gen_cond_t cond; +}; + +struct PINT_worker_impl PINT_worker_queues_impl; + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/common/mgmt/pint-worker-threaded-queues.c b/src/common/mgmt/pint-worker-threaded-queues.c new file mode 100644 index 0000000..2ff3a96 --- /dev/null +++ b/src/common/mgmt/pint-worker-threaded-queues.c @@ -0,0 +1,571 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include +#include +#include "pvfs2-types.h" +#include "pvfs2-internal.h" +#include "pint-worker-threaded-queues.h" +#include "pint-queue.h" +#include "pint-worker.h" +#include "pint-mgmt.h" +#include "pvfs2-debug.h" +#include "gossip.h" +#include "quicklist.h" +#include + + +/* How long we wait for a queue to be added before timing out. + * Note that its necessary to timeout and check that a request to + * stop the thread has not been sent. 
Right now we set this to + * 10 millisecs (value in microsecs) + */ +#define WAIT_FOR_QUEUE_INTERVAL 1e5 + +#define DEFAULT_TIMEOUT 1e4 + +static int PINT_worker_queue_thread_start( + struct PINT_worker_thread_entry * tentry); + +static int PINT_worker_queue_thread_stop( + struct PINT_worker_thread_entry * tentry); + +static int threaded_queues_init(struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_worker_attr_t *attr) +{ + struct PINT_worker_threaded_queues_s *w; + int ret = 0; + int i; + + w = &inst->threaded; + + w->attr = attr->u.threaded; + gen_mutex_init(&w->mutex); + gen_cond_init(&w->cond); + INIT_QLIST_HEAD(&w->queues); + INIT_QLIST_HEAD(&w->inuse_queues); + + w->manager = manager; + + w->threads = malloc(sizeof(struct PINT_worker_thread_entry) * + w->attr.thread_count); + if(!w->threads) + { + ret = -PVFS_ENOMEM; + gen_cond_destroy(&w->cond); + goto exit; + } + + for(i = 0; i < w->attr.thread_count; ++i) + { + w->threads[i].worker = w; + ret = PINT_worker_queue_thread_start(&w->threads[i]); + if(ret < 0) + { + /* stop the other threads */ + for(; i >= 0; --i) + { + PINT_worker_queue_thread_stop(&w->threads[i]); + } + free(w->threads); + gen_cond_destroy(&w->cond); + } + gossip_debug(GOSSIP_MGMT_DEBUG,"%s:thread_id %d:thread #%d.\n" + ,__func__ + ,(int)w->threads[i].thread_id,i); + } + +exit: + return ret; +} + +static int threaded_queues_destroy(struct PINT_manager_s *manager, + PINT_worker_inst *inst) +{ + struct PINT_worker_threaded_queues_s *w; + struct PINT_worker_thread_entry * tentry; + int i; + + w = &inst->threaded; + + gen_mutex_lock(&w->mutex); + gen_mutex_unlock(&w->mutex); + + /* stop all threads */ + for(i = 0; i < w->attr.thread_count; ++i) + { + gen_mutex_lock(&w->mutex); + tentry = &w->threads[i]; + gen_mutex_unlock(&w->mutex); + + PINT_worker_queue_thread_stop(tentry); + } + + free(w->threads); + gen_cond_destroy(&w->cond); + gen_mutex_unlock(&w->mutex); + + return 0; +} + +static int threaded_queues_queue_add(struct 
PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_queue_id queue_id) +{ + struct PINT_worker_threaded_queues_s *w; + struct PINT_queue_s *queue; + + w = &inst->threaded; + + queue = id_gen_fast_lookup(queue_id); + gen_mutex_lock(&w->mutex); + + assert(queue->link.next == NULL && queue->link.prev == NULL); + + qlist_add_tail(&queue->link, &w->queues); + PINT_queue_add_producer(queue_id, w); + PINT_queue_add_consumer(queue_id, w); + + /* send a signal to one thread waiting for a queue to be added */ + gen_cond_signal(&w->cond); + gen_mutex_unlock(&w->mutex); + + return 0; +} + +static int threaded_queues_queue_remove(struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_queue_id queue_id) +{ + struct PINT_worker_threaded_queues_s *w; + struct PINT_queue_s *queue; + struct timespec timeout; + + w = &inst->threaded; + + gen_mutex_lock(&w->mutex); + w->remove_requested = 1; + + queue = id_gen_fast_lookup(queue_id); + assert(queue); + + /* we wait for 10 millisecs -- long enough for the queue to + * be added back to the unused list */ + while(!qlist_exists(&w->queues, &queue->link)) + { + /* assume that operations are being pulled off presently + * and it just needs to be added back to the + * list of queues, which we will wait for + */ + timeout.tv_sec = time(NULL); + timeout.tv_nsec = 1e6; + gen_cond_timedwait(&w->cond, &w->mutex, &timeout); + } + + /* now we're ensured that its there, so pluck it off */ + qlist_del(&queue->link); + PINT_queue_remove_producer(queue_id, w); + PINT_queue_remove_consumer(queue_id, w); + + memset(&queue->link, 0, sizeof(queue->link)); + + w->remove_requested = 0; + gen_cond_broadcast(&w->cond); + gen_mutex_unlock(&w->mutex); + + return 0; +} + +static int threaded_queues_post(struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_queue_id id, + PINT_operation_t *operation) +{ + struct PINT_worker_threaded_queues_s *w; + int ret; + PINT_worker_id wid; + struct PINT_queue_s *queue; + PINT_queue_id queue_id; + + w 
= &inst->threaded; + + gen_mutex_lock(&w->mutex); + + id_gen_fast_register(&wid, w); + + /* if the queue_id matches the worker_id, then assume that there's + * only one queue maintained by this worker and use that + */ + if(id == 0) + { + /* a dirty hack to check that the list of queues only has one element */ + if(w->queues.next->next != NULL) + { + gossip_err("%s: no queue id was specified and there's more than " + "one queue being managed by this worker\n", __func__); + gen_mutex_unlock(&w->mutex); + return -PVFS_EINVAL; + } + + /* there must be only one queue, so just use that */ + queue = qlist_entry(&w->queues.next, struct PINT_queue_s, link); + queue_id = queue->id; + } + else + { + /* its not the worker id, so it must be an id for one of our queues */ + queue = id_gen_fast_lookup(id); + + /* verify that this is a queue id we know about */ + if(!qlist_exists(&w->queues, &queue->link) && + !qlist_exists(&w->inuse_queues, &queue->link)) + { + gen_mutex_unlock(&w->mutex); + return -PVFS_EINVAL; + } + queue_id = id; + } + + /* at this point we should have a valid queue_id that's managed + * by this worker. 
+ */ + ret = PINT_queue_push(queue_id, &operation->qentry); + if(ret < 0) + { + gossip_err("%s: failed to push op onto queue: %llu\n", + __func__, llu(queue_id)); + gen_mutex_unlock(&w->mutex); + return ret; + } + + gossip_debug(GOSSIP_MGMT_DEBUG, + "%s: post op to worker (threaded queues) queue: %llu\n", + __func__, + llu(queue_id)); + + gen_mutex_unlock(&w->mutex); + return PINT_MGMT_OP_POSTED; +} + +static int threaded_queues_cancel(struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_queue_id queue_id, + PINT_operation_t *op) +{ + struct PINT_worker_threaded_queues_s *w; + struct PINT_queue_s *queue; + + w = &inst->threaded; + + gen_mutex_lock(&w->mutex); + + /* if the queue_id matches the worker_id, then assume that there's + * only one queue maintained by this worker and use that + */ + if(queue_id == 0) + { + /* a dirty hack to check that the list of queues only has one element */ + if(w->queues.next->next != NULL) + { + gossip_err("%s: no queue id was specified and there's more than " + "one queue being managed by this worker\n", __func__); + gen_mutex_unlock(&w->mutex); + return -PVFS_EINVAL; + } + + /* there must be only one queue, so just use that */ + queue = qlist_entry(&w->queues.next, struct PINT_queue_s, link); + queue_id = queue->id; + } + else + { + /* its not the worker id, so it must be an id for one of our queues */ + queue = id_gen_fast_lookup(queue_id); + + /* verify that this is a queue id we know about */ + if(!qlist_exists(&w->queues, &queue->link) && + !qlist_exists(&w->inuse_queues, &queue->link)) + { + gen_mutex_unlock(&w->mutex); + return -PVFS_EINVAL; + } + } + + return PINT_queue_remove(queue_id, &op->qentry); +} + +struct PINT_worker_impl PINT_worker_threaded_queues_impl = +{ + "THREADED", + threaded_queues_init, + threaded_queues_destroy, + threaded_queues_queue_add, + threaded_queues_queue_remove, + threaded_queues_post, + + /* the threaded queues impl doesn't implement the do_work callback + * because work is done in 
the threads + */ + NULL, + + threaded_queues_cancel +}; + +static void *PINT_worker_queues_thread_function(void * ptr) +{ + struct PINT_worker_thread_entry *thread; + struct PINT_worker_threaded_queues_s *worker; + struct PINT_manager_s *manager; + struct PINT_queue_s *queue; + PINT_operation_t *op; + int op_count; + int timeout; + PINT_queue_entry_t **qentries = NULL; + int i = 0; + int ret, service_time, error; + struct timespec wait_interval; + + wait_interval.tv_sec = (WAIT_FOR_QUEUE_INTERVAL / 1e6); + wait_interval.tv_nsec = + (WAIT_FOR_QUEUE_INTERVAL - (wait_interval.tv_sec * 1e6)) * 1e3; + + thread = (struct PINT_worker_thread_entry *)ptr; + + gen_mutex_lock(&thread->mutex); + worker = thread->worker; + manager = worker->manager; + gen_mutex_unlock(&thread->mutex); + + gossip_debug(GOSSIP_MGMT_DEBUG,"%s:thread-id %d:worker location is %p: manager location is %p.\n" + ,__func__ + ,(int)thread->thread_id + ,worker + ,manager); + + gen_mutex_lock(&worker->mutex); + op_count = worker->attr.ops_per_queue; + timeout = worker->attr.timeout; + if(timeout == 0) + { + timeout = DEFAULT_TIMEOUT; + } + gen_mutex_unlock(&worker->mutex); + + gossip_debug(GOSSIP_MGMT_DEBUG,"%s:thread-id %d:op-count is %d:timeout is %d.\n" + ,__func__ + ,(int)thread->thread_id + ,op_count + ,timeout); + qentries = malloc(sizeof(PINT_queue_entry_t *) * op_count); + if(!qentries) + { + ret = -PVFS_ENOMEM; + gen_mutex_unlock(&thread->mutex); + goto free_ops; + } + + gen_mutex_lock(&thread->mutex); + thread->running = 1; + gossip_debug(GOSSIP_MGMT_DEBUG,"%s: starting thread function for thread_id %d\n" + ,__func__ + ,(int)thread->thread_id); + while(thread->running) + { + /* unlock the thread mutex to allow someone else + * to set the running field to zero + */ + gen_mutex_unlock(&thread->mutex); + + gen_mutex_lock(&worker->mutex); + + if(worker->remove_requested) + { + gen_cond_wait(&worker->cond, &worker->mutex); + gen_mutex_unlock(&worker->mutex); + + /* lock the mutex again before checking 
the running field */ + gen_mutex_lock(&thread->mutex); + continue; + } + + if(!qlist_empty(&worker->queues)) + { + queue = qlist_entry( + worker->queues.next, struct PINT_queue_s, link); + + /* take the queue off the head of the list so + * that we can put it back on the tail. This + * allows the threads to work on the queues in + * a round-robin fashion + */ + qlist_del(&queue->link); + qlist_add_tail(&queue->link, &worker->inuse_queues); + gen_mutex_unlock(&worker->mutex); + + op_count = worker->attr.ops_per_queue; + /* now we wait for operations to get put on the queue + * and service them when they do + */ + ret = PINT_queue_timedwait( + queue->id, &op_count, qentries, timeout); + if(ret < 0 && ret != -PVFS_ETIMEDOUT) + { + goto thread_failed; + } + + /* add the queue back to the end of the list */ + gen_mutex_lock(&worker->mutex); + qlist_del(&queue->link); + qlist_add_tail(&queue->link, &worker->queues); + + /* Don't signal another thread unless there is actually more + * work left in this queue. Note that it is safe to check the + * count here because new operations cannot be submitted to the + * queue while worker->mutex is held (see + * threaded_queues_post()). PINT_queue_insert() will do its own + * signalling as needed later. 
+ */ + if(PINT_queue_count(queue->id) > 0) + { + gen_cond_signal(&worker->cond); + } + gen_mutex_unlock(&worker->mutex); + + if(op_count > 0) + { + gossip_debug(GOSSIP_MGMT_DEBUG,"%s:thread_id %d: op_count is %d.\n" + ,__func__ + ,(int)thread->thread_id,op_count); + for(i = 0; i < op_count; ++i) + { + struct PINT_op_entry *op_entry; + gossip_debug(GOSSIP_MGMT_DEBUG,"%s:thread_id %d: i is %d.\n" + ,__func__ + ,(int)thread->thread_id,i); + op = PINT_op_from_qentry(qentries[i]); + /* service the operation */ + gossip_debug(GOSSIP_MGMT_DEBUG,"%s:thread_id %d: calling PINT_manager_service_op.\n" + ,__func__ + ,(int)thread->thread_id); + ret = PINT_manager_service_op( + manager, op, &service_time, &error); + if(ret < 0) + { + /* fatal if we can't service an operation */ + goto free_ops; + } + + gossip_debug(GOSSIP_MGMT_DEBUG,"%s:thread_id %d: calling PINT_manager_complete_op.\n" + ,__func__ + ,(int)thread->thread_id); + ret = PINT_manager_complete_op( + manager, op, error); + if(ret < 0) + { + /* fatal if we can't complete an op */ + goto free_ops; + } + op_entry = id_gen_safe_lookup(op->id); + if (op_entry) + { + id_gen_safe_unregister(op_entry->op.id); + free(op_entry); + op_entry = NULL; + } + } + } + } + else + { + /* no queues in the list, wait for addition */ + + /* we set a timeout of 1 second, long enough to not peg the cpu, but + * short enough to check thread cancellation */ + struct timespec empty_timeout; + empty_timeout.tv_sec = time(NULL) + 1; + empty_timeout.tv_nsec = 0; + + ret = gen_cond_timedwait(&worker->cond, &worker->mutex, &empty_timeout); + if(ret != 0 && ret != ETIMEDOUT) + { + gossip_lerr("gen_cond_timedwait failed with error: %s\n", + strerror(ret)); + } + else + { + gen_mutex_unlock(&worker->mutex); + } + } + /* lock the mutex again before checking the running field */ + gen_mutex_lock(&thread->mutex); + }/*end while thread->running*/ + + gen_mutex_unlock(&thread->mutex); + + /* must have been external request to stop thread */ + ret = 0; + 
+free_ops: + if(qentries) + { + free(qentries); + } + +thread_failed: + thread->error = ret; + return NULL; +} + +static int PINT_worker_queue_thread_start( + struct PINT_worker_thread_entry * tentry) +{ + int ret = 0; + + gen_mutex_init(&tentry->mutex); + ret = pthread_create(&tentry->thread_id, NULL, + PINT_worker_queues_thread_function, tentry); + if(ret < 0) + { + /* convert to PVFS error */ + return PVFS_errno_to_error(ret); + } + return 0; +} + +static int PINT_worker_queue_thread_stop( + struct PINT_worker_thread_entry * tentry) +{ + int ret; + void *ptr; + struct PINT_worker_threaded_queues_s *w; + + gen_mutex_lock(&tentry->mutex); + w = tentry->worker; + tentry->running = 0; + gen_mutex_unlock(&tentry->mutex); + + gen_mutex_lock(&w->mutex); + gen_cond_broadcast(&w->cond); + gen_mutex_unlock(&w->mutex); + + ret = pthread_join(tentry->thread_id, &ptr); + if(ret < 0) + { + return PVFS_errno_to_error(ret); + } + + return 0; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-worker-threaded-queues.h b/src/common/mgmt/pint-worker-threaded-queues.h new file mode 100644 index 0000000..14bdcb2 --- /dev/null +++ b/src/common/mgmt/pint-worker-threaded-queues.h @@ -0,0 +1,65 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#ifndef PINT_WORKER_THREADED_QUEUES_H +#define PINT_WORKER_THREADED_QUEUES_H + +#include "gen-locks.h" +#include "quicklist.h" +#include "pint-op.h" + +typedef struct +{ + /* The number of threads to create for this worker */ + int thread_count; + + /* The number of operations that should be serviced before moving on + * to the next queue + */ + int ops_per_queue; + + /* time to wait (in microsecs) for an operation to be added to the queue. + * 0 means no timeout. 
+ */ + int timeout; + +} PINT_worker_threaded_queues_attr_t; + +struct PINT_worker_thread_entry +{ + gen_thread_t thread_id; + struct PINT_worker_threaded_queues_s *worker; + gen_mutex_t mutex; + int running; + int error; +}; + +struct PINT_manager_s; + +struct PINT_worker_threaded_queues_s +{ + PINT_worker_threaded_queues_attr_t attr; + struct PINT_worker_thread_entry *threads; + struct qlist_head queues; + struct qlist_head inuse_queues; + gen_mutex_t mutex; + gen_cond_t cond; + struct PINT_manager_s *manager; + int remove_requested; +}; + +struct PINT_worker_impl PINT_worker_threaded_queues_impl; + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/mgmt/pint-worker.h b/src/common/mgmt/pint-worker.h new file mode 100644 index 0000000..a79c5ad --- /dev/null +++ b/src/common/mgmt/pint-worker.h @@ -0,0 +1,167 @@ +/* + * (C) 2006 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#ifndef PINT_WORKER_H +#define PINT_WORKER_H + +#include "pint-context.h" +#include "pint-worker-queues.h" +#include "pint-worker-threaded-queues.h" +#include "pint-worker-per-op.h" +#include "pint-worker-pool.h" +#include "pint-worker-blocking.h" +#include "pint-worker-external.h" + +typedef PVFS_id_gen_t PINT_worker_id; + +typedef enum +{ + PINT_WORKER_TYPE_QUEUES, + PINT_WORKER_TYPE_THREADED_QUEUES, + PINT_WORKER_TYPE_PER_OP, + PINT_WORKER_TYPE_POOL, + PINT_WORKER_TYPE_BLOCKING, + PINT_WORKER_TYPE_EXTERNAL +} PINT_worker_type_t; + +extern PINT_worker_id PINT_worker_implicit_id; +extern PINT_worker_id PINT_worker_blocking_id; + +union PINT_worker_attr_u +{ + PINT_worker_queues_attr_t queues; + PINT_worker_threaded_queues_attr_t threaded; + PINT_worker_per_op_attr_t per_op; + PINT_worker_pool_attr_t pool; + PINT_worker_external_attr_t external; +}; + +typedef struct +{ + PINT_worker_type_t type; + union PINT_worker_attr_u u; +} PINT_worker_attr_t; + +typedef union +{ + struct PINT_worker_queues_s queues; + struct PINT_worker_threaded_queues_s threaded; + struct PINT_worker_per_op_s per_op; + struct PINT_worker_pool_s pool; + struct PINT_worker_external_s external; +} PINT_worker_inst; + +struct PINT_manager_s; + +struct PINT_worker_impl +{ + /* The name of the worker impl. */ + const char *name; + + /** + * Initialize this worker impl. + * + * @param manager the manager to which this worker belongs. + * @param inst the union of worker internal instances. The + * specific worker's fields should be set on this instance. + * @param attr the attributes of the worker impl. These should + * be copied to the worker instance. + * + * @return should return 0 on success or -PVFS_error on error + */ + int (*init) (struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_worker_attr_t *attr); + + /** + * Destroy the worker impl. + * + * @param manager the manager this worker is on + * @param the instance of the worker impl. 
+ * + * @return 0 on success, -PVFS_error on error + */ + int (*destroy) (struct PINT_manager_s *manager, PINT_worker_inst *inst); + + /** + * Add a queue to the worker. Some workers don't accept queues + * (such as the thread-per-op or thread-pool workers), and should + * not implement this callback. Other workers that manage queues + * should add this queue to their list of queues. The queue + * should not be free within the worker, Its lifetime should + * be managed separately. + */ + int (*queue_add) (struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_queue_id queue_id); + + /** + * Remove a queue from the worker. + */ + int (*queue_remove) (struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_queue_id queue_id); + + /** + * Post an operation to this worker. This callback is required for + * all worker impls to implement. + * + * @param manager the manager this worker is on + * @param inst the worker instance + * @param queue_id the queue in this worker to post the operation to. For + * workers that don't manage queues, the queue_id should be zero. + * + * @param operation the operation to post to the worker. This object + * is managed outside the worker instance, so the worker does not + * need to copy or free it. It can be added to a queue directly. + * + * @return PINT_MGMT_OP_POSTED if the operation was posted successfully. + * PINT_MGMT_OP_COMPLETE if the worker is a blocking worker and + * the operation was completed successfully. + * -PVFS_error on error + */ + int (*post) (struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_queue_id queue_id, + PINT_operation_t *operation); + + /** + * Do work for the operations in the worker. Some workers don't service + * operations in separate threads, and so servicing must progress by + * calling this function. Other workers that service operations separately + * shouldn't implement this callback. 
+ * + * @param manager the manager this worker is in + * @param inst the worker instance + * @param op_id do work on a particular operation. If the value is 0, + * do work on all operations until the timeout is reached. + * @param microsecs The timeout for doing work. This is a hint to + * return from the do_work callback after the timeout has passed, + * but there are not strict guarantees that do_work will return + * before it is reached. + */ + int (*do_work) (struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_context_id context_id, + PINT_operation_t *op, + int microsecs); + + int (*cancel) (struct PINT_manager_s *manager, + PINT_worker_inst *inst, + PINT_queue_id queue_id, + PINT_operation_t *op); +}; + +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/misc/digest.c b/src/common/misc/digest.c index 5229b5e..f0b8e63 100644 --- a/src/common/misc/digest.c +++ b/src/common/misc/digest.c @@ -26,7 +26,9 @@ #ifdef __GEN_POSIX_LOCKING__ #include +#ifdef HAVE_LINUX_UNISTD_H #include +#endif #include #include #include diff --git a/src/common/misc/errno-mapping.c b/src/common/misc/errno-mapping.c index 641f395..7618634 100644 --- a/src/common/misc/errno-mapping.c +++ b/src/common/misc/errno-mapping.c @@ -12,12 +12,25 @@ #include "pvfs2-util.h" #include "gossip.h" +#ifdef WIN32 +#include "wincommon.h" + +#define snprintf(b, c, f, ...) 
_snprintf(b, c, f, __VA_ARGS__) + +/* error codes not defined on Windows */ +#define EREMOTE 66 +#define EHOSTDOWN 112 +#endif + #define MAX_PVFS_STRERROR_LEN 256 +/* static global controls whether pvfs_sys calls print user errors */ +static int pvfs_perror_gossip_silent = 0; + /* macro defined in include/pvfs2-types.h */ DECLARE_ERRNO_MAPPING_AND_FN(); -/* +/** the pvfs analog to strerror_r that handles PVFS_error codes as well as errno error codes */ @@ -29,11 +42,11 @@ int PVFS_strerror_r(int errnum, char *buf, int n) if (IS_PVFS_NON_ERRNO_ERROR(-errnum)) { - snprintf(buf, limit, PINT_non_errno_strerror_mapping[tmp]); + snprintf(buf, limit, "%s", PINT_non_errno_strerror_mapping[tmp]); } else { -#ifdef HAVE_GNU_STRERROR_R +#if defined(HAVE_GNU_STRERROR_R) || defined(_GNU_SOURCE) char *tmpbuf = strerror_r(tmp, buf, limit); if (tmpbuf && (strcmp(tmpbuf, buf))) { @@ -41,14 +54,16 @@ int PVFS_strerror_r(int errnum, char *buf, int n) strncpy(buf, tmpbuf, (size_t)limit); } ret = (tmpbuf ? 0 : -1); -#else - ret = strerror_r(tmp, buf, (size_t)limit); +#elif defined(WIN32) + ret = (int) strerror_s(buf, (size_t) limit, tmp); +#else + ret = (int)strerror_r(tmp, buf, (size_t)limit); #endif } return ret; } -/* PVFS_perror() +/** PVFS_perror() * * prints a message on stderr, consisting of text argument followed by * a colon, space, and error string for the given retcode. 
NOTE: also @@ -81,13 +96,33 @@ void PVFS_perror(const char *text, int retcode) return; } -/* PVFS_perror_gossip() +/** silences user error messages from system interface calls + */ +void PVFS_perror_gossip_silent(void) +{ + pvfs_perror_gossip_silent = 1; + return; +} + +/** turns on user error messages from system interface calls + */ +void PVFS_perror_gossip_verbose(void) +{ + pvfs_perror_gossip_silent = 0; + return; +} + +/** PVFS_perror_gossip() * * same as PVFS_perror, except that the output is routed through * gossip rather than stderr */ void PVFS_perror_gossip(const char *text, int retcode) { + if (pvfs_perror_gossip_silent) + { + return; + } if (IS_PVFS_NON_ERRNO_ERROR(-retcode)) { char buf[MAX_PVFS_STRERROR_LEN] = {0}; diff --git a/src/common/misc/extent-utils.c b/src/common/misc/extent-utils.c index bb2e95b..87e70f9 100644 --- a/src/common/misc/extent-utils.c +++ b/src/common/misc/extent-utils.c @@ -64,10 +64,42 @@ PINT_llist *PINT_create_extent_list(char *extent_str) */ int PINT_handle_in_extent(PVFS_handle_extent *ext, PVFS_handle handle) { + /* return ((handle > ext->first-1) && - (handle < ext->last+1)); + (handle < ext->last+1)); + */ + /* ext->last may be max, 2^64 - 1 */ + return ((handle >= ext->first) && + (handle <= ext->last)); } +/* PINT_handle_in_extent_array() + * + * Parameters: + * PVFS_handle_extent_array - array of extents + * PVFS_handle - a handle + * + * Returns 1 if the specified handle is within any of the + * extents in the specified list of extents. Returns 0 + * otherwise. 
+ * + */ +int PINT_handle_in_extent_array( + PVFS_handle_extent_array *ext_array, PVFS_handle handle) +{ + int i, ret; + for(i = 0; i < ext_array->extent_count; ++i) + { + ret = PINT_handle_in_extent(&ext_array->extent_array[i], handle); + if(ret) + { + return ret; + } + } + return 0; +} + + /* PINT_handle_in_extent_list() * * Parameters: @@ -115,32 +147,18 @@ int PINT_handle_in_extent_list( * returns the 0 on success and fills in the specified count argument * with the extent count total. returns -PVFS_error on error */ -int PINT_extent_list_count_total( - PINT_llist *extent_list, uint64_t *count) +uint64_t PINT_extent_array_count_total( + PVFS_handle_extent_array *extent_array) { - int ret = -PVFS_EINVAL; - PINT_llist *cur = NULL; - PVFS_handle_extent *cur_extent = NULL; + int i; + uint64_t count = 0; - if (extent_list && count) + for(i = 0; i < extent_array->extent_count; ++i) { - *count = 0; - - cur = extent_list; - while(cur) - { - cur_extent = PINT_llist_head(cur); - if (!cur_extent) - { - break; - } - - *count += (cur_extent->last - cur_extent->first + 1); - cur = PINT_llist_next(cur); - } - ret = 0; + count += (extent_array->extent_array[i].last - + extent_array->extent_array[i].first + 1); } - return ret; + return count; } /* PINT_release_extent_list() diff --git a/src/common/misc/extent-utils.h b/src/common/misc/extent-utils.h index 700433e..279e224 100644 --- a/src/common/misc/extent-utils.h +++ b/src/common/misc/extent-utils.h @@ -17,11 +17,13 @@ PINT_llist *PINT_create_extent_list( int PINT_handle_in_extent( PVFS_handle_extent *ext, PVFS_handle handle); +int PINT_handle_in_extent_array( + PVFS_handle_extent_array *ext_array, PVFS_handle handle); int PINT_handle_in_extent_list( PINT_llist *extent_list, PVFS_handle handle); -int PINT_extent_list_count_total( - PINT_llist *extent_list, uint64_t *count); +uint64_t PINT_extent_array_count_total( + PVFS_handle_extent_array *extent_array); void PINT_release_extent_list( PINT_llist *extent_list); diff --git 
a/src/common/misc/fsck-utils.c b/src/common/misc/fsck-utils.c index a779f2f..27a817c 100755 --- a/src/common/misc/fsck-utils.c +++ b/src/common/misc/fsck-utils.c @@ -6,7 +6,9 @@ #include #include #include +#ifndef WIN32 #include +#endif #include #include @@ -18,6 +20,140 @@ #define SERVER_CONFIG_BUFFER_SIZE 5000 #define FS_CONFIG_BUFFER_SIZE 10000 +#ifdef WIN32 +int PVFS_fsck_initialize( + const struct PINT_fsck_options* options, + const PVFS_credentials* creds, + const PVFS_fs_id* cur_fs) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_validate_dfile( + const struct PINT_fsck_options* fsck_options, + const PVFS_handle* handle, + const PVFS_fs_id* cur_fs, + const PVFS_credentials* creds, + PVFS_size* dfile_total_size) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_validate_dfile_attr( + const struct PINT_fsck_options* fsck_options, + const PVFS_sysresp_getattr* attributes) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_validate_metafile( + const struct PINT_fsck_options* fsck_options, + const PVFS_object_ref* obj_ref, + const PVFS_sysresp_getattr* attributes, + const PVFS_credentials* creds) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_validate_metafile_attr( + const struct PINT_fsck_options* fsck_options, + const PVFS_sysresp_getattr* attributes) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_validate_symlink( + const struct PINT_fsck_options* fsck_options, + const PVFS_object_ref* obj_ref, + const PVFS_sysresp_getattr* attributes) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_validate_symlink_attr( + const struct PINT_fsck_options* fsck_options, + const PVFS_sysresp_getattr* attributes) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_validate_symlink_target( + const struct PINT_fsck_options* fsck_options, + const PVFS_sysresp_getattr* attributes) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_validate_dirdata( + const struct PINT_fsck_options* fsck_options, + const PVFS_handle* handle, + const PVFS_fs_id* cur_fs, + const PVFS_credentials* 
creds) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_validate_dirdata_attr( + const struct PINT_fsck_options* fsck_options, + const PVFS_sysresp_getattr* attributes) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_validate_dir( + const struct PINT_fsck_options* fsck_options, + const PVFS_object_ref* obj_ref, + const PVFS_sysresp_getattr* attributes, + const PVFS_credentials* creds, + PVFS_dirent* directory_entries) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_validate_dir_attr( + const struct PINT_fsck_options* fsck_options, + const PVFS_sysresp_getattr* attributes) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_validate_dir_ent( + const struct PINT_fsck_options* fsck_options, + const char* filename) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_finalize( + const struct PINT_fsck_options* fsck_options, + const PVFS_fs_id* cur_fs, + const PVFS_credentials* cred) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_get_attributes( + const struct PINT_fsck_options* fsck_options, + const PVFS_object_ref* object_ref, + const PVFS_credentials* cred, + PVFS_sysresp_getattr* sysresp_getattr) +{ + return -PVFS_EOPNOTSUPP; +} + +int PVFS_fsck_check_server_configs( + const struct PINT_fsck_options* fsck_options, + const PVFS_credentials* cred, + const PVFS_fs_id* fs_id) +{ + return -PVFS_EOPNOTSUPP; +} + +#else +/** + * Just stub functions out on Windows + */ + /** \file * \ingroup fsckutils * Implementation of the fsck-utils component. 
@@ -474,7 +610,7 @@ int PVFS_fsck_validate_metafile( err = PVFS_mgmt_get_dfile_array(*obj_ref, (PVFS_credentials *) creds, - df_handles, attributes->attr.dfile_count); + df_handles, attributes->attr.dfile_count, NULL); if(err < 0) { PVFS_perror("PVFS_mgmt_get_dfile_array", err); @@ -780,7 +916,7 @@ int PVFS_fsck_validate_dir( /* get the dirdata handle and validate */ err = PVFS_mgmt_get_dirdata_handle - (*obj_ref, &dirdata_handle, (PVFS_credentials *) creds); + (*obj_ref, &dirdata_handle, (PVFS_credentials *) creds, NULL); if(err < 0) { gossip_err("Error: unable to get dirdata handle\n"); @@ -804,7 +940,7 @@ int PVFS_fsck_validate_dir( err = PVFS_sys_readdir(*obj_ref, token, MAX_DIR_ENTS, - (PVFS_credentials *) creds, &readdir_resp); + (PVFS_credentials *) creds, &readdir_resp, NULL); if(err < 0) { gossip_err("Error: could not read directory entries\n"); @@ -941,7 +1077,7 @@ int PVFS_fsck_get_attributes( int ret = 0; ret = PVFS_sys_getattr - (*pref, PVFS_ATTR_SYS_ALL, (PVFS_credentials *) creds, getattr_resp); + (*pref, PVFS_ATTR_SYS_ALL, (PVFS_credentials *) creds, getattr_resp, NULL); if(ret < 0) { gossip_err("Error: unable to retrieve attributes\n"); @@ -1015,6 +1151,9 @@ int PVFS_fsck_get_attributes( case PVFS_TYPE_DIRDATA: gossip_debug(GOSSIP_FSCK_DEBUG, "dirdata\n"); break; + case PVFS_TYPE_INTERNAL: + gossip_debug(GOSSIP_FSCK_DEBUG, "internal\n"); + break; } gossip_debug(GOSSIP_FSCK_DEBUG, "\n"); @@ -1118,7 +1257,7 @@ static int PINT_handle_wrangler_load_handles( (PVFS_credentials *) creds, stat_array, PINT_handle_wrangler_handlelist.addr_array, - server_count, NULL); + server_count, NULL, NULL); if(err < 0) { ret = -PVFS_ENOMEM; @@ -1240,12 +1379,15 @@ static int PINT_handle_wrangler_load_handles( { /* mgmt call to get block of handles */ err = PVFS_mgmt_iterate_handles_list(*cur_fs, - (PVFS_credentials *) creds, - handle_matrix, - handle_count_array, - position_array, - PINT_handle_wrangler_handlelist. 
- addr_array, server_count, NULL); + (PVFS_credentials *) creds, + handle_matrix, + handle_count_array, + position_array, + PINT_handle_wrangler_handlelist.addr_array, + server_count, + 0, + NULL, + NULL); if(err < 0) { PVFS_perror_gossip("PVFS_mgmt_iterate_handles", err); @@ -1288,6 +1430,54 @@ static int PINT_handle_wrangler_load_handles( PINT_handle_wrangler_handlelist.stranded_array[i] = PINT_handle_wrangler_handlelist.used_array[i]; } + + /* now look for reserved handles from each server */ + for (i = 0; i < server_count; i++) + { + position_array[i] = PVFS_ITERATE_START; + handle_count_array[i] = HANDLE_BATCH; + } + + do + { + err = PVFS_mgmt_iterate_handles_list(*cur_fs, + (PVFS_credentials *) creds, + handle_matrix, + handle_count_array, + position_array, + PINT_handle_wrangler_handlelist.addr_array, + server_count, + PVFS_MGMT_RESERVED, + NULL, + NULL); + if(err < 0) + { + PVFS_perror_gossip("PVFS_mgmt_iterate_handles", err); + ret = err; + goto load_handles_error; + } + + more_handles = 0; + + for (i = 0; i < server_count; i++) + { + /* remove these handles */ + int j = 0; + for (j = 0; j < handle_count_array[i]; j++) + { + PINT_handle_wrangler_remove_handle(&handle_matrix[i][j], + cur_fs); + } + + /* are there more handles? 
*/ + if (position_array[i] != PVFS_ITERATE_END) + { + more_handles = 1; + handle_count_array[i] = HANDLE_BATCH; + } + } + } while (more_handles != 0); + ret = 0; goto load_handles_success; @@ -1471,6 +1661,7 @@ static int PINT_handle_wrangler_display_stranded_handles( PVFS_object_ref pref; const char *server_name = NULL; int header = 0; + char buf[128] = {0}; for (i = 0; i < PINT_handle_wrangler_handlelist.num_servers; i++) { @@ -1498,46 +1689,51 @@ static int PINT_handle_wrangler_display_stranded_handles( /* get this objects attributes */ ret = PVFS_fsck_get_attributes(fsck_options, &pref, creds, &attributes); - if(ret < 0) - { - PVFS_perror_gossip("PVFS_fsck_get_attributes", ret); - gossip_err("Error: unable to retrieve attributes for handle [%llu]\n", - llu(pref.handle)); - return(ret); - } - + printf(" %llu %d ", llu(PINT_handle_wrangler_handlelist.list_array[i][j]), *cur_fs); - if (attributes.attr.mask & PVFS_ATTR_SYS_SIZE) + if(ret < 0) { - printf("%13lld ", lld(attributes.attr.size)); + PVFS_strerror_r(ret, buf, 127); + printf("Unknown: getattr error: %s)\n", buf); } - - switch (attributes.attr.objtype) + else { - case PVFS_TYPE_NONE: - printf("none "); - break; - case PVFS_TYPE_METAFILE: - printf("meta file"); - break; - case PVFS_TYPE_DATAFILE: - printf("data file"); - break; - case PVFS_TYPE_DIRECTORY: - printf("directory"); - break; - case PVFS_TYPE_SYMLINK: - printf("symlink "); - free(attributes.attr.link_target); - break; - case PVFS_TYPE_DIRDATA: - printf("dirdata "); - break; + + if (attributes.attr.mask & PVFS_ATTR_SYS_SIZE) + { + printf("%13lld ", lld(attributes.attr.size)); + } + + switch (attributes.attr.objtype) + { + case PVFS_TYPE_NONE: + printf("none "); + break; + case PVFS_TYPE_METAFILE: + printf("meta file"); + break; + case PVFS_TYPE_DATAFILE: + printf("data file"); + break; + case PVFS_TYPE_DIRECTORY: + printf("directory"); + break; + case PVFS_TYPE_SYMLINK: + printf("symlink "); + free(attributes.attr.link_target); + break; + case 
PVFS_TYPE_DIRDATA: + printf("dirdata "); + break; + case PVFS_TYPE_INTERNAL: + printf("internal "); + break; + } + printf(" %s\n", server_name); } - printf(" %s\n", server_name); } } } @@ -1596,6 +1792,7 @@ static void set_return_code( *ret = retval; } } +#endif /* WIN32 */ /* @} */ diff --git a/src/common/misc/mkspace.c b/src/common/misc/mkspace.c index ca7aecc..a1532be 100644 --- a/src/common/misc/mkspace.c +++ b/src/common/misc/mkspace.c @@ -24,6 +24,7 @@ #include "pvfs2-util.h" #include "pvfs2-internal.h" #include "pint-util.h" +#include "pint-event.h" static char *lost_and_found_string = "lost+found"; @@ -110,7 +111,8 @@ static void get_handle_extent_from_ranges( } int pvfs2_mkspace( - char *storage_space, + char *data_path, + char *meta_path, char *collection, TROVE_coll_id coll_id, TROVE_handle root_handle, @@ -133,7 +135,8 @@ int pvfs2_mkspace( TROVE_handle lost_and_found_handle = TROVE_HANDLE_NULL; TROVE_handle lost_and_found_dirdata_handle = TROVE_HANDLE_NULL; - mkspace_print(verbose,"Storage space: %s\n",storage_space); + mkspace_print(verbose,"Data storage space : %s\n",data_path); + mkspace_print(verbose,"Metadata storage space : %s\n", meta_path); mkspace_print(verbose,"Collection : %s\n",collection); mkspace_print(verbose,"ID : %d\n",coll_id); mkspace_print(verbose,"Root Handle : %llu\n",llu(root_handle)); @@ -158,16 +161,17 @@ int pvfs2_mkspace( */ ret = trove_initialize(TROVE_METHOD_DBPF, NULL, - storage_space, + data_path, + meta_path, 0); if (ret > -1) { - gossip_err("error: storage space %s already " - "exists; aborting!\n",storage_space); + gossip_err("error: storage space %s or %s already " + "exists; aborting!\n",data_path,meta_path); return -1; } - ret = trove_storage_create(TROVE_METHOD_DBPF, storage_space, NULL, &op_id); + ret = trove_storage_create(TROVE_METHOD_DBPF, data_path, meta_path, NULL, &op_id); if (ret != 1) { gossip_err("error: storage create failed; aborting!\n"); @@ -178,15 +182,17 @@ int pvfs2_mkspace( /* now that the storage 
space exists, initialize trove properly */ ret = trove_initialize( TROVE_METHOD_DBPF, NULL, - storage_space, 0); + data_path, meta_path, 0); if (ret < 0) { gossip_err("error: trove initialize failed; aborting!\n"); return -1; } - mkspace_print(verbose,"info: created storage space '%s'.\n", - storage_space); + mkspace_print(verbose,"info: created data storage space '%s'.\n", + data_path); + mkspace_print(verbose,"info: created metadata storage space '%s'.\n", + meta_path); /* try to look up collection used to store file system */ ret = trove_collection_lookup( @@ -286,7 +292,7 @@ int pvfs2_mkspace( coll_id, &extent_array, &new_root_handle, PVFS_TYPE_DIRECTORY, NULL, (TROVE_SYNC | TROVE_FORCE_REQUESTED_HANDLE), - NULL, trove_context, &op_id); + NULL, trove_context, &op_id, NULL); while (ret == 0) { @@ -338,7 +344,7 @@ int pvfs2_mkspace( ret = trove_dspace_setattr( coll_id, new_root_handle, &attr, TROVE_SYNC, NULL, - trove_context, &op_id); + trove_context, &op_id, NULL); while (ret == 0) { @@ -390,7 +396,7 @@ int pvfs2_mkspace( ret = trove_dspace_create( coll_id, &extent_array, &root_dirdata_handle, PVFS_TYPE_DIRDATA, NULL, TROVE_SYNC, NULL, - trove_context, &op_id); + trove_context, &op_id, NULL); while (ret == 0) { @@ -417,7 +423,7 @@ int pvfs2_mkspace( ret = trove_keyval_write( coll_id, new_root_handle, &key, &val, TROVE_SYNC, 0, NULL, - trove_context, &op_id); + trove_context, &op_id, NULL); while (ret == 0) { @@ -466,7 +472,7 @@ int pvfs2_mkspace( ret = trove_dspace_create( coll_id, &extent_array, &lost_and_found_handle, PVFS_TYPE_DIRECTORY, NULL, TROVE_SYNC, NULL, - trove_context, &op_id); + trove_context, &op_id, NULL); while (ret == 0) { @@ -497,7 +503,7 @@ int pvfs2_mkspace( ret = trove_dspace_setattr( coll_id, lost_and_found_handle, &attr, TROVE_SYNC, NULL, - trove_context, &op_id); + trove_context, &op_id, NULL); while (ret == 0) { @@ -540,7 +546,7 @@ int pvfs2_mkspace( ret = trove_dspace_create( coll_id, &extent_array, &lost_and_found_dirdata_handle, 
PVFS_TYPE_DIRDATA, NULL, TROVE_SYNC, NULL, - trove_context, &op_id); + trove_context, &op_id, NULL); while (ret == 0) { @@ -568,7 +574,7 @@ int pvfs2_mkspace( ret = trove_keyval_write( coll_id, lost_and_found_handle, &key, &val, TROVE_SYNC, - 0, NULL, trove_context, &op_id); + 0, NULL, trove_context, &op_id, NULL); while (ret == 0) { @@ -601,7 +607,7 @@ int pvfs2_mkspace( coll_id, root_dirdata_handle, &key, &val, TROVE_SYNC | TROVE_NOOVERWRITE | TROVE_KEYVAL_HANDLE_COUNT, 0, - NULL, trove_context, &op_id); + NULL, trove_context, &op_id, NULL); while (ret == 0) { @@ -635,7 +641,8 @@ int pvfs2_mkspace( } int pvfs2_rmspace( - char *storage_space, + char *data_path, + char *meta_path, char *collection, TROVE_coll_id coll_id, int remove_collection_only, @@ -650,11 +657,11 @@ int pvfs2_rmspace( { ret = trove_initialize( TROVE_METHOD_DBPF, NULL, - storage_space, 0); + data_path, meta_path, 0); if (ret == -1) { - gossip_err("error: storage space %s does not " - "exist; aborting!\n", storage_space); + gossip_err("error: storage space %s or %s does not " + "exist; aborting!\n", data_path, meta_path); return -1; } trove_is_initialized = 1; @@ -672,8 +679,8 @@ int pvfs2_rmspace( if (!remove_collection_only) { - ret = trove_storage_remove( - TROVE_METHOD_DBPF, storage_space, NULL, &op_id); + ret = trove_storage_remove(TROVE_METHOD_DBPF, data_path, meta_path, + NULL, &op_id); /* * it is a bit weird to do a trove_finaliz() prior to blowing away * the storage space, but this allows the __db files of the DB env @@ -681,8 +688,8 @@ int pvfs2_rmspace( */ trove_finalize(TROVE_METHOD_DBPF); mkspace_print( - verbose, "PVFS2 Storage Space %s removed %s\n", - storage_space, (((ret == 1) || (ret == -TROVE_ENOENT)) ? + verbose, "PVFS2 Storage Space %s and %s removed %s\n", + data_path, meta_path, (((ret == 1) || (ret == -TROVE_ENOENT)) ? 
"successfully" : "with errors")); trove_is_initialized = 0; @@ -690,3 +697,11 @@ int pvfs2_rmspace( return ret; } +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/misc/mkspace.h b/src/common/misc/mkspace.h index 663ba2c..76d36a5 100644 --- a/src/common/misc/mkspace.h +++ b/src/common/misc/mkspace.h @@ -13,7 +13,8 @@ #define PVFS2_MKSPACE_STDERR_VERBOSE 2 int pvfs2_mkspace( - char *storage_space, + char *data_path, + char *meta_path, char *collection, TROVE_coll_id coll_id, TROVE_handle root_handle, @@ -23,7 +24,8 @@ int pvfs2_mkspace( int verbose); int pvfs2_rmspace( - char *storage_space, + char *data_path, + char *meta_path, char *collection, TROVE_coll_id coll_id, int remove_collection_only, diff --git a/src/common/misc/mmap-ra-cache.c b/src/common/misc/mmap-ra-cache.c index 5e78f18..bfc447d 100644 --- a/src/common/misc/mmap-ra-cache.c +++ b/src/common/misc/mmap-ra-cache.c @@ -9,8 +9,10 @@ #include #include #include +#ifndef WIN32 #include #include +#endif #include "pvfs2.h" #include "gossip.h" diff --git a/src/common/misc/module.mk.in b/src/common/misc/module.mk.in index 1dd34c3..266c148 100644 --- a/src/common/misc/module.mk.in +++ b/src/common/misc/module.mk.in @@ -19,7 +19,10 @@ LIBSRC += $(DIR)/server-config.c \ $(DIR)/tcache.c \ $(DIR)/state-machine-fns.c \ $(DIR)/fsck-utils.c \ - $(DIR)/pint-eattr.c + $(DIR)/pint-eattr.c \ + $(DIR)/pint-hint.c \ + $(DIR)/pint-mem.c \ + $(DIR)/pint-uid-mgmt.c SERVERSRC += $(DIR)/server-config.c \ $(DIR)/server-config-mgr.c \ $(DIR)/str-utils.c \ @@ -34,7 +37,17 @@ SERVERSRC += $(DIR)/server-config.c \ $(DIR)/tcache.c \ $(DIR)/state-machine-fns.c \ $(DIR)/void.c \ - $(DIR)/pint-eattr.c + $(DIR)/realpath.c \ + $(DIR)/msgpairarray.c \ + $(DIR)/pint-eattr.c \ + $(DIR)/pint-mem.c \ + $(DIR)/pint-hint.c \ + $(DIR)/pint-uid-mgmt.c + +LIBBMISRC += $(DIR)/str-utils.c \ + $(DIR)/pint-event.c \ + $(DIR)/errno-mapping.c \ + 
$(DIR)/pint-mem.c MODCFLAGS_$(DIR)/server-config.c = \ -I$(srcdir)/src/server diff --git a/src/common/misc/msgpairarray.h b/src/common/misc/msgpairarray.h index e7f24a8..abbe0c4 100644 --- a/src/common/misc/msgpairarray.h +++ b/src/common/misc/msgpairarray.h @@ -13,7 +13,6 @@ #include "job.h" #include "command.h" - extern struct PINT_state_machine_s pvfs2_msgpairarray_sm; extern struct PINT_state_machine_s pvfs2_osd_msgpairarray_sm; @@ -24,6 +23,8 @@ extern struct PINT_state_machine_s pvfs2_osd_msgpairarray_sm; #define PVFS_MSGPAIR_RETRY 0xFE #define PVFS_MSGPAIR_NO_RETRY 0xFF +#define PINT_MSGPAIR_PARENT_SM -1 + /* * This structure holds everything that we need for the state of a * message pair. We need arrays of these in some cases, so it's @@ -49,6 +50,11 @@ typedef struct PINT_sm_msgpair_state_s /* server address */ PVFS_BMI_addr_t svr_addr; + /*session identifier between send and rcvs*/ + /*note: server-side i/o machine uses the session_tag as the flow */ + /*descriptor tag. */ + bmi_msg_tag_t session_tag; + /* req and encoded_req are used to send a request */ struct PVFS_server_req req; struct PINT_encoded_msg encoded_req; @@ -56,10 +62,9 @@ typedef struct PINT_sm_msgpair_state_s /* request when talking to an OSD server */ struct osd_command osd_command; - /* point to things that may need freed during clean up */ - void *osd_iov; - void *osd_sgl; - + /* point to things that may need freed during clean up */ + void *osd_iov; + void *osd_sgl; /* the encoding type to use for the req */ enum PVFS_encoding_type enc_type; @@ -94,26 +99,53 @@ typedef struct PINT_sm_msgpair_state_s } PINT_sm_msgpair_state; /* used to pass in parameters that apply to every entry in a msgpair array */ -typedef struct PINT_sm_msgpair_params_s -{ +typedef struct +{ int job_timeout; int retry_delay; int retry_limit; job_context_id job_context; + int quiet_flag; /* if set, cuts down on error messages during retry */ /* comp_ct used to keep up with number of operations remaining */ int 
comp_ct; } PINT_sm_msgpair_params; +typedef struct +{ + PINT_sm_msgpair_params params; + int count; + PINT_sm_msgpair_state *msgarray; + PINT_sm_msgpair_state msgpair; +} PINT_sm_msgarray_op; + +#define PINT_msgpair_init(op) \ + do { \ + memset(&(op)->msgpair, 0, sizeof(PINT_sm_msgpair_state)); \ + if((op)->msgarray != &(op)->msgpair) \ + { \ + free((op)->msgarray); \ + (op)->msgarray = NULL; \ + } \ + (op)->count = 1; \ + (op)->msgarray = &(op)->msgpair; \ + } while(0) + +#define foreach_msgpair(__msgarray_op, __msg_p, __i) \ + for(__i = 0, __msg_p = &((__msgarray_op)->msgarray[__i]); \ + __i < (__msgarray_op)->count; \ + ++__i, __msg_p = &((__msgarray_op)->msgarray[__i])) /* helper functions */ int PINT_msgpairarray_init( - PINT_sm_msgpair_state ** msgpairarray, + PINT_sm_msgarray_op *op, int count); -void PINT_msgpairarray_destroy(PINT_sm_msgpair_state * msgpairarray); +void PINT_msgpairarray_destroy(PINT_sm_msgarray_op *op); + +int PINT_msgarray_status(PINT_sm_msgarray_op *op); int PINT_serv_decode_resp( PVFS_fs_id fs_id, @@ -130,9 +162,7 @@ int PINT_serv_free_msgpair_resources( PVFS_BMI_addr_t *svr_addr_p, int max_resp_sz); -int PINT_serv_msgpairarray_resolve_addrs( - int count, - PINT_sm_msgpair_state* msgarray); +int PINT_serv_msgpairarray_resolve_addrs(PINT_sm_msgarray_op *op); #define PRINT_ENCODING_ERROR(type_str, type) \ do { \ @@ -147,6 +177,20 @@ do { \ "********************\n"); \ } while(0) +#define PINT_init_msgpair(__sm_p, __msg_p) \ +do { \ + __msg_p = &__sm_p->msgpair; \ + memset(__msg_p, 0, sizeof(PINT_sm_msgpair_state)); \ + if (__sm_p->msgarray && (__sm_p->msgarray != &(__sm_p->msgpair)))\ + { \ + free(__sm_p->msgarray); \ + __sm_p->msgarray = NULL; \ + } \ + __sm_p->msgarray = __msg_p; \ + __sm_p->msgarray_count = 1; \ +} while(0) + + #endif /* __MSGPAIRARRAY_H */ /* diff --git a/src/common/misc/msgpairarray.sm b/src/common/misc/msgpairarray.sm index ad0f5e1..47f75e1 100644 --- a/src/common/misc/msgpairarray.sm +++ 
b/src/common/misc/msgpairarray.sm @@ -13,26 +13,6 @@ #include #include -/* conditionally include different headers to get the type of the state - * machine struct that contains the msgpair state structs we need to access; - * this is done to allow the reuse of this nested machine on both client and - * server side where completely different primary state machine structures - * are used - */ -#if defined(__PVFS2_CLIENT__) -#include "client-state-machine.h" -#define PARENT_SM PINT_client_sm -#elif defined(__PVFS2_SERVER__) -#include "src/server/pvfs2-server.h" -#define PARENT_SM PINT_server_op -#else -/* this condition is only included for dependency building step; will not - * actually compile cleanly - */ -#define __SM_CHECK_DEP -#include "client-state-machine.h" -#include "src/server/pvfs2-server.h" -#endif #include "msgpairarray.h" #include "pvfs2-debug.h" #include "pint-cached-config.h" @@ -43,6 +23,30 @@ #include "pint-util.h" #include "server-config-mgr.h" #include "pvfs2-internal.h" +#include "state-machine.h" + +#ifdef WIN32 +#define gossip_err_unless_quiet(format, ...) \ +do {\ + if(mop->params.quiet_flag)\ + {\ + gossip_debug(GOSSIP_MSGPAIR_DEBUG, format, __VA_ARGS__); \ + }\ + else \ + {\ + gossip_err(format, __VA_ARGS__); \ + }\ +} while(0) + +#else +#define gossip_err_unless_quiet(format, f...) 
\ +do {\ + if(mop->params.quiet_flag)\ + gossip_debug(GOSSIP_MSGPAIR_DEBUG, format, ##f); \ + else \ + gossip_err(format, ##f); \ +} while(0) +#endif enum { @@ -86,6 +90,12 @@ nested machine pvfs2_msgpairarray_sm run msgpairarray_completion_fn; MSGPAIRS_RETRY => post_retry; MSGPAIRS_RETRY_NODELAY => post; + default => done; + } + + state done + { + run msgpairarray_done; default => return; } } @@ -95,23 +105,27 @@ nested machine pvfs2_msgpairarray_sm static PINT_sm_action msgpairarray_init( struct PINT_smcb *smcb, job_status_s *js_p) { - PARENT_SM *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_sm_msgarray_op *mop = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int i = 0; PINT_sm_msgpair_state *msg_p = NULL; + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing msgpairarray_init...\n"); + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tbase frame:%d\tframe count:%d\n" + ,smcb->base_frame,smcb->frame_count); + gossip_debug(GOSSIP_MSGPAIR_DEBUG, "(%p) msgpairarray state: init " - "(%d msgpair(s))\n", smcb, sm_p->msgarray_count); + "(%d msgpair(s))\n", smcb, mop->count); - assert(sm_p->msgarray_count > 0); + assert(mop->count > 0); js_p->error_code = 0; /* set number of operations that must complete. 
*/ - sm_p->msgarray_params.comp_ct = (2 * sm_p->msgarray_count); + mop->params.comp_ct = (2 * mop->count); - for(i = 0; i < sm_p->msgarray_count; i++) + for(i = 0; i < mop->count; i++) { - msg_p = &sm_p->msgarray[i]; + msg_p = &mop->msgarray[i]; assert(msg_p); assert((msg_p->retry_flag == PVFS_MSGPAIR_RETRY) || @@ -122,7 +136,7 @@ static PINT_sm_action msgpairarray_init( if (msg_p->suppress) { msg_p->complete = 1; - sm_p->msgarray_params.comp_ct -= 2; + mop->params.comp_ct -= 2; } else msg_p->complete = 0; } @@ -150,7 +164,7 @@ static PINT_sm_action msgpairarray_init( static PINT_sm_action msgpairarray_post( struct PINT_smcb *smcb, job_status_s *js_p) { - PARENT_SM *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_sm_msgarray_op *mop = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret = -PVFS_EINVAL, i = 0, tmp = 0; struct server_configuration_s *server_config = NULL; PVFS_msg_tag_t session_tag; @@ -162,16 +176,16 @@ static PINT_sm_action msgpairarray_post( gossip_debug( GOSSIP_MSGPAIR_DEBUG, "%s: sm %p " "%d total message(s) with %d incomplete\n", __func__, smcb, - sm_p->msgarray_count * 2, sm_p->msgarray_params.comp_ct); + mop->count * 2, mop->params.comp_ct); js_p->error_code = 0; - assert(sm_p->msgarray_count > 0); - assert(sm_p->msgarray_params.comp_ct >= 2); + assert(mop->count > 0); + assert(mop->params.comp_ct >= 2); - for (i = 0; i < sm_p->msgarray_count; i++) + for (i = 0; i < mop->count; i++) { - msg_p = &sm_p->msgarray[i]; + msg_p = &mop->msgarray[i]; assert(msg_p); /* @@ -242,6 +256,7 @@ static PINT_sm_action msgpairarray_post( msg_p->max_resp_sz = PINT_encode_calc_max_size( PINT_ENCODE_RESP, msg_p->req.op, msg_p->enc_type); + msg_p->encoded_resp_p = BMI_memalloc( msg_p->svr_addr, msg_p->max_resp_sz, BMI_RECV); @@ -255,6 +270,10 @@ static PINT_sm_action msgpairarray_post( session_tag = PINT_util_get_next_tag(); + /*store the session tag for this msgpair, so the msgpair completion */ + /*function can pass it to the caller of msgpairarray. 
*/ + msg_p->session_tag = session_tag; + gossip_debug(GOSSIP_MSGPAIR_DEBUG, "%s: sm %p msgpair %d: " "posting recv\n", __func__, smcb, i); @@ -268,8 +287,9 @@ static PINT_sm_action msgpairarray_post( i, &msg_p->recv_status, &msg_p->recv_id, - sm_p->msgarray_params.job_context, - sm_p->msgarray_params.job_timeout); + mop->params.job_context, + mop->params.job_timeout, + msg_p->req.hints); if (ret == 0) { /* perform a quick test to see if the recv failed before posting @@ -279,7 +299,7 @@ static PINT_sm_action msgpairarray_post( */ ret = job_test(msg_p->recv_id, &tmp, NULL, &msg_p->recv_status, 0, - sm_p->msgarray_params.job_context); + mop->params.job_context); } if ((ret < 0) || (ret == 1)) @@ -304,11 +324,12 @@ static PINT_sm_action msgpairarray_post( /* mark send as bad too and don't post it */ msg_p->send_status.error_code = msg_p->recv_status.error_code; msg_p->op_status = msg_p->recv_status.error_code; - sm_p->msgarray_params.comp_ct -= 2; + mop->params.comp_ct -= 2; if (local_enc_and_alloc) { PINT_encode_release(&msg_p->encoded_req, PINT_ENCODE_REQ); + memset(&msg_p->encoded_req,0,sizeof(msg_p->encoded_req)); BMI_memfree(msg_p->svr_addr,msg_p->encoded_resp_p, msg_p->max_resp_sz, BMI_RECV); msg_p->encoded_resp_p = NULL; @@ -337,11 +358,12 @@ static PINT_sm_action msgpairarray_post( msg_p->encoded_req.buffer_type, 1, smcb, - sm_p->msgarray_count+i, + mop->count+i, &msg_p->send_status, &msg_p->send_id, - sm_p->msgarray_params.job_context, - sm_p->msgarray_params.job_timeout); + mop->params.job_context, + mop->params.job_timeout, + msg_p->req.hints); if ((ret < 0) || ((ret == 1) && (msg_p->send_status.error_code != 0))) @@ -356,28 +378,28 @@ static PINT_sm_action msgpairarray_post( msg_p->send_status.error_code); } - gossip_err("Send error: cancelling recv.\n"); + gossip_err_unless_quiet("Send error: cancelling recv.\n"); - job_bmi_cancel(msg_p->recv_id, sm_p->msgarray_params.job_context); + job_bmi_cancel(msg_p->recv_id, mop->params.job_context); /* we still 
have to wait for recv completion, so just decrement * comp_ct by one and keep going */ msg_p->op_status = msg_p->send_status.error_code; msg_p->send_id = 0; - sm_p->msgarray_params.comp_ct--; + mop->params.comp_ct--; } else if (ret == 1) { /* immediate completion */ msg_p->send_id = 0; /* decrement our count, since send is already done. */ - sm_p->msgarray_params.comp_ct--; + mop->params.comp_ct--; } /* else: successful post, no immediate completion */ } - if (sm_p->msgarray_params.comp_ct == 0) + if (mop->params.comp_ct == 0) { /* everything is completed already (could happen in some failure * cases); jump straight to final completion function. @@ -395,36 +417,36 @@ static PINT_sm_action msgpairarray_post( static PINT_sm_action msgpairarray_post_retry( struct PINT_smcb *smcb, job_status_s *js_p) { - PARENT_SM *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_sm_msgarray_op *mop = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); job_id_t tmp_id; gossip_debug(GOSSIP_MSGPAIR_DEBUG, "%s: sm %p, wait %d ms\n", - __func__, smcb, sm_p->msgarray_params.retry_delay); + __func__, smcb, mop->params.retry_delay); js_p->error_code = 0; /* do not leak MSGPAIRS_RETRY through to wait */ return job_req_sched_post_timer( - sm_p->msgarray_params.retry_delay, + mop->params.retry_delay, smcb, 0, js_p, &tmp_id, - sm_p->msgarray_params.job_context); + mop->params.job_context); } static PINT_sm_action msgpairarray_complete( struct PINT_smcb *smcb, job_status_s *js_p) { - PARENT_SM *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_sm_msgarray_op *mop = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); gossip_debug(GOSSIP_MSGPAIR_DEBUG, "%s: sm %p status_user_tag %d msgarray_count %d\n", - __func__, smcb, (int) js_p->status_user_tag, sm_p->msgarray_count); + __func__, smcb, (int) js_p->status_user_tag, mop->count); /* match operation with something in the msgpair array */ /* the first N tags are receives, the second N are sends */ - assert(js_p->status_user_tag < sm_p->msgarray_count*2); + 
assert(js_p->status_user_tag < mop->count*2); - if (js_p->status_user_tag < sm_p->msgarray_count) + if (js_p->status_user_tag < mop->count) { PINT_sm_msgpair_state *msg_p = - &sm_p->msgarray[js_p->status_user_tag]; + &mop->msgarray[js_p->status_user_tag]; msg_p->recv_id = 0; msg_p->recv_status = *js_p; @@ -438,13 +460,13 @@ static PINT_sm_action msgpairarray_complete( /* we got a receive error, but send is still pending. Cancel * the send */ - job_bmi_cancel(msg_p->send_id, sm_p->msgarray_params.job_context); + job_bmi_cancel(msg_p->send_id, mop->params.job_context); } } else { - PINT_sm_msgpair_state *msg_p = &sm_p->msgarray[ - js_p->status_user_tag - sm_p->msgarray_count]; + PINT_sm_msgpair_state *msg_p = &mop->msgarray[ + js_p->status_user_tag - mop->count]; msg_p->send_id = 0; msg_p->send_status = *js_p; @@ -458,20 +480,20 @@ static PINT_sm_action msgpairarray_complete( /* we got a send error, but recv is still pending. Cancel * the recv */ - job_bmi_cancel(msg_p->recv_id, sm_p->msgarray_params.job_context); + job_bmi_cancel(msg_p->recv_id, mop->params.job_context); } } /* decrement comp_ct until all operations have completed */ - if (--sm_p->msgarray_params.comp_ct > 0) + if (--mop->params.comp_ct > 0) { gossip_debug(GOSSIP_MSGPAIR_DEBUG, " msgpairarray: %d operations remain\n", - sm_p->msgarray_params.comp_ct); + mop->params.comp_ct); return SM_ACTION_DEFERRED; } - assert(sm_p->msgarray_params.comp_ct == 0); + assert(mop->params.comp_ct == 0); gossip_debug(GOSSIP_MSGPAIR_DEBUG, " msgpairarray: all operations complete\n"); @@ -482,7 +504,7 @@ static PINT_sm_action msgpairarray_complete( static PINT_sm_action msgpairarray_completion_fn( struct PINT_smcb *smcb, job_status_s *js_p) { - PARENT_SM *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_sm_msgarray_op *mop = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret = -PVFS_EINVAL, i = 0; int need_retry = 0; struct PINT_decoded_msg decoded_resp; @@ -496,10 +518,11 @@ static PINT_sm_action 
msgpairarray_completion_fn( gossip_debug(GOSSIP_MSGPAIR_DEBUG, "(%p) msgpairarray state: " "completion_fn\n", smcb); + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing msgpairarray_completion_fn..\n"); - for (i = 0; i < sm_p->msgarray_count; i++) + for (i = 0; i < mop->count; i++) { - PINT_sm_msgpair_state *msg_p = &sm_p->msgarray[i]; + PINT_sm_msgpair_state *msg_p = &mop->msgarray[i]; assert(msg_p); /* @@ -511,7 +534,16 @@ static PINT_sm_action msgpairarray_completion_fn( if (msg_p->op_status != 0) { - PVFS_perror_gossip("msgpair failed, will retry", msg_p->op_status); + char s[1024]; + PVFS_strerror_r(msg_p->op_status, s, sizeof(s)); + server_string = PINT_cached_config_map_addr( + msg_p->fs_id, msg_p->svr_addr, &server_type); + if(!server_string) + { + server_string = BMI_addr_rev_lookup(msg_p->svr_addr); + } + + gossip_err("Warning: msgpair failed to %s, will retry: %s\n", server_string, s); ++need_retry; continue; @@ -544,6 +576,7 @@ static PINT_sm_action msgpairarray_completion_fn( */ if (msg_p->comp_fn != NULL) { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\texecuting msg_p->comp_fn..\n"); /* If we call the completion function, store the result on * a per message pair basis. Also store some non-zero * (failure) value in js_p->error_code if we see one. @@ -558,6 +591,17 @@ static PINT_sm_action msgpairarray_completion_fn( * completion function. -- RobR */ } + else if( (msg_p->op_status != 0) && (resp_p == NULL) ) + { + /* before we reference resp_p we need to catch the case where + * PINT_serv_decode_resp didn't give us a struct. 
We error out + * the same without referencing resp_p */ + gossip_debug(GOSSIP_MSGPAIR_DEBUG, + "%s: error code %d from PINT_serv_decode_resp, " + "from server %d\n", __func__, msg_p->op_status, i); + js_p->error_code = msg_p->op_status; + break; + } else if (resp_p->status != 0) { /* no comp_fn specified and status non-zero */ @@ -588,6 +632,7 @@ static PINT_sm_action msgpairarray_completion_fn( return SM_ACTION_COMPLETE; } + memset(&msg_p->encoded_req,0,sizeof(msg_p->encoded_req)); msg_p->encoded_resp_p = NULL; msg_p->max_resp_sz = 0; @@ -599,7 +644,7 @@ static PINT_sm_action msgpairarray_completion_fn( gossip_debug(GOSSIP_MSGPAIR_DEBUG, "%s: sm %p msgpair %d " "marked complete\n", __func__, smcb, i); - } + }/*end for*/ if (need_retry) { /* @@ -607,21 +652,21 @@ static PINT_sm_action msgpairarray_completion_fn( * of two since they are pairs. If over the count, do not * retry, just return one of the error codes. */ - sm_p->msgarray_params.comp_ct = 0; + mop->params.comp_ct = 0; js_p->error_code = 0; - for (i=0; i < sm_p->msgarray_count; i++) { + for (i=0; i < mop->count; i++) { - PINT_sm_msgpair_state *msg_p = &sm_p->msgarray[i]; + PINT_sm_msgpair_state *msg_p = &mop->msgarray[i]; if (msg_p->complete) continue; if (msg_p->retry_flag == PVFS_MSGPAIR_RETRY && PVFS_ERROR_CLASS(-msg_p->op_status) == PVFS_ERROR_BMI - && msg_p->retry_count < sm_p->msgarray_params.retry_limit) { + && msg_p->retry_count < mop->params.retry_limit) { ++msg_p->retry_count; - sm_p->msgarray_params.comp_ct += 2; + mop->params.comp_ct += 2; gossip_debug(GOSSIP_MSGPAIR_DEBUG, "*** %s: msgpair %d failed, retry %d\n", __func__, i, msg_p->retry_count); @@ -650,20 +695,20 @@ static PINT_sm_action msgpairarray_completion_fn( server_string = "[UNKNOWN]"; } PVFS_strerror_r(msg_p->op_status, s, sizeof(s)); - gossip_err("*** %s: msgpair to server %s failed: %s\n", - __func__, server_string, s); + gossip_err_unless_quiet("*** %s: msgpair to server %s failed: %s\n", + __func__, server_string, s); 
if(msg_p->retry_flag != PVFS_MSGPAIR_RETRY) { - gossip_err("*** No retries requested.\n"); + gossip_err_unless_quiet("*** No retries requested.\n"); } else if(PVFS_ERROR_CLASS(-msg_p->op_status) != PVFS_ERROR_BMI) { - gossip_err("*** Non-BMI failure.\n"); + gossip_err_unless_quiet("*** Non-BMI failure.\n"); } else { - gossip_err("*** Out of retries.\n"); + gossip_err_unless_quiet("*** Out of retries.\n"); } if (js_p->error_code == 0) js_p->error_code = msg_p->op_status; @@ -674,30 +719,57 @@ static PINT_sm_action msgpairarray_completion_fn( return SM_ACTION_COMPLETE; } +static PINT_sm_action msgpairarray_done( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + int task_id, error_code, remaining; + PINT_sm_pop_frame(smcb, &task_id, &error_code, &remaining); + return SM_ACTION_COMPLETE; +} + /********************************************************************* * helper functions used in conjunction with state machine defined above */ int PINT_msgpairarray_init( - PINT_sm_msgpair_state ** msgpairarray, + PINT_sm_msgarray_op *op, int count) { - *msgpairarray = (PINT_sm_msgpair_state *)malloc( + op->msgarray = (PINT_sm_msgpair_state *)malloc( count * sizeof(PINT_sm_msgpair_state)); - if(!*msgpairarray) + if(!op->msgarray) { return -PVFS_ENOMEM; } - memset(*msgpairarray, 0, (count * sizeof(PINT_sm_msgpair_state))); + memset(op->msgarray, 0, (count * sizeof(PINT_sm_msgpair_state))); + op->count = count; return 0; } /* we pass in a pointer to the array so that we can set it to NULL */ void PINT_msgpairarray_destroy( - PINT_sm_msgpair_state * msgpairarray) + PINT_sm_msgarray_op *op) { - free(msgpairarray); + if(op->msgarray && (&op->msgpair) != op->msgarray) + { + free(op->msgarray); + } + op->msgarray = NULL; + op->count = 0; +} + +int PINT_msgarray_status(PINT_sm_msgarray_op *op) +{ + int i; + for (i = 0; i < op->count; i++) + { + if (op->msgarray[i].op_status != 0) + { + return op->msgarray[i].op_status; + } + } + return 0; } int PINT_serv_decode_resp(PVFS_fs_id fs_id, 
@@ -754,10 +826,13 @@ int PINT_serv_free_msgpair_resources( if (encoded_req_p && decoded_resp_p && svr_addr_p) { PINT_encode_release(encoded_req_p, PINT_ENCODE_REQ); + memset(encoded_req_p,0,sizeof(*encoded_req_p)); PINT_decode_release(decoded_resp_p, PINT_DECODE_RESP); + memset(decoded_resp_p,0,sizeof(*decoded_resp_p)); BMI_memfree(*svr_addr_p, encoded_resp_p, max_resp_sz, BMI_RECV); + encoded_resp_p = NULL; ret = 0; } @@ -772,16 +847,16 @@ int PINT_serv_free_msgpair_resources( * returns 0 on success, -PVFS_error on failure */ int PINT_serv_msgpairarray_resolve_addrs( - int count, PINT_sm_msgpair_state *msgarray) + PINT_sm_msgarray_op *mop) { int i = 0; int ret = -PVFS_EINVAL; - if ((count > 0) && msgarray) + if ((mop->count > 0) && mop->msgarray) { - for(i = 0; i < count; i++) + for(i = 0; i < mop->count; i++) { - PINT_sm_msgpair_state *msg_p = &msgarray[i]; + PINT_sm_msgpair_state *msg_p = &mop->msgarray[i]; assert(msg_p); ret = PINT_cached_config_map_to_server( diff --git a/src/common/misc/pint-cached-config.c b/src/common/misc/pint-cached-config.c index 60b6fe7..67ab1f1 100644 --- a/src/common/misc/pint-cached-config.c +++ b/src/common/misc/pint-cached-config.c @@ -4,11 +4,22 @@ * See COPYING in top-level directory. */ +#include #include #include #include #include +#ifndef WIN32 +#include +#endif #include +#include +#ifndef WIN32 +#include +#endif +#ifdef HAVE_OPENSSL_SHA_H +#include +#endif #include "pvfs2-types.h" #include "pvfs2-attr.h" @@ -19,15 +30,19 @@ #include "quickhash.h" #include "extent-utils.h" #include "pint-cached-config.h" +#include "pvfs2-internal.h" #include "src/client/sysint/osd.h" -/* maps bmi address to handle ranges/extents */ -struct bmi_host_extent_table_s -{ - char *bmi_address; +/* really old linux distributions (jazz's RHEL 3) don't have this(!?) 
*/ +#ifndef HOST_NAME_MAX +#define HOST_NAME_MAX 64 +#endif - /* ptrs are type struct extent */ - PINT_llist *extent_list; +struct handle_lookup_entry +{ + PVFS_handle_extent extent; + char* server_name; + PVFS_BMI_addr_t server_addr; }; struct config_fs_cache_s @@ -35,15 +50,17 @@ struct config_fs_cache_s struct qlist_head hash_link; struct filesystem_configuration_s *fs; - /* ptrs are struct bmi_host_extent_table_s */ - PINT_llist *bmi_host_extent_tables; - /* index into fs->meta_handle_ranges obj (see server-config.h) */ PINT_llist *meta_server_cursor; /* index into fs->data_handle_ranges obj (see server-config.h) */ PINT_llist *data_server_cursor; + /* copy of server_configuration_s/host_id (see server-config.h) */ + char *data_local_alias; + /* handle mapping of local server (see server-config.h) */ + struct host_handle_mapping_s *data_local_mapping; + /* the following fields are used to cache arrays of unique physical server addresses, of particular use to the mgmt interface @@ -55,44 +72,10 @@ struct config_fs_cache_s phys_server_desc_s* server_array; int server_count; + struct handle_lookup_entry* handle_lookup_table; + int handle_lookup_table_size; }; -#define map_handle_range_to_extent_list(hrange_list) \ -do { cur = hrange_list; \ - while(cur) { \ - cur_mapping = PINT_llist_head(cur); \ - if (!cur_mapping) break; \ - assert(cur_mapping->alias_mapping); \ - assert(cur_mapping->alias_mapping->host_alias); \ - assert(cur_mapping->handle_range); \ - cur_host_extent_table = malloc( \ - sizeof(struct bmi_host_extent_table_s)); \ - if (!cur_host_extent_table) { \ - ret = -ENOMEM; \ - break; \ - } \ - cur_host_extent_table->bmi_address = \ - cur_mapping->alias_mapping->bmi_address; \ - assert(cur_host_extent_table->bmi_address); \ - cur_host_extent_table->extent_list = \ - PINT_create_extent_list(cur_mapping->handle_range); \ - if (!cur_host_extent_table->extent_list) { \ - free(cur_host_extent_table); \ - ret = -ENOMEM; \ - break; \ - } \ - /* \ - add this host 
to extent list mapping to \ - config cache object's host extent table \ - */ \ - ret = PINT_llist_add_to_tail( \ - cur_config_fs_cache->bmi_host_extent_tables, \ - cur_host_extent_table); \ - assert(ret == 0); \ - cur = PINT_llist_next(cur); \ - } } while(0) - - struct qhash_table *PINT_fsid_config_cache_table = NULL; /* these are based on code from src/server/request-scheduler.c */ @@ -101,13 +84,16 @@ static int hash_fsid( static int hash_fsid_compare( void *key, struct qlist_head *link); -static void free_host_extent_table(void *ptr); static int cache_server_array(PVFS_fs_id fsid); +static int handle_lookup_entry_compare(const void *p1, const void *p2); +const struct handle_lookup_entry* find_handle_lookup_entry( + PVFS_handle handle, PVFS_fs_id fsid); +static int load_handle_lookup_table( + struct config_fs_cache_s *cur_config_fs_cache); -#ifndef __PVFS2_SERVER__ +/* removed by WBL when selection algorithm rewritten static int meta_randomized = 0; -#endif -static int io_randomized = 0; +static int io_randomized = 0; */ /* PINT_cached_config_initialize() * @@ -117,12 +103,70 @@ static int io_randomized = 0; */ int PINT_cached_config_initialize(void) { + struct timeval tv; + unsigned int pid = 0; + unsigned int hostmix = 0; + unsigned int seed = 0; + unsigned char *hashseed = NULL; + char hostname[HOST_NAME_MAX]; + int ret; + int i; + int hostnamelen; + if (!PINT_fsid_config_cache_table) { PINT_fsid_config_cache_table = qhash_init(hash_fsid_compare,hash_fsid,11); } - srand((unsigned int)time(NULL)); + + /* include time, pid, and hostname in random seed in order to help avoid + * collisions on object placement when many clients are launched + * concurrently + */ + gettimeofday(&tv, NULL); +#ifdef WIN32 + pid = GetCurrentProcessId(); +#else + pid = getpid(); +#endif + + ret = gethostname(hostname, HOST_NAME_MAX); + if(ret == 0) + { + hostmix = 0; + hostnamelen = strlen(hostname); + for(i=0; ifs); - assert(cur_config_cache->bmi_host_extent_tables); /* fs object is 
freed by PINT_config_release */ cur_config_cache->fs = NULL; - PINT_llist_free(cur_config_cache->bmi_host_extent_tables, - free_host_extent_table); - /* if the 'cached server arrays' are used, free them */ if (cur_config_cache->io_server_count && cur_config_cache->io_server_array) @@ -191,6 +231,8 @@ int PINT_cached_config_finalize(void) cur_config_cache->server_array = NULL; } + free(cur_config_cache->handle_lookup_table); + free(cur_config_cache); } } while(hash_link); @@ -222,7 +264,8 @@ int PINT_cached_config_reinitialize( break; } - ret = PINT_cached_config_handle_load_mapping(cur_fs); + ret = PINT_cached_config_handle_load_mapping(cur_fs, + config); if (ret) { break; @@ -242,13 +285,11 @@ int PINT_cached_config_reinitialize( * returns 0 on success, -errno on failure */ int PINT_cached_config_handle_load_mapping( - struct filesystem_configuration_s *fs) + struct filesystem_configuration_s *fs, + struct server_configuration_s *config) { - int ret = -PVFS_EINVAL; - PINT_llist *cur = NULL; - struct host_handle_mapping_s *cur_mapping = NULL; struct config_fs_cache_s *cur_config_fs_cache = NULL; - struct bmi_host_extent_table_s *cur_host_extent_table = NULL; + int ret; if (fs) { @@ -258,45 +299,125 @@ int PINT_cached_config_handle_load_mapping( memset(cur_config_fs_cache, 0, sizeof(struct config_fs_cache_s)); cur_config_fs_cache->fs = (struct filesystem_configuration_s *)fs; - cur_config_fs_cache->meta_server_cursor = NULL; - cur_config_fs_cache->data_server_cursor = NULL; - cur_config_fs_cache->bmi_host_extent_tables = PINT_llist_new(); - assert(cur_config_fs_cache->bmi_host_extent_tables); - - /* - map all meta and data handle ranges to the extent list, if any. - map_handle_range_to_extent_list is a macro defined in - pint-cached-config.h for convenience only. 
- */ - assert(cur_config_fs_cache->fs->meta_handle_ranges); - map_handle_range_to_extent_list( - cur_config_fs_cache->fs->meta_handle_ranges); - - assert(cur_config_fs_cache->fs->data_handle_ranges); - map_handle_range_to_extent_list( - cur_config_fs_cache->fs->data_handle_ranges); - /* - add config cache object to the hash table that maps fsid to - a config_fs_cache_s. NOTE: the - 'map_handle_range_to_extent_list' can set ret to -ENOMEM, so - check for that here. - */ - if (ret != -ENOMEM) - { - cur_config_fs_cache->meta_server_cursor = + cur_config_fs_cache->meta_server_cursor = cur_config_fs_cache->fs->meta_handle_ranges; - cur_config_fs_cache->data_server_cursor = + cur_config_fs_cache->data_server_cursor = cur_config_fs_cache->fs->data_handle_ranges; + cur_config_fs_cache->data_local_alias = + config->host_id; + /* find handle mapping of local host */ + if (cur_config_fs_cache->data_local_alias) + { + cur_config_fs_cache->data_local_mapping = + PINT_get_handle_mapping(fs->data_handle_ranges, + cur_config_fs_cache->data_local_alias); + } + else + { + cur_config_fs_cache->data_local_mapping = NULL; + } - qhash_add(PINT_fsid_config_cache_table, - &(cur_config_fs_cache->fs->coll_id), - &(cur_config_fs_cache->hash_link)); - - ret = 0; + /* populate table used to speed up mapping of handle values + * to servers + */ + ret = load_handle_lookup_table(cur_config_fs_cache); + if(ret < 0) + { + free(cur_config_fs_cache); + gossip_err("Error: failed to load handle lookup table.\n"); + return(ret); } + + qhash_add(PINT_fsid_config_cache_table, + &(cur_config_fs_cache->fs->coll_id), + &(cur_config_fs_cache->hash_link)); } - return ret; + + return 0; +} + +static struct host_handle_mapping_s * +PINT_cached_config_find_server(PINT_llist *handle_ranges, const char *addr) +{ + host_handle_mapping_s *cur_mapping; + PINT_llist *server_cursor = handle_ranges; + + cur_mapping = PINT_llist_head(server_cursor); + while(cur_mapping && + strcmp(cur_mapping->alias_mapping->bmi_address, 
addr)) + { + server_cursor = PINT_llist_next(server_cursor); + cur_mapping = PINT_llist_head(server_cursor); + } + return cur_mapping; +} + +/* PINT_cached_config_get_server() + * + * Find the extent array for a specified server. + * This array MUST NOT be freed by the caller, nor cached for + * later use. + * + * returns 0 on success, -errno on failure + */ +int PINT_cached_config_get_server( + PVFS_fs_id fsid, + const char* host, + PVFS_ds_type type, + PVFS_handle_extent_array *ext_array) +{ + struct host_handle_mapping_s *cur_mapping = NULL; + struct qlist_head *hash_link = NULL; + struct config_fs_cache_s *cur_config_cache = NULL; + PINT_llist* server_cursor; + + if (!ext_array) + { + return(-PVFS_EINVAL); + } + + if((int)type != PINT_SERVER_TYPE_META && (int)type != PINT_SERVER_TYPE_IO) + { + return(-PVFS_EINVAL); + } + + hash_link = qhash_search(PINT_fsid_config_cache_table,&(fsid)); + if (!hash_link) + { + return(-PVFS_EINVAL); + } + + cur_config_cache = qlist_entry( + hash_link, struct config_fs_cache_s, hash_link); + + assert(cur_config_cache); + assert(cur_config_cache->fs); + + if((int)type == PINT_SERVER_TYPE_META) + { + server_cursor = + cur_config_cache->fs->meta_handle_ranges; + } + else + { + server_cursor = + cur_config_cache->fs->data_handle_ranges; + } + + cur_mapping = PINT_cached_config_find_server(server_cursor, host); + /* didn't find the server */ + if(!cur_mapping) + { + return(-PVFS_ENOENT); + } + + ext_array->extent_count = + cur_mapping->handle_extent_array.extent_count; + ext_array->extent_array = + cur_mapping->handle_extent_array.extent_array; + + return(0); } #ifndef __PVFS2_SERVER__ @@ -321,7 +442,7 @@ int PINT_cached_config_get_next_meta( PVFS_handle_extent_array *ext_array, int is_directory) { - int ret = -PVFS_EINVAL, jitter = 0, num_meta_servers = 0; + int ret = -PVFS_EINVAL, randsrv = 0, num_meta_servers = 0; char *meta_server_bmi_str = NULL; struct host_handle_mapping_s *cur_mapping = NULL; struct qlist_head *hash_link = NULL; 
@@ -329,108 +450,113 @@ int PINT_cached_config_get_next_meta( struct server_configuration_s *config; int want_osd; - if (ext_array) + if (!ext_array) { - hash_link = qhash_search(PINT_fsid_config_cache_table,&(fsid)); - if (hash_link) - { - config = PINT_get_server_config_struct(fsid); + gossip_err("PINT_cached_config_get_next_meta called with " + "NULL ext_array.\n"); + return -PVFS_EINVAL; + } + hash_link = qhash_search(PINT_fsid_config_cache_table,&(fsid)); + if (!hash_link) + { + gossip_err("Hash search failed to return configuration.\n"); + return -PVFS_EINVAL; + } - cur_config_cache = qlist_entry( - hash_link, struct config_fs_cache_s, hash_link); + config = PINT_get_server_config_struct(fsid); + cur_config_cache = qlist_entry( + hash_link, struct config_fs_cache_s, hash_link); - assert(cur_config_cache); - assert(cur_config_cache->fs); - assert(cur_config_cache->meta_server_cursor); + assert(cur_config_cache); + assert(cur_config_cache->fs); + assert(cur_config_cache->meta_server_cursor); - /* figure out if we want or do not want an OSD based on - * type settings and if a directory server was requested - * (from mkdir). - */ - want_osd = 0; /* 0 == give me a pvfs */ - switch (config->osd_type) { - case OSD_NONE: - if (config->osd_dir_type == OSD_DIR_PVFS) - want_osd = 0; - else - want_osd = is_directory; - break; - case OSD_DATAFILE: - if (config->osd_dir_type == OSD_DIR_PVFS) - want_osd = 0; - else - want_osd = is_directory; - break; - case OSD_METAFILE: - if (config->osd_dir_type == OSD_DIR_PVFS) - want_osd = !is_directory; - else - want_osd = 1; - break; - case OSD_MDFILE: - if (config->osd_dir_type == OSD_DIR_PVFS) - want_osd = !is_directory; /* never called */ - else - want_osd = 1; - break; - } + /* figure out if we want or do not want an OSD based on + * type settings and if a directory server was requested + * (from mkdir). 
+ */ + want_osd = 0; /* 0 == give me a pvfs */ + switch (config->osd_type) { + case OSD_NONE: + if (config->osd_dir_type == OSD_DIR_PVFS) + want_osd = 0; + else + want_osd = is_directory; + break; + case OSD_DATAFILE: + if (config->osd_dir_type == OSD_DIR_PVFS) + want_osd = 0; + else + want_osd = is_directory; + break; + case OSD_METAFILE: + if (config->osd_dir_type == OSD_DIR_PVFS) + want_osd = !is_directory; + else + want_osd = 1; + break; + case OSD_MDFILE: + if (config->osd_dir_type == OSD_DIR_PVFS) + want_osd = !is_directory; /* never called */ + else + want_osd = 1; + break; + } - num_meta_servers = PINT_llist_count( - cur_config_cache->fs->meta_handle_ranges); + num_meta_servers = PINT_llist_count( + cur_config_cache->fs->meta_handle_ranges); - /* pick random starting point, then round robin */ - if(!meta_randomized) - { - jitter = (rand() % num_meta_servers); - meta_randomized = 1; - } - else - { - /* we let the jitter loop below increment the cursor by one */ - jitter = 0; - } + randsrv = (rand() % num_meta_servers); - while(jitter >= 0) - { - cur_mapping = PINT_llist_head( - cur_config_cache->meta_server_cursor); - if (!cur_mapping) - { - cur_config_cache->meta_server_cursor = - cur_config_cache->fs->meta_handle_ranges; - cur_mapping = PINT_llist_head( - cur_config_cache->meta_server_cursor); - assert(cur_mapping); - } - cur_config_cache->meta_server_cursor = PINT_llist_next( - cur_config_cache->meta_server_cursor); - /* first jitter across all, regardless of acceptability, then - * keep looping until find one that matches */ - if (jitter == 0) - if (want_osd ^ - alias_is_osd(cur_mapping->alias_mapping->bmi_address)) - continue; - --jitter; - } - meta_server_bmi_str = cur_mapping->alias_mapping->bmi_address; - - ext_array->extent_count = - cur_mapping->handle_extent_array.extent_count; - ext_array->extent_array = - cur_mapping->handle_extent_array.extent_array; - - if (meta_addr != NULL) - { - ret = BMI_addr_lookup(meta_addr,meta_server_bmi_str); - } - 
else - { - ret = 0; - } - - PINT_put_server_config_struct(config); + /* set cursor at beginning of list */ + cur_config_cache->meta_server_cursor = + cur_config_cache->fs->meta_handle_ranges; + + while(randsrv--) + { + cur_config_cache->meta_server_cursor = PINT_llist_next( + cur_config_cache->meta_server_cursor); + if (!cur_config_cache->meta_server_cursor) + { + /* found end of list before we should have */ + gossip_err("Found end of list of metaservers " + "before expected in " + "PINT_cached_config_get_next_meta\n"); + /* return first metaserver */ + cur_config_cache->meta_server_cursor = + cur_config_cache->fs->meta_handle_ranges; + break; } + + /* first jitter across all, regardless of acceptability, then + * keep looping until find one that matches */ + if (randsrv == 0) + if (want_osd ^ alias_is_osd(cur_mapping->alias_mapping->bmi_address)) + continue; + --randsrv; + } + + cur_mapping = PINT_llist_head( + cur_config_cache->meta_server_cursor); + + meta_server_bmi_str = cur_mapping->alias_mapping->bmi_address; + + ext_array->extent_count = + cur_mapping->handle_extent_array.extent_count; + ext_array->extent_array = + cur_mapping->handle_extent_array.extent_array; + + if (meta_addr != NULL) + { + ret = BMI_addr_lookup(meta_addr,meta_server_bmi_str); } + else + { + ret = 0; + } + + PINT_put_server_config_struct(config); + return ret; } #endif @@ -445,23 +571,23 @@ static int PINT_cached_config_get_extents( PVFS_BMI_addr_t tmp_addr; struct config_fs_cache_s *cur_config_cache = NULL; struct host_handle_mapping_s *cur_mapping = NULL; - int num_io_servers, ret; + int ret; hash_link = qhash_search(PINT_fsid_config_cache_table,&(fsid)); if(!hash_link) { - gossip_err("Failed to find a file system matching fsid: %d\n", fsid); + gossip_err("Failed to find a file system matching fsid: %d\n", + fsid); return -PVFS_EINVAL; } cur_config_cache = qlist_entry( - hash_link, struct config_fs_cache_s, hash_link); + hash_link, struct config_fs_cache_s, hash_link); 
assert(cur_config_cache); assert(cur_config_cache->fs); server_list = cur_config_cache->fs->data_handle_ranges; - num_io_servers = PINT_llist_count(server_list); while(!PINT_llist_empty(server_list)) { @@ -470,7 +596,7 @@ static int PINT_cached_config_get_extents( server_list = PINT_llist_next(server_list); ret = BMI_addr_lookup( - &tmp_addr, cur_mapping->alias_mapping->bmi_address); + &tmp_addr, cur_mapping->alias_mapping->bmi_address); if(ret < 0) { return ret; @@ -479,9 +605,9 @@ static int PINT_cached_config_get_extents( if(tmp_addr == *addr) { handle_extents->extent_count = - cur_mapping->handle_extent_array.extent_count; + cur_mapping->handle_extent_array.extent_count; handle_extents->extent_array = - cur_mapping->handle_extent_array.extent_array; + cur_mapping->handle_extent_array.extent_array; return 0; } @@ -496,13 +622,16 @@ int PINT_cached_config_map_servers( PVFS_BMI_addr_t *addr_array, PVFS_handle_extent_array *handle_extent_array) { - struct qhash_head *hash_link; - struct PINT_llist *server_list; - struct host_handle_mapping_s *cur_mapping = NULL; + struct qhash_head *hash_link = NULL; + struct PINT_llist *server_list = NULL; + struct PINT_llist *server_list_head = NULL; + struct host_handle_mapping_s *sv = NULL; struct config_fs_cache_s *cur_config_cache = NULL; - int num_io_servers, i, j, ret; + int num_io_servers, i, df, ret; + int current_sv = -1; int start_index = -1; - int index; + int *random_array = NULL; + int *server_array = NULL; assert(inout_num_datafiles); @@ -519,131 +648,234 @@ int PINT_cached_config_map_servers( assert(cur_config_cache); assert(cur_config_cache->fs); - server_list = cur_config_cache->fs->data_handle_ranges; - num_io_servers = PINT_llist_count(server_list); + server_list_head = cur_config_cache->fs->data_handle_ranges; + num_io_servers = PINT_llist_count(server_list_head); switch(layout->algorithm) { - case PVFS_SYS_LAYOUT_LIST: + case PVFS_SYS_LAYOUT_LIST: - if(*inout_num_datafiles < layout->server_list.count) - { - 
gossip_err("The specified datafile layout is larger" - " than the number of requested datafiles\n"); - return -PVFS_EINVAL; - } + if(*inout_num_datafiles < layout->server_list.count) + { + gossip_err("The specified datafile layout is larger" + " than the number of requested datafiles\n"); + return -PVFS_EINVAL; + } - *inout_num_datafiles = layout->server_list.count; - for(i = 0; i < layout->server_list.count; ++i) + *inout_num_datafiles = layout->server_list.count; + for(i = 0; i < layout->server_list.count; ++i) + { + if(handle_extent_array) { - if(handle_extent_array) - { - ret = PINT_cached_config_get_extents( + ret = PINT_cached_config_get_extents( fsid, &layout->server_list.servers[i], &handle_extent_array[i]); - if(ret < 0) - { - gossip_err("The address specified in the datafile " - "layout is invalid\n"); - return ret; - } + if(ret < 0) + { + gossip_err("The address specified in the datafile " + "layout is invalid\n"); + return ret; } + } + + addr_array[i] = layout->server_list.servers[i]; + } + break; - addr_array[i] = layout->server_list.servers[i]; + case PVFS_SYS_LAYOUT_LOCAL: + /* + * This layout puts the one datafile on the local + * machine, assuming the local machine is a server. + * This should have been determined when the config + * is parsed. If this machine is not a server then + * use the default. 
+ */ + if (cur_config_cache->data_local_alias) + { + /* lookup addresses */ + ret = BMI_addr_lookup(&addr_array[0], + cur_config_cache->data_local_alias); + if (!ret) + { + struct host_handle_mapping_s *mapping; + mapping = cur_config_cache->data_local_mapping; + if (mapping && handle_extent_array) + { + handle_extent_array[0].extent_count = + mapping->handle_extent_array.extent_count; + handle_extent_array[0].extent_array = + mapping->handle_extent_array.extent_array; + } + /* local layout is only for one data file */ + *inout_num_datafiles = 1; } break; + } + /* else random */ + if(start_index == -1) + { + start_index = rand() % num_io_servers; + } + /* fall through */ - case PVFS_SYS_LAYOUT_NONE: + case PVFS_SYS_LAYOUT_NONE: + /* + * This layout is just like Round Robin except + * it does not randomly set the start_index with + * a random call but uses zero. If start_index + * is already set, just fall through. + */ + if (start_index == -1) + { start_index = 0; - /* fall through */ + } + /* fall through */ + + case PVFS_SYS_LAYOUT_ROUND_ROBIN: + /* + * This layout generates a random number from + * zero to num_io_servers - 1 and then allocates + * inout_num_datafiles servers starting with that + * as the first. 
Other parts of the code ensure + * that inout_num_datafiles < num_io_servers but + * this code should correctly allocate multiple + * datafiles per server round robin - though that + * won't happen with the current caode base + */ - case PVFS_SYS_LAYOUT_ROUND_ROBIN: + if(num_io_servers < *inout_num_datafiles) + { + *inout_num_datafiles = num_io_servers; + } - if(num_io_servers < *inout_num_datafiles) + if(start_index == -1) + { + start_index = rand() % num_io_servers; + } + + /* start at beginning of server list */ + server_list = server_list_head; + /* search for the start_index server */ + for (i = 0; i < start_index; i++) + { + server_list = PINT_llist_next(server_list); + } + sv = PINT_llist_head(server_list); + assert(sv); + /* for each data file */ + for(df = 0; df < *inout_num_datafiles; df++) + { + /* lookup addresses */ + ret = BMI_addr_lookup(&addr_array[df], + sv->alias_mapping->bmi_address); + if (ret) { - *inout_num_datafiles = num_io_servers; + return ret; } - if(start_index == -1) + /* no one uses this but we get it anyway */ + if(handle_extent_array) { - start_index = rand() % *inout_num_datafiles; + handle_extent_array[df].extent_count = + sv->handle_extent_array.extent_count; + handle_extent_array[df].extent_array = + sv->handle_extent_array.extent_array; } - - for(i = 0; i < *inout_num_datafiles; ++i) + /* go to next server in list */ + server_list = PINT_llist_next(server_list); + sv = PINT_llist_head(server_list); + if (!sv) { - cur_mapping = PINT_llist_head(server_list); - assert(cur_mapping); - server_list = PINT_llist_next(server_list); - - index = (i + start_index) % *inout_num_datafiles; - ret = BMI_addr_lookup( - &addr_array[index], - cur_mapping->alias_mapping->bmi_address); - if (ret) - { - return ret; - } - - if(handle_extent_array) - { - handle_extent_array[index].extent_count = - cur_mapping->handle_extent_array.extent_count; - handle_extent_array[index].extent_array = - cur_mapping->handle_extent_array.extent_array; - } + 
server_list = server_list_head; + sv = PINT_llist_head(server_list); } - break; + assert(sv); + } + break; - case PVFS_SYS_LAYOUT_RANDOM: + case PVFS_SYS_LAYOUT_RANDOM: + /* this layout randomizes the order but still uses each server + * only once + */ - /* all random */ - if(num_io_servers < *inout_num_datafiles) + /* limit this layout to a number of datafiles no greater than + * the number of servers + */ + if(num_io_servers < *inout_num_datafiles) + { + *inout_num_datafiles = num_io_servers; + } + /* init all the addrs to 0, so we know whether we've set an + * address at a particular index or not + */ + random_array = (int *)malloc(*inout_num_datafiles * sizeof(int)); + server_array = (int *)malloc(num_io_servers * sizeof(int)); + memset(random_array, 0, (*inout_num_datafiles)*sizeof(*addr_array)); + memset(server_array, 0, (num_io_servers)*sizeof(*addr_array)); + + /* generate list of unique random numbers from 0 to */ + /* inout_num_datafiles - 1 */ + for(df = 0; df < *inout_num_datafiles; df++) + { + int server = rand() % num_io_servers; + while (server_array[server]) { - *inout_num_datafiles = num_io_servers; + /* if we get a conflict skip on down to next entry */ + server = (server + 1) % num_io_servers; } + server_array[server] = 1; + random_array[df] = server; + } + /* server array is only to make sure we don't duplicate */ + free(server_array); - /* init all the addrs to 0, so we know whether we've set an - * address at a particular index or not - */ - for(i = 0; i < *inout_num_datafiles; ++i) + current_sv = 0; + server_list = server_list_head; + /* go through data file list in order */ + for(df = 0; df < *inout_num_datafiles; df++) + { + /* if we're already past the next one on the list */ + /* go back to head of the list */ + if (random_array[df] < current_sv) { - index = rand() % *inout_num_datafiles; - for(j = 0; j < i; ++j) - { - if(addr_array[index] == 0) - { - cur_mapping = PINT_llist_head(server_list); - assert(cur_mapping); - server_list = 
PINT_llist_next(server_list); - - /* found an unused index */ - ret = BMI_addr_lookup( - &addr_array[index], - cur_mapping->alias_mapping->bmi_address); - if (ret) - { - return ret; - } - - if(handle_extent_array) - { - handle_extent_array[index].extent_count = - cur_mapping->handle_extent_array.extent_count; - handle_extent_array[index].extent_array = - cur_mapping->handle_extent_array.extent_array; - } - } - } + server_list = server_list_head; + current_sv = 0; } - break; - default: - gossip_err("Unknown datafile mapping algorithm\n"); - return -PVFS_EINVAL; + /* skip down the list to the one we want */ + while(current_sv < random_array[df]) + { + server_list = PINT_llist_next(server_list); + current_sv++; + } + /* get the server info */ + sv = PINT_llist_head(server_list); + assert(sv); + /* lookup addresses */ + ret = BMI_addr_lookup(&addr_array[df], + sv->alias_mapping->bmi_address); + /* no one uses this but we get it anyway */ + if(handle_extent_array) + { + handle_extent_array[df].extent_count = + sv->handle_extent_array.extent_count; + handle_extent_array[df].extent_array = + sv->handle_extent_array.extent_array; + } + } + /* done with this so free it */ + free(random_array); + break; + + default: + gossip_err("Unknown datafile mapping algorithm\n"); + return -PVFS_EINVAL; } return 0; } +/* THIS APPEARS TO BE SUPERCEDED BY THE PREVIOUS FUNCTION*/ +#if 0 /* PINT_cached_config_get_next_io() * * returns the address of a set of servers that should be used to @@ -756,6 +988,7 @@ int PINT_cached_config_get_next_io( } return ret; } +#endif /* PINT_cached_config_map_addr() * @@ -765,7 +998,7 @@ int PINT_cached_config_get_next_io( * returns pointer to string on success, NULL on failure */ const char *PINT_cached_config_map_addr( - PVFS_fs_id fsid, + PVFS_fs_id fsid, PVFS_BMI_addr_t addr, int *server_type) { @@ -804,6 +1037,52 @@ const char *PINT_cached_config_map_addr( return NULL; } + +/* PINT_cached_config_check_type() + * + * Retrieves the server type flags for 
a specified BMI addr string + * + * returns 0 on success, -errno on failure + */ +int PINT_cached_config_check_type( + PVFS_fs_id fsid, + const char *server_addr_str, + int* server_type) +{ + int ret = -PVFS_EINVAL, i = 0; + struct qlist_head *hash_link = NULL; + struct config_fs_cache_s *cur_config_cache = NULL; + + hash_link = qhash_search(PINT_fsid_config_cache_table,&(fsid)); + if (!hash_link) + { + return(-PVFS_EINVAL); + } + cur_config_cache = qlist_entry( + hash_link, struct config_fs_cache_s, hash_link); + assert(cur_config_cache); + assert(cur_config_cache->fs); + + ret = cache_server_array(fsid); + if (ret < 0) + { + return(ret); + } + + /* run through general server list for a match */ + for(i = 0; i < cur_config_cache->server_count; i++) + { + if (!(strcmp(cur_config_cache->server_array[i].addr_string, + server_addr_str))) + { + *server_type = cur_config_cache->server_array[i].server_type; + return(0); + } + } + return(-PVFS_EINVAL); +} + + /* PINT_cached_config_count_servers() * * counts the number of physical servers of the specified type @@ -962,23 +1241,27 @@ int PINT_cached_config_map_to_server( PVFS_handle handle, PVFS_fs_id fs_id) { - int ret = -PVFS_EINVAL; - char bmi_server_addr[PVFS_MAX_SERVER_ADDR_LEN] = {0}; + struct handle_lookup_entry* tmp_entry; + + tmp_entry = find_handle_lookup_entry(handle, fs_id); - ret = PINT_cached_config_get_server_name( - bmi_server_addr, PVFS_MAX_SERVER_ADDR_LEN, handle, fs_id); - if (ret) + if(!tmp_entry) { - PVFS_perror_gossip("PINT_cached_config_get_server_name failed", ret); + gossip_err("Error: failed to find handle %llu in fs configuration.\n", + llu(handle)); + return(-PVFS_EINVAL); } - return (!ret ? BMI_addr_lookup(server_addr, bmi_server_addr) : ret); + + *server_addr = tmp_entry->server_addr; + + return(0); } /* PINT_cached_config_get_num_dfiles() * * Returns 0 if the number of dfiles has been successfully set * - * Sets the number of dfiles to a distribution approved the value. 
Clients + * Sets the number of dfiles to a distribution approved value. Clients * may pass in num_dfiles_requested as a hint, if no hint is given, the server * configuration is checked to find a hint there. The distribution will * choose a correct number of dfiles even if no hint is set. @@ -989,7 +1272,6 @@ int PINT_cached_config_get_num_dfiles( int num_dfiles_requested, int *num_dfiles) { - int ret = -PVFS_EINVAL; int rc; int num_io_servers; @@ -1014,17 +1296,33 @@ int PINT_cached_config_get_num_dfiles( /* Determine the number of I/O servers available */ rc = PINT_cached_config_get_num_io(fsid, &num_io_servers); + if(rc < 0) + { + return(rc); + } - if (0 == rc) + /* Allow the distribution to apply its hint to the number of + dfiles requested and the number of I/O servers available */ + *num_dfiles = dist->methods->get_num_dfiles(dist->params, + num_io_servers, + num_dfiles_requested); + if(*num_dfiles < 1) { - /* Allow the distribution to apply its hint to the number of - dfiles requested and the number of I/O servers available */ - *num_dfiles = dist->methods->get_num_dfiles(dist->params, - num_io_servers, - num_dfiles_requested); - ret = 0; + gossip_err("Error: distribution failure for %d servers and %d requested datafiles.\n", num_io_servers, num_dfiles_requested); + return(-PVFS_EINVAL); } - return ret; + + if (*num_dfiles > num_io_servers) + { + gossip_err("%s: Distribution requires more datafiles(%d) than I/O servers(%d) currently defined in the system. 
Capping " + "number of datafiles to the number of I/O servers.\n" + ,__func__ + ,*num_dfiles + ,num_io_servers); + *num_dfiles = num_io_servers; + } + + return 0; } /* PINT_cached_config_get_num_meta() @@ -1106,17 +1404,15 @@ int PINT_cached_config_get_server_handle_count( PVFS_fs_id fs_id, uint64_t *handle_count) { - int ret = -PVFS_EINVAL; - PINT_llist *cur = NULL; - struct bmi_host_extent_table_s *cur_host_extent_table = NULL; struct qlist_head *hash_link = NULL; struct config_fs_cache_s *cur_config_cache = NULL; - uint64_t tmp_count; + struct host_handle_mapping_s *server_mapping = NULL; *handle_count = 0; assert(PINT_fsid_config_cache_table); + /* for each fs find the right server */ hash_link = qhash_search(PINT_fsid_config_cache_table,&(fs_id)); if (hash_link) { @@ -1125,36 +1421,24 @@ int PINT_cached_config_get_server_handle_count( assert(cur_config_cache); assert(cur_config_cache->fs); - assert(cur_config_cache->bmi_host_extent_tables); - cur = cur_config_cache->bmi_host_extent_tables; - while (cur) + server_mapping = PINT_cached_config_find_server( + cur_config_cache->fs->meta_handle_ranges, server_addr_str); + if(server_mapping) { - cur_host_extent_table = PINT_llist_head(cur); - if (!cur_host_extent_table) - { - break; - } - assert(cur_host_extent_table->bmi_address); - assert(cur_host_extent_table->extent_list); - - if (strcmp(cur_host_extent_table->bmi_address, - server_addr_str) == 0) - { - ret = PINT_extent_list_count_total( - cur_host_extent_table->extent_list, &tmp_count); + *handle_count += PINT_extent_array_count_total( + &server_mapping->handle_extent_array); + } - if (ret) - { - return ret; - } - *handle_count += tmp_count; - } - cur = PINT_llist_next(cur); + server_mapping = PINT_cached_config_find_server( + cur_config_cache->fs->data_handle_ranges, server_addr_str); + if(server_mapping) + { + *handle_count += PINT_extent_array_count_total( + &server_mapping->handle_extent_array); } - return 0; } - return ret; + return 0; } /* 
PINT_cached_config_get_server_name() @@ -1170,48 +1454,19 @@ int PINT_cached_config_get_server_name( PVFS_handle handle, PVFS_fs_id fsid) { - int ret = -PVFS_EINVAL; - PINT_llist *cur = NULL; - struct bmi_host_extent_table_s *cur_host_extent_table = NULL; - struct qlist_head *hash_link = NULL; - struct config_fs_cache_s *cur_config_cache = NULL; + struct handle_lookup_entry* tmp_entry; - assert(PINT_fsid_config_cache_table); + tmp_entry = find_handle_lookup_entry(handle, fsid); - hash_link = qhash_search(PINT_fsid_config_cache_table,&(fsid)); - if (hash_link) + if(!tmp_entry) { - cur_config_cache = qlist_entry( - hash_link, struct config_fs_cache_s, hash_link); - - assert(cur_config_cache); - assert(cur_config_cache->fs); - assert(cur_config_cache->bmi_host_extent_tables); - - cur = cur_config_cache->bmi_host_extent_tables; - while (cur) - { - cur_host_extent_table = PINT_llist_head(cur); - if (!cur_host_extent_table) - { - break; - } - assert(cur_host_extent_table->bmi_address); - assert(cur_host_extent_table->extent_list); - - if (PINT_handle_in_extent_list( - cur_host_extent_table->extent_list, handle)) - { - strncpy(server_name,cur_host_extent_table->bmi_address, - max_server_name_len); - ret = 0; - break; - } - cur = PINT_llist_next(cur); - } + gossip_err("Error: failed to find handle %llu in fs configuration.\n", + llu(handle)); + return(-PVFS_EINVAL); } - return ret; + strncpy(server_name, tmp_entry->server_name, max_server_name_len); + return(0); } /* PINT_cached_config_get_root_handle() @@ -1273,6 +1528,93 @@ int PINT_cached_config_get_handle_timeout( return ret; } +int PINT_cached_config_get_server_list( + PVFS_fs_id fs_id, + PINT_dist *dist, + int num_dfiles_req, + int user_id, + int num_energy_nodes, + PVFS_sys_layout *layout, + const char ***server_names, + int *server_count) +{ + int num_io_servers, ret, i; + PVFS_BMI_addr_t *server_addrs; + const char **servers; + + /* find the server list from the layout */ + ret = PINT_cached_config_get_num_dfiles( 
+ fs_id, + dist, + num_dfiles_req, + &num_io_servers); + if (ret < 0) + { + gossip_err("Failed to get number of data servers\n"); + return ret; + } + + if(num_io_servers > PVFS_REQ_LIMIT_DFILE_COUNT) + { + num_io_servers = PVFS_REQ_LIMIT_DFILE_COUNT; + gossip_err("Warning: reducing number of data " + "files to PVFS_REQ_LIMIT_DFILE_COUNT\n"); + } + + server_addrs = malloc(sizeof(*server_addrs) * num_io_servers); + if(!server_addrs) + { + gossip_err("Failed to allocate server address list\n"); + return -PVFS_ENOMEM; + } + + ret = PINT_cached_config_map_servers( + fs_id, + &num_io_servers, + layout, + server_addrs, + NULL); + if(ret != 0) + { + gossip_err("Failed to get IO server addrs from layout\n"); + return ret; + } + + if(num_energy_nodes) + { + servers = malloc(sizeof(*servers) * num_energy_nodes); + } else { + servers = malloc(sizeof(*servers) * num_io_servers); + } + + if(!servers) + { + gossip_err("Failed to allocate server address list\n"); + free(server_addrs); + return -PVFS_ENOMEM; + } + + if(num_energy_nodes) { + for(i = 0; i < num_energy_nodes; ++i) + { + servers[i] = BMI_addr_rev_lookup(server_addrs[(i+(user_id % num_io_servers)) % num_io_servers]); + } + *server_count = num_energy_nodes; + } else { + for(i = 0; i < num_io_servers; ++i) + { + servers[i] = BMI_addr_rev_lookup(server_addrs[i]); + } + *server_count = num_io_servers; + } + + free(server_addrs); + + *server_names = servers; + + return 0; +} + /* cache_server_array() * * verifies that the arrays of physical server addresses have been @@ -1312,7 +1654,7 @@ static int cache_server_array( cur_config_cache->server_count = 0; cur_config_cache->meta_server_count = 0; cur_config_cache->io_server_count = 0; - + /* iterate through lists to come up with an upper bound for * the size of the arrays that we need */ @@ -1492,23 +1834,236 @@ static int hash_fsid_compare(void *key, struct qlist_head *link) return 0; } -static void free_host_extent_table(void *ptr) +/* handle_lookup_entry_compare() + * * + * 
* comparison function used by qsort() + * */ +static int handle_lookup_entry_compare(const void *p1, const void *p2) { - struct bmi_host_extent_table_s *cur_host_extent_table = - (struct bmi_host_extent_table_s *)ptr; + const struct handle_lookup_entry* e1 = p1; + const struct handle_lookup_entry* e2 = p2; - assert(cur_host_extent_table); - assert(cur_host_extent_table->bmi_address); - assert(cur_host_extent_table->extent_list); + if(e1->extent.first < e2->extent.first) + return(-1); + if(e1->extent.first > e2->extent.first) + return(1); - /* - NOTE: cur_host_extent_table->bmi_address is a ptr - into a server_configuration_s->host_aliases object. - it is properly freed by PINT_config_release - */ - cur_host_extent_table->bmi_address = (char *)0; - PINT_release_extent_list(cur_host_extent_table->extent_list); - free(cur_host_extent_table); + return(0); +} + +/* find_handle_lookup_entry() + * + * searches sorted table for extent that contains the specified handle + * + * returns pointer to table entry on success, NULL on failure + */ +const struct handle_lookup_entry* find_handle_lookup_entry( + PVFS_handle handle, PVFS_fs_id fsid) +{ + struct qlist_head *hash_link = NULL; + struct config_fs_cache_s *cur_config_cache = NULL; + int high, low, mid; + int table_index; + + assert(PINT_fsid_config_cache_table); + + hash_link = qhash_search(PINT_fsid_config_cache_table,&(fsid)); + if(!hash_link) + { + return(NULL); + } + + cur_config_cache = qlist_entry( + hash_link, struct config_fs_cache_s, hash_link); + + assert(cur_config_cache); + assert(cur_config_cache->fs); + + /* iterative binary search through handle lookup table to find the + * extent that this handle falls into + */ + low = 0; + high = cur_config_cache->handle_lookup_table_size; + while (low < high) + { + mid = (low + high)/2; + if (cur_config_cache->handle_lookup_table[mid].extent.first < handle) + low = mid + 1; + else + high = mid; + } + if ((low < cur_config_cache->handle_lookup_table_size) && + 
(cur_config_cache->handle_lookup_table[low].extent.first == handle)) + { + /* we happened to locate the first handle in a range */ + table_index = low; + } + else + { + /* this handle must fall into the previous range if any */ + table_index = low-1; + } + + /* confirm match */ + if(PINT_handle_in_extent( + &cur_config_cache->handle_lookup_table[table_index].extent, + handle)) + { + if(!strncmp(cur_config_cache->handle_lookup_table[table_index].server_name, "osd", 3)) + { + /* osd address might be wrong, since the handle lookup table doesn't call BMI_addr_lookup for osd server */ + BMI_addr_lookup(&cur_config_cache->handle_lookup_table[table_index].server_addr, cur_config_cache->handle_lookup_table[table_index].server_name); + } + + return(&cur_config_cache->handle_lookup_table[table_index]); + } + + /* no match */ + return(NULL); +} + +/* load_handle_lookup_table() + * + * iterates through extents for all servers and constructs a table sorted by + * the first handle in each extent. This table can then be searched with a + * binary algorithm to map handles to servers. Table includes extent, + * server name, and server's resolved bmi address. 
+ * + * returns 0 on success, -PVFS_error on failure + */ +static int load_handle_lookup_table( + struct config_fs_cache_s *cur_config_fs_cache) +{ + int ret = -PVFS_EINVAL; + host_handle_mapping_s *cur_mapping = NULL; + int count = 0; + int table_offset = 0; + PINT_llist* server_cursor; + int i; + int j; + PINT_llist* range_list[2] = + { + cur_config_fs_cache->fs->meta_handle_ranges, + cur_config_fs_cache->fs->data_handle_ranges + }; + + /* count total number of extents */ + /* loop through both meta and data ranges */ + for(j=0; j<2; j++) + { + server_cursor = range_list[j]; + cur_mapping = PINT_llist_head(server_cursor); + while(cur_mapping) + { + /* each server may have multiple extents */ + for(i=0; i<cur_mapping->handle_extent_array.extent_count; i++) + { + count += 1; + } + server_cursor = PINT_llist_next(server_cursor); + cur_mapping = PINT_llist_head(server_cursor); + } + } + + /* allocate a table to hold all extents for faster searching */ + if(cur_config_fs_cache->handle_lookup_table) + { + free(cur_config_fs_cache->handle_lookup_table); + } + cur_config_fs_cache->handle_lookup_table = + malloc(sizeof(*cur_config_fs_cache->handle_lookup_table) * count); + if(!cur_config_fs_cache->handle_lookup_table) + { + return(-PVFS_ENOMEM); + } + cur_config_fs_cache->handle_lookup_table_size = count; + + /* populate table */ + /* loop through both meta and data ranges */ + for(j=0; j<2; j++) + { + server_cursor = range_list[j]; + cur_mapping = PINT_llist_head(server_cursor); + while(cur_mapping) + { + for(i=0; i<cur_mapping->handle_extent_array.extent_count; i++) + { + cur_config_fs_cache->handle_lookup_table[table_offset].extent + = cur_mapping->handle_extent_array.extent_array[i]; + cur_config_fs_cache->handle_lookup_table[table_offset].server_name + = cur_mapping->alias_mapping->bmi_address; + +#ifndef __PVFS2_SERVER__ + ret = BMI_addr_lookup( + &cur_config_fs_cache->handle_lookup_table[table_offset].server_addr, + cur_config_fs_cache->handle_lookup_table[table_offset].server_name); + if(ret
< 0) + { + free(cur_config_fs_cache->handle_lookup_table); + gossip_err("Error: failed to resolve address of server: %s\n", + cur_config_fs_cache->handle_lookup_table[table_offset].server_name); + return(ret); + } +#endif + table_offset++; + } + server_cursor = PINT_llist_next(server_cursor); + cur_mapping = PINT_llist_head(server_cursor); + } + } + + /* sort table */ + qsort(cur_config_fs_cache->handle_lookup_table, table_offset, + sizeof(*cur_config_fs_cache->handle_lookup_table), + handle_lookup_entry_compare); + + return(0); +} + +/* PINT_cached_config_server_names() + * + * Returns a list of pointers to the IO server names currently running in this + * file system. + * + * returns 0 on success, -PVFS_error on failure + */ +int PINT_cached_config_io_server_names( char ***list + , int *size + , PVFS_fs_id fsid) +{ + int i; + struct qlist_head *hash_link = NULL; + struct config_fs_cache_s *cur_config_cache = NULL; + + assert(PINT_fsid_config_cache_table); + + hash_link = qhash_search(PINT_fsid_config_cache_table,&(fsid)); + if(!hash_link) + { + return(-PVFS_ENOENT); + } + + cur_config_cache = qlist_entry( + hash_link, struct config_fs_cache_s, hash_link); + + assert(cur_config_cache); + + *size = cur_config_cache->io_server_count; + + *list = malloc(sizeof(char *) * (*size)); + + if (! 
(*list) ) + return(-PVFS_ENOMEM); + + memset(*list,0,sizeof(char *) * (*size)); + + for (i=0; i<(*size); i++) + { + /*addr_string originates from the alias mapping->bmi_address*/ + (*list)[i] = cur_config_cache->io_server_array[i].addr_string; + } + + return(0); } /* diff --git a/src/common/misc/pint-cached-config.h b/src/common/misc/pint-cached-config.h index fea2da6..e4e96bb 100644 --- a/src/common/misc/pint-cached-config.h +++ b/src/common/misc/pint-cached-config.h @@ -29,23 +29,39 @@ int PINT_cached_config_initialize(void); int PINT_cached_config_finalize(void); int PINT_cached_config_handle_load_mapping( - struct filesystem_configuration_s *fs); + struct filesystem_configuration_s *fs, + struct server_configuration_s *config); int PINT_cached_config_map_alias( const char *alias, PVFS_BMI_addr_t *addr); +int PINT_cached_config_get_server( + PVFS_fs_id fsid, + const char* host, + PVFS_ds_type type, + PVFS_handle_extent_array *ext_array); + int PINT_cached_config_get_next_meta( PVFS_fs_id fsid, PVFS_BMI_addr_t *meta_addr, PVFS_handle_extent_array *meta_extent_array, int is_directory); +int PINT_cached_config_get_io( + PVFS_fs_id fsid, + const char* host, + PVFS_BMI_addr_t *io_addr, + PVFS_handle_extent_array *ext_array); + +/* This appears to be obsolete */ +#if 0 int PINT_cached_config_get_next_io( PVFS_fs_id fsid, int num_servers, PVFS_BMI_addr_t *io_addr_array, PVFS_handle_extent_array *io_handle_extent_array); +#endif const char *PINT_cached_config_map_addr( PVFS_fs_id fsid, @@ -95,11 +111,25 @@ int PINT_cached_config_get_server_name( PVFS_handle handle, PVFS_fs_id fsid); +int PINT_cached_config_update_first_handle( + PVFS_handle handle, + PVFS_fs_id fsid); + +int PINT_cached_config_get_first_handle( + PVFS_handle initial_handle, + PVFS_fs_id fs_id, + PVFS_handle *new_handle); + int PINT_cached_config_get_server_handle_count( const char *server_addr_str, PVFS_fs_id fs_id, uint64_t *handle_count); - + +int PINT_cached_config_check_type( + PVFS_fs_id fsid, + 
const char *server_addr_str, + int* server_type); + int PINT_cached_config_get_root_handle( PVFS_fs_id fsid, PVFS_handle *fh_root); @@ -108,9 +138,29 @@ int PINT_cached_config_get_handle_timeout( PVFS_fs_id fsid, struct timeval *timeout); +int PINT_cached_config_get_server_list( + PVFS_fs_id fs_id, + PINT_dist *dist, + int num_dfiles_req, + int user_id, + int num_energy_nodes, + PVFS_sys_layout *layout, + const char ***server_names, + int *server_count); + int PINT_cached_config_reinitialize( struct server_configuration_s *config); +int PINT_cached_config_io_server_names( + char ***list, + int *size, + PVFS_fs_id fsid); + +int PINT_cached_config_store_new_oid( + PVFS_BMI_addr_t *addr, + PVFS_fs_id fs_id, + PVFS_handle new_oid); + #endif /* __PINT_CACHED_CONFIG_H */ /* diff --git a/src/common/misc/pint-event.c b/src/common/misc/pint-event.c index e7a655f..a6b3779 100644 --- a/src/common/misc/pint-event.c +++ b/src/common/misc/pint-event.c @@ -6,408 +6,545 @@ #include #include +#ifndef WIN32 #include +#endif #include +#ifdef WIN32 +#include "wincommon.h" + +#define strdup(s) _strdup(s) +#else +#include +#endif + #include "pint-event.h" #include "pvfs2-types.h" #include "pvfs2-mgmt.h" #include "gossip.h" +#include "quicklist.h" +#include "quickhash.h" +#include "id-generator.h" +#include "str-utils.h" -/* variables that provide runtime control over which events are recorded */ -int PINT_event_on = 0; -int32_t PINT_event_api_mask = 0; -int32_t PINT_event_op_mask = 0; - -/* global data structures for storing measurements */ -static struct PVFS_mgmt_event* ts_ring = NULL; -static int ts_head = 0; -static int ts_tail = 0; -static int ts_ring_size = 0; -static gen_mutex_t event_mutex = GEN_MUTEX_INITIALIZER; - -#ifdef HAVE_MPE -int PINT_event_job_start, PINT_event_job_stop; -int PINT_event_trove_rd_start, PINT_event_trove_rd_stop; -int PINT_event_trove_wr_start, PINT_event_trove_wr_stop; -int PINT_event_bmi_start, PINT_event_bmi_stop; -int PINT_event_flow_start, 
PINT_event_flow_stop; -#endif +#include "pvfs2-config.h" +#ifdef HAVE_TAU +#include "pvfs_tau_api.h" +#endif -/* PINT_event_initialize() - * - * starts up the event logging interface - * - * returns 0 on success, -PVFS_error on failure - */ -int PINT_event_initialize(int ring_size) -{ - gen_mutex_lock(&event_mutex); +/* variables that provide runtime control over which events are recorded */ -#if defined(HAVE_PABLO) - PINT_event_pablo_init(); -#endif -#if defined(HAVE_MPE) - PINT_event_mpe_init(); -#endif +static PINT_event_group default_group; - PINT_event_default_init(ring_size); +static struct qhash_table *events_table = NULL; +static struct qhash_table *groups_table = NULL; +static uint32_t event_count = 0; +uint64_t PINT_event_enabled_mask = 0; - gen_mutex_unlock(&event_mutex); - return(0); -} +#ifdef HAVE_TAU +static int PINT_event_default_buffer_size = 1024*1024; +static int PINT_event_default_max_traces = 1024; +#endif -#if defined(HAVE_MPE) -/* - * PINT_event_mpe_init - * initialize the mpe profiling interface - */ -int PINT_event_mpe_init(void) +struct PINT_group { - MPI_Init(NULL, NULL); - MPE_Init_log(); - - PINT_event_job_start = MPE_Log_get_event_number(); - PINT_event_job_stop = MPE_Log_get_event_number(); - PINT_event_trove_rd_start = MPE_Log_get_event_number(); - PINT_event_trove_rd_stop = MPE_Log_get_event_number(); - PINT_event_trove_wr_start = MPE_Log_get_event_number(); - PINT_event_trove_wr_stop = MPE_Log_get_event_number(); - PINT_event_bmi_start = MPE_Log_get_event_number(); - PINT_event_bmi_stop = MPE_Log_get_event_number(); - PINT_event_flow_start = MPE_Log_get_event_number(); - PINT_event_flow_stop = MPE_Log_get_event_number(); - - - MPE_Describe_state(PINT_event_job_start, PINT_event_job_stop, "Job", "red"); - MPE_Describe_state(PINT_event_trove_rd_start, PINT_event_trove_rd_stop, - "Trove Read", "orange"); - MPE_Describe_state(PINT_event_trove_wr_start, PINT_event_trove_wr_stop, - "Trove Write", "blue"); - 
MPE_Describe_state(PINT_event_bmi_start, PINT_event_bmi_stop, - "BMI", "yellow"); - MPE_Describe_state(PINT_event_flow_start, PINT_event_flow_stop, - "Flow", "green"); - - return 0; + char *name; + PINT_event_group id; + struct qlist_head events; + uint64_t mask; + struct qhash_head link; +}; + +struct PINT_event +{ + char *name; + PINT_event_type type; + PINT_event_group group; + uint64_t mask; + struct qlist_head group_link; + struct qlist_head link; +}; + +#if defined(HAVE_TAU) + +static void PINT_event_tau_init(void); +static void PINT_event_tau_fini(void); +static void PINT_event_tau_thread_init(char* gname); +static void PINT_event_tau_thread_fini(void); +static void PINT_event_free( struct PINT_event *p ); +static void PINT_group_free( strcut PINT_group *g ); + +#endif /* HAVE_TAU */ + +static void PINT_event_free( struct PINT_event *p ) +{ + if( p != NULL ) + { + if( p->name != NULL ) + { + free( p->name ); + } + free( p ); + } + return; } -void PINT_event_mpe_finalize(void) +static void PINT_group_free( struct PINT_group *g ) { - /* TODO: use mkstemp like pablo_finalize does */ - MPE_Finish_log("/tmp/pvfs2-server"); - MPI_Finalize(); + if( g != NULL ) + { + if( g->name != NULL ) + { + free( g->name ); + } + free( g ); + } return; } -#endif -#if defined(HAVE_PABLO) -/* PINT_event_pablo_init - * initialize the pablo trace library - */ -int PINT_event_pablo_init(void) + +static int PINT_group_compare(void *key, struct qhash_head *link) { - char tracefile[PATH_MAX]; - snprintf(tracefile, PATH_MAX, "/tmp/pvfs2-server.pablo.XXXXXX"); - mkstemp(tracefile); - setTraceFileName(tracefile); + struct PINT_group *eg = qhash_entry(link, struct PINT_group, link); + + if(!strcmp(eg->name, (char *)key)) + { + return 1; + } return 0; } -void PINT_event_pablo_finalize(void) +static int PINT_events_compare(void *key, struct qhash_head *link) { - endTracing(); -} + struct PINT_event *e = qhash_entry(link, struct PINT_event, link); -#endif - - -int PINT_event_default_init(int 
ring_size) -{ - if(ts_ring != NULL) + if(!strcmp(e->name, (char *)key)) { - gen_mutex_unlock(&event_mutex); - return(-PVFS_EALREADY); + return 1; } + return 0; +} - /* give a reasonable ring buffer size to work with! */ - if(ring_size < 4) +int PINT_event_init(enum PINT_event_method method) +{ + int ret; + + events_table = qhash_init(PINT_events_compare, quickhash_string_hash, 1024); + if(!events_table) { - gen_mutex_unlock(&event_mutex); - return(-PVFS_EINVAL); + return -PVFS_ENOMEM; } - /* allocate a ring buffer for time stamped events */ - ts_ring = (struct PVFS_mgmt_event*)malloc(ring_size - *sizeof(struct PVFS_mgmt_event)); - if(!ts_ring) + groups_table = qhash_init(PINT_group_compare, quickhash_string_hash, 1024); + if(!groups_table) { - gen_mutex_unlock(&event_mutex); - return(-PVFS_ENOMEM); + qhash_destroy_and_finalize( events_table, struct PINT_event, link, + PINT_event_free); + return -PVFS_ENOMEM; } - memset(ts_ring, 0, ring_size*sizeof(struct PVFS_mgmt_event)); - ts_head = 0; - ts_tail = 0; - ts_ring_size = ring_size; + ret = PINT_event_define_group("defaults", &default_group); + if(ret < 0) + { + qhash_destroy_and_finalize( groups_table, struct PINT_group, link, + PINT_group_free ); + qhash_destroy_and_finalize( events_table, struct PINT_event, link, + PINT_event_free); - return 0; -} + return ret; + } -void PINT_event_default_finalize(void) -{ - if(ts_ring == NULL) + switch(method) { - gen_mutex_unlock(&event_mutex); - return; + case PINT_EVENT_TRACE_TAU: +#if defined(HAVE_TAU) + PINT_event_tau_init(); + break; +#else + return -PVFS_ENOSYS; +#endif } - free(ts_ring); - ts_ring = NULL; - ts_head = 0; - ts_tail = 0; - ts_ring_size = 0; + return(0); } +void PINT_event_free_bucket_resources(struct qhash_table *qht, unsigned long distance_from_link) +{ + char **name = NULL; + char *start_of_structure = NULL; + struct qhash_head *bucket_entry = NULL; + struct qhash_head *bucket = NULL; + struct qhash_head *next = NULL; + int i; + + for (i=0; i<qht->table_size;
i++) + { + bucket = &(qht->array[i]); + if (bucket==bucket->next && bucket==bucket->prev) + continue; //this bucket is empty + + /*for each entry, deallocate the name string and the entry structure*/ + for (bucket_entry=bucket->next; bucket_entry != bucket; bucket_entry = next) + { + start_of_structure = (char *)((char *)bucket_entry - distance_from_link); + name = (char **)start_of_structure; + if (*name) + { + free(*name); + *name=NULL; + } + name=NULL; + + next = bucket_entry->next; + free(start_of_structure); + start_of_structure=NULL; + }/*end for*/ + + + } /*end for*/ + + return; +} -/* PINT_event_finalize() - * - * shuts down the event logging interface - * - * returns 0 on success, -PVFS_error on failure - */ void PINT_event_finalize(void) { - - gen_mutex_lock(&event_mutex); -#if defined(HAVE_PABLO) - PINT_event_pablo_finalize(); -#endif -#if defined(HAVE_MPE) - PINT_event_mpe_finalize(); +#if defined(HAVE_TAU) + PINT_event_tau_fini(); #endif - PINT_event_default_finalize(); - gen_mutex_unlock(&event_mutex); + /*free the buckets in the tables and the tables themselves*/ + /* need to free contents as well */ + qhash_destroy_and_finalize( groups_table, struct PINT_group, link, + PINT_group_free ); + qhash_destroy_and_finalize( events_table, struct PINT_event, link, + PINT_event_free); return; } -/* PINT_event_set_masks() - * - * sets masks that determine if event logging is enabled, what the api mask - * is, and what the operation mask is. 
The combination of these values - * determines which events are recorded - * - * no return value - */ -void PINT_event_set_masks(int event_on, int32_t api_mask, int32_t op_mask) +int PINT_event_thread_start(char *name) { - gen_mutex_lock(&event_mutex); + if(!groups_table) + { + /* assume that the events interface just hasn't been initialized */ + return 0; + } - PINT_event_on = event_on; - PINT_event_api_mask = api_mask; - PINT_event_op_mask = op_mask; +#if defined(HAVE_TAU) + PINT_event_tau_thread_init(name); +#endif - gen_mutex_unlock(&event_mutex); - return; + return 0; } - -/* PINT_event_get_masks() - * - * retrieves current mask values - * - * no return value - */ -void PINT_event_get_masks(int* event_on, int32_t* api_mask, int32_t* op_mask) +int PINT_event_thread_stop(void) { - gen_mutex_lock(&event_mutex); + if(!groups_table) + { + /* assume that the events interface just hasn't been initialized */ + return 0; + } - *event_on = PINT_event_on; - *api_mask = PINT_event_api_mask; - *op_mask = PINT_event_op_mask; +#if defined(HAVE_TAU) + PINT_event_tau_thread_fini(); + return 0; +#endif - gen_mutex_unlock(&event_mutex); - return; + return 0; } -/* PINT_event_timestamp() - * - * records a timestamp in the ring buffer - * - * returns 0 on success, -PVFS_error on failure - */ -void __PINT_event_timestamp(enum PVFS_event_api api, - int32_t operation, - int64_t value, - PVFS_id_gen_t id, - int8_t flags) +int PINT_event_enable(const char *events) { - gen_mutex_lock(&event_mutex); - -#if defined(HAVE_PABLO) - __PINT_event_pablo(api, operation, value, id, flags); -#endif + struct qhash_head *entry; + struct PINT_event *event; + struct PINT_group *group; + char **event_strings; + int count, i; + int ret = 0; + + if(!groups_table) + { + /* assume that the events interface just hasn't been initialized */ + return 0; + } -#if defined (HAVE_MPE) - __PINT_event_mpe(api, operation, value, id, flags); -#endif + count = PINT_split_string_list(&event_strings, events); - 
__PINT_event_default(api, operation, value, id, flags); + for(i = 0; i < count; ++i) + { + entry = qhash_search(events_table, event_strings[i]); + if(entry) + { + event = qhash_entry(entry, struct PINT_event, link); + PINT_event_enabled_mask |= event->mask; + } + else + { + entry = qhash_search(groups_table, event_strings[i]); + if(entry) + { + group = qhash_entry(entry, struct PINT_group, link); + PINT_event_enabled_mask |= group->mask; + } + } + + if(!strcmp(events, "all")) + { + PINT_event_enabled_mask = 0xFFFFFFFF; + goto done; + } + + if(!entry) + { + gossip_err("Unknown event or event group: %s\n", event_strings[i]); + ret = -PVFS_EINVAL; + goto done; + } + } - gen_mutex_unlock(&event_mutex); +done: + for(i = 0; i < count; ++i) + { + free(event_strings[i]); + } + free(event_strings); - return; + return ret; } -void __PINT_event_default(enum PVFS_event_api api, - int32_t operation, - int64_t value, - PVFS_id_gen_t id, - int8_t flags) +int PINT_event_disable(const char *events) { - struct timeval tv; - - /* immediately grab timestamp */ - gettimeofday(&tv, NULL); - - /* fill in event */ - ts_ring[ts_head].api = api; - ts_ring[ts_head].operation = operation; - ts_ring[ts_head].value = value; - ts_ring[ts_head].id = id; - ts_ring[ts_head].flags = flags; - ts_ring[ts_head].tv_sec = tv.tv_sec; - ts_ring[ts_head].tv_usec = tv.tv_usec; - - /* update ring buffer positions */ - ts_head = (ts_head+1)%ts_ring_size; - if(ts_head == ts_tail) + struct qhash_head *entry; + struct PINT_event *event; + struct PINT_group *group; + char **event_strings; + int count, i; + int ret = 0; + + count = PINT_split_string_list(&event_strings, events); + + for(i = 0; i < count; ++i) + { + entry = qhash_search(events_table, event_strings[i]); + if(entry) + { + event = qhash_entry(entry, struct PINT_event, link); + PINT_event_enabled_mask &= ~(event->mask); + } + else + { + entry = qhash_search(groups_table, event_strings[i]); + if(entry) + { + group = qhash_entry(entry, struct PINT_group, 
link); + PINT_event_enabled_mask &= ~(group->mask); + } + } + + if(!entry) + { + gossip_err("Unknown event or event group: %s\n", event_strings[i]); + ret = -PVFS_EINVAL; + goto done; + } + } + + if(!strcmp(events, "none")) + { + PINT_event_enabled_mask = 0; + } + +done: + for(i = 0; i < count; ++i) { - ts_tail = (ts_tail+1)%ts_ring_size; + free(event_strings[i]); } + free(event_strings); + + return ret; } -#ifdef HAVE_PABLO -/* enter a pablo trace into the log */ -void __PINT_event_pablo(enum PVFS_event_api api, - int32_t operation, - int64_t value, - PVFS_id_gen_t id, - int8_t flags) +int PINT_event_define_group(const char *name, PINT_event_group *group) { - /* TODO: this can all go once there is a nice "enum to string" function */ - char description[100]; - switch(api) { - case PVFS_EVENT_API_BMI: - sprintf(description, "bmi operation"); - break; - case PVFS_EVENT_API_JOB: - sprintf(description, "job operation"); - break; - case PVFS_EVENT_API_TROVE: - sprintf(description, "trove operation"); - break; - case PVFS_EVENT_API_ENCODE_REQ: - case PVFS_EVENT_API_ENCODE_RESP: - case PVFS_EVENT_API_DECODE_REQ: - case PVFS_EVENT_API_DECODE_RESP: - case PVFS_EVENT_API_SM: + struct PINT_group *g; + + if(!groups_table) + { + /* assume that the events interface just hasn't been initialized */ + return 0; } - /* PVFS_EVENT_API_BMI, operation(SEND|RECV), value, id, FLAG (start|end) */ - /* our usage better maps to the "startTimeEvent/endTimeEvent" model */ - switch(flags) { - case PVFS_EVENT_FLAG_START: - startTimeEvent( ((api<<6)&(operation<<3)) ); - traceEvent( ( (api<<6) & (operation<<3) & flags), - description, strlen(description)); - break; - case PVFS_EVENT_FLAG_END: - endTimeEvent( ((api<6)&(operation<3)) ); - break; - default: - /* TODO: someone fed us bad flags */ + g = malloc(sizeof(*g)); + if(!g) + { + return -PVFS_ENOMEM; } + memset(g, 0, sizeof(*g)); + + g->name = strdup(name); + if(!g->name) + { + return -PVFS_ENOMEM; + } + + INIT_QLIST_HEAD(&g->events); + 
qhash_add(groups_table, g->name, &g->link); + id_gen_fast_register(&g->id, g); + *group = g->id; + return 0; } -#endif -#if defined(HAVE_MPE) -void __PINT_event_mpe(enum PVFS_event_api api, - int32_t operation, - int64_t value, - PVFS_id_gen_t id, - int8_t flags) +int PINT_event_define_event(PINT_event_group *group, + char *name, + char *format_start, + char *format_end, + PINT_event_type *et) { - switch(api) { - case PVFS_EVENT_API_BMI: - if (flags & PVFS_EVENT_FLAG_START) { - MPE_Log_event(PINT_event_bmi_start, 0, NULL); - } else if (flags & PVFS_EVENT_FLAG_END) { - MPE_Log_event(PINT_event_bmi_stop, value, NULL); - } - case PVFS_EVENT_API_JOB: - if (flags & PVFS_EVENT_FLAG_START) { - MPE_Log_event(PINT_event_job_start, 0, NULL); - } else if (flags & PVFS_EVENT_FLAG_END) { - MPE_Log_event(PINT_event_job_stop, value, NULL); - } - case PVFS_EVENT_API_TROVE: - if (flags & PVFS_EVENT_FLAG_START) { - MPE_Log_event(PINT_event_trove_wr_start, 0, NULL); - } else if (flags & PVFS_EVENT_FLAG_END) { - MPE_Log_event(PINT_event_trove_wr_stop, value, NULL); - } - case PVFS_EVENT_API_ENCODE_REQ: - case PVFS_EVENT_API_ENCODE_RESP: - case PVFS_EVENT_API_DECODE_REQ: - case PVFS_EVENT_API_DECODE_RESP: - case PVFS_EVENT_API_SM: - ; /* XXX: NEEDS SOMETHING */ + struct PINT_group *g; + PINT_event_group ag; + struct PINT_event *event; + + if(!groups_table) + { + /* assume that the events interface just hasn't been initialized */ + return 0; } -} + if(!group) + { + /* use default group */ + ag = default_group; + } + else + { + ag = *group; + } + + event = malloc(sizeof(*event)); + if(!event) + { + return -PVFS_ENOMEM; + } + memset(event, 0, sizeof(*event)); + + event->name = strdup(name); + if(!event->name) + { + free(event); + return -PVFS_ENOMEM; + } + +#ifdef HAVE_TAU + Ttf_event_define(name, format_start, format_end, (int *)&event->type); #endif -/* PINT_event_retrieve() - * - * fills in an array with current snapshot of event buffer - * - * no return value - */ -void 
PINT_event_retrieve(struct PVFS_mgmt_event* event_array, - int count) + event->group = ag; + event->mask = (1 << event_count); + ++event_count; + + g = id_gen_fast_lookup(ag); + g->mask |= event->mask; + qlist_add(&event->group_link, &g->events); + qhash_add(events_table, event->name, &event->link); + + id_gen_fast_register(et, event); + return 0; +} + +int PINT_event_start_event( + PINT_event_type type, int process_id, int *thread_id, PINT_event_id *id, ...) { - int tmp_tail = ts_tail; - int cur_index = 0; - int i; + va_list ap; + struct PINT_event *event; - gen_mutex_lock(&event_mutex); + if(!groups_table) + { + /* assume that the events interface just hasn't been initialized */ + return 0; + } - /* copy out any events from the ring buffer */ - while(tmp_tail != ts_head && cur_index < count) +event = id_gen_fast_lookup(type); + if(event && (event->mask & PINT_event_enabled_mask)) { - event_array[cur_index] = ts_ring[tmp_tail]; - tmp_tail = (tmp_tail+1)%ts_ring_size; - cur_index++; + va_start(ap, id); +#ifdef HAVE_TAU + Ttf_EnterState_info_va(event->type, process_id, thread_id, (int *)id, ap); +#endif + va_end(ap); } + return 0; +} - gen_mutex_unlock(&event_mutex); +int PINT_event_end_event( + PINT_event_type type, int process_id, int *thread_id, PINT_event_id id, ...) 
+{ + va_list ap; + struct PINT_event *event; - /* fill in remainder of array with invalid flag */ - for(i=cur_index; imask & PINT_event_enabled_mask)) + { + va_start(ap, id); +#ifdef HAVE_TAU + Ttf_LeaveState_info_va(event->type, process_id, thread_id, id, ap); +#endif + va_end(ap); + } + return 0; +} + +/******************************************************************************/ +#if defined(HAVE_TAU) + +void PINT_event_tau_init(void) { + char* foldername = "/tmp/"; + char* prefix = "pvfs2"; + int bufsz = 0; //use default + + Ttf_init(getpid(), foldername, prefix, bufsz); + return; } + +void PINT_event_tau_fini(void) { + Ttf_finalize(); + return; +} + +static void PINT_event_tau_thread_init(char* gname) { + int tid = 0; + struct tau_thread_group_info tg_info; + strncpy(tg_info.name, gname, sizeof(tg_info.name)); + tg_info.buffer_size = PINT_event_default_buffer_size; + tg_info.max = PINT_event_default_max_traces; + int isnew = 0; + Ttf_thread_start(&tg_info, &tid, &isnew); + + return; +} + + +static void PINT_event_tau_thread_fini() { + Ttf_thread_stop(); + return; +} + +#endif /* HAVE_TAU */ +/******************************************************************************/ + + /* * Local variables: * c-indent-level: 4 diff --git a/src/common/misc/pint-event.h b/src/common/misc/pint-event.h index a78ee13..75d34d7 100644 --- a/src/common/misc/pint-event.h +++ b/src/common/misc/pint-event.h @@ -1,5 +1,5 @@ /* - * (C) 2001 Clemson University and The University of Chicago + * (C) 2007 Clemson University and The University of Chicago * * See COPYING in top-level directory. */ @@ -7,87 +7,124 @@ #ifndef __PINT_EVENT_H #define __PINT_EVENT_H -#include "pvfs2-config.h" #include "pvfs2-types.h" -#include "pvfs2-mgmt.h" -#include "gen-locks.h" -#include "pvfs2-event.h" +#include "quickhash.h" -/* TODO: put this value somewhere else? read from config file? 
*/ -#define PINT_EVENT_DEFAULT_RING_SIZE 4000 +typedef PVFS_id_gen_t PINT_event_type; +typedef PVFS_id_gen_t PINT_event_id; +typedef PVFS_id_gen_t PINT_event_group; -/* variables that provide runtime control over which events are recorded */ -extern int PINT_event_on; -extern int32_t PINT_event_api_mask; -extern int32_t PINT_event_op_mask; +extern uint64_t PINT_event_enabled_mask; + +enum PINT_event_method +{ + PINT_EVENT_TRACE_TAU +}; + +enum PINT_event_info +{ + PINT_EVENT_INFO_MAX_TRACES, + PINT_EVENT_INFO_BLOCKING, + PINT_EVENT_INFO_BUFFER_SIZE +}; + +int PINT_event_init(enum PINT_event_method type); -int PINT_event_initialize(int ring_size); void PINT_event_finalize(void); -void PINT_event_set_masks(int event_on, int32_t api_mask, int32_t op_mask); -void PINT_event_get_masks(int* event_on, int32_t* api_mask, int32_t* op_mask); -void PINT_event_retrieve(struct PVFS_mgmt_event* event_array, - int count); -void __PINT_event_timestamp(enum PVFS_event_api api, - int32_t operation, - int64_t value, - PVFS_id_gen_t id, - int8_t flags); - -#if defined(HAVE_PABLO) -#include "SystemDepend.h" -#include "Trace.h" - -int PINT_event_pablo_init(void); -void PINT_event_pablo_finalize(void); - -void __PINT_event_pablo(enum PVFS_event_api api, - int32_t operation, - int64_t value, - PVFS_id_gen_t id, - int8_t flags); -#endif - -#if defined(HAVE_MPE) -#include "mpe.h" -extern int PINT_event_job_start, PINT_event_job_stop; -extern int PINT_event_trove_rd_start, PINT_event_trove_rd_stop; -extern int PINT_event_trove_wr_start, PINT_event_trove_wr_stop; -extern int PINT_event_bmi_start, PINT_event_bmi_stop; -extern int PINT_event_flow_start, PINT_event_flow_stop; - -int PINT_event_mpe_init(void); -void PINT_event_mpe_finalize(void); - -void __PINT_event_mpe(enum PVFS_event_api api, - int32_t operation, - int64_t value, - PVFS_id_gen_t id, - int8_t flags); - -#endif - -int PINT_event_default_init(int ringsize); -void PINT_event_default_finalize(void); - -void 
__PINT_event_default(enum PVFS_event_api api, - int32_t operation, - int64_t value, - PVFS_id_gen_t id, - int8_t flags); - -#ifdef __PVFS2_DISABLE_EVENT__ -#define PINT_event_timestamp(__api, __operation, __value, __id, __flags) \ - do {} while(0) -#else -#define PINT_event_timestamp(__api, __operation, __value, __id, __flags) \ - do { \ - if(PINT_event_on && (PINT_event_api_mask & (__api)) && \ - ((PINT_event_op_mask & (__operation))||((__operation)==0))){\ - __PINT_event_timestamp((__api), (__operation), (__value), (__id), \ - (__flags)); }\ - } while(0) -#endif +void PINT_event_free_bucket_resources(struct qhash_table *qt + ,unsigned long distance_from_link); + +int PINT_event_enable(const char *events); +int PINT_event_disable(const char *events); + +int PINT_event_setinfo(enum PINT_event_info info, void *value); +int PINT_event_getinfo(enum PINT_event_info info, void *value); + +int PINT_event_thread_start(char *name); +int PINT_event_thread_stop(void); + +int PINT_event_define_group(const char *name, PINT_event_group *group); + +int PINT_event_define_event(PINT_event_group *group, + char *name, + char *format_start, + char *format_end, + PINT_event_type *type); + +int PINT_event_start_event(PINT_event_type type, + int process_id, + int *thread_id, + PINT_event_id *event_id, + ...); + +int PINT_event_end_event(PINT_event_type type, + int process_id, + int *thread_id, + PINT_event_id event_id, + ...); + +int PINT_event_log_event(PINT_event_type type, + int process_id, + int *thread_id, + ...); + +#ifdef __PVFS2_ENABLE_EVENT__ + +#ifdef WIN32 + +#define PINT_EVENT_START(ET, PID, TID, EID, ...) \ + PINT_event_start_event(ET, PID, TID, EID, __VA_ARGS__) + +#define PINT_EVENT_END(ET, PID, TID, EID, ...) \ + PINT_event_end_event(ET, PID, TID, EID, __VA_ARGS__) + +#define PINT_EVENT_LOG(ET, PID, TID, ...) \ + PINT_event_log_event(ET, PID, TID, __VA_ARGS__) + +#else + +#define PINT_EVENT_START(ET, PID, TID, EID, args...) 
\ + PINT_event_start_event(ET, PID, TID, EID, ## args) + +#define PINT_EVENT_END(ET, PID, TID, EID, args...) \ + PINT_event_end_event(ET, PID, TID, EID, ## args) + +#define PINT_EVENT_LOG(ET, PID, TID, args...) \ + PINT_event_log_event(ET, PID, TID, ## args) + +#endif /* WIN32 */ + +#define PINT_EVENT_ENABLED 1 + +#else /* __PVFS2_ENABLE_EVENT__ */ + +#ifdef WIN32 + +#define PINT_EVENT_START(ET, PID, TID, EID, ...) \ + do { } while(0) + +#define PINT_EVENT_END(ET, PID, TID, EID, ...) \ + do { } while(0) + +#define PINT_EVENT_LOG(ET, PID, TID, ...) \ + do { } while(0) + +#else + +#define PINT_EVENT_START(ET, PID, TID, EID, args...) \ + do { } while(0) + +#define PINT_EVENT_END(ET, PID, TID, EID, args...) \ + do { } while(0) + +#define PINT_EVENT_LOG(ET, PID, TID, args...) \ + do { } while(0) + +#endif /* WIN32 */ + +#define PINT_EVENT_ENABLED 0 +#endif /* __PVFS2_ENABLE_EVENT__ */ #endif /* __PINT_EVENT_H */ /* diff --git a/src/common/misc/pint-hint.c b/src/common/misc/pint-hint.c new file mode 100644 index 0000000..8b533ff --- /dev/null +++ b/src/common/misc/pint-hint.c @@ -0,0 +1,666 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#define __PINT_REQPROTO_ENCODE_FUNCS_C + +#include +#include +#include +#include +#include "pint-hint.h" +#include "gossip.h" +#include +#include + +DEFINE_STATIC_ENDECODE_FUNCS(uint64_t, uint64_t); +DEFINE_STATIC_ENDECODE_FUNCS(int64_t, int64_t); +DEFINE_STATIC_ENDECODE_FUNCS(uint32_t, uint32_t); +DEFINE_STATIC_ENDECODE_FUNCS(int32_t, int32_t); +DEFINE_STATIC_ENDECODE_FUNCS(string, char *); + +struct PINT_hint_info +{ + enum PINT_hint_type type; + int flags; + const char *name; + void (*encode)(char **pptr, void *value); + void (*decode)(char **pptr, void *value); + int length; +}; + +static int PINT_hint_check(PVFS_hint *hints, enum PINT_hint_type type); + +static const struct PINT_hint_info hint_types[] = { + + {PINT_HINT_REQUEST_ID, + PINT_HINT_TRANSFER, + PVFS_HINT_REQUEST_ID_NAME, + encode_func_uint32_t, + decode_func_uint32_t, + sizeof(uint32_t)}, + + {PINT_HINT_CLIENT_ID, + PINT_HINT_TRANSFER, + PVFS_HINT_CLIENT_ID_NAME, + encode_func_uint32_t, + decode_func_uint32_t, + sizeof(uint32_t)}, + + {PINT_HINT_HANDLE, + PINT_HINT_TRANSFER, + PVFS_HINT_HANDLE_NAME, + encode_func_uint64_t, + decode_func_uint64_t, + sizeof(PVFS_handle)}, + + {PINT_HINT_OP_ID, + 0, + PVFS_HINT_OP_ID_NAME, + encode_func_uint32_t, + decode_func_uint32_t, + sizeof(uint32_t)}, + + {PINT_HINT_RANK, + PINT_HINT_TRANSFER, + PVFS_HINT_RANK_NAME, + encode_func_uint32_t, + decode_func_uint32_t, + sizeof(uint32_t)}, + + {PINT_HINT_SERVER_ID, + PINT_HINT_TRANSFER, + PVFS_HINT_SERVER_ID_NAME, + encode_func_uint32_t, + decode_func_uint32_t, + sizeof(uint32_t)}, + + {PINT_HINT_DISTRIBUTION, + 0, + PVFS_HINT_DISTRIBUTION_NAME, + encode_func_uint32_t, + decode_func_uint32_t, + sizeof(uint32_t)}, + + {PINT_HINT_DFILE_COUNT, + 0, + PVFS_HINT_DFILE_COUNT_NAME, + encode_func_uint32_t, + decode_func_uint32_t, + sizeof(uint32_t)}, + + {PINT_HINT_LAYOUT, + 0, + PVFS_HINT_LAYOUT_NAME, + encode_func_uint32_t, + decode_func_uint32_t, + sizeof(uint32_t)}, + + {PINT_HINT_SERVERLIST, + 0, + 
PVFS_HINT_SERVERLIST_NAME, + encode_func_uint32_t, + decode_func_uint32_t, + sizeof(uint32_t)}, + + {PINT_HINT_NOCACHE, + 0, + PVFS_HINT_NOCACHE_NAME, + encode_func_uint32_t, + decode_func_uint32_t, + sizeof(uint32_t)}, + + {0} +}; + +static const struct PINT_hint_info *PINT_hint_get_info_by_type(int type) +{ + int j = 0; + while(hint_types[j].type != 0) + { + if(type == hint_types[j].type) + { + return &hint_types[j]; + } + ++j; + } + + return NULL; +} + +static const struct PINT_hint_info * +PINT_hint_get_info_by_name(const char *name) +{ + int j = 0; + while(hint_types[j].type != 0) + { + if(!strcmp(name, hint_types[j].name)) + { + return &hint_types[j]; + } + ++j; + } + return NULL; +} + +int PVFS_hint_add_internal( + PVFS_hint *hint, + enum PINT_hint_type type, + int length, + void *value) +{ + int ret; + const struct PINT_hint_info *info; + PINT_hint *new_hint; + + info = PINT_hint_get_info_by_type(type); + if(info) + { + ret = PINT_hint_check(hint, info->type); + if(ret == -PVFS_EEXIST) + { + return PVFS_hint_replace_internal(hint, type, length, value); + } + } + + new_hint = malloc(sizeof(PINT_hint)); + if (!new_hint) + { + return -PVFS_ENOMEM; + } + + new_hint->length = length; + new_hint->type_string = NULL; + new_hint->value = malloc(new_hint->length); + if(!new_hint->value) + { + free(new_hint); + return -PVFS_ENOMEM; + } + + memcpy(new_hint->value, value, length); + + info = PINT_hint_get_info_by_type(type); + if(info) + { + new_hint->type = info->type; + new_hint->flags = info->flags; + new_hint->encode = info->encode; + new_hint->decode = info->decode; + } + + new_hint->next = *hint; + *hint = new_hint; + + return 0; +} + +int PVFS_hint_replace( + PVFS_hint *hint, + const char *name, + int length, + void *value) +{ + const struct PINT_hint_info *info; + + info = PINT_hint_get_info_by_name(name); + if(info) + { + return PVFS_hint_replace_internal(hint, info->type, length, value); + } + return PVFS_hint_add(hint, name, length, value); +} + +int 
PVFS_hint_replace_internal( + PVFS_hint *hint, + enum PINT_hint_type type, + int length, + void *value) +{ + PINT_hint *tmp; + const struct PINT_hint_info *info; + + info = PINT_hint_get_info_by_type(type); + if(info) + { + tmp = *hint; + while(tmp) + { + if(tmp->type == info->type) + { + free(tmp->value); + tmp->length = length; + tmp->value = malloc(length); + if(!tmp->value) + { + return -PVFS_ENOMEM; + } + memcpy(tmp->value, value, length); + return 0; + } + + tmp = tmp->next; + } + } + return -PVFS_ENOENT; +} + +int PVFS_hint_add( + PVFS_hint *hint, + const char *name, + int length, + void *value) +{ + int ret; + const struct PINT_hint_info *info; + PINT_hint *new_hint; + + info = PINT_hint_get_info_by_name(name); + if(info) + { + ret = PINT_hint_check(hint, info->type); + if(ret == -PVFS_EEXIST) + { + return ret; + } + } + + new_hint = malloc(sizeof(PINT_hint)); + if (!new_hint) + { + return -PVFS_ENOMEM; + } + + new_hint->length = length; + new_hint->value = malloc(new_hint->length); + if(!new_hint->value) + { + free(new_hint); + return -PVFS_ENOMEM; + } + memcpy(new_hint->value, value, length); + + if(info) + { + new_hint->type_string = NULL; + new_hint->type = info->type; + new_hint->flags = info->flags; + new_hint->encode = info->encode; + new_hint->decode = info->decode; + } + else + { + new_hint->type = PINT_HINT_UNKNOWN; + new_hint->type_string = strdup(name); + + /* always transfer unknown hints */ + new_hint->flags = PINT_HINT_TRANSFER; + new_hint->encode = encode_func_string; + new_hint->decode = decode_func_string; + } + + new_hint->next = *hint; + *hint = new_hint; + + return 0; +} + +int PVFS_hint_check(PVFS_hint *hints, const char *name) +{ + const struct PINT_hint_info *info; + + info = PINT_hint_get_info_by_name(name); + return PINT_hint_check(hints, info->type); +} + +static int PINT_hint_check(PVFS_hint *hints, enum PINT_hint_type type) +{ + PINT_hint *tmp; + + if(!hints) + { + return 0; + } + + tmp = *hints; + while(tmp) + { + if(tmp->type 
== type) + { + return -PVFS_EEXIST; + } + tmp = tmp->next; + } + return 0; +} + +int PVFS_hint_check_transfer(PVFS_hint *hints) +{ + PINT_hint *tmp; + + if(!hints) + { + return 0; + } + + tmp = *hints; + while(tmp) + { + if (PINT_hint_get_info_by_type(tmp->type)->flags & + PINT_HINT_TRANSFER) + { + return 1; + } + tmp = tmp->next; + } + return 0; +} + +void encode_PINT_hint(char **pptr, const PINT_hint *hint) +{ + int transfer_count = 0; + const PINT_hint *tmp_hint = hint; + + /* count up the transferable hints */ + while(tmp_hint) + { + if(tmp_hint->flags & PINT_HINT_TRANSFER) + { + transfer_count++; + } + + tmp_hint = tmp_hint->next; + } + + /* encode the number of hints to be transferred */ + encode_uint32_t(pptr, &transfer_count); + + tmp_hint = hint; + while(tmp_hint) + { + /* encode the hint type */ + if(tmp_hint->flags & PINT_HINT_TRANSFER) + { + encode_uint32_t(pptr, &tmp_hint->type); + + /* if the type is unknown, encode the type string */ + if(tmp_hint->type == PINT_HINT_UNKNOWN) + { + encode_string(pptr, &tmp_hint->type_string); + tmp_hint->encode(pptr, (void *)&tmp_hint->value); + } + else + { + /* encode the hint using the encode function provided */ + tmp_hint->encode(pptr, tmp_hint->value); + } + } + + tmp_hint = tmp_hint->next; + } +} + +void decode_PINT_hint(char **pptr, PINT_hint **hint) +{ + int count, i, type; + PINT_hint *new_hint = NULL; + const struct PINT_hint_info *info; + + decode_uint32_t(pptr, &count); + + gossip_debug(GOSSIP_SERVER_DEBUG, "decoding %d hints from request\n", + count); + + for(i = 0; i < count; ++i) + { + decode_uint32_t(pptr, &type); + info = PINT_hint_get_info_by_type(type); + if(info) + { + char *start; + int len; + void *value = malloc(info->length); + if(!value) + { + return; + } + + start = *pptr; + info->decode(pptr, value); + len = (*pptr - start); + PVFS_hint_add(&new_hint, info->name, len, value); + free(value); + } + else + { + char *type_string; + char *value; + /* not a recognized hint, assume its a string */ 
+ decode_string(pptr, &type_string); + decode_string(pptr, &value); + PVFS_hint_add(&new_hint, type_string, strlen(value) + 1, value); + } + } + + *hint = new_hint; +} + +int PVFS_hint_copy(PVFS_hint old_hint, PVFS_hint *new_hint) +{ + const struct PINT_hint_info *info; + PINT_hint *h = old_hint; + const char *name; + + if(!old_hint) + { + *new_hint = NULL; + return 0; + } + + while(h) + { + info = PINT_hint_get_info_by_type(h->type); + if(!info) + { + name = h->type_string; + } + else + { + name = info->name; + } + + PVFS_hint_add(new_hint, name, h->length, h->value); + h = h->next; + } + return 0; +} + +void PVFS_hint_free(PVFS_hint hint) +{ + PINT_hint * act = hint; + PINT_hint * old; + + while(act != NULL) + { + old = act; + act = act->next; + + free(old->value); + + if(old->type == PINT_HINT_UNKNOWN) + { + free(old->type_string); + } + free(old); + } +} + +/* + * example environment variable + * PVFS2_HINTS = + *'pvfs.hint.request_id:10+pvfs.hint.client_id:30' + */ +int PVFS_hint_import_env(PVFS_hint *out_hint) +{ + char * env; + char * env_copy; + char * save_ptr = NULL; + char * aktvar; + char name[PVFS_HINT_MAX_NAME_LENGTH]; + int len; + const struct PINT_hint_info *info; + PINT_hint *hint = NULL; + int ret; + + if( out_hint == NULL ) + { + return 1; + } + env = getenv("PVFS2_HINTS"); + if( env == NULL ) + { + return 0; + } + len = strlen(env); + env_copy = (char *) malloc(sizeof(char) * (len+1)); + strncpy(env_copy, env, len+1); + + /* parse hints and do not overwrite already specified hints !*/ +#ifdef WIN32 + aktvar = strtok(env_copy, "+"); /* thread-safe */ +#else + aktvar = strtok_r(env_copy, "+", & save_ptr); +#endif + while( aktvar != NULL ) + { + char * rest; + +#ifdef WIN32 + rest = strchr(aktvar, ':'); +#else + rest = index(aktvar, ':'); +#endif + if (rest == NULL) + { + gossip_err("Environment variable PVFS2_HINTS is " + "malformed starting with: %s\n", + aktvar); + free(env_copy); + return 0; + } + + *rest = 0; + + sprintf(name, "pvfs2.hint.%s", 
aktvar); + info = PINT_hint_get_info_by_name(name); + if(info) + { + /* a bit of a hack..if we know the type and its + * an int, we convert from a string + */ + if(info->encode == encode_func_uint32_t) + { + uint32_t val; + sscanf(rest+1, "%u", &val); + ret = PVFS_hint_add(&hint, info->name, sizeof(val), &val); + } + else if(info->encode == encode_func_uint64_t) + { + uint32_t val; + sscanf(rest+1, "%u", &val); + ret = PVFS_hint_add(&hint, info->name, sizeof(val), &val); + } + else if(info->encode == encode_func_string) + { + /* just pass the string along as the hint value */ + ret = PVFS_hint_add(&hint, info->name, strlen(rest+1), rest+1); + } + else + { + /* Can't specify a complex hint in the PVFS2_HINTS environment + * variable. + */ + ret = -PVFS_EINVAL; + } + } + else + { + /* Hint not recognized, so we store it with its name */ + ret = PVFS_hint_add(&hint, name, strlen(rest+1), rest+1); + } + + if(ret < 0) + { + /* hint parsing failed */ + PVFS_hint_free(hint); + free(env_copy); + return ret; + } +#ifdef WIN32 + aktvar = strtok(NULL, "+"); +#else + aktvar = strtok_r(NULL, "+", & save_ptr); +#endif + } + + free(env_copy); + return 0; +} + +void *PINT_hint_get_value_by_type( + struct PVFS_hint_s *hint, enum PINT_hint_type type, int *length) +{ + PINT_hint *h; + + h = hint; + + while(h) + { + if(h->type == type) + { + if(length) + { + *length = h->length; + } + return h->value; + } + + h = h->next; + } + return NULL; +} + +void *PINT_hint_get_value_by_name( + struct PVFS_hint_s *hint, const char *name, int *length) +{ + PINT_hint *h; + + h = hint; + + while(h) + { + if(h->type_string != NULL && !strcmp(h->type_string, name)) + { + if(length) + { + *length = h->length; + } + return h->value; + } + + h = h->next; + } + return NULL; +} + +/* + * Local variables: + * mode: c + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/misc/pint-hint.h b/src/common/misc/pint-hint.h new file mode 100644 
index 0000000..4b2ea0f --- /dev/null +++ b/src/common/misc/pint-hint.h @@ -0,0 +1,101 @@ + +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#ifndef __PINT_HINT_H__ +#define __PINT_HINT_H__ + +#define PVFS_HINT_MAX 24 +#define PVFS_HINT_MAX_LENGTH 1024 +#define PVFS_HINT_MAX_NAME_LENGTH 512 + +#define PINT_HINT_TRANSFER 0x01 + +#include "pvfs2-hint.h" + +enum PINT_hint_type +{ + PINT_HINT_UNKNOWN = 0, + PINT_HINT_REQUEST_ID, + PINT_HINT_CLIENT_ID, + PINT_HINT_HANDLE, + PINT_HINT_OP_ID, + PINT_HINT_RANK, + PINT_HINT_SERVER_ID, + PINT_HINT_DISTRIBUTION, + PINT_HINT_LAYOUT, + PINT_HINT_DFILE_COUNT, + PINT_HINT_SERVERLIST, + PINT_HINT_NOCACHE +}; + +typedef struct PVFS_hint_s +{ + enum PINT_hint_type type; + char *type_string; + char *value; + int32_t length; + + void (*encode)(char **pptr, void *value); + void (*decode)(char **pptr, void *value); + + int flags; + struct PVFS_hint_s *next; + +} PINT_hint; + +void encode_PINT_hint(char **pptr, const PINT_hint *hint); +void decode_PINT_hint(char **pptr, PINT_hint **hint); + +void *PINT_hint_get_value_by_type(struct PVFS_hint_s *hint, enum PINT_hint_type type, + int *length); + +void *PINT_hint_get_value_by_name( + struct PVFS_hint_s *hint, const char *name, int *length); + +int PVFS_hint_add_internal( + PVFS_hint *hint, + enum PINT_hint_type type, + int length, + void *value); + +int PVFS_hint_replace_internal( + PVFS_hint *hint, + enum PINT_hint_type type, + int length, + void *value); + +#define PINT_HINT_GET_REQUEST_ID(hints) \ + PINT_hint_get_value_by_type(hints, PINT_HINT_REQUEST_ID, NULL) ? \ + *(uint32_t *)PINT_hint_get_value_by_type(hints, PINT_HINT_REQUEST_ID, NULL) : 0 + +#define PINT_HINT_GET_CLIENT_ID(hints) \ + PINT_hint_get_value_by_type(hints, PINT_HINT_CLIENT_ID, NULL) ? 
\ + *(uint32_t *)PINT_hint_get_value_by_type(hints, PINT_HINT_CLIENT_ID, NULL) : 0 + +#define PINT_HINT_GET_HANDLE(hints) \ + PINT_hint_get_value_by_type(hints, PINT_HINT_HANDLE, NULL) ? \ + *(uint64_t *)PINT_hint_get_value_by_type(hints, PINT_HINT_HANDLE, NULL) : 0 + +#define PINT_HINT_GET_OP_ID(hints) \ + PINT_hint_get_value_by_type(hints, PINT_HINT_OP_ID, NULL) ? \ + *(uint32_t *)PINT_hint_get_value_by_type(hints, PINT_HINT_OP_ID, NULL) : 0 + +#define PINT_HINT_GET_RANK(hints) \ + PINT_hint_get_value_by_type(hints, PINT_HINT_RANK, NULL) ? \ + *(uint32_t *)PINT_hint_get_value_by_type(hints, PINT_HINT_RANK, NULL) : 0 + +#endif /* __PINT_HINT_H__ */ + +/* + * Local variables: + * mode: c + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/misc/pint-mem.c b/src/common/misc/pint-mem.c new file mode 100644 index 0000000..246fdef --- /dev/null +++ b/src/common/misc/pint-mem.c @@ -0,0 +1,85 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#define _XOPEN_SOURCE 600 +#include +#include +#include +#ifdef HAVE_MALLOC_H +#include +#endif + +#ifdef WIN32 +#include "wincommon.h" + +/* do not declare inline on Windows (can't be exported)*/ +#undef inline +#define inline +#endif + +/* prototype definitions */ +inline void* PINT_mem_aligned_alloc(size_t size, size_t alignment); +inline void PINT_mem_aligned_free(void *ptr); + +/* PINT_mem_aligned_alloc() + * + * allocates a memory region of the specified size and returns a + * pointer to the region. The address of the memory will be evenly + * divisible by alignment. 
+ * + * returns pointer to memory on success, NULL on failure + */ +inline void* PINT_mem_aligned_alloc(size_t size, size_t alignment) +{ + int ret; + void *ptr; + +#ifdef WIN32 + ret = 0; + ptr = _aligned_malloc(size, alignment); + if (ptr == NULL) + { + ret = ENOMEM; + } +#else + ret = posix_memalign(&ptr, alignment, size); +#endif + if(ret != 0) + { + errno = ret; + return NULL; + } + memset(ptr, 0, size); + return ptr; +} + +/* PINT_mem_aligned_free() + * + * frees memory region previously allocated with + * PINT_mem_aligned_alloc() + * + * no return value + */ +inline void PINT_mem_aligned_free(void *ptr) +{ +#ifdef WIN32 + _aligned_free(ptr); +#else + free(ptr); +#endif + return; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ + + diff --git a/src/common/misc/pint-mem.h b/src/common/misc/pint-mem.h index 62de3f0..281392f 100644 --- a/src/common/misc/pint-mem.h +++ b/src/common/misc/pint-mem.h @@ -7,75 +7,9 @@ #ifndef __PINT_MEM_H #define __PINT_MEM_H -/* struct that describes the underlying memory region that we pick our - * aligned region from - */ -struct PINT_mem_desc -{ - void* addr; - int size; -}; - -/* PINT_mem_aligned_alloc() - * - * allocates a memory region of the specified size and returns a - * pointer to the region. The address of the memory will be evenly - * divisible by alignment. 
- * - * returns pointer to memory on success, NULL on failure - */ -static inline void* PINT_mem_aligned_alloc(int size, unsigned long alignment) -{ - char* true_ptr = NULL; - char* returned_ptr = NULL; - struct PINT_mem_desc* desc = NULL; - int full_size = size + alignment + sizeof(struct PINT_mem_desc); - unsigned long alignment_mask = (~(alignment-1)); - - true_ptr = (char*)malloc(full_size); - if(!true_ptr) - { - return(NULL); - } - - /* we need to find the first aligned address within the malloc'd - * area that leaves room for a descriptor structure - */ - returned_ptr = - (char*)(((unsigned long)(true_ptr) - + alignment + sizeof(struct PINT_mem_desc) - 1)&alignment_mask); - - /* index backwards and fill in a descriptor that tells us what - * region really needs to be free'd when the time comes - */ - desc = (struct PINT_mem_desc*)(returned_ptr - sizeof(struct - PINT_mem_desc)); - desc->addr = true_ptr; - desc->size = full_size; - - return(returned_ptr); -} - -/* PINT_mem_aligned_free() - * - * frees memory region previously allocated with - * PINT_mem_aligned_alloc() - * - * no return value - */ -static inline void PINT_mem_aligned_free(void* ptr) -{ - struct PINT_mem_desc* desc = NULL; - - /* backup a little off of the pointer and find the descriptor of the - * underlying memory region - */ - desc = (struct PINT_mem_desc*)((char*)ptr - sizeof(struct PINT_mem_desc)); - - free(desc->addr); +extern void* PINT_mem_aligned_alloc(size_t size, size_t alignment); +extern void PINT_mem_aligned_free(void *ptr); - return; -} #endif /* __PINT_MEM_H */ /* diff --git a/src/common/misc/pint-perf-counter.c b/src/common/misc/pint-perf-counter.c index b0f3882..2cc4145 100644 --- a/src/common/misc/pint-perf-counter.c +++ b/src/common/misc/pint-perf-counter.c @@ -4,9 +4,17 @@ * See COPYING in top-level directory. */ +/** + * a perf counter (pc) has a linked list of samples (pc->sample) that + * in turn has a start time, and interval, and a pointer to an array of + * counters. 
+ */ + #include #include +#ifndef WIN32 #include +#endif #include #include #include @@ -15,35 +23,78 @@ #include "pvfs2-util.h" #include "pvfs2-internal.h" #include "pint-perf-counter.h" +#include "pint-util.h" #include "gossip.h" #define PINT_PERF_REALLOC_ARRAY(__pc, __tmp_ptr, __src_ptr, __new_history, __type) \ { \ - __tmp_ptr = (__type*)malloc(__new_history*sizeof(__type)); \ + __tmp_ptr = (__type *)malloc(__new_history * sizeof(__type)); \ if(!__tmp_ptr) \ return(-PVFS_ENOMEM); \ - memset(__tmp_ptr, 0, (__new_history*sizeof(__type))); \ + memset(__tmp_ptr, 0, (__new_history * sizeof(__type))); \ memcpy(__tmp_ptr, __src_ptr, \ - (__pc->history_size*sizeof(__type))); \ + (__pc->history_size * sizeof(__type))); \ free(__src_ptr); \ __src_ptr = __tmp_ptr; \ } +/** + * track performance counters for the server + * keys must be defined here in order based on the + * enumeration in include/pvfs2-mgmt.h + */ +struct PINT_perf_key server_keys[] = +{ + {"bytes read", PINT_PERF_READ, PINT_PERF_PRESERVE}, + {"bytes written", PINT_PERF_WRITE, PINT_PERF_PRESERVE}, + {"metadata reads", PINT_PERF_METADATA_READ, PINT_PERF_PRESERVE}, + {"metadata writes", PINT_PERF_METADATA_WRITE, PINT_PERF_PRESERVE}, + {"metadata dspace ops", PINT_PERF_METADATA_DSPACE_OPS, PINT_PERF_PRESERVE}, + {"metadata keyval ops", PINT_PERF_METADATA_KEYVAL_OPS, PINT_PERF_PRESERVE}, + {"request scheduler", PINT_PERF_REQSCHED, PINT_PERF_PRESERVE}, + {"requests received ", PINT_PERF_REQUESTS, PINT_PERF_PRESERVE}, + {"bytes read by small_io", PINT_PERF_SMALL_READ, PINT_PERF_PRESERVE}, + {"bytes written by small_io", PINT_PERF_SMALL_WRITE, PINT_PERF_PRESERVE}, + {"bytes read by flow", PINT_PERF_FLOW_READ, PINT_PERF_PRESERVE}, + {"bytes written by flow", PINT_PERF_FLOW_WRITE, PINT_PERF_PRESERVE}, + {NULL, 0, 0}, +}; + +/** + * this utility removes all of the samples from a perf counter + * this is mostly for cleanup in case of a memory alloc error + */ +void PINT_free_pc (struct PINT_perf_counter *pc) +{ + 
struct PINT_perf_sample *tmp, *tmp2; + if (!pc) + return; + tmp = pc->sample; + while(tmp) + { + if (tmp->value) + free (tmp->value); + tmp2 = tmp; + tmp = tmp->next; + free (tmp2); + } + free(pc); +} + /** * creates a new perf counter instance * \note key_array must not be freed by caller until after * PINT_perf_finalize() * \returns pointer to perf counter on success, NULL on failure */ -struct PINT_perf_counter* PINT_perf_initialize( - struct PINT_perf_key* key_array) /**< NULL terminated array of keys */ +struct PINT_perf_counter *PINT_perf_initialize(struct PINT_perf_key *key_array) { - struct PINT_perf_counter* pc = NULL; - struct PINT_perf_key* key = NULL; + struct PINT_perf_counter *pc = NULL; + struct PINT_perf_key *key = NULL; int i; - struct timeval tv; + struct PINT_perf_sample *tmp; - pc = (struct PINT_perf_counter*)malloc(sizeof(struct PINT_perf_counter)); + pc = (struct PINT_perf_counter *)malloc(sizeof(struct PINT_perf_counter)); if(!pc) { return(NULL); @@ -52,7 +103,7 @@ struct PINT_perf_counter* PINT_perf_initialize( gen_mutex_init(&pc->mutex); pc->key_array = key_array; - key = &key_array[pc->key_count]; + key = &key_array[pc->key_count]; /* key count is zero */ while(key->key_name) { /* keys must be in order from zero */ @@ -75,66 +126,55 @@ struct PINT_perf_counter* PINT_perf_initialize( return(NULL); } + /* running will be used to decide if we should start an update process */ pc->history_size = PERF_DEFAULT_HISTORY_SIZE; + pc->running = (pc->history_size > 1); + pc->interval = PERF_DEFAULT_UPDATE_INTERVAL; - /* allocate time arrays */ - pc->start_time_array_ms = - (uint64_t*)malloc(PERF_DEFAULT_HISTORY_SIZE*sizeof(uint64_t)); - if(!pc->start_time_array_ms) + /* create a simple linked list of samples, each with a value array */ + tmp = (struct PINT_perf_sample *)malloc(sizeof (struct PINT_perf_sample)); + if(!tmp) { gen_mutex_destroy(&pc->mutex); free(pc); return(NULL); } - pc->interval_array_ms = - 
(uint64_t*)malloc(PERF_DEFAULT_HISTORY_SIZE*sizeof(uint64_t)); - if(!pc->interval_array_ms) + memset(tmp, 0, sizeof(struct PINT_perf_sample)); + tmp->next = NULL; + tmp->value = (int64_t *)malloc(pc->key_count * sizeof(int64_t)); + if(!tmp->value) { - free(pc->start_time_array_ms); gen_mutex_destroy(&pc->mutex); - free(pc); + PINT_free_pc(pc); return(NULL); } - memset(pc->start_time_array_ms, 0, - PERF_DEFAULT_HISTORY_SIZE*sizeof(uint64_t)); - memset(pc->interval_array_ms, 0, - PERF_DEFAULT_HISTORY_SIZE*sizeof(uint64_t)); - - /* allocate value matrix */ - pc->value_matrix = (int64_t**)malloc(pc->key_count*sizeof(int64_t*)); - if(!pc->value_matrix) + memset(tmp->value, 0, pc->key_count * sizeof(int64_t)); + pc->sample = tmp; + for (i = pc->history_size - 1; i > 0 && tmp; i--) { - free(pc->start_time_array_ms); - free(pc->interval_array_ms); - gen_mutex_destroy(&pc->mutex); - free(pc); - return(NULL); - } - - for(i=0; ikey_count; i++) - { - pc->value_matrix[i] = - (int64_t*)malloc(pc->history_size*sizeof(int64_t)); - if(!pc->value_matrix[i]) + tmp->next = (struct PINT_perf_sample *) + malloc(sizeof (struct PINT_perf_sample)); + if(!tmp->next) { - for(i=i-1; i>= 0; i--) - { - free(pc->value_matrix[i]); - } - free(pc->value_matrix); - free(pc->start_time_array_ms); - free(pc->interval_array_ms); gen_mutex_destroy(&pc->mutex); - free(pc); + PINT_free_pc(pc); return(NULL); } - memset(pc->value_matrix[i], 0, pc->history_size*sizeof(int64_t)); + memset(tmp->next, 0, sizeof(struct PINT_perf_sample)); + tmp->next->next = NULL; + tmp->next->value = (int64_t *)malloc(pc->key_count * sizeof(int64_t)); + if(!tmp->value) + { + gen_mutex_destroy(&pc->mutex); + PINT_free_pc(pc); + return(NULL); + } + memset(tmp->next->value, 0, pc->key_count * sizeof(int64_t)); + tmp = tmp->next; } /* set initial timestamp */ - gettimeofday(&tv, NULL); - pc->start_time_array_ms[0] = ((uint64_t)tv.tv_sec)*1000 + - tv.tv_usec/1000; + pc->sample->start_time_ms = PINT_util_get_time_ms(); return(pc); } 
@@ -147,28 +187,28 @@ void PINT_perf_reset( struct PINT_perf_counter* pc) { int i; - struct timeval tv; + struct PINT_perf_sample *s; gen_mutex_lock(&pc->mutex); - /* zero out all fields */ - memset(pc->start_time_array_ms, 0, - PERF_DEFAULT_HISTORY_SIZE*sizeof(uint64_t)); - memset(pc->interval_array_ms, 0, - PERF_DEFAULT_HISTORY_SIZE*sizeof(uint64_t)); - for(i=0; ikey_count; i++) + if (!pc || !pc->sample || !pc->sample->value) + return; + for(s = pc->sample; s; s = s->next) { - if(!(pc->key_array[i].flag & PINT_PERF_PRESERVE)) + /* zero out all fields */ + memset(&s->start_time_ms, 0, sizeof(uint64_t)); + memset(&s->interval_ms, 0, sizeof(uint64_t)); + for(i = 0; i < pc->key_count; i++) { - memset(pc->value_matrix[i], 0, pc->history_size*sizeof(int64_t)); + if(!(pc->key_array[i].flag & PINT_PERF_PRESERVE)) + { + memset(&s->value[i], 0, sizeof(int64_t)); + } } } /* set initial timestamp */ - gettimeofday(&tv, NULL); - pc->start_time_array_ms[0] = ((uint64_t)tv.tv_sec)*1000 + - tv.tv_usec/1000; - + s->start_time_ms = PINT_util_get_time_ms(); gen_mutex_unlock(&pc->mutex); return; @@ -178,21 +218,9 @@ void PINT_perf_reset( * destroys a perf counter instance */ void PINT_perf_finalize( - struct PINT_perf_counter* pc) /**< pointer to counter instance */ + struct PINT_perf_counter *pc) /**< pointer to counter instance */ { - int i; - - for(i=0; ikey_count; i++) - { - free(pc->value_matrix[i]); - } - free(pc->value_matrix); - free(pc->start_time_array_ms); - free(pc->interval_array_ms); - gen_mutex_destroy(&pc->mutex); - free(pc); - pc = NULL; - + PINT_free_pc(pc); return; } @@ -200,13 +228,16 @@ void PINT_perf_finalize( * performs an operation on the given key within a performance counter * \see PINT_perf_count macro */ -void __PINT_perf_count( - struct PINT_perf_counter* pc, - int key, - int64_t value, - enum PINT_perf_ops op) +void __PINT_perf_count( struct PINT_perf_counter* pc, + int key, + int64_t value, + enum PINT_perf_ops op) { - if(!pc) +#if 0 + int64_t tmp; /* 
this is for debugging purposes */ +#endif + + if(!pc || !pc->sample || !pc->sample->value) { /* do nothing if perf counter is not initialized */ return; @@ -214,84 +245,80 @@ void __PINT_perf_count( gen_mutex_lock(&pc->mutex); +#if 0 + tmp = pc->sample->value[key]; +#endif + if(key >= pc->key_count) { gossip_err("Error: PINT_perf_count(): invalid key.\n"); - return; + goto errorout; } switch(op) { case PINT_PERF_ADD: - pc->value_matrix[key][0] = pc->value_matrix[key][0] + value; + pc->sample->value[key] += value; break; case PINT_PERF_SUB: - pc->value_matrix[key][0] = pc->value_matrix[key][0] - value; + pc->sample->value[key] -= value; break; case PINT_PERF_SET: - pc->value_matrix[key][0] = value; + pc->sample->value[key] = value; break; } +#if 0 +/* debug code shows counters being manipulated */ +gossip_err("COUNT %d %lld was %lld is now %lld\n", key, value, + tmp, pc->sample->value[key]); +#endif + +errorout: gen_mutex_unlock(&pc->mutex); return; } -#ifdef __PVFS2_DISABLE_PERF_COUNTERS__ - #define PINT_perf_count(w,x,y,z) do{}while(0) -#else - #define PINT_perf_count __PINT_perf_count -#endif - /** * rolls over the current history window */ -void PINT_perf_rollover( - struct PINT_perf_counter* pc) +void PINT_perf_rollover( struct PINT_perf_counter* pc) { int i; - struct timeval tv; uint64_t int_time; + struct PINT_perf_sample *head, *tail; - if(!pc) + if(!pc || !pc->sample || !pc->sample->value) { /* do nothing if perf counter is not initialized */ return; } - gettimeofday(&tv, NULL); - int_time = ((uint64_t)tv.tv_sec)*1000 + tv.tv_usec/1000; + int_time = PINT_util_get_time_ms(); gen_mutex_lock(&pc->mutex); - /* rotate all values back one */ - if(pc->history_size > 1) + /* rotate newest sample to the back */ + head = pc->sample; + for(tail = head; tail && tail->next; tail = tail->next); + if(head != tail) { - for(i=0; ikey_count; i++) - { - memmove(&pc->value_matrix[i][1], &pc->value_matrix[i][0], - ((pc->history_size-1)*sizeof(int64_t))); - } - 
memmove(&pc->interval_array_ms[1], &pc->interval_array_ms[0], - ((pc->history_size-1)*sizeof(uint64_t))); - memmove(&pc->start_time_array_ms[1], &pc->start_time_array_ms[0], - ((pc->history_size-1)*sizeof(uint64_t))); - if(int_time > pc->start_time_array_ms[1]) - { - pc->interval_array_ms[1] = int_time - pc->start_time_array_ms[1]; - } + /* move head to the tail */ + pc->sample = head->next; + tail->next = head; + head->next = NULL; + memcpy(pc->sample, tail->next, sizeof(struct PINT_perf_sample)); } /* reset times for next interval */ - pc->start_time_array_ms[0] = int_time; - pc->interval_array_ms[0] = 0; + pc->sample->start_time_ms = int_time; + pc->sample->interval_ms = 0; - for(i=0; ikey_count; i++) + for(i = 0; i < pc->key_count; i++) { - /* reset next interval's value, unless preserve flag set */ if(!(pc->key_array[i].flag & PINT_PERF_PRESERVE)) { - pc->value_matrix[i][0] = 0; + memset(&pc->sample->value[i], 0, sizeof(int64_t)); } } @@ -304,59 +331,92 @@ void PINT_perf_rollover( * sets runtime tunable performance counter options * \returns 0 on success, -PVFS_error on failure */ -int PINT_perf_set_info( - struct PINT_perf_counter* pc, - enum PINT_perf_option option, - unsigned int arg) +int PINT_perf_set_info( struct PINT_perf_counter* pc, + enum PINT_perf_option option, + unsigned int arg) { - uint64_t* tmp_unsigned; - int64_t* tmp_signed; - int i; - - if(!pc) + if(!pc || !pc->sample || !pc->sample->value) { /* do nothing if perf counter is not initialized */ return 0; } + if (arg < 1) + { + /* bad argument */ + return(-PVFS_EINVAL); + } + gen_mutex_lock(&pc->mutex); switch(option) { - case PINT_PERF_HISTORY_SIZE: - if(arg <= pc->history_size) + case PINT_PERF_HISTORY_SIZE: + if(arg <= pc->history_size) + { + while(arg < pc->history_size) { - pc->history_size = arg; + struct PINT_perf_sample *s; + /* remove one sample from list */ + s = pc->sample->next; + if (s) + { + /* removing just behind first sample */ + pc->sample->next = s->next; + s->next = NULL; + 
free(s->value); + free(s); + pc->history_size--; + } + else + { + /* something is wrong */ + gen_mutex_unlock(&pc->mutex); + return(-PVFS_EINVAL); + } } - else + /* if history_size is now 1 stop the rollover SM */ + pc->running = (pc->history_size > 1); + } + else + { + while(arg > pc->history_size) { - /* we have to reallocate everything */ - /* NOTE: these macros will return error if needed, and - * counter instance will still be operational - */ - PINT_PERF_REALLOC_ARRAY(pc, - tmp_unsigned, - pc->start_time_array_ms, - arg, - uint64_t); - PINT_PERF_REALLOC_ARRAY(pc, - tmp_unsigned, - pc->interval_array_ms, - arg, - uint64_t); - for(i=0; ikey_count; i++) + struct PINT_perf_sample *s; + /* add one sample to list */ + s = (struct PINT_perf_sample *) + malloc(sizeof(struct PINT_perf_sample)); + if(!s) + { + gen_mutex_unlock(&pc->mutex); + return(-PVFS_ENOMEM); + } + memset(s, 0, sizeof(sizeof(struct PINT_perf_sample))); + s->value = (int64_t *) + malloc(pc->key_count * sizeof(int64_t)); + if(!s->value); { - PINT_PERF_REALLOC_ARRAY(pc, - tmp_signed, - pc->value_matrix[i], - arg, - int64_t); + free(s); + gen_mutex_unlock(&pc->mutex); + return(-PVFS_ENOMEM); } - pc->history_size = arg; + memset(s->value, 0, + sizeof(pc->key_count * sizeof(int64_t))); + /* adding just after first sample */ + s->next = pc->sample->next; + pc->sample->next = s; + pc->history_size++; } - break; - default: - gen_mutex_unlock(&pc->mutex); - return(-PVFS_EINVAL); + /* if not running start rollover SM */ + pc->running = (pc->history_size > 1); + } + break; + case PINT_PERF_UPDATE_INTERVAL: + if (arg > 0) + pc->interval = arg; + break; + default: + gen_mutex_unlock(&pc->mutex); + return(-PVFS_EINVAL); } gen_mutex_unlock(&pc->mutex); @@ -367,10 +427,9 @@ int PINT_perf_set_info( * retrieves runtime tunable performance counter options * \returns 0 on success, -PVFS_error on failure */ -int PINT_perf_get_info( - struct PINT_perf_counter* pc, - enum PINT_perf_option option, - unsigned int* arg) +int 
PINT_perf_get_info( struct PINT_perf_counter* pc, + enum PINT_perf_option option, + unsigned int* arg) { if(!pc) { @@ -381,15 +440,18 @@ int PINT_perf_get_info( gen_mutex_lock(&pc->mutex); switch(option) { - case PINT_PERF_HISTORY_SIZE: - *arg = pc->history_size; - break; - case PINT_PERF_KEY_COUNT: - *arg = pc->key_count; - break; - default: - gen_mutex_unlock(&pc->mutex); - return(-PVFS_EINVAL); + case PINT_PERF_HISTORY_SIZE: + *arg = pc->history_size; + break; + case PINT_PERF_KEY_COUNT: + *arg = pc->key_count; + break; + case PINT_PERF_UPDATE_INTERVAL: + *arg = pc->interval; + break; + default: + gen_mutex_unlock(&pc->mutex); + return(-PVFS_EINVAL); } gen_mutex_unlock(&pc->mutex); @@ -398,22 +460,34 @@ int PINT_perf_get_info( /** * retrieves measurement history + * + * This copies the data from the samples (stored in a linked list) into + * a temporary array where they can be inspected without worry of update + * this array will store up to max_key counters PLUS two time values, the + * start time and intervale, both as ms counts. The samples might have + * more or less keys in them, and the system might have more or less + * samples than space in the array. + * + * the array is really a 2D Matrix (keys+times vs history) but is treated + * as a 1D array because the sizes aren't well known until runtime, and + * even then can change. This results in some 2D indexing (i*max_key+2) + * in the loop where + * data is copied from the samples to the value_array. 
Also the location + * of the time stampls is generally index max_key, and max_key+1 */ void PINT_perf_retrieve( - struct PINT_perf_counter* pc, /**< performance counter */ - int64_t** value_matrix, /**< 2d matrix to fill in with measurements */ - uint64_t* start_time_array_ms, /**< array of start times */ - uint64_t* interval_array_ms, /**< array of interval lengths */ - int max_key, /**< max key value (1st dimension) */ - int max_history) /**< max history (2nd dimension) */ + struct PINT_perf_counter* pc, /* performance counter */ + int64_t *value_array, /* array of output measurements */ + int max_key, /* max key value (1st dimension) */ + int max_history) /* max history (2nd dimension) */ { int i; int tmp_max_key; int tmp_max_history; - struct timeval tv; uint64_t int_time; + struct PINT_perf_sample *s; - if(!pc) + if(!pc || !pc->sample || !pc->sample->value) { /* do nothing if perf counter is not initialized */ return; @@ -432,68 +506,80 @@ void PINT_perf_retrieve( if(max_key > pc->key_count || max_history > pc->history_size) { - /* zero out value matrix, we won't use all fields */ - for(i=0; i pc->history_size) - { - /* zero out time arrays, we won't use all fields */ - memset(start_time_array_ms, 0, (max_history*sizeof(uint64_t))); - memset(interval_array_ms, 0, (max_history*sizeof(uint64_t))); + memset(value_array, 0, + (max_history * (max_key + 2) * sizeof(int64_t))); } /* copy data out */ - for(i=0; isample; i < tmp_max_history && s; i++, s = s->next) { - memcpy(value_matrix[i], pc->value_matrix[i], - (tmp_max_history*sizeof(int64_t))); + /* copy counters */ + memcpy(&value_array[i * (max_key + 2)], s->value, + (tmp_max_key * sizeof(int64_t))); + /* copy time codes */ + value_array[(i * (max_key + 2)) + max_key] = s->start_time_ms; + value_array[(i * (max_key + 2)) + max_key + 1] = s->interval_ms; } - memcpy(start_time_array_ms, pc->start_time_array_ms, - (tmp_max_history*sizeof(uint64_t))); - memcpy(interval_array_ms, pc->interval_array_ms, - 
(tmp_max_history*sizeof(uint64_t))); + +#if 0 +/* debug code prints first sample to log */ +{int k; for(k=0;ksample->value[k]);} +#endif gen_mutex_unlock(&pc->mutex); /* fill in interval length for newest interval */ - gettimeofday(&tv, NULL); - int_time = ((uint64_t)tv.tv_sec) * 1000 + tv.tv_usec / 1000; - if(int_time > start_time_array_ms[0]) + int_time = PINT_util_get_time_ms(); + if(int_time > value_array[max_key]) { - interval_array_ms[0] = int_time - start_time_array_ms[0]; + value_array[max_key + 1] = int_time - value_array[max_key]; } + + /* auto-rollover when data is retrieved */ + /* this may obviate last step above */ + PINT_perf_rollover(pc); return; } -char* PINT_perf_generate_text( - struct PINT_perf_counter* pc, - int max_size) +char *PINT_perf_generate_text( struct PINT_perf_counter* pc, + int max_size) { int total_size = 0; int line_size = 0; int actual_size = 0; - char* tmp_str; - char* position; + char *tmp_str; + char *position; int i, j; uint64_t int_time; - struct timeval tv; time_t tmp_time_t; struct tm tmp_tm; +#ifdef WIN32 + struct tm *ptmp_tm; +#endif int ret; + struct PINT_perf_sample *s = NULL; + + if (!pc || !pc->sample || !pc->sample->value) + { + return NULL; + } gen_mutex_lock(&pc->mutex); - line_size = 26 + (24*pc->history_size); - total_size = (pc->key_count+2)*line_size + 1; + line_size = 26 + (24 * pc->history_size); + total_size = (pc->key_count + 2) * line_size + 1; actual_size = PVFS_util_min(total_size, max_size); - if((actual_size/line_size) < 3) + if((actual_size / line_size) < 3) { /* don't bother trying to display anything, can't fit any results in * that size @@ -501,7 +587,7 @@ char* PINT_perf_generate_text( return(NULL); } - tmp_str = (char*)malloc(actual_size*sizeof(char)); + tmp_str = (char *)malloc(actual_size * sizeof(char)); if(!tmp_str) { gen_mutex_unlock(&pc->mutex); @@ -512,16 +598,21 @@ char* PINT_perf_generate_text( /* start times */ sprintf(position, "%-24.24s: ", "Start times (hr:min:sec)"); position += 25; 
- for(i=0; ihistory_size; i++) + for(i = 0, s = pc->sample; i < pc->history_size && s; i++, s = s->next) { - if(pc->start_time_array_ms[i]) + PVFS_time start_i = (PVFS_time)s->start_time_ms; + if(start_i) { - tmp_time_t = pc->start_time_array_ms[i]/1000; + tmp_time_t = start_i / 1000; +#ifdef WIN32 + ptmp_tm = localtime(&tmp_time_t); + tmp_tm = *ptmp_tm; +#else localtime_r(&tmp_time_t, &tmp_tm); +#endif strftime(position, 11, " %H:%M:%S", &tmp_tm); position += 10; - sprintf(position, ".%03u", - (unsigned)(pc->start_time_array_ms[i]%1000)); + sprintf(position, ".%03u", (unsigned)(start_i % 1000)); position += 4; } else @@ -534,26 +625,29 @@ char* PINT_perf_generate_text( position++; /* fill in interval length for newest interval */ - gettimeofday(&tv, NULL); - int_time = ((uint64_t)tv.tv_sec) * 1000 + tv.tv_usec / 1000; - if(int_time > pc->start_time_array_ms[0]) + int_time = PINT_util_get_time_ms(); + if(int_time > pc->sample->interval_ms) { - pc->interval_array_ms[0] = int_time - pc->start_time_array_ms[0]; + pc->sample->interval_ms = int_time - pc->sample->start_time_ms; } /* intervals */ sprintf(position, "%-24.24s:", "Intervals (hr:min:sec)"); position += 25; - for(i=0; ihistory_size; i++) + for(i = 0, s = pc->sample; i < pc->history_size && s; i++, s = s->next) { - if(pc->interval_array_ms[i]) + PVFS_time interval_i = s->interval_ms; + if(interval_i) { - tmp_time_t = pc->interval_array_ms[i]/1000; + tmp_time_t = interval_i / 1000; +#ifdef WIN32 + gmtime_s(&tmp_tm, &tmp_time_t); +#else gmtime_r(&tmp_time_t, &tmp_tm); +#endif strftime(position, 11, " %H:%M:%S", &tmp_tm); position += 10; - sprintf(position, ".%03u", - (unsigned)(pc->interval_array_ms[i]%1000)); + sprintf(position, ".%03u", (unsigned)(interval_i % 1000)); position += 4; } else @@ -568,7 +662,7 @@ char* PINT_perf_generate_text( sprintf(position, "-------------------------"); position += 25; - for(i=0; ihistory_size; i++) + for(i = 0; i < pc->history_size; i++) { sprintf(position, "--------------"); 
position += 14; @@ -577,13 +671,17 @@ char* PINT_perf_generate_text( position++; /* values */ - for(i=0; ikey_count; i++) + for(i = 0; i < pc->key_count; i++) { sprintf(position, "%-24.24s:", pc->key_array[i].key_name); position += 25; - for(j=0; jhistory_size; j++) + for(j = 0, s = pc->sample; j < pc->history_size && s; j++, s = s->next) { - ret = snprintf(position, 15, " %13Ld", lld(pc->value_matrix[i][j])); +#ifdef WIN32 + ret = _snprintf(position, 15, " %13Ld", lld(s->value[i])); +#else + ret = snprintf(position, 15, " %13Ld", lld(s->value[i])); +#endif if(ret >= 15) { sprintf(position, "%14.14s", "Overflow!"); diff --git a/src/common/misc/pint-perf-counter.h b/src/common/misc/pint-perf-counter.h index 530b4fa..0a101ea 100644 --- a/src/common/misc/pint-perf-counter.h +++ b/src/common/misc/pint-perf-counter.h @@ -12,29 +12,16 @@ #include "gen-locks.h" enum { -PERF_DEFAULT_TIME_INTERVAL_SECS = 300, -PERF_DEFAULT_HISTORY_SIZE = 6, + PERF_DEFAULT_UPDATE_INTERVAL = 1000, /* msecs */ + PERF_DEFAULT_HISTORY_SIZE = 1, }; -/** flag that indicates that values for a particular key should be preserved +/** flag that indicates that values for a + * particular key should be preserved * across rollover rather than reset to 0 */ #define PINT_PERF_PRESERVE 1 -/* TODO: this may be moved in the long term; it is an enumeration of keys - * that pvfs2-server supports (used by trove and flow counters) - */ -enum PINT_server_perf_keys -{ - PINT_PERF_READ = 0, - PINT_PERF_WRITE = 1, - PINT_PERF_METADATA_READ = 2, - PINT_PERF_METADATA_WRITE = 3, - PINT_PERF_METADATA_DSPACE_OPS = 4, - PINT_PERF_METADATA_KEYVAL_OPS = 5, - PINT_PERF_REQSCHED = 6 -}; - /** enumeration of valid measurement operations */ enum PINT_perf_ops { @@ -46,8 +33,9 @@ enum PINT_perf_ops /** enumeration of runtime options */ enum PINT_perf_option { - PINT_PERF_HISTORY_SIZE = 1, /**< sets/gets the history size */ - PINT_PERF_KEY_COUNT = 2 /**< gets the key coung (cannot be set) */ + PINT_PERF_HISTORY_SIZE = 1, /**< 
sets/gets the history size */ + PINT_PERF_KEY_COUNT = 2, /**< gets the key count (cannot be set) */ + PINT_PERF_UPDATE_INTERVAL = 3 /**< sets/gets the update interval */ }; /** describes a single key to be stored in the perf counter interface */ @@ -58,36 +46,46 @@ struct PINT_perf_key int flag; /**< flags that modify behavior of values in this key */ }; -/** struct representing a perf counter instance */ +/** struct holding one sample for a multi-sample set of counters */ +struct PINT_perf_sample +{ + PVFS_time start_time_ms; /**< time this sample was started */ + PVFS_time interval_ms; /**< ms this sample lasted */ + int64_t *value; /**< this points to an array of counters */ + struct PINT_perf_sample *next; /**< link to next sample */ +}; + +/** struct representing a multi-sample set of perf counters */ struct PINT_perf_counter { gen_mutex_t mutex; struct PINT_perf_key* key_array; /**< keys (provided by initialize()) */ int key_count; /**< number of keys */ int history_size; /**< number of history intervals */ - /** matrix of statistics, first dimension is key, second is history */ - int64_t** value_matrix; - uint64_t* start_time_array_ms; /**< array of start times */ - uint64_t* interval_array_ms; /**< array of interval lengths */ + int running; /**< true if a rollover running */ + int interval; /**< milliseconds between rollovers */ + struct PINT_perf_sample *sample; /**< list of samples for this counter */ }; + /** server-wide perf counter structure */ +extern struct PINT_perf_key server_keys[]; + extern struct PINT_perf_counter *PINT_server_pc; -struct PINT_perf_counter* PINT_perf_initialize( - struct PINT_perf_key* key_array); +struct PINT_perf_counter* PINT_perf_initialize(struct PINT_perf_key *key); void PINT_perf_finalize( - struct PINT_perf_counter* pc); + struct PINT_perf_counter* pc); void PINT_perf_reset( - struct PINT_perf_counter* pc); + struct PINT_perf_counter* pc); void __PINT_perf_count( - struct PINT_perf_counter* pc, - int key, - int64_t 
value, - enum PINT_perf_ops op); + struct PINT_perf_counter* pc, + int key, + int64_t value, + enum PINT_perf_ops op); #ifdef __PVFS2_DISABLE_PERF_COUNTERS__ #define PINT_perf_count(w,x,y,z) do{}while(0) @@ -96,29 +94,29 @@ void __PINT_perf_count( #endif void PINT_perf_rollover( - struct PINT_perf_counter* pc); + struct PINT_perf_counter* pc); int PINT_perf_set_info( - struct PINT_perf_counter* pc, - enum PINT_perf_option option, - unsigned int arg); + struct PINT_perf_counter* pc, + enum PINT_perf_option option, + unsigned int arg); int PINT_perf_get_info( - struct PINT_perf_counter* pc, - enum PINT_perf_option option, - unsigned int* arg); + struct PINT_perf_counter* pc, + enum PINT_perf_option option, + unsigned int* arg); void PINT_perf_retrieve( - struct PINT_perf_counter* pc, - int64_t** value_matrix, - uint64_t* start_time_array_ms, - uint64_t* interval_array_ms, - int max_key, - int max_history); + struct PINT_perf_counter* pc, + int64_t* value_array, + int max_key, + int max_history); char* PINT_perf_generate_text( - struct PINT_perf_counter* pc, - int max_size); + struct PINT_perf_counter* pc, + int max_size); + +void PINT_free_pc (struct PINT_perf_counter *pc); #endif /* __PINT_PERF_COUNTER_H */ diff --git a/src/common/misc/pint-util.c b/src/common/misc/pint-util.c index 2f9b5f4..8e37c78 100644 --- a/src/common/misc/pint-util.c +++ b/src/common/misc/pint-util.c @@ -12,20 +12,70 @@ /* This file includes definitions of common internal utility functions */ #include #include + +#ifdef WIN32 +#include +#include "wincommon.h" + +/* uid and gid types */ +typedef unsigned int uid_t, gid_t; + +#else #include #include +#include +#include +#include +#include +#endif + +#define __PINT_REQPROTO_ENCODE_FUNCS_C #include "gen-locks.h" #include "pint-util.h" +#include "bmi.h" +#include "gossip.h" +#include "pvfs2-req-proto.h" + +#include "pvfs2-debug.h" +#include "bmi-byteswap.h" + +#ifdef HAVE_GETPWUID +static gen_mutex_t check_group_mutex = GEN_MUTEX_INITIALIZER; 
+static int pw_buf_size = 1024; // 1 KB +static int gr_buf_size = 1024*1024; // 1 MB +static char* check_group_pw_buffer = NULL; +static char* check_group_gr_buffer = NULL; +#endif +static int PINT_check_group(uid_t uid, gid_t gid); void PINT_time_mark(PINT_time_marker *out_marker) { +#ifdef WIN32 + FILETIME creation, exit, system, user; + ULARGE_INTEGER li_system, li_user; +#else struct rusage usage; +#endif gettimeofday(&out_marker->wtime, NULL); +#ifdef WIN32 + GetProcessTimes(GetCurrentProcess(), &creation, &exit, &system, &user); + li_system.LowPart = system.dwLowDateTime; + li_system.HighPart = system.dwHighDateTime; + li_user.LowPart = user.dwLowDateTime; + li_user.HighPart = user.dwHighDateTime; + + /* FILETIME is in 100-nanosecond increments */ + out_marker->stime.tv_sec = li_system.QuadPart / 10000000; + out_marker->stime.tv_usec = (li_system.QuadPart % 10000000) / 10; + out_marker->utime.tv_sec = li_system.QuadPart / 10000000; + out_marker->utime.tv_usec = (li_system.QuadPart % 10000000) / 10; +#else getrusage(RUSAGE_SELF, &usage); out_marker->utime = usage.ru_utime; out_marker->stime = usage.ru_stime; +#endif } void PINT_time_diff(PINT_time_marker mark1, @@ -91,6 +141,10 @@ int PINT_copy_object_attr(PVFS_object_attr *dest, PVFS_object_attr *src) { dest->group = src->group; } + if (src->mask & PVFS_ATTR_COMMON_CID) + { + dest->cid = src->cid; + } if (src->mask & PVFS_ATTR_COMMON_PERM) { dest->perms = src->perms; @@ -118,6 +172,16 @@ int PINT_copy_object_attr(PVFS_object_attr *dest, PVFS_object_attr *src) src->u.dir.dirent_count; } + if((src->objtype == PVFS_TYPE_METAFILE) && + (!(src->mask & PVFS_ATTR_META_UNSTUFFED))) + { + /* if this is a metafile, and does _not_ appear to be stuffed, + * then we should propigate the stuffed_size + */ + dest->u.meta.stuffed_size = + src->u.meta.stuffed_size; + } + if (src->mask & PVFS_ATTR_DIR_HINT) { dest->u.dir.hint.dfile_count = @@ -136,7 +200,8 @@ int PINT_copy_object_attr(PVFS_object_attr *dest, PVFS_object_attr 
*src) src->u.dir.hint.dist_params_len; if (dest->u.dir.hint.dist_params_len > 0) { - dest->u.dir.hint.dist_params = strdup(src->u.dir.hint.dist_params); + dest->u.dir.hint.dist_params + = strdup(src->u.dir.hint.dist_params); if (dest->u.dir.hint.dist_params == NULL) { free(dest->u.dir.hint.dist_name); @@ -183,6 +248,7 @@ int PINT_copy_object_attr(PVFS_object_attr *dest, PVFS_object_attr *src) if (dest->u.meta.dfile_array) { free(dest->u.meta.dfile_array); + dest->u.meta.dfile_array = NULL; } } dest->u.meta.dfile_array = malloc(df_array_size); @@ -200,6 +266,41 @@ int PINT_copy_object_attr(PVFS_object_attr *dest, PVFS_object_attr *src) dest->u.meta.dfile_count = src->u.meta.dfile_count; } + if(src->mask & PVFS_ATTR_META_MIRROR_DFILES) + { + PVFS_size df_array_size = src->u.meta.dfile_count * + src->u.meta.mirror_copies_count * + sizeof(PVFS_handle); + + if (df_array_size) + { + if ( (dest->mask & PVFS_ATTR_META_MIRROR_DFILES) + && (dest->u.meta.dfile_count > 0) + && (dest->u.meta.mirror_copies_count > 0) ) + { + if (dest->u.meta.mirror_dfile_array) + { + free(dest->u.meta.mirror_dfile_array); + dest->u.meta.mirror_dfile_array = NULL; + } + } + dest->u.meta.mirror_dfile_array = malloc(df_array_size); + if (!dest->u.meta.mirror_dfile_array) + { + return ret; + } + memcpy(dest->u.meta.mirror_dfile_array, + src->u.meta.mirror_dfile_array, df_array_size); + } + else + { + dest->u.meta.mirror_dfile_array = NULL; + } + dest->u.meta.mirror_copies_count + = src->u.meta.mirror_copies_count; + } + + if(src->mask & PVFS_ATTR_META_DIST) { assert(src->u.meta.dist_size > 0); @@ -245,6 +346,15 @@ void PINT_free_object_attr(PVFS_object_attr *attr) if (attr->u.meta.dfile_array) { free(attr->u.meta.dfile_array); + attr->u.meta.dfile_array = NULL; + } + } + if (attr->mask & PVFS_ATTR_META_MIRROR_DFILES) + { + if (attr->u.meta.mirror_dfile_array) + { + free(attr->u.meta.mirror_dfile_array); + attr->u.meta.mirror_dfile_array = NULL; } } if (attr->mask & PVFS_ATTR_META_DIST) @@ -264,20 
+374,24 @@ void PINT_free_object_attr(PVFS_object_attr *attr) attr->u.sym.target_path) { free(attr->u.sym.target_path); + attr->u.sym.target_path = NULL; } } } else if (attr->objtype == PVFS_TYPE_DIRECTORY) { - if ((attr->mask & PVFS_ATTR_DIR_HINT) || (attr->mask & PVFS_ATTR_DIR_DIRENT_COUNT)) + if ((attr->mask & PVFS_ATTR_DIR_HINT) || + (attr->mask & PVFS_ATTR_DIR_DIRENT_COUNT)) { if (attr->u.dir.hint.dist_name) { free(attr->u.dir.hint.dist_name); + attr->u.dir.hint.dist_name = NULL; } if (attr->u.dir.hint.dist_params) { free(attr->u.dir.hint.dist_params); + attr->u.dir.hint.dist_params = NULL; } } } @@ -309,6 +423,23 @@ char *PINT_util_get_object_type(int objtype) return obj_types[6]; } +/* + * this is just a wrapper for gettimeofday + */ +void PINT_util_get_current_timeval(struct timeval *tv) +{ + gettimeofday(tv, NULL); +} + +int PINT_util_get_timeval_diff(struct timeval *tv_start, struct timeval *tv_end) +{ + return (tv_end->tv_sec * 1e6 + tv_end->tv_usec) - + (tv_start->tv_sec * 1e6 + tv_start->tv_usec); +} + +/* + * this returns time in seconds + */ PVFS_time PINT_util_get_current_time(void) { struct timeval t = {0,0}; @@ -319,6 +450,47 @@ PVFS_time PINT_util_get_current_time(void) return current_time; } +/* + * this gets time in ms - warning, can roll over + */ +PVFS_time PINT_util_get_time_ms(void) +{ + struct timeval t = {0,0}; + PVFS_time current_time = 0; + + gettimeofday(&t, NULL); + current_time = ((PVFS_time)t.tv_sec) * 1000 + t.tv_usec / 1000; + return current_time; +} + +/* + * this gets time in us - warning, can roll over + */ +PVFS_time PINT_util_get_time_us(void) +{ + struct timeval t = {0,0}; + PVFS_time current_time = 0; + + gettimeofday(&t, NULL); + current_time = ((PVFS_time)t.tv_sec) * 1000000 + t.tv_usec; + return current_time; +} + +/* parses a struct timeval into a readable timestamp string*/ +/* assumes sufficient memory has been allocated for str, no checking */ +/* to be safe, make str a 64 character string atleast */ +void 
PINT_util_parse_timeval(struct timeval tv, char *str) +{ + time_t now; + struct tm *currentTime; + + now = tv.tv_sec; + currentTime = localtime(&now); + strftime(str, 64, "%m/%d/%Y %H:%M:%S", currentTime); + + return; +} + PVFS_time PINT_util_mktime_version(PVFS_time time) { struct timeval t = {0,0}; @@ -334,6 +506,663 @@ PVFS_time PINT_util_mkversion_time(PVFS_time version) return (PVFS_time)(version >> 32); } +struct timespec PINT_util_get_abs_timespec(int microsecs) +{ + struct timeval now, add, result; + struct timespec tv; + + gettimeofday(&now, NULL); + add.tv_sec = (microsecs / 1e6); + add.tv_usec = (microsecs % 1000000); +#ifdef WIN32 + result.tv_sec = add.tv_sec + now.tv_sec; + result.tv_usec = add.tv_usec + now.tv_usec; + if (result.tv_usec >= 1000000) + { + result.tv_usec -= 1000000; + result.tv_sec++; + } +#else + timeradd(&now, &add, &result); +#endif + tv.tv_sec = result.tv_sec; + tv.tv_nsec = result.tv_usec * 1e3; + return tv; +} + +void PINT_util_gen_credentials( + PVFS_credentials *credentials) +{ + assert(credentials); + + memset(credentials, 0, sizeof(PVFS_credentials)); +#ifndef WIN32 + /* TODO */ + credentials->uid = geteuid(); + credentials->gid = getegid(); +#endif +} + +/* Windows - inline functions can't be exported to other libraries */ + +#ifndef WIN32 +inline +#endif +void encode_PVFS_BMI_addr_t(char **pptr, const PVFS_BMI_addr_t *x) +{ + const char *addr_str; + + addr_str = BMI_addr_rev_lookup(*x); + encode_string(pptr, &addr_str); +} + +/* determines how much protocol space a BMI_addr_t encoding will consume */ +#ifndef WIN32 +inline +#endif +int encode_PVFS_BMI_addr_t_size_check(const PVFS_BMI_addr_t *x) +{ + const char *addr_str; + addr_str = BMI_addr_rev_lookup(*x); + return(encode_string_size_check(&addr_str)); +} +#ifndef WIN32 +inline +#endif +void decode_PVFS_BMI_addr_t(char **pptr, PVFS_BMI_addr_t *x) +{ + char *addr_string; + decode_string(pptr, &addr_string); + BMI_addr_lookup(x, addr_string); +} + +#ifndef WIN32 +inline 
+#endif +void encode_PVFS_sys_layout(char **pptr, const struct PVFS_sys_layout_s *x) +{ + int tmp_size; + int i; + + /* figure out how big this encoding will be first */ + + tmp_size = 16; /* enumeration and list count */ + for(i=0 ; iserver_list.count; i++) + { + /* room for each server encoding */ + tmp_size += encode_PVFS_BMI_addr_t_size_check(&(x)->server_list.servers[i]); + } + + if(tmp_size > PVFS_REQ_LIMIT_LAYOUT) + { + /* don't try to encode everything. Just set pptr too high so that + * we hit error condition in encode function + */ + gossip_err("Error: layout too large to encode in request protocol.\n"); + *(pptr) += extra_size_PVFS_servreq_create + 1; + return; + } + + /* otherwise we are in business */ + encode_enum(pptr, &x->algorithm); + encode_skip4(pptr, NULL); + encode_int32_t(pptr, &x->server_list.count); + encode_skip4(pptr, NULL); + for(i=0 ; iserver_list.count; i++) + { + encode_PVFS_BMI_addr_t(pptr, &(x)->server_list.servers[i]); + } +} + +#ifndef WIN32 +inline +#endif +void decode_PVFS_sys_layout(char **pptr, struct PVFS_sys_layout_s *x) +{ + int i; + + decode_enum(pptr, &x->algorithm); + decode_skip4(pptr, NULL); + decode_int32_t(pptr, &x->server_list.count); + decode_skip4(pptr, NULL); + if(x->server_list.count) + { + x->server_list.servers = malloc(x->server_list.count*sizeof(*(x->server_list.servers))); + assert(x->server_list.servers); + } + for(i=0 ; iserver_list.count; i++) + { + decode_PVFS_BMI_addr_t(pptr, &(x)->server_list.servers[i]); + } +} + +char *PINT_util_guess_alias(void) +{ + char tmp_alias[1024]; + char *tmpstr; + int ret; + + /* hmm...failed to find alias as part of the server config filename, + * use the hostname to guess + */ + ret = gethostname(tmp_alias, 1024); + if(ret != 0) + { + gossip_err("Failed to get hostname while attempting to guess " + "alias. 
Use -a to specify the alias for this server " + "process directly\n"); + return NULL; + } + + tmpstr = strstr(tmp_alias, "."); + if(tmpstr) + { + *tmpstr = 0; + } + return strdup(tmp_alias); +} + +/* PINT_check_mode() + * + * checks to see if the type of access described by "access_type" is permitted + * for user "uid" of group "gid" on the object with attributes "attr" + * + * returns 0 on success, -PVFS_EACCES if permission is not granted + */ +int PINT_check_mode( + PVFS_object_attr *attr, + PVFS_uid uid, PVFS_gid gid, + enum PINT_access_type access_type) +{ + int in_group_flag = 0; + int ret = 0; + + /* if we don't have masks for the permission information that we + * need, then the system is broken + */ + assert(attr->mask & PVFS_ATTR_COMMON_UID && + attr->mask & PVFS_ATTR_COMMON_GID && + attr->mask & PVFS_ATTR_COMMON_PERM); + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - check_mode called --- " + "(uid=%d,gid=%d,access_type=%d)\n", uid, gid, access_type); + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - object attributes --- " + "(uid=%d,gid=%d,mode=%d)\n", attr->owner, attr->group, + attr->perms); + + /* give root permission, no matter what */ + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, + " - checking if uid (%d) is root ...\n", uid); + if (uid == 0) + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - yes\n"); + return 0; + } + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - no\n"); + + /* see if uid matches object owner */ + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - checking if owner (%d) " + "matches uid (%d)...\n", attr->owner, uid); + if(attr->owner == uid) + { + /* see if object user permissions match access type */ + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - yes\n"); + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - checking if permissions " + "(%d) allows access type (%d) for user...\n", attr->perms, access_type); + if(access_type == PINT_ACCESS_READABLE && (attr->perms & + PVFS_U_READ)) + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - yes\n"); + return(0); + } + 
if(access_type == PINT_ACCESS_WRITABLE && (attr->perms & + PVFS_U_WRITE)) + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - yes\n"); + return(0); + } + if(access_type == PINT_ACCESS_EXECUTABLE && (attr->perms & + PVFS_U_EXECUTE)) + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - yes\n"); + return(0); + } + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - no\n"); + } + else + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - no\n"); + } + + /* see if other bits allow access */ + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - checking if permissions " + "(%d) allows access type (%d) by others...\n", attr->perms, access_type); + if(access_type == PINT_ACCESS_READABLE && (attr->perms & + PVFS_O_READ)) + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - yes\n"); + return(0); + } + if(access_type == PINT_ACCESS_WRITABLE && (attr->perms & + PVFS_O_WRITE)) + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - yes\n"); + return(0); + } + if(access_type == PINT_ACCESS_EXECUTABLE && (attr->perms & + PVFS_O_EXECUTE)) + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - yes\n"); + return(0); + } + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - no\n"); + + /* see if gid matches object group */ + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - checking if group (%d) " + "matches gid (%d)...\n", attr->group, gid); + if(attr->group == gid) + { + /* default group match */ + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - yes\n"); + in_group_flag = 1; + } + else + { + /* no default group match, check supplementary groups */ + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - no\n"); + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - checking for" + " supplementary group match...\n"); + ret = PINT_check_group(uid, attr->group); + if(ret == 0) + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - yes\n"); + in_group_flag = 1; + } + else + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - no\n"); + if(ret != -PVFS_ENOENT) + { + /* system error; not just failed match */ + return(ret); + } + } + } + + if(in_group_flag) + { + /* see if object group 
permissions match access type */ + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - checking if permissions " + "(%d) allows access type (%d) for group...\n", attr->perms, access_type); + if(access_type == PINT_ACCESS_READABLE && (attr->perms & + PVFS_G_READ)) + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - yes\n"); + return(0); + } + if(access_type == PINT_ACCESS_WRITABLE && (attr->perms & + PVFS_G_WRITE)) + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - yes\n"); + return(0); + } + if(access_type == PINT_ACCESS_EXECUTABLE && (attr->perms & + PVFS_G_EXECUTE)) + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - yes\n"); + return(0); + } + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, " - no\n"); + } + + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, "******PINT_check_mode: denying access\n"); + /* default case: access denied */ + return -PVFS_EACCES; +} + +/* PINT_check_group() + * + * checks to see if uid is a member of gid + * + * returns 0 on success, -PVFS_ENOENT if not a member, other PVFS error codes + * on system failure + */ +static int PINT_check_group(uid_t uid, gid_t gid) +{ +#ifdef HAVE_GETPWUID + struct passwd pwd; + struct passwd* pwd_p = NULL; + struct group grp; + struct group* grp_p = NULL; + int i = 0; + int ret = -1; + + /* Explanation: + * + * We use the _r variants of getpwuid and getgrgid in order to insure + * thread safety; particularly if this function ever gets called in a + * client side situation in which we can't prevent the application from + * making conflicting calls. + * + * These _r functions require that a buffer be supplied for the user and + * group information, however. These buffers may be unconfortably large + * for the stack, so we malloc them on a static pointer and then mutex + * lock this function so that it can still be reentrant. 
+ */ + + gen_mutex_lock(&check_group_mutex); + + if(!check_group_pw_buffer) + { + check_group_pw_buffer = (char*)malloc(pw_buf_size); + check_group_gr_buffer = (char*)malloc(gr_buf_size); + if(!check_group_pw_buffer || !check_group_gr_buffer) + { + if(check_group_pw_buffer) + { + free(check_group_pw_buffer); + check_group_pw_buffer = NULL; + } + if(check_group_gr_buffer) + { + free(check_group_gr_buffer); + check_group_gr_buffer = NULL; + } + gen_mutex_unlock(&check_group_mutex); + return(-PVFS_ENOMEM); + } + } + + /* get user information */ + ret = getpwuid_r(uid, &pwd, check_group_pw_buffer, pw_buf_size, &pwd_p); + if(ret != 0 || pwd_p == NULL) + { + gen_mutex_unlock(&check_group_mutex); + gossip_err("Get user info for (uid=%d) failed." + "errno [%d] error_msg [%s]\n", + uid, ret, strerror(ret)); + return(-PVFS_EINVAL); + } + + /* check primary group */ + if(pwd.pw_gid == gid) + { + gen_mutex_unlock(&check_group_mutex); + return 0; + } + + /* get the members of the group */ + ret = getgrgid_r(gid, &grp, check_group_gr_buffer, gr_buf_size, &grp_p); + if(ret != 0 || grp_p == NULL) + { + gen_mutex_unlock(&check_group_mutex); + gossip_err("Get members for group (gid=%d) failed." + "errno [%d] error_msg [%s]\n", + gid, ret, strerror(ret)); + return(-PVFS_EINVAL); + } + + for(i = 0; grp.gr_mem[i] != NULL; i++) + { + if(0 == strcmp(pwd.pw_name, grp.gr_mem[i])) + { + gen_mutex_unlock(&check_group_mutex); + return 0; + } + } + + gen_mutex_unlock(&check_group_mutex); + return(-PVFS_ENOENT); +#else + return 0; +#endif +} + +/* Checks if a given user is part of any groups that matches the file gid */ +static int in_group_p(PVFS_uid uid, PVFS_gid gid, PVFS_gid attr_group) +{ + if (attr_group == gid) + return 1; + if (PINT_check_group(uid, attr_group) == 0) + return 1; + return 0; +} + +/* + * Return 0 if requesting clients is granted want access to the object + * by the acl. Returns -PVFS_E... otherwise. 
+ */ +int PINT_check_acls(void *acl_buf, size_t acl_size, + PVFS_object_attr *attr, + PVFS_uid uid, PVFS_gid gid, int want) +{ + pvfs2_acl_entry pe, *pa; + int i = 0, found = 0, count = 0; + assert(attr->mask & PVFS_ATTR_COMMON_UID && + attr->mask & PVFS_ATTR_COMMON_GID && + attr->mask & PVFS_ATTR_COMMON_PERM); + + if (acl_size == 0) + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, "no acl's present.. denying access\n"); + return -PVFS_EACCES; + } + + /* keyval for ACLs includes a \0. so subtract the thingie */ + acl_size--; + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, "PINT_check_acls: read keyval size " + " %d (%d acl entries)\n", + (int) acl_size, + (int) (acl_size / sizeof(pvfs2_acl_entry))); + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, "uid = %d, gid = %d, want = %d\n", + uid, gid, want); + + assert(acl_buf); + /* if the acl format doesn't look valid, then return an error rather than + * asserting; we don't want the server to crash due to an invalid keyval + */ + if((acl_size % sizeof(pvfs2_acl_entry)) != 0) + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, "invalid acls on object\n"); + return(-PVFS_EACCES); + } + count = acl_size / sizeof(pvfs2_acl_entry); + + for (i = 0; i < count; i++) + { + pa = (pvfs2_acl_entry *) acl_buf + i; + /* + NOTE: Remember that keyval is encoded as lebf, so convert it + to host representation + */ + pe.p_tag = bmitoh32(pa->p_tag); + pe.p_perm = bmitoh32(pa->p_perm); + pe.p_id = bmitoh32(pa->p_id); + pa = &pe; + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, "Decoded ACL entry %d " + "(p_tag %d, p_perm %d, p_id %d)\n", + i, pa->p_tag, pa->p_perm, pa->p_id); + switch(pa->p_tag) + { + case PVFS2_ACL_USER_OBJ: + /* (May have been checked already) */ + if (attr->owner == uid) + goto check_perm; + break; + case PVFS2_ACL_USER: + if (pa->p_id == uid) + goto mask; + break; + case PVFS2_ACL_GROUP_OBJ: + if (in_group_p(uid, gid, attr->group)) + { + found = 1; + if ((pa->p_perm & want) == want) + goto mask; + } + break; + case PVFS2_ACL_GROUP: + if 
(in_group_p(uid, gid, pa->p_id)) { + found = 1; + if ((pa->p_perm & want) == want) + goto mask; + } + break; + case PVFS2_ACL_MASK: + break; + case PVFS2_ACL_OTHER: + if (found) + { + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, "(1) PINT_check_acls:" + "returning access denied\n"); + return -PVFS_EACCES; + } + else + goto check_perm; + default: + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, "(2) PINT_check_acls: " + "returning EIO\n"); + return -PVFS_EIO; + } + } + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, "(3) PINT_check_acls: returning EIO\n"); + return -PVFS_EIO; +mask: + /* search the remaining entries */ + i = i + 1; + for (; i < count; i++) + { + pvfs2_acl_entry me, *mask_obj = (pvfs2_acl_entry *) acl_buf + i; + + /* + NOTE: Again, since pvfs2_acl_entry is in lebf, we need to + convert it to host endian format + */ + me.p_tag = bmitoh32(mask_obj->p_tag); + me.p_perm = bmitoh32(mask_obj->p_perm); + me.p_id = bmitoh32(mask_obj->p_id); + mask_obj = &me; + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, "Decoded (mask) ACL entry %d " + "(p_tag %d, p_perm %d, p_id %d)\n", + i, mask_obj->p_tag, mask_obj->p_perm, mask_obj->p_id); + if (mask_obj->p_tag == PVFS2_ACL_MASK) + { + if ((pa->p_perm & mask_obj->p_perm & want) == want) + return 0; + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, "(4) PINT_check_acls:" + "returning access denied (mask)\n"); + return -PVFS_EACCES; + } + } + +check_perm: + if ((pa->p_perm & want) == want) + return 0; + gossip_debug(GOSSIP_PERMISSIONS_DEBUG, "(5) PINT_check_acls: returning" + "access denied\n"); + return -PVFS_EACCES; +} + +#ifdef WIN32 +int PINT_statfs_lookup(const char *path, struct statfs *buf) +{ + char *abs_path, *root_path; + int rc, start, index, slash_max, slash_count; + DWORD sect_per_cluster, bytes_per_sect, free_clusters, total_clusters; + + if (path == NULL || buf == NULL) + { + errno = EFAULT; + return -1; + } + + /* allocate a buffer to get an absolute path */ + abs_path = (char *) malloc(MAX_PATH + 1); + if (_fullpath(abs_path, path, MAX_PATH) 
== NULL) + { + free(abs_path); + errno = ENOENT; + return -1; + } + + /* allocate buffer for root path */ + root_path = (char *) malloc(strlen(abs_path) + 1); + + /* parse out the root directory--it will be in + \\MyServer\MyFolder\ form or C:\ form */ + if (abs_path[0] == '\\' && abs_path[1] == '\\') + { + start = 2; + slash_max = 2; + } + else + { + start = 0; + slash_max = 1; + } + + slash_count = 0; + index = start; + + while (abs_path[index] && slash_count < slash_max) + { + if (abs_path[index++] == '\\') + slash_count++; + } + + /* copy root path */ + strncpy_s(root_path, strlen(abs_path)+1, abs_path, index); + + rc = 0; + if (GetDiskFreeSpace(root_path, §_per_cluster, &bytes_per_sect, + &free_clusters, &total_clusters)) + { + buf->f_type = 0; /* not used by PVFS */ + buf->f_bsize = (uint64_t) sect_per_cluster * bytes_per_sect; + buf->f_bavail = buf->f_bfree = (uint64_t) free_clusters; + buf->f_blocks = (uint64_t) total_clusters; + buf->f_fsid = 0; /* no meaningful definition on Windows */ + } + else + { + errno = GetLastError(); + rc = -1; + } + + free(root_path); + free(abs_path); + + return rc; + +} + +int PINT_statfs_fd_lookup(int fd, struct statfs *buf) +{ + HANDLE handle; + char *path; + int rc; + + /* get handle from fd */ + handle = (HANDLE) _get_osfhandle(fd); + + /* get file path from handle */ + path = (char *) malloc(MAX_PATH + 1); + /* Note: only available on Vista/WS2008 and later */ + if (GetFinalPathNameByHandle(handle, path, MAX_PATH, 0) != 0) + { + free(path); + errno = GetLastError(); + return -1; + } + + rc = PINT_statfs_lookup(path, buf); + + free(path); + + return rc; + +} + +#endif /* * Local variables: * c-indent-level: 4 diff --git a/src/common/misc/pint-util.h b/src/common/misc/pint-util.h index bb96d01..3fae703 100644 --- a/src/common/misc/pint-util.h +++ b/src/common/misc/pint-util.h @@ -32,6 +32,11 @@ do{ \ (dest)->perms = (src)->perms; \ (dest)->mask |= PVFS_ATTR_COMMON_PERM; \ } \ + if ((src)->mask & PVFS_ATTR_SYS_CID) \ + { \ + 
(dest)->cid = (src)->cid; \ + (dest)->mask |= PVFS_ATTR_COMMON_CID; \ + } \ if ((src)->mask & PVFS_ATTR_SYS_ATIME) \ { \ (dest)->mask |= PVFS_ATTR_COMMON_ATIME; \ @@ -110,6 +115,31 @@ void PINT_time_diff(PINT_time_marker mark1, #define PINT_statfs_blocks(_statfs) (_statfs)->f_blocks #define PINT_statfs_fsid(_statfs) (_statfs)->f_fsid +#elif defined(WIN32) +/* the statfs type must be defined on Windows */ +typedef unsigned long fsid_t; +struct statfs { + uint64_t f_type; + uint64_t f_bsize; + uint64_t f_blocks; + uint64_t f_bfree; + uint64_t f_bavail; + uint64_t f_files; + uint64_t f_ffree; + fsid_t f_fsid; + uint64_t f_namelen; +}; + +#define PINT_statfs_t struct statfs +/* Use functions with Windows API calls--defined in pint-util.c */ +int PINT_statfs_lookup(const char *path, struct statfs *buf); +int PINT_statfs_fd_lookup(int fd, struct statfs *buf); +#define PINT_statfs_bsize(_statfs) (_statfs)->f_bsize +#define PINT_statfs_bavail(_statfs) (_statfs)->f_bavail +#define PINT_statfs_bfree(_statfs) (_statfs)->f_bfree +#define PINT_statfs_blocks(_statfs) (_statfs)->f_blocks +#define PINT_statfs_fsid(_statfs) (_statfs)->f_fsid + #else #error OS does not have sys/vfs.h or sys/mount.h. 
@@ -119,10 +149,17 @@ void PINT_time_diff(PINT_time_marker mark1, char *PINT_util_get_object_type(int objtype); PVFS_time PINT_util_get_current_time(void); +PVFS_time PINT_util_get_time_ms(void); +PVFS_time PINT_util_get_time_us(void); +void PINT_util_get_current_timeval(struct timeval *tv); +int PINT_util_get_timeval_diff(struct timeval *tv_start, struct timeval *tv_end); +void PINT_util_parse_timeval(struct timeval tv, char *str); PVFS_time PINT_util_mktime_version(PVFS_time time); PVFS_time PINT_util_mkversion_time(PVFS_time version); +struct timespec PINT_util_get_abs_timespec(int microsecs); + void PINT_util_digest_init(void); void PINT_util_digest_finalize(void); @@ -132,6 +169,27 @@ int PINT_util_digest_sha1(const void *input_message, size_t input_length, int PINT_util_digest_md5(const void *input_message, size_t input_length, char **output, size_t *output_length); +char *PINT_util_guess_alias(void); + +void PINT_util_gen_credentials( + PVFS_credentials *credentials); + +enum PINT_access_type +{ + PINT_ACCESS_EXECUTABLE = 1, + PINT_ACCESS_WRITABLE = 2, + PINT_ACCESS_READABLE = 4, +}; + +int PINT_check_mode( + PVFS_object_attr *attr, + PVFS_uid uid, PVFS_gid gid, + enum PINT_access_type access_type); + +int PINT_check_acls(void *acl_buf, size_t acl_size, + PVFS_object_attr *attr, + PVFS_uid uid, PVFS_gid gid, int want); + #endif /* __PINT_UTIL_H */ /* diff --git a/src/common/misc/pvfs2-debug.c b/src/common/misc/pvfs2-debug.c index 474639b..6c3d507 100644 --- a/src/common/misc/pvfs2-debug.c +++ b/src/common/misc/pvfs2-debug.c @@ -11,146 +11,6 @@ #include "pvfs2-debug.h" -/* a private internal type */ -typedef struct -{ - const char *keyword; - uint64_t mask_val; -} __keyword_mask_t; - -#define __DEBUG_ALL ((uint64_t) -1) - -/* map all config keywords to pvfs2 debug masks here */ -static __keyword_mask_t s_keyword_mask_map[] = -{ - /* Log trove debugging info. Same as 'trove'.*/ - { "storage", GOSSIP_TROVE_DEBUG }, - /* Log trove debugging info. 
Same as 'storage'. */ - { "trove", GOSSIP_TROVE_DEBUG }, - /* Log trove operations. */ - { "trove_op", GOSSIP_TROVE_OP_DEBUG }, - /* Log network debug info. */ - { "network", GOSSIP_BMI_DEBUG_ALL }, - /* Log server info, including new operations. */ - { "server", GOSSIP_SERVER_DEBUG }, - /* Log client sysint info. This is only useful for the client. */ - { "client", GOSSIP_CLIENT_DEBUG }, - /* Debug the varstrip distribution */ - { "varstrip", GOSSIP_VARSTRIP_DEBUG }, - /* Log job info */ - { "job", GOSSIP_JOB_DEBUG }, - /* Debug PINT_process_request calls. EXTREMELY verbose! */ - { "request", GOSSIP_REQUEST_DEBUG }, - /* Log request scheduler events */ - { "reqsched", GOSSIP_REQ_SCHED_DEBUG }, - /* Log the flow protocol events, including flowproto_multiqueue */ - { "flowproto", GOSSIP_FLOW_PROTO_DEBUG }, - /* Log flow calls */ - { "flow", GOSSIP_FLOW_DEBUG }, - /* Debug the client name cache. Only useful on the client. */ - { "ncache", GOSSIP_NCACHE_DEBUG }, - /* Debug read-ahead cache events. Only useful on the client. */ - { "mmaprcache", GOSSIP_MMAP_RCACHE_DEBUG }, - /* Debug the attribute cache. Only useful on the client. */ - { "acache", GOSSIP_ACACHE_DEBUG }, - /* Log/Debug distribution calls */ - { "distribution", GOSSIP_DIST_DEBUG }, - /* Debug the server-side dbpf attribute cache */ - { "dbpfattrcache", GOSSIP_DBPF_ATTRCACHE_DEBUG }, - /* Debug the client lookup state machine. */ - { "lookup", GOSSIP_LOOKUP_DEBUG }, - /* Debug the client remove state macine. */ - { "remove", GOSSIP_REMOVE_DEBUG }, - /* Debug the server getattr state machine. */ - { "getattr", GOSSIP_GETATTR_DEBUG }, - /* Debug the server setattr state machine. */ - { "setattr", GOSSIP_SETATTR_DEBUG }, - /* vectored getattr server state machine */ - { "listattr", GOSSIP_LISTATTR_DEBUG }, - /* Debug the client and server get ext attributes SM. */ - { "geteattr", GOSSIP_GETEATTR_DEBUG }, - /* Debug the client and server set ext attributes SM. 
*/ - { "seteattr", GOSSIP_SETEATTR_DEBUG }, - /* Debug the readdir operation (client and server) */ - { "readdir", GOSSIP_READDIR_DEBUG }, - /* Debug the mkdir operation (server only) */ - { "mkdir", GOSSIP_MKDIR_DEBUG }, - /* Debug the io operation (reads and writes) - * for both the client and server */ - { "io", GOSSIP_IO_DEBUG }, - /* Debug the server's open file descriptor cache */ - { "open_cache", GOSSIP_DBPF_OPEN_CACHE_DEBUG }, - /* Debug permissions checking on the server */ - { "permissions", GOSSIP_PERMISSIONS_DEBUG }, - /* Debug the cancel operation */ - { "cancel", GOSSIP_CANCEL_DEBUG }, - /* Debug the msgpair state machine */ - { "msgpair", GOSSIP_MSGPAIR_DEBUG }, - /* Debug the client core app */ - { "clientcore", GOSSIP_CLIENTCORE_DEBUG }, - /* Debug the client timing state machines (job timeout, etc.) */ - { "clientcore_timing", GOSSIP_CLIENTCORE_TIMING_DEBUG }, - /* network encoding */ - { "endecode", GOSSIP_ENDECODE_DEBUG }, - /* Show server file (metadata) accesses (both modify and read-only). */ - { "access", GOSSIP_ACCESS_DEBUG }, - /* Show more detailed server file accesses */ - { "access_detail", GOSSIP_ACCESS_DETAIL_DEBUG }, - /* Debug the listeattr operation */ - { "listeattr", GOSSIP_LISTEATTR_DEBUG }, - /* Debug the state machine management code */ - { "sm", GOSSIP_STATE_MACHINE_DEBUG }, - /* Debug the metadata dbpf keyval functions */ - { "keyval", GOSSIP_DBPF_KEYVAL_DEBUG }, - /* Debug the metadata sync coalescing code */ - { "coalesce", GOSSIP_DBPF_COALESCE_DEBUG }, - /* Display the hostnames instead of IP addrs in debug output */ - { "access_hostnames", GOSSIP_ACCESS_HOSTNAMES }, - /* Show the client device events */ - { "user_dev", GOSSIP_USER_DEV_DEBUG }, - /* Debug the fsck tool */ - { "fsck", GOSSIP_FSCK_DEBUG }, - { "bstream", GOSSIP_BSTREAM_DEBUG }, - /* Everything except the periodic events. 
Useful for debugging */ - { "verbose", - (__DEBUG_ALL & ~(GOSSIP_PERFCOUNTER_DEBUG | GOSSIP_STATE_MACHINE_DEBUG | - GOSSIP_ENDECODE_DEBUG | GOSSIP_USER_DEV_DEBUG)) - }, - /* No debug output */ - { "none", GOSSIP_NO_DEBUG }, - /* Everything */ - { "all", __DEBUG_ALL } -}; -#undef __DEBUG_ALL - -static const int num_keyword_mask_map = (int) \ -(sizeof(s_keyword_mask_map) / sizeof(__keyword_mask_t)); - -/* map all kmod keywords to kmod debug masks here */ -static __keyword_mask_t s_kmod_keyword_mask_map[] = -{ - {"super" , GOSSIP_SUPER_DEBUG}, - {"inode" , GOSSIP_INODE_DEBUG}, - {"file" , GOSSIP_FILE_DEBUG}, - {"dir" , GOSSIP_DIR_DEBUG}, - {"utils" , GOSSIP_UTILS_DEBUG}, - {"wait" , GOSSIP_WAIT_DEBUG}, - {"acl" , GOSSIP_ACL_DEBUG}, - {"dcache", GOSSIP_DCACHE_DEBUG}, - {"dev" , GOSSIP_DEV_DEBUG}, - {"name" , GOSSIP_NAME_DEBUG}, - {"bufmap", GOSSIP_BUFMAP_DEBUG}, - {"cache" , GOSSIP_CACHE_DEBUG}, - {"proc" , GOSSIP_PROC_DEBUG}, - {"xattr" , GOSSIP_XATTR_DEBUG}, - {"init" , GOSSIP_INIT_DEBUG}, - {"none" , GOSSIP_NO_DEBUG}, - {"all" , GOSSIP_MAX_DEBUG} -}; - -static const int num_kmod_keyword_mask_map = (int) \ -(sizeof(s_kmod_keyword_mask_map) / sizeof(__keyword_mask_t)); - static uint64_t debug_to_mask(__keyword_mask_t *mask_map, int num_mask_map, const char *event_logging) { diff --git a/src/common/misc/pvfs2-hint.c b/src/common/misc/pvfs2-hint.c new file mode 100644 index 0000000..9eb5f95 --- /dev/null +++ b/src/common/misc/pvfs2-hint.c @@ -0,0 +1,197 @@ +#include + +#define __PINT_REQPROTO_ENCODE_FUNCS_C + +#include +#include +#include + +#include "pvfs2-hint.h" +#include "gossip.h" + +/* Size: 4 for sentinel at the end, 8 + string size for each element, + * type + length*/ +int32_t PINT_hint_calc_size(const PVFS_hint * hint){ + int count = 4; + PVFS_hint * act; + +#ifdef NO_PVFS_HINT_SUPPORT + return 0; +#endif + + for( act = (PVFS_hint *) hint ; act != NULL ; act = act->next_hint){ + if (hint_types[act->type].transfer_to_server){ + /* length + type + act. 
string + 8-byte alignment*/ + count += 4 + 4 + roundup8(act->length); + } + } + return (int32_t) count; +} + + +int PINT_hint_encode(const PVFS_hint * hint, char * buffer, int * out_length, int max_length){ +#ifdef NO_PVFS_HINT_SUPPORT + return 0; +#endif + + PVFS_hint * act; + char * start_buffer = buffer; + const int32_t number_hint_types = NUMBER_HINT_TYPES; + + for( act = (PVFS_hint *) hint ; act != NULL ; act = act->next_hint){ + if (hint_types[act->type].transfer_to_server){ + if ( buffer - start_buffer + act->length + 8 <= max_length){ + encode_int32_t(& buffer, & (act->type)); + encode_string(& buffer, &(act->hint)); + + continue; + } + gossip_err("PINT_hint_encode too many hints !\n"); + return -PVFS_ENOMEM; + } + } + + encode_int32_t(& buffer, & number_hint_types); + + *out_length = buffer - start_buffer; + /*printf("encode_l %d\n",*out_length);*/ + return 0; +} + +/* + * out_hint must be NULL before running the function ! + */ +int PINT_hint_decode(PVFS_hint ** out_hint, const char * buffer, int * out_length){ +#ifdef NO_PVFS_HINT_SUPPORT + * out_hint = NULL; + * out_length = 0; + return 0; +#endif + + PVFS_hint * hint = NULL; + char * buff = (char *) buffer; + char * cur_hint; + const char * start_buffer = buffer; + int32_t act_hint_type; + + int ret; + + while(1){ + decode_int32_t(& buff, & act_hint_type); + if ( act_hint_type == NUMBER_HINT_TYPES ) { + break; + } + + assert(act_hint_type < NUMBER_HINT_TYPES); + decode_string(& buff, &(cur_hint)); + + ret = PVFS_add_hint(& hint, act_hint_type, cur_hint); + + if ( ret < 0 ) { + gossip_err("PINT_hint_decode error ! 
%d - %s", -ret, + strerror(-ret)); + return ret; + } + } + + * out_length = buff - start_buffer; + /*printf("decode_l %d\n",*out_length);*/ + * out_hint = hint; + return 0; +} + + +void PVFS_free_hint( + PVFS_hint hint) +{ + PINT_hint * act = hint; + PINT_hint * old; + + while(act != NULL) + { + old = act; + act = act->next; + + free(old->hint); + free(old); + } +} + +/* + * example environment variable + * PVFS2_HINTS = + *'REQUEST_ID:blubb+CREATE_SET_DATAFILE_NODES:localhost,localhost' + */ +int PINT_hint_add_environment_hints(PVFS_hint ** out_hint) +{ + char * env; + char * env_copy; + char * save_ptr; + char * aktvar; + int len; + if( out_hint == NULL ) + { + return 1; + } + env = getenv("PVFS2_HINTS"); + if( env == NULL ) + { + return 0; + } + len = strlen(env); + env_copy = (char *) malloc(sizeof(char) * (len+1)); + strncpy(env_copy, env, len+1); + + /* parse hints and do not overwrite already specified hints !*/ + aktvar = strtok_r(env_copy, "+", & save_ptr); + while( aktvar != NULL ) + { + enum pvfs2_hint_type hint_type; + char * rest; + + rest = index(aktvar, ':'); + if (rest == NULL) + { + gossip_err("Environment variable PVFS2_HINTS is malformed starting with: %s\n", + save_ptr); + free(env_copy); + return 0; + } + + *rest = 0; + + hint_type = PVFS_hint_get_type(aktvar); + if( hint_type == -1) + { + gossip_err("Environment variable PVFS2_HINTS is malformed, unknown " + " hint name: %s\n", aktvar); + } + else + { + char * old_hint; + old_hint = PVFS_get_hint(*out_hint, hint_type); + + /* do not overwrite old hints */ + if ( old_hint == NULL ) + { + PVFS_add_hint( out_hint, hint_type, rest+1 ); + } + } + + aktvar = strtok_r(NULL, "+", & save_ptr); + } + + free(env_copy); + return 1; +} + + +/* + * Local variables: + * mode: c + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/common/misc/pvfs2-internal.h b/src/common/misc/pvfs2-internal.h index ddf2ade..13422ca 100644 --- 
a/src/common/misc/pvfs2-internal.h +++ b/src/common/misc/pvfs2-internal.h @@ -78,6 +78,12 @@ #define SYMLINK_TARGET_KEYSTR "st\0" #define SYMLINK_TARGET_KEYLEN 3 +#define METAFILE_LAYOUT_KEYSTR "ml\0" +#define METAFILE_LAYOUT_KEYLEN 3 + +#define NUM_DFILES_REQ_KEYSTR "nd\0" +#define NUM_DFILES_REQ_KEYLEN 3 + /* Optional xattrs have "user.pvfs2." as a prefix */ #define SPECIAL_DIST_NAME_KEYSTR "dist_name\0" #define SPECIAL_DIST_NAME_KEYLEN 21 @@ -87,6 +93,15 @@ #define SPECIAL_NUM_DFILES_KEYLEN 22 #define SPECIAL_METAFILE_HINT_KEYSTR "meta_hint\0" #define SPECIAL_METAFILE_HINT_KEYLEN 21 +#define SPECIAL_MIRROR_PARAMS_KEYSTR "mirror\0" +#define SPECIAL_MIRROR_PARAMS_KEYLEN 18 +#define SPECIAL_MIRROR_COPIES_KEYSTR "mirror.copies\0" +#define SPECIAL_MIRROR_COPIES_KEYLEN 25 +#define SPECIAL_MIRROR_HANDLES_KEYSTR "mirror.handles\0" +#define SPECIAL_MIRROR_HANDLES_KEYLEN 26 +#define SPECIAL_MIRROR_STATUS_KEYSTR "mirror.status\0" +#define SPECIAL_MIRROR_STATUS_KEYLEN 25 + #define IO_MAX_REGIONS 64 diff --git a/src/common/misc/pvfs2-types-debug.h b/src/common/misc/pvfs2-types-debug.h index db1cffe..9e2585d 100644 --- a/src/common/misc/pvfs2-types-debug.h +++ b/src/common/misc/pvfs2-types-debug.h @@ -27,8 +27,10 @@ static inline void PINT_attrmask_print(int debug, uint32_t attrmask) if (attrmask & PVFS_ATTR_COMMON_TYPE) gossip_debug(debug, "\tPVFS_ATTR_COMMON_TYPE\n"); if (attrmask & PVFS_ATTR_META_DIST) gossip_debug(debug, "\tPVFS_ATTR_META_DIST\n"); if (attrmask & PVFS_ATTR_META_DFILES) gossip_debug(debug, "\tPVFS_ATTR_META_DFILES\n"); + if (attrmask & PVFS_ATTR_META_MIRROR_DFILES) gossip_debug(debug, "\tPVFS_ATTR_META_MIRROR_DFILES\n"); if (attrmask & PVFS_ATTR_DATA_SIZE) gossip_debug(debug, "\tPVFS_ATTR_DATA_SIZE\n"); - if (attrmask & PVFS_ATTR_SYS_SIZE) gossip_debug(debug, "\tPVFS_ATTR_SYS_SIZE\n"); + if (attrmask & PVFS_ATTR_DIR_DIRENT_COUNT) gossip_debug(debug, "\tPVFS_ATTR_DIR_DIRENT_COUNT\n"); + if (attrmask & PVFS_ATTR_DIR_HINT) gossip_debug(debug, 
"\tPVFS_ATTR_DIR_HINT\n"); } /* diff --git a/src/common/misc/pvfs2-util.c b/src/common/misc/pvfs2-util.c index 525f1b2..4dba566 100644 --- a/src/common/misc/pvfs2-util.c +++ b/src/common/misc/pvfs2-util.c @@ -14,10 +14,12 @@ #include #include #include -#include #include +#ifndef WIN32 +#include #include #include +#endif #include "pvfs2-config.h" #include "pvfs2-sysint.h" @@ -31,6 +33,7 @@ #include "realpath.h" #include "pint-sysint-utils.h" #include "pvfs2-internal.h" +#include "pint-util.h" #ifdef HAVE_MNTENT_H @@ -94,12 +97,6 @@ static int parse_encoding_string( static int parse_num_dfiles_string(const char* cp, int* num_dfiles); -static int PINT_util_resolve_absolute( - const char* local_path, - PVFS_fs_id* out_fs_id, - char* out_fs_path, - int out_fs_path_max); - struct PVFS_sys_mntent* PVFS_util_gen_mntent( char* config_server, char* fs_name) @@ -140,7 +137,7 @@ struct PVFS_sys_mntent* PVFS_util_gen_mntent( } tmp_ent->flowproto = FLOWPROTO_DEFAULT; - tmp_ent->encoding = ENCODING_DEFAULT; + tmp_ent->encoding = PVFS2_ENCODING_DEFAULT; return(tmp_ent); } @@ -154,16 +151,6 @@ void PVFS_util_gen_mntent_release(struct PVFS_sys_mntent* mntent) return; } -void PVFS_util_gen_credentials( - PVFS_credentials *credentials) -{ - assert(credentials); - - memset(credentials, 0, sizeof(PVFS_credentials)); - credentials->uid = geteuid(); - credentials->gid = getegid(); -} - int PVFS_util_get_umask(void) { static int mask = 0, set = 0; @@ -215,6 +202,7 @@ int PVFS_util_copy_sys_attr( dest_attr->atime = src_attr->atime; dest_attr->mtime = src_attr->mtime; dest_attr->ctime = src_attr->ctime; + dest_attr->cid = src_attr->cid; dest_attr->dfile_count = src_attr->dfile_count; dest_attr->objtype = src_attr->objtype; dest_attr->mask = src_attr->mask; @@ -332,11 +320,12 @@ const PVFS_util_tab *PVFS_util_parse_pvfstab( mntent->pvfs_fs_name = strdup(rindex(mntent->the_pvfs_config_server, '/')); mntent->pvfs_fs_name++; mntent->flowproto = FLOWPROTO_DEFAULT; - mntent->encoding = 
ENCODING_DEFAULT; + mntent->encoding = PVFS2_ENCODING_DEFAULT; mntent->mnt_dir = strdup(epenv); tmp = index(mntent->mnt_dir, '='); *tmp = 0; mntent->mnt_opts = strdup("rw"); + mntent->fs_id = PVFS_FS_ID_NULL; return &s_stat_tab_array[0]; } @@ -579,7 +568,7 @@ const PVFS_util_tab *PVFS_util_parse_pvfstab( /* pick an encoding to use with the server */ current_tab->mntent_array[i].encoding = - ENCODING_DEFAULT; + PVFS2_ENCODING_DEFAULT; cp = PINT_fstab_entry_hasopt(tmp_ent, "encoding"); if (cp) { @@ -1012,10 +1001,16 @@ int PVFS_util_resolve( char* parent_path = NULL; int base_len = 0; + if(strlen(local_path) > (PVFS_NAME_MAX-1)) + { + gossip_err("Error: PVFS_util_resolve() input path too long.\n"); + return(-PVFS_ENAMETOOLONG); + } + /* the most common case first; just try to resolve the path that we * were given */ - ret = PINT_util_resolve_absolute(local_path, out_fs_id, out_fs_path, + ret = PVFS_util_resolve_absolute(local_path, out_fs_id, out_fs_path, out_fs_path_max); if(ret == 0) { @@ -1073,7 +1068,7 @@ int PVFS_util_resolve( return(-PVFS_ENOENT); } - ret = PINT_util_resolve_absolute(tmp_path, out_fs_id, out_fs_path, + ret = PVFS_util_resolve_absolute(tmp_path, out_fs_id, out_fs_path, out_fs_path_max); free(tmp_path); @@ -1172,43 +1167,55 @@ int PVFS_util_init_defaults(void) #define KILOBYTE 1024 #define MEGABYTE (1024 * KILOBYTE) #define GIGABYTE (1024 * MEGABYTE) -/* -#define TERABYTE (1024 * GIGABYTE) -#define PETABYTE (1024 * TERABYTE) -#define EXABYTE (1024 * PETABYTE) -#define ZETTABYTE (1024 * EXABYTE) -#define YOTTABYTE (1024 * ZETTABYTE) -*/ +#define TERABYTE (1024llu * GIGABYTE) +#define PETABYTE (1024llu * TERABYTE) +#define EXABYTE (1024llu * PETABYTE) +#define ZETTABYTE (1024llu * EXABYTE) +#define YOTTABYTE (1024llu * ZETTABYTE) + /*****************/ /* si size units */ /*****************/ #define SI_KILOBYTE 1000 #define SI_MEGABYTE (1000 * SI_KILOBYTE) #define SI_GIGABYTE (1000 * SI_MEGABYTE) -/* -#define SI_TERABYTE (1000 * SI_GIGABYTE) 
-#define SI_PETABYTE (1000 * SI_TERABYTE) -#define SI_EXABYTE (1000 * SI_PETABYTE) -#define SI_ZETTABYTE (1000 * SI_EXABYTE) -#define SI_YOTTABYTE (1000 * SI_ZETTABYTE) -*/ -#define NUM_SIZES 3 +#define SI_TERABYTE (1000llu * SI_GIGABYTE) +#define SI_PETABYTE (1000llu * SI_TERABYTE) +#define SI_EXABYTE (1000llu * SI_PETABYTE) +#define SI_ZETTABYTE (1000llu * SI_EXABYTE) +#define SI_YOTTABYTE (1000llu * SI_ZETTABYTE) + +#if SIZEOF_LONG_INT == 8 +#define NUM_SIZES 5 +#else +#define NUM_SIZES 4 +#endif static PVFS_size PINT_s_size_table[NUM_SIZES] = { - /*YOTTABYTE, ZETTABYTE, EXABYTE, PETABYTE, TERABYTE, */ + /*YOTTABYTE, ZETTABYTE, EXABYTE, */ +#if SIZEOF_LONG_INT == 8 + PETABYTE, + TERABYTE, +#endif GIGABYTE, MEGABYTE, KILOBYTE }; static PVFS_size PINT_s_si_size_table[NUM_SIZES] = { - /*SI_YOTTABYTE, SI_ZETTABYTE, SI_EXABYTE, SI_PETABYTE, SI_TERABYTE, */ + /*SI_YOTTABYTE, SI_ZETTABYTE, SI_EXABYTE, */ +#if SIZEOF_LONG_INT == 8 + SI_PETABYTE, SI_TERABYTE, +#endif SI_GIGABYTE, SI_MEGABYTE, SI_KILOBYTE }; static const char *PINT_s_str_size_table[NUM_SIZES] = { - /*"Y", "Z", "E", "P","T", */ + /*"Y", "Z", "E", */ +#if SIZEOF_LONG_INT == 8 + "P","T", +#endif "G", "M", "K" }; @@ -1476,8 +1483,8 @@ static int parse_encoding_string( const char *name; enum PVFS_encoding_type val; } enc_str[] = - { { "default", ENCODING_DEFAULT }, - { "defaults", ENCODING_DEFAULT }, + { { "default", PVFS2_ENCODING_DEFAULT }, + { "defaults", PVFS2_ENCODING_DEFAULT }, { "direct", ENCODING_DIRECT }, { "le_bfield", ENCODING_LE_BFIELD }, { "xdr", ENCODING_XDR } }; @@ -1583,7 +1590,11 @@ uint32_t PVFS_util_sys_to_object_attr_mask( if (sys_attrmask & PVFS_ATTR_SYS_DFILE_COUNT) { - attrmask |= PVFS_ATTR_META_DFILES; + attrmask |= (PVFS_ATTR_META_DFILES | PVFS_ATTR_META_MIRROR_DFILES); + } + if (sys_attrmask & PVFS_ATTR_SYS_MIRROR_COPIES_COUNT) + { + attrmask |= PVFS_ATTR_META_MIRROR_DFILES; } if (sys_attrmask & PVFS_ATTR_SYS_DIRENT_COUNT) @@ -1601,6 +1612,33 @@ uint32_t 
PVFS_util_sys_to_object_attr_mask( attrmask |= PVFS_ATTR_SYMLNK_TARGET; } + /* we need the distribution in order to calculate block size */ + if (sys_attrmask & PVFS_ATTR_SYS_BLKSIZE) + { + attrmask |= PVFS_ATTR_META_DIST; + } + + if(sys_attrmask & PVFS_ATTR_SYS_UID) + attrmask |= PVFS_ATTR_COMMON_UID; + if(sys_attrmask & PVFS_ATTR_SYS_GID) + attrmask |= PVFS_ATTR_COMMON_GID; + if(sys_attrmask & PVFS_ATTR_SYS_PERM) + attrmask |= PVFS_ATTR_COMMON_PERM; + if(sys_attrmask & PVFS_ATTR_SYS_CID) + attrmask |= PVFS_ATTR_COMMON_CID; + if(sys_attrmask & PVFS_ATTR_SYS_ATIME) + attrmask |= PVFS_ATTR_COMMON_ATIME; + if(sys_attrmask & PVFS_ATTR_SYS_CTIME) + attrmask |= PVFS_ATTR_COMMON_CTIME; + if(sys_attrmask & PVFS_ATTR_SYS_MTIME) + attrmask |= PVFS_ATTR_COMMON_MTIME; + if(sys_attrmask & PVFS_ATTR_SYS_TYPE) + attrmask |= PVFS_ATTR_COMMON_TYPE; + if(sys_attrmask & PVFS_ATTR_SYS_ATIME_SET) + attrmask |= PVFS_ATTR_COMMON_ATIME_SET; + if(sys_attrmask & PVFS_ATTR_SYS_MTIME_SET) + attrmask |= PVFS_ATTR_COMMON_MTIME_SET; + gossip_debug(GOSSIP_GETATTR_DEBUG, "attrmask being passed to server: "); PINT_attrmask_print(GOSSIP_GETATTR_DEBUG, attrmask); @@ -1657,10 +1695,22 @@ uint32_t PVFS_util_object_to_sys_attr_mask( { sys_mask |= PVFS_ATTR_SYS_DFILE_COUNT; } + if (obj_mask & PVFS_ATTR_META_MIRROR_DFILES) + { + sys_mask |= PVFS_ATTR_SYS_MIRROR_COPIES_COUNT; + } + if (obj_mask & PVFS_ATTR_META_DIST) + { + sys_mask |= PVFS_ATTR_SYS_BLKSIZE; + } if (obj_mask & PVFS_ATTR_DIR_HINT) { sys_mask |= PVFS_ATTR_SYS_DIR_HINT; } + + /* NOTE: the PVFS_ATTR_META_UNSTUFFED is intentionally not exposed + * outside of the system interface + */ return sys_mask; } @@ -1713,7 +1763,7 @@ static int parse_num_dfiles_string(const char* cp, int* num_dfiles) return 0; } -/* PINT_util_resolve_absolute() +/* PVFS_util_resolve_absolute() * * given a local path of a file that may reside on a pvfs2 volume, * determine what the fsid and fs relative path is. 
Makes no attempt @@ -1721,7 +1771,7 @@ static int parse_num_dfiles_string(const char* cp, int* num_dfiles) * * returns 0 on succees, -PVFS_error on failure */ -static int PINT_util_resolve_absolute( +int PVFS_util_resolve_absolute( const char* local_path, PVFS_fs_id* out_fs_id, char* out_fs_path, @@ -1920,6 +1970,12 @@ int32_t PVFS_util_translate_mode(int mode, int suid) #undef NUM_MODES } +void PVFS_util_gen_credentials( + PVFS_credentials *credentials) +{ + return(PINT_util_gen_credentials(credentials)); +} + /* * Local variables: * c-indent-level: 4 diff --git a/src/common/misc/realpath.c b/src/common/misc/realpath.c index 1158db3..389ea7f 100644 --- a/src/common/misc/realpath.c +++ b/src/common/misc/realpath.c @@ -30,15 +30,47 @@ #ifndef PATH_MAX #define PATH_MAX 8192 #endif +#ifndef WIN32 #include +#include +#endif #include #include #include #include "realpath.h" -#include "pvfs2-types.h" +#include +#include "pvfs2-util.h" #define MAX_READLINKS 32 +#ifdef WIN32 +/* PINT_realpath() + * + * canonicalizes path and places the result into resolved_path. Includes + * cleaning of symbolic links, trailing slashes, and .. or . components. + * maxreslth is the maximum length allowed in resolved_path. + * + * returns 0 on success, -PVFS_error on failure. + */ +int PINT_realpath( + const char *path, + char *resolved_path, + int maxreslth) +{ + char *ret_path; + + if (resolved_path == NULL || path == NULL) + return -PVFS_EINVAL; + + /* just use CRT version for now */ + ret_path = _fullpath(resolved_path, path, maxreslth); + + if (ret_path == NULL) + return -PVFS_EINVAL; + + return 0; +} +#else /* PINT_realpath() * * canonicalizes path and places the result into resolved_path. Includes @@ -52,6 +84,7 @@ int PINT_realpath( char *resolved_path, int maxreslth) { + PVFS_fs_id fs_id; int readlinks = 0; char *npath; char link_path[PATH_MAX + 1]; @@ -122,7 +155,29 @@ int PINT_realpath( /* See if last pathname component is a symlink. 
*/ *npath = '\0'; + +#ifndef BUILD_USRINT + /* see if this part of the path has a PVFS mount point */ + ret = PVFS_util_resolve_absolute(resolved_path, &fs_id, + link_path, PATH_MAX); + /* we don't care about the output of resolve */ + /* link_path was just a placeholder */ + memset(link_path, 0, PATH_MAX); + if (ret == 0) + { + n = readlink(resolved_path, link_path, PATH_MAX); + } + else + { + n = syscall(SYS_readlink, resolved_path, link_path, PATH_MAX); +#if 0 + /* this doesn't work, a syscall should certainly work */ + n = glibc_ops.readlink(resolved_path, link_path, PATH_MAX); +#endif + } +#else n = readlink(resolved_path, link_path, PATH_MAX); +#endif /* BUILD_USRINT */ if (n < 0) { /* EINVAL means the file exists but isn't a symlink. */ @@ -179,6 +234,7 @@ int PINT_realpath( free(buf); return ret; } +#endif /* WIN32 */ /* * Local variables: diff --git a/src/common/misc/server-config-mgr.c b/src/common/misc/server-config-mgr.c index 652aabc..42fe030 100644 --- a/src/common/misc/server-config-mgr.c +++ b/src/common/misc/server-config-mgr.c @@ -8,7 +8,9 @@ #include #include #include +#ifndef WIN32 #include +#endif #include #include #include @@ -191,7 +193,9 @@ int PINT_server_config_mgr_reload_cached_config_interface(void) "Reloading handle mappings for fs_id %d\n", cur_fs->coll_id); - ret = PINT_cached_config_handle_load_mapping(cur_fs); + ret = + PINT_cached_config_handle_load_mapping(cur_fs, + config->server_config); if (ret) { PVFS_perror( diff --git a/src/common/misc/server-config-mgr.h b/src/common/misc/server-config-mgr.h index 5cec1c5..a36b1d4 100644 --- a/src/common/misc/server-config-mgr.h +++ b/src/common/misc/server-config-mgr.h @@ -36,6 +36,7 @@ int PINT_server_config_mgr_get_abs_min_handle_recycle_time(void); #define PINT_server_config_mgr_get_config __PINT_server_config_mgr_get_config #define PINT_server_config_mgr_put_config __PINT_server_config_mgr_put_config #elif defined(__PVFS2_SERVER__) +#include "src/server/pvfs2-server.h" #define 
PINT_server_config_mgr_get_config(__fsid) get_server_config_struct() static inline void PINT_server_config_mgr_put_config( struct server_configuration_s *config_s) { return; } diff --git a/src/common/misc/server-config.c b/src/common/misc/server-config.c index 465c91a..169d806 100644 --- a/src/common/misc/server-config.c +++ b/src/common/misc/server-config.c @@ -8,11 +8,16 @@ #include #include #include +#ifndef WIN32 #include +#endif #include #include #include #include +#ifdef WIN32 +#include +#endif #include "src/common/dotconf/dotconf.h" #include "server-config.h" @@ -34,7 +39,9 @@ static const char * replace_old_keystring(const char * oldkey); static DOTCONF_CB(get_logstamp); -static DOTCONF_CB(get_storage_space); +static DOTCONF_CB(get_storage_path); +static DOTCONF_CB(get_data_path); +static DOTCONF_CB(get_meta_path); static DOTCONF_CB(enter_defaults_context); static DOTCONF_CB(exit_defaults_context); #ifdef USE_TRUSTED @@ -67,6 +74,7 @@ static DOTCONF_CB(get_name); static DOTCONF_CB(get_logfile); static DOTCONF_CB(get_logtype); static DOTCONF_CB(get_event_logging_list); +static DOTCONF_CB(get_event_tracing); static DOTCONF_CB(get_filesystem_collid); static DOTCONF_CB(get_alias_list); static DOTCONF_CB(check_this_server); @@ -93,6 +101,7 @@ static DOTCONF_CB(get_attr_cache_size); static DOTCONF_CB(get_attr_cache_max_num_elems); static DOTCONF_CB(get_trove_sync_meta); static DOTCONF_CB(get_trove_sync_data); +static DOTCONF_CB(get_file_stuffing); static DOTCONF_CB(get_db_cache_size_bytes); static DOTCONF_CB(get_trove_max_concurrent_io); static DOTCONF_CB(get_db_cache_type); @@ -104,6 +113,8 @@ static DOTCONF_CB(get_server_job_bmi_timeout); static DOTCONF_CB(get_server_job_flow_timeout); static DOTCONF_CB(get_client_job_bmi_timeout); static DOTCONF_CB(get_client_job_flow_timeout); +static DOTCONF_CB(get_precreate_batch_size); +static DOTCONF_CB(get_precreate_low_threshold); static DOTCONF_CB(get_client_retry_limit); static DOTCONF_CB(get_client_retry_delay); 
static DOTCONF_CB(get_secret_key); @@ -112,6 +123,16 @@ static DOTCONF_CB(get_coalescing_low_watermark); static DOTCONF_CB(get_trove_method); static DOTCONF_CB(get_osd_type); static DOTCONF_CB(get_osd_dir_type); +static DOTCONF_CB(get_create_type); +static DOTCONF_CB(get_member_attr_type); +static DOTCONF_CB(get_eco_type); +static DOTCONF_CB(get_eco_num); +static DOTCONF_CB(get_small_file_size); +static DOTCONF_CB(directio_thread_num); +static DOTCONF_CB(directio_ops_per_queue); +static DOTCONF_CB(directio_timeout); +static DOTCONF_CB(tree_width); +static DOTCONF_CB(tree_threshhold); static FUNC_ERRORHANDLER(errorhandler); const char *contextchecker(command_t *cmd, unsigned long mask); @@ -142,7 +163,8 @@ static char *get_handle_range_str( int meta_handle_range); static host_alias_s *find_host_alias_ptr_by_alias( struct server_configuration_s *config_s, - char *alias); + char *alias, + int *index); static struct host_handle_mapping_s *get_or_add_handle_mapping( PINT_llist *list, char *alias); @@ -527,6 +549,9 @@ static const configoption_t options[] = {"EventLogging",ARG_LIST, get_event_logging_list,NULL, CTX_DEFAULTS|CTX_SERVER_OPTIONS,"none,"}, + {"EnableTracing",ARG_STR, get_event_tracing,NULL, + CTX_DEFAULTS|CTX_SERVER_OPTIONS,"no"}, + /* At startup each pvfs server allocates space for a set number * of incoming requests to prevent the allocation delay at the beginning * of each unexpected request. This parameter specifies the number @@ -540,17 +565,51 @@ static const configoption_t options[] = {"UnexpectedRequests",ARG_INT, get_unexp_req,NULL, CTX_DEFAULTS|CTX_SERVER_OPTIONS,"50"}, - /* Specifies the local path for the pvfs2 server to use as storage space. - * This option specifies the default path for all servers and will appear + /* DEPRECATED + * Specifies the local path for the pvfs2 server to use as + * storage space for data files and metadata files. This option should not + * be used in conjuction with DataStorageSpace or MetadataStorageSpace. 
+ * This option is only meant as a migration path for configurations where i + * users do not want (or don't expect to need to) modify their configuration + * to run this version. + * + * This option specifies the default path for all servers and will appear * in the Defaults context. * * NOTE: This can be overridden in the tag on a per-server * basis. Look at the "Option" tag for more details * Example: * - * StorageSpace /tmp/pvfs.storage + * StorageSpace /tmp/pvfs-data.storage + * DEPRECATED. + */ + {"StorageSpace",ARG_STR, get_storage_path,NULL, + CTX_DEFAULTS|CTX_SERVER_OPTIONS,NULL}, + + /* Specifies the local path for the pvfs2 server to use as storage space + * for data files. This option specifies the default path for all servers + * and will appear in the Defaults context. + * + * NOTE: This can be overridden in the tag on a per-server + * basis. Look at the "Option" tag for more details + * Example: + * + * DataStorageSpace /tmp/pvfs-data.storage + */ + {"DataStorageSpace",ARG_STR, get_data_path,NULL, + CTX_DEFAULTS|CTX_SERVER_OPTIONS,NULL}, + + /* Specifies the local path for the pvfs2 server to use as storage space + * for metadata files. This option specifies the default path for all + * servers and will appear in the Defaults context. + * + * NOTE: This can be overridden in the tag on a per-server + * basis. Look at the "Option" tag for more details + * Example: + * + * MetadataStorageSpace /tmp/pvfs-meta.storage */ - {"StorageSpace",ARG_STR, get_storage_space,NULL, + {"MetadataStorageSpace",ARG_STR, get_meta_path,NULL, CTX_DEFAULTS|CTX_SERVER_OPTIONS,NULL}, /* Current implementations of TCP on most systems use a window @@ -616,6 +675,39 @@ static const configoption_t options[] = {"ClientRetryDelayMilliSecs",ARG_INT, get_client_retry_delay,NULL, CTX_DEFAULTS, "2000"}, + /* Specifies the number of handles to be preceated at a time from each + * server using the batch create request. One value is specified for each + * type of DS handle. 
Order is important, it matches the order the types + * are defined in the PVFS_ds_type enum, which lives in + * include/pvfs2-types.h. If that enum changes, it must be changed here + * to match. Currently, this parameter follows the order: + * + * PVFS_TYPE_NONE + * PVFS_TYPE_METAFILE + * PVFS_TYPE_DATAFILE + * PVFS_TYPE_DIRECTORY + * PVFS_TYPE_SYMLINK + * PVFS_TYPE_DIRDATA + * PVFS_TYPE_INTERNAL + * + */ + {"PrecreateBatchSize",ARG_LIST, get_precreate_batch_size,NULL, + CTX_DEFAULTS|CTX_SERVER_OPTIONS, "0, 32, 512, 32, 32, 32, 0" }, + + /* Precreate pools will be "topped off" if they fall below this value. + * One value is specified for each DS handle type. This parameter operates + * the same as the PrecreateBatchSize in that each count coorespends to + * one DS handle type. The order of types is identical to the + * PrecreateBatchSize defined above. */ + {"PrecreateLowThreshold",ARG_LIST, get_precreate_low_threshold,NULL, + CTX_DEFAULTS|CTX_SERVER_OPTIONS, "0, 16, 256, 16, 16, 16, 0"}, + + /* Specifies if file stuffing should be enabled or not. Default is + * enabled; this option is only provided for benchmarking purposes + */ + {"FileStuffing",ARG_STR, get_file_stuffing, NULL, + CTX_FILESYSTEM,"yes"}, + /* This specifies the frequency (in milliseconds) * that performance monitor should be updated * when the pvfs server is running in admin mode. @@ -669,7 +761,7 @@ static const configoption_t options[] = * * usec: [%H:%M:%S.%U] * - * datetime: [%m/%d %H:%M] + * datetime: [%m/%d/%Y %H:%M:%S] * * thread: [%H:%M:%S.%U (%lu)] * @@ -875,8 +967,25 @@ static const configoption_t options[] = {"CoalescingLowWatermark", ARG_INT, get_coalescing_low_watermark, NULL, CTX_STORAGEHINTS, "1"}, - /* This option specifies the method used for trove. Currently the - * dbpf method is the default. Other methods include 'alt-aio'. + /* This option specifies the method used for trove. The method specifies + * how both metadata and data are stored and managed by the PVFS servers. 
+ * Currently the + * alt-aio method is the default. Possible methods are: + *
    + *
  • alt-aio. This uses a thread-based implementation of Asynchronous IO. + *
  • directio. This uses a direct I/O implementation to perform I/O + * operations to datafiles. This method may give significant performance + * improvement if PVFS servers are running over shared storage, especially + * for large I/O accesses. For local storage, including RAID setups, + * the alt-aio method is recommended. + * + *
  • null-aio. This method is an implementation + * that does no disk I/O at all + * and is only useful for development or debugging purposes. It can + * be used to test the performance of the network without doing I/O to disk. + *
  • dbpf. Uses the system's Linux AIO implementation. No longer + * recommended in production environments. + *
* * Note that this option can be specified in either the * Defaults context of the main fs.conf, or in a filesystem specific @@ -892,7 +1001,7 @@ static const configoption_t options[] = * StorageHints context for that filesystem. */ {"TroveMethod", ARG_STR, get_trove_method, NULL, - CTX_DEFAULTS|CTX_STORAGEHINTS, "dbpf"}, + CTX_DEFAULTS|CTX_STORAGEHINTS, "alt-aio"}, /* Specifies the file system's key for use in HMAC-based digests of * client operations. @@ -906,6 +1015,52 @@ static const configoption_t options[] = { "OSDType", ARG_STR, get_osd_type, NULL, CTX_DEFAULTS, "none" }, { "OSDdirType", ARG_STR, get_osd_dir_type, NULL, CTX_DEFAULTS, "pvfs" }, + /* + * Specifies if postcreating is enabled or not + */ + { "PostCreate", ARG_STR, get_create_type, NULL, CTX_DEFAULTS, "no" }, + + /* + * Specifies if getting member attributes is enabled or not + */ + { "MemberAttr", ARG_STR, get_member_attr_type, NULL, CTX_DEFAULTS, "no" }, + + /* + * Specifies if energy saving mode is enabled or not + */ + { "EnergySaving", ARG_STR, get_eco_type, NULL, CTX_DEFAULTS, "no" }, + + /* + * Specifies how many nodes are going to be used in energy saving mode + */ + { "NumECOnodes", ARG_INT, get_eco_num, NULL, CTX_DEFAULTS, 0 }, + + /* Specifies the size of the small file transition point */ + {"SmallFileSize", ARG_INT, get_small_file_size, NULL, CTX_FILESYSTEM, NULL}, + + /* Specifies the number of threads that should be started to service + * Direct I/O operations. This defaults to 30. + */ + {"DirectIOThreadNum", ARG_INT, directio_thread_num, NULL, + CTX_STORAGEHINTS, "30"}, + + /* Specifies the number of operations to service at once in Direct I/O mode. + */ + {"DirectIOOpsPerQueue", ARG_INT, directio_ops_per_queue, NULL, + CTX_STORAGEHINTS, "10"}, + + /* Specifies the timeout in Direct I/O to wait before checking the next queue. 
*/ + {"DirectIOTimeout", ARG_INT, directio_timeout, NULL, + CTX_STORAGEHINTS, "1000"}, + + /* Specifies the number of partitions to use for tree communication. */ + {"TreeWidth", ARG_INT, tree_width, NULL, + CTX_FILESYSTEM, "2"}, + + /* Specifies the minimum number of servers to contact before tree communication kicks in. */ + {"TreeThreshhold", ARG_INT, tree_threshhold, NULL, + CTX_FILESYSTEM, "2"}, + LAST_OPTION }; @@ -916,7 +1071,9 @@ static const configoption_t options[] = * global_config_filename - common config file for all servers * and clients * server_alias_name - alias (if any) provided for this server - * (ignored on client side) + * client side can provide to check + * for a local server + * server_flag - true if running on a server * * Returns: 0 on success; 1 on failure * @@ -924,7 +1081,8 @@ static const configoption_t options[] = int PINT_parse_config( struct server_configuration_s *config_obj, char *global_config_filename, - char *server_alias_name) + char *server_alias_name, + int server_flag) { struct server_configuration_s *config_s; configfile_t *configfile = (configfile_t *)0; @@ -939,6 +1097,11 @@ int PINT_parse_config( config_s = config_obj; memset(config_s, 0, sizeof(struct server_configuration_s)); + if (server_flag && !server_alias_name) + { + gossip_err("Server alias not provided for server config\n"); + return 1; + } config_s->server_alias = server_alias_name; /* set some global defaults for optional parameters */ config_s->logstamp_type = GOSSIP_LOGSTAMP_DEFAULT; @@ -982,20 +1145,34 @@ int PINT_parse_config( { struct host_alias_s *halias; halias = find_host_alias_ptr_by_alias( - config_s, server_alias_name); + config_s, server_alias_name, &config_s->host_index); if (!halias || !halias->bmi_address) { - gossip_err("Configuration file error. " + if (server_flag) + { + gossip_err("Configuration file error. 
" "No host ID specified for alias %s.\n", server_alias_name); - return 1; + return 1; + } } - config_s->host_id = strdup(halias->bmi_address); + else + { + /* save alias bmi_address */ + config_s->host_id = strdup(halias->bmi_address); + } + } + + if (server_flag && !config_s->data_path) + { + gossip_err("Configuration file error. " + "No data storage path specified for alias %s.\n", server_alias_name); + return 1; } - if (server_alias_name && !config_s->storage_path) + if (server_flag && !config_s->meta_path) { gossip_err("Configuration file error. " - "No storage path specified for alias %s.\n", server_alias_name); + "No metadata storage path specified for alias %s.\n", server_alias_name); return 1; } @@ -1079,7 +1256,34 @@ DOTCONF_CB(get_logstamp) } -DOTCONF_CB(get_storage_space) +DOTCONF_CB(get_storage_path) +{ + struct server_configuration_s *config_s = + (struct server_configuration_s *)cmd->context; + if(config_s->configuration_context == CTX_SERVER_OPTIONS && + config_s->my_server_options == 0) + { + return NULL; + } + + if( config_s->data_path ) + { + free(config_s->data_path); + } + + if( config_s->meta_path ) + { + free(config_s->meta_path); + } + + config_s->data_path = + (cmd->data.str ? strdup(cmd->data.str) : NULL); + config_s->meta_path = + (cmd->data.str ? strdup(cmd->data.str) : NULL); + return NULL; +} + +DOTCONF_CB(get_data_path) { struct server_configuration_s *config_s = (struct server_configuration_s *)cmd->context; @@ -1088,11 +1292,31 @@ DOTCONF_CB(get_storage_space) { return NULL; } - if (config_s->storage_path) + if (config_s->data_path) { - free(config_s->storage_path); + free(config_s->data_path); } - config_s->storage_path = + + config_s->data_path = + (cmd->data.str ? 
strdup(cmd->data.str) : NULL); + return NULL; +} + +DOTCONF_CB(get_meta_path) +{ + struct server_configuration_s *config_s = + (struct server_configuration_s *)cmd->context; + if(config_s->configuration_context == CTX_SERVER_OPTIONS && + config_s->my_server_options == 0) + { + return NULL; + } + if (config_s->meta_path) + { + free(config_s->meta_path); + } + + config_s->meta_path = (cmd->data.str ? strdup(cmd->data.str) : NULL); return NULL; } @@ -1168,11 +1392,12 @@ DOTCONF_CB(enter_filesystem_context) /* fill any fs defaults here */ fs_conf->flowproto = FLOWPROTO_DEFAULT; - fs_conf->encoding = ENCODING_DEFAULT; + fs_conf->encoding = PVFS2_ENCODING_DEFAULT; fs_conf->trove_sync_meta = TROVE_SYNC; fs_conf->trove_sync_data = TROVE_SYNC; fs_conf->fp_buffer_size = -1; fs_conf->fp_buffers_per_flow = -1; + fs_conf->file_stuffing = 1; if (!config_s->file_systems) { @@ -1387,6 +1612,9 @@ DOTCONF_CB(get_tcp_buffer_send) return NULL; } +#ifdef WIN32 +#define strcasecmp stricmp +#endif DOTCONF_CB(get_tcp_bind_specific) { struct server_configuration_s *config_s = @@ -1426,6 +1654,115 @@ DOTCONF_CB(get_server_job_bmi_timeout) return NULL; } +DOTCONF_CB(get_precreate_batch_size) +{ + struct server_configuration_s *config_s = + (struct server_configuration_s *)cmd->context; + int i = 0, j = 0, token_count = 0, counts[7], count_count=0; + char **tokens; + + if(config_s->configuration_context == CTX_SERVER_OPTIONS && + config_s->my_server_options == 0) + { + return NULL; + } + + if (config_s->precreate_batch_size != NULL) + { + free(config_s->precreate_batch_size); + config_s->precreate_batch_size = NULL; + } + + /* so this seems silly but a config option of type ARG_LIST doesn't + * split on commas (which is claimed to be the delimiter) but on white + * space. That could possibly be fixed. So, until it is we have to handle + * the possibility of multiple arguments with some number of values per + * argument. 
*/ + for(i = 0; i < cmd->arg_count; i++) + { + token_count = PINT_split_string_list( &tokens, cmd->data.list[i]); + for(j = 0; j < token_count; ++j) + { + counts[count_count++] = atoi(tokens[j]); + } + PINT_free_string_list(tokens, token_count); + } + + /* make sure we scrounged up the right number of values */ + if( count_count != PVFS_DS_TYPE_COUNT ) + { + return "PrecreateBatchSize must contain counts for each DS " + "type in the order NONE, METAFILE, DATAFILE, DIRECTORY, " + "SYMLINK, DIRDATA, INTERNAL\n"; + } + + config_s->precreate_batch_size = calloc( PVFS_DS_TYPE_COUNT, sizeof(int)); + if( config_s->precreate_batch_size == NULL ) + { + return "PrecreateBatchSize malloc failure"; + } + + for( i = 0; i < count_count; i++ ) + { + config_s->precreate_batch_size[i] = counts[i]; + } + + return NULL; +} + +DOTCONF_CB(get_precreate_low_threshold) +{ + struct server_configuration_s *config_s = + (struct server_configuration_s *)cmd->context; + int i = 0, j = 0, token_count = 0, counts[7], count_count=0; + char **tokens; + + if(config_s->configuration_context == CTX_SERVER_OPTIONS && + config_s->my_server_options == 0) + { + return NULL; + } + + if (config_s->precreate_low_threshold != NULL) + { + free(config_s->precreate_low_threshold); + config_s->precreate_low_threshold = NULL; + } + + /* handle multiple values per arguments, gross */ + for(i = 0; i < cmd->arg_count; i++) + { + token_count = PINT_split_string_list( &tokens, cmd->data.list[i]); + for(j = 0; j < token_count; ++j) + { + counts[count_count++] = atoi(tokens[j]); + } + PINT_free_string_list(tokens, token_count); + } + + /* make sure we scrounged up the right number of values */ + if( count_count != PVFS_DS_TYPE_COUNT ) + { + return "PrecreateLowThreshold must contain counts for each DS " + "type in the order NONE, METAFILE, DATAFILE, DIRECTORY, " + "SYMLINK, DIRDATA, INTERNAL\n"; + } + + config_s->precreate_low_threshold = + calloc( PVFS_DS_TYPE_COUNT, sizeof(int)); + if( 
config_s->precreate_low_threshold == NULL ) + { + return "PrecreateLowThreshold malloc failure"; + } + + for( i = 0; i < count_count; i++ ) + { + config_s->precreate_low_threshold[i] = counts[i]; + } + + return NULL; +} + DOTCONF_CB(get_server_job_flow_timeout) { struct server_configuration_s *config_s = @@ -1539,6 +1876,26 @@ DOTCONF_CB(get_event_logging_list) return NULL; } +DOTCONF_CB(get_event_tracing) +{ + struct server_configuration_s *config_s = + (struct server_configuration_s *)cmd->context; + if(config_s->configuration_context == CTX_SERVER_OPTIONS && + config_s->my_server_options == 0) + { + return NULL; + } + if(!strcmp(cmd->data.str, "yes")) + { + config_s->enable_events = 1; + } + else + { + config_s->enable_events = 0; + } + return NULL; +} + DOTCONF_CB(get_flow_module_list) { int i = 0, len = 0; @@ -1961,6 +2318,11 @@ DOTCONF_CB(get_trusted_network) struct server_configuration_s *config_s = (struct server_configuration_s *)cmd->context; + if (cmd->arg_count == 0) + { + return NULL; + } + config_s->allowed_masks = (int *) calloc(cmd->arg_count, sizeof(int)); if (config_s->allowed_masks == NULL) { @@ -1986,6 +2348,7 @@ DOTCONF_CB(get_trusted_network) config_s->allowed_networks_count = 0; return("Parse error in netmask specification\n"); } + /* okay, we enable trusted network as well */ config_s->network_enabled = 1; return NULL; @@ -2054,6 +2417,10 @@ DOTCONF_CB(get_flow_buffers_per_flow) fs_conf = (struct filesystem_configuration_s *) PINT_llist_head(config_s->file_systems); fs_conf->fp_buffers_per_flow = cmd->data.value; + if(fs_conf->fp_buffers_per_flow < 2) + { + return("Error: FlowBuffersPerFlow must be at least 2.\n"); + } return NULL; } @@ -2154,6 +2521,33 @@ DOTCONF_CB(get_attr_cache_max_num_elems) return NULL; } +DOTCONF_CB(get_file_stuffing) +{ + struct filesystem_configuration_s *fs_conf = NULL; + struct server_configuration_s *config_s = + (struct server_configuration_s *)cmd->context; + + fs_conf = (struct filesystem_configuration_s *) + 
PINT_llist_head(config_s->file_systems); + assert(fs_conf); + + if(strcasecmp(cmd->data.str, "yes") == 0) + { + fs_conf->file_stuffing = 1; + } + else if(strcasecmp(cmd->data.str, "no") == 0) + { + fs_conf->file_stuffing = 0; + } + else + { + return("FileStuffing value must be 'yes' or 'no'.\n"); + } + + return NULL; +} + + DOTCONF_CB(get_trove_sync_meta) { struct filesystem_configuration_s *fs_conf = NULL; @@ -2345,8 +2739,13 @@ DOTCONF_CB(get_alias_list) struct server_configuration_s *config_s = (struct server_configuration_s *)cmd->context; struct host_alias_s *cur_alias = NULL; + int i = 0; + int len = 0; + char *ptr; - assert(cmd->arg_count == 2); + if (cmd->arg_count < 2) { + return "Error: alias must include at least one bmi address"; + } /* prevent users from adding the same alias twice */ if(config_s->host_aliases && @@ -2360,7 +2759,17 @@ DOTCONF_CB(get_alias_list) cur_alias = (host_alias_s *) malloc(sizeof(host_alias_s)); cur_alias->host_alias = strdup(cmd->data.list[0]); - cur_alias->bmi_address = strdup(cmd->data.list[1]); + + cur_alias->bmi_address = (char *)calloc(1, 2048); + ptr = cur_alias->bmi_address; + for (i=1; i < cmd->arg_count; i++) { + strncat(ptr, cmd->data.list[i], 2048 - len); + len += strlen(cmd->data.list[i]); + if (i+1 < cmd->arg_count) { + strncat(ptr, ",", 2048 - len); + } + len++; + } if (!config_s->host_aliases) { @@ -2416,12 +2825,12 @@ DOTCONF_CB(get_range_list) is_new_handle_mapping = 1; handle_mapping->alias_mapping = find_host_alias_ptr_by_alias( - config_s, cmd->data.list[i-1]); + config_s, cmd->data.list[i-1], NULL); } assert(handle_mapping->alias_mapping == find_host_alias_ptr_by_alias( - config_s, cmd->data.list[i-1])); + config_s, cmd->data.list[i-1], NULL)); if (!handle_mapping->handle_range && !handle_mapping->handle_extent_array.extent_array) @@ -2606,6 +3015,14 @@ DOTCONF_CB(get_trove_method) { *method = TROVE_METHOD_DBPF_ALTAIO; } + else if(!strcmp(cmd->data.str, "null-aio")) + { + *method = 
TROVE_METHOD_DBPF_NULLAIO; + } + else if(!strcmp(cmd->data.str, "directio")) + { + *method = TROVE_METHOD_DBPF_DIRECTIO; + } else { return "Error unknown TroveMethod option\n"; @@ -2613,6 +3030,60 @@ DOTCONF_CB(get_trove_method) return NULL; } +static DOTCONF_CB(get_create_type) +{ + const char *str = cmd->data.str; + struct server_configuration_s *config = cmd->context; + const char *ret = NULL; + + if (!strcmp(str, "no")) + config->post_create = NO_POST_CREATE; + else if (!strcmp(str, "yes")) + config->post_create = POST_CREATE; + else + ret = "get_create_type: unknown FileSystem PostCreate option\n"; + return ret; +} + +static DOTCONF_CB(get_member_attr_type) +{ + const char *str = cmd->data.str; + struct server_configuration_s *config = cmd->context; + const char *ret = NULL; + + if (!strcmp(str, "no")) + config->member_attr = NO_MEMBER_ATTR; + else if (!strcmp(str, "yes")) + config->member_attr = GET_MEMBER_ATTR; + else + ret = "get_member_attr_type: unknown FileSystem MemberAttr option\n"; + return ret; +} + +static DOTCONF_CB(get_eco_type) +{ + const char *str = cmd->data.str; + struct server_configuration_s *config = cmd->context; + const char *ret = NULL; + + if (!strcmp(str, "no")) + config->energysaving = NO_ENERGY_SAVING; + else if (!strcmp(str, "yes")) + config->energysaving = ENERGY_SAVING; + else + ret = "get_eco_type: unknown FileSystem EnergySaving option\n"; + return ret; +} + +static DOTCONF_CB(get_eco_num) +{ + struct server_configuration_s *config = cmd->context; + + config->econumnodes = cmd->data.value; + + return NULL; +} + static DOTCONF_CB(get_osd_type) { const char *str = cmd->data.str; @@ -2626,7 +3097,9 @@ static DOTCONF_CB(get_osd_type) else if (!strcmp(str, "metafile")) config->osd_type = OSD_METAFILE; else if (!strcmp(str, "mdfile")) + { config->osd_type = OSD_MDFILE; + } else ret = "get_osd_type: unknown FileSystem OSDType option\n"; return ret; @@ -2651,6 +3124,80 @@ static DOTCONF_CB(get_osd_dir_type) return ret; } 
+DOTCONF_CB(get_small_file_size) +{ + struct server_configuration_s *config_s = + (struct server_configuration_s *)cmd->context; + + /* we must be in a storagehints inside a filesystem context */ + struct filesystem_configuration_s *fs_conf = + (struct filesystem_configuration_s *) PINT_llist_head(config_s->file_systems); + + fs_conf->small_file_size = cmd->data.value; + return NULL; +} + +DOTCONF_CB(directio_thread_num) +{ + struct server_configuration_s *config_s = + (struct server_configuration_s *)cmd->context; + + struct filesystem_configuration_s *fs_conf = + (struct filesystem_configuration_s *) + PINT_llist_head(config_s->file_systems); + + fs_conf->directio_thread_num = cmd->data.value; + + return NULL; +} + +DOTCONF_CB(directio_ops_per_queue) +{ + struct server_configuration_s *config_s = + (struct server_configuration_s *)cmd->context; + + struct filesystem_configuration_s *fs_conf = + (struct filesystem_configuration_s *) + PINT_llist_head(config_s->file_systems); + + fs_conf->directio_ops_per_queue = cmd->data.value; + + return NULL; +} + +DOTCONF_CB(directio_timeout) +{ + struct server_configuration_s *config_s = + (struct server_configuration_s *)cmd->context; + + struct filesystem_configuration_s *fs_conf = + (struct filesystem_configuration_s *) + PINT_llist_head(config_s->file_systems); + + fs_conf->directio_timeout = cmd->data.value; + + return NULL; +} + +DOTCONF_CB(tree_width) +{ + struct server_configuration_s *config_s = + (struct server_configuration_s *)cmd->context; + + config_s->tree_width = cmd->data.value; + + return NULL; +} + +DOTCONF_CB(tree_threshhold) +{ + struct server_configuration_s *config_s = + (struct server_configuration_s *)cmd->context; + + config_s->tree_threshhold = cmd->data.value; + + return NULL; +} /* * Function: PINT_config_release @@ -2673,10 +3220,16 @@ void PINT_config_release(struct server_configuration_s *config_s) config_s->host_id = NULL; } - if (config_s->storage_path) + if (config_s->data_path) { - 
free(config_s->storage_path); - config_s->storage_path = NULL; + free(config_s->data_path); + config_s->data_path = NULL; + } + + if (config_s->meta_path) + { + free(config_s->meta_path); + config_s->meta_path = NULL; } if (config_s->fs_config_filename) @@ -2691,6 +3244,18 @@ void PINT_config_release(struct server_configuration_s *config_s) config_s->fs_config_buf = NULL; } + if(config_s->precreate_batch_size) + { + free(config_s->precreate_batch_size); + config_s->precreate_batch_size = NULL; + } + + if(config_s->precreate_low_threshold) + { + free(config_s->precreate_low_threshold); + config_s->precreate_low_threshold = NULL; + } + if (config_s->logfile) { free(config_s->logfile); @@ -2980,6 +3545,17 @@ static void free_filesystem(void *ptr) free(fs->ro_netmasks); fs->ro_netmasks = NULL; } + /* free all root_squash_exception_hosts specifications */ + if (fs->root_squash_exceptions_hosts) + { + free_list_of_strings(fs->root_squash_exceptions_count, &fs->root_squash_exceptions_hosts); + fs->root_squash_exceptions_count = 0; + } + if (fs->root_squash_exceptions_netmasks) + { + free(fs->root_squash_exceptions_netmasks); + fs->root_squash_exceptions_netmasks = NULL; + } /* free all root_squash_hosts specifications */ if (fs->root_squash_hosts) { @@ -3181,17 +3757,20 @@ static void copy_filesystem( static host_alias_s *find_host_alias_ptr_by_alias( struct server_configuration_s *config_s, - char *alias) + char *alias, + int *index) { PINT_llist *cur = NULL; struct host_alias_s *ret = NULL; struct host_alias_s *cur_alias = NULL; + int ind = 0; if (config_s && alias) { cur = config_s->host_aliases; while(cur) { + ind++; cur_alias = PINT_llist_head(cur); if (!cur_alias) { @@ -3208,9 +3787,31 @@ static host_alias_s *find_host_alias_ptr_by_alias( cur = PINT_llist_next(cur); } } + if(index) *index = ind - 1; return ret; } +/* the static function below allocates a new mapping structure + * if one is not found. 
This wrapper removes it and returns + * NULL if not found + */ +struct host_handle_mapping_s *PINT_get_handle_mapping( + PINT_llist *list, + char *alias) +{ + struct host_handle_mapping_s *mapping; + mapping = get_or_add_handle_mapping(list, alias); + if (mapping && mapping->alias_mapping) + { + return mapping; + } + else + { + free(mapping); + return NULL; + } +} + static struct host_handle_mapping_s *get_or_add_handle_mapping( PINT_llist *list, char *alias) @@ -3620,13 +4221,21 @@ static int cache_config_files( { int fd = 0, nread = 0; struct stat statbuf; +#ifdef WIN32 + char working_dir[MAX_PATH+1]; +#else char *working_dir = NULL; +#endif char *my_global_fn = NULL; char buf[512] = {0}; assert(config_s); +#ifdef WIN32 + GetCurrentDirectory(MAX_PATH+1, working_dir); +#else working_dir = getenv("PWD"); +#endif /* pick some filenames if not provided */ my_global_fn = ((global_config_filename != NULL) ? @@ -3654,7 +4263,11 @@ static int cache_config_files( else { assert(working_dir); +#ifdef WIN32 + _snprintf(buf, 512, "%s\\%s",working_dir, my_global_fn); +#else snprintf(buf, 512, "%s/%s",working_dir, my_global_fn); +#endif my_global_fn = buf; goto open_global_config; } @@ -3734,8 +4347,8 @@ static char *get_handle_range_str( assert(cur_h_mapping->alias_mapping->host_alias); assert(cur_h_mapping->handle_range); - if (strcmp(cur_h_mapping->alias_mapping->host_alias, - my_alias) == 0) + if (strcmp(cur_h_mapping->alias_mapping->host_alias, + my_alias) == 0 || !strncmp(cur_h_mapping->alias_mapping->bmi_address, "osd", 3)) { ret = cur_h_mapping->handle_range; break; @@ -4161,7 +4774,7 @@ int PINT_config_pvfs2_mkspace( "storage space")); ret = pvfs2_mkspace( - config->storage_path, cur_fs->file_system_name, + config->data_path, config->meta_path, cur_fs->file_system_name, cur_fs->coll_id, root_handle, cur_meta_handle_range, cur_data_handle_range, create_collection_only, 1); @@ -4216,7 +4829,8 @@ int PINT_config_pvfs2_rmspace( GOSSIP_SERVER_DEBUG,"Removing existing PVFS2 
%s\n", (remove_collection_only ? "collection" : "storage space")); - ret = pvfs2_rmspace(config->storage_path, + ret = pvfs2_rmspace(config->data_path, + config->meta_path, cur_fs->file_system_name, cur_fs->coll_id, remove_collection_only, diff --git a/src/common/misc/server-config.h b/src/common/misc/server-config.h index f0eb9bd..e64c9f1 100644 --- a/src/common/misc/server-config.h +++ b/src/common/misc/server-config.h @@ -86,6 +86,7 @@ typedef struct filesystem_configuration_s int immediate_completion; int coalescing_high_watermark; int coalescing_low_watermark; + int file_stuffing; char *secret_key; @@ -115,6 +116,12 @@ typedef struct filesystem_configuration_s PVFS_uid exp_anon_uid; PVFS_gid exp_anon_gid; + + int32_t small_file_size; + + int32_t directio_thread_num; + int32_t directio_ops_per_queue; + int32_t directio_timeout; } filesystem_configuration_s; typedef struct distribution_param_configuration_s @@ -134,10 +141,12 @@ typedef struct distribution_configuration_s typedef struct server_configuration_s { - char *host_id; + char *host_id; /* bmi_address of this server */ + int host_index; char *server_alias; /* the command line server-alias parameter */ int my_server_options; - char *storage_path; + char *data_path; /* path to data storage directory */ + char *meta_path; /* path to metadata storage directory */ char *fs_config_filename; /* the fs.conf file name */ size_t fs_config_buflen; /* the fs.conf file length */ char *fs_config_buf; /* the fs.conf file contents */ @@ -150,10 +159,13 @@ typedef struct server_configuration_s int client_retry_delay_ms; /* delay between retries */ int perf_update_interval; /* how quickly (in msecs) to update perf monitor */ + uint32_t *precreate_batch_size; /* batch size for each ds type */ + uint32_t *precreate_low_threshold; /* threshold for each ds type */ char *logfile; /* what log file to write to */ char *logtype; /* "file" or "syslog" destination */ enum gossip_logstamp logstamp_type; /* how to timestamp logs */ 
char *event_logging; + int enable_events; char *bmi_modules; /* BMI modules */ char *flow_modules; /* Flow modules */ @@ -190,6 +202,7 @@ typedef struct server_configuration_s */ int trove_method; + enum { OSD_NONE, /* stock pvfs */ OSD_DATAFILE, /* datafiles stored on OSDs */ @@ -203,13 +216,33 @@ typedef struct server_configuration_s OSD_DIR_OBJ, /* dirs in objs in collections */ } osd_dir_type; + enum { + NO_POST_CREATE, /* create the object first and then write to it */ + POST_CREATE, /* delay the creates, create and write at the same time */ + } post_create; + + enum { + NO_MEMBER_ATTR, /* fetch attributes of a single object at once */ + GET_MEMBER_ATTR, /* fetch attributes of all the objects that are members of a collection */ + } member_attr; + + enum { + NO_ENERGY_SAVING, /* fetch attributes of a single object at once */ + ENERGY_SAVING, /* fetch attributes of all the objects that are members of a collection */ + } energysaving; + + int econumnodes; + void *private_data; + int32_t tree_width; + int32_t tree_threshhold; } server_configuration_s; int PINT_parse_config( struct server_configuration_s *config_s, char *global_config_filename, - char *server_alias_name); + char *server_alias_name, + int server_flag); void PINT_config_release( struct server_configuration_s *config_s); @@ -274,6 +307,10 @@ PVFS_fs_id PINT_config_get_fs_id_by_fs_name( struct server_configuration_s *config_s, char *fs_name); +struct host_handle_mapping_s *PINT_get_handle_mapping( + PINT_llist *list, + char *alias); + PINT_llist *PINT_config_get_filesystems( struct server_configuration_s *config_s); diff --git a/src/common/misc/state-machine-fns.c b/src/common/misc/state-machine-fns.c index f6572c6..b316f61 100644 --- a/src/common/misc/state-machine-fns.c +++ b/src/common/misc/state-machine-fns.c @@ -16,27 +16,6 @@ #include "state-machine.h" #include "client-state-machine.h" -/* STATE-MACHINE-FNS.C - * - * This file implements a small collection of functions used when - * interacting 
with the state machine system implemented in - * state-machine.h. Probably you'll only need these functions in one - * file per instance of a state machine implementation. - * - * Note that state-machine.h must be included before this is included. - * This is usually accomplished through including some *other* file that - * includes state-machine.h, because state-machine.h needs a key #define - * before it can be included. - * - * The PINT_OP_STATE_TABLE has been replaced with a macro that must be #defined - * instead: PINT_OP_STATE_GET_MACHINE. - * This allows the _locate function to be used in the client as well. - * - * A good example of this is the pvfs2-server.h in the src/server directory, - * which includes state-machine.h at the bottom, and server-state-machine.c, - * which includes first pvfs2-server.h and then state-machine-fns.h. - */ - struct PINT_frame_s { int task_id; @@ -48,7 +27,7 @@ struct PINT_frame_s static struct PINT_state_s *PINT_pop_state(struct PINT_smcb *); static void PINT_push_state(struct PINT_smcb *, struct PINT_state_s *); static struct PINT_state_s *PINT_sm_task_map(struct PINT_smcb *smcb, int task_id); -static void PINT_sm_start_child_frames(struct PINT_smcb *smcb); +static void PINT_sm_start_child_frames(struct PINT_smcb *smcb, int* children_started); /* Function: PINT_state_machine_halt(void) Params: None @@ -70,32 +49,40 @@ int PINT_state_machine_halt(void) */ int PINT_state_machine_terminate(struct PINT_smcb *smcb, job_status_s *r) { - struct PINT_frame_s *my_frame, *f; + struct PINT_frame_s *f; + void *my_frame; job_id_t id; /* notify parent */ if (smcb->parent_smcb) { gossip_debug(GOSSIP_STATE_MACHINE_DEBUG, - "[SM Terminating Child]: (%p) %s:%s (error_code: %d)\n", + "[SM Terminating Child]: (%p) (error_code: %d)\n", smcb, /* skip pvfs2_ */ - PINT_state_machine_current_machine_name(smcb), - PINT_state_machine_current_state_name(smcb), (int32_t)r->error_code); assert(smcb->parent_smcb->children_running > 0); - my_frame = 
qlist_entry( - smcb->frames.next, struct PINT_frame_s, link); + my_frame = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + /* this will loop from TOS down to the base frame */ + /* base frame will not be processed */ + + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG,"[SM Terminating Child]: my_frame:%p\n",my_frame); +#ifdef WIN32 + qlist_for_each_entry(f, &smcb->parent_smcb->frames, link, struct PINT_frame_s) +#else qlist_for_each_entry(f, &smcb->parent_smcb->frames, link) +#endif { - if(my_frame->frame == f->frame) + if(my_frame == f->frame) { f->error = r->error_code; break; } } + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG,"[SM Terminating Child]: children_running:%d\n" + ,smcb->parent_smcb->children_running); if (--smcb->parent_smcb->children_running <= 0) { /* no more child state machines running, so we can @@ -103,11 +90,15 @@ int PINT_state_machine_terminate(struct PINT_smcb *smcb, job_status_s *r) */ job_null(0, smcb->parent_smcb, 0, r, &id, smcb->context); } - return SM_ACTION_DEFERRED; } + /* call state machine completion function */ if (smcb->terminate_fn) { + if (smcb->parent_smcb) + { + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG,"[SM Terminating Child]: calling terminate function.\n"); + } (*smcb->terminate_fn)(smcb, r); } return 0; @@ -127,6 +118,7 @@ PINT_sm_action PINT_state_machine_invoke(struct PINT_smcb *smcb, PINT_sm_action retval; const char * state_name; const char * machine_name; + int children_started = 0; if (!(smcb) || !(smcb->current_state) || !(smcb->current_state->flag == SM_RUN || @@ -179,8 +171,13 @@ PINT_sm_action PINT_state_machine_invoke(struct PINT_smcb *smcb, if (retval == SM_ACTION_COMPLETE && smcb->current_state->flag == SM_PJMP) { /* start child SMs */ - PINT_sm_start_child_frames(smcb); - if (smcb->children_running > 0) + PINT_sm_start_child_frames(smcb, &children_started); + /* if any children were started, then we return DEFERRED (even + * though they may have all completed immediately). 
The last child + * issues a job_null that will drive progress from here and we don't + * want to cause a double transition. + */ + if (children_started > 0) retval = SM_ACTION_DEFERRED; else retval = SM_ACTION_COMPLETE; @@ -206,6 +203,9 @@ PINT_sm_action PINT_state_machine_start(struct PINT_smcb *smcb, job_status_s *r) */ smcb->immediate = 1; + /* set the base frame to be the current TOS, which should be 0 */ + smcb->base_frame = smcb->frame_count - 1; + /* run the current state action function */ ret = PINT_state_machine_invoke(smcb, r); if (ret == SM_ACTION_COMPLETE || ret == SM_ACTION_TERMINATE) @@ -241,6 +241,11 @@ PINT_sm_action PINT_state_machine_next(struct PINT_smcb *smcb, job_status_s *r) gossip_err("SM next called on invald smcb\n"); return -1; } + if(PINT_smcb_cancelled(smcb)) + { + return SM_ACTION_TERMINATE; + } + /* loop while invoke of new state returns COMPLETED */ do { /* loop while returning from nested SM */ @@ -437,7 +442,6 @@ static int PINT_smcb_mgmt_op(struct PINT_smcb *smcb) static int PINT_smcb_misc_op(struct PINT_smcb *smcb) { return smcb->op == PVFS_SERVER_GET_CONFIG - || smcb->op == PVFS_SERVER_FETCH_CONFIG || smcb->op == PVFS_CLIENT_JOB_TIMER || smcb->op == PVFS_CLIENT_PERF_COUNT_TIMER || smcb->op == PVFS_DEV_UNEXPECTED; @@ -514,6 +518,8 @@ int PINT_smcb_alloc( memset(*smcb, 0, sizeof(struct PINT_smcb)); INIT_QLIST_HEAD(&(*smcb)->frames); + (*smcb)->base_frame = -1; /* no frames yet */ + (*smcb)->frame_count = 0; /* if frame_size given, allocate a frame */ if (frame_size > 0) @@ -528,6 +534,7 @@ int PINT_smcb_alloc( /* zero out all members */ memset(new_frame, 0, frame_size); PINT_sm_push_frame(*smcb, 0, new_frame); + (*smcb)->base_frame = 0; } (*smcb)->op = op; (*smcb)->op_get_state_machine = getmach; @@ -549,13 +556,31 @@ void PINT_smcb_free(struct PINT_smcb *smcb) { struct PINT_frame_s *frame_entry, *tmp; assert(smcb); + + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG,"PINT_smcb_free: smcb:%p\n",smcb); + +#ifdef WIN32 + 
qlist_for_each_entry_safe(frame_entry, tmp, &smcb->frames, link, struct PINT_frame_s, struct PINT_frame_s) +#else qlist_for_each_entry_safe(frame_entry, tmp, &smcb->frames, link) +#endif { + if (frame_entry->frame) + { + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG,"PINT_smcb_free: frame:%p \ttask-id:%d\n" + ,frame_entry->frame + ,frame_entry->task_id); + } + else + { + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG,"PINT_smcb_free: NO FRAME ENTRIES.\n"); + } + if (frame_entry->frame && frame_entry->task_id == 0) { /* only free if task_id is 0 */ free(frame_entry->frame); - } + } qlist_del(&frame_entry->link); free(frame_entry); } @@ -565,61 +590,92 @@ void PINT_smcb_free(struct PINT_smcb *smcb) /* Function: PINT_pop_state * Params: pointer to an smcb pointer * Returns: - * Synopsis: pushes a SM pointer onto a stack for + * Synopsis: pops a SM pointer off of a stack for * implementing nested SMs - called by the * "next" routine above */ static struct PINT_state_s *PINT_pop_state(struct PINT_smcb *smcb) { + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG, + "[SM pop_state]: (%p) op-id: %d stk-ptr: %d base-frm: %d\n", + smcb, smcb->op, smcb->stackptr, smcb->base_frame); + if(smcb->stackptr == 0) { + /* this is not an error, we terminate if we return NULL */ + /* this is return from main */ return NULL; } - return smcb->state_stack[--smcb->stackptr]; + + smcb->stackptr--; + smcb->base_frame = smcb->state_stack[smcb->stackptr].prev_base_frame; + return smcb->state_stack[smcb->stackptr].state; } /* Function: PINT_push_state * Params: pointer to an smcb pointer * Returns: - * Synopsis: pops a SM pointer off of a stack for + * Synopsis: pushes a SM pointer into a stack for * implementing nested SMs - called by the * "next" routine above */ static void PINT_push_state(struct PINT_smcb *smcb, struct PINT_state_s *p) { + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG, + "[SM push_state]: (%p) op-id: %d stk-ptr: %d base-frm: %d\n", + smcb, smcb->op, smcb->stackptr, smcb->base_frame); + 
assert(smcb->stackptr < PINT_STATE_STACK_SIZE); - smcb->state_stack[smcb->stackptr++] = p; + smcb->state_stack[smcb->stackptr].prev_base_frame = smcb->base_frame; + smcb->base_frame = smcb->frame_count - 1; + smcb->state_stack[smcb->stackptr].state = p; + smcb->stackptr++; } /* Function: PINT_sm_frame * Params: pointer to smcb, stack index * Returns: pointer to frame * Synopsis: returns a frame off of the frame stack + * An index of 0 indicates the base frame specified in the SMCB + * A +'ve index indicates a frame pushed by this SM + * A -'ve index indicates a frame from a prior SM + * smcb->frames.next is the top of stack + * smcb->frames.prev is the bottom of stack */ void *PINT_sm_frame(struct PINT_smcb *smcb, int index) { struct PINT_frame_s *frame_entry; - struct qlist_head *next; + struct qlist_head *prev; + int target = smcb->base_frame + index; + + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG, + "[SM frame get]: (%p) op-id: %d index: %d base-frm: %d\n", + smcb, smcb->op, index, smcb->base_frame); if(qlist_empty(&smcb->frames)) { - gossip_debug(GOSSIP_STATE_MACHINE_DEBUG, - "FRAME GET smcb %p index %d -> frame: NULL\n", - smcb, index); + gossip_err("FRAME GET smcb %p index %d target %d -> List empty\n", + smcb, index, target); return NULL; } else { - int i = 0; - - next = smcb->frames.next; - while(i < index) + /* target should be 0 .. 
frame_count-1 now */ + if (target < 0 || target >= smcb->frame_count) + { + gossip_err("FRAME GET smcb %p index %d target %d -> Out of range\n", + smcb, index, target); + return NULL; + } + prev = smcb->frames.prev; + while(target) { - next = next->next; + target--; + prev = prev->prev; } - frame_entry = qlist_entry(next, struct PINT_frame_s, link); + frame_entry = qlist_entry(prev, struct PINT_frame_s, link); return frame_entry->frame; } } @@ -642,6 +698,7 @@ int PINT_sm_push_frame(struct PINT_smcb *smcb, int task_id, void *frame_p) } newframe->task_id = task_id; newframe->frame = frame_p; + newframe->error = 0; qlist_add(&newframe->link, &smcb->frames); smcb->frame_count++; return 0; @@ -711,51 +768,94 @@ static struct PINT_state_s *PINT_sm_task_map(struct PINT_smcb *smcb, int task_id static int child_sm_frame_terminate(struct PINT_smcb * smcb, job_status_s * js_p) { + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG,"CHILD TERMINATE: smcb:%p.\n",smcb); + PINT_smcb_free(smcb); return 0; } /* Function: PINT_sm_start_child_frames - * Params: pointer to an smcb pointer + * Params: pointer to an smcb pointer and pointer to count of children + * started * Returns: number of children started * Synopsis: This starts all the enw child SMs based on the frame_stack * This is called by the invoke function above which expects the * number of children to be returned to decide if the state is * deferred or not. */ -static void PINT_sm_start_child_frames(struct PINT_smcb *smcb) +static void PINT_sm_start_child_frames(struct PINT_smcb *smcb, int* children_started) { int retval; struct PINT_smcb *new_sm; - struct PINT_frame_s *frame_entry; job_status_s r; - struct qlist_head *f; + struct PINT_frame_s *f; + void *my_frame; assert(smcb); memset(&r, 0, sizeof(job_status_s)); - qlist_for_each(f, &smcb->frames) + *children_started = 0; + + my_frame = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + /* Iterate once up front to determine how many children we are going to + * run. 
This has to be set before starting any children, otherwise if + * the first one immediately completes it will mistakenly believe it is + * the last one and signal the parent. + */ +#ifdef WIN32 + qlist_for_each_entry(f, &smcb->frames, link, struct PINT_frame_s) +#else + qlist_for_each_entry(f, &smcb->frames, link) +#endif { - /* skip the last since its the parent frame */ - if(f->next == &smcb->frames) + /* run from TOS until the parent frame */ + if(f->frame == my_frame) { break; } + /* increment parent's counter */ + smcb->children_running++; + } - frame_entry = qlist_entry(f, struct PINT_frame_s, link); - + /* let the caller know how many children are being started; it won't be + * able to tell from the running_count because they may all immediately + * complete before we leave this function. + */ + *children_started = smcb->children_running; +#ifdef WIN32 + qlist_for_each_entry(f, &smcb->frames, link, struct PINT_frame_s) +#else + qlist_for_each_entry(f, &smcb->frames, link) +#endif + { + /* run from TOS until the parent frame */ + if(f->frame == my_frame) + { + break; + } /* allocate smcb */ PINT_smcb_alloc(&new_sm, smcb->op, 0, NULL, child_sm_frame_terminate, smcb->context); /* set parent smcb pointer */ new_sm->parent_smcb = smcb; - /* increment parent's counter */ - smcb->children_running++; /* assign frame */ - PINT_sm_push_frame(new_sm, frame_entry->task_id, frame_entry->frame); + PINT_sm_push_frame(new_sm, f->task_id, f->frame); + /* locate SM to run */ - new_sm->current_state = PINT_sm_task_map(smcb, frame_entry->task_id); + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG,"START CHILD FRAMES: calling smcb is %p.\n",smcb); + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG,"START CHILD FRAMES: with frame: %p.\n",f->frame); + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG,"START CHILD FRAMES: and task id: %d.\n",f->task_id); + new_sm->current_state = PINT_sm_task_map(smcb, f->task_id); + + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG,"START CHILD FRAMES: new_sm->current_state is 
%s\n:" + ,(new_sm->current_state)?"VALID":"INVALID"); + if (new_sm->current_state) + { + gossip_debug(GOSSIP_STATE_MACHINE_DEBUG,"START CHILD FRAMES: new_sm->current_state->flag is %d\n" + ,new_sm->current_state->flag); + } + /* invoke SM */ retval = PINT_state_machine_start(new_sm, &r); if(retval < 0) diff --git a/src/common/misc/state-machine.h b/src/common/misc/state-machine.h index 832c0e6..3de9ad2 100644 --- a/src/common/misc/state-machine.h +++ b/src/common/misc/state-machine.h @@ -50,6 +50,27 @@ enum PINT_state_code { SM_RUN = 9 }; +/*define msgpairarray parameters for server-to-server requests*/ +#define PINT_serv_init_msgarray_params(sm_p, __fsid) \ +do { \ + PINT_sm_msgpair_params *mpp = &sm_p->msgarray_op.params; \ + struct server_configuration_s *server_config = \ + get_server_config_struct(); \ + mpp->job_context = server_job_context; \ + if (server_config) \ + { \ + mpp->job_timeout = server_config->client_job_bmi_timeout; \ + mpp->retry_limit = server_config->client_retry_limit; \ + mpp->retry_delay = server_config->client_retry_delay_ms; \ + } \ + else \ + { \ + mpp->job_timeout = PVFS2_CLIENT_JOB_BMI_TIMEOUT_DEFAULT; \ + mpp->retry_limit = PVFS2_CLIENT_RETRY_LIMIT_DEFAULT; \ + mpp->retry_delay = PVFS2_CLIENT_RETRY_DELAY_MS_DEFAULT; \ + } \ +} while (0) + /* these define things like stack size and so forth for the common * state machine code. 
* The state stack size limits the number of nested state machines that @@ -57,6 +78,12 @@ enum PINT_state_code { */ #define PINT_STATE_STACK_SIZE 8 +struct PINT_state_stack_s +{ + struct PINT_state_s *state; + int prev_base_frame; +}; + /* State machine control block - one per running instance of a state * machine */ @@ -65,10 +92,11 @@ typedef struct PINT_smcb /* state machine execution variables */ int stackptr; struct PINT_state_s *current_state; - struct PINT_state_s *state_stack[PINT_STATE_STACK_SIZE]; + struct PINT_state_stack_s state_stack[PINT_STATE_STACK_SIZE]; - struct qlist_head frames; - int frame_count; + struct qlist_head frames; /* circular list of frames */ + int base_frame; /* index of current base frame */ + int frame_count; /* number of frames in list */ /* usage specific routinet to look up SM from OP */ struct PINT_state_machine_s *(*op_get_state_machine)(int); @@ -161,11 +189,11 @@ int PINT_state_machine_complete(void *); * We assume the first 6 characters of every state machine name are "pvfs2_". */ #define PINT_state_machine_current_machine_name(smcb) \ - (((smcb)->current_state->parent_machine->name) + 6) + ((smcb)->current_state ? (((smcb)->current_state->parent_machine->name) + 6) : "UNKNOWN") /* This macro returns the current state invoked */ #define PINT_state_machine_current_state_name(smcb) \ - ((smcb)->current_state->state_name) + ((smcb)->current_state ? 
((smcb)->current_state->state_name) : "UNKNOWN") /* Prototypes for functions defined in by state machine code */ int PINT_state_machine_halt(void); @@ -175,7 +203,11 @@ PINT_sm_action PINT_state_machine_invoke(struct PINT_smcb *, job_status_s *); PINT_sm_action PINT_state_machine_start(struct PINT_smcb *, job_status_s *); PINT_sm_action PINT_state_machine_continue( struct PINT_smcb *smcb, job_status_s *r); +#ifdef WIN32 +int PINT_state_machine_locate(struct PINT_smcb *); +#else int PINT_state_machine_locate(struct PINT_smcb *) __attribute__((used)); +#endif int PINT_smcb_set_op(struct PINT_smcb *smcb, int op); int PINT_smcb_op(struct PINT_smcb *smcb); int PINT_smcb_immediate_completion(struct PINT_smcb *smcb); @@ -196,8 +228,9 @@ void *PINT_sm_pop_frame(struct PINT_smcb *smcb, int *error_code, int *remaining); -/* This macro is used in calls to PINT_sm_fram() */ +/* This macro is used in calls to PINT_sm_frame() */ #define PINT_FRAME_CURRENT 0 +#define PINT_FRAME_TOP 1 struct PINT_state_machine_s pvfs2_void_sm; diff --git a/src/common/misc/str-utils.c b/src/common/misc/str-utils.c index de648dd..bf7067d 100644 --- a/src/common/misc/str-utils.c +++ b/src/common/misc/str-utils.c @@ -11,6 +11,14 @@ #include #include +#ifdef WIN32 +#include "wincommon.h" + +#define index(s, c) strchr(s, c) +#define snprintf(b, c, f, ...) 
_snprintf(b, c, f, __VA_ARGS__) +#define strdup(s) _strdup(s) +#endif + #include "str-utils.h" /* PINT_string_count_segments() @@ -143,32 +151,40 @@ int PINT_string_next_segment(char *pathname, char *ptr = (char *)0; /* initialize our starting position */ - if (*inout_segp == NULL) { - ptr = pathname; + if (*inout_segp == NULL) + { + ptr = pathname; } - else if (*opaquep != NULL) { - /* replace the '/', point just past it */ - ptr = (char *) *opaquep; - *ptr = '/'; - ptr++; + else if (*opaquep != NULL) + { + /* replace the '/', point just past it */ + ptr = (char *) *opaquep; + *ptr = '/'; + ptr++; } - else return -1; /* NULL *opaquep indicates last segment returned last time */ + else + return -1; /* NULL *opaquep indicates last segment returned last time */ /* at this point, the string is back in its original state */ /* jump past separators */ - while ((*ptr != '\0') && (*ptr == '/')) ptr++; - if (*ptr == '\0') return -1; /* all that was left was trailing '/'s */ + while ((*ptr != '\0') && (*ptr == '/')) + ptr++; + if (*ptr == '\0') + return -1; /* all that was left was trailing '/'s */ *inout_segp = ptr; /* find next separator */ - while ((*ptr != '\0') && (*ptr != '/')) ptr++; - if (*ptr == '\0') *opaquep = NULL; /* indicate last segment */ - else { - /* terminate segment and save position of terminator */ - *ptr = '\0'; - *opaquep = ptr; + while ((*ptr != '\0') && (*ptr != '/')) + ptr++; + if (*ptr == '\0') + *opaquep = NULL; /* indicate last segment */ + else + { + /* terminate segment and save position of terminator */ + *ptr = '\0'; + *opaquep = ptr; } return 0; } @@ -211,7 +227,9 @@ int PINT_parse_handle_ranges( `\0' but **endptr is `\0' on return, the entire string is valid. 
*/ out_extent->first = out_extent->last = -#ifdef HAVE_STRTOULL +#if defined(WIN32) + (PVFS_handle)_strtoui64(p, &endchar, 0); +#elif defined(HAVE_STRTOULL) (PVFS_handle)strtoull(p, &endchar, 0); #else (PVFS_handle)strtoul(p, &endchar, 0); @@ -226,7 +244,9 @@ int PINT_parse_handle_ranges( switch (*endchar) { case '-': /* we got the first half of the range. grab 2nd half */ -#ifdef HAVE_STRTOULL +#if defined(WIN32) + out_extent->last = (PVFS_handle)_strtoui64(p, &endchar, 0); +#elif defined(HAVE_STRTOULL) out_extent->last = (PVFS_handle)strtoull(p, &endchar, 0); #else out_extent->last = (PVFS_handle)strtoul(p, &endchar, 0); @@ -311,14 +331,14 @@ int PINT_get_next_path(char *path, char **newpath, int skip) * segments*/ for(i =0; i < pathlen; i++) { - if (path[i] == '/') - { - num_slashes_seen++; - if (num_slashes_seen > skip) - { - break; - } - } + if (path[i] == '/') + { + num_slashes_seen++; + if (num_slashes_seen > skip) + { + break; + } + } } delimiter1 = i; @@ -327,7 +347,7 @@ int PINT_get_next_path(char *path, char **newpath, int skip) return (-PVFS_EINVAL); } - *newpath = malloc(pathlen - delimiter1); + *newpath = (char *) malloc(pathlen - delimiter1); if (*newpath == NULL) { return (-PVFS_ENOMEM); @@ -355,15 +375,15 @@ int PINT_split_string_list(char ***tokens, const char *comma_list) if (!comma_list || !tokens) { - return (0); + return (0); } /* count how many commas we have first */ holder = comma_list; while ((holder = index(holder, ','))) { - tokencount++; - holder++; + tokencount++; + holder++; } /* if we don't find any commas, just set the entire string to the first @@ -379,18 +399,18 @@ int PINT_split_string_list(char ***tokens, const char *comma_list) *tokens = (char **) malloc(sizeof(char *) * tokencount); if (!(*tokens)) { - return 0; + return 0; } if(1 == tokencount) { - (*tokens)[0] = strdup(comma_list); - if(!(*tokens)[0]) - { - tokencount = 0; - goto failure; - } - return tokencount; + (*tokens)[0] = strdup(comma_list); + if(!(*tokens)[0]) + { + 
tokencount = 0; + goto failure; + } + return tokencount; } /* copy out all of the tokenized strings */ @@ -398,42 +418,41 @@ int PINT_split_string_list(char ***tokens, const char *comma_list) end = comma_list + strlen(comma_list); for (i = 0; i < tokencount && holder; i++) { - holder2 = index(holder, ','); - if (!holder2) - { - holder2 = end; - } + holder2 = index(holder, ','); + if (!holder2) + { + holder2 = end; + } if (holder2 - holder == 0) { retval--; - goto out; + return (retval); } - (*tokens)[i] = (char *) malloc((holder2 - holder) + 1); - if (!(*tokens)[i]) - { - goto failure; - } - strncpy((*tokens)[i], holder, (holder2 - holder)); - (*tokens)[i][(holder2 - holder)] = '\0'; + (*tokens)[i] = (char *) malloc((holder2 - holder) + 1); + if (!(*tokens)[i]) + { + goto failure; + } + strncpy((*tokens)[i], holder, (holder2 - holder)); + (*tokens)[i][(holder2 - holder)] = '\0'; assert(strlen((*tokens)[i]) != 0); - holder = holder2 + 1; + holder = holder2 + 1; } -out: return (retval); - failure: +failure: /* free up any memory we allocated if we failed */ if (*tokens) { - for (i = 0; i < tokencount; i++) - { - if ((*tokens)[i]) - { - free((*tokens)[i]); - } - } - free(*tokens); + for (i = 0; i < tokencount; i++) + { + if ((*tokens)[i]) + { + free((*tokens)[i]); + } + } + free(*tokens); } return (0); } @@ -516,6 +535,140 @@ int PINT_remove_base_dir( return ret; } +#if 0 +/* Initial Windows version -- not yet sure about file path format */ +/* PINT_remove_dir_prefix() + * + * Strips prefix directory out of the path, output includes beginning + * backslash + * + * path and prefix must start with \\path or X:\ + * + * Parameters: + * pathname - pointer to directory string (absolute) + * prefix - pointer to prefix dir string (absolute) + * out_path - pointer to output dir string + * max_out_len - max length of out_base_dir buffer + * + * All incoming arguments must be valid and non-zero + * + * Returns 0 on success; -errno on failure + * + * Example inputs and 
outputs/return values: + * + * pathname: \\mnt\pvfs2\foo, prefix: \\mnt\pvfs2 + * out_path: \foo, returns 0 + * pathname: \\mnt\pvfs2\foo, prefix: \\mnt\pvfs2\ + * out_path: \foo, returns 0 + * pathname: \\mnt\pvfs2\foo\bar, prefix: \\mnt\pvfs2 + * out_path: \foo\bar, returns 0 + * pathname: X:\mnt\pvfs2\foo\bar, prefix: X:\ (or X:) + * out_path: \mnt\pvfs2\foo\bar, returns 0 + * + * invalid pathname input examples: + * pathname: \\mnt\foo\bar, prefix: \\mnt\pvfs2 + * out_path: undefined, returns -PVFS_ENOENT + * pathname: \\mnt\pvfs2fake\foo\bar, prefix: \\mnt\pvfs2 + * out_path: undefined, returns -PVFS_ENOENT + * pathname: \\mnt\foo\bar, prefix: mnt\pvfs2 + * out_path: undefined, returns -PVFS_EINVAL + * pathname: mnt\foo\bar, prefix: \\mnt\pvfs2 + * out_path: undefined, returns -PVFS_EINVAL + * out_max_len not large enough for buffer, returns -PVFS_ENAMETOOLONG + */ +int PINT_remove_dir_prefix( + const char *pathname, + const char *prefix, + char *out_path, + int out_max_len) +{ + int ret = -PVFS_EINVAL; + int valid; + int prefix_len, pathname_len; + int cut_index; + + if (!pathname || !prefix || strlen(pathname) < 2 || strlen(prefix) < 2 || + !out_path || !out_max_len) + { + return ret; + } + + /* make sure we are given absolute paths */ + valid = pathname[0] >= 'A' && pathname[0] <= 'z' && + pathname[1] == ':' && + prefix[0] >= 'A' && prefix[0] <= 'z' && + prefix[1] == ':'; + if (!valid) + { + valid = (strncmp(pathname, "\\\\", 2) == 0) && + (strncmp(prefix, "\\\\", 2) == 0); + } + + if (!valid) + { + return ret; + } + + prefix_len = strlen(prefix); + pathname_len = strlen(pathname); + + /* account for trailing slashes on prefix */ + while (prefix[prefix_len - 1] == '\\') + { + prefix_len--; + } + + /* if prefix_len is now zero, then prefix must have been root + * directory; return copy of entire pathname + */ + if (prefix_len == 0) + { + cut_index = 0; + } + else + { + /* make sure prefix would fit in pathname */ + if (prefix_len > (pathname_len + 1)) + 
return (-PVFS_ENOENT); + + if (strncmp(prefix, pathname, prefix_len) == 0) + { + /* apparent match; see if next element is a slash */ + if ((pathname[prefix_len] != '\\') && + (pathname[prefix_len] != '\0')) + return (-PVFS_ENOENT); + + /* this was indeed a match */ + /* in the case of no trailing slash cut_index will point to the end + * of "prefix" (NULL). */ + cut_index = prefix_len; + } + else + { + return (-PVFS_ENOENT); + } + } + + /* if we hit this point, then we were successful */ + + /* is the buffer large enough? */ + if ((1 + strlen(&(pathname[cut_index]))) > out_max_len) + return (-PVFS_ENAMETOOLONG); + + /* try to handle the case of no trailing slash */ + if (pathname[cut_index] == '\0') + { + out_path[0] = '\\'; + out_path[1] = '\0'; + } + else + /* copy out appropriate part of pathname */ + strcpy(out_path, &(pathname[cut_index])); + + return 0; +} + +#endif /* PINT_remove_dir_prefix() * * Strips prefix directory out of the path, output includes beginning diff --git a/src/common/misc/tcache.h b/src/common/misc/tcache.h index 91ab5dc..2ebd820 100644 --- a/src/common/misc/tcache.h +++ b/src/common/misc/tcache.h @@ -7,7 +7,11 @@ #ifndef __TCACHE_H #define __TCACHE_H +#ifndef WIN32 #include +#else +#include "wincommon.h" +#endif #include "pvfs2-types.h" #include "quicklist.h" #include "quickhash.h" diff --git a/src/common/misc/xattr-utils.h b/src/common/misc/xattr-utils.h index e416afa..fe41453 100644 --- a/src/common/misc/xattr-utils.h +++ b/src/common/misc/xattr-utils.h @@ -17,6 +17,10 @@ #include #endif +#ifdef WIN32 +typedef size_t ssize_t; +#endif + #ifndef HAVE_FGETXATTR_PROTOTYPE #ifndef HAVE_FGETXATTR_EXTRA_ARGS /* prototype taken from fgetxattr(2) on Fedora FC4 */ diff --git a/src/common/quickhash/quickhash.h b/src/common/quickhash/quickhash.h index ae33153..312d34b 100644 --- a/src/common/quickhash/quickhash.h +++ b/src/common/quickhash/quickhash.h @@ -219,14 +219,14 @@ static inline struct qhash_head *qhash_search_and_remove( void *key) { int 
index = 0; - struct qhash_head *tmp_link = NULL; + struct qhash_head *tmp_link = NULL, *tmp_link_safe = NULL; /* find the hash value */ index = table->hash(key, table->table_size); /* linear search at index to find match */ qhash_lock(&table->lock); - qhash_for_each(tmp_link, &(table->array[index])) + qhash_for_each_safe(tmp_link, tmp_link_safe, &(table->array[index])) { if (table->compare(key, tmp_link)) { @@ -251,7 +251,7 @@ static inline struct qhash_head *qhash_search_and_remove_at_index( struct qhash_table *table, int index) { - struct qhash_head *tmp_link = NULL; + struct qhash_head *tmp_link = NULL, *tmp_link_safe = NULL; if(index >= table->table_size) { @@ -259,7 +259,7 @@ static inline struct qhash_head *qhash_search_and_remove_at_index( } qhash_lock(&table->lock); - qhash_for_each(tmp_link, &(table->array[index])) + qhash_for_each_safe(tmp_link, tmp_link_safe, &(table->array[index])) { qhash_del(tmp_link); qhash_unlock(&table->lock); @@ -326,7 +326,7 @@ static inline int quickhash_64bit_hash(void *k, int table_size) static inline int quickhash_string_hash(void *k, int table_size) { const char *str = (char *)k; - uint32_t h, g; + uint32_t g, h = 0; while(*str) { @@ -341,15 +341,6 @@ static inline int quickhash_string_hash(void *k, int table_size) return (int)(h & ((uint64_t)(table_size - 1))); } -static inline int quickhash_voidp_hash(void *key, int tablesize) -{ -#if PVFS2_SIZEOF_VOIDP == 32 - return (int)(((int)key) & (tablesize - 1)); -#else - return quickhash_64bit_hash(key, tablesize); -#endif -} - #endif /* QUICKHASH_H */ /* diff --git a/src/common/quicklist/quicklist.h b/src/common/quicklist/quicklist.h index a7fdc4b..538fc15 100644 --- a/src/common/quicklist/quicklist.h +++ b/src/common/quicklist/quicklist.h @@ -19,17 +19,23 @@ #ifndef QUICKLIST_H #define QUICKLIST_H +#include + +#ifdef WIN32 +#include "wincommon.h" +#endif + struct qlist_head { - struct qlist_head *next, *prev; + struct qlist_head *next, *prev; }; #define QLIST_HEAD_INIT(name) { 
&(name), &(name) } #define QLIST_HEAD(name) \ - struct qlist_head name = QLIST_HEAD_INIT(name) + struct qlist_head name = QLIST_HEAD_INIT(name) #define INIT_QLIST_HEAD(ptr) do { \ - (ptr)->next = (ptr); (ptr)->prev = (ptr); \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ } while (0) /* @@ -39,13 +45,13 @@ struct qlist_head { * the prev/next entries already! */ static __inline__ void __qlist_add(struct qlist_head * new, - struct qlist_head * prev, - struct qlist_head * next) + struct qlist_head * prev, + struct qlist_head * next) { - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; } /** @@ -58,7 +64,7 @@ static __inline__ void __qlist_add(struct qlist_head * new, */ static __inline__ void qlist_add(struct qlist_head *new, struct qlist_head *head) { - __qlist_add(new, head, head->next); + __qlist_add(new, head, head->next); } /** @@ -71,7 +77,7 @@ static __inline__ void qlist_add(struct qlist_head *new, struct qlist_head *head */ static __inline__ void qlist_add_tail(struct qlist_head *new, struct qlist_head *head) { - __qlist_add(new, head->prev, head); + __qlist_add(new, head->prev, head); } /* @@ -82,10 +88,10 @@ static __inline__ void qlist_add_tail(struct qlist_head *new, struct qlist_head * the prev/next entries already! 
*/ static __inline__ void __qlist_del(struct qlist_head * prev, - struct qlist_head * next) + struct qlist_head * next) { - next->prev = prev; - prev->next = next; + next->prev = prev; + prev->next = next; } /** @@ -95,7 +101,7 @@ static __inline__ void __qlist_del(struct qlist_head * prev, */ static __inline__ void qlist_del(struct qlist_head *entry) { - __qlist_del(entry->prev, entry->next); + __qlist_del(entry->prev, entry->next); } /** @@ -104,8 +110,8 @@ static __inline__ void qlist_del(struct qlist_head *entry) */ static __inline__ void qlist_del_init(struct qlist_head *entry) { - __qlist_del(entry->prev, entry->next); - INIT_QLIST_HEAD(entry); + __qlist_del(entry->prev, entry->next); + INIT_QLIST_HEAD(entry); } /** @@ -114,7 +120,24 @@ static __inline__ void qlist_del_init(struct qlist_head *entry) */ static __inline__ int qlist_empty(struct qlist_head *head) { - return head->next == head; + return head->next == head; +} + +/** + * qlist_pop - pop the first item off the list and return it + * @head: qlist to modify + */ +static __inline__ struct qlist_head* qlist_pop(struct qlist_head *head) +{ + struct qlist_head *item = NULL; + + if (!qlist_empty(head)) + { + item = head->next; + qlist_del(item); + } + + return item; } /** @@ -124,18 +147,18 @@ static __inline__ int qlist_empty(struct qlist_head *head) */ static __inline__ void qlist_splice(struct qlist_head *qlist, struct qlist_head *head) { - struct qlist_head *first = qlist->next; + struct qlist_head *first = qlist->next; - if (first != qlist) { - struct qlist_head *last = qlist->prev; - struct qlist_head *at = head->next; + if (first != qlist) { + struct qlist_head *last = qlist->prev; + struct qlist_head *at = head->next; - first->prev = head; - head->next = first; + first->prev = head; + head->next = first; - last->next = at; - at->prev = last; - } + last->next = at; + at->prev = last; + } } /** @@ -145,7 +168,7 @@ static __inline__ void qlist_splice(struct qlist_head *qlist, struct qlist_head * 
@member: the name of the qlist_struct within the struct. */ #define qlist_entry(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)((&((type *)0)->member)))) + ((type *)((char *)(ptr)-(unsigned long)((&((type *)0)->member)))) /** * qlist_for_each - iterate over a qlist @@ -153,7 +176,7 @@ static __inline__ void qlist_splice(struct qlist_head *qlist, struct qlist_head * @head: the head for your qlist. */ #define qlist_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); pos = pos->next) + for (pos = (head)->next; pos != (head); pos = pos->next) /** * list_for_each_safe - iterate over a list safe against @@ -163,8 +186,8 @@ static __inline__ void qlist_splice(struct qlist_head *qlist, struct qlist_head * @head: the head for your list. */ #define qlist_for_each_safe(pos, scratch, head) \ - for (pos = (head)->next, scratch = pos->next; pos != (head);\ - pos = scratch, scratch = pos->next) + for (pos = (head)->next, scratch = pos->next; pos != (head);\ + pos = scratch, scratch = pos->next) /** * qlist_for_each_entry - iterate over list of given type @@ -172,11 +195,17 @@ static __inline__ void qlist_splice(struct qlist_head *qlist, struct qlist_head * @head: the head for your list. * @member: the name of the list_struct within the struct. 
*/ -#define qlist_for_each_entry(pos, head, member) \ - for (pos = qlist_entry((head)->next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = qlist_entry(pos->member.next, typeof(*pos), member)) \ - +#ifdef WIN32 +#define qlist_for_each_entry(pos, head, member, type) \ + for (pos = qlist_entry((head)->next, type, member); \ + &pos->member != (head); \ + pos = qlist_entry(pos->member.next, type, member)) +#else +#define qlist_for_each_entry(pos, head, member) \ + for (pos = qlist_entry((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = qlist_entry(pos->member.next, typeof(*pos), member)) +#endif /** * qlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry * @pos: the type * to use as a loop counter. @@ -184,10 +213,74 @@ static __inline__ void qlist_splice(struct qlist_head *qlist, struct qlist_head * @head: the head for your list. * @member: the name of the list_struct within the struct. */ +#ifdef WIN32 +#define qlist_for_each_entry_safe(pos, n, head, member, pos_type, n_type) \ + for (pos = qlist_entry((head)->next, pos_type, member), \ + n = qlist_entry(pos->member.next, pos_type, member); \ + &pos->member != (head); \ + pos = n, n = qlist_entry(n->member.next, n_type, member)) +#else #define qlist_for_each_entry_safe(pos, n, head, member) \ - for (pos = qlist_entry((head)->next, typeof(*pos), member), \ - n = qlist_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = qlist_entry(n->member.next, typeof(*n), member)) + for (pos = qlist_entry((head)->next, typeof(*pos), member), \ + n = qlist_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = qlist_entry(n->member.next, typeof(*n), member)) +#endif +static inline int qlist_exists(struct qlist_head *list, struct qlist_head *qlink) +{ + struct qlist_head *pos; + + if(qlist_empty(list)) return 0; + + qlist_for_each(pos, list) + { + if(pos == qlink) + { + return 1; 
+ } + } + return 0; +} + +static inline int qlist_count(struct qlist_head *list) +{ + struct qlist_head *pos; + int count = 0; + + pos = list->next; + + while(pos != list) + { + ++count; + pos = pos->next; + } + + return count; +} + +static inline struct qlist_head * qlist_find( + struct qlist_head *list, + int (*compare)(struct qlist_head *, void *), + void *ptr) +{ + struct qlist_head *pos; + qlist_for_each(pos, list) + { + if(compare(pos, ptr)) + { + return pos; + } + } + return NULL; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ #endif /* QUICKLIST_H */ diff --git a/src/common/statecomp/codegen.c b/src/common/statecomp/codegen.c index 083f119..bccdcb3 100644 --- a/src/common/statecomp/codegen.c +++ b/src/common/statecomp/codegen.c @@ -22,7 +22,11 @@ static int needcomma = 1; +#ifdef WIN32 +static void gen_state_decl(struct state *s, char *state_name); +#else static void gen_state_decl(char *state_name); +#endif static void gen_runfunc_decl(char *func_name); static void gen_state_start(char *state_name, char *machine_name); static void gen_state_action( @@ -46,15 +50,25 @@ void gen_machine(char *machine_name) /* dump forward declarations of all the states */ for (s=states; s; s=s->next) { - if(s->action == ACTION_RUN) + if(s->action == ACTION_RUN || s->action == ACTION_PJMP) gen_runfunc_decl(s->function_or_machine); +#ifdef WIN32 + gen_state_decl(s, s->name); +#else gen_state_decl(s->name); +#endif } /* delcare the machine start point */ fprintf(out_file, "\nstruct PINT_state_machine_s %s = {\n", machine_name); +#ifdef WIN32 + /* Windows (VC++) does not support field names in structs */ + fprintf(out_file, "\t\"%s\", /* name */\n", machine_name); + fprintf(out_file, "\t&ST_%s /* first_state */\n", states->name); +#else fprintf(out_file, "\t.name = \"%s\",\n", machine_name); fprintf(out_file, "\t.first_state = &ST_%s\n", states->name); +#endif fprintf(out_file, "};\n\n"); /* generate 
all output */ @@ -127,7 +141,7 @@ static int runfunc_compare(void *key, struct qhash_head *link) static int runfunc_hash(void *key, int table_size) { char *k = (char *)key; - int h, g; + int g, h = 0; while(*k) { h = (h << 4) + *k++; @@ -167,22 +181,53 @@ static void gen_runfunc_decl(char *func_name) } } +#ifdef WIN32 +static void gen_state_decl(struct state *s, char *state_name) +#else static void gen_state_decl(char *state_name) +#endif { +#ifdef WIN32 + struct task *task; + struct transition *t; + int count; +#endif fprintf(out_file, "static struct PINT_state_s ST_%s;\n", state_name); +#ifdef WIN32 + /* determine PJMP count for array declaration */ + if (s->action == ACTION_PJMP) + { + for (task = s->task, count = 0; task; task = task->next, ++count) ; + fprintf(out_file, "static struct PINT_pjmp_tbl_s ST_%s_pjtbl[%d];\n", + state_name, count); + } + /* determine transition count for array declaration */ + for (t = s->transition, count = 0; t; t = t->next, ++count) ; + fprintf(out_file, "static struct PINT_tran_tbl_s ST_%s_trtbl[%d];\n", + state_name, count); +#else fprintf(out_file, "static struct PINT_pjmp_tbl_s ST_%s_pjtbl[];\n", state_name); fprintf(out_file, "static struct PINT_tran_tbl_s ST_%s_trtbl[];\n", state_name); +#endif } void gen_state_start(char *state_name, char *machine_name) { +#ifdef WIN32 + fprintf(out_file, + "static struct PINT_state_s ST_%s = {\n" + "\t \"%s\" , /* state_name */\n" + "\t &%s , /* parent_machine */\n", + state_name, state_name, machine_name); +#else fprintf(out_file, "static struct PINT_state_s ST_%s = {\n" "\t .state_name = \"%s\" ,\n" "\t .parent_machine = &%s ,\n", state_name, state_name, machine_name); +#endif } /** generates first two lines in the state machine (I think), @@ -195,24 +240,46 @@ void gen_state_action(enum state_action action, char *state_name) { switch (action) { - case ACTION_RUN: - fprintf(out_file, "\t .flag = SM_RUN ,\n"); + case ACTION_RUN: +#ifdef WIN32 + fprintf(out_file, "\t SM_RUN , /* flag 
*/\n"); + fprintf(out_file, "\t { %s } , /* action.func */\n", run_func); + fprintf(out_file,"\t NULL , /* pjtbl */\n"); + fprintf(out_file,"\t ST_%s_trtbl /* trtbl */", state_name); +#else + fprintf(out_file, "\t .flag = SM_RUN ,\n"); fprintf(out_file, "\t .action.func = %s ,\n", run_func); fprintf(out_file,"\t .pjtbl = NULL ,\n"); fprintf(out_file,"\t .trtbl = ST_%s_trtbl ", state_name); - break; - case ACTION_PJMP: - fprintf(out_file, "\t .flag = SM_PJMP ,\n"); +#endif + break; + case ACTION_PJMP: +#ifdef WIN32 + fprintf(out_file, "\t SM_PJMP , /* flag */\n"); + fprintf(out_file, "\t { &%s }, /* action.func */\n", run_func); + fprintf(out_file,"\t ST_%s_pjtbl , /* pjtbl */\n", state_name); + fprintf(out_file,"\t ST_%s_trtbl /* trtbl */", state_name); + +#else + fprintf(out_file, "\t .flag = SM_PJMP ,\n"); fprintf(out_file, "\t .action.func = &%s ,\n", run_func); fprintf(out_file,"\t .pjtbl = ST_%s_pjtbl ,\n", state_name); fprintf(out_file,"\t .trtbl = ST_%s_trtbl ", state_name); - break; - case ACTION_JUMP: - fprintf(out_file, "\t .flag = SM_JUMP ,\n"); +#endif + break; + case ACTION_JUMP: +#ifdef WIN32 + fprintf(out_file, "\t SM_JUMP , /* flag */\n"); + fprintf(out_file, "\t { &%s }, /* action.nested */\n", run_func); + fprintf(out_file,"\t NULL , /* pjtbl */\n"); + fprintf(out_file,"\t ST_%s_trtbl /* trtbl */", state_name); +#else + fprintf(out_file, "\t .flag = SM_JUMP ,\n"); fprintf(out_file, "\t .action.nested = &%s ,\n", run_func); fprintf(out_file,"\t .pjtbl = NULL ,\n"); fprintf(out_file,"\t .trtbl = ST_%s_trtbl ", state_name); - break; +#endif + break; } /* generate the end of the state struct with refs to jump tbls */ } @@ -239,7 +306,11 @@ static void gen_return_code(char *return_code) { fprintf(out_file,",\n"); } +#ifdef WIN32 + fprintf(out_file, "\t{ %s ", return_code); +#else fprintf(out_file, "\t{ .return_value = %s ", return_code); +#endif needcomma = 1; } @@ -250,20 +321,41 @@ static void gen_next_state(enum transition_type type, char 
*new_state) fprintf(out_file,",\n"); } switch (type) { - case TRANS_PJMP: - fprintf(out_file, "\n\t .state_machine = &%s }", new_state); - break; - case TRANS_NEXT_STATE: - fprintf(out_file, "\t .next_state = &ST_%s }", new_state); - break; - case TRANS_RETURN: - terminate_path_flag = 1; - fprintf(out_file, "\t .flag = SM_RETURN }"); - break; - case TRANS_TERMINATE: - terminate_path_flag = 1; - fprintf(out_file, "\n\t .flag = SM_TERM }"); - break; + case TRANS_PJMP: +#ifdef WIN32 + fprintf(out_file, "\t SM_NONE ,\n"); /* flag */ + fprintf(out_file, "\n\t &%s }", new_state); +#else + fprintf(out_file, "\n\t .state_machine = &%s }", new_state); +#endif + break; + case TRANS_NEXT_STATE: +#ifdef WIN32 + fprintf(out_file, "\t SM_NONE ,\n"); /* flag */ + fprintf(out_file, "\t &ST_%s }", new_state); +#else + fprintf(out_file, "\t .next_state = &ST_%s }", new_state); +#endif + break; + case TRANS_RETURN: + terminate_path_flag = 1; +#ifdef WIN32 + fprintf(out_file, "\t SM_RETURN ,\n"); /* flag */ + fprintf(out_file, "\t NULL }"); /* next_state/state_machine */ + +#else + fprintf(out_file, "\t .flag = SM_RETURN }"); +#endif + break; + case TRANS_TERMINATE: + terminate_path_flag = 1; +#ifdef WIN32 + fprintf(out_file, "\t SM_TERM ,\n"); /* flag */ + fprintf(out_file, "\t NULL }"); /* next_state/state_machine */ +#else + fprintf(out_file, "\n\t .flag = SM_TERM }"); +#endif + break; } needcomma = 1; } diff --git a/src/common/statecomp/parser.y b/src/common/statecomp/parser.y index 90bded7..a4747c4 100644 --- a/src/common/statecomp/parser.y +++ b/src/common/statecomp/parser.y @@ -18,6 +18,10 @@ #include "statecomp.h" +#ifdef WIN32 +#define _STDLIB_H /* mark stdlib.h included */ +#endif + /* We never use this, disable default. 
*/ #define YY_LOCATION_PRINT 0 diff --git a/src/common/statecomp/scanner.l b/src/common/statecomp/scanner.l index 27b31a5..0de18bf 100644 --- a/src/common/statecomp/scanner.l +++ b/src/common/statecomp/scanner.l @@ -46,7 +46,6 @@ int yyget_lineno(void); FILE *yyget_in(void); FILE *yyget_out(void); -int yyget_leng(void); char *yyget_text(void); void yyset_lineno(int line_number); void yyset_in(FILE *in_str ); @@ -80,7 +79,9 @@ id [_a-zA-Z][_0-9a-zA-Z]* "\%" {fprintf(out_file, "%s", yytext);} "\%\%" {BEGIN(CODE);} -"\%\%" {fprintf(out_file,"# %d \"%s\"\n", line, in_file_name); +"\%\%" {fprintf(out_file,"#ifndef WIN32\n"); + fprintf(out_file,"# %d \"%s\"\n", line, in_file_name); + fprintf(out_file,"#endif\n"); BEGIN(0);} "machine" {RETURNVAL(MACHINE);} diff --git a/src/common/statecomp/statecomp.c b/src/common/statecomp/statecomp.c index 6b75f3b..2a3e798 100644 --- a/src/common/statecomp/statecomp.c +++ b/src/common/statecomp/statecomp.c @@ -25,7 +25,9 @@ #include #include #include +#ifndef WIN32 #include +#endif #include "statecomp.h" @@ -37,6 +39,13 @@ int yyparse (void); #endif #endif +#ifdef WIN32 +#define __func__ __FUNCTION__ +#define unlink _unlink + +extern int yyparse(); +#endif + /* * Global Variables */ diff --git a/src/io/bmi/bmi-method-callback.h b/src/io/bmi/bmi-method-callback.h index 973139b..29734a2 100644 --- a/src/io/bmi/bmi-method-callback.h +++ b/src/io/bmi/bmi-method-callback.h @@ -9,8 +9,9 @@ #include "bmi-method-support.h" -PVFS_BMI_addr_t bmi_method_addr_reg_callback(bmi_method_addr_p map); -int bmi_method_addr_forget_callback(PVFS_BMI_addr_t addr); +BMI_addr_t bmi_method_addr_reg_callback(bmi_method_addr_p map); +int bmi_method_addr_forget_callback(BMI_addr_t addr); +void bmi_method_addr_drop_callback(char *method_name); #endif /* __BMI_METHOD_CALLBACK_H */ diff --git a/src/io/bmi/bmi-method-support.h b/src/io/bmi/bmi-method-support.h index 643ec54..0167b3a 100644 --- a/src/io/bmi/bmi-method-support.h +++ b/src/io/bmi/bmi-method-support.h @@ 
-13,6 +13,7 @@ #include "quicklist.h" #include "bmi-types.h" +#include "pint-event.h" #define BMI_MAX_CONTEXTS 16 @@ -43,6 +44,7 @@ struct bmi_method_addr { int method_type; void *method_data; /* area to be used by specific methods */ + void *parent; /* pointer back to generic BMI address info */ }; typedef struct bmi_method_addr *bmi_method_addr_p; @@ -56,12 +58,16 @@ struct bmi_method_unexpected_info bmi_msg_tag_t tag; }; +/* flags that can be set per method to affect behavior */ +#define BMI_METHOD_FLAG_NO_POLLING 1 + /* This is the table of interface functions that must be provided by BMI * methods. */ struct bmi_method_ops { const char *method_name; + int flags; int (*initialize) (bmi_method_addr_p, int, int); int (*finalize) (void); int (*set_info) (int, void *); @@ -78,7 +84,8 @@ struct bmi_method_ops enum bmi_buffer_type, bmi_msg_tag_t, void *, - bmi_context_id); + bmi_context_id, + PVFS_hint hints); int (*post_sendunexpected) (bmi_op_id_t *, bmi_method_addr_p, const void *, @@ -86,7 +93,8 @@ struct bmi_method_ops enum bmi_buffer_type, bmi_msg_tag_t, void *, - bmi_context_id); + bmi_context_id, + PVFS_hint hints); int (*post_recv) (bmi_op_id_t *, bmi_method_addr_p, void *, @@ -95,7 +103,8 @@ struct bmi_method_ops enum bmi_buffer_type, bmi_msg_tag_t, void *, - bmi_context_id); + bmi_context_id, + PVFS_hint hints); int (*test) (bmi_op_id_t, int *, bmi_error_code_t *, @@ -134,7 +143,8 @@ struct bmi_method_ops enum bmi_buffer_type, bmi_msg_tag_t, void *, - bmi_context_id); + bmi_context_id, + PVFS_hint hints); int (*post_recv_list) (bmi_op_id_t *, bmi_method_addr_p, void *const *, @@ -145,7 +155,8 @@ struct bmi_method_ops enum bmi_buffer_type, bmi_msg_tag_t, void *, - bmi_context_id); + bmi_context_id, + PVFS_hint Hints); int (*post_sendunexpected_list) (bmi_op_id_t *, bmi_method_addr_p, const void *const *, @@ -155,7 +166,8 @@ struct bmi_method_ops enum bmi_buffer_type, bmi_msg_tag_t, void *, - bmi_context_id); + bmi_context_id, + PVFS_hint hints); int 
(*open_context)(bmi_context_id); void (*close_context)(bmi_context_id); int (*cancel)(bmi_op_id_t, bmi_context_id); @@ -200,6 +212,7 @@ struct method_op int list_index; /* index of current buffer to xfer */ /* how much is completed in current buffer */ bmi_size_t cur_index_complete; + PINT_event_id event_id; }; typedef struct method_op method_op_st, *method_op_p; diff --git a/src/io/bmi/bmi-types.h b/src/io/bmi/bmi-types.h index f6c6bc7..684752a 100644 --- a/src/io/bmi/bmi-types.h +++ b/src/io/bmi/bmi-types.h @@ -23,24 +23,36 @@ #include #include "pvfs2-debug.h" -#include "pvfs2-types.h" -typedef PVFS_size bmi_size_t; /**< Data region size */ -typedef PVFS_msg_tag_t bmi_msg_tag_t; /**< User-specified message tag */ -typedef PVFS_context_id bmi_context_id; /**< Context identifier */ -typedef PVFS_id_gen_t bmi_op_id_t; /**< Reference to ongoing network op */ +#ifdef WIN32 +#include +#endif + +typedef int64_t bmi_size_t; /**< Data region size */ +typedef int32_t bmi_msg_tag_t; /**< User-specified message tag */ +typedef int64_t bmi_context_id; /**< Context identifier */ +typedef int64_t bmi_op_id_t; /**< Reference to ongoing network op */ +typedef struct PVFS_hint_s *bmi_hint; + + +#ifdef __PVFS2_TYPES_H +typedef PVFS_BMI_addr_t BMI_addr_t; +#else +typedef int64_t BMI_addr_t; +#endif /* TODO: not using a real type for this yet; need to specify what * error codes look like */ typedef int32_t bmi_error_code_t; /**< error code information */ -#define BMI_MAX_ADDR_LEN PVFS_MAX_SERVER_ADDR_LEN +#define BMI_MAX_ADDR_LEN 256 /** BMI method initialization flags */ enum { BMI_INIT_SERVER = 1, /**< set up to listen for unexpected messages */ - BMI_TCP_BIND_SPECIFIC = 2 /**< bind to a specific IP address if INIT_SERVER */ + BMI_TCP_BIND_SPECIFIC = 2, /**< bind to a specific IP address if INIT_SERVER */ + BMI_AUTO_REF_COUNT = 4 /**< automatically increment ref count on unexpected messages */ }; enum bmi_op_type @@ -77,88 +89,100 @@ enum BMI_TCP_BUFFER_RECEIVE_SIZE = 12, 
BMI_TCP_CLOSE_SOCKET = 13, BMI_OPTIMISTIC_BUFFER_REG = 14, + BMI_TCP_CHECK_UNEXPECTED = 15 +}; + +enum BMI_io_type +{ + BMI_IO_READ = 1, + BMI_IO_WRITE = 2 }; /** used to describe a memory region in passing down a registration * hint from IO routines. */ struct bmi_optimistic_buffer_info { const void *buffer; - PVFS_size len; - enum PVFS_io_type rw; + bmi_size_t len; + enum BMI_io_type rw; }; +#define BMI_ERROR_BIT (1 << 30) +#define BMI_NON_ERRNO_ERROR_BIT (1 << 29) +#define BMIE(num) (num|BMI_ERROR_BIT) +#define BMI_NON_ERROR_BIT (BMI_NON_ERRNO_ERROR_BIT|BMI_ERROR_BIT) + +#ifndef BMI_ERROR +#define BMI_ERROR (1 << 7) /* BMI-specific error */ +#endif + /* mappings from PVFS errors to BMI errors */ -#define BMI_EPERM (PVFS_EPERM | PVFS_ERROR_BMI) -#define BMI_ENOENT (PVFS_ENOENT | PVFS_ERROR_BMI) -#define BMI_EINTR (PVFS_EINTR | PVFS_ERROR_BMI) -#define BMI_EIO (PVFS_EIO | PVFS_ERROR_BMI) -#define BMI_ENXIO (PVFS_ENXIO | PVFS_ERROR_BMI) -#define BMI_EBADF (PVFS_EBADF | PVFS_ERROR_BMI) -#define BMI_EAGAIN (PVFS_EAGAIN | PVFS_ERROR_BMI) -#define BMI_ENOMEM (PVFS_ENOMEM | PVFS_ERROR_BMI) -#define BMI_EFAULT (PVFS_EFAULT | PVFS_ERROR_BMI) -#define BMI_EBUSY (PVFS_EBUSY | PVFS_ERROR_BMI) -#define BMI_EEXIST (PVFS_EEXIST | PVFS_ERROR_BMI) -#define BMI_ENODEV (PVFS_ENODEV | PVFS_ERROR_BMI) -#define BMI_ENOTDIR (PVFS_ENOTDIR | PVFS_ERROR_BMI) -#define BMI_EISDIR (PVFS_EISDIR | PVFS_ERROR_BMI) -#define BMI_EINVAL (PVFS_EINVAL | PVFS_ERROR_BMI) -#define BMI_EMFILE (PVFS_EMFILE | PVFS_ERROR_BMI) -#define BMI_EFBIG (PVFS_EFBIG | PVFS_ERROR_BMI) -#define BMI_ENOSPC (PVFS_ENOSPC | PVFS_ERROR_BMI) -#define BMI_EROFS (PVFS_EROFS | PVFS_ERROR_BMI) -#define BMI_EMLINK (PVFS_EMLINK | PVFS_ERROR_BMI) -#define BMI_EPIPE (PVFS_EPIPE | PVFS_ERROR_BMI) -#define BMI_EDEADLK (PVFS_EDEADLK | PVFS_ERROR_BMI) -#define BMI_ENAMETOOLONG (PVFS_ENAMETOOLONG | PVFS_ERROR_BMI) -#define BMI_ENOLCK (PVFS_ENOLCK | PVFS_ERROR_BMI) -#define BMI_ENOSYS (PVFS_ENOSYS | PVFS_ERROR_BMI) -#define BMI_ENOTEMPTY 
(PVFS_ENOTEMPTY | PVFS_ERROR_BMI) -#define BMI_ELOOP (PVFS_ELOOP | PVFS_ERROR_BMI) -#define BMI_EWOULDBLOCK (PVFS_EWOULDBLOCK | PVFS_ERROR_BMI) -#define BMI_ENOMSG (PVFS_ENOMSG | PVFS_ERROR_BMI) -#define BMI_EUNATCH (PVFS_EUNATCH | PVFS_ERROR_BMI) -#define BMI_EBADR (PVFS_EBADR | PVFS_ERROR_BMI) -#define BMI_EDEADLOCK (PVFS_EDEADLOCK | PVFS_ERROR_BMI) -#define BMI_ENODATA (PVFS_ENODATA | PVFS_ERROR_BMI) -#define BMI_ETIME (PVFS_ETIME | PVFS_ERROR_BMI) -#define BMI_ENONET (PVFS_ENONET | PVFS_ERROR_BMI) -#define BMI_EREMOTE (PVFS_EREMOTE | PVFS_ERROR_BMI) -#define BMI_ECOMM (PVFS_ECOMM | PVFS_ERROR_BMI) -#define BMI_EPROTO (PVFS_EPROTO | PVFS_ERROR_BMI) -#define BMI_EBADMSG (PVFS_EBADMSG | PVFS_ERROR_BMI) -#define BMI_EOVERFLOW (PVFS_EOVERFLOW | PVFS_ERROR_BMI) -#define BMI_ERESTART (PVFS_ERESTART | PVFS_ERROR_BMI) -#define BMI_EMSGSIZE (PVFS_EMSGSIZE | PVFS_ERROR_BMI) -#define BMI_EPROTOTYPE (PVFS_EPROTOTYPE | PVFS_ERROR_BMI) -#define BMI_ENOPROTOOPT (PVFS_ENOPROTOOPT | PVFS_ERROR_BMI) -#define BMI_EPROTONOSUPPORT (PVFS_EPROTONOSUPPORT | PVFS_ERROR_BMI) -#define BMI_EOPNOTSUPP (PVFS_EOPNOTSUPP | PVFS_ERROR_BMI) -#define BMI_EADDRINUSE (PVFS_EADDRINUSE | PVFS_ERROR_BMI) -#define BMI_EADDRNOTAVAIL (PVFS_EADDRNOTAVAIL | PVFS_ERROR_BMI) -#define BMI_ENETDOWN (PVFS_ENETDOWN | PVFS_ERROR_BMI) -#define BMI_ENETUNREACH (PVFS_ENETUNREACH | PVFS_ERROR_BMI) -#define BMI_ENETRESET (PVFS_ENETRESET | PVFS_ERROR_BMI) -#define BMI_ENOBUFS (PVFS_ENOBUFS | PVFS_ERROR_BMI) -#define BMI_ECONNRESET (PVFS_ECONNRESET | PVFS_ERROR_BMI) -#define BMI_ETIMEDOUT (PVFS_ETIMEDOUT | PVFS_ERROR_BMI) -#define BMI_ECONNREFUSED (PVFS_ECONNREFUSED | PVFS_ERROR_BMI) -#define BMI_EHOSTDOWN (PVFS_EHOSTDOWN | PVFS_ERROR_BMI) -#define BMI_EHOSTUNREACH (PVFS_EHOSTUNREACH | PVFS_ERROR_BMI) -#define BMI_EALREADY (PVFS_EALREADY | PVFS_ERROR_BMI) -#define BMI_EACCES (PVFS_EACCES | PVFS_ERROR_BMI) - -#define BMI_EREMOTEIO (PVFS_EREMOTEIO | PVFS_ERROR_BMI) -#define BMI_ENOMEDIUM (PVFS_ENOMEDIUM | PVFS_ERROR_BMI) 
-#define BMI_EMEDIUMTYPE (PVFS_EMEDIUMTYPE | PVFS_ERROR_BMI) - -#define BMI_ECANCEL (PVFS_ECANCEL | PVFS_ERROR_BMI) -#define BMI_EDEVINIT (PVFS_EDEVINIT | PVFS_ERROR_BMI) -#define BMI_EDETAIL (PVFS_EDETAIL | PVFS_ERROR_BMI) -#define BMI_EHOSTNTFD (PVFS_EHOSTNTFD | PVFS_ERROR_BMI) -#define BMI_EADDRNTFD (PVFS_EADDRNTFD | PVFS_ERROR_BMI) -#define BMI_ENORECVR (PVFS_ENORECVR | PVFS_ERROR_BMI) -#define BMI_ETRYAGAIN (PVFS_ETRYAGAIN | PVFS_ERROR_BMI) +#define BMI_EPERM (BMIE(1) | BMI_ERROR) +#define BMI_ENOENT (BMIE(2) | BMI_ERROR) +#define BMI_EINTR (BMIE(3) | BMI_ERROR) +#define BMI_EIO (BMIE(4) | BMI_ERROR) +#define BMI_ENXIO (BMIE(5) | BMI_ERROR) +#define BMI_EBADF (BMIE(6) | BMI_ERROR) +#define BMI_EAGAIN (BMIE(7) | BMI_ERROR) +#define BMI_ENOMEM (BMIE(8) | BMI_ERROR) +#define BMI_EFAULT (BMIE(9) | BMI_ERROR) +#define BMI_EBUSY (BMIE(10) | BMI_ERROR) +#define BMI_EEXIST (BMIE(11) | BMI_ERROR) +#define BMI_ENODEV (BMIE(12) | BMI_ERROR) +#define BMI_ENOTDIR (BMIE(13) | BMI_ERROR) +#define BMI_EISDIR (BMIE(14) | BMI_ERROR) +#define BMI_EINVAL (BMIE(15) | BMI_ERROR) +#define BMI_EMFILE (BMIE(16) | BMI_ERROR) +#define BMI_EFBIG (BMIE(17) | BMI_ERROR) +#define BMI_ENOSPC (BMIE(18) | BMI_ERROR) +#define BMI_EROFS (BMIE(19) | BMI_ERROR) +#define BMI_EMLINK (BMIE(20) | BMI_ERROR) +#define BMI_EPIPE (BMIE(21) | BMI_ERROR) +#define BMI_EDEADLK (BMIE(22) | BMI_ERROR) +#define BMI_ENAMETOOLONG (BMIE(23) | BMI_ERROR) +#define BMI_ENOLCK (BMIE(24) | BMI_ERROR) +#define BMI_ENOSYS (BMIE(25) | BMI_ERROR) +#define BMI_ENOTEMPTY (BMIE(26) | BMI_ERROR) +#define BMI_ELOOP (BMIE(27) | BMI_ERROR) +#define BMI_EWOULDBLOCK (BMIE(28) | BMI_ERROR) +#define BMI_ENOMSG (BMIE(29) | BMI_ERROR) +#define BMI_EUNATCH (BMIE(30) | BMI_ERROR) +#define BMI_EBADR (BMIE(31) | BMI_ERROR) +#define BMI_EDEADLOCK (BMIE(32) | BMI_ERROR) +#define BMI_ENODATA (BMIE(33) | BMI_ERROR) +#define BMI_ETIME (BMIE(34) | BMI_ERROR) +#define BMI_ENONET (BMIE(35) | BMI_ERROR) +#define BMI_EREMOTE (BMIE(36) | BMI_ERROR) 
+#define BMI_ECOMM (BMIE(37) | BMI_ERROR) +#define BMI_EPROTO (BMIE(38) | BMI_ERROR) +#define BMI_EBADMSG (BMIE(39) | BMI_ERROR) +#define BMI_EOVERFLOW (BMIE(40) | BMI_ERROR) +#define BMI_ERESTART (BMIE(41) | BMI_ERROR) +#define BMI_EMSGSIZE (BMIE(42) | BMI_ERROR) +#define BMI_EPROTOTYPE (BMIE(43) | BMI_ERROR) +#define BMI_ENOPROTOOPT (BMIE(44) | BMI_ERROR) +#define BMI_EPROTONOSUPPORT (BMIE(45) | BMI_ERROR) +#define BMI_EOPNOTSUPP (BMIE(46) | BMI_ERROR) +#define BMI_EADDRINUSE (BMIE(47) | BMI_ERROR) +#define BMI_EADDRNOTAVAIL (BMIE(48) | BMI_ERROR) +#define BMI_ENETDOWN (BMIE(49) | BMI_ERROR) +#define BMI_ENETUNREACH (BMIE(50) | BMI_ERROR) +#define BMI_ENETRESET (BMIE(51) | BMI_ERROR) +#define BMI_ENOBUFS (BMIE(52) | BMI_ERROR) +#define BMI_ETIMEDOUT (BMIE(53) | BMI_ERROR) +#define BMI_ECONNREFUSED (BMIE(54) | BMI_ERROR) +#define BMI_EHOSTDOWN (BMIE(55) | BMI_ERROR) +#define BMI_EHOSTUNREACH (BMIE(56) | BMI_ERROR) +#define BMI_EALREADY (BMIE(57) | BMI_ERROR) +#define BMI_EACCES (BMIE(58) | BMI_ERROR) +#define BMI_ECONNRESET (BMIE(59) | BMI_ERROR) + +#define BMI_ECANCEL ((1|BMI_NON_ERROR_BIT) | BMI_ERROR) +#define BMI_EDEVINIT ((2|BMI_NON_ERROR_BIT) | BMI_ERROR) +#define BMI_EDETAIL ((3|BMI_NON_ERROR_BIT) | BMI_ERROR) +#define BMI_EHOSTNTFD ((4|BMI_NON_ERROR_BIT) | BMI_ERROR) +#define BMI_EADDRNTFD ((5|BMI_NON_ERROR_BIT) | BMI_ERROR) +#define BMI_ENORECVR ((6|BMI_NON_ERROR_BIT) | BMI_ERROR) +#define BMI_ETRYAGAIN ((7|BMI_NON_ERROR_BIT) | BMI_ERROR) /** default bmi error translation function */ int bmi_errno_to_pvfs(int error); diff --git a/src/io/bmi/bmi.c b/src/io/bmi/bmi.c index 665c8fa..19d5942 100644 --- a/src/io/bmi/bmi.c +++ b/src/io/bmi/bmi.c @@ -14,7 +14,9 @@ #include #include #include +#ifndef WIN32 #include +#endif #include #include "bmi.h" @@ -27,6 +29,17 @@ #include "str-utils.h" #include "id-generator.h" #include "pvfs2-internal.h" +#include "pvfs2-debug.h" + +#ifdef WIN32 +#include "wincommon.h" + +#define EREMOTE 66 +#define EHOSTDOWN 112 +#endif + 
+static int bmi_initialized_count = 0; +static gen_mutex_t bmi_initialize_mutex = GEN_MUTEX_INITIALIZER; /* * List of BMI addrs currently managed. @@ -44,7 +57,18 @@ static gen_mutex_t forget_list_mutex = GEN_MUTEX_INITIALIZER; struct forget_item { struct qlist_head link; - PVFS_BMI_addr_t addr; + BMI_addr_t addr; +}; + +/* + * BMI trigger to reap all method resources for inactive addresses. + */ +static QLIST_HEAD(bmi_addr_force_drop_list); +static gen_mutex_t bmi_addr_force_drop_list_mutex = GEN_MUTEX_INITIALIZER; +struct drop_item +{ + struct qlist_head link; + char *method_name; }; /* @@ -66,6 +90,9 @@ extern struct bmi_method_ops bmi_ib_ops; #ifdef __STATIC_METHOD_BMI_PORTALS__ extern struct bmi_method_ops bmi_portals_ops; #endif +#ifdef __STATIC_METHOD_BMI_ZOID__ +extern struct bmi_method_ops bmi_zoid_ops; +#endif extern struct bmi_method_ops bmi_osd_ops; static struct bmi_method_ops *const static_methods[] = { @@ -84,6 +111,9 @@ static struct bmi_method_ops *const static_methods[] = { #ifdef __STATIC_METHOD_BMI_PORTALS__ &bmi_portals_ops, #endif +#ifdef __STATIC_METHOD_BMI_ZOID__ + &bmi_zoid_ops, +#endif #ifndef __PVFS2_SERVER__ /* only for client communications */ &bmi_osd_ops, @@ -110,18 +140,27 @@ static int active_method_count = 0; static gen_mutex_t active_method_count_mutex = GEN_MUTEX_INITIALIZER; static struct bmi_method_ops **active_method_table = NULL; -static struct { + +struct method_usage_t { int iters_polled; /* how many iterations since this method was polled */ int iters_active; /* how many iterations since this method had action */ int plan; -} *method_usage = NULL; + int flags; +}; + +static struct method_usage_t * expected_method_usage = NULL; +static struct method_usage_t * unexpected_method_usage = NULL; + static const int usage_iters_starvation = 100000; static const int usage_iters_active = 10000; +static int global_flags; static int activate_method(const char *name, const char *listen_addr, int flags); static void 
bmi_addr_drop(ref_st_p tmp_ref); +static void bmi_addr_force_drop(ref_st_p ref, ref_list_p ref_list); static void bmi_check_forget_list(void); +static void bmi_check_addr_force_drop (void); /** Initializes the BMI layer. Must be called before any other BMI * functions. @@ -146,6 +185,19 @@ int BMI_initialize(const char *method_list, char *proto = NULL; int addr_count = 0; + gen_mutex_lock(&bmi_initialize_mutex); + if(bmi_initialized_count > 0) + { + /* Already initialized! Just increment ref count and return. */ + ++bmi_initialized_count; + gen_mutex_unlock(&bmi_initialize_mutex); + return 0; + } + ++bmi_initialized_count; + gen_mutex_unlock(&bmi_initialize_mutex); + + global_flags = flags; + /* server must specify method list at startup, optional for client */ if (flags & BMI_INIT_SERVER) { if (!listen_addr || !method_list) @@ -158,6 +210,13 @@ int BMI_initialize(const char *method_list, } } + /* make sure that id generator is initialized if not already */ + ret = id_gen_safe_initialize(); + if(ret < 0) + { + return(ret); + } + /* make a new reference list */ cur_ref_list = ref_list_new(); if (!cur_ref_list) @@ -190,8 +249,14 @@ int BMI_initialize(const char *method_list, goto bmi_initialize_failure; } + gossip_debug(GOSSIP_BMI_DEBUG_CONTROL, "BMI_initialize: " + "method_list=%s\n", method_list); + /* Today is that day! 
*/ addr_count = PINT_split_string_list(&listen_addrs, listen_addr); + + gossip_debug(GOSSIP_BMI_DEBUG_CONTROL, "BMI_initialize: " + "listen_addr=%s\n", listen_addr); for (i=0; i 0) + { + gen_mutex_unlock(&bmi_initialize_mutex); + return 0; + } + gen_mutex_unlock(&bmi_initialize_mutex); + gen_mutex_lock(&active_method_count_mutex); /* attempt to shut down active methods */ for (i = 0; i < active_method_count; i++) @@ -485,13 +565,25 @@ int BMI_finalize(void) free(known_method_table); known_method_count = 0; - if (method_usage) - free(method_usage); + if (expected_method_usage) + free(expected_method_usage); + + if (unexpected_method_usage) + free(unexpected_method_usage); /* destroy the reference list */ /* (side effect: destroys all method addresses as well) */ ref_list_cleanup(cur_ref_list); +#ifdef WIN32 + /* Windows Sockets finalize + This must be done here rather than bmi_wintcp--after all addresses + have been destroyed */ + WSACleanup(); +#endif + /* shut down id generator */ + id_gen_safe_finalize(); + return (0); } @@ -592,14 +684,15 @@ void BMI_close_context(bmi_context_id context_id) * \return 0 on success, -errno on failure. */ int BMI_post_recv(bmi_op_id_t * id, - PVFS_BMI_addr_t src, + BMI_addr_t src, void *buffer, bmi_size_t expected_size, bmi_size_t * actual_size, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + bmi_hint hints) { ref_st_p tmp_ref = NULL; int ret = -1; @@ -621,7 +714,7 @@ int BMI_post_recv(bmi_op_id_t * id, ret = tmp_ref->interface->post_recv( id, tmp_ref->method_addr, buffer, expected_size, actual_size, - buffer_type, tag, user_ptr, context_id); + buffer_type, tag, user_ptr, context_id, (PVFS_hint)hints); return (ret); } @@ -631,13 +724,14 @@ int BMI_post_recv(bmi_op_id_t * id, * \return 0 on success, -errno on failure. 
*/ int BMI_post_send(bmi_op_id_t * id, - PVFS_BMI_addr_t dest, + BMI_addr_t dest, const void *buffer, bmi_size_t size, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + bmi_hint hints) { ref_st_p tmp_ref = NULL; int ret = -1; @@ -659,7 +753,7 @@ int BMI_post_send(bmi_op_id_t * id, ret = tmp_ref->interface->post_send( id, tmp_ref->method_addr, buffer, size, buffer_type, tag, - user_ptr, context_id); + user_ptr, context_id, (PVFS_hint)hints); return (ret); } @@ -669,13 +763,14 @@ int BMI_post_send(bmi_op_id_t * id, * \return 0 on success, -errno on failure. */ int BMI_post_sendunexpected(bmi_op_id_t * id, - PVFS_BMI_addr_t dest, + BMI_addr_t dest, const void *buffer, bmi_size_t size, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + bmi_hint hints) { ref_st_p tmp_ref = NULL; int ret = -1; @@ -697,7 +792,7 @@ int BMI_post_sendunexpected(bmi_op_id_t * id, ret = tmp_ref->interface->post_sendunexpected( id, tmp_ref->method_addr, buffer, size, buffer_type, tag, - user_ptr, context_id); + user_ptr, context_id, (PVFS_hint)hints); return (ret); } @@ -862,7 +957,8 @@ int BMI_testsome(int incount, * poll them all. Return idle_time per method too. 
*/ static void -construct_poll_plan(int nmeth, int *idle_time_ms) +construct_poll_plan(struct method_usage_t * method_usage, + int nmeth, int *idle_time_ms) { int i, numplan; @@ -871,7 +967,8 @@ construct_poll_plan(int nmeth, int *idle_time_ms) ++method_usage[i].iters_polled; ++method_usage[i].iters_active; method_usage[i].plan = 0; - if (method_usage[i].iters_active <= usage_iters_active) { + if ((method_usage[i].iters_active <= usage_iters_active) && + (!(method_usage[i].flags & BMI_METHOD_FLAG_NO_POLLING))){ /* recently busy, poll */ if (0) gossip_debug(GOSSIP_BMI_DEBUG_CONTROL, "%s: polling active meth %d: %d / %d\n", __func__, i, @@ -922,41 +1019,57 @@ int BMI_testunexpected(int incount, int ret = -1; int position = 0; int tmp_outcount = 0; +#ifdef WIN32 + struct bmi_method_unexpected_info *sub_info = + (struct bmi_method_unexpected_info *) + malloc(sizeof(struct bmi_method_unexpected_info) * incount); +#else struct bmi_method_unexpected_info sub_info[incount]; +#endif ref_st_p tmp_ref = NULL; int tmp_active_method_count = 0; /* figure out if we need to drop any stale addresses */ bmi_check_forget_list(); + bmi_check_addr_force_drop(); gen_mutex_lock(&active_method_count_mutex); tmp_active_method_count = active_method_count; gen_mutex_unlock(&active_method_count_mutex); if (max_idle_time_ms < 0) + { +#ifdef WIN32 + free(sub_info); +#endif return (bmi_errno_to_pvfs(-EINVAL)); + } *outcount = 0; - construct_poll_plan(tmp_active_method_count, &max_idle_time_ms); + construct_poll_plan(unexpected_method_usage, + tmp_active_method_count, &max_idle_time_ms); while (position < incount && i < tmp_active_method_count) { - if (method_usage[i].plan) { + if (unexpected_method_usage[i].plan) { ret = active_method_table[i]->testunexpected( (incount - position), &tmp_outcount, (&(sub_info[position])), max_idle_time_ms); if (ret < 0) { /* can't recover from this */ +#ifdef WIN32 + free(sub_info); +#endif gossip_lerr("Error: critical BMI_testunexpected failure.\n"); return 
(ret); } position += tmp_outcount; (*outcount) += tmp_outcount; - method_usage[i].iters_polled = 0; + unexpected_method_usage[i].iters_polled = 0; if (ret) - method_usage[i].iters_active = 0; + unexpected_method_usage[i].iters_active = 0; } i++; } @@ -973,13 +1086,23 @@ int BMI_testunexpected(int incount, if (!tmp_ref) { /* yeah, right */ +#ifdef WIN32 + free(sub_info); +#endif gossip_lerr("Error: critical BMI_testunexpected failure.\n"); gen_mutex_unlock(&ref_mutex); return (bmi_errno_to_pvfs(-EPROTO)); } + if(global_flags & BMI_AUTO_REF_COUNT) + { + tmp_ref->ref_count++; + } gen_mutex_unlock(&ref_mutex); info_array[i].addr = tmp_ref->bmi_addr; } +#ifdef WIN32 + free(sub_info); +#endif /* return 1 if anything completed */ if (ret == 0 && *outcount > 0) { @@ -1008,34 +1131,44 @@ int BMI_testcontext(int incount, int position = 0; int tmp_outcount = 0; int tmp_active_method_count = 0; +#ifndef WIN32 struct timespec ts; +#endif gen_mutex_lock(&active_method_count_mutex); tmp_active_method_count = active_method_count; gen_mutex_unlock(&active_method_count_mutex); if (max_idle_time_ms < 0) + { return (bmi_errno_to_pvfs(-EINVAL)); + } *outcount = 0; - if(tmp_active_method_count < 1) + if (tmp_active_method_count < 1) { /* nothing active yet, just snooze and return */ - if(max_idle_time_ms > 0) + if (max_idle_time_ms > 0) { - ts.tv_sec = 0; +#ifdef WIN32 + Sleep(2); +#else + ts.tv_sec = 0; ts.tv_nsec = 2000; nanosleep(&ts, NULL); +#endif } return(0); } - construct_poll_plan(tmp_active_method_count, &max_idle_time_ms); + construct_poll_plan(expected_method_usage, + tmp_active_method_count, &max_idle_time_ms); while (position < incount && i < tmp_active_method_count) { - if (method_usage[i].plan) { + if (expected_method_usage[i].plan) + { ret = active_method_table[i]->testcontext( incount - position, &out_id_array[position], @@ -1053,9 +1186,9 @@ int BMI_testcontext(int incount, } position += tmp_outcount; (*outcount) += tmp_outcount; - method_usage[i].iters_polled = 0; + 
expected_method_usage[i].iters_polled = 0; if (ret) - method_usage[i].iters_active = 0; + expected_method_usage[i].iters_active = 0; } i++; } @@ -1063,7 +1196,7 @@ int BMI_testcontext(int incount, /* return 1 if anything completed */ if (ret == 0 && *outcount > 0) { - for(i=0; i<*outcount; i++) + for (i = 0; i < *outcount; i++) { gossip_debug(GOSSIP_BMI_DEBUG_CONTROL, "BMI_testcontext completing: %llu\n", llu(out_id_array[i])); @@ -1082,7 +1215,7 @@ int BMI_testcontext(int incount, * * \return Pointer to string on success, NULL on failure. */ -const char* BMI_addr_rev_lookup(PVFS_BMI_addr_t addr) +const char* BMI_addr_rev_lookup(BMI_addr_t addr) { ref_st_p tmp_ref = NULL; char* tmp_str = NULL; @@ -1111,7 +1244,7 @@ const char* BMI_addr_rev_lookup(PVFS_BMI_addr_t addr) * * \return Pointer to string on success, NULL on failure. */ -const char* BMI_addr_rev_lookup_unexpected(PVFS_BMI_addr_t addr) +const char* BMI_addr_rev_lookup_unexpected(BMI_addr_t addr) { ref_st_p tmp_ref = NULL; @@ -1139,7 +1272,7 @@ const char* BMI_addr_rev_lookup_unexpected(PVFS_BMI_addr_t addr) * * \return Pointer to buffer on success, NULL on failure. */ -void *BMI_memalloc(PVFS_BMI_addr_t addr, +void *BMI_memalloc(BMI_addr_t addr, bmi_size_t size, enum bmi_op_type send_recv) { @@ -1159,6 +1292,11 @@ void *BMI_memalloc(PVFS_BMI_addr_t addr, /* allocate the buffer using the method's mechanism */ new_buffer = tmp_ref->interface->memalloc(size, send_recv); + /* initialize buffer, if not NULL. */ + if (new_buffer) + { + memset(new_buffer,0,size); + } return (new_buffer); } @@ -1166,7 +1304,7 @@ void *BMI_memalloc(PVFS_BMI_addr_t addr, * * \return 0 on success, -errno on failure. */ -int BMI_memfree(PVFS_BMI_addr_t addr, +int BMI_memfree(BMI_addr_t addr, void *buffer, bmi_size_t size, enum bmi_op_type send_recv) @@ -1195,7 +1333,7 @@ int BMI_memfree(PVFS_BMI_addr_t addr, * * \return 0 on success, -errno on failure. 
*/ -int BMI_unexpected_free(PVFS_BMI_addr_t addr, +int BMI_unexpected_free(BMI_addr_t addr, void *buffer) { ref_st_p tmp_ref = NULL; @@ -1226,7 +1364,7 @@ int BMI_unexpected_free(PVFS_BMI_addr_t addr, * * \return 0 on success, -errno on failure. */ -int BMI_set_info(PVFS_BMI_addr_t addr, +int BMI_set_info(BMI_addr_t addr, int option, void *inout_parameter) { @@ -1334,7 +1472,7 @@ int BMI_set_info(PVFS_BMI_addr_t addr, * * \return 0 on success, -errno on failure. */ -int BMI_get_info(PVFS_BMI_addr_t addr, +int BMI_get_info(BMI_addr_t addr, int option, void *inout_parameter) { @@ -1422,7 +1560,7 @@ int BMI_get_info(PVFS_BMI_addr_t addr, * \return 1 on success, -errno on failure and 0 if it is not part of * the specified range */ -int BMI_query_addr_range (PVFS_BMI_addr_t addr, const char *id_string, int netmask) +int BMI_query_addr_range (BMI_addr_t addr, const char *id_string, int netmask) { int ret = -1; int i = 0, failed = 1; @@ -1498,7 +1636,7 @@ int BMI_query_addr_range (PVFS_BMI_addr_t addr, const char *id_string, int netma * * \return 0 on success, -errno on failure. */ -int BMI_addr_lookup(PVFS_BMI_addr_t * new_addr, +int BMI_addr_lookup(BMI_addr_t * new_addr, const char *id_string) { @@ -1589,6 +1727,7 @@ int BMI_addr_lookup(PVFS_BMI_addr_t * new_addr, /* fill in the details */ new_ref->method_addr = meth_addr; + meth_addr->parent = new_ref; new_ref->id_string = (char *) malloc(strlen(id_string) + 1); if (!new_ref->id_string) { @@ -1630,7 +1769,7 @@ int BMI_addr_lookup(PVFS_BMI_addr_t * new_addr, * -errno on failure. 
*/ int BMI_post_send_list(bmi_op_id_t * id, - PVFS_BMI_addr_t dest, + BMI_addr_t dest, const void *const *buffer_list, const bmi_size_t *size_list, int list_count, @@ -1639,7 +1778,8 @@ int BMI_post_send_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + bmi_hint hints) { ref_st_p tmp_ref = NULL; int ret = -1; @@ -1675,7 +1815,7 @@ int BMI_post_send_list(bmi_op_id_t * id, ret = tmp_ref->interface->post_send_list( id, tmp_ref->method_addr, buffer_list, size_list, list_count, total_size, buffer_type, tag, user_ptr, - context_id); + context_id, (PVFS_hint)hints); return (ret); } @@ -1697,7 +1837,7 @@ int BMI_post_send_list(bmi_op_id_t * id, * -errno on failure. */ int BMI_post_recv_list(bmi_op_id_t * id, - PVFS_BMI_addr_t src, + BMI_addr_t src, void *const *buffer_list, const bmi_size_t *size_list, int list_count, @@ -1706,7 +1846,8 @@ int BMI_post_recv_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + bmi_hint hints) { ref_st_p tmp_ref = NULL; int ret = -1; @@ -1742,7 +1883,7 @@ int BMI_post_recv_list(bmi_op_id_t * id, ret = tmp_ref->interface->post_recv_list( id, tmp_ref->method_addr, buffer_list, size_list, list_count, total_expected_size, total_actual_size, - buffer_type, tag, user_ptr, context_id); + buffer_type, tag, user_ptr, context_id, (PVFS_hint)hints); return (ret); } @@ -1763,7 +1904,7 @@ int BMI_post_recv_list(bmi_op_id_t * id, * -errno on failure. 
*/ int BMI_post_sendunexpected_list(bmi_op_id_t * id, - PVFS_BMI_addr_t dest, + BMI_addr_t dest, const void *const *buffer_list, const bmi_size_t *size_list, int list_count, @@ -1771,7 +1912,8 @@ int BMI_post_sendunexpected_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + bmi_hint hints) { ref_st_p tmp_ref = NULL; int ret = -1; @@ -1808,7 +1950,7 @@ int BMI_post_sendunexpected_list(bmi_op_id_t * id, ret = tmp_ref->interface->post_sendunexpected_list( id, tmp_ref->method_addr, buffer_list, size_list, list_count, total_size, buffer_type, tag, user_ptr, - context_id); + context_id, (PVFS_hint)hints); return (ret); } @@ -1879,7 +2021,7 @@ int BMI_cancel(bmi_op_id_t id, * * returns 0 on success, -errno on failure */ -PVFS_BMI_addr_t bmi_method_addr_reg_callback(bmi_method_addr_p map) +BMI_addr_t bmi_method_addr_reg_callback(bmi_method_addr_p map) { ref_st_p new_ref = NULL; @@ -1899,6 +2041,7 @@ PVFS_BMI_addr_t bmi_method_addr_reg_callback(bmi_method_addr_p map) */ new_ref->method_addr = map; new_ref->id_string = NULL; + map->parent = new_ref; /* check the method_type from the method_addr pointer to know * which interface to use */ @@ -1910,7 +2053,7 @@ PVFS_BMI_addr_t bmi_method_addr_reg_callback(bmi_method_addr_p map) return new_ref->bmi_addr; } -int bmi_method_addr_forget_callback(PVFS_BMI_addr_t addr) +int bmi_method_addr_forget_callback(BMI_addr_t addr) { struct forget_item* tmp_item = NULL; @@ -1932,6 +2075,51 @@ int bmi_method_addr_forget_callback(PVFS_BMI_addr_t addr) return (0); } +/* + * Signal BMI to drop inactive connections for this method. + */ +void bmi_method_addr_drop_callback (char* method_name) +{ + struct drop_item *item = + (struct drop_item *) malloc(sizeof(struct drop_item)); + + /* + * If we can't allocate, just return. + * Maybe this will succeed next time. 
+ */ + if (!item) return; + + item->method_name = method_name; + + gen_mutex_lock(&bmi_addr_force_drop_list_mutex); + qlist_add(&item->link, &bmi_addr_force_drop_list); + gen_mutex_unlock(&bmi_addr_force_drop_list_mutex); + + return; +} + + +/** + * Try to increase method_usage_t struct to include room for a new method. + */ +static int grow_method_usage (struct method_usage_t ** p, int newflags) +{ + struct method_usage_t * x = *p; + *p = malloc((active_method_count + 1) * sizeof(**p)); + if (!*p) { + *p = x; + return 0; + } + if (active_method_count) { + memcpy(*p, x, active_method_count * sizeof(**p)); + free(x); + } + memset(&((*p)[active_method_count]), 0, sizeof(**p)); + (*p)[active_method_count].flags = newflags; + + return 1; + } + /* * Attempt to insert this name into the list of active methods, * and bring it up. @@ -1982,17 +2170,16 @@ activate_method(const char *name, const char *listen_addr, int flags) } active_method_table[active_method_count] = meth; - x = method_usage; - method_usage = malloc((active_method_count + 1) * sizeof(*method_usage)); - if (!method_usage) { - method_usage = x; - return -ENOMEM; - } - if (active_method_count) { - memcpy(method_usage, x, active_method_count * sizeof(*method_usage)); - free(x); - } - memset(&method_usage[active_method_count], 0, sizeof(*method_usage)); + if (!grow_method_usage (&unexpected_method_usage, meth->flags)) + return -ENOMEM; + + /** + * If we run out of memory here, the unexpected_method_usage will be + * larger than strictly required but there is no memory leak. 
+ */ + + if (!grow_method_usage (&expected_method_usage, meth->flags)) + return -ENOMEM; ++active_method_count; @@ -2010,6 +2197,11 @@ activate_method(const char *name, const char *listen_addr, int flags) /* this is a bit of a hack */ new_addr->method_type = active_method_count - 1; } + + gossip_debug(GOSSIP_BMI_DEBUG_CONTROL, "activate_method: " + "listen_addr=%s, active_method_count-1=%d, flags=%d\n", + listen_addr, active_method_count-1, flags); + ret = meth->initialize(new_addr, active_method_count - 1, flags); if (ret < 0) { gossip_debug(GOSSIP_BMI_DEBUG_CONTROL, @@ -2105,7 +2297,7 @@ case err: bmi_errno = BMI_##err; break */ static void bmi_check_forget_list(void) { - PVFS_BMI_addr_t tmp_addr; + BMI_addr_t tmp_addr; struct forget_item* tmp_item; ref_st_p tmp_ref = NULL; @@ -2147,9 +2339,9 @@ static void bmi_check_forget_list(void) static void bmi_addr_drop(ref_st_p tmp_ref) { struct method_drop_addr_query query; + int ret = 0; query.response = 0; query.addr = tmp_ref->method_addr; - int ret = 0; /* reference count is zero; ask module if it wants us to discard * the address; TCP will tell us to drop addresses for which the
@@ -2172,6 +2364,88 @@ static void bmi_addr_drop(ref_st_p tmp_ref)
     return;
 }
 
+
+/* bmi_addr_force_drop
+ *
+ * Destroys a complete BMI address, including forcing the method to clean up
+ * its portion.
+ *
+ * NOTE: must be called with ref list mutex held
+ */
+static void bmi_addr_force_drop(ref_st_p ref, ref_list_p ref_list)
+{
+    gossip_debug(GOSSIP_BMI_DEBUG_CONTROL,
+                 "[BMI CONTROL]: %s: bmi discarding address: %llu\n",
+                 __func__, llu(ref->bmi_addr));
+
+    ref_list_rem(ref_list, ref->bmi_addr);
+    dealloc_ref_st(ref);
+
+    return;
+}
+
+/*
+ * bmi_check_addr_force_drop
+ *
+ * Checks to see if any method has requested freeing resources.
+ *
+ * Drains bmi_addr_force_drop_list.  For each queued request, every address
+ * on cur_ref_list that has a zero ref_count and whose interface method_name
+ * is the same pointer as the requested name is destroyed with
+ * bmi_addr_force_drop(); the request item itself is then freed.
+ *
+ * The drop-list mutex is released before ref_mutex is taken, so the two
+ * locks are never held at the same time.
+ */
+static void bmi_check_addr_force_drop (void)
+{
+    struct drop_item *drop_item = NULL;
+    ref_st_p ref_item = NULL;
+    int dropped = 0;
+
+    gen_mutex_lock(&bmi_addr_force_drop_list_mutex);
+    while (!qlist_empty(&bmi_addr_force_drop_list))
+    {
+        drop_item = qlist_entry(qlist_pop(&bmi_addr_force_drop_list),
+                                struct drop_item,
+                                link);
+        gen_mutex_unlock(&bmi_addr_force_drop_list_mutex);
+        gen_mutex_lock(&ref_mutex);
+        /* bmi_addr_force_drop() destroys the entry the iterator is standing
+         * on, so stop at the first match and rescan from the head until a
+         * complete pass drops nothing.
+         */
+        do
+        {
+            dropped = 0;
+#ifdef WIN32
+            qlist_for_each_entry(ref_item, cur_ref_list, list_link, ref_st)
+#else
+            qlist_for_each_entry(ref_item, cur_ref_list, list_link)
+#endif
+            {
+                if ((ref_item->ref_count == 0) &&
+                    (ref_item->interface->method_name ==
+                     drop_item->method_name))
+                {
+                    bmi_addr_force_drop(ref_item, cur_ref_list);
+                    dropped = 1;
+                    break;
+                }
+            }
+        } while (dropped);
+        gen_mutex_unlock(&ref_mutex);
+        /* the item was malloc'd by bmi_method_addr_drop_callback();
+         * method_name is only borrowed, so it is not freed here
+         */
+        free(drop_item);
+        gen_mutex_lock(&bmi_addr_force_drop_list_mutex);
+    }
+    gen_mutex_unlock(&bmi_addr_force_drop_list_mutex);
+
+    return;
+}
+
 /*
  * Local variables:
  *  c-indent-level: 4
diff --git a/src/io/bmi/bmi.h b/src/io/bmi/bmi.h index 2216a58..c918221 100644 --- a/src/io/bmi/bmi.h +++ b/src/io/bmi/bmi.h @@ -26,7 +26,7 @@ struct BMI_unexpected_info { bmi_error_code_t error_code; - PVFS_BMI_addr_t addr; + BMI_addr_t addr; void *buffer; bmi_size_t size; bmi_msg_tag_t tag; @@ -43,32 +43,35 @@ int BMI_open_context(bmi_context_id* context_id); void BMI_close_context(bmi_context_id context_id); int BMI_post_send(bmi_op_id_t * id, - PVFS_BMI_addr_t dest, + BMI_addr_t dest, const void *buffer, bmi_size_t size, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + bmi_hint hints); int BMI_post_sendunexpected(bmi_op_id_t * id, - PVFS_BMI_addr_t dest, + BMI_addr_t dest, const void *buffer, bmi_size_t size, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + bmi_hint hints); int BMI_post_recv(bmi_op_id_t * id, - PVFS_BMI_addr_t src, + BMI_addr_t src, void *buffer, bmi_size_t expected_size, bmi_size_t *
actual_size, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + bmi_hint hints); int BMI_test(bmi_op_id_t id, int *outcount, @@ -102,39 +105,39 @@ int BMI_testcontext(int incount, int max_idle_time_ms, bmi_context_id context_id); -void *BMI_memalloc(PVFS_BMI_addr_t addr, +void *BMI_memalloc(BMI_addr_t addr, bmi_size_t size, enum bmi_op_type send_recv); -int BMI_memfree(PVFS_BMI_addr_t addr, +int BMI_memfree(BMI_addr_t addr, void *buffer, bmi_size_t size, enum bmi_op_type send_recv); -int BMI_unexpected_free(PVFS_BMI_addr_t addr, +int BMI_unexpected_free(BMI_addr_t addr, void *buffer); -int BMI_set_info(PVFS_BMI_addr_t addr, +int BMI_set_info(BMI_addr_t addr, int option, void *inout_parameter); -int BMI_get_info(PVFS_BMI_addr_t addr, +int BMI_get_info(BMI_addr_t addr, int option, void *inout_parameter); -int BMI_addr_lookup(PVFS_BMI_addr_t * new_addr, +int BMI_addr_lookup(BMI_addr_t * new_addr, const char *id_string); -const char* BMI_addr_rev_lookup(PVFS_BMI_addr_t addr); +const char* BMI_addr_rev_lookup(BMI_addr_t addr); -const char* BMI_addr_rev_lookup_unexpected(PVFS_BMI_addr_t addr); +const char* BMI_addr_rev_lookup_unexpected(BMI_addr_t addr); -int BMI_query_addr_range (PVFS_BMI_addr_t addr, +int BMI_query_addr_range (BMI_addr_t addr, const char *id_string, int netmask); int BMI_post_send_list(bmi_op_id_t * id, - PVFS_BMI_addr_t dest, + BMI_addr_t dest, const void *const *buffer_list, const bmi_size_t* size_list, int list_count, @@ -143,10 +146,11 @@ int BMI_post_send_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + bmi_hint hints); int BMI_post_recv_list(bmi_op_id_t * id, - PVFS_BMI_addr_t src, + BMI_addr_t src, void *const *buffer_list, const bmi_size_t *size_list, int list_count, @@ -157,10 +161,11 @@ int BMI_post_recv_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, 
bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + bmi_hint hints); int BMI_post_sendunexpected_list(bmi_op_id_t * id, - PVFS_BMI_addr_t dest, + BMI_addr_t dest, const void *const *buffer_list, const bmi_size_t *size_list, int list_count, @@ -169,7 +174,8 @@ int BMI_post_sendunexpected_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + bmi_hint hints); int BMI_cancel(bmi_op_id_t id, bmi_context_id context_id); diff --git a/src/io/bmi/bmi_gm/bmi-gm-addressing.h b/src/io/bmi/bmi_gm/bmi-gm-addressing.h index fa00cbd..d87dfae 100644 --- a/src/io/bmi/bmi_gm/bmi-gm-addressing.h +++ b/src/io/bmi/bmi_gm/bmi-gm-addressing.h @@ -32,7 +32,7 @@ struct gm_addr struct qlist_head gm_addr_list; unsigned int node_id; unsigned int port_id; - PVFS_BMI_addr_t bmi_addr; + BMI_addr_t bmi_addr; op_list_p send_queue; op_list_p handshake_queue; }; diff --git a/src/io/bmi/bmi_gm/bmi-gm.c b/src/io/bmi/bmi_gm/bmi-gm.c index 0831dce..8fd9866 100644 --- a/src/io/bmi/bmi_gm/bmi-gm.c +++ b/src/io/bmi/bmi_gm/bmi-gm.c @@ -29,6 +29,7 @@ #ifdef ENABLE_GM_REGCACHE #include "bmi-gm-regcache.h" #endif +#include "pvfs2-debug.h" static gen_mutex_t interface_mutex = GEN_MUTEX_INITIALIZER; static unsigned int bmi_gm_reserved_ports[BMI_GM_MAX_PORTS] = @@ -62,7 +63,8 @@ int BMI_gm_post_send(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + PVFS_hint hints); int BMI_gm_post_send_list(bmi_op_id_t * id, bmi_method_addr_p dest, const void *const *buffer_list, @@ -72,7 +74,8 @@ int BMI_gm_post_send_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + PVFS_hint hints); int BMI_gm_post_sendunexpected_list(bmi_op_id_t * id, bmi_method_addr_p dest, const void *const *buffer_list, 
@@ -82,7 +85,8 @@ int BMI_gm_post_sendunexpected_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + PVFS_hint hints); int BMI_gm_post_sendunexpected(bmi_op_id_t * id, bmi_method_addr_p dest, const void *buffer, @@ -90,7 +94,8 @@ int BMI_gm_post_sendunexpected(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + PVFS_hint hints); int BMI_gm_post_recv(bmi_op_id_t * id, bmi_method_addr_p src, void *buffer, @@ -99,7 +104,8 @@ int BMI_gm_post_recv(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + PVFS_hint hints); int BMI_gm_post_recv_list(bmi_op_id_t * id, bmi_method_addr_p src, void *const *buffer_list, @@ -110,7 +116,8 @@ int BMI_gm_post_recv_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + PVFS_hint hints); int BMI_gm_test(bmi_op_id_t id, int *outcount, bmi_error_code_t * error_code, @@ -150,6 +157,7 @@ char BMI_gm_method_name[] = "bmi_gm"; /* exported method interface */ const struct bmi_method_ops bmi_gm_ops = { .method_name = BMI_gm_method_name, + .flags = 0, .initialize = BMI_gm_initialize, .finalize = BMI_gm_finalize, .set_info = BMI_gm_set_info, @@ -905,7 +913,8 @@ int BMI_gm_post_send(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + PVFS_hint hints) { int buffer_status = GM_BUF_USER_ALLOC; void *new_buffer = NULL; @@ -998,7 +1007,8 @@ int BMI_gm_post_send_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + PVFS_hint hints) { int buffer_status = GM_BUF_USER_ALLOC; void 
*new_buffer = NULL; @@ -1017,7 +1027,7 @@ int BMI_gm_post_send_list(bmi_op_id_t * id, if(list_count == 1) { return(BMI_gm_post_send(id, dest, buffer_list[0], size_list[0], - buffer_type, tag, user_ptr, context_id)); + buffer_type, tag, user_ptr, context_id, hints)); } /* TODO: think about this some. For now this is going to be @@ -1101,7 +1111,8 @@ int BMI_gm_post_sendunexpected_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + PVFS_hint hints) { int buffer_status = GM_BUF_USER_ALLOC; void *new_buffer = NULL; @@ -1121,7 +1132,7 @@ int BMI_gm_post_sendunexpected_list(bmi_op_id_t * id, if(list_count == 1) { return(BMI_gm_post_sendunexpected(id, dest, buffer_list[0], - size_list[0], buffer_type, tag, user_ptr, context_id)); + size_list[0], buffer_type, tag, user_ptr, context_id, hints)); } /* TODO: think about this some. For now this is going to be @@ -1190,7 +1201,8 @@ int BMI_gm_post_sendunexpected(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + PVFS_hint hints) { int buffer_status = GM_BUF_USER_ALLOC; void *new_buffer = NULL; @@ -1263,7 +1275,8 @@ int BMI_gm_post_recv(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + PVFS_hint hints) { method_op_p query_op = NULL; method_op_p new_method_op = NULL; @@ -1435,7 +1448,8 @@ int BMI_gm_post_recv_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + PVFS_hint hints) { method_op_p query_op = NULL; method_op_p new_method_op = NULL; @@ -1457,7 +1471,7 @@ int BMI_gm_post_recv_list(bmi_op_id_t * id, if(list_count == 1) { return(BMI_gm_post_recv(id, src, buffer_list[0], size_list[0], - total_actual_size, buffer_type, tag, user_ptr, 
context_id)); + total_actual_size, buffer_type, tag, user_ptr, context_id, hints)); } /* what happens here ? diff --git a/src/io/bmi/bmi_gm/module.mk.in b/src/io/bmi/bmi_gm/module.mk.in index ac225db..839dd16 100644 --- a/src/io/bmi/bmi_gm/module.mk.in +++ b/src/io/bmi/bmi_gm/module.mk.in @@ -21,6 +21,7 @@ cfiles := bmi-gm-addr-list.c bmi-gm-bufferpool.c bmi-gm.c src := $(patsubst %,$(DIR)/%,$(cfiles)) LIBSRC += $(src) SERVERSRC += $(src) +LIBBMISRC += $(src) # # Extra cflags for files in this directory. diff --git a/src/io/bmi/bmi_ib/ib.c b/src/io/bmi/bmi_ib/ib.c index f8d6b41..0808797 100644 --- a/src/io/bmi/bmi_ib/ib.c +++ b/src/io/bmi/bmi_ib/ib.c @@ -22,6 +22,7 @@ #include /* bmi_method_addr_reg_callback */ #include /* gen_mutex_t ... */ #include +#include "pint-hint.h" #ifdef HAVE_VALGRIND_H #include @@ -81,10 +82,8 @@ static void encourage_recv_incoming(struct buf_head *bh, msg_type_t type, static void encourage_rts_done_waiting_buffer(struct ib_work *sq); static int send_cts(struct ib_work *rq); static void ib_close_connection(ib_connection_t *c); -#ifndef __PVFS2_SERVER__ static int ib_tcp_client_connect(ib_method_addr_t *ibmap, struct bmi_method_addr *remote_map); -#endif static int ib_tcp_server_check_new_connections(void); static int ib_block_for_activity(int timeout_ms); @@ -124,23 +123,26 @@ static int ib_check_cq(void) debug(4, "%s: found something", __func__); ++ret; if (wc.status != 0) { + struct buf_head *bh = ptr_from_int64(wc.id); /* opcode is not necessarily valid; only wr_id, status, qp_num, * and vendor_err can be relied upon */ if (wc.opcode == BMI_IB_OP_SEND) { - debug(0, "%s: entry id 0x%llx SEND error %s", __func__, - llu(wc.id), wc_status_string(wc.status)); + debug(0, "%s: entry id 0x%llx SEND error %s to %s", __func__, + llu(wc.id), wc_status_string(wc.status), bh->c->peername); if (wc.id) { ib_connection_t *c = ptr_from_int64(wc.id); if (c->cancelled) { debug(0, "%s: ignoring send error on cancelled conn to %s", - __func__, 
c->peername); + __func__, bh->c->peername); } } } else { - error("%s: entry id 0x%llx opcode %s error %s", __func__, + warning("%s: entry id 0x%llx opcode %s error %s from %s", + __func__, llu(wc.id), wc_opcode_string(wc.opcode), - wc_status_string(wc.status)); + wc_status_string(wc.status), bh->c->peername); + continue; } } @@ -612,19 +614,24 @@ encourage_recv_incoming(struct buf_head *bh, msg_type_t type, u_int32_t byte_len } } - bmi_ib_assert(rq, "%s: mop_id %llx in RTS_DONE message not found", - __func__, llu(mh_rts_done.mop_id)); - + if (rq == NULL) { + warning("%s: mop_id %llx in RTS_DONE message not found", + __func__, llu(mh_rts_done.mop_id)); + } + else { #if MEMCACHE_BOUNCEBUF - memcpy_to_buflist(&rq->buflist, reg_recv_buflist_buf, + memcpy_to_buflist(&rq->buflist, reg_recv_buflist_buf, rq->buflist.tot_len); #else - memcache_deregister(ib_device->memcache, &rq->buflist); + memcache_deregister(ib_device->memcache, &rq->buflist); #endif + } post_rr(c, bh); - rq->state.recv = RQ_RTS_WAITING_USER_TEST; + if (rq) { + rq->state.recv = RQ_RTS_WAITING_USER_TEST; + } } else if (type == MSG_BYE) { /* @@ -792,13 +799,13 @@ ensure_connected(struct bmi_method_addr *remote_map) int ret = 0; ib_method_addr_t *ibmap = remote_map->method_data; - if (!ibmap->c) -#ifdef __PVFS2_SERVER__ - /* cannot actively connect */ - ret = 1; -#else + if (!ibmap->c && ibmap->reconnect_flag) ret = ib_tcp_client_connect(ibmap, remote_map); -#endif + else if(!ibmap->c && !ibmap->reconnect_flag) + ret = 1; /* cannot actively connect */ + else + ret = 0; + return ret; } @@ -896,7 +903,8 @@ static int BMI_ib_post_send(bmi_op_id_t *id, struct bmi_method_addr *remote_map, const void *buffer, bmi_size_t total_size, enum bmi_buffer_type buffer_flag __unused, - bmi_msg_tag_t tag, void *user_ptr, bmi_context_id context_id) + bmi_msg_tag_t tag, void *user_ptr, bmi_context_id + context_id, PVFS_hint hints __unused) { return post_send(id, remote_map, 0, &buffer, &total_size, total_size, tag, user_ptr, 
context_id, 0); @@ -906,7 +914,8 @@ static int BMI_ib_post_send_list(bmi_op_id_t *id, struct bmi_method_addr *remote_map, const void *const *buffers, const bmi_size_t *sizes, int list_count, bmi_size_t total_size, enum bmi_buffer_type buffer_flag __unused, - bmi_msg_tag_t tag, void *user_ptr, bmi_context_id context_id) + bmi_msg_tag_t tag, void *user_ptr, bmi_context_id context_id, PVFS_hint + hints __unused) { return post_send(id, remote_map, list_count, buffers, sizes, total_size, tag, user_ptr, context_id, 0); @@ -917,7 +926,8 @@ BMI_ib_post_sendunexpected(bmi_op_id_t *id, struct bmi_method_addr *remote_map, const void *buffer, bmi_size_t total_size, enum bmi_buffer_type buffer_flag __unused, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, PVFS_hint hints + __unused) { return post_send(id, remote_map, 0, &buffer, &total_size, total_size, tag, user_ptr, context_id, 1); @@ -930,7 +940,8 @@ BMI_ib_post_sendunexpected_list(bmi_op_id_t *id, struct bmi_method_addr *remote_ bmi_size_t total_size, enum bmi_buffer_type buffer_flag __unused, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, PVFS_hint hints + __unused) { return post_send(id, remote_map, list_count, buffers, sizes, total_size, tag, user_ptr, context_id, 1); @@ -1067,7 +1078,7 @@ static int BMI_ib_post_recv(bmi_op_id_t *id, struct bmi_method_addr *remote_map, void *buffer, bmi_size_t expected_len, bmi_size_t *actual_len __unused, enum bmi_buffer_type buffer_flag __unused, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, PVFS_hint hints __unused) { return post_recv(id, remote_map, 0, &buffer, &expected_len, expected_len, tag, user_ptr, context_id); @@ -1078,7 +1089,7 @@ BMI_ib_post_recv_list(bmi_op_id_t *id, struct bmi_method_addr *remote_map, void *const *buffers, const bmi_size_t *sizes, int list_count, bmi_size_t tot_expected_len, bmi_size_t *tot_actual_len __unused, enum 
bmi_buffer_type buffer_flag __unused, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, PVFS_hint hints __unused) { return post_recv(id, remote_map, list_count, buffers, sizes, tot_expected_len, tag, user_ptr, context_id); @@ -1496,8 +1507,7 @@ BMI_ib_cancel(bmi_op_id_t id, bmi_context_id context_id __unused) /* pin when sending rts, so also must dereg in this state */ if (sq->state.send == SQ_WAITING_RTS_SEND_COMPLETION || sq->state.send == SQ_WAITING_RTS_SEND_COMPLETION_GOT_CTS || - sq->state.send == SQ_WAITING_CTS || - sq->state.send == SQ_WAITING_DATA_SEND_COMPLETION) + sq->state.send == SQ_WAITING_CTS) memcache_deregister(ib_device->memcache, &sq->buflist); # endif #endif @@ -1512,8 +1522,7 @@ BMI_ib_cancel(bmi_op_id_t id, bmi_context_id context_id __unused) memcache_deregister(ib_device->memcache, &rq->buflist); # if MEMCACHE_EARLY_REG /* pin on post, dereg all these */ - if (rq->state.recv == RQ_RTS_WAITING_CTS_SEND_COMPLETION || - rq->state.recv == RQ_RTS_WAITING_RTS_DONE) + if (rq->state.recv == RQ_RTS_WAITING_CTS_SEND_COMPLETION) memcache_deregister(ib_device->memcache, &rq->buflist); if (rq->state.recv == RQ_WAITING_INCOMING && rq->buflist.tot_len > ib_device->eager_buf_payload) @@ -1544,7 +1553,8 @@ BMI_ib_rev_lookup(struct bmi_method_addr *meth) * Build and fill an IB-specific method_addr structure. 
*/ static struct bmi_method_addr *ib_alloc_method_addr(ib_connection_t *c, - char *hostname, int port) + char *hostname, int port, + int reconnect_flag) { struct bmi_method_addr *map; ib_method_addr_t *ibmap; @@ -1554,6 +1564,8 @@ static struct bmi_method_addr *ib_alloc_method_addr(ib_connection_t *c, ibmap->c = c; ibmap->hostname = hostname; ibmap->port = port; + ibmap->reconnect_flag = reconnect_flag; + ibmap->ref_count = 1; return map; } @@ -1606,6 +1618,7 @@ static struct bmi_method_addr *BMI_ib_method_addr_lookup(const char *id) ib_method_addr_t *ibmap = c->remote_map->method_data; if (ibmap->port == port && !strcmp(ibmap->hostname, hostname)) { map = c->remote_map; + ibmap->ref_count++; break; } } @@ -1616,7 +1629,11 @@ static struct bmi_method_addr *BMI_ib_method_addr_lookup(const char *id) free(hostname); /* found it */ else { - map = ib_alloc_method_addr(0, hostname, port); /* alloc new one */ + /* set reconnect flag on this addr; we will be acting as a client + * for this connection and will be responsible for making sure that + * the connection is established + */ + map = ib_alloc_method_addr(0, hostname, port, 1); /* alloc new one */ /* but don't call bmi_method_addr_reg_callback! */ } @@ -1712,7 +1729,6 @@ static void ib_close_connection(ib_connection_t *c) free(c); } -#ifndef __PVFS2_SERVER__ /* * Blocking connect initiated by a post_sendunexpected{,_list}, or * post_recv* @@ -1760,7 +1776,6 @@ static int ib_tcp_client_connect(ib_method_addr_t *ibmap, } return 0; } -#endif /* * On a server, initialize a socket for listening for new connections. 
@@ -1830,7 +1845,11 @@ static int ib_tcp_server_check_new_connections(void) goto out_unlock; } - c->remote_map = ib_alloc_method_addr(c, hostname, port); + /* don't set reconnect flag on this addr; we are a server in this + * case and the peer will be responsible for maintaining the + * connection + */ + c->remote_map = ib_alloc_method_addr(c, hostname, port, 0); /* register this address with the method control layer */ c->bmi_addr = bmi_method_addr_reg_callback(c->remote_map); if (c->bmi_addr == 0) @@ -1944,8 +1963,12 @@ static int BMI_ib_set_info(int option, void *param __unused) case BMI_DROP_ADDR: { struct bmi_method_addr *map = param; ib_method_addr_t *ibmap = map->method_data; - free(ibmap->hostname); - free(map); + ibmap->ref_count--; + if (ibmap->ref_count == 0) + { + free(ibmap->hostname); + free(map); + } break; } case BMI_OPTIMISTIC_BUFFER_REG: { @@ -2104,6 +2127,7 @@ static int BMI_ib_finalize(void) const struct bmi_method_ops bmi_ib_ops = { .method_name = "bmi_ib", + .flags = 0, .initialize = BMI_ib_initialize, .finalize = BMI_ib_finalize, .set_info = BMI_ib_set_info, diff --git a/src/io/bmi/bmi_ib/ib.h b/src/io/bmi/bmi_ib/ib.h index 73d54ae..0f17e74 100644 --- a/src/io/bmi/bmi_ib/ib.h +++ b/src/io/bmi/bmi_ib/ib.h @@ -11,6 +11,9 @@ #include #include #include +#include +#include +#include #ifdef __GNUC__ /* confuses debugger */ @@ -58,7 +61,7 @@ typedef struct { void *priv; - PVFS_BMI_addr_t bmi_addr; + BMI_addr_t bmi_addr; } ib_connection_t; /* @@ -78,6 +81,8 @@ typedef struct { char *hostname; int port; ib_connection_t *c; + int reconnect_flag; + int ref_count; } ib_method_addr_t; /* diff --git a/src/io/bmi/bmi_ib/module.mk.in b/src/io/bmi/bmi_ib/module.mk.in index 0dbd065..9adbe24 100644 --- a/src/io/bmi/bmi_ib/module.mk.in +++ b/src/io/bmi/bmi_ib/module.mk.in @@ -31,6 +31,7 @@ endif src := $(patsubst %,$(DIR)/%,$(cfiles)) LIBSRC += $(src) SERVERSRC += $(src) +LIBBMISRC += $(src) # # Add extra include paths and warnings just for this directory. 
diff --git a/src/io/bmi/bmi_ib/openib.c b/src/io/bmi/bmi_ib/openib.c index 2cc2d49..94f1fe3 100644 --- a/src/io/bmi/bmi_ib/openib.c +++ b/src/io/bmi/bmi_ib/openib.c @@ -38,6 +38,9 @@ struct openib_device_priv { int nic_max_sge; int nic_max_wr; + /* max MTU reported by NIC port */ + int max_mtu; + /* * Temp array for filling scatter/gather lists to pass to IB functions, * allocated once at start to max size defined as reported by the qp. @@ -91,6 +94,7 @@ static void openib_post_rr(const ib_connection_t *c, struct buf_head *bh); int openib_ib_initialize(void); static void openib_ib_finalize(void); + /* * Build new conneciton. */ @@ -301,10 +305,16 @@ static void init_connection_modify_qp(struct ibv_qp *qp, uint32_t remote_qp_num, attr.max_dest_rd_atomic = 1; attr.ah_attr.dlid = remote_lid; attr.ah_attr.port_num = od->nic_port; - attr.path_mtu = IBV_MTU; + if (od->max_mtu > IBV_MTU) { + attr.path_mtu = od->max_mtu; + } + else { + attr.path_mtu = IBV_MTU; + } attr.rq_psn = 0; attr.dest_qp_num = remote_qp_num; attr.min_rnr_timer = 31; + debug(1, "%s: attr.path_mtu=%d", __func__, attr.path_mtu); ret = ibv_modify_qp(qp, &attr, mask); if (ret) error_xerrno(ret, "%s: ibv_modify_qp INIT -> RTR", __func__); @@ -322,8 +332,10 @@ static void init_connection_modify_qp(struct ibv_qp *qp, uint32_t remote_qp_num, attr.sq_psn = 0; attr.max_rd_atomic = 1; attr.timeout = 26; /* 4.096us * 2^26 = 5 min */ - attr.retry_cnt = 20; - attr.rnr_retry = 20; + attr.retry_cnt = 7; + attr.rnr_retry = 7; + debug(1, "%s: attr.timeout=%d, attr.retry_cnt=%d, attr.rnr_retry=%d", + __func__, attr.timeout, attr.retry_cnt, attr.rnr_retry); ret = ibv_modify_qp(qp, &attr, mask); if (ret) error_xerrno(ret, "%s: ibv_modify_qp RTR -> RTS", __func__); @@ -691,6 +703,7 @@ static const char *openib_wc_status_string(int status) return s; } +#ifdef HAVE_IBV_GET_DEVICES static const char *openib_port_state_string(enum ibv_port_state state) { const char *s = "(UNKNOWN)"; @@ -705,6 +718,7 @@ static const char 
*openib_port_state_string(enum ibv_port_state state) } return s; } +#endif /* HAVE_IBV_GET_DEVICES */ static const char *async_event_type_string(enum ibv_event_type event_type) { @@ -731,6 +745,7 @@ static const char *async_event_type_string(enum ibv_event_type event_type) #ifdef HAVE_IBV_EVENT_CLIENT_REREGISTER CASE(IBV_EVENT_CLIENT_REREGISTER); #endif + CASE(IBV_EVENT_GID_CHANGE); } return s; } @@ -796,6 +811,7 @@ static void openib_mem_deregister(memcache_entry_t *c) c->buf, lld(c->len), c->memkeys.lkey, c->memkeys.rkey); } +#ifdef HAVE_IBV_GET_DEVICES static struct ibv_device *get_nic_handle(void) { struct ibv_device *nic_handle; @@ -830,6 +846,7 @@ static struct ibv_device *get_nic_handle(void) return nic_handle; } +#endif /* HAVE_IBV_GET_DEVICES */ static int openib_check_async_events(void) { @@ -849,14 +866,104 @@ static int openib_check_async_events(void) } +/* function like the original get_nic_handle, only for newer OFED + * versions that accept get_device_list. This function returns + * the first active HCA device which returns a valid IBV_PORT_ACTIVE. + * + * This is going to make the customizable IBV_PORT funcunality a pita. + * Inputs: + * od* : preallocated from openib_ib_initialize + * ctx* : allocated by ibv_open_device inside this func, but located + * at od->ctx + * hca_port* : hca_port attributes + * Returns : + * od* : possibly filled out by ibv_query_port + * ret : 0 on good, !0 on failure (FATAL) + * hca_port : queried, comes in empty + */ + +static int return_active_nic_handle (struct openib_device_priv* od, struct ibv_port_attr * hca_port ) +{ + int ret = 0, i=0; + struct ibv_device *nic_handle = NULL; + struct ibv_device **hca_list; + int num_devs = 0; + struct ibv_context *ctx; + + /* make this configurable once we decide how + * adding more than one HCA REALLY complicates the configuable + * nature that we had discussed */ + od->nic_port = IBV_PORT; + + hca_list = ibv_get_device_list(&num_devs); + + if(num_devs <= 0) // FATAL!! 
+ { + error("%s : NO IB DEVICES FOUND ", __func__); + } + else + { // return a device which is active + for(i=0;ictx=ctx; + if (!od->ctx || ctx==NULL || !ctx) { + error("%s: ibv_open_device", __func__); + return -ENOSYS; + } + ret = ibv_query_port(ctx, od->nic_port, hca_port ); + +// ret = ibv_query_port(od->ctx, od->nic_port, hca_port ); + if(ret) + error_xerrno(ret, "%s: ibv_query_port", __func__); + + if(hca_port->state != IBV_PORT_ACTIVE) + { + // in this case, continue, delete old hca_port info + ret = ibv_close_device(od->ctx); // not sure if this breaks + if(ret) + error_xerrno(ret,"%s: couldnt close device",__func__); + + memset(hca_port,0,sizeof(struct ibv_port_attr)); + warning("%s: found an inactive device/port",__func__); + + // if we get to num_devs, no valid devices found + if(i == (num_devs-1)) // FATAL + { + warning("%s: No Active IB ports/devices found", __func__); + return -ENOSYS; + } + + continue; + } + // if we get here, we had a valid device found, done searching + else { + od->max_mtu = hca_port->max_mtu; + break; + } + } + + } + + VALGRIND_MAKE_MEM_DEFINED(ctx, sizeof(*ctx)); + // cleanup + ibv_free_device_list(hca_list); + return 0; +} + /* * Startup, once per application. 
*/ int openib_ib_initialize(void) { int flags, ret = 0; +#ifdef HAVE_IBV_GET_DEVICES struct ibv_device *nic_handle; struct ibv_context *ctx; +#endif /* HAVE_IBV_GET_DEVICES */ int cqe_num; /* local variables, mainly for debug */ struct openib_device_priv *od; struct ibv_port_attr hca_port; @@ -864,6 +971,10 @@ int openib_ib_initialize(void) debug(1, "%s: init", __func__); + od = bmi_ib_malloc(sizeof(*od)); + ib_device->priv = od; + +#ifdef HAVE_IBV_GET_DEVICES nic_handle = get_nic_handle(); if (!nic_handle) { warning("%s: no NIC found", __func__); @@ -877,30 +988,11 @@ int openib_ib_initialize(void) return -ENOSYS; } VALGRIND_MAKE_MEM_DEFINED(ctx, sizeof(*ctx)); - - od = bmi_ib_malloc(sizeof(*od)); - ib_device->priv = od; - - /* set the function pointers for openib */ - ib_device->func.new_connection = openib_new_connection; - ib_device->func.close_connection = openib_close_connection; - ib_device->func.drain_qp = openib_drain_qp; - ib_device->func.ib_initialize = openib_ib_initialize; - ib_device->func.ib_finalize = openib_ib_finalize; - ib_device->func.post_sr = openib_post_sr; - ib_device->func.post_rr = openib_post_rr; - ib_device->func.post_sr_rdmaw = openib_post_sr_rdmaw; - ib_device->func.check_cq = openib_check_cq; - ib_device->func.prepare_cq_block = openib_prepare_cq_block; - ib_device->func.ack_cq_completion_event = openib_ack_cq_completion_event; - ib_device->func.wc_status_string = openib_wc_status_string; - ib_device->func.mem_register = openib_mem_register; - ib_device->func.mem_deregister = openib_mem_deregister; - ib_device->func.check_async_events = openib_check_async_events; - od->ctx = ctx; od->nic_port = IBV_PORT; /* maybe let this be configurable */ + if(!od->ctx) warning("%s: CTX=0",__func__); + /* get the lid and verify port state */ ret = ibv_query_port(od->ctx, od->nic_port, &hca_port); if (ret) @@ -918,6 +1010,40 @@ int openib_ib_initialize(void) if (ret) error_xerrno(ret, "%s: ibv_query_device", __func__); 
VALGRIND_MAKE_MEM_DEFINED(&hca_cap, sizeof(hca_cap)); +#else + ret = return_active_nic_handle(od, &hca_port); + if(ret) + return -ENOSYS; +#endif + + //od->ctx = ctx; + od->nic_lid = hca_port.lid; + + /* Query the device for the max_ requests and such */ + ret = ibv_query_device(od->ctx, &hca_cap); + if (ret) + error_xerrno(ret, "%s: ibv_query_device", __func__); + VALGRIND_MAKE_MEM_DEFINED(&hca_cap, sizeof(hca_cap)); + + + /* set the function pointers for openib */ + ib_device->func.new_connection = openib_new_connection; + ib_device->func.close_connection = openib_close_connection; + ib_device->func.drain_qp = openib_drain_qp; + ib_device->func.ib_initialize = openib_ib_initialize; + ib_device->func.ib_finalize = openib_ib_finalize; + ib_device->func.post_sr = openib_post_sr; + ib_device->func.post_rr = openib_post_rr; + ib_device->func.post_sr_rdmaw = openib_post_sr_rdmaw; + ib_device->func.check_cq = openib_check_cq; + ib_device->func.prepare_cq_block = openib_prepare_cq_block; + ib_device->func.ack_cq_completion_event = openib_ack_cq_completion_event; + ib_device->func.wc_status_string = openib_wc_status_string; + ib_device->func.mem_register = openib_mem_register; + ib_device->func.mem_deregister = openib_mem_deregister; + ib_device->func.check_async_events = openib_check_async_events; + + debug(1, "%s: max %d completion queue entries", __func__, hca_cap.max_cq); cqe_num = IBV_NUM_CQ_ENTRIES; @@ -946,10 +1072,10 @@ int openib_ib_initialize(void) error("%s: ibv_create_cq failed", __func__); /* use non-blocking IO on the async fd and completion fd */ - flags = fcntl(ctx->async_fd, F_GETFL); + flags = fcntl(od->ctx->async_fd, F_GETFL); if (flags < 0) error_errno("%s: get async fd flags", __func__); - if (fcntl(ctx->async_fd, F_SETFL, flags | O_NONBLOCK) < 0) + if (fcntl(od->ctx->async_fd, F_SETFL, flags | O_NONBLOCK) < 0) error_errno("%s: set async fd nonblocking", __func__); flags = fcntl(od->channel->fd, F_GETFL); @@ -994,3 +1120,6 @@ static void 
openib_ib_finalize(void) ib_device->priv = NULL; } + + + diff --git a/src/io/bmi/bmi_mx/README b/src/io/bmi/bmi_mx/README index f5da0bc..9f0ce9b 100644 --- a/src/io/bmi/bmi_mx/README +++ b/src/io/bmi/bmi_mx/README @@ -67,7 +67,6 @@ The options are: BMX_BUFF_NUM Number of managed buffers BMX_DEBUG Turn on gossip messages BMX_MEM_ACCT Track memory usage - BMX_LOGGING Turn on MPE logging BMX_SERVER_RXS Additional rxs for servers BMX_TIMEOUT Timeout for all MX messages BMX_DB_MASK Determine which debug messages to print @@ -116,11 +115,6 @@ track the pre-allocated buffers managed by bmi_mx). It also does not track memory allcoated before bmi_mx is started such as during BMI_mx_method_addr_lookup(). -BMX_LOGGING -This is not generally recommended. It turns on support for MPE logging but -it requires modifying the Makefile.in script and re-generating configure. -Contact help myri.com for assistance. - BMX_SERVER_RXS The server will receive messages from unknown peers. This value determines how many additional RXs to allocate to handle these messages. The upper-bound @@ -143,6 +137,21 @@ environment when the application starts. Using MX_RCACHE improves performance slightly for the metadata and IO servers as well as when using MPI-IO and PVFS2. +4. Valid bmi_mx storage paths + +Valid bmi_mx storage paths include the MX hostname and the endpoint ID. MX +hostnames include the UTS hostname and optionally a board index if the machine +has multiple Myricom NICs. Thus, valid bmi_mx storage paths are either: + +mx://hostname:board:ep_id + +or + +mx://hostname:ep_id + +Use the first option if mx_info lists hostname:board and use the second option +if mx_info simply shows a hostname. + ====================== II. 
bmi_mx Performance ====================== diff --git a/src/io/bmi/bmi_mx/module.mk.in b/src/io/bmi/bmi_mx/module.mk.in index 6b7fd2d..a686fd1 100644 --- a/src/io/bmi/bmi_mx/module.mk.in +++ b/src/io/bmi/bmi_mx/module.mk.in @@ -21,10 +21,11 @@ cfiles := mx.c src := $(patsubst %,$(DIR)/%,$(cfiles)) LIBSRC += $(src) SERVERSRC += $(src) +LIBBMISRC += $(src) # # Extra cflags for files in this directory. # -MODCFLAGS_$(DIR) := -I@MX_INCLUDES@ +MODCFLAGS_$(DIR) := -I@MX_INCDIR@ endif # BUILD_MX diff --git a/src/io/bmi/bmi_mx/mx.c b/src/io/bmi/bmi_mx/mx.c index 60aca79..ff7bcf0 100644 --- a/src/io/bmi/bmi_mx/mx.c +++ b/src/io/bmi/bmi_mx/mx.c @@ -8,25 +8,27 @@ */ #include "mx.h" +#include "pint-hint.h" +#include "pint-event.h" +#include "pvfs2-debug.h" + static int tmp_id = 0; /* temporary id until bmi_mx is init'ed */ struct bmx_data *bmi_mx = NULL; /* global state for bmi_mx */ +mx_status_t BMX_NO_STATUS; + #if BMX_MEM_ACCT uint64_t mem_used = 0; /* bytes used */ gen_mutex_t mem_used_lock; /* lock */ #endif -#if BMX_LOGGING -int send_start; -int send_finish; -int recv_start; -int recv_finish; -int sendunex_start; -int sendunex_finish; -int recvunex_start; -int recvunex_finish; -#endif +/* statics for event logging */ +static PINT_event_type bmi_mx_send_event_id __attribute__ ((unused)); +static PINT_event_type bmi_mx_recv_event_id __attribute__ ((unused)); + +static PINT_event_group bmi_mx_event_group __attribute__ ((unused)); +static pid_t bmi_mx_pid __attribute__ ((unused)); mx_unexp_handler_action_t bmx_unexpected_recv(void *context, mx_endpoint_addr_t source, @@ -34,10 +36,60 @@ bmx_unexpected_recv(void *context, mx_endpoint_addr_t source, static int bmx_peer_connect(struct bmx_peer *peer); +static void +bmx_create_peername(void); + +void * +BMI_mx_memalloc(bmi_size_t size, enum bmi_op_type send_recv); + +static int +bmx_peer_init_state(struct bmx_peer *peer) __attribute__ ((unused)); + +/**** Completion function token handling ****************************/ +/* We 
should not hold any locks when calling mx_test[_any](), + * mx_wait_any() or mx_cancel(). We want to avoid races between them, + * however. So, before calling any completion function, the caller + * must hold this token. These functions implement a token system (i.e. + * semaphore) that will wake up mx_wait_any() to reduce blocking times + * for the calling function. + */ + +static void +bmx_get_completion_token(void) +{ + int done = 0; + + do { + gen_mutex_lock(&bmi_mx->bmx_completion_lock); + if (bmi_mx->bmx_refcount == 1) { + bmi_mx->bmx_refcount--; + done = 1; + gen_mutex_unlock(&bmi_mx->bmx_completion_lock); + } else { + assert(bmi_mx->bmx_refcount == 0); + /* someone has the lock, wake the MX endpoint in + * case they are blocking in mx_wait_any() */ + gen_mutex_unlock(&bmi_mx->bmx_completion_lock); + mx_wakeup(bmi_mx->bmx_ep); + } + } while (!done); + + return; +} + +static void +bmx_release_completion_token(void) +{ + gen_mutex_lock(&bmi_mx->bmx_completion_lock); + bmi_mx->bmx_refcount++; + assert(bmi_mx->bmx_refcount == 1); + gen_mutex_unlock(&bmi_mx->bmx_completion_lock); + return; +} /**** TX/RX handling functions **************************************/ -void +static void bmx_ctx_free(struct bmx_ctx *ctx) { if (ctx == NULL) return; @@ -60,7 +112,7 @@ bmx_ctx_free(struct bmx_ctx *ctx) return; } -int +static int bmx_ctx_alloc(struct bmx_ctx **ctxp, enum bmx_req_type type) { struct bmx_ctx *ctx = NULL; @@ -142,11 +194,13 @@ bmx_ctx_alloc(struct bmx_ctx **ctxp, enum bmx_req_type type) return 0; } -void +static void bmx_ctx_init(struct bmx_ctx *ctx) { struct bmx_peer *peer = NULL; + BMX_ENTER; + if (ctx == NULL) return; peer = ctx->mxc_peer; @@ -157,9 +211,9 @@ bmx_ctx_init(struct bmx_ctx *ctx) /* ctx->mxc_global_list */ if (!qlist_empty(&ctx->mxc_list)) { - if (peer != NULL) gen_mutex_lock(&peer->mxp_lock); - qlist_del_init(&ctx->mxc_list); - if (peer != NULL) gen_mutex_unlock(&peer->mxp_lock); + debug(BMX_DB_ERR, "%s %s still on a list", __func__, + 
ctx->mxc_type == BMX_REQ_TX ? "tx" : "rx"); + exit(1); } ctx->mxc_mop = NULL; @@ -187,44 +241,34 @@ bmx_ctx_init(struct bmx_ctx *ctx) /* ctx->mxc_get */ /* ctx->mxc_put */ + BMX_EXIT; return; } /* add to peer's queued txs/rxs list */ -void +static void bmx_q_ctx(struct bmx_ctx *ctx) { struct bmx_peer *peer = ctx->mxc_peer; list_t *queue = ctx->mxc_type == BMX_REQ_TX ? &peer->mxp_queued_txs : &peer->mxp_queued_rxs; + BMX_ENTER; ctx->mxc_state = BMX_CTX_QUEUED; gen_mutex_lock(&peer->mxp_lock); qlist_add_tail(&ctx->mxc_list, queue); gen_mutex_unlock(&peer->mxp_lock); - return; -} - -/* remove from peer's queued txs/rxs list */ -void -bmx_deq_ctx(struct bmx_ctx *ctx) -{ - struct bmx_peer *peer = ctx->mxc_peer; - - if (!qlist_empty(&ctx->mxc_list)) { - gen_mutex_lock(&peer->mxp_lock); - qlist_del_init(&ctx->mxc_list); - gen_mutex_unlock(&peer->mxp_lock); - } + BMX_EXIT; return; } /* add to peer's pending rxs list */ -void +static void bmx_q_pending_ctx(struct bmx_ctx *ctx) { struct bmx_peer *peer = ctx->mxc_peer; + BMX_ENTER; ctx->mxc_state = BMX_CTX_PENDING; if (ctx->mxc_type == BMX_REQ_RX) { if (peer) { @@ -233,15 +277,17 @@ bmx_q_pending_ctx(struct bmx_ctx *ctx) gen_mutex_unlock(&peer->mxp_lock); } } + BMX_EXIT; return; } /* remove from peer's pending rxs list */ -void +static void bmx_deq_pending_ctx(struct bmx_ctx *ctx) { struct bmx_peer *peer = ctx->mxc_peer; + BMX_ENTER; if (ctx->mxc_state == BMX_CTX_PENDING) { ctx->mxc_state = BMX_CTX_COMPLETED; } @@ -252,25 +298,86 @@ bmx_deq_pending_ctx(struct bmx_ctx *ctx) gen_mutex_unlock(&peer->mxp_lock); } } + BMX_EXIT; return; } -/* add to the global canceled list */ -void -bmx_q_canceled_ctx(struct bmx_ctx *ctx, bmi_error_code_t error) +/* dequeue from unexpected rx list */ +static void +bmx_deq_unex_rx(struct bmx_ctx **rxp) { - ctx->mxc_state = BMX_CTX_CANCELED; - if (error < 0) - ctx->mxc_mxstat.code = error; - else - ctx->mxc_mxstat.code = -error; - gen_mutex_lock(&bmi_mx->bmx_canceled_lock); - 
qlist_add_tail(&ctx->mxc_list, &bmi_mx->bmx_canceled); - gen_mutex_unlock(&bmi_mx->bmx_canceled_lock); + struct bmx_ctx *rx = NULL; + list_t *list = &bmi_mx->bmx_unex_rxs; + + BMX_ENTER; + gen_mutex_lock(&bmi_mx->bmx_unex_rxs_lock); + if (!qlist_empty(list)) { + rx = qlist_entry(list->next, struct bmx_ctx, mxc_list); + qlist_del_init(&rx->mxc_list); + } + gen_mutex_unlock(&bmi_mx->bmx_unex_rxs_lock); + *rxp = rx; + + BMX_EXIT; return; } -struct bmx_ctx * +/* add to the completion queue for the appropriate context */ +static void +bmx_q_completed(struct bmx_ctx *ctx, enum bmx_ctx_state state, + mx_status_t status, bmi_error_code_t error) +{ + int id = 0; + gen_mutex_t *lock = NULL; + list_t *list = NULL; + + BMX_ENTER; + + ctx->mxc_state = state; + ctx->mxc_mxstat = status; + ctx->mxc_error = error < 0 ? error : -error; + + if (ctx->mxc_type == BMX_REQ_RX && + ctx->mxc_msg_type == BMX_MSG_UNEXPECTED) { + list = &bmi_mx->bmx_unex_rxs; + lock = &bmi_mx->bmx_unex_rxs_lock; + } else { + id = (int) ctx->mxc_mop->context_id; + lock = &bmi_mx->bmx_done_q_lock[id]; + list = &bmi_mx->bmx_done_q[id]; + } + + + gen_mutex_lock(lock); + qlist_add_tail(&ctx->mxc_list, list); + gen_mutex_unlock(lock); + BMX_EXIT; + return; +} + +static void +bmx_deq_completed(struct bmx_ctx **ctxp, bmi_context_id context_id) +{ + int id = (int) context_id; + list_t *list = &bmi_mx->bmx_done_q[id]; + gen_mutex_t *lock = &bmi_mx->bmx_done_q_lock[id]; + struct bmx_ctx *ctx = NULL; + + BMX_ENTER; + + gen_mutex_lock(lock); + if (!qlist_empty(list)) { + ctx = qlist_entry(list->next, struct bmx_ctx, mxc_list); + qlist_del_init(&ctx->mxc_list); + } + gen_mutex_unlock(lock); + *ctxp = ctx; + + BMX_EXIT; + return; +} + +static struct bmx_ctx * bmx_get_idle_rx(void) { struct bmx_ctx *rx = NULL; @@ -301,32 +408,37 @@ bmx_get_idle_rx(void) return rx; } -void -bmx_put_idle_rx(struct bmx_ctx *rx) +static void +bmx_put_idle_ctx(struct bmx_ctx *ctx) { - if (rx == NULL) { - debug(BMX_DB_WARN, "put_idle_rx() called 
with NULL"); - return; - } - if (rx->mxc_type != BMX_REQ_RX) { - debug(BMX_DB_WARN, "put_idle_rx() called with a TX"); + list_t *list = &bmi_mx->bmx_idle_txs; + gen_mutex_t *lock = &bmi_mx->bmx_idle_txs_lock; + + if (ctx == NULL) { + debug(BMX_DB_WARN, "put_idle_ctx() called with NULL"); return; } - if (rx->mxc_get != rx->mxc_put + 1) { - debug(BMX_DB_ERR, "put_idle_rx() get (%llu) != put (%llu) + 1", - (unsigned long long) rx->mxc_get, - (unsigned long long) rx->mxc_put); + ctx->mxc_put++; + if (ctx->mxc_get != ctx->mxc_put) { + debug(BMX_DB_ERR, "put_idle_ctx() get (%llu) != put (%llu)", + (unsigned long long) ctx->mxc_get, + (unsigned long long) ctx->mxc_put); exit(1); } - bmx_ctx_init(rx); - rx->mxc_put++; - gen_mutex_lock(&bmi_mx->bmx_idle_rxs_lock); - qlist_add(&rx->mxc_list, &bmi_mx->bmx_idle_rxs); - gen_mutex_unlock(&bmi_mx->bmx_idle_rxs_lock); + bmx_ctx_init(ctx); + + if (ctx->mxc_type == BMX_REQ_RX) { + list = &bmi_mx->bmx_idle_rxs; + lock = &bmi_mx->bmx_idle_rxs_lock; + } + + gen_mutex_lock(lock); + qlist_add(&ctx->mxc_list, list); + gen_mutex_unlock(lock); return; } -void +static void bmx_reduce_idle_rxs(int count) { int i = 0; @@ -342,7 +454,7 @@ bmx_reduce_idle_rxs(int count) return; } -struct bmx_ctx * +static struct bmx_ctx * bmx_get_idle_tx(void) { struct bmx_ctx *tx = NULL; @@ -373,34 +485,9 @@ bmx_get_idle_tx(void) return tx; } -void -bmx_put_idle_tx(struct bmx_ctx *tx) -{ - if (tx == NULL) { - debug(BMX_DB_WARN, "put_idle_tx() called with NULL"); - return; - } - if (tx->mxc_type != BMX_REQ_TX) { - debug(BMX_DB_WARN, "put_idle_tx() called with a TX"); - return; - } - if (tx->mxc_get != tx->mxc_put + 1) { - debug(BMX_DB_ERR, "put_idle_tx() get (%llu) != put (%llu) + 1", - (unsigned long long) tx->mxc_get, - (unsigned long long) tx->mxc_put); - exit(1); - } - bmx_ctx_init(tx); - tx->mxc_put++; - gen_mutex_lock(&bmi_mx->bmx_idle_txs_lock); - qlist_add(&tx->mxc_list, &bmi_mx->bmx_idle_txs); - gen_mutex_unlock(&bmi_mx->bmx_idle_txs_lock); - return; -} 
- /**** peername parsing functions **************************************/ -int +static int bmx_verify_hostname(char *host) { int ret = 0; @@ -423,7 +510,7 @@ bmx_verify_hostname(char *host) return 0; } -int +static int bmx_verify_num_str(char *num_str) { int ret = 0; @@ -443,51 +530,48 @@ bmx_verify_num_str(char *num_str) /* parse mx://hostname:board:ep_id/filesystem/ + * or mx://hostname:ep_id/filesystem/ * this is pretty robust but if strtol() fails for board or ep_id, it * returns 0 and we do not know that it failed. * This handles legal hostnames (1-63 chars) include a-zA-Z0-9 as well as . and - * It will accept IPv4 addresses but not IPv6 (too many semicolons) */ -int +static int bmx_parse_peername(const char *peername, char **hostname, uint32_t *board, uint32_t *ep_id) { - int ret = 0; - int len = 0; - int colon1_found = 0; - int colon2_found = 0; - char *s = NULL; - char *colon1 = NULL; - char *colon2 = NULL; - char *fs = NULL; - char *host = NULL; - uint32_t bd = 0; - uint32_t ep = 0; - - if (peername == NULL || hostname == NULL || board == NULL || ep_id == NULL) { - debug(BMX_DB_INFO, "parse_peername() called with invalid parameter"); + int ret = 0; + int colon1_found = 0; + int colon2_found = 0; + char *tmp_peername = NULL; + char *colon1 = NULL; + char *colon2 = NULL; + char *fs = NULL; + char *host = NULL; + uint32_t bd = -1; + uint32_t ep = 0; + + tmp_peername = string_key("mx",peername); + if (!tmp_peername) { + debug(BMX_DB_INFO, "parse_peername() called with invalid peername"); return -BMI_EINVAL; } - if (peername[0] != 'm' || - peername[1] != 'x' || - peername[2] != ':' || - peername[3] != '/' || - peername[4] != '/') { - debug(BMX_DB_INFO, "parse_peername() peername does not start with mx://"); - return -1; + if (tmp_peername == NULL || hostname == NULL || board == NULL || ep_id == NULL) { + debug(BMX_DB_INFO, "parse_peername() called with invalid parameter"); + return -BMI_EINVAL; } - - s = strdup(&peername[5]); - fs = strchr(s, '/'); + fs = 
strchr(tmp_peername, '/'); if (fs) { *fs = '\0'; } - colon1 = strchr(s, ':'); + colon1 = strchr(tmp_peername, ':'); if (!colon1) { debug(BMX_DB_INFO, "parse_peername() strchr() failed"); } else { - colon2 = strrchr(s, ':'); + colon2 = strrchr(tmp_peername, ':'); if (colon1 == colon2) { - debug(BMX_DB_INFO, "parse_peername() strrchr() returned the same ':'"); + /* colon2_found == 0 */ + debug(BMX_DB_INFO, "parse_peername() MX hostname does not " + "include a board number"); } else { colon2_found = 1; *colon2 = '\0'; @@ -495,10 +579,17 @@ bmx_parse_peername(const char *peername, char **hostname, uint32_t *board, uint3 colon1_found = 1; *colon1 = '\0'; } - /* s = hostname\0board\0ep_id\0filesystem + /* if MX hostname includes board number... + * s = hostname\0board\0ep_id\0filesystem * colon1 = \0board\0ep_id\0filesystem * colon2 = \0ep_id\0filesystem * fs = \0filesystem + * + * else if MX hostname does _not_ include a board number... + * s = hostname\0ep_id\0filesystem + * colon1 = \0ep_id\0filesystem + * colon2 = \0ep_id\0filesystem + * fs = \0filesystem */ colon1++; @@ -510,46 +601,49 @@ bmx_parse_peername(const char *peername, char **hostname, uint32_t *board, uint3 NULL != strchr(colon2, ':')) { debug(BMX_DB_INFO, "parse_peername() too many ':' (%s %s)", colon1, colon2); - len = sizeof(*s); - free(s); + free(tmp_peername); return -1; } } - host = strdup(s); + host = strdup(tmp_peername); if (!host) { debug(BMX_DB_MEM, "parse_peername() malloc() failed"); - free(s); + free(tmp_peername); return -1; } - strcpy(host, s); - if (colon1_found) { + if (colon1_found && colon2_found) { bd = (uint32_t) strtol(colon1, NULL, 0); - if (colon2_found) { - ep = (uint32_t) strtol(colon2, NULL, 0); - } + ep = (uint32_t) strtol(colon2, NULL, 0); + } else if (colon1_found && !colon2_found) { + ep = (uint32_t) strtol(colon2, NULL, 0); + } else { + debug(BMX_DB_WARN, "%s is not a valid hostname", host); + free(host); + free(tmp_peername); + return -1; } ret = 
bmx_verify_hostname(host); if (ret != 0) { debug(BMX_DB_INFO, "%s is not a valid hostname", host); free(host); - free(s); + free(tmp_peername); return -1; } ret = bmx_verify_num_str(colon1); if (ret != 0) { debug(BMX_DB_INFO, "%s is not a valid board ID", host); free(host); - free(s); + free(tmp_peername); return -1; } ret = bmx_verify_num_str(colon2); if (ret != 0) { debug(BMX_DB_INFO, "%s is not a valid endpoint ID", host); free(host); - free(s); + free(tmp_peername); return -1; } @@ -557,14 +651,14 @@ bmx_parse_peername(const char *peername, char **hostname, uint32_t *board, uint3 *board = bd; *ep_id = ep; - free(s); + free(tmp_peername); return 0; } /**** peer handling functions **************************************/ -void +static void bmx_peer_free(struct bmx_peer *peer) { struct bmx_method_addr *mxmap = peer->mxp_mxmap; @@ -586,7 +680,7 @@ bmx_peer_free(struct bmx_peer *peer) return; } -void +static void bmx_peer_addref(struct bmx_peer *peer) { gen_mutex_lock(&peer->mxp_lock); @@ -596,9 +690,10 @@ bmx_peer_addref(struct bmx_peer *peer) return; } -void +static void bmx_peer_decref(struct bmx_peer *peer) { + BMX_ENTER; gen_mutex_lock(&peer->mxp_lock); if (peer->mxp_refcount == 0) { debug(BMX_DB_WARN, "peer_decref() called for %s when refcount == 0", @@ -607,6 +702,8 @@ bmx_peer_decref(struct bmx_peer *peer) peer->mxp_refcount--; if (peer->mxp_refcount == 1 && peer->mxp_state == BMX_PEER_DISCONNECT) { /* all txs and rxs are completed or canceled, reset state */ + debug(BMX_DB_PEER, "Setting peer %s to BMX_PEER_INIT", + peer->mxp_mxmap->mxm_peername); peer->mxp_state = BMX_PEER_INIT; } gen_mutex_unlock(&peer->mxp_lock); @@ -622,10 +719,11 @@ bmx_peer_decref(struct bmx_peer *peer) gen_mutex_unlock(&bmi_mx->bmx_lock); bmx_peer_free(peer); } + BMX_EXIT; return; } -int +static int bmx_peer_alloc(struct bmx_peer **peerp, struct bmx_method_addr *mxmap) { int i = 0; @@ -654,7 +752,11 @@ bmx_peer_alloc(struct bmx_peer **peerp, struct bmx_method_addr *mxmap) 
INIT_QLIST_HEAD(&peer->mxp_list); memset(name, 0, sizeof(*name)); - sprintf(name, "%s:%d", mxmap->mxm_hostname, mxmap->mxm_board); + if (mxmap->mxm_board != -1) { + sprintf(name, "%s:%d", mxmap->mxm_hostname, mxmap->mxm_board); + } else { + sprintf(name, "%s", mxmap->mxm_hostname); + } mxret = mx_hostname_to_nic_id(name, &nic_id); if (mxret == MX_SUCCESS) { peer->mxp_nic_id = nic_id; @@ -694,9 +796,12 @@ bmx_peer_alloc(struct bmx_peer **peerp, struct bmx_method_addr *mxmap) bmx_peer_free(peer); return ret; } - bmx_put_idle_rx(rx); + bmx_put_idle_ctx(rx); } + /* on servers with server-to-server comms, we are racing + * between method_addr_lookup() and handle_conn_req() */ + bmx_peer_addref(peer); /* for the peers list */ gen_mutex_lock(&bmi_mx->bmx_peers_lock); qlist_add_tail(&peer->mxp_list, &bmi_mx->bmx_peers); @@ -708,11 +813,13 @@ bmx_peer_alloc(struct bmx_peer **peerp, struct bmx_method_addr *mxmap) return 0; } -int +static int bmx_peer_init_state(struct bmx_peer *peer) { int ret = 0; + BMX_ENTER; + gen_mutex_lock(&peer->mxp_lock); /* we have a ref for each pending tx and rx, don't init @@ -722,20 +829,26 @@ bmx_peer_init_state(struct bmx_peer *peer) ret = -1; } else { /* ok to init */ + debug(BMX_DB_PEER, "Setting peer %s to BMX_PEER_INIT", + peer->mxp_mxmap->mxm_peername); peer->mxp_state = BMX_PEER_INIT; } gen_mutex_unlock(&peer->mxp_lock); + BMX_EXIT; + return 0; } /**** startup/shutdown functions **************************************/ /* init bmi_mx */ -int +static int bmx_globals_init(int method_id) { + int i = 0; + #if BMX_MEM_ACCT mem_used = 0; gen_mutex_init(&mem_used_lock); @@ -752,6 +865,7 @@ bmx_globals_init(int method_id) /* bmi_mx->bmx_board */ /* bmi_mx->bmx_ep_id */ /* bmi_mx->bmx_ep */ + /* bmi_mx->bmx_sid */ /* bmi_mx->bmx_is_server */ INIT_QLIST_HEAD(&bmi_mx->bmx_peers); @@ -765,8 +879,17 @@ bmx_globals_init(int method_id) INIT_QLIST_HEAD(&bmi_mx->bmx_idle_rxs); gen_mutex_init(&bmi_mx->bmx_idle_rxs_lock); - 
INIT_QLIST_HEAD(&bmi_mx->bmx_canceled); - gen_mutex_init(&bmi_mx->bmx_canceled_lock); + gen_mutex_init(&bmi_mx->bmx_completion_lock); + /* set to 1 to allow testing to start */ + bmi_mx->bmx_refcount = 1; + + for (i = 0; i < BMI_MAX_CONTEXTS; i++) { + INIT_QLIST_HEAD(&bmi_mx->bmx_done_q[i]); + gen_mutex_init(&bmi_mx->bmx_done_q_lock[i]); + } + + INIT_QLIST_HEAD(&bmi_mx->bmx_unex_rxs); + gen_mutex_init(&bmi_mx->bmx_unex_rxs_lock); bmi_mx->bmx_next_id = 1; gen_mutex_init(&bmi_mx->bmx_lock); /* global lock, use for global txs, @@ -787,13 +910,13 @@ bmx_globals_init(int method_id) } -int +static int bmx_open_endpoint(mx_endpoint_t *ep, uint32_t board, uint32_t ep_id) { mx_return_t mxret = MX_SUCCESS; mx_param_t param; - /* This will tell MX use context IDs. Normally, MX has one + /* This will tell MX to use context IDs. Normally, MX has one * set of queues for posted recvs, unexpected, etc. This will * create seaparate sets of queues for each msg type. * The benefit is that we can call mx_test_any() for each @@ -801,7 +924,7 @@ bmx_open_endpoint(mx_endpoint_t *ep, uint32_t board, uint32_t ep_id) * matching recvs. 
*/ param.key = MX_PARAM_CONTEXT_ID; param.val.context_id.bits = 4; - param.val.context_id.shift = 60; + param.val.context_id.shift = BMX_MSG_SHIFT; mxret = mx_open_endpoint(board, ep_id, BMX_MAGIC, ¶m, 1, ep); @@ -840,41 +963,7 @@ BMI_mx_initialize(bmi_method_addr_p listen_addr, int method_id, int init_flags) int ret = 0; mx_return_t mxret = MX_SUCCESS; -#if BMX_LOGGING - MPE_Init_log(); -#define BMX_LOG_STATE 1 -#if BMX_LOG_STATE - send_start = MPE_Log_get_event_number(); - send_finish = MPE_Log_get_event_number(); - recv_start = MPE_Log_get_event_number(); - recv_finish = MPE_Log_get_event_number(); - sendunex_start = MPE_Log_get_event_number(); - sendunex_finish = MPE_Log_get_event_number(); - recvunex_start = MPE_Log_get_event_number(); - recvunex_finish = MPE_Log_get_event_number(); - MPE_Describe_state(send_start, send_finish, "Send", "red"); - MPE_Describe_state(recv_start, recv_finish, "Recv", "blue"); - MPE_Describe_state(sendunex_start, sendunex_finish, "SendUnex", "orange"); - MPE_Describe_state(recvunex_start, recvunex_finish, "RecvUnex", "green"); -#else - MPE_Log_get_solo_eventID(&send_start); - MPE_Log_get_solo_eventID(&send_finish); - MPE_Log_get_solo_eventID(&recv_start); - MPE_Log_get_solo_eventID(&recv_finish); - MPE_Log_get_solo_eventID(&sendunex_start); - MPE_Log_get_solo_eventID(&sendunex_finish); - MPE_Log_get_solo_eventID(&recvunex_start); - MPE_Log_get_solo_eventID(&recvunex_finish); - MPE_Describe_info_event(send_start, "Send_start", "red1", "tag:%d"); - MPE_Describe_info_event(send_finish, "Send_finish", "red3", "tag:%d"); - MPE_Describe_info_event(recv_start, "Recv_start", "blue1", "tag:%d"); - MPE_Describe_info_event(recv_finish, "Recv_finish", "blue3", "tag:%d"); - MPE_Describe_info_event(sendunex_start, "SendUnex_start", "orange1", "tag:%d"); - MPE_Describe_info_event(sendunex_finish, "SendUnex_finish", "orange3", "tag:%d"); - MPE_Describe_info_event(recvunex_start, "RecvUnex_start", "green1", "tag:%d"); - 
MPE_Describe_info_event(recvunex_finish, "RecvUnex_finish", "green3", "tag:%d"); -#endif /* state or event */ -#endif /* BMX_LOGGING */ + BMX_ENTER; /* check params */ if (!!listen_addr ^ (init_flags & BMI_INIT_SERVER)) { @@ -895,6 +984,9 @@ BMI_mx_initialize(bmi_method_addr_p listen_addr, int method_id, int init_flags) /* return errors, do not abort */ mx_set_error_handler(MX_ERRORS_RETURN); + /* only complete sends after they are delivered */ + setenv("MX_ZOMBIE", "0", 1); + mxret = mx_init(); if (!(mxret == MX_SUCCESS || mxret == MX_ALREADY_INITIALIZED)) { debug(BMX_DB_WARN, "mx_init() failed with %s", mx_strerror(mxret)); @@ -907,11 +999,17 @@ BMI_mx_initialize(bmi_method_addr_p listen_addr, int method_id, int init_flags) if (init_flags & BMI_INIT_SERVER) { struct bmx_ctx *rx = NULL; struct bmx_method_addr *mxmap = listen_addr->method_data; + mx_endpoint_addr_t epa; + uint32_t ep_id = 0; + uint32_t sid = 0; + uint64_t nic_id = 0ULL; + struct bmx_peer *peer = NULL; - bmi_mx->bmx_hostname = (char *) mxmap->mxm_hostname; + bmi_mx->bmx_hostname = strdup(mxmap->mxm_hostname); bmi_mx->bmx_board = mxmap->mxm_board; bmi_mx->bmx_ep_id = mxmap->mxm_ep_id; bmi_mx->bmx_is_server = 1; + bmx_create_peername(); ret = bmx_open_endpoint(&bmi_mx->bmx_ep, mxmap->mxm_board, mxmap->mxm_ep_id); if (ret != 0) { @@ -919,12 +1017,20 @@ BMI_mx_initialize(bmi_method_addr_p listen_addr, int method_id, int init_flags) BMX_FREE(bmi_mx, sizeof(*bmi_mx)); exit(1); } + + /* get our MX session id */ + mx_get_endpoint_addr(bmi_mx->bmx_ep, &epa); + mx_decompose_endpoint_addr2(epa, &nic_id, &ep_id, &sid); + bmi_mx->bmx_sid = sid; + + bmx_peer_alloc(&peer, mxmap); + /* We allocate BMX_PEER_RX_NUM when we peer_alloc() * Allocate some here to catch the peer CONN_REQ */ for (i = 0; i < BMX_SERVER_RXS; i++) { ret = bmx_ctx_alloc(&rx, BMX_REQ_RX); if (ret == 0) { - bmx_put_idle_rx(rx); + bmx_put_idle_ctx(rx); } } } @@ -948,6 +1054,8 @@ BMI_mx_initialize(bmi_method_addr_p listen_addr, int method_id, int 
init_flags) #if BMX_MEM_ACCT debug(BMX_DB_MEM, "memory used at end of initialization %lld", llu(mem_used)); #endif + BMX_EXIT; + return 0; } @@ -956,7 +1064,7 @@ BMI_mx_finalize(void) { struct bmx_data *tmp = bmi_mx; - debug(BMX_DB_FUNC, "entering %s", __func__); + BMX_ENTER; gen_mutex_lock(&tmp->bmx_lock); @@ -1017,6 +1125,15 @@ BMI_mx_finalize(void) } #endif + if (bmi_mx->bmx_hostname) { + free(bmi_mx->bmx_hostname); + bmi_mx->bmx_hostname = NULL; + } + if (bmi_mx->bmx_peername) { + free(bmi_mx->bmx_peername); + bmi_mx->bmx_peername = NULL; + } + bmi_mx = NULL; gen_mutex_unlock(&tmp->bmx_lock); @@ -1026,12 +1143,7 @@ BMI_mx_finalize(void) #if BMX_MEM_ACCT debug(BMX_DB_MEM, "memory leaked at shutdown %lld", llu(mem_used)); #endif - -#if BMX_LOGGING - MPE_Finish_log("/tmp/bmi_mx.log"); -#endif - debug(BMX_DB_FUNC, "leaving %s", __func__); - + BMX_EXIT; return 0; } @@ -1060,6 +1172,8 @@ bmx_peer_disconnect(struct bmx_peer *peer, int mx_dis, bmi_error_code_t err) gen_mutex_unlock(&peer->mxp_lock); return; } + debug(BMX_DB_PEER, "Setting peer %s to BMX_PEER_DISCONNECT", + peer->mxp_mxmap->mxm_peername); peer->mxp_state = BMX_PEER_DISCONNECT; /* cancel queued txs */ @@ -1067,14 +1181,14 @@ bmx_peer_disconnect(struct bmx_peer *peer, int mx_dis, bmi_error_code_t err) list_t *queued_txs = &peer->mxp_queued_txs; tx = qlist_entry(queued_txs->next, struct bmx_ctx, mxc_list); qlist_del_init(&tx->mxc_list); - bmx_q_canceled_ctx(tx, err); + bmx_q_completed(tx, BMX_CTX_CANCELED, BMX_NO_STATUS, err); } /* cancel queued rxs */ while (!qlist_empty(&peer->mxp_queued_rxs)) { list_t *queued_rxs = &peer->mxp_queued_rxs; rx = qlist_entry(queued_rxs->next, struct bmx_ctx, mxc_list); qlist_del_init(&rx->mxc_list); - bmx_q_canceled_ctx(rx, err); + bmx_q_completed(rx, BMX_CTX_CANCELED, BMX_NO_STATUS, err); } /* try to cancel pending rxs */ qlist_for_each_entry_safe(rx, next, &peer->mxp_pending_rxs, mxc_list) { @@ -1082,7 +1196,7 @@ bmx_peer_disconnect(struct bmx_peer *peer, int mx_dis, 
bmi_error_code_t err) mx_cancel(bmi_mx->bmx_ep, &rx->mxc_mxreq, &result); if (result) { qlist_del_init(&rx->mxc_list); - bmx_q_canceled_ctx(rx, err); + bmx_q_completed(rx, BMX_CTX_CANCELED, BMX_NO_STATUS, err); } } gen_mutex_unlock(&peer->mxp_lock); @@ -1100,7 +1214,7 @@ BMI_mx_set_info(int option, void *inout_parameter) struct bmx_method_addr *mxmap = NULL; struct bmx_peer *peer = NULL; - debug(BMX_DB_FUNC, "entering %s", __func__); + BMX_ENTER; switch(option) { case BMI_DROP_ADDR: @@ -1115,10 +1229,16 @@ BMI_mx_set_info(int option, void *inout_parameter) peer = mxmap->mxm_peer; bmx_peer_disconnect(peer, 1, BMI_ENETRESET); } - if (!mxmap->mxm_peername) free((void *) mxmap->mxm_peername); - mxmap->mxm_peername = NULL; - if (!mxmap->mxm_hostname) free((void *) mxmap->mxm_hostname); - mxmap->mxm_hostname = NULL; + if (mxmap->mxm_peername) { + debug(BMX_DB_MEM, "freeing mxm_peername"); + free((void *) mxmap->mxm_peername); + mxmap->mxm_peername = NULL; + } + if (mxmap->mxm_hostname) { + debug(BMX_DB_MEM, "freeing mxm_hostname"); + free((void *) mxmap->mxm_hostname); + mxmap->mxm_hostname = NULL; + } debug(BMX_DB_PEER, "freeing map 0x%p", map); free(map); } @@ -1128,7 +1248,7 @@ BMI_mx_set_info(int option, void *inout_parameter) * handle that correctly. 
*/ break; } - debug(BMX_DB_FUNC, "leaving %s", __func__); + BMX_EXIT; return 0; } @@ -1138,7 +1258,7 @@ BMI_mx_get_info(int option, void *inout_parameter) { int ret = 0; - debug(BMX_DB_FUNC, "entering %s with option=%d", __func__, option); + BMX_ENTER; switch(option) { case BMI_CHECK_MAXSIZE: @@ -1151,7 +1271,7 @@ BMI_mx_get_info(int option, void *inout_parameter) default: ret = -BMI_ENOSYS; } - debug(BMX_DB_FUNC, "leaving %s with ret=%d", __func__, ret); + BMX_EXIT; return ret; } @@ -1159,7 +1279,7 @@ BMI_mx_get_info(int option, void *inout_parameter) #define BMX_IO_BUF 1 #define BMX_UNEX_BUF 2 -void * +static void * bmx_memalloc(bmi_size_t size, int type) { void *buf = NULL; @@ -1281,31 +1401,31 @@ BMI_mx_unexpected_free(void *buf) { int ret = 0; - debug(BMX_DB_FUNC, "entering %s", __func__); + BMX_ENTER; ret = bmx_memfree(buf, BMX_UNEXPECTED_SIZE, BMX_UNEX_BUF); - debug(BMX_DB_FUNC, "leaving %s", __func__); + BMX_EXIT; return 0; } -void +static void bmx_parse_match(uint64_t match, uint8_t *type, uint32_t *id, uint32_t *tag) { - *type = (uint8_t) (match >> 60); - *id = (uint32_t) ((match >> 32) & 0xFFFFF); /* 20 bits */ - *tag = (uint32_t) (match & 0xFFFFFFFF); + *type = (uint8_t) (match >> BMX_MSG_SHIFT); + *id = (uint32_t) ((match >> BMX_ID_SHIFT) & BMX_MAX_PEER_ID); /* 20 bits */ + *tag = (uint32_t) (match & BMX_MAX_TAG); /* 32 bits */ return; } -void +static void bmx_create_match(struct bmx_ctx *ctx) { int connect = 0; uint64_t type = (uint64_t) ctx->mxc_msg_type; uint64_t id = 0ULL; - uint64_t tag = (uint64_t) ctx->mxc_tag; + uint64_t tag = (uint64_t) ((uint32_t) ctx->mxc_tag); if (ctx->mxc_msg_type == BMX_MSG_CONN_REQ || ctx->mxc_msg_type == BMX_MSG_CONN_ACK) { @@ -1330,12 +1450,12 @@ bmx_create_match(struct bmx_ctx *ctx) exit(1); } - ctx->mxc_match = (type << 60) | (id << 32) | tag; + ctx->mxc_match = (type << BMX_MSG_SHIFT) | (id << BMX_ID_SHIFT) | tag; return; } -bmi_error_code_t +static bmi_error_code_t bmx_mx_to_bmi_errno(enum mx_status_code code) { 
int err = 0; @@ -1398,7 +1518,7 @@ bmx_post_tx(struct bmx_ctx *tx) if (mxret != MX_SUCCESS) { ret = -BMI_ENOMEM; bmx_deq_pending_ctx(tx); /* uses peer lock */ - bmx_q_canceled_ctx(tx, BMI_ENOMEM); + bmx_q_completed(tx, BMX_CTX_CANCELED, BMX_NO_STATUS, BMI_ENOMEM); } } else { /* peer is not ready */ debug(BMX_DB_PEER, "%s peer is not ready (%d) q_ctx(tx) " @@ -1406,6 +1526,7 @@ bmx_post_tx(struct bmx_ctx *tx) llu(tx->mxc_match), lld(tx->mxc_nob)); bmx_q_ctx(tx); /* uses peer lock */ } + BMX_EXIT; return ret; } @@ -1415,6 +1536,8 @@ bmx_ensure_connected(struct bmx_method_addr *mxmap) int ret = 0; struct bmx_peer *peer = mxmap->mxm_peer; + /* NOTE: can this happen? we call peer_alloc() when using + * method_addr_lookup() */ if (peer == NULL) { ret = bmx_peer_alloc(&peer, mxmap); if (ret != 0) { @@ -1440,21 +1563,24 @@ bmx_post_send_common(bmi_op_id_t *id, struct bmi_method_addr *remote_map, int numbufs, const void *const *buffers, const bmi_size_t *sizes, bmi_size_t total_size, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id, int is_unexpected) + bmi_context_id context_id, int is_unexpected, + PVFS_hint hints) { struct bmx_ctx *tx = NULL; struct method_op *mop = NULL; struct bmx_method_addr *mxmap = NULL; struct bmx_peer *peer = NULL; int ret = 0; + PINT_event_id eid = 0; -#if BMX_LOGGING - if (!is_unexpected) { - MPE_Log_event(send_start, (int) tag, NULL); - } else { - MPE_Log_event(sendunex_start, (int) tag, NULL); - } -#endif + PINT_EVENT_START( + bmi_mx_send_event_id, bmi_mx_pid, NULL, &eid, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + PINT_HINT_GET_HANDLE(hints), + PINT_HINT_GET_OP_ID(hints), + total_size); mxmap = remote_map->method_data; @@ -1488,7 +1614,7 @@ bmx_post_send_common(bmi_op_id_t *id, struct bmi_method_addr *remote_map, BMX_MALLOC(segs, (numbufs * sizeof(*segs))); if (segs == NULL) { - bmx_put_idle_tx(tx); + bmx_put_idle_ctx(tx); bmx_peer_decref(peer); ret = -BMI_ENOMEM; goto out; 
@@ -1509,7 +1635,7 @@ bmx_post_send_common(bmi_op_id_t *id, struct bmi_method_addr *remote_map, } if (is_unexpected && tx->mxc_nob > (long long) BMX_UNEXPECTED_SIZE) { - bmx_put_idle_tx(tx); + bmx_put_idle_ctx(tx); bmx_peer_decref(peer); ret = -BMI_EINVAL; goto out; @@ -1525,25 +1651,30 @@ bmx_post_send_common(bmi_op_id_t *id, struct bmi_method_addr *remote_map, BMX_MALLOC(mop, sizeof(*mop)); if (mop == NULL) { - bmx_put_idle_tx(tx); + bmx_put_idle_ctx(tx); bmx_peer_decref(peer); ret = -BMI_ENOMEM; goto out; } - debug(BMX_DB_CTX, "TX id_gen_fast_register(%llu)", llu(mop->op_id)); id_gen_fast_register(&mop->op_id, mop); + debug(BMX_DB_CTX, "TX id_gen_fast_register(%llu)", llu(mop->op_id)); mop->addr = remote_map; /* set of function pointers, essentially */ mop->method_data = tx; mop->user_ptr = user_ptr; mop->context_id = context_id; + mop->event_id = eid; *id = mop->op_id; tx->mxc_mop = mop; + assert(context_id == mop->context_id); + assert(context_id == tx->mxc_mop->context_id); + bmx_create_match(tx); - debug(BMX_DB_CTX, "%s tag= %d length= %d %s op_id= %llu", __func__, tag, - (int) total_size, is_unexpected ? "UNEXPECTED" : "EXPECTED", - llu(mop->op_id)); + debug(BMX_DB_CTX, "%s tag= %d length= %d %s op_id= %llu context_id= %lld", + __func__, tag, (int) total_size, + is_unexpected ? 
"UNEXPECTED" : "EXPECTED", + llu(mop->op_id), lld(context_id)); ret = bmx_post_tx(tx); @@ -1555,48 +1686,77 @@ static int BMI_mx_post_send(bmi_op_id_t *id, struct bmi_method_addr *remote_map, const void *buffer, bmi_size_t size, enum bmi_buffer_type buffer_flag __unused, - bmi_msg_tag_t tag, void *user_ptr, bmi_context_id context_id) + bmi_msg_tag_t tag, void *user_ptr, bmi_context_id context_id, + PVFS_hint hints) { - debug(BMX_DB_FUNC, "entering %s", __func__); + int ret = 0; + BMX_ENTER; + + ret = bmx_post_send_common(id, remote_map, 1, &buffer, &size, size, + tag, user_ptr, context_id, 0, hints); - return bmx_post_send_common(id, remote_map, 1, &buffer, &size, size, - tag, user_ptr, context_id, 0); + BMX_EXIT; + + return ret; } static int BMI_mx_post_send_list(bmi_op_id_t *id, struct bmi_method_addr *remote_map, const void *const *buffers, const bmi_size_t *sizes, int list_count, bmi_size_t total_size, enum bmi_buffer_type buffer_flag __unused, - bmi_msg_tag_t tag, void *user_ptr, bmi_context_id context_id) + bmi_msg_tag_t tag, void *user_ptr, bmi_context_id context_id, + PVFS_hint hints) { - debug(BMX_DB_FUNC, "entering %s", __func__); + int ret = 0; - return bmx_post_send_common(id, remote_map, list_count, buffers, sizes, - total_size, tag, user_ptr, context_id, 0); + BMX_ENTER; + + ret = bmx_post_send_common(id, remote_map, list_count, buffers, sizes, + total_size, tag, user_ptr, context_id, 0, + hints); + + BMX_EXIT; + + return ret; } static int BMI_mx_post_sendunexpected(bmi_op_id_t *id, struct bmi_method_addr *remote_map, const void *buffer, bmi_size_t size, enum bmi_buffer_type buffer_flag __unused, - bmi_msg_tag_t tag, void *user_ptr, bmi_context_id context_id) + bmi_msg_tag_t tag, void *user_ptr, bmi_context_id context_id, + PVFS_hint hints) { - debug(BMX_DB_FUNC, "entering %s", __func__); + int ret = 0; - return bmx_post_send_common(id, remote_map, 1, &buffer, &size, size, - tag, user_ptr, context_id, 1); + BMX_ENTER; + + ret = 
bmx_post_send_common(id, remote_map, 1, &buffer, &size, size, + tag, user_ptr, context_id, 1, hints); + + BMX_EXIT; + + return ret; } static int BMI_mx_post_sendunexpected_list(bmi_op_id_t *id, struct bmi_method_addr *remote_map, const void *const *buffers, const bmi_size_t *sizes, int list_count, bmi_size_t total_size, enum bmi_buffer_type buffer_flag __unused, - bmi_msg_tag_t tag, void *user_ptr, bmi_context_id context_id) + bmi_msg_tag_t tag, void *user_ptr, bmi_context_id context_id, + PVFS_hint hints) { - debug(BMX_DB_FUNC, "entering %s", __func__); + int ret = 0; + + BMX_ENTER; return bmx_post_send_common(id, remote_map, list_count, buffers, sizes, - total_size, tag, user_ptr, context_id, 1); + total_size, tag, user_ptr, context_id, 1, + hints); + + BMX_EXIT; + + return ret; } /* if (peer->mxp_state == BMX_PEER_READY) @@ -1625,11 +1785,11 @@ bmx_post_rx(struct bmx_ctx *rx) segs = rx->mxc_seg_list; } mxret = mx_irecv(bmi_mx->bmx_ep, segs, rx->mxc_nseg, - rx->mxc_match, -1ULL, (void *) rx, &rx->mxc_mxreq); + rx->mxc_match, BMX_MASK_ALL, (void *) rx, &rx->mxc_mxreq); if (mxret != MX_SUCCESS) { ret = -BMI_ENOMEM; bmx_deq_pending_ctx(rx); /* uses peer lock */ - bmx_q_canceled_ctx(rx, BMI_ENOMEM); + bmx_q_completed(rx, BMX_CTX_CANCELED, BMX_NO_STATUS, BMI_ENOMEM); } } else { /* peer is not ready */ debug(BMX_DB_PEER, "%s peer is not ready (%d) q_ctx(rx) match= 0x%llx " @@ -1637,6 +1797,7 @@ bmx_post_rx(struct bmx_ctx *rx) llu(rx->mxc_match), (long long) rx->mxc_nob); bmx_q_ctx(rx); /* uses peer lock */ } + BMX_EXIT; return ret; } @@ -1644,17 +1805,24 @@ static int bmx_post_recv_common(bmi_op_id_t *id, struct bmi_method_addr *remote_map, int numbufs, void *const *buffers, const bmi_size_t *sizes, bmi_size_t tot_expected_len, bmi_msg_tag_t tag, - void *user_ptr, bmi_context_id context_id) + void *user_ptr, bmi_context_id context_id, + PVFS_hint hints) { int ret = 0; struct bmx_ctx *rx = NULL; struct method_op *mop = NULL; struct bmx_method_addr *mxmap = NULL; struct 
bmx_peer *peer = NULL; + PINT_event_id eid = 0; -#if BMX_LOGGING - MPE_Log_event(recv_start, (int) tag, NULL); -#endif + PINT_EVENT_START( + bmi_mx_recv_event_id, bmi_mx_pid, NULL, &eid, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + PINT_HINT_GET_HANDLE(hints), + PINT_HINT_GET_OP_ID(hints), + tot_expected_len); mxmap = remote_map->method_data; @@ -1690,7 +1858,7 @@ bmx_post_recv_common(bmi_op_id_t *id, struct bmi_method_addr *remote_map, BMX_MALLOC(segs, (numbufs * sizeof(*segs))); if (segs == NULL) { - bmx_put_idle_rx(rx); + bmx_put_idle_ctx(rx); bmx_peer_decref(peer); ret = -BMI_ENOMEM; goto out; @@ -1712,17 +1880,18 @@ bmx_post_recv_common(bmi_op_id_t *id, struct bmi_method_addr *remote_map, BMX_MALLOC(mop, sizeof(*mop)); if (mop == NULL) { - bmx_put_idle_rx(rx); + bmx_put_idle_ctx(rx); bmx_peer_decref(peer); ret = -BMI_ENOMEM; goto out; } - debug(BMX_DB_CTX, "RX id_gen_fast_register(%llu)", llu(mop->op_id)); id_gen_fast_register(&mop->op_id, mop); + debug(BMX_DB_CTX, "RX id_gen_fast_register(%llu)", llu(mop->op_id)); mop->addr = remote_map; /* set of function pointers, essentially */ mop->method_data = rx; mop->user_ptr = user_ptr; mop->context_id = context_id; + mop->event_id = eid; *id = mop->op_id; rx->mxc_mop = mop; @@ -1740,12 +1909,20 @@ static int BMI_mx_post_recv(bmi_op_id_t *id, struct bmi_method_addr *remote_map, void *buffer, bmi_size_t expected_len, bmi_size_t *actual_len __unused, enum bmi_buffer_type buffer_flag __unused, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + PVFS_hint hints) { - debug(BMX_DB_FUNC, "entering %s", __func__); + int ret = 0; - return bmx_post_recv_common(id, remote_map, 1, &buffer, &expected_len, - expected_len, tag, user_ptr, context_id); + BMX_ENTER; + + ret = bmx_post_recv_common(id, remote_map, 1, &buffer, &expected_len, + expected_len, tag, user_ptr, context_id, + hints); + + BMX_EXIT; + + return ret; } static int @@ 
-1753,12 +1930,20 @@ BMI_mx_post_recv_list(bmi_op_id_t *id, struct bmi_method_addr *remote_map, void *const *buffers, const bmi_size_t *sizes, int list_count, bmi_size_t tot_expected_len, bmi_size_t *tot_actual_len __unused, enum bmi_buffer_type buffer_flag __unused, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + PVFS_hint hints) { - debug(BMX_DB_FUNC, "entering %s", __func__); + int ret = 0; + + BMX_ENTER; - return bmx_post_recv_common(id, remote_map, list_count, buffers, sizes, - tot_expected_len, tag, user_ptr, context_id); + ret = bmx_post_recv_common(id, remote_map, list_count, buffers, sizes, + tot_expected_len, tag, user_ptr, context_id, + hints); + + BMX_EXIT; + + return ret; } static void @@ -1767,6 +1952,8 @@ bmx_peer_post_queued_rxs(struct bmx_peer *peer) struct bmx_ctx *rx = NULL; list_t *queued_rxs = &peer->mxp_queued_rxs; + BMX_ENTER; + gen_mutex_lock(&peer->mxp_lock); while (!qlist_empty(queued_rxs)) { if (peer->mxp_state != BMX_PEER_READY) { @@ -1781,6 +1968,8 @@ bmx_peer_post_queued_rxs(struct bmx_peer *peer) } gen_mutex_unlock(&peer->mxp_lock); + BMX_EXIT; + return; } @@ -1790,6 +1979,8 @@ bmx_peer_post_queued_txs(struct bmx_peer *peer) struct bmx_ctx *tx = NULL; list_t *queued_txs = &peer->mxp_queued_txs; + BMX_ENTER; + gen_mutex_lock(&peer->mxp_lock); while (!qlist_empty(queued_txs)) { if (peer->mxp_state != BMX_PEER_READY) { @@ -1806,6 +1997,8 @@ bmx_peer_post_queued_txs(struct bmx_peer *peer) } gen_mutex_unlock(&peer->mxp_lock); + BMX_EXIT; + return; } @@ -1817,19 +2010,19 @@ bmx_post_unexpected_recv(mx_endpoint_addr_t source, uint8_t type, uint32_t id, int ret = 0; struct bmx_ctx *rx = NULL; struct bmx_peer *peer = NULL; + void *peerp = (void *) &peer; mx_return_t mxret = MX_SUCCESS; + BMX_ENTER; + if (id == 0 && tag == 0 && type == 0) { bmx_parse_match(match, &type, &id, &tag); } -#if BMX_LOGGING - MPE_Log_event(recvunex_start, (int) tag, NULL); -#endif - rx = bmx_get_idle_rx(); if (rx != NULL) 
{ - mx_get_endpoint_addr_context(source, (void **) &peer); + mx_get_endpoint_addr_context(source, &peerp); + peer = (struct bmx_peer *) peerp; if (peer == NULL) { debug(BMX_DB_PEER, "unknown peer sent message 0x%llx " "length %u", llu(match), length); @@ -1853,18 +2046,20 @@ bmx_post_unexpected_recv(mx_endpoint_addr_t source, uint8_t type, uint32_t id, debug(BMX_DB_CTX, "%s rx match= 0x%llx length= %lld", __func__, llu(rx->mxc_match), lld(rx->mxc_nob)); mxret = mx_irecv(bmi_mx->bmx_ep, &rx->mxc_seg, rx->mxc_nseg, - rx->mxc_match, -1ULL, (void *) rx, &rx->mxc_mxreq); + rx->mxc_match, BMX_MASK_ALL, (void *) rx, &rx->mxc_mxreq); if (mxret != MX_SUCCESS) { debug((BMX_DB_MX|BMX_DB_CTX), "mx_irecv() failed with %s for an " "unexpected recv with tag %d length %d", mx_strerror(mxret), tag, length); - bmx_put_idle_rx(rx); + bmx_put_idle_ctx(rx); ret = -1; } } else { ret = -1; } + BMX_EXIT; + return ret; } @@ -1885,6 +2080,7 @@ bmx_unexpected_recv(void *context, mx_endpoint_addr_t source, uint32_t id = 0; uint32_t tag = 0; struct bmx_peer *peer = NULL; + void *peerp = &peer; mx_return_t mxret = MX_SUCCESS; bmx_parse_match(match_value, &type, &id, &tag); @@ -1909,12 +2105,12 @@ bmx_unexpected_recv(void *context, mx_endpoint_addr_t source, debug(BMX_DB_CONN, "%s rx match= 0x%llx length= %lld", __func__, llu(rx->mxc_match), lld(rx->mxc_nob)); mxret = mx_irecv(bmi_mx->bmx_ep, &rx->mxc_seg, rx->mxc_nseg, - rx->mxc_match, -1ULL, (void *) rx, &rx->mxc_mxreq); + rx->mxc_match, BMX_MASK_ALL, (void *) rx, &rx->mxc_mxreq); if (mxret != MX_SUCCESS) { debug(BMX_DB_CONN, "mx_irecv() failed for an " "unexpected recv with %s", mx_strerror(mxret)); - bmx_put_idle_rx(rx); + bmx_put_idle_ctx(rx); ret = MX_RECV_FINISHED; } } else { @@ -1923,11 +2119,8 @@ bmx_unexpected_recv(void *context, mx_endpoint_addr_t source, break; case BMX_MSG_CONN_ACK: /* the server is replying to our CONN_REQ */ - if (bmi_mx->bmx_is_server) { - debug(BMX_DB_ERR, "server receiving CONN_ACK"); - exit(1); - } - 
mx_get_endpoint_addr_context(source, (void **) &peer); + mx_get_endpoint_addr_context(source, &peerp); + peer = (struct bmx_peer *) peerp; if (peer == NULL) { debug((BMX_DB_CONN|BMX_DB_PEER), "receiving CONN_ACK but " "the endpoint context does not have a peer"); @@ -1952,7 +2145,9 @@ bmx_unexpected_recv(void *context, mx_endpoint_addr_t source, break; case BMX_MSG_UNEXPECTED: if (!bmi_mx->bmx_is_server) { - mx_get_endpoint_addr_context(source, (void **) &peer); + void *peerp = &peer; + mx_get_endpoint_addr_context(source, &peerp); + peer = (struct bmx_peer *) peerp; debug(BMX_DB_ERR, "client receiving unexpected message " "from %s with mask 0x%llx length %u", peer == NULL ? "unknown" : peer->mxp_mxmap->mxm_peername, @@ -1987,6 +2182,8 @@ bmx_alloc_method_addr(const char *peername, const char *hostname, uint32_t board struct bmi_method_addr *map = NULL; struct bmx_method_addr *mxmap = NULL; + BMX_ENTER; + if (bmi_mx == NULL) { map = bmi_alloc_method_addr( tmp_id, (bmi_size_t) sizeof(*mxmap)); @@ -1998,11 +2195,13 @@ bmx_alloc_method_addr(const char *peername, const char *hostname, uint32_t board mxmap = map->method_data; mxmap->mxm_map = map; mxmap->mxm_peername = strdup(peername); - mxmap->mxm_hostname = hostname; + mxmap->mxm_hostname = strdup(hostname); mxmap->mxm_board = board; mxmap->mxm_ep_id = ep_id; /* mxmap->mxm_peer */ + BMX_EXIT; + return map; } @@ -2019,10 +2218,9 @@ bmx_handle_icon_req(void) { uint32_t result = 0; - if (bmi_mx->bmx_is_server) return; do { - uint64_t match = (uint64_t) BMX_MSG_ICON_REQ << 60; - uint64_t mask = (uint64_t) 0xF << 60; + uint64_t match = (uint64_t) BMX_MSG_ICON_REQ << BMX_MSG_SHIFT; + uint64_t mask = BMX_MASK_MSG; mx_status_t status; mx_test_any(bmi_mx->bmx_ep, match, mask, &status, &result); @@ -2038,7 +2236,7 @@ bmx_handle_icon_req(void) mxmap->mxm_peername, mx_strstatus(status.code)); if (status.code != MX_STATUS_SUCCESS) { - debug((BMX_DB_CONN|BMX_DB_PEER|BMX_DB_WARN), + debug((BMX_DB_CONN|BMX_DB_PEER), "%s: connect to 
%s failed with %s", __func__, mxmap->mxm_peername, mx_strstatus(status.code)); bmx_peer_disconnect(peer, 0, bmx_mx_to_bmi_errno(status.code)); @@ -2092,7 +2290,7 @@ bmx_handle_icon_req(void) return; } -/* test for CONN_REQ messages (on the server) +/* test for received CONN_REQ messages (on the server) * if found * create peer * create mxmap @@ -2102,9 +2300,9 @@ static void bmx_handle_conn_req(void) { uint32_t result = 0; - uint64_t match = (uint64_t) BMX_MSG_CONN_REQ << 60; - uint64_t mask = (uint64_t) 0xF << 60; - uint64_t ack = (uint64_t) BMX_MSG_ICON_ACK << 60; + uint64_t match = (uint64_t) BMX_MSG_CONN_REQ << BMX_MSG_SHIFT; + uint64_t mask = BMX_MASK_MSG; + uint64_t ack = (uint64_t) BMX_MSG_ICON_ACK << BMX_MSG_SHIFT; mx_status_t status; do { @@ -2112,7 +2310,10 @@ bmx_handle_conn_req(void) if (result) { uint8_t type = 0; uint32_t id = 0; + uint32_t sid = 0; uint32_t version = 0; + uint64_t nic_id = 0ULL; + uint32_t ep_id = 0; mx_request_t request; struct bmx_ctx *rx = NULL; struct bmx_peer *peer = NULL; @@ -2129,15 +2330,14 @@ bmx_handle_conn_req(void) tx->mxc_peer->mxp_mxmap->mxm_peername); /* drop ref taken before mx_iconnect() */ bmx_peer_decref(tx->mxc_peer); - bmx_put_idle_tx(tx); + bmx_put_idle_ctx(tx); continue; } else if (status.code != MX_STATUS_SUCCESS) { bmx_peer_decref(rx->mxc_peer); - bmx_put_idle_rx(rx); + bmx_put_idle_ctx(rx); continue; } bmx_parse_match(rx->mxc_match, &type, &id, &version); - if (version != BMX_VERSION) { /* TODO send error conn_ack */ debug(BMX_DB_WARN, "version mismatch with peer " @@ -2145,16 +2345,22 @@ bmx_handle_conn_req(void) "0x%x)", (char *) rx->mxc_buffer, BMX_VERSION, version); bmx_peer_decref(rx->mxc_peer); - bmx_put_idle_rx(rx); + bmx_put_idle_ctx(rx); continue; } if (bmi_mx->bmx_is_server == 0) { debug(BMX_DB_WARN, "received CONN_REQ on a client."); bmx_peer_decref(rx->mxc_peer); - bmx_put_idle_rx(rx); + bmx_put_idle_ctx(rx); continue; } - mx_get_endpoint_addr_context(status.source, (void **) &peer); + 
mx_decompose_endpoint_addr2(status.source, &nic_id, + &ep_id, &sid); + { + void *peerp = &peer; + mx_get_endpoint_addr_context(status.source, &peerp); + peer = (struct bmx_peer *) peerp; + } if (peer == NULL) { /* new peer */ int ret = 0; char *host = NULL; @@ -2173,7 +2379,7 @@ bmx_handle_conn_req(void) "failed on %s", (char *) rx->mxc_buffer); bmx_peer_decref(rx->mxc_peer); - bmx_put_idle_rx(rx); + bmx_put_idle_ctx(rx); continue; } map = bmx_alloc_method_addr(peername, host, @@ -2182,33 +2388,44 @@ bmx_handle_conn_req(void) debug((BMX_DB_CONN|BMX_DB_MEM), "unable to alloc a " "method addr for %s", peername); bmx_peer_decref(rx->mxc_peer); - bmx_put_idle_rx(rx); + bmx_put_idle_ctx(rx); continue; } + free(host); mxmap = map->method_data; ret = bmx_peer_alloc(&peer, mxmap); if (ret != 0) { debug((BMX_DB_CONN|BMX_DB_MEM), "unable to alloc a " "peer for %s", peername); bmx_peer_decref(rx->mxc_peer); - bmx_put_idle_rx(rx); + bmx_put_idle_ctx(rx); continue; } - } else { /* reconnecting peer */ + } else if (sid != peer->mxp_sid) { /* reconnecting peer */ /* cancel queued txs and rxs, pending rxs */ - debug((BMX_DB_CONN|BMX_DB_PEER), "%s peer %s reconnecting", - __func__, peer->mxp_mxmap->mxm_peername); - bmx_peer_disconnect(peer, 0, BMI_ENETRESET); + debug((BMX_DB_CONN|BMX_DB_PEER), "%s peer " + "%s reconnecting", __func__, + peer->mxp_mxmap->mxm_peername); + if (peer->mxp_state == BMX_PEER_READY) + bmx_peer_disconnect(peer, 0, BMI_ENETRESET); + mxmap = peer->mxp_mxmap; + } else { + debug((BMX_DB_CONN|BMX_DB_PEER), "%s peer " + "%s reconnecting with same sid", __func__, + peer->mxp_mxmap->mxm_peername); mxmap = peer->mxp_mxmap; } gen_mutex_lock(&peer->mxp_lock); + debug(BMX_DB_PEER, "Setting peer %s to BMX_PEER_WAIT", + peer->mxp_mxmap->mxm_peername); peer->mxp_state = BMX_PEER_WAIT; peer->mxp_tx_id = id; + peer->mxp_sid = sid; gen_mutex_unlock(&peer->mxp_lock); bmx_peer_addref(peer); /* add ref until completion of CONN_ACK */ mx_iconnect(bmi_mx->bmx_ep, 
peer->mxp_nic_id, mxmap->mxm_ep_id, BMX_MAGIC, ack, peer, &request); - bmx_put_idle_rx(rx); + bmx_put_idle_ctx(rx); } } while (result); @@ -2232,8 +2449,8 @@ bmx_handle_icon_ack(void) if (!bmi_mx->bmx_is_server) return; do { - uint64_t match = (uint64_t) BMX_MSG_ICON_ACK << 60; - uint64_t mask = (uint64_t) 0xF << 60; + uint64_t match = (uint64_t) BMX_MSG_ICON_ACK << BMX_MSG_SHIFT; + uint64_t mask = BMX_MASK_MSG; mx_status_t status; mx_test_any(bmi_mx->bmx_ep, match, mask, &status, &result); @@ -2255,6 +2472,8 @@ bmx_handle_icon_ack(void) } gen_mutex_lock(&peer->mxp_lock); peer->mxp_epa = status.source; + debug(BMX_DB_PEER, "Setting peer %s to BMX_PEER_READY", + peer->mxp_mxmap->mxm_peername); peer->mxp_state = BMX_PEER_READY; /* NOTE no need to call bmx_peer_post_queued_[rxs|txs]() * since the server should not have any queued msgs */ @@ -2288,6 +2507,8 @@ bmx_handle_icon_ack(void) mx_isend(bmi_mx->bmx_ep, &tx->mxc_seg, tx->mxc_nseg, peer->mxp_epa, tx->mxc_match, (void *) tx, &tx->mxc_mxreq); if (!peer->mxp_exist) { + debug(BMX_DB_PEER, "calling bmi_method_addr_reg_callback" + "on %s", peer->mxp_mxmap->mxm_peername); bmi_method_addr_reg_callback(peer->mxp_map); peer->mxp_exist = 1; } @@ -2307,10 +2528,10 @@ bmx_handle_conn_ack(void) uint32_t result = 0; struct bmx_ctx *tx = NULL; - if (!bmi_mx->bmx_is_server) return; + if (!bmi_mx->bmx_is_server) goto out; do { - uint64_t match = (uint64_t) BMX_MSG_CONN_ACK << 60; - uint64_t mask = (uint64_t) 0xF << 60; + uint64_t match = (uint64_t) BMX_MSG_CONN_ACK << BMX_MSG_SHIFT; + uint64_t mask = BMX_MASK_MSG; mx_status_t status; mx_test_any(bmi_mx->bmx_ep, match, mask, &status, &result); @@ -2319,100 +2540,106 @@ bmx_handle_conn_ack(void) debug(BMX_DB_CONN, "%s returned tx match 0x%llx with %s", __func__, llu(tx->mxc_match), mx_strstatus(status.code)); bmx_peer_decref(tx->mxc_peer); - bmx_put_idle_tx(tx); + bmx_put_idle_ctx(tx); } } while (result); +out: return; } static void bmx_connection_handlers(void) { + static int count 
= 0; + int print = (count++ % 1000 == 0); + + if (print) + BMX_ENTER; + /* push connection messages along */ bmx_handle_icon_req(); bmx_handle_conn_req(); bmx_handle_icon_ack(); bmx_handle_conn_ack(); + if (print) + BMX_EXIT; + return; +} + +static void +bmx_complete_ctx(struct bmx_ctx *ctx, bmi_op_id_t *outid, bmi_error_code_t *err, + bmi_size_t *size, void **user_ptr) +{ + struct bmx_peer *peer = ctx->mxc_peer; + + *outid = ctx->mxc_mop->op_id; + *err = ctx->mxc_error; + *size = ctx->mxc_mxstat.xfer_length; + if (user_ptr) + *user_ptr = ctx->mxc_mop->user_ptr; + PINT_EVENT_END( + (ctx->mxc_type == BMX_REQ_TX ? + bmi_mx_send_event_id : bmi_mx_recv_event_id), + bmi_mx_pid, NULL, ctx->mxc_mop->event_id, + *outid, *size); + + id_gen_fast_unregister(ctx->mxc_mop->op_id); + BMX_FREE(ctx->mxc_mop, sizeof(*ctx->mxc_mop)); + bmx_put_idle_ctx(ctx); + bmx_peer_decref(peer); /* drop the ref taken in [send|recv]_common */ + return; } static int BMI_mx_test(bmi_op_id_t id, int *outcount, bmi_error_code_t *err, bmi_size_t *size, void **user_ptr, int max_idle_time __unused, - bmi_context_id context_id __unused) + bmi_context_id context_id) { uint32_t result = 0; struct method_op *mop = NULL; struct bmx_ctx *ctx = NULL; struct bmx_peer *peer = NULL; - debug(BMX_DB_FUNC, "entering %s", __func__); + BMX_ENTER; bmx_connection_handlers(); + bmx_get_completion_token(); + mop = id_gen_fast_lookup(id); ctx = mop->method_data; peer = ctx->mxc_peer; + assert(context_id == mop->context_id); + if (ctx->mxc_type == BMX_REQ_RX) + assert(ctx->mxc_msg_type != BMX_MSG_UNEXPECTED); + assert(context_id == ctx->mxc_mop->context_id); + switch (ctx->mxc_state) { + case BMX_CTX_COMPLETED: case BMX_CTX_CANCELED: - /* we are racing with testcontext */ - gen_mutex_lock(&bmi_mx->bmx_canceled_lock); - if (ctx->mxc_state != BMX_CTX_CANCELED) { - gen_mutex_unlock(&bmi_mx->bmx_canceled_lock); - return 0; - } + gen_mutex_lock(&bmi_mx->bmx_done_q_lock[(int) context_id]); qlist_del_init(&ctx->mxc_list); - 
gen_mutex_unlock(&bmi_mx->bmx_canceled_lock); + gen_mutex_unlock(&bmi_mx->bmx_done_q_lock[(int) context_id]); + bmx_complete_ctx(ctx, &id, err, size, user_ptr); *outcount = 1; - *err = ctx->mxc_mxstat.code; - if (ctx->mxc_mop) { - if (user_ptr) { - *user_ptr = ctx->mxc_mop->user_ptr; - } - id_gen_fast_unregister(ctx->mxc_mop->op_id); - BMX_FREE(ctx->mxc_mop, sizeof(*ctx->mxc_mop)); - } - bmx_peer_decref(peer); - if (ctx->mxc_type == BMX_REQ_TX) { - bmx_put_idle_tx(ctx); - } else { - bmx_put_idle_rx(ctx); - } break; case BMX_CTX_PENDING: - /* racing with mx_test_any() in textcontext? */ mx_test(bmi_mx->bmx_ep, &ctx->mxc_mxreq, &ctx->mxc_mxstat, &result); if (result) { - *outcount = 1; - if (ctx->mxc_mxstat.code == MX_STATUS_SUCCESS) { - *err = 0; - *size = ctx->mxc_mxstat.xfer_length; - } else { - *err = bmx_mx_to_bmi_errno(ctx->mxc_mxstat.code); - } - if (ctx->mxc_mop) { - if (user_ptr) { - *user_ptr = ctx->mxc_mop->user_ptr; - } - id_gen_fast_unregister(ctx->mxc_mop->op_id); - BMX_FREE(ctx->mxc_mop, sizeof(*ctx->mxc_mop)); - } bmx_deq_pending_ctx(ctx); - if (ctx->mxc_type == BMX_REQ_TX) { - bmx_put_idle_tx(ctx); - } else { - bmx_put_idle_rx(ctx); - } - bmx_peer_decref(peer); + bmx_complete_ctx(ctx, &id, err, size, user_ptr); + *outcount = 1; } break; default: debug(BMX_DB_CTX, "%s called on %s with state %d", __func__, ctx->mxc_type == BMX_REQ_TX ? 
"TX" : "RX", ctx->mxc_state); } - debug(BMX_DB_FUNC, "leaving %s", __func__); + bmx_release_completion_token(); + BMX_EXIT; return 0; } @@ -2425,51 +2652,47 @@ BMI_mx_testcontext(int incount, bmi_op_id_t *outids, int *outcount, { int i = 0; int completed = 0; + int old = 0; uint64_t match = 0ULL; - uint64_t mask = (uint64_t) 0xF << 60; + uint64_t mask = BMX_MASK_MSG; struct bmx_ctx *ctx = NULL; struct bmx_peer *peer = NULL; - list_t *canceled = &bmi_mx->bmx_canceled; int wait = 0; + static int count = 0; + int print = 0; + + if (count++ % 1000 == 0) { + BMX_ENTER; + print = 1; + } bmx_connection_handlers(); - /* always return canceled messages first */ - while (completed < incount && !qlist_empty(canceled)) { - gen_mutex_lock(&bmi_mx->bmx_canceled_lock); - ctx = qlist_entry(canceled->next, struct bmx_ctx, mxc_list); - qlist_del_init(&ctx->mxc_list); - /* change state in case test is trying to reap it as well */ - ctx->mxc_state = BMX_CTX_COMPLETED; - gen_mutex_unlock(&bmi_mx->bmx_canceled_lock); - peer = ctx->mxc_peer; - outids[completed] = ctx->mxc_mop->op_id; - errs[completed] = ctx->mxc_mxstat.code; - if (user_ptrs) - user_ptrs[completed] = ctx->mxc_mop->user_ptr; - id_gen_fast_unregister(ctx->mxc_mop->op_id); - BMX_FREE(ctx->mxc_mop, sizeof(*ctx->mxc_mop)); - completed++; - if (ctx->mxc_type == BMX_REQ_TX) { - bmx_put_idle_tx(ctx); - } else { - bmx_put_idle_rx(ctx); - } - bmx_peer_decref(peer); /* drop the ref taken in [send|recv]_common */ - if (completed > 0) { - debug(BMX_DB_CTX, "%s found %d canceled messages", - __func__, completed); + bmx_get_completion_token(); + + /* always return queued, completed messages first */ + do { + bmx_deq_completed(&ctx, context_id); + if (ctx) { + bmx_complete_ctx(ctx, &outids[completed], &errs[completed], + &sizes[completed], &user_ptrs[completed]); + completed++; } - } + } while (completed < incount && ctx != NULL); - /* return completed messages + if (completed > 0) + debug(BMX_DB_CTX, "%s found %d completed messages", 
__func__, completed); + + /* try to complete expected messages * we will always try (incount - completed) times even * if some iterations have no result */ - match = (uint64_t) BMX_MSG_EXPECTED << 60; + + match = (uint64_t) BMX_MSG_EXPECTED << BMX_MSG_SHIFT; for (i = completed; i < incount; i++) { uint32_t result = 0; mx_status_t status; - int old = completed; + + old = completed; if (wait == 0 || wait == 2) { mx_test_any(bmi_mx->bmx_ep, match, mask, &status, &result); @@ -2479,129 +2702,134 @@ BMI_mx_testcontext(int incount, bmi_op_id_t *outids, int *outcount, &status, &result); wait = 2; } + if (result) { ctx = (struct bmx_ctx *) status.context; + bmx_deq_pending_ctx(ctx); + if (ctx->mxc_mop->context_id != context_id) { + bmx_q_completed(ctx, BMX_CTX_COMPLETED, status, + bmx_mx_to_bmi_errno(status.code)); + continue; + } + ctx->mxc_mxstat = status; peer = ctx->mxc_peer; debug(BMX_DB_CTX, "%s completing expected %s with match 0x%llx " - "for %s with op_id %llu length %d %s", __func__, - ctx->mxc_type == BMX_REQ_TX ? "TX" : "RX", + "for %s with op_id %llu length %d %s " + "context_id= %d mop->context_id= %d", + __func__, ctx->mxc_type == BMX_REQ_TX ? 
"TX" : "RX", llu(ctx->mxc_match), peer->mxp_mxmap->mxm_peername, llu(ctx->mxc_mop->op_id), status.xfer_length, - mx_strstatus(status.code)); + mx_strstatus(status.code), (int) context_id, + (int) ctx->mxc_mop->context_id); - if (!qlist_empty(&ctx->mxc_list)) { - gen_mutex_lock(&peer->mxp_lock); - qlist_del_init(&ctx->mxc_list); - gen_mutex_unlock(&peer->mxp_lock); - } - outids[completed] = ctx->mxc_mop->op_id; - if (status.code == MX_SUCCESS) { - errs[completed] = 0; - sizes[completed] = status.xfer_length; - } else { - errs[completed] = bmx_mx_to_bmi_errno(status.code); - } - if (user_ptrs) - user_ptrs[completed] = ctx->mxc_mop->user_ptr; - id_gen_fast_unregister(ctx->mxc_mop->op_id); - BMX_FREE(ctx->mxc_mop, sizeof(*ctx->mxc_mop)); + bmx_complete_ctx(ctx, &outids[completed], &errs[completed], + &sizes[completed], &user_ptrs[completed]); completed++; -#if BMX_LOGGING - if (ctx->mxc_type == BMX_REQ_TX) { - MPE_Log_event(send_finish, (int) ctx->mxc_tag, NULL); - } else { - MPE_Log_event(recv_finish, (int) ctx->mxc_tag, NULL); - } -#endif - if (ctx->mxc_type == BMX_REQ_TX) { - bmx_put_idle_tx(ctx); - } else { - bmx_put_idle_rx(ctx); - } - bmx_peer_decref(peer); /* drop the ref taken in [send|recv]_common */ - } - if (completed - old > 0) { - debug(BMX_DB_CTX, "%s found %d expected messages", - __func__, completed - old); } } - /* check for completed unexpected sends */ - match = (uint64_t) BMX_MSG_UNEXPECTED << 60; - if (!bmi_mx->bmx_is_server) { /* client */ - int old = completed; - for (i = completed; i < incount; i++) { - uint32_t result = 0; - mx_status_t status; + if (completed - old > 0) + debug(BMX_DB_CTX, "%s found %d expected messages", __func__, completed - old); + + /* try to complete unexpected sends */ + + match = (uint64_t) BMX_MSG_UNEXPECTED << BMX_MSG_SHIFT; + + old = completed; + for (i = completed; i < incount; i++) { + uint32_t result = 0; + mx_status_t status; + int again = 1; + + ctx = NULL; + + while (!ctx && again) { + again = 0; 
mx_test_any(bmi_mx->bmx_ep, match, mask, &status, &result); if (result) { ctx = (struct bmx_ctx *) status.context; + bmx_deq_pending_ctx(ctx); peer = ctx->mxc_peer; - debug(BMX_DB_CTX, "%s completing unexpected %s with " - "match 0x%llx for %s with op_id %llu", - __func__, - ctx->mxc_type == BMX_REQ_TX ? "TX" : "RX", - llu(ctx->mxc_match), - peer->mxp_mxmap->mxm_peername, - llu(ctx->mxc_mop->op_id)); - - if (!qlist_empty(&ctx->mxc_list)) { - gen_mutex_lock(&peer->mxp_lock); - qlist_del_init(&ctx->mxc_list); - gen_mutex_unlock(&peer->mxp_lock); - } - outids[completed] = ctx->mxc_mop->op_id; - if (status.code == MX_SUCCESS) { - errs[completed] = 0; - sizes[completed] = status.xfer_length; - } else { - errs[completed] = bmx_mx_to_bmi_errno(status.code); - } - if (user_ptrs) - user_ptrs[completed] = ctx->mxc_mop->user_ptr; - id_gen_fast_unregister(ctx->mxc_mop->op_id); - BMX_FREE(ctx->mxc_mop, sizeof(*ctx->mxc_mop)); - completed++; -#if BMX_LOGGING - MPE_Log_event(sendunex_finish, (int) ctx->mxc_tag, NULL); -#endif - - if (ctx->mxc_type == BMX_REQ_TX) { - bmx_put_idle_tx(ctx); - } else { - bmx_put_idle_rx(ctx); + if (ctx->mxc_type == BMX_REQ_RX || + ctx->mxc_mop->context_id != context_id) { + /* queue until testunexpected or queue + * until testcontext for the correct context */ + bmx_q_completed(ctx, BMX_CTX_COMPLETED, status, + bmx_mx_to_bmi_errno(status.code)); + result = 0; + again = 1; + ctx = NULL; } - bmx_peer_decref(peer); /* drop the ref taken in [send|recv]_common */ } } - if (completed - old > 0) { - debug(BMX_DB_CTX, "%s found %d unexpected tx messages", - __func__, completed - old); + if (result) { + debug(BMX_DB_CTX, "%s completing unexpected %s with " + "match 0x%llx for %s with op_id %llu", + __func__, + ctx->mxc_type == BMX_REQ_TX ? 
"TX" : "RX", + llu(ctx->mxc_match), + peer->mxp_mxmap->mxm_peername, + llu(ctx->mxc_mop->op_id)); + + ctx->mxc_mxstat = status; + bmx_complete_ctx(ctx, &outids[completed], &errs[completed], + &sizes[completed], &user_ptrs[completed]); + + if (status.code != MX_SUCCESS) { + debug(BMX_DB_CTX, "%s unexpected send completed with " + "error %s", __func__, mx_strstatus(status.code)); + bmx_peer_disconnect(peer, 0, BMI_ENETRESET); + } + completed++; } } + bmx_release_completion_token(); + + if (completed - old > 0) { + debug(BMX_DB_CTX, "%s found %d unexpected tx messages", + __func__, completed - old); + } + + if (print) + BMX_EXIT; *outcount = completed; return completed; } +/* test for unexpected receives only, not unex sends */ static int BMI_mx_testunexpected(int incount __unused, int *outcount, struct bmi_method_unexpected_info *ui, int max_idle_time __unused) { - uint32_t result = 0; - uint64_t match = (uint64_t) BMX_MSG_UNEXPECTED << 60; - uint64_t mask = (uint64_t) 0xF << 60; + uint32_t result = 0; + uint64_t match = ((uint64_t) BMX_MSG_UNEXPECTED << BMX_MSG_SHIFT); + uint64_t mask = BMX_MASK_MSG; mx_status_t status; + static int count = 0; + int print = 0; + struct bmx_ctx *rx = NULL; + struct bmx_peer *peer = NULL; + int again = 1; + + if (count++ % 1000 == 0) { + BMX_ENTER; + print = 1; + } bmx_connection_handlers(); + bmx_get_completion_token(); + /* if the unexpected handler cannot get a rx, it does not post a receive. - * probe for unexpected and post a rx */ + * probe for unexpected and post a rx. 
*/ mx_iprobe(bmi_mx->bmx_ep, match, mask, &status, &result); if (result) { int ret = 0; - ret = bmx_post_unexpected_recv(status.source, 0, 0, 0, status.match_info, status.xfer_length); + ret = bmx_post_unexpected_recv(status.source, 0, 0, 0, + status.match_info, + status.xfer_length); if (ret != 0) { debug(BMX_DB_CTX, "%s mx_iprobe() found rx with match 0x%llx " "length %d but could not receive it", __func__, @@ -2609,14 +2837,36 @@ BMI_mx_testunexpected(int incount __unused, int *outcount, } } - /* check for unexpected messages */ + /* check for unexpected receives */ *outcount = 0; - mx_test_any(bmi_mx->bmx_ep, match, mask, &status, &result); + + bmx_deq_unex_rx(&rx); + if (rx) { + result = 1; + status = rx->mxc_mxstat; + peer = rx->mxc_peer; + } + + while (!rx && again) { + again = 0; + mx_test_any(bmi_mx->bmx_ep, match, mask, &status, &result); + if (result) { + rx = (struct bmx_ctx *) status.context; + bmx_deq_pending_ctx(rx); + peer = rx->mxc_peer; + if (rx->mxc_type == BMX_REQ_TX) { + bmx_q_completed(rx, BMX_CTX_COMPLETED, status, + bmx_mx_to_bmi_errno(status.code)); + result = 0; + again = 1; + rx = NULL; + } + } + } + if (result) { - struct bmx_ctx *rx = (struct bmx_ctx *) status.context; - struct bmx_peer *peer = rx->mxc_peer; - debug(BMX_DB_CTX, "*** %s completing RX with match 0x%llx for %s", - __func__, llu(rx->mxc_match), peer->mxp_mxmap->mxm_peername); + debug(BMX_DB_CTX, "%s completing RX with match 0x%llx for %s", + __func__, llu(rx->mxc_match), peer->mxp_mxmap->mxm_peername); ui->error_code = 0; ui->addr = peer->mxp_map; @@ -2628,18 +2878,15 @@ BMI_mx_testunexpected(int incount __unused, int *outcount, rx->mxc_seg.segment_ptr = rx->mxc_buffer; ui->tag = rx->mxc_tag; - if (!qlist_empty(&rx->mxc_list)) { - gen_mutex_lock(&peer->mxp_lock); - qlist_del_init(&rx->mxc_list); - gen_mutex_unlock(&peer->mxp_lock); - } -#if BMX_LOGGING - MPE_Log_event(recvunex_finish, (int) rx->mxc_tag, NULL); -#endif - bmx_put_idle_rx(rx); + bmx_put_idle_ctx(rx); 
bmx_peer_decref(peer); /* drop the ref taken in unexpected_recv() */ *outcount = 1; } + bmx_release_completion_token(); + + if (print) + BMX_EXIT; + return 0; } @@ -2648,8 +2895,15 @@ bmx_create_peername(void) { char peername[MX_MAX_HOSTNAME_LEN + 28]; /* mx://host:board:ep_id\0 */ - sprintf(peername, "mx://%s:%u:%u", bmi_mx->bmx_hostname, bmi_mx->bmx_board, - bmi_mx->bmx_ep_id); + if (bmi_mx->bmx_board != -1) { + /* mx://host:board:ep_id\0 */ + sprintf(peername, "mx://%s:%u:%u", bmi_mx->bmx_hostname, + bmi_mx->bmx_board, bmi_mx->bmx_ep_id); + } else { + /* mx://host:ep_id\0 */ + sprintf(peername, "mx://%s:%u", bmi_mx->bmx_hostname, + bmi_mx->bmx_ep_id); + } bmi_mx->bmx_peername = strdup(peername); return; } @@ -2660,14 +2914,15 @@ bmx_peer_connect(struct bmx_peer *peer) int ret = 0; uint64_t nic_id = 0ULL; mx_request_t request; - uint64_t match = (uint64_t) BMX_MSG_ICON_REQ << 60; + uint64_t match = (uint64_t) BMX_MSG_ICON_REQ << BMX_MSG_SHIFT; struct bmx_method_addr *mxmap = peer->mxp_mxmap; - if (bmi_mx->bmx_is_server) { - return 1; - } + BMX_ENTER; + gen_mutex_lock(&peer->mxp_lock); if (peer->mxp_state == BMX_PEER_INIT) { + debug(BMX_DB_PEER, "Setting peer %s to BMX_PEER_WAIT", + peer->mxp_mxmap->mxm_peername); peer->mxp_state = BMX_PEER_WAIT; } else { gen_mutex_unlock(&peer->mxp_lock); @@ -2683,7 +2938,7 @@ bmx_peer_connect(struct bmx_peer *peer) mx_endpoint_addr_t epa; ret = bmx_open_endpoint(&bmi_mx->bmx_ep, - bmi_mx->bmx_board, + MX_ANY_NIC, MX_ANY_ENDPOINT); if (ret != 0) { debug((BMX_DB_MX|BMX_DB_CONN), "failed to open endpoint when " @@ -2693,7 +2948,10 @@ bmx_peer_connect(struct bmx_peer *peer) } mx_get_endpoint_addr(bmi_mx->bmx_ep, &epa); /* get our nic_id and ep_id */ - mx_decompose_endpoint_addr(epa, &nic_id, &bmi_mx->bmx_ep_id); + mx_decompose_endpoint_addr2(epa, &nic_id, &bmi_mx->bmx_ep_id, + &bmi_mx->bmx_sid); + /* get our board number */ + mx_nic_id_to_board_number(nic_id, &bmi_mx->bmx_board); /* get our hostname */ mx_nic_id_to_hostname(nic_id, 
host); bmi_mx->bmx_hostname = strdup(host); @@ -2706,6 +2964,9 @@ bmx_peer_connect(struct bmx_peer *peer) colon = strchr(bmi_mx->bmx_hostname, ':'); if (colon != NULL) { *colon = '\0'; + } else { + /* no board number in our name */ + bmi_mx->bmx_board = -1; } /* create our peername */ bmx_create_peername(); @@ -2714,6 +2975,9 @@ bmx_peer_connect(struct bmx_peer *peer) * by calling mx_iconnect() w/BMX_MSG_ICON_REQ */ mx_iconnect(bmi_mx->bmx_ep, peer->mxp_nic_id, mxmap->mxm_ep_id, BMX_MAGIC, match, (void *) peer, &request); + + BMX_EXIT; + return ret; } @@ -2729,13 +2993,14 @@ static struct bmi_method_addr * BMI_mx_method_addr_lookup(const char *id) { int ret = 0; + int len = 0; char *host = NULL; uint32_t board = 0; uint32_t ep_id = 0; - struct bmi_method_addr *map = NULL; + struct bmi_method_addr *map = NULL; struct bmx_method_addr *mxmap = NULL; - debug(BMX_DB_FUNC, "entering %s", __func__); + BMX_ENTER; debug(BMX_DB_INFO, "%s with id %s", __func__, id); ret = bmx_parse_peername(id, &host, &board, &ep_id); @@ -2754,7 +3019,8 @@ BMI_mx_method_addr_lookup(const char *id) mxmap->mxm_board == board && mxmap->mxm_ep_id == ep_id) { map = peer->mxp_map; - BMX_FREE(host, sizeof(*host)); + len = strlen(host); + BMX_FREE(host, len); break; } } @@ -2763,7 +3029,7 @@ BMI_mx_method_addr_lookup(const char *id) if (map == NULL) { map = bmx_alloc_method_addr(id, host, board, ep_id); - if (bmi_mx != NULL && ! bmi_mx->bmx_is_server) { /* we are a client */ + if (bmi_mx != NULL) { struct bmx_peer *peer = NULL; mxmap = map->method_data; @@ -2779,9 +3045,10 @@ BMI_mx_method_addr_lookup(const char *id) " failed with %d", __func__, ret); } } + if (map != NULL) free(host); } out: - debug(BMX_DB_FUNC, "leaving %s", __func__); + BMX_EXIT; return map; } @@ -2800,44 +3067,69 @@ BMI_mx_close_context(bmi_context_id context_id __unused) /* NOTE There may be a race between this and BMI_mx_testcontext(). 
*/ static int -BMI_mx_cancel(bmi_op_id_t id, bmi_context_id context_id __unused) +BMI_mx_cancel(bmi_op_id_t id, bmi_context_id context_id) { struct method_op *mop; struct bmx_ctx *ctx = NULL; struct bmx_peer *peer = NULL; uint32_t result = 0; - debug(BMX_DB_FUNC, "entering %s", __func__); + BMX_ENTER; + + bmx_get_completion_token(); mop = id_gen_fast_lookup(id); ctx = mop->method_data; peer = ctx->mxc_peer; + assert(context_id == ctx->mxc_mop->context_id); + debug(BMX_DB_CTX, "%s %s op_id %llu mxc_state %d peer state %d", __func__, ctx->mxc_type == BMX_REQ_TX ? "TX" : "RX", llu(ctx->mxc_mop->op_id), ctx->mxc_state, peer->mxp_state); + + /* avoid race with connection setup */ + gen_mutex_lock(&peer->mxp_lock); + switch (ctx->mxc_state) { case BMX_CTX_QUEUED: - /* we are racing with the connection setup */ - bmx_deq_ctx(ctx); - bmx_q_canceled_ctx(ctx, BMI_ECANCEL); + qlist_del_init(&ctx->mxc_list); + gen_mutex_unlock(&peer->mxp_lock); + bmx_q_completed(ctx, BMX_CTX_CANCELED, BMX_NO_STATUS, BMI_ECANCEL); break; case BMX_CTX_PENDING: + gen_mutex_unlock(&peer->mxp_lock); if (ctx->mxc_type == BMX_REQ_TX) { - bmx_peer_disconnect(peer, 1, BMI_ENETRESET); + /* see if it completed first */ + mx_test(bmi_mx->bmx_ep, &ctx->mxc_mxreq, &ctx->mxc_mxstat, &result); + if (result == 1) { + debug(BMX_DB_CTX, "%s completed TX op_id %llu " + "mxc_state %d peer state %d status.code %s", + __func__, llu(ctx->mxc_mop->op_id), ctx->mxc_state, + peer->mxp_state, mx_strstatus(ctx->mxc_mxstat.code)); + bmx_deq_pending_ctx(ctx); + bmx_q_completed(ctx, BMX_CTX_CANCELED, + ctx->mxc_mxstat, BMI_ECANCEL); + } else { + /* and if not, then disconnect() */ + bmx_peer_disconnect(peer, 1, BMI_ENETRESET); + } } else { /* BMX_REQ_RX */ mx_cancel(bmi_mx->bmx_ep, &ctx->mxc_mxreq, &result); if (result == 1) { bmx_deq_pending_ctx(ctx); - bmx_q_canceled_ctx(ctx, BMI_ECANCEL); + bmx_q_completed(ctx, BMX_CTX_CANCELED, + BMX_NO_STATUS, BMI_ECANCEL); } } break; default: - debug(BMX_DB_WARN, "%s called on %s with 
state %d", __func__, + debug(BMX_DB_CTX, "%s called on %s with state %d", __func__, ctx->mxc_type == BMX_REQ_TX ? "TX" : "RX", ctx->mxc_state); } - debug(BMX_DB_FUNC, "leaving %s", __func__); + bmx_release_completion_token(); + + BMX_EXIT; return 0; } @@ -2848,7 +3140,7 @@ BMI_mx_rev_lookup(struct bmi_method_addr *meth) { struct bmx_method_addr *mxmap = meth->method_data; - debug(BMX_DB_FUNC, "entering %s", __func__); + BMX_ENTER; if (mxmap->mxm_peer && mxmap->mxm_peer->mxp_state != BMX_PEER_DISCONNECT) return mxmap->mxm_peername; @@ -2860,6 +3152,7 @@ BMI_mx_rev_lookup(struct bmi_method_addr *meth) const struct bmi_method_ops bmi_mx_ops = { .method_name = "bmi_mx", + .flags = 0, .initialize = BMI_mx_initialize, .finalize = BMI_mx_finalize, .set_info = BMI_mx_set_info, diff --git a/src/io/bmi/bmi_mx/mx.h b/src/io/bmi/bmi_mx/mx.h index 10da911..f396b93 100644 --- a/src/io/bmi/bmi_mx/mx.h +++ b/src/io/bmi/bmi_mx/mx.h @@ -15,6 +15,7 @@ #include #include #include +#include #include /* needed for callback handler, etc. 
*/ #include @@ -53,11 +54,6 @@ typedef struct qlist_head list_t; /* easier to type */ #define BMX_MEM_TWEAK 1 /* use buffer list for mem[alloc|free] */ #define BMX_DEBUG 1 /* enable debug (gossip) statements */ #define BMX_MEM_ACCT 0 /* track number of bytes alloc's and freed */ -#define BMX_LOGGING 0 /* use MPE logging routines */ - -#if BMX_LOGGING -#include "mpe.h" -#endif #if BMX_MEM_TWEAK /* Allocate 16 4MB buffers for use with BMI_mx_mem[alloc|free] */ @@ -107,8 +103,7 @@ typedef struct qlist_head list_t; /* easier to type */ /* BMX [UN]EXPECTED msgs use the 64-bits of the match info as follows: * Bits Description * 60-63 Msg type - * 56-59 Reserved for credits if implemented - * 52-55 Reserved + * 52-59 Reserved * 32-51 Peer id (of the sender, assigned by receiver) * 0-31 bmi_msg_tag_t */ @@ -116,14 +111,20 @@ typedef struct qlist_head list_t; /* easier to type */ /* BMX CONN_[REQ|ACK] msgs use the 64-bits of the match info as follows: * Bits Description * 60-63 Msg type - * 56-59 Reserved for credits if implemented - * 52-55 Reserved + * 52-59 Reserved * 32-51 Peer id (to use when contacting the sender) * 0-31 Version */ -#define BMX_MAX_PEER_ID ((1<<20) - 1) /* 20 bits - actually 1,048,574 peers +#define BMX_MSG_SHIFT 60 +#define BMX_ID_SHIFT 32 +#define BMX_MASK_ALL (~0ULL) +#define BMX_MASK_MSG (0xFULL << BMX_MSG_SHIFT) + +#define BMX_MAX_PEER_ID ((1<<20) - 1) /* 20 bits - actually 1,048,574 peers 1 to 1,048,575 */ +#define BMX_MAX_TAG (~0U) /* 32 bits */ + #define BMX_TIMEOUT (20 * 1000) /* msg timeout in milliseconds */ @@ -146,6 +147,7 @@ struct bmx_data uint32_t bmx_board; /* my MX board index */ uint32_t bmx_ep_id; /* my MX endpoint ID */ mx_endpoint_t bmx_ep; /* my MX endpoint */ + uint32_t bmx_sid; /* my MX session id */ int bmx_is_server; /* am I a server? 
*/ list_t bmx_peers; /* list of all peers */ @@ -159,8 +161,17 @@ struct bmx_data list_t bmx_idle_rxs; /* available for receiving */ gen_mutex_t bmx_idle_rxs_lock; /* idle_rxs lock */ - list_t bmx_canceled; /* canceled reqs waiting for test */ - gen_mutex_t bmx_canceled_lock; /* canceled list lock */ + gen_mutex_t bmx_completion_lock; /* lock for test* functions */ + int bmx_refcount; /* try to avoid races between test* + and cancel functions */ + + /* completed expected msgs + * including unexpected sends */ + list_t bmx_done_q[BMI_MAX_CONTEXTS]; + gen_mutex_t bmx_done_q_lock[BMI_MAX_CONTEXTS]; + + list_t bmx_unex_rxs; /* completed unexpected recvs */ + gen_mutex_t bmx_unex_rxs_lock; /* completed unexpected recvs lock */ uint32_t bmx_next_id; /* for the next peer_id */ gen_mutex_t bmx_lock; /* global lock - use for global rxs, @@ -190,20 +201,21 @@ enum bmx_peer_state { struct bmx_method_addr { - struct method_addr *mxm_map; /* peer's method_addr */ - const char *mxm_peername; /* mx://hostname/board/ep_id */ - const char *mxm_hostname; /* peer's hostname */ - uint32_t mxm_board; /* peer's MX board index */ - uint32_t mxm_ep_id; /* peer's MX endpoint ID */ - struct bmx_peer *mxm_peer; /* peer pointer */ + struct bmi_method_addr *mxm_map; /* peer's bmi_method_addrt */ + const char *mxm_peername; /* mx://hostname/board/ep_id */ + const char *mxm_hostname; /* peer's hostname */ + uint32_t mxm_board; /* peer's MX board index */ + uint32_t mxm_ep_id; /* peer's MX endpoint ID */ + struct bmx_peer *mxm_peer; /* peer pointer */ }; struct bmx_peer { - struct method_addr *mxp_map; /* his method_addr * */ + struct bmi_method_addr *mxp_map; /* his bmi_method_addr * */ struct bmx_method_addr *mxp_mxmap; /* his bmx_method_addr */ uint64_t mxp_nic_id; /* his NIC id */ mx_endpoint_addr_t mxp_epa; /* his MX endpoint address */ + uint32_t mxp_sid; /* his MX session id */ int mxp_exist; /* have we connected before? 
*/ enum bmx_peer_state mxp_state; /* INIT, WAIT, READY, DISCONNECT */ @@ -265,6 +277,7 @@ struct bmx_ctx bmi_size_t mxc_nob; /* number of bytes (int64_t) */ mx_request_t mxc_mxreq; /* MX request */ mx_status_t mxc_mxstat; /* MX status */ + bmi_error_code_t mxc_error; /* BMI error code */ uint64_t mxc_get; /* # of times returned from idle list */ uint64_t mxc_put; /* # of times returned to idle list */ @@ -308,4 +321,14 @@ struct bmx_connreq #define debug(lvl,fmt,...) do { } while (0) #endif /* BMX_DEBUG */ +#define BMX_ENTER \ + do { \ + debug(BMX_DB_FUNC, "entering %s", __func__); \ + } while (0); + +#define BMX_EXIT \ + do { \ + debug(BMX_DB_FUNC, "exiting %s", __func__); \ + } while (0); + #endif /* __mx_h */ diff --git a/src/io/bmi/bmi_portals/README b/src/io/bmi/bmi_portals/README index 2db04de..b193ca6 100644 --- a/src/io/bmi/bmi_portals/README +++ b/src/io/bmi/bmi_portals/README @@ -92,19 +92,21 @@ sort of user pointer that Portals provides. Match lists ----------- -Diagram of the various match lists used. +Diagram of the various match lists used. The two bars in each +of the match lists separate the 64 bits into: 2 bits, 30 bits, 32 bits, +in that order. preposted receives - match bmi_tag -> preposted buf + match 0 | seqno | bmi_tag -> preposted buf - match bmi_tag -> preposted buf + match 0 | seqno | bmi_tag -> preposted buf ... outgoing sends - match 2 << 32 | bmi_tag <- preposted buf, respond to get + match 1 | seqno | bmi_tag <- preposted buf, respond to get ... 
@@ -114,36 +116,41 @@ mark nonpreopsted receive buffers - match 0 << 32 | any -> nonprepost buffer1, max size + match 0 | any | any -> nonprepost buffer1, max size - match 0 << 32 | any -> nonprepost buffer2, max size + match 0 | any | any -> nonprepost buffer2, max size unexpected message buffers - match 1 << 32 | any -> unexpected buffer1, max size + match 2 | any | any -> unexpected buffer1, max size - match 1 << 32 | any -> unexpected buffer2, max size + match 2 | any | any -> unexpected buffer2, max size zero - match 0 << 32 | any -> no buffer, trunc, max size 0 + match 0 | any | any -> no buffer, trunc, max size 0 Preposted receives must come first and be in order so that they match -for expected incoming messages. The order of nonprepost and unexpected -buffers doesn't matter, so we let them mix up among themselves. The -mark entry is used to be able to find the point between the prepost -and other entries, otherwise we'd need lots of code to track that by hand. +for expected incoming messages. The outgoing posted sends come next, and +can be mixed up with the receives since they have a unique bit 62 set. + +The order of nonprepost and unexpected buffers doesn't matter, so we let them +mix up among themselves. The mark entry is used to be able to find the point +between the prepost and other entries, otherwise we'd need lots of code to +track that by hand. The nonprepost and unexpected buffers are managed as "circular" lists, where one is filled up until it is unlinked, then it is reposted after -the other that has now started to fill up. +the other that has now started to fill up. They are actually all mixed +up in the area between mark and zero, as it doesn't matter which order +they appear in. Nonpreopst messages are kept in the buffers until the app posts a receive that matches. If they fill up, later messages fall off the bottom. Working apps will pre-post their receives before the sender tries to send to them. 
-Unexpected messages are a protocol feature of BMI. A special high-bit +Unexpected messages are a protocol feature of BMI. A special high-bit 63 indicates this. They are limited in size by the protocol (8k here), and are always new requests from a client to a server. As they arrive, they are immediately copied into new buffers that are handed back to the server @@ -156,6 +163,15 @@ md at the end that just generates an event on the sender and receiver. The receiver does a get to read the data from the sender later when the app finally posts the receive. +Note that BMI will post multiple sends to the same dest with the same +tag. With this scheme, a race condition can develop where peer A sends tag +5 and tag 5, then peer B recvs and acks tag 5, then peer B recvs the second +tag 5 to its zero md, then peer B has an internal post_recv and does a get +to tag 5. This get hits the _first_ tag 5 on peer A, as peer A has not +gotten around to processing the ack and unlinking that one. To get around +this, we add a sequence number that sits in the 30 bits just above the 32 +bits reserved for the tag. + TODO Notes ---------- diff --git a/src/io/bmi/bmi_portals/module.mk.in b/src/io/bmi/bmi_portals/module.mk.in index f33f8b7..b8416f8 100644 --- a/src/io/bmi/bmi_portals/module.mk.in +++ b/src/io/bmi/bmi_portals/module.mk.in @@ -21,6 +21,7 @@ cfiles := portals.c src := $(patsubst %,$(DIR)/%,$(cfiles)) LIBSRC += $(src) SERVERSRC += $(src) +LIBBMISRC += $(src) # # Add extra include paths and warnings just for this directory. 
diff --git a/src/io/bmi/bmi_portals/portals.c b/src/io/bmi/bmi_portals/portals.c index 3c3a75d..5ecf542 100644 --- a/src/io/bmi/bmi_portals/portals.c +++ b/src/io/bmi/bmi_portals/portals.c @@ -8,8 +8,8 @@ #include #include -#ifdef __LIBCATAMOUNT__ -/* Cray XT3 version */ +#if defined(__LIBCATAMOUNT__) || defined(__CRAYXT_COMPUTE_LINUX_TARGET) || defined(__CRAYXT_SERVICE) +/* Cray XT3 and XT4 version, both catamount and compute-node-linux */ #define PTL_IFACE_DEFAULT PTL_IFACE_SS #include #include @@ -23,6 +23,7 @@ #endif #include +#include #define __PINT_REQPROTO_ENCODE_FUNCS_C /* include definitions */ #include /* bmi_method_ops */ #include /* bmi_method_addr_reg_callback */ @@ -72,7 +73,6 @@ static gen_mutex_t eq_mutex = GEN_MUTEX_INITIALIZER; * method_addrs. */ static int bmi_portals_method_id; -static int bmi_portals_nic_type; /* * Various static ptls objects. One per instance of the code. @@ -125,13 +125,32 @@ static const char *PtlEventKindStr(ptl_event_kind_t ev_kind) /* * Match bits. The lower 32 bits always carry the bmi_tag. If this bit - * in the top is set, it is an unexpected message. The second set is used - * when posting a _send_, strangely enough. If the send is too long, + * in the top is set, it is an unexpected message. The secondmost top bit is + * used when posting a _send_, strangely enough. If the send is too long, * and the receiver has not preposted, later the receiver will issue a Get * to us for the data. That get will use the second set of match bits. + * + * The rest of the 30 top bits are used to encode a sequence number per + * peer. As BMI can post multiple sends with the same tag, we have to + * be careful that if send #2 for a given tag goes to the zero_md, that + * when he does the get, he grabs from buffer #2, not buffer #1 because + * the sender was too slow in unlinking it. 
*/ -static const uint64_t match_bits_unexpected = 1ULL << 32; -static const uint64_t match_bits_long_send = 2ULL << 32; +static const uint64_t match_bits_unexpected = 1ULL << 63; /* 8... */ +static const uint64_t match_bits_long_send = 1ULL << 62; /* 4... */ +static const uint32_t match_bits_seqno_max = 1UL << 30; +static const int match_bits_seqno_shift = 32; + +static uint64_t mb_from_tag_and_seqno(uint32_t tag, uint32_t seqno) +{ + uint64_t mb; + + mb = seqno; + mb <<= match_bits_seqno_shift; + mb |= tag; + /* caller may set the long send bit too */ + return mb; +} /* * Buffer for incoming unexpected send messages. Only the server needs @@ -145,20 +164,33 @@ static const uint64_t match_bits_long_send = 2ULL << 32; #define UNEXPECTED_MESSAGE_SIZE (8 << 10) #define UNEXPECTED_QUEUE_SIZE (256 << 10) #define UNEXPECTED_NUM_MD 2 +#define UNEXPECTED_SIZE_PER_MD (UNEXPECTED_QUEUE_SIZE/UNEXPECTED_NUM_MD) + +#define UNEXPECTED_MD_INDEX_OFFSET (1) +#define NONPREPOST_MD_INDEX_OFFSET (UNEXPECTED_NUM_MD + 1) static char *unexpected_buf = NULL; /* poor-man's circular buffer */ static ptl_handle_me_t unexpected_me[UNEXPECTED_NUM_MD]; static ptl_handle_md_t unexpected_md[UNEXPECTED_NUM_MD]; +static int unexpected_need_repost[UNEXPECTED_NUM_MD]; +static int unexpected_need_repost_sum; +static int unexpected_is_posted[UNEXPECTED_NUM_MD]; -static int unexpected_md_index(ptl_handle_md_t md) +/* + * This scheme relies on the zero page being unused, i.e. addrsesses + * from 0 up to 4k or so. 
+ */ +static int unexpected_md_index(void *user_ptr) { int i; + uintptr_t d = (uintptr_t) user_ptr; - for (i=0; i= UNEXPECTED_MD_INDEX_OFFSET && + d < UNEXPECTED_MD_INDEX_OFFSET + UNEXPECTED_NUM_MD) + return d - UNEXPECTED_MD_INDEX_OFFSET; + else + return -1; } /* @@ -191,14 +223,16 @@ static ptl_handle_md_t nonprepost_md[NONPREPOST_NUM_MD]; static int nonprepost_is_posted[NONPREPOST_NUM_MD]; static int nonprepost_refcnt[NONPREPOST_NUM_MD]; -static int nonprepost_md_index(ptl_handle_md_t md) +static int nonprepost_md_index(void *user_ptr) { int i; + uintptr_t d = (uintptr_t) user_ptr; - for (i=0; i= NONPREPOST_MD_INDEX_OFFSET && + d < NONPREPOST_MD_INDEX_OFFSET + NONPREPOST_NUM_MD) + return d - NONPREPOST_MD_INDEX_OFFSET; + else + return -1; } /* @@ -212,6 +246,8 @@ struct bmip_method_addr { char *hostname; /* given by user, converted to a nid by us */ char *peername; /* for rev_lookup */ ptl_process_id_t pid; /* this is a struct with u32 nid + u32 pid */ + uint32_t seqno_out; /* each send has a separate sequence number */ + uint32_t seqno_in; }; static QLIST_HEAD(pma_list); @@ -226,6 +262,7 @@ enum work_state { RQ_WAITING_INCOMING, RQ_WAITING_GET, RQ_WAITING_USER_TEST, + RQ_WAITING_USER_POST, RQ_LEN_ERROR, RQ_CANCELLED, }; @@ -247,6 +284,8 @@ static const char *state_name(enum work_state state) return "RQ_WAITING_GET"; case RQ_WAITING_USER_TEST: return "RQ_WAITING_USER_TEST"; + case RQ_WAITING_USER_POST: + return "RQ_WAITING_USER_POST"; case RQ_LEN_ERROR: return "RQ_LEN_ERROR"; case RQ_CANCELLED: @@ -269,6 +308,7 @@ struct bmip_work { bmi_size_t actual_len; /* recv: possibly shorter than posted */ bmi_msg_tag_t bmi_tag; /* recv: unexpected or nonpp tag that arrived */ + uint64_t match_bits; /* recv: full match bits, including seqno */ int is_unexpected; /* send: if user posted this as unexpected */ @@ -276,7 +316,9 @@ struct bmip_work { /* send: send me for possible get */ ptl_handle_md_t md; /* recv: prepost or get destination, to cancel */ /* send: send md for 
possible get */ - int me_unlink; /* send: me must be unlinked at test time */ + ptl_handle_me_t tme; + ptl_handle_md_t tmd; + int saw_send_end_and_ack; /* send: make sure both states before unlink */ /* non-preposted receive, keep ref to a nonpp static buffer */ const void *nonpp_buf; /* pointer to nonpp buffer in MD */ @@ -314,7 +356,10 @@ static QLIST_HEAD(q_done); static struct bmi_method_addr *addr_from_nidpid(ptl_process_id_t pid); static void unexpected_repost(int which); static int nonprepost_init(void); +static int nonprepost_fini(void); +static int unexpected_fini(void); static void nonprepost_repost(int which); +static const char *bmip_rev_lookup(struct bmi_method_addr *addr); /*---------------------------------------------------------------------------- @@ -327,36 +372,71 @@ static void nonprepost_repost(int which); static int handle_event(ptl_event_t *ev) { struct bmip_work *sq, *rq; - int which; + int which, ret; if (ev->ni_fail_type != 0) { gossip_err("%s: ni err %d\n", __func__, ev->ni_fail_type); return -EIO; } - debug(2, "%s: event type %s\n", __func__, PtlEventKindStr(ev->type)); + debug(6, "%s: event type %s\n", __func__, PtlEventKindStr(ev->type)); switch (ev->type) { case PTL_EVENT_SEND_END: - /* ignore this state, already on the waiting list */ + /* + * Sometimes this state happens _after_ the ACK. Boggle. Cannot + * unlink the sq until this state. Doing it in the ack state may be + * too early. But we don't know if it is safe to unlink until the + * ack comes back and says if he received it, or if he will do a + * Get on the MD. So just mark a flag. It goes to two only if + * the ack indicated the other side will not need to do a get. + * + * Note that an outgoing get request also triggers this. Sigh. 
+ */ sq = ev->md.user_ptr; - debug(2, "%s: sq %p went out\n", __func__, sq); + if (sq->type == BMI_RECV) { + rq = ev->md.user_ptr; + debug(2, "%s, rq %p stat %s get went out\n", __func__, rq, + state_name(rq->state)); + break; + } + debug(2, "%s: sq %p went out len %llu/%llu mb %llx\n", __func__, sq, + ev->mlength, ev->rlength, ev->match_bits); + if (!sq->is_unexpected && ++sq->saw_send_end_and_ack == 2) { + debug(2, "%s: saw end last, unlinking %p me %d (md %d)\n", + __func__, sq, sq->me, sq->md); + ret = PtlMEUnlink(sq->me); + if (ret) + gossip_err("%s: PtlMEUnlink sq %p: %s\n", __func__, + sq, PtlErrorStr(ret)); + } break; case PTL_EVENT_ACK: /* recv an ack from him, advance the state and unlink */ sq = ev->md.user_ptr; - debug(2, "%s: sq %p ack received\n", __func__, sq); + debug(2, "%s: sq %p ack rcvd len %llu/%llu\n", + __func__, sq, ev->mlength, ev->rlength); + + /* + * the rlength always comes back as 0 on catamount, even if we + * sent 51200 bytes + */ if (ev->mlength != ev->rlength) { - assert(ev->mlength == 0); - debug(2, "%s: truncated, get ready for the get\n", __func__); + gossip_err("%s: mlen %llu and rlen %llu do not agree\n", __func__, + ev->mlength, ev->rlength); + exit(1); } if (ev->mlength > 0) { - /* Would like to unlink here, but "me in use" error happens - * sometimes. Avoid race by doing it at test time. */ - if (!sq->is_unexpected) - sq->me_unlink = 1; + /* make sure both SEND_END and ACK happened for these */ + if (!sq->is_unexpected && ++sq->saw_send_end_and_ack == 2) { + debug(2, "%s: saw ack last, unlinking %p\n", __func__, sq); + ret = PtlMEUnlink(sq->me); + if (ret) + gossip_err("%s: PtlMEUnlink sq %p: %s\n", __func__, + sq, PtlErrorStr(ret)); + } sq->state = SQ_WAITING_USER_TEST; gen_mutex_lock(&list_mutex); qlist_del(&sq->list); @@ -375,21 +455,23 @@ static int handle_event(ptl_event_t *ev) case PTL_EVENT_PUT_END: /* * Peer did a send to us. Four cases: - * 1. expected pre-posted receive, our rq in user_ptr. - * 2. 
unexpected message, user_ptr is &unexpected_md[i]; - * 3. non-preposted message, user_ptr is &preposted_md[i]; - * 4. zero md, non-preposted that was too big and truncated + * 1. unexpected message, user_ptr is &unexpected_md[i]; + * 2a. non-preposted message, user_ptr is &preposted_md[i]; + * 2b. zero md, non-preposted that was too big and truncated + * 3. expected pre-posted receive, our rq in user_ptr. */ - which = unexpected_md_index(ev->md_handle); + which = unexpected_md_index(ev->md.user_ptr); if (which >= 0) { /* build new unexpected rq and copy in the data */ - debug(2, "%s: unexpected len %lld put to us\n", __func__, - lld(ev->mlength)); + debug(2, "%s: unexpected len %lld put to us, mb %llx\n", __func__, + lld(ev->mlength), llu(ev->match_bits)); rq = malloc(sizeof(*rq)); if (!rq) { gossip_err("%s: alloc unexpected rq\n", __func__); break; } + if (ev->mlength > UNEXPECTED_MESSAGE_SIZE) + exit(1); /* * malloc this separately to hand to testunexpected caller; that @@ -397,7 +479,6 @@ static int handle_event(ptl_event_t *ev) * easier. 
*/ rq->type = BMI_RECV; - rq->me_unlink = 0; rq->unex_buf = malloc(ev->mlength); if (!rq->unex_buf) { gossip_err("%s: alloc unexpected rq data\n", __func__); @@ -412,18 +493,39 @@ static int handle_event(ptl_event_t *ev) gen_mutex_lock(&list_mutex); qlist_add_tail(&rq->list, &q_unexpected_done); gen_mutex_unlock(&list_mutex); + debug(1, "%s: unexpected %d offset %llu\n", __func__, which, + llu(ev->offset)); + if (UNEXPECTED_SIZE_PER_MD - ev->offset < UNEXPECTED_MESSAGE_SIZE) { + debug(1, "%s: reposting unexpected %d\n", __func__, which); + if (unexpected_need_repost[which] == 0) { + unexpected_need_repost[which] = 1; + ++unexpected_need_repost_sum; + } + } + /* try to unpost some, if they are free now */ + if (unexpected_need_repost_sum) { + for (which = 0; which < UNEXPECTED_NUM_MD; which++) { + if (unexpected_need_repost[which]) + unexpected_repost(which); + } + } break; } - which = nonprepost_md_index(ev->md_handle); + which = nonprepost_md_index(ev->md.user_ptr); if (which >= 0 || ev->md_handle == zero_md) { /* build new nonprepost rq, but just keep pointer to the data, or * if truncated, build the req but no data to hang onto */ - debug(2, "%s: nonprepost len %llu tag %llu put to us%s\n", - __func__, llu(ev->rlength), - llu(ev->match_bits & 0xffffffffULL), + debug(1, "%s: nonprepost len %llu/%llu mb %llx%s\n", + __func__, llu(ev->mlength), llu(ev->rlength), + ev->match_bits, ev->md_handle == zero_md ? 
", truncated" : ""); + if (which >= 0 && ev->md_handle == zero_md) { + gossip_err("%s: which %d but zero md\n", __func__, which); + exit(1); + } + rq = malloc(sizeof(*rq)); if (!rq) { gossip_err("%s: alloc nonprepost rq\n", __func__); @@ -431,9 +533,10 @@ static int handle_event(ptl_event_t *ev) } rq->type = BMI_RECV; - rq->me_unlink = 0; + rq->state = RQ_WAITING_USER_POST; rq->actual_len = ev->rlength; rq->bmi_tag = ev->match_bits & 0xffffffffULL; /* just 32 bits */ + rq->match_bits = ev->match_bits; rq->mop.addr = addr_from_nidpid(ev->initiator); if (ev->md_handle == zero_md) { rq->nonpp_buf = NULL; @@ -443,6 +546,9 @@ static int handle_event(ptl_event_t *ev) /* keep a ref to this md until the recv finishes */ ++nonprepost_refcnt[rq->nonpp_md]; } + debug(2, "%s: rq %p NEW NONPREPOST mb 0x%llx%s\n", __func__, + rq, llu(rq->match_bits), + ev->md_handle == zero_md ? ", truncated" : ""); gen_mutex_lock(&list_mutex); qlist_add_tail(&rq->list, &q_recv_nonprepost); gen_mutex_unlock(&list_mutex); @@ -451,21 +557,45 @@ static int handle_event(ptl_event_t *ev) /* must be something we preposted, with user_ptr is rq */ rq = ev->md.user_ptr; +#ifdef DEBUG_CNL_ODDITIES + if ((uintptr_t) rq & 1) { + debug(1, "%s: OFF BY 1 rq %p\n", __func__, rq); + rq = (void *) ((uintptr_t) rq - 1); + } +#endif rq->actual_len = ev->rlength; /* attempted length sent */ rq->state = RQ_WAITING_USER_TEST; if (rq->actual_len > rq->tot_len) rq->state = RQ_LEN_ERROR; - debug(2, "%s: rq %p len %lld tag %d put to us\n", __func__, rq, - lld(rq->actual_len), rq->bmi_tag); + debug(1, "%s: rq %p len %lld tag 0x%llx mb 0x%llx thresh %d put to us\n", + __func__, rq, lld(rq->actual_len), llu(rq->bmi_tag), + llu(rq->match_bits), ev->md.threshold); gen_mutex_lock(&list_mutex); qlist_del(&rq->list); qlist_add_tail(&rq->list, &q_done); gen_mutex_unlock(&list_mutex); + +#ifdef DEBUG_CNL_ODDITIES + /* + * At least on linux compute nodes, the me does not auto-unlink + * properly, even though the md did get unlinked. 
It is necessary + * to undo the ME too. Note that the MD threshold is not updated + * to zero; it still sits at one (or whatever it was originally + * set up to be). + */ + /* ret = PtlMDUnlink(rq->md); debug(2, "md unlink %d gives %s\n", rq->md, PtlErrorStr(ret)); */ + /* ret = PtlMDUnlink(rq->tmd); debug(2, "tmd unlink %d gives %s\n", rq->tmd, PtlErrorStr(ret)); */ + ret = PtlMEUnlink(rq->me); debug(2, "me unlink %d gives %s\n", rq->me, PtlErrorStr(ret)); + ret = PtlMEUnlink(rq->tme); debug(2, "tme unlink %d gives %s\n", rq->tme, PtlErrorStr(ret)); +#endif break; case PTL_EVENT_GET_END: - /* our send, turned into a get from the receiver, is now done */ + /* our send, turned into a get from the receiver, is now done, as + * far as we are conerned, as he has gotten it from us */ sq = ev->md.user_ptr; + debug(1, "%s: peer got sq %p len %llu/%llu mb %llx\n", __func__, sq, + llu(ev->mlength), llu(ev->rlength), ev->match_bits); sq->state = SQ_WAITING_USER_TEST; gen_mutex_lock(&list_mutex); qlist_del(&sq->list); @@ -474,8 +604,8 @@ static int handle_event(ptl_event_t *ev) break; case PTL_EVENT_REPLY_END: - debug(2, "%s: get completed\n", __func__); rq = ev->md.user_ptr; + debug(2, "%s: get completed, rq %p\n", __func__, rq); rq->state = RQ_WAITING_USER_TEST; gen_mutex_lock(&list_mutex); qlist_del(&rq->list); @@ -484,19 +614,13 @@ static int handle_event(ptl_event_t *ev) break; case PTL_EVENT_UNLINK: - which = unexpected_md_index(ev->md_handle); - if (which >= 0) { - /* me was also unlinked; put both back at the end */ - debug(2, "%s: unlinked unexpected md %d, repost\n", __func__, - which); - unexpected_repost(which); - break; - } - - which = nonprepost_md_index(ev->md_handle); + /* XXX: does this ever get called on CNL? Apparently not. */ + debug(2, "%s: unlink event! 
user_ptr %p\n", __func__, ev->md.user_ptr); + which = nonprepost_md_index(ev->md.user_ptr); if (which >= 0) { - debug(2, "%s: unlinked nonprepost md %d, maybe repost\n", __func__, - which); + debug(1, "%s: unlinked nonprepost md %d, is_posted %d refcnt %d\n", + __func__, which, nonprepost_is_posted[which], + nonprepost_refcnt[which]); nonprepost_is_posted[which] = 0; if (nonprepost_refcnt[which] == 0) /* already satisfied all the recvs, can this happen so fast? */ @@ -504,7 +628,7 @@ static int handle_event(ptl_event_t *ev) break; } - debug(2, "%s: unlinked a send or recv\n", __func__); + debug(1, "%s: unlinked a send or recv, nothing to do\n", __func__); /* * Expected recv, unlink just cleans it up. Already got the send @@ -512,6 +636,15 @@ static int handle_event(ptl_event_t *ev) */ break; + case PTL_EVENT_SEND_START: + debug(0, "%s: send start, a debugging message thresh %d\n", __func__, + ev->md.threshold); + break; + case PTL_EVENT_PUT_START: + debug(0, "%s: put start, a debugging message, thresh %d\n", __func__, + ev->md.threshold); + break; + default: gossip_err("%s: unknown event %s\n", __func__, PtlEventKindStr(ev->type)); @@ -547,14 +680,14 @@ static int __check_eq(int idle_ms) ms = 0; /* just quickly pull events off */ if (ret == PTL_EQ_DROPPED) { /* oh well, hope things retry, just point this out */ - gossip_err("%s: PtlEQGet: dropped some completions\n", + gossip_err("%s: PtlEQPoll: dropped some completions\n", __func__); } } else if (ret == PTL_EQ_EMPTY) { ret = 0; break; } else { - gossip_err("%s: PtlEQGet: %s", __func__, PtlErrorStr(ret)); + gossip_err("%s: PtlEQPoll: %s\n", __func__, PtlErrorStr(ret)); ret = -EIO; break; } @@ -595,15 +728,13 @@ static void fill_done(struct bmip_work *w, bmi_op_id_t *id, bmi_size_t *size, if (w->state == RQ_LEN_ERROR) *err = -PVFS_EOVERFLOW; + debug(2, "%s: %s %p size %llu peer %s\n", __func__, + w->type == BMI_SEND ? 
"sq" : "rq", w, llu(*size), + bmip_rev_lookup(w->mop.addr)); + /* free resources too */ id_gen_fast_unregister(w->mop.op_id); qlist_del(&w->list); - /* work around "me/md in use" problem with doing this in the ack */ - if (w->me_unlink) { - int ret = PtlMEUnlink(w->me); - if (ret) - gossip_err("%s: PtlMEUnlink: %s\n", __func__, PtlErrorStr(ret)); - } free(w); } @@ -754,12 +885,14 @@ static int ensure_ni_initialized(struct bmip_method_addr *peer __unused, ptl_process_id_t my_pid) { int ret = 0; - static ptl_process_id_t no_pid; + ptl_process_id_t no_pid; + int nic_type; ptl_md_t zero_mdesc = { .threshold = PTL_MD_THRESH_INF, .max_size = 0, .options = PTL_MD_OP_PUT | PTL_MD_TRUNCATE | PTL_MD_MAX_SIZE | PTL_MD_EVENT_START_DISABLE, + .user_ptr = 0, }; /* already initialized */ @@ -778,19 +911,30 @@ static int ensure_ni_initialized(struct bmip_method_addr *peer __unused, * lookup server, figure out how route would go to it, choose * that interface. Yeah. */ + +#if defined(__CRAYXT_SERVICE) || defined(__CRAYXT_COMPUTE_LINUX_TARGET) + /* + * Magic for Cray XT service nodes and compute node linux. + * Catamount uses default, TCP uses default. 
+ */ + nic_type = CRAY_USER_NAL; +#else + nic_type = PTL_IFACE_DEFAULT; +#endif + + /* needed for TCP */ /* setenv("PTL_IFACE", "eth0", 0); */ - ret = PtlNIInit(bmi_portals_nic_type, my_pid.pid, NULL, NULL, &ni); -#ifdef __LIBCATAMOUNT__ - if (bmi_portals_nic_type == PTL_IFACE_DEFAULT) { - if (ret == PTL_IFACE_DUP && ni != PTL_INVALID_HANDLE) { - ret = 0; /* already set up by pre-main on Cray compute nodes */ - ni_init_dup = 1; - } + + ret = PtlNIInit(nic_type, my_pid.pid, NULL, NULL, &ni); +#if defined(__LIBCATAMOUNT__) || defined(__CRAYXT_COMPUTE_LINUX_TARGET) + if (ret == PTL_IFACE_DUP && ni != PTL_INVALID_HANDLE) { + ret = 0; /* already set up by pre-main on catamount nodes */ + ni_init_dup = 1; } #endif if (ret) { /* error number is bogus here, do not try to decode it */ - gossip_err("%s: PtlNIInit failed: %d\n", __func__, ret); + gossip_err("%s: PtlNIInit failed: %s\n", __func__, PtlErrorStr(ret)); ni = PTL_INVALID_HANDLE; /* init call nulls it out */ ret = -EIO; goto out; @@ -812,7 +956,7 @@ static int ensure_ni_initialized(struct bmip_method_addr *peer __unused, debug(0, "%s: runtime thinks my id is %d.%d\n", __func__, id.nid, id.pid); } -#ifndef __LIBCATAMOUNT__ +#if !(defined(__LIBCATAMOUNT__) || defined(__CRAYXT_SERVICE) || defined(__CRAYXT_COMPUTE_LINUX_TARGET)) /* * Need an access control entry to allow everybody to talk, else root * cannot talk to random user, e.g. Not implemented on Cray. 
@@ -850,7 +994,8 @@ static int ensure_ni_initialized(struct bmip_method_addr *peer __unused, /* "zero" grabs just the header (of nonprepost, not unexpected), drops the * contents */ - ret = PtlMEAttach(ni, ptl_index, any_pid, 0, 0xffffffffULL, PTL_RETAIN, + ret = PtlMEAttach(ni, ptl_index, any_pid, 0, + (0x3fffffffULL << 32) | 0xffffffffULL, PTL_RETAIN, PTL_INS_AFTER, &zero_me); if (ret) { gossip_err("%s: PtlMEAttach zero: %s\n", __func__, PtlErrorStr(ret)); @@ -906,7 +1051,7 @@ static void build_mdesc(struct bmip_work *w, ptl_md_t *mdesc, int numbufs, void *const *buffers, const bmi_size_t *sizes) { mdesc->threshold = 1; - mdesc->options = PTL_MD_EVENT_START_DISABLE; + mdesc->options = 0; /* PTL_MD_EVENT_START_DISABLE; */ mdesc->eq_handle = eq; mdesc->user_ptr = w; @@ -937,7 +1082,7 @@ post_send(bmi_op_id_t *id, struct bmi_method_addr *addr, { struct bmip_method_addr *pma = addr->method_data; struct bmip_work *sq; - uint64_t tag; + uint64_t mb; int ret; ptl_md_t mdesc; @@ -955,7 +1100,7 @@ post_send(bmi_op_id_t *id, struct bmi_method_addr *addr, goto out; } sq->type = BMI_SEND; - sq->me_unlink = 0; + sq->saw_send_end_and_ack = 0; sq->tot_len = total_size; sq->is_unexpected = is_unexpected; fill_mop(sq, id, addr, user_ptr, context_id); @@ -963,26 +1108,36 @@ post_send(bmi_op_id_t *id, struct bmi_method_addr *addr, build_mdesc(sq, &mdesc, numbufs, (void *const *)(uintptr_t) buffers, sizes); mdesc.threshold = 2; /* put, ack */ - debug(2, "%s: sq %p len %lld peer %s tag %d\n", __func__, sq, - lld(total_size), pma->peername, bmi_tag); - sq->state = SQ_WAITING_ACK; gen_mutex_lock(&list_mutex); qlist_add_tail(&sq->list, &q_send_waiting_ack); gen_mutex_unlock(&list_mutex); /* if not unexpected, use an ME in case he has to come get it */ - tag = bmi_tag; if (sq->is_unexpected) { + + debug(2, "%s: sq %p len %lld peer %s tag %d unexpected\n", __func__, sq, + lld(total_size), pma->peername, bmi_tag); /* md without any match entry, for sending */ - tag |= 
match_bits_unexpected; + mb = match_bits_unexpected | bmi_tag; ret = PtlMDBind(ni, mdesc, PTL_UNLINK, &sq->md); if (ret) { gossip_err("%s: PtlMDBind: %s\n", __func__, PtlErrorStr(ret)); return -EIO; } + debug(2, "%s: bound md %d\n", __func__, sq->md); } else { - ret = PtlMEInsert(mark_me, pma->pid, match_bits_long_send | tag, + /* seqno increments on every expected send (only) */ + if (++pma->seqno_out >= match_bits_seqno_max) + pma->seqno_out = 0; + mb = mb_from_tag_and_seqno(bmi_tag, pma->seqno_out); + + debug(2, "%s: sq %p len %lld peer %s tag %d seqno %u mb 0x%llx\n", + __func__, sq, lld(total_size), pma->peername, bmi_tag, + pma->seqno_out, llu(mb)); + + /* long-send bit only on the ME, not as the outgoing mb in PtlPut */ + ret = PtlMEInsert(mark_me, pma->pid, match_bits_long_send | mb, 0, PTL_UNLINK, PTL_INS_BEFORE, &sq->me); if (ret) { gossip_err("%s: PtlMEInsert: %s\n", __func__, PtlErrorStr(ret)); @@ -1003,7 +1158,10 @@ post_send(bmi_op_id_t *id, struct bmi_method_addr *addr, } } - ret = PtlPut(sq->md, PTL_ACK_REQ, pma->pid, ptl_index, 0, tag, 0, 0); + sq->bmi_tag = bmi_tag; /* both for debugging dumps */ + sq->match_bits = mb; + + ret = PtlPut(sq->md, PTL_ACK_REQ, pma->pid, ptl_index, 0, mb, 0, 0); if (ret) { gossip_err("%s: PtlPut: %s\n", __func__, PtlErrorStr(ret)); return -EIO; @@ -1103,10 +1261,18 @@ static int match_nonprepost_recv(bmi_op_id_t *id, struct bmi_method_addr *addr, int ret = 0; ptl_md_t mdesc; struct bmip_work *rq; + uint64_t mb; + /* expected match bits */ + mb = mb_from_tag_and_seqno(tag, pma->seqno_in); + + /* XXX: remove bmi_tag comparison if match_bits works */ gen_mutex_lock(&list_mutex); qlist_for_each_entry(rq, &q_recv_nonprepost, list) { - if (rq->mop.addr == addr && rq->bmi_tag == tag) { + debug(2, "%s: compare rq %p addr %p =? %p tag %u =? %u mb 0x%llx =? 
0x%llx\n", __func__, + rq, rq->mop.addr, addr, rq->bmi_tag, tag, llu(rq->match_bits), + llu(mb)); + if (rq->mop.addr == addr && rq->bmi_tag == tag && rq->match_bits == mb) { found = 1; qlist_del(&rq->list); break; @@ -1137,6 +1303,7 @@ static int match_nonprepost_recv(bmi_op_id_t *id, struct bmi_method_addr *addr, nonprepost_repost(rq->nonpp_md); } rq->state = RQ_WAITING_USER_TEST; + debug(2, "%s: found short message rq %p, copied\n", __func__, rq); goto foundout; } @@ -1156,7 +1323,7 @@ static int match_nonprepost_recv(bmi_op_id_t *id, struct bmi_method_addr *addr, rq->tot_len = total_size; build_mdesc(rq, &mdesc, numbufs, buffers, sizes); - mdesc.options |= PTL_MD_OP_GET; + mdesc.threshold = 2; /* XXX: on Cray only, this must be 2, not 1 */ ret = PtlMDBind(ni, mdesc, PTL_UNLINK, &rq->md); if (ret) { @@ -1166,9 +1333,11 @@ static int match_nonprepost_recv(bmi_op_id_t *id, struct bmi_method_addr *addr, goto out; } - ret = PtlGet(rq->md, pma->pid, ptl_index, 0, match_bits_long_send | tag, 0); + mb |= match_bits_long_send; + debug(2, "%s: rq %p doing get mb 0x%llx\n", __func__, rq, llu(mb)); + ret = PtlGet(rq->md, pma->pid, ptl_index, 0, mb, 0); if (ret) { - gossip_err("%s: PtlGetRegion: %s\n", __func__, PtlErrorStr(ret)); + gossip_err("%s: PtlGet: %s\n", __func__, PtlErrorStr(ret)); ret = -EIO; free(rq); goto out; @@ -1200,25 +1369,31 @@ static int post_recv(bmi_op_id_t *id, struct bmi_method_addr *addr, struct bmip_work *rq = NULL; ptl_md_t mdesc; int ret, ms = 0; + uint64_t mb = 0; ret = ensure_ni_initialized(pma, any_pid); if (ret) goto out; - debug(2, "%s: len %lld peer %s tag %d\n", __func__, lld(total_size), - pma->peername, tag); + /* increment the expected seqno of the message he will send us */ + if (++pma->seqno_in >= match_bits_seqno_max) + pma->seqno_in = 0; + + debug(2, "%s: len %lld peer %s tag %d seqno %u\n", __func__, + lld(total_size), pma->peername, tag, pma->seqno_in); rq = NULL; gen_mutex_lock(&eq_mutex); /* do not let test threads manipulate eq 
*/ restart: /* drain the EQ */ - debug(4, "%s: check eq\n", __func__); + debug(2, "%s: check eq\n", __func__); __check_eq(ms); - /* first check the unexpected receive queue */ - debug(4, "%s: match nonprepost?\n", __func__); + /* first check the nonpreposted receive queue */ + debug(2, "%s: match nonprepost?\n", __func__); ret = match_nonprepost_recv(id, addr, numbufs, buffers, sizes, total_size, tag, user_ptr, context_id); + if (ret != 0) { if (ret > 0) /* handled it via the nonprepost queue */ ret = 0; @@ -1234,11 +1409,11 @@ static int post_recv(bmi_op_id_t *id, struct bmi_method_addr *addr, goto out; } rq->type = BMI_RECV; - rq->me_unlink = 0; rq->tot_len = total_size; rq->actual_len = 0; rq->bmi_tag = tag; fill_mop(rq, id, addr, user_ptr, context_id); + memset(&mdesc, 0, sizeof(mdesc)); build_mdesc(rq, &mdesc, numbufs, buffers, sizes); mdesc.threshold = 0; /* initially inactive */ mdesc.options |= PTL_MD_OP_PUT; @@ -1246,8 +1421,10 @@ static int post_recv(bmi_op_id_t *id, struct bmi_method_addr *addr, /* put at the end of the preposted list, just before the first * nonprepost or unex ME. 
*/ rq->me = PTL_INVALID_HANDLE; - debug(4, "%s: me insert\n", __func__); - ret = PtlMEInsert(mark_me, pma->pid, tag, 0, PTL_UNLINK, + debug(2, "%s: me insert\n", __func__); + mb = mb_from_tag_and_seqno(tag, pma->seqno_in); + rq->match_bits = mb; + ret = PtlMEInsert(mark_me, pma->pid, mb, 0, PTL_UNLINK, PTL_INS_BEFORE, &rq->me); if (ret) { gossip_err("%s: PtlMEInsert: %s\n", __func__, PtlErrorStr(ret)); @@ -1255,22 +1432,24 @@ static int post_recv(bmi_op_id_t *id, struct bmi_method_addr *addr, goto out; } - debug(4, "%s: md attach\n", __func__); + debug(2, "%s: md attach\n", __func__); ret = PtlMDAttach(rq->me, mdesc, PTL_UNLINK, &rq->md); if (ret) { gossip_err("%s: PtlMDAttach: %s\n", __func__, PtlErrorStr(ret)); ret = -EIO; goto out; } + debug(2, "%s: me %d, md %d\n", __func__, rq->me, rq->md); } /* now update it atomically with respect to the event stream from the NIC */ mdesc.threshold = 1; - debug(4, "%s: md update\n", __func__); + debug(2, "%s: md update threshold to 1\n", __func__); ret = PtlMDUpdate(rq->md, NULL, &mdesc, eq); if (ret) { if (ret == PTL_MD_NO_UPDATE) { /* cannot block, other thread may have processed the event for us */ + debug(2, "%s: md update: no update\n", __func__); ms = PTL_TIME_FOREVER; goto restart; } @@ -1279,8 +1458,33 @@ static int post_recv(bmi_op_id_t *id, struct bmi_method_addr *addr, goto out; } +#ifdef DEBUG_CNL_ODDITIES + { + debug(2, "insert another\n"); + ret = PtlMEInsert(mark_me, pma->pid, 0, -1ULL, PTL_UNLINK, + PTL_INS_BEFORE, &rq->tme); + if (ret) { + gossip_err("%s: PtlMEInsert: %s\n", __func__, PtlErrorStr(ret)); + ret = -EIO; + goto out; + } - debug(4, "%s: done\n", __func__); + debug(2, "%s: md attach\n", __func__); + mdesc.user_ptr = (void *) ((uintptr_t) mdesc.user_ptr + 1); + ret = PtlMDAttach(rq->tme, mdesc, PTL_UNLINK, &rq->tmd); + if (ret) { + gossip_err("%s: PtlMDAttach: %s\n", __func__, PtlErrorStr(ret)); + ret = -EIO; + goto out; + } + debug(2, "%s: me %d, md %d\n", __func__, rq->tme, rq->tmd); + } +#endif 
+ + + debug(2, "%s: rq %p waiting incoming, len %lld peer %s tag %d seqno %u mb 0x%llx\n", + __func__, rq, lld(total_size), pma->peername, tag, pma->seqno_in, + llu(mb)); rq->state = RQ_WAITING_INCOMING; gen_mutex_lock(&list_mutex); qlist_add_tail(&rq->list, &q_recv_waiting_incoming); @@ -1324,6 +1528,31 @@ static int bmip_post_recv_list(bmi_op_id_t *id, struct bmi_method_addr *remote_m tot_expected_len, tag, user_ptr, context_id); } +/* debugging */ +#define show_queue(q) do { \ + fprintf(stderr, #q "\n"); \ + qlist_for_each_entry(w, &q, list) { \ + fprintf(stderr, "%s %p state %s len %llu tag 0x%llx mb 0x%0llx\n", \ + w->type == BMI_SEND ? "sq" : "rq", \ + w, state_name(w->state), \ + w->type == BMI_SEND ? llu(w->tot_len) : llu(w->actual_len), \ + llu(w->bmi_tag), llu(w->match_bits)); \ + } \ +} while (0) + +static void dump_queues(int sig __unused) +{ + struct bmip_work *w; + + /* debugging */ + show_queue(q_send_waiting_ack); + show_queue(q_send_waiting_get); + show_queue(q_recv_waiting_incoming); + show_queue(q_recv_waiting_get); + show_queue(q_recv_nonprepost); + show_queue(q_unexpected_done); + show_queue(q_done); +} /* * Cancel. 
Grab the eq lock to keep things from finishing as we are @@ -1340,7 +1569,9 @@ static int bmip_cancel(bmi_op_id_t id, bmi_context_id context_id __unused) __check_eq(0); mop = id_gen_fast_lookup(id); w = mop->method_data; - debug(2, "%s: cancel %p state %s\n", __func__, w, state_name(w->state)); + fprintf(stderr, "%s: cancel %p state %s len %llu tag 0x%llx mb 0x%llx\n", + __func__, w, state_name(w->state), llu(w->tot_len), + llu(w->bmi_tag), llu(w->match_bits)); switch (w->state) { case SQ_WAITING_ACK: @@ -1371,6 +1602,7 @@ static int bmip_cancel(bmi_op_id_t id, bmi_context_id context_id __unused) case SQ_WAITING_USER_TEST: case RQ_WAITING_USER_TEST: + case RQ_WAITING_USER_POST: case RQ_LEN_ERROR: case SQ_CANCELLED: case RQ_CANCELLED: @@ -1387,6 +1619,11 @@ static int bmip_cancel(bmi_op_id_t id, bmi_context_id context_id __unused) out: gen_mutex_unlock(&eq_mutex); + + /* debugging */ + dump_queues(0); + + exit(1); return 0; } @@ -1418,7 +1655,7 @@ static struct bmi_method_addr *bmip_alloc_method_addr(const char *hostname, if (pma->pid.pid == pid.pid && pma->pid.nid == pid.nid) { /* relies on alloc_method_addr() working like it does */ map = &((struct bmi_method_addr *) pma)[-1]; - debug(2, "%s: found map %p from pma %p\n", __func__, map, pma); + debug(2, "%s: found matching peer %s\n", __func__, pma->peername); goto out; } } @@ -1436,16 +1673,19 @@ static struct bmi_method_addr *bmip_alloc_method_addr(const char *hostname, sprintf(pma->peername, "%s:%d", hostname, pid.pid); pma->pid = pid; + pma->seqno_in = 0; + pma->seqno_out = 0; qlist_add(&pma->list, &pma_list); if (register_with_bmi) { ret = bmi_method_addr_reg_callback(map); - if (ret < 0) { + if (!ret) { gossip_err("%s: bmi_method_addr_reg_callback failed\n", __func__); free(map); map = NULL; } } + debug(2, "%s: new peer %s\n", __func__, pma->peername); out: gen_mutex_unlock(&pma_mutex); @@ -1453,7 +1693,7 @@ static struct bmi_method_addr *bmip_alloc_method_addr(const char *hostname, } -#ifndef 
__LIBCATAMOUNT__ +#if !(defined(__LIBCATAMOUNT__) || defined(__CRAYXT_COMPUTE_LINUX_TARGET) || defined(__CRAYXT_SERVICE)) /* * Clients give hostnames. Convert these to Portals nids. This routine * specific for Portals-over-IP (tcp or utcp). @@ -1632,8 +1872,12 @@ static int unexpected_init(struct bmi_method_addr *listen_addr) * to repost the first. Sort of a circular buffer structure. This is * hopefully better than wasting a full 8k for every small control message. */ - for (i=0; i 2) + exit(0); + + /* unlink used-up one */ + if (unexpected_is_posted[which]) { + debug(1, "%s: trying unpost %d\n", __func__, which); + ret = PtlMEUnlink(unexpected_me[which]); + if (ret) { + gossip_err("%s: PtlMEUnlink %d: %s\n", __func__, which, + PtlErrorStr(ret)); + return; + } + debug(1, "%s: unposted %d\n", __func__, which); + unexpected_need_repost[which] = 0; + unexpected_is_posted[which] = 0; + --unexpected_need_repost_sum; + } /* only short messages that fit max_size go in here */ mdesc.start = nonprepost_buf + which * (NONPREPOST_QUEUE_SIZE / 2); @@ -1728,13 +2021,17 @@ static void nonprepost_repost(int which) | PTL_MD_MAX_SIZE; mdesc.max_size = NONPREPOST_MESSAGE_SIZE; mdesc.eq_handle = eq; + mdesc.user_ptr = (void *) (uintptr_t) (NONPREPOST_MD_INDEX_OFFSET + which); + + /* XXX: maybe need manual unlink like for unexpecteds on CNL */ /* also at the very end of the list */ - ret = PtlMEAttach(ni, ptl_index, any_pid, 0, - 0xffffffffULL, PTL_UNLINK, PTL_INS_AFTER, - &nonprepost_me[which]); + /* match anything as long as top two bits are zero */ + ret = PtlMEInsert(zero_me, any_pid, 0, + (0x3fffffffULL << 32) | 0xffffffffULL, + PTL_UNLINK, PTL_INS_BEFORE, &nonprepost_me[which]); if (ret) { - gossip_err("%s: PtlMEAttach: %s\n", __func__, PtlErrorStr(ret)); + gossip_err("%s: PtlMEInsert: %s\n", __func__, PtlErrorStr(ret)); return; } @@ -1759,14 +2056,13 @@ static int nonprepost_fini(void) nonprepost_refcnt[i]); if (!nonprepost_is_posted[i]) continue; - ret = 
PtlMDUnlink(nonprepost_md[i]); + /* MDs go away when MEs unlinked */ + ret = PtlMEUnlink(nonprepost_me[i]); if (ret) { - gossip_err("%s: PtlMDUnlink %d: %s\n", __func__, i, + gossip_err("%s: PtlMEUnlink %d: %s\n", __func__, i, PtlErrorStr(ret)); - return ret; } } - /* MEs are automatically discarded when MDs go away */ free(nonprepost_buf); return 0; } @@ -1873,24 +2169,6 @@ static int bmip_initialize(struct bmi_method_addr *listen_addr, bmi_portals_method_id = method_id; - bmi_portals_nic_type = PTL_IFACE_DEFAULT; -#ifdef __LIBCATAMOUNT__ - { - /* magic for Cray XT3 service nodes only; compute uses default, - * and TCP uses default */ - struct utsname buf; - - ret = uname(&buf); - if (ret) { - gossip_err("%s: uname failed: %m\n", __func__); - ret = -EIO; - goto out; - } - if (strcmp(buf.sysname, "Linux") == 0) - bmi_portals_nic_type = CRAY_USER_NAL; - } -#endif - ret = PtlInit(&numint); if (ret) { gossip_err("%s: PtlInit failed\n", __func__); @@ -1912,6 +2190,17 @@ static int bmip_initialize(struct bmi_method_addr *listen_addr, /* PtlNIDebug(PTL_INVALID_HANDLE, PTL_DBG_ALL | 0x00000000); */ /* PtlNIDebug(PTL_INVALID_HANDLE, PTL_DBG_DROP | 0x00000000); */ + /* catamount has different debug symbols, but never prints anything */ + PtlNIDebug(PTL_INVALID_HANDLE, PTL_DEBUG_ALL | PTL_DEBUG_NI_ALL); + /* PtlNIDebug(PTL_INVALID_HANDLE, PTL_DEBUG_DROP | 0x00000000); */ + +#if defined(__CRAYXT_SERVICE) + /* + * debug + */ + signal(SIGUSR1, dump_queues); +#endif + /* * Allocate and build MDs for a queue of unexpected messages from * all hosts. Drop lock for coming NI init call. 
@@ -1952,6 +2241,19 @@ static int bmip_finalize(void) if (unexpected_buf) unexpected_fini(); +#if 0 /* example code: stick this somewhere to test if the EQ is freeable */ + /* unexpected_fini(); */ + nonprepost_fini(); + ret = PtlMEUnlink(zero_me); + if (ret) + gossip_err("%s: PtlMEUnlink zero: %s\n", __func__, PtlErrorStr(ret)); + ret = PtlEQFree(eq); + if (ret) + gossip_err("%s: PtlEQFree: %s\n", __func__, PtlErrorStr(ret)); + printf("eqfree okay\n"); + exit(1); +#endif + /* destroy connection structures */ ret = PtlMEUnlink(mark_me); if (ret) @@ -1991,6 +2293,7 @@ static int bmip_query_addr_range(struct bmi_method_addr *mop __unused, const struct bmi_method_ops bmi_portals_ops = { .method_name = "bmi_portals", + .flags = 0, .initialize = bmip_initialize, .finalize = bmip_finalize, .set_info = bmip_set_info, diff --git a/src/io/bmi/bmi_tcp/bmi-tcp-addressing.h b/src/io/bmi/bmi_tcp/bmi-tcp-addressing.h index 9e75ad6..151e0ce 100644 --- a/src/io/bmi/bmi_tcp/bmi-tcp-addressing.h +++ b/src/io/bmi/bmi_tcp/bmi-tcp-addressing.h @@ -24,6 +24,11 @@ */ #define BMI_TCP_ZERO_READ_LIMIT 10 +/* wait no more than 10 seconds for a partial BMI header to arrive on a + * socket once we have detected part of it. 
+ */ +#define BMI_TCP_HEADER_WAIT_SECONDS 10 + /* peer name types */ #define BMI_TCP_PEER_IP 1 #define BMI_TCP_PEER_HOSTNAME 2 @@ -47,7 +52,7 @@ struct tcp_allowed_connection_s { struct tcp_addr { bmi_method_addr_p map; /* points back to generic address */ \ - PVFS_BMI_addr_t bmi_addr; + BMI_addr_t bmi_addr; /* stores error code for addresses that are broken for some reason */ int addr_error; char *hostname; @@ -65,6 +70,8 @@ struct tcp_addr int sc_index; /* count of the number of sequential zero read operations */ int zero_read_limit; + /* timer for how long we wait on incomplete headers to arrive */ + int short_header_timer; /* flag used to determine if we can reconnect this address after failure */ int dont_reconnect; char* peer; diff --git a/src/io/bmi/bmi_tcp/bmi-tcp.c b/src/io/bmi/bmi_tcp/bmi-tcp.c index a59792f..551bf84 100644 --- a/src/io/bmi/bmi_tcp/bmi-tcp.c +++ b/src/io/bmi/bmi_tcp/bmi-tcp.c @@ -15,9 +15,11 @@ #include #include #include +#include #include #include #include +#include "pint-mem.h" #include "pvfs2-config.h" #ifdef HAVE_NETDB_H @@ -38,21 +40,18 @@ #include "bmi-byteswap.h" #include "id-generator.h" #include "pint-event.h" +#include "pvfs2-debug.h" #ifdef USE_TRUSTED #include "server-config.h" #include "bmi-tcp-addressing.h" #endif #include "gen-locks.h" - -#define BMI_EVENT_START(__op, __id) \ - PINT_event_timestamp(PVFS_EVENT_API_BMI, __op, 0, __id, \ - PVFS_EVENT_FLAG_START) - -#define BMI_EVENT_END(__op, __size, __id) \ - PINT_event_timestamp(PVFS_EVENT_API_BMI, __op, __size, __id, \ - PVFS_EVENT_FLAG_END) +#include "pint-hint.h" +#include "pint-event.h" static gen_mutex_t interface_mutex = GEN_MUTEX_INITIALIZER; +static gen_cond_t interface_cond = GEN_COND_INITIALIZER; +static int sc_test_busy = 0; /* function prototypes */ int BMI_tcp_initialize(bmi_method_addr_p listen_addr, @@ -76,7 +75,8 @@ int BMI_tcp_post_send(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + 
bmi_context_id context_id, + PVFS_hint hints); int BMI_tcp_post_sendunexpected(bmi_op_id_t * id, bmi_method_addr_p dest, const void *buffer, @@ -84,7 +84,8 @@ int BMI_tcp_post_sendunexpected(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + PVFS_hint hints); int BMI_tcp_post_recv(bmi_op_id_t * id, bmi_method_addr_p src, void *buffer, @@ -93,7 +94,8 @@ int BMI_tcp_post_recv(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + PVFS_hint hints); int BMI_tcp_test(bmi_op_id_t id, int *outcount, bmi_error_code_t * error_code, @@ -134,7 +136,8 @@ int BMI_tcp_post_send_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + PVFS_hint hints); int BMI_tcp_post_recv_list(bmi_op_id_t * id, bmi_method_addr_p src, void *const *buffer_list, @@ -145,7 +148,8 @@ int BMI_tcp_post_recv_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + PVFS_hint hints); int BMI_tcp_post_sendunexpected_list(bmi_op_id_t * id, bmi_method_addr_p dest, const void *const *buffer_list, @@ -155,7 +159,8 @@ int BMI_tcp_post_sendunexpected_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id); + bmi_context_id context_id, + PVFS_hint hints); int BMI_tcp_open_context(bmi_context_id context_id); void BMI_tcp_close_context(bmi_context_id context_id); int BMI_tcp_cancel(bmi_op_id_t id, bmi_context_id context_id); @@ -177,7 +182,9 @@ struct tcp_msg_header #define BMI_TCP_ENC_HDR(hdr) \ do { \ - *((uint32_t*)&((hdr).enc_hdr[0])) = htobmi32((hdr).magic_nr); \ + uint32_t *tmp32; \ + tmp32 = (uint32_t *)&(hdr).enc_hdr[0]; \ + *(tmp32) = htobmi32((hdr).magic_nr); \ 
*((uint32_t*)&((hdr).enc_hdr[4])) = htobmi32((hdr).mode); \ *((uint64_t*)&((hdr).enc_hdr[8])) = htobmi64((hdr).tag); \ *((uint64_t*)&((hdr).enc_hdr[16])) = htobmi64((hdr).size); \ @@ -185,12 +192,15 @@ struct tcp_msg_header #define BMI_TCP_DEC_HDR(hdr) \ do { \ - (hdr).magic_nr = bmitoh32(*((uint32_t*)&((hdr).enc_hdr[0]))); \ - (hdr).mode = bmitoh32(*((uint32_t*)&((hdr).enc_hdr[4]))); \ - (hdr).tag = bmitoh64(*((uint64_t*)&((hdr).enc_hdr[8]))); \ - (hdr).size = bmitoh64(*((uint64_t*)&((hdr).enc_hdr[16]))); \ + uint32_t tmp32; \ + memcpy(&tmp32,&(hdr).enc_hdr[0],sizeof(uint32_t)); \ + (hdr).magic_nr = bmitoh32(tmp32); \ + (hdr).mode = bmitoh32(*((uint32_t*)&((hdr).enc_hdr[4]))); \ + (hdr).tag = bmitoh64(*((uint64_t*)&((hdr).enc_hdr[8]))); \ + (hdr).size = bmitoh64(*((uint64_t*)&((hdr).enc_hdr[16]))); \ } while(0) + /* enumerate states that we care about */ enum bmi_tcp_state { @@ -237,7 +247,8 @@ static int enqueue_operation(op_list_p target_list, void *user_ptr, bmi_size_t actual_size, bmi_size_t expected_size, - bmi_context_id context_id); + bmi_context_id context_id, + int32_t event_id); static int tcp_cleanse_addr(bmi_method_addr_p map, int error_code); static int tcp_shutdown_addr(bmi_method_addr_p map); static int tcp_do_work(int max_idle_time); @@ -252,26 +263,28 @@ static int tcp_accept_init(int *socket, char** peer); static method_op_p alloc_tcp_method_op(void); static void dealloc_tcp_method_op(method_op_p old_op); static int handle_new_connection(bmi_method_addr_p map); -static int BMI_tcp_post_send_generic(bmi_op_id_t * id, - bmi_method_addr_p dest, - const void *const *buffer_list, - const bmi_size_t *size_list, - int list_count, - enum bmi_buffer_type buffer_type, - struct tcp_msg_header my_header, - void *user_ptr, - bmi_context_id context_id); +static int tcp_post_send_generic(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + enum bmi_buffer_type buffer_type, + struct 
tcp_msg_header my_header, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); static int tcp_post_recv_generic(bmi_op_id_t * id, - bmi_method_addr_p src, - void *const *buffer_list, - const bmi_size_t *size_list, - int list_count, - bmi_size_t expected_size, - bmi_size_t * actual_size, - enum bmi_buffer_type buffer_type, - bmi_msg_tag_t tag, - void *user_ptr, - bmi_context_id context_id); + bmi_method_addr_p src, + void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t expected_size, + bmi_size_t * actual_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints); static int payload_progress(int s, void *const *buffer_list, const bmi_size_t* size_list, int list_count, bmi_size_t total_size, int* list_index, bmi_size_t* current_index_complete, enum bmi_op_type send_recv, @@ -326,6 +339,8 @@ static struct static struct tcp_allowed_connection_s *gtcp_allowed_connection = NULL; #endif +static int check_unexpected = 1; + /* op_list_array indices */ enum { @@ -388,6 +403,11 @@ static int forceful_cancel_mode = 0; static int tcp_buffer_size_receive = 0; static int tcp_buffer_size_send = 0; +static PINT_event_type bmi_tcp_send_event_id; +static PINT_event_type bmi_tcp_recv_event_id; + +static PINT_event_group bmi_tcp_event_group; +static pid_t bmi_tcp_pid; /************************************************************************* * Visible Interface @@ -401,8 +421,8 @@ static int tcp_buffer_size_send = 0; * returns 0 on success, -errno on failure */ int BMI_tcp_initialize(bmi_method_addr_p listen_addr, - int method_id, - int init_flags) + int method_id, + int init_flags) { int ret = -1; @@ -415,8 +435,8 @@ int BMI_tcp_initialize(bmi_method_addr_p listen_addr, /* check args */ if ((init_flags & BMI_INIT_SERVER) && !listen_addr) { - gossip_lerr("Error: bad parameters given to TCP/IP module.\n"); - return (bmi_tcp_errno_to_pvfs(-EINVAL)); + gossip_lerr("Error: 
bad parameters given to TCP/IP module.\n"); + return (bmi_tcp_errno_to_pvfs(-EINVAL)); } gen_mutex_lock(&interface_mutex); @@ -428,46 +448,77 @@ int BMI_tcp_initialize(bmi_method_addr_p listen_addr, if (init_flags & BMI_INIT_SERVER) { - /* hang on to our local listening address if needed */ - tcp_method_params.listen_addr = listen_addr; - /* and initialize server functions */ - ret = tcp_server_init(); - if (ret < 0) - { - tmp_errno = bmi_tcp_errno_to_pvfs(ret); - gossip_err("Error: tcp_server_init() failure.\n"); - goto initialize_failure; - } + /* hang on to our local listening address if needed */ + tcp_method_params.listen_addr = listen_addr; + /* and initialize server functions */ + ret = tcp_server_init(); + if (ret < 0) + { + tmp_errno = bmi_tcp_errno_to_pvfs(ret); + gossip_err("Error: tcp_server_init() failure.\n"); + goto initialize_failure; + } } /* set up the operation lists */ for (i = 0; i < NUM_INDICES; i++) { - op_list_array[i] = op_list_new(); - if (!op_list_array[i]) - { - tmp_errno = bmi_tcp_errno_to_pvfs(-ENOMEM); - goto initialize_failure; - } + op_list_array[i] = op_list_new(); + if (!op_list_array[i]) + { + tmp_errno = bmi_tcp_errno_to_pvfs(-ENOMEM); + goto initialize_failure; + } } /* set up the socket collection */ if (tcp_method_params.method_flags & BMI_INIT_SERVER) { - tcp_addr_data = tcp_method_params.listen_addr->method_data; - tcp_socket_collection_p = BMI_socket_collection_init(tcp_addr_data->socket); + tcp_addr_data = tcp_method_params.listen_addr->method_data; + tcp_socket_collection_p = BMI_socket_collection_init(tcp_addr_data->socket); } else { - tcp_socket_collection_p = BMI_socket_collection_init(-1); + tcp_socket_collection_p = BMI_socket_collection_init(-1); } if (!tcp_socket_collection_p) { - tmp_errno = bmi_tcp_errno_to_pvfs(-ENOMEM); - goto initialize_failure; + tmp_errno = bmi_tcp_errno_to_pvfs(-ENOMEM); + goto initialize_failure; } + bmi_tcp_pid = getpid(); + PINT_event_define_group("bmi_tcp", &bmi_tcp_event_group); + + 
/* Define the send event: + * START: (client_id, request_id, rank, handle, op_id, send_size) + * STOP: (size_sent) + */ + PINT_event_define_event( + &bmi_tcp_event_group, +#ifdef __PVFS2_SERVER__ + "bmi_server_send", +#else + "bmi_client_send", +#endif + "%d%d%d%llu%d%d", + "%d", &bmi_tcp_send_event_id); + + /* Define the recv event: + * START: (client_id, request_id, rank, handle, op_id, recv_size) + * STOP: (size_received) + */ + PINT_event_define_event( + &bmi_tcp_event_group, +#ifdef __PVFS2_SERVER__ + "bmi_server_recv", +#else + "bmi_client_recv", +#endif + "%d%d%d%llu%d%d", + "%d", &bmi_tcp_recv_event_id); + gen_mutex_unlock(&interface_mutex); gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "TCP/IP module successfully initialized.\n"); @@ -478,14 +529,14 @@ int BMI_tcp_initialize(bmi_method_addr_p listen_addr, /* cleanup data structures and bail out */ for (i = 0; i < NUM_INDICES; i++) { - if (op_list_array[i]) - { - op_list_cleanup(op_list_array[i]); - } + if (op_list_array[i]) + { + op_list_cleanup(op_list_array[i]); + } } if (tcp_socket_collection_p) { - BMI_socket_collection_finalize(tcp_socket_collection_p); + BMI_socket_collection_finalize(tcp_socket_collection_p); } gen_mutex_unlock(&interface_mutex); return (tmp_errno); @@ -506,26 +557,26 @@ int BMI_tcp_finalize(void) /* shut down our listen addr, if we have one */ if ((tcp_method_params.method_flags & BMI_INIT_SERVER) - && tcp_method_params.listen_addr) + && tcp_method_params.listen_addr) { - dealloc_tcp_method_addr(tcp_method_params.listen_addr); + dealloc_tcp_method_addr(tcp_method_params.listen_addr); } /* note that this forcefully shuts down operations */ for (i = 0; i < NUM_INDICES; i++) { - if (op_list_array[i]) - { - op_list_cleanup(op_list_array[i]); - op_list_array[i] = NULL; - } + if (op_list_array[i]) + { + op_list_cleanup(op_list_array[i]); + op_list_array[i] = NULL; + } } /* get rid of socket collection */ if (tcp_socket_collection_p) { - BMI_socket_collection_finalize(tcp_socket_collection_p); - 
tcp_socket_collection_p = NULL; + BMI_socket_collection_finalize(tcp_socket_collection_p); + tcp_socket_collection_p = NULL; } /* NOTE: we are trusting the calling BMI layer to deallocate @@ -618,7 +669,8 @@ void *BMI_tcp_memalloc(bmi_size_t size, * preferences about how the memory should be configured. */ - return (calloc(1,(size_t) size)); +/* return (calloc(1,(size_t) size)); */ + return PINT_mem_aligned_alloc(size, 4096); } @@ -632,16 +684,7 @@ int BMI_tcp_memfree(void *buffer, bmi_size_t size, enum bmi_op_type send_recv) { - /* NOTE: I am not going to bother to check to see if it is really our - * buffer. This function trusts the caller. - * We also could care less whether it was a send or recv buffer. - */ - if (buffer) - { - free(buffer); - buffer = NULL; - } - + PINT_mem_aligned_free(buffer); return (0); } @@ -876,6 +919,13 @@ int BMI_tcp_set_info(int option, break; } #endif + case BMI_TCP_CHECK_UNEXPECTED: + { + check_unexpected = *(int *)inout_parameter; + ret = 0; + break; + } + default: gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "TCP hint %d not implemented.\n", option); @@ -956,7 +1006,8 @@ int BMI_tcp_post_send(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + PVFS_hint hints) { struct tcp_msg_header my_header; int ret = -1; @@ -984,13 +1035,9 @@ int BMI_tcp_post_send(bmi_op_id_t * id, gen_mutex_lock(&interface_mutex); - ret = BMI_tcp_post_send_generic(id, dest, &buffer, - &size, 1, buffer_type, my_header, - user_ptr, context_id); - if(ret >= 0) - BMI_EVENT_START(PVFS_EVENT_BMI_SEND, *id); - if(ret == 1) - BMI_EVENT_END(PVFS_EVENT_BMI_SEND, size, *id); + ret = tcp_post_send_generic(id, dest, &buffer, + &size, 1, buffer_type, my_header, + user_ptr, context_id, hints); gen_mutex_unlock(&interface_mutex); return(ret); @@ -1011,7 +1058,8 @@ int BMI_tcp_post_sendunexpected(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - 
bmi_context_id context_id) + bmi_context_id context_id, + PVFS_hint hints) { struct tcp_msg_header my_header; int ret = -1; @@ -1031,14 +1079,9 @@ int BMI_tcp_post_sendunexpected(bmi_op_id_t * id, gen_mutex_lock(&interface_mutex); - ret = BMI_tcp_post_send_generic(id, dest, &buffer, - &size, 1, buffer_type, my_header, - user_ptr, context_id); - if(ret >= 0) - BMI_EVENT_START(PVFS_EVENT_BMI_SEND, *id); - if(ret == 1) - BMI_EVENT_END(PVFS_EVENT_BMI_SEND, size, *id); - + ret = tcp_post_send_generic(id, dest, &buffer, + &size, 1, buffer_type, my_header, + user_ptr, context_id, hints); gen_mutex_unlock(&interface_mutex); return(ret); } @@ -1060,7 +1103,8 @@ int BMI_tcp_post_recv(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + PVFS_hint hints) { int ret = -1; @@ -1082,14 +1126,9 @@ int BMI_tcp_post_recv(bmi_op_id_t * id, gen_mutex_lock(&interface_mutex); ret = tcp_post_recv_generic(id, src, &buffer, &expected_size, - 1, expected_size, actual_size, - buffer_type, tag, - user_ptr, context_id); - - if(ret >= 0) - BMI_EVENT_START(PVFS_EVENT_BMI_RECV, *id); - if(ret == 1) - BMI_EVENT_END(PVFS_EVENT_BMI_RECV, *actual_size, *id); + 1, expected_size, actual_size, + buffer_type, tag, + user_ptr, context_id, hints); gen_mutex_unlock(&interface_mutex); return (ret); @@ -1136,10 +1175,11 @@ int BMI_tcp_test(bmi_op_id_t id, } (*error_code) = query_op->error_code; (*actual_size) = query_op->actual_size; - if(query_op->send_recv == BMI_SEND) - BMI_EVENT_END(PVFS_EVENT_BMI_SEND, *actual_size, id); - else - BMI_EVENT_END(PVFS_EVENT_BMI_RECV, *actual_size, id); + PINT_EVENT_END( + (query_op->send_recv == BMI_SEND ? 
+ bmi_tcp_send_event_id : bmi_tcp_recv_event_id), bmi_tcp_pid, NULL, + query_op->event_id, id, *actual_size); + dealloc_tcp_method_op(query_op); (*outcount)++; } @@ -1155,14 +1195,14 @@ int BMI_tcp_test(bmi_op_id_t id, * returns 0 on success, -errno on failure */ int BMI_tcp_testsome(int incount, - bmi_op_id_t * id_array, - int *outcount, - int *index_array, - bmi_error_code_t * error_code_array, - bmi_size_t * actual_size_array, - void **user_ptr_array, - int max_idle_time, - bmi_context_id context_id) + bmi_op_id_t * id_array, + int *outcount, + int *index_array, + bmi_error_code_t * error_code_array, + bmi_size_t * actual_size_array, + void **user_ptr_array, + int max_idle_time, + bmi_context_id context_id) { int ret = -1; method_op_p query_op = NULL; @@ -1174,39 +1214,40 @@ int BMI_tcp_testsome(int incount, ret = tcp_do_work(max_idle_time); if (ret < 0) { - gen_mutex_unlock(&interface_mutex); - return (ret); + gen_mutex_unlock(&interface_mutex); + return (ret); } for(i=0; imethod_data))->tcp_op_state == - BMI_TCP_COMPLETE) - { - assert(query_op->context_id == context_id); - /* this one's done; pop it out */ - op_list_remove(query_op); - error_code_array[*outcount] = query_op->error_code; - actual_size_array[*outcount] = query_op->actual_size; - index_array[*outcount] = i; - if (user_ptr_array != NULL) - { - user_ptr_array[*outcount] = query_op->user_ptr; - } - if(query_op->send_recv == BMI_SEND) - BMI_EVENT_END(PVFS_EVENT_BMI_SEND, query_op->actual_size, id_array[i]); - else - BMI_EVENT_END(PVFS_EVENT_BMI_RECV, query_op->actual_size, id_array[i]); - dealloc_tcp_method_op(query_op); - (*outcount)++; - } - } + if(id_array[i]) + { + /* NOTE: this depends on the user passing in valid id's; + * otherwise we segfault. 
+ */ + query_op = (method_op_p)id_gen_fast_lookup(id_array[i]); + if(((struct tcp_op*)(query_op->method_data))->tcp_op_state == + BMI_TCP_COMPLETE) + { + assert(query_op->context_id == context_id); + /* this one's done; pop it out */ + op_list_remove(query_op); + error_code_array[*outcount] = query_op->error_code; + actual_size_array[*outcount] = query_op->actual_size; + index_array[*outcount] = i; + if (user_ptr_array != NULL) + { + user_ptr_array[*outcount] = query_op->user_ptr; + } + PINT_EVENT_END( + (query_op->send_recv == BMI_SEND ? + bmi_tcp_send_event_id : bmi_tcp_recv_event_id), + bmi_tcp_pid, NULL, + query_op->event_id, actual_size_array[*outcount]); + dealloc_tcp_method_op(query_op); + (*outcount)++; + } + } } gen_mutex_unlock(&interface_mutex); @@ -1292,7 +1333,8 @@ int BMI_tcp_testcontext(int incount, * that the next testunexpected call can pick it up without * delay */ - if(!op_list_empty(op_list_array[IND_COMPLETE_RECV_UNEXP])) + if(check_unexpected && + !op_list_empty(op_list_array[IND_COMPLETE_RECV_UNEXP])) { gen_mutex_unlock(&interface_mutex); return(0); @@ -1308,29 +1350,31 @@ int BMI_tcp_testcontext(int incount, } /* pop as many items off of the completion queue as we can */ - while((*outcount < incount) && (query_op = - op_list_shownext(completion_array[context_id]))) + while((*outcount < incount) && + (query_op = + op_list_shownext(completion_array[context_id]))) { assert(query_op); - assert(query_op->context_id == context_id); + assert(query_op->context_id == context_id); + + /* this one's done; pop it out */ + op_list_remove(query_op); + error_code_array[*outcount] = query_op->error_code; + actual_size_array[*outcount] = query_op->actual_size; + out_id_array[*outcount] = query_op->op_id; + if (user_ptr_array != NULL) + { + user_ptr_array[*outcount] = query_op->user_ptr; + } - /* this one's done; pop it out */ - op_list_remove(query_op); - error_code_array[*outcount] = query_op->error_code; - actual_size_array[*outcount] = 
query_op->actual_size; - out_id_array[*outcount] = query_op->op_id; - if (user_ptr_array != NULL) - { - user_ptr_array[*outcount] = query_op->user_ptr; - } - if(query_op->send_recv == BMI_SEND) - BMI_EVENT_END(PVFS_EVENT_BMI_SEND, query_op->actual_size, query_op->op_id); - else - BMI_EVENT_END(PVFS_EVENT_BMI_RECV, query_op->actual_size, query_op->op_id); + PINT_EVENT_END((query_op->send_recv == BMI_SEND ? + bmi_tcp_send_event_id : bmi_tcp_recv_event_id), + bmi_tcp_pid, NULL, query_op->event_id, + query_op->actual_size); - dealloc_tcp_method_op(query_op); + dealloc_tcp_method_op(query_op); query_op = NULL; - (*outcount)++; + (*outcount)++; } gen_mutex_unlock(&interface_mutex); @@ -1356,7 +1400,8 @@ int BMI_tcp_post_send_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + PVFS_hint hints) { struct tcp_msg_header my_header; int ret = -1; @@ -1385,14 +1430,9 @@ int BMI_tcp_post_send_list(bmi_op_id_t * id, gen_mutex_lock(&interface_mutex); - ret = BMI_tcp_post_send_generic(id, dest, buffer_list, - size_list, list_count, buffer_type, - my_header, user_ptr, context_id); - if(ret >= 0) - BMI_EVENT_START(PVFS_EVENT_BMI_SEND, *id); - if(ret == 1) - BMI_EVENT_END(PVFS_EVENT_BMI_SEND, total_size, *id); - + ret = tcp_post_send_generic(id, dest, buffer_list, + size_list, list_count, buffer_type, + my_header, user_ptr, context_id, hints); gen_mutex_unlock(&interface_mutex); return(ret); } @@ -1415,7 +1455,8 @@ int BMI_tcp_post_recv_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + PVFS_hint hints) { int ret = -1; @@ -1427,14 +1468,9 @@ int BMI_tcp_post_recv_list(bmi_op_id_t * id, gen_mutex_lock(&interface_mutex); ret = tcp_post_recv_generic(id, src, buffer_list, size_list, - list_count, total_expected_size, - total_actual_size, buffer_type, tag, user_ptr, - context_id); - - if(ret 
>= 0) - BMI_EVENT_START(PVFS_EVENT_BMI_RECV, *id); - if(ret == 1) - BMI_EVENT_END(PVFS_EVENT_BMI_RECV, *total_actual_size, *id); + list_count, total_expected_size, + total_actual_size, buffer_type, tag, user_ptr, + context_id, hints); gen_mutex_unlock(&interface_mutex); return (ret); @@ -1458,7 +1494,8 @@ int BMI_tcp_post_sendunexpected_list(bmi_op_id_t * id, enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, void *user_ptr, - bmi_context_id context_id) + bmi_context_id context_id, + PVFS_hint hints) { struct tcp_msg_header my_header; int ret = -1; @@ -1478,13 +1515,9 @@ int BMI_tcp_post_sendunexpected_list(bmi_op_id_t * id, gen_mutex_lock(&interface_mutex); - ret = BMI_tcp_post_send_generic(id, dest, buffer_list, - size_list, list_count, buffer_type, - my_header, user_ptr, context_id); - if(ret >= 0) - BMI_EVENT_START(PVFS_EVENT_BMI_SEND, *id); - if(ret == 1) - BMI_EVENT_END(PVFS_EVENT_BMI_SEND, total_size, *id); + ret = tcp_post_send_generic(id, dest, buffer_list, + size_list, list_count, buffer_type, + my_header, user_ptr, context_id, hints); gen_mutex_unlock(&interface_mutex); return(ret); @@ -1742,12 +1775,12 @@ int BMI_tcp_query_addr_range(bmi_method_addr_p map, const char *wildcard_string, /* Invalid network address */ if (inet_aton(tcp_wildcard, &network_addr.sin_addr) == 0) { - gossip_lerr("Invalid network specification: %s\n", tcp_wildcard); + gossip_err("Invalid network specification: %s\n", tcp_wildcard); return -EINVAL; } /* Matches the subnet mask! 
*/ if ((map_addr.sin_addr.s_addr & mask_addr.sin_addr.s_addr) - == network_addr.sin_addr.s_addr) + == (network_addr.sin_addr.s_addr & mask_addr.sin_addr.s_addr)) { return 1; } @@ -1847,7 +1880,7 @@ void tcp_forget_addr(bmi_method_addr_p map, int error_code) { struct tcp_addr* tcp_addr_data = map->method_data; - PVFS_BMI_addr_t bmi_addr = tcp_addr_data->bmi_addr; + BMI_addr_t bmi_addr = tcp_addr_data->bmi_addr; int tmp_outcount; bmi_method_addr_p tmp_addr; int tmp_status; @@ -1858,8 +1891,11 @@ void tcp_forget_addr(bmi_method_addr_p map, /* perform a test to force the socket collection to act on the remove * request before continuing */ - BMI_socket_collection_testglobal(tcp_socket_collection_p, - 0, &tmp_outcount, &tmp_addr, &tmp_status, 0, &interface_mutex); + if(!sc_test_busy) + { + BMI_socket_collection_testglobal(tcp_socket_collection_p, + 0, &tmp_outcount, &tmp_addr, &tmp_status, 0); + } } tcp_shutdown_addr(map); @@ -2223,7 +2259,8 @@ static int enqueue_operation(op_list_p target_list, void *user_ptr, bmi_size_t actual_size, bmi_size_t expected_size, - bmi_context_id context_id) + bmi_context_id context_id, + int32_t eid) { method_op_p new_method_op = NULL; struct tcp_op *tcp_op_data = NULL; @@ -2238,6 +2275,7 @@ static int enqueue_operation(op_list_p target_list, } *id = new_method_op->op_id; + new_method_op->event_id = eid; /* set the fields */ new_method_op->send_recv = send_recv; @@ -2354,16 +2392,17 @@ static int enqueue_operation(op_list_p target_list, * completion, -errno on failure */ static int tcp_post_recv_generic(bmi_op_id_t * id, - bmi_method_addr_p src, - void *const *buffer_list, - const bmi_size_t *size_list, - int list_count, - bmi_size_t expected_size, - bmi_size_t * actual_size, - enum bmi_buffer_type buffer_type, - bmi_msg_tag_t tag, - void *user_ptr, - bmi_context_id context_id) + bmi_method_addr_p src, + void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + bmi_size_t expected_size, + bmi_size_t * actual_size, + 
enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) { method_op_p query_op = NULL; int ret = -1; @@ -2374,6 +2413,16 @@ static int tcp_post_recv_generic(bmi_op_id_t * id, bmi_size_t copy_size = 0; bmi_size_t total_copied = 0; int i; + PINT_event_id eid = 0; + + PINT_EVENT_START( + bmi_tcp_recv_event_id, bmi_tcp_pid, NULL, &eid, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + PINT_HINT_GET_HANDLE(hints), + PINT_HINT_GET_OP_ID(hints), + expected_size); tcp_addr_data = src->method_data; @@ -2382,9 +2431,11 @@ static int tcp_post_recv_generic(bmi_op_id_t * id, */ if(tcp_addr_data->addr_error && tcp_addr_data->dont_reconnect) { - gossip_debug(GOSSIP_BMI_DEBUG_TCP, - "Warning: BMI communication attempted on an address in failure mode.\n"); - return(tcp_addr_data->addr_error); + gossip_debug( + GOSSIP_BMI_DEBUG_TCP, + "Warning: BMI communication attempted " + "on an address in failure mode.\n"); + return(tcp_addr_data->addr_error); } /* lets make sure that the message hasn't already been fully @@ -2397,163 +2448,170 @@ static int tcp_post_recv_generic(bmi_op_id_t * id, key.msg_tag_yes = 1; query_op = - op_list_search(op_list_array[IND_RECV_EAGER_DONE_BUFFERING], &key); + op_list_search(op_list_array[IND_RECV_EAGER_DONE_BUFFERING], &key); if (query_op) { - /* make sure it isn't too big */ - if (query_op->actual_size > expected_size) - { - gossip_err("Error: message ordering violation;\n"); - gossip_err("Error: message too large for next buffer.\n"); - return (bmi_tcp_errno_to_pvfs(-EPROTO)); - } + /* make sure it isn't too big */ + if (query_op->actual_size > expected_size) + { + gossip_err("Error: message ordering violation;\n"); + gossip_err("Error: message too large for next buffer.\n"); + return (bmi_tcp_errno_to_pvfs(-EPROTO)); + } - /* whoohoo- it is already done! 
*/ - /* copy buffer out to list segments; handle short case */ - for (i = 0; i < list_count; i++) - { - copy_size = size_list[i]; - if (copy_size + total_copied > query_op->actual_size) - { - copy_size = query_op->actual_size - total_copied; - } - memcpy(buffer_list[i], (void *) ((char *) query_op->buffer + - total_copied), copy_size); - total_copied += copy_size; - if (total_copied == query_op->actual_size) - { - break; - } - } - /* copy out to correct memory regions */ - (*actual_size) = query_op->actual_size; - free(query_op->buffer); - *id = 0; - op_list_remove(query_op); - dealloc_tcp_method_op(query_op); - return (1); + /* whoohoo- it is already done! */ + /* copy buffer out to list segments; handle short case */ + for (i = 0; i < list_count; i++) + { + copy_size = size_list[i]; + if (copy_size + total_copied > query_op->actual_size) + { + copy_size = query_op->actual_size - total_copied; + } + memcpy(buffer_list[i], (void *) ((char *) query_op->buffer + + total_copied), copy_size); + total_copied += copy_size; + if (total_copied == query_op->actual_size) + { + break; + } + } + /* copy out to correct memory regions */ + (*actual_size) = query_op->actual_size; + free(query_op->buffer); + *id = 0; + op_list_remove(query_op); + dealloc_tcp_method_op(query_op); + PINT_EVENT_END(bmi_tcp_recv_event_id, bmi_tcp_pid, NULL, eid, 0, + *actual_size); + + return (1); } /* look for a message that is already being received */ query_op = op_list_search(op_list_array[IND_RECV_INFLIGHT], &key); if (query_op) { - tcp_op_data = query_op->method_data; + tcp_op_data = query_op->method_data; } /* see if it is being buffered into a temporary memory region */ if (query_op && tcp_op_data->tcp_op_state == BMI_TCP_BUFFERING) { - /* make sure it isn't too big */ - if (query_op->actual_size > expected_size) - { - gossip_err("Error: message ordering violation;\n"); - gossip_err("Error: message too large for next buffer.\n"); - return (bmi_tcp_errno_to_pvfs(-EPROTO)); - } - - /* copy what 
we have so far into the correct buffers */ - total_copied = 0; - for (i = 0; i < list_count; i++) - { - copy_size = size_list[i]; - if (copy_size + total_copied > query_op->amt_complete) - { - copy_size = query_op->amt_complete - total_copied; - } - if (copy_size > 0) - { - memcpy(buffer_list[i], (void *) ((char *) query_op->buffer + - total_copied), copy_size); - } - total_copied += copy_size; - if (total_copied == query_op->amt_complete) - { - query_op->list_index = i; - query_op->cur_index_complete = copy_size; - break; - } - } + /* make sure it isn't too big */ + if (query_op->actual_size > expected_size) + { + gossip_err("Error: message ordering violation;\n"); + gossip_err("Error: message too large for next buffer.\n"); + return (bmi_tcp_errno_to_pvfs(-EPROTO)); + } - /* see if we ended on a buffer boundary */ - if (query_op->cur_index_complete == - query_op->size_list[query_op->list_index]) - { - query_op->list_index++; - query_op->cur_index_complete = 0; - } + /* copy what we have so far into the correct buffers */ + total_copied = 0; + for (i = 0; i < list_count; i++) + { + copy_size = size_list[i]; + if (copy_size + total_copied > query_op->amt_complete) + { + copy_size = query_op->amt_complete - total_copied; + } + if (copy_size > 0) + { + memcpy(buffer_list[i], (void *) ((char *) query_op->buffer + + total_copied), copy_size); + } + total_copied += copy_size; + if (total_copied == query_op->amt_complete) + { + query_op->list_index = i; + query_op->cur_index_complete = copy_size; + break; + } + } - /* release the old buffer */ - if (query_op->buffer) - { - free(query_op->buffer); - } + /* see if we ended on a buffer boundary */ + if (query_op->cur_index_complete == + query_op->size_list[query_op->list_index]) + { + query_op->list_index++; + query_op->cur_index_complete = 0; + } - *id = query_op->op_id; - tcp_op_data = query_op->method_data; - tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS; + /* release the old buffer */ + if (query_op->buffer) + { + 
free(query_op->buffer); + } - query_op->list_count = list_count; - query_op->user_ptr = user_ptr; - query_op->context_id = context_id; - /* if there is only one item in the list, then keep the list stored - * in the op structure. This allows us to use the same code for send - * and recv as we use for send_list and recv_list, without having to - * malloc lists for those special cases - */ - if (list_count == 1) - { - query_op->buffer_list = &tcp_op_data->buffer_list_stub; - query_op->size_list = &tcp_op_data->size_list_stub; - ((void **)query_op->buffer_list)[0] = buffer_list[0]; - ((bmi_size_t *)query_op->size_list)[0] = size_list[0]; - } - else - { - query_op->buffer_list = buffer_list; - query_op->size_list = size_list; - } + *id = query_op->op_id; + tcp_op_data = query_op->method_data; + tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS; + + query_op->list_count = list_count; + query_op->user_ptr = user_ptr; + query_op->context_id = context_id; + /* if there is only one item in the list, then keep the list stored + * in the op structure. 
This allows us to use the same code for send + * and recv as we use for send_list and recv_list, without having to + * malloc lists for those special cases + */ + if (list_count == 1) + { + query_op->buffer_list = &tcp_op_data->buffer_list_stub; + query_op->size_list = &tcp_op_data->size_list_stub; + ((void **)query_op->buffer_list)[0] = buffer_list[0]; + ((bmi_size_t *)query_op->size_list)[0] = size_list[0]; + } + else + { + query_op->buffer_list = buffer_list; + query_op->size_list = size_list; + } - if (query_op->amt_complete < query_op->actual_size) - { - /* try to recv some more data */ - tcp_addr_data = query_op->addr->method_data; - ret = payload_progress(tcp_addr_data->socket, - query_op->buffer_list, - query_op->size_list, - query_op->list_count, - query_op->actual_size, - &(query_op->list_index), - &(query_op->cur_index_complete), - BMI_RECV, - NULL, - 0); - if (ret < 0) - { + if (query_op->amt_complete < query_op->actual_size) + { + /* try to recv some more data */ + tcp_addr_data = query_op->addr->method_data; + ret = payload_progress(tcp_addr_data->socket, + query_op->buffer_list, + query_op->size_list, + query_op->list_count, + query_op->actual_size, + &(query_op->list_index), + &(query_op->cur_index_complete), + BMI_RECV, + NULL, + 0); + if (ret < 0) + { PVFS_perror_gossip("Error: payload_progress", ret); /* payload_progress() returns BMI error codes */ - tcp_forget_addr(query_op->addr, 0, ret); - return (ret); - } + tcp_forget_addr(query_op->addr, 0, ret); + return (ret); + } - query_op->amt_complete += ret; - } - assert(query_op->amt_complete <= query_op->actual_size); - if (query_op->amt_complete == query_op->actual_size) - { - /* we are done */ - op_list_remove(query_op); - *id = 0; - (*actual_size) = query_op->actual_size; - dealloc_tcp_method_op(query_op); - return (1); - } - else - { - /* there is still more work to do */ - tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS; - return (0); - } + query_op->amt_complete += ret; + } + 
assert(query_op->amt_complete <= query_op->actual_size); + if (query_op->amt_complete == query_op->actual_size) + { + /* we are done */ + op_list_remove(query_op); + *id = 0; + (*actual_size) = query_op->actual_size; + dealloc_tcp_method_op(query_op); + PINT_EVENT_END( + bmi_tcp_recv_event_id, bmi_tcp_pid, NULL, eid, + 0, *actual_size); + + return (1); + } + else + { + /* there is still more work to do */ + tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS; + return (0); + } } /* NOTE: if the message was in flight, but not buffering, then @@ -2566,18 +2624,18 @@ static int tcp_post_recv_generic(bmi_op_id_t * id, /* if we hit this point we must enqueue */ if (expected_size <= TCP_MODE_EAGER_LIMIT) { - bogus_header.mode = TCP_MODE_EAGER; + bogus_header.mode = TCP_MODE_EAGER; } else { - bogus_header.mode = TCP_MODE_REND; + bogus_header.mode = TCP_MODE_REND; } bogus_header.tag = tag; ret = enqueue_operation(op_list_array[IND_RECV], - BMI_RECV, src, buffer_list, size_list, - list_count, 0, 0, id, BMI_TCP_INPROGRESS, - bogus_header, user_ptr, 0, - expected_size, context_id); + BMI_RECV, src, buffer_list, size_list, + list_count, 0, 0, id, BMI_TCP_INPROGRESS, + bogus_header, user_ptr, 0, + expected_size, context_id, eid); /* just for safety; this field isn't valid to the caller anymore */ (*actual_size) = 0; /* TODO: figure out why this causes deadlocks; observable in 2 @@ -2588,11 +2646,11 @@ static int tcp_post_recv_generic(bmi_op_id_t * id, #if 0 if (ret >= 0) { - /* go ahead and try to do some work while we are in this - * function since we appear to be backlogged. Make sure that - * we do not wait in the poll, however. - */ - ret = tcp_do_work(0); + /* go ahead and try to do some work while we are in this + * function since we appear to be backlogged. Make sure that + * we do not wait in the poll, however. 
+ */ + ret = tcp_do_work(0); } #endif return (ret); @@ -2688,17 +2746,56 @@ static int tcp_do_work(int max_idle_time) int busy_flag = 1; struct timespec req; struct tcp_addr* tcp_addr_data = NULL; + struct timespec wait_time; + struct timeval start; + + if(sc_test_busy) + { + /* another thread is already polling or working on sockets */ + if(max_idle_time == 0) + { + /* we don't want to spend time waiting on it; return + * immediately. + */ + return(0); + } - /* now we need to poll and see what to work on */ - /* drop mutex while we make this call */ + /* Sleep until working thread thread signals that it has finished + * its work and then return. No need for this thread to poll; + * the other thread may have already finished what we wanted. + * This condition wait is used strictly as a best effort to + * prevent busy spin. We'll sort out the results later. + */ + gettimeofday(&start, NULL); + wait_time.tv_sec = start.tv_sec + max_idle_time / 1000; + wait_time.tv_nsec = (start.tv_usec + ((max_idle_time % 1000)*1000))*1000; + if (wait_time.tv_nsec > 1000000000) + { + wait_time.tv_nsec = wait_time.tv_nsec - 1000000000; + wait_time.tv_sec++; + } + gen_cond_timedwait(&interface_cond, &interface_mutex, &wait_time); + return(0); + } + + /* this thread has gained control of the polling. 
*/ + sc_test_busy = 1; gen_mutex_unlock(&interface_mutex); + + /* our turn to look at the socket collection */ ret = BMI_socket_collection_testglobal(tcp_socket_collection_p, TCP_WORK_METRIC, &socket_count, addr_array, status_array, - max_idle_time, &interface_mutex); + max_idle_time); + gen_mutex_lock(&interface_mutex); + sc_test_busy = 0; + if (ret < 0) { + /* wake up anyone else who might have been waiting */ + gen_cond_broadcast(&interface_cond); + PVFS_perror_gossip("Error: socket collection:", ret); /* BMI_socket_collection_testglobal() returns BMI error code */ return (ret); } @@ -2723,7 +2820,7 @@ static int tcp_do_work(int max_idle_time) ret = tcp_do_work_error(addr_array[i]); if (ret < 0) { - return (ret); + PVFS_perror_gossip("Warning: BMI error handling failure, continuing", ret); } } else @@ -2733,8 +2830,8 @@ static int tcp_do_work(int max_idle_time) ret = tcp_do_work_send(addr_array[i], &stall_flag); if (ret < 0) { - return (ret); - } + PVFS_perror_gossip("Warning: BMI send error, continuing", ret); + } if(!stall_flag) busy_flag = 0; } @@ -2743,7 +2840,7 @@ static int tcp_do_work(int max_idle_time) ret = tcp_do_work_recv(addr_array[i], &stall_flag); if (ret < 0) { - return (ret); + PVFS_perror_gossip("Warning: BMI recv error, continuing", ret); } if(!stall_flag) busy_flag = 0; @@ -2768,6 +2865,8 @@ static int tcp_do_work(int max_idle_time) gen_mutex_lock(&interface_mutex); } + /* wake up anyone else who might have been waiting */ + gen_cond_broadcast(&interface_cond); return (0); } @@ -2899,6 +2998,7 @@ static int tcp_do_work_recv(bmi_method_addr_p map, int* stall_flag) int tmp_errno; int tmp; bmi_size_t old_amt_complete = 0; + time_t current_time; *stall_flag = 1; @@ -2993,10 +3093,25 @@ static int tcp_do_work_recv(bmi_method_addr_p map, int* stall_flag) if (ret < TCP_ENC_HDR_SIZE) { - /* header not ready yet */ + current_time = time(NULL); + if(!tcp_addr_data->short_header_timer) + { + tcp_addr_data->short_header_timer = current_time; + } + else 
if((current_time - tcp_addr_data->short_header_timer) > + BMI_TCP_HEADER_WAIT_SECONDS) + { + gossip_err("Error: incomplete BMI TCP header after %d seconds, closing connection.\n", + BMI_TCP_HEADER_WAIT_SECONDS); + tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-EPIPE)); + return (0); + } + + /* header not ready yet, but we will keep hoping */ return (0); } + tcp_addr_data->short_header_timer = 0; *stall_flag = 0; gossip_ldebug(GOSSIP_BMI_DEBUG_TCP, "Reading header for new op.\n"); ret = BMI_sockio_nbrecv(tcp_addr_data->socket, @@ -3012,6 +3127,7 @@ static int tcp_do_work_recv(bmi_method_addr_p map, int* stall_flag) /* decode the header */ BMI_TCP_DEC_HDR(new_header); + /* so we have the header. now what? These are the possible * scenarios: * a) unexpected message @@ -3462,7 +3578,7 @@ static int tcp_allow_trusted(struct sockaddr_in *peer_sockaddr) { /* check with all the masks */ if ((peer_sockaddr->sin_addr.s_addr & gtcp_allowed_connection->netmask[i].s_addr) - != gtcp_allowed_connection->network[i].s_addr) + != (gtcp_allowed_connection->network[i].s_addr & gtcp_allowed_connection->netmask[i].s_addr )) { continue; } @@ -3489,7 +3605,7 @@ static int tcp_allow_trusted(struct sockaddr_in *peer_sockaddr) return 0; } /* no good */ - gossip_lerr("Rejecting client %s on port %d: %s\n", + gossip_err("Rejecting client %s on port %d: %s\n", peer_hostname, peer_port, bad_errors[what_failed]); return -1; } @@ -3538,9 +3654,17 @@ static int tcp_accept_init(int *socket, char** peer) (errno == EHOSTDOWN) || (errno == ENONET) || (errno == EHOSTUNREACH) || - (errno == EOPNOTSUPP) || (errno == ENETUNREACH)) + (errno == EOPNOTSUPP) || + (errno == ENETUNREACH) || + (errno == ENFILE) || + (errno == EMFILE)) { /* try again later */ + if ((errno == ENFILE) || (errno == EMFILE)) + { + gossip_err("Error: accept: %s (continuing)\n",strerror(errno)); + bmi_method_addr_drop_callback(BMI_tcp_method_name); + } return (0); } else @@ -3557,14 +3681,7 @@ static int tcp_accept_init(int *socket, 
char** peer) { /* Force closure of the connection */ close(*socket); - errno = EACCES; - /* FIXME: - * BIG KLUDGE - * if we return an error, pvfs2-server's bmi thread simply terminates. - * hence I am returning 0 here. Need to ask Phil or RobR about this... - */ - *socket = -1; - return 0; + return (bmi_tcp_errno_to_pvfs(-EACCES)); } #endif @@ -3630,31 +3747,52 @@ static void dealloc_tcp_method_op(method_op_p old_op) return; } -/* BMI_tcp_post_send_generic() +/* tcp_post_send_generic() * * Submits send operations (low level). * * returns 0 on success that requires later poll, returns 1 on instant * completion, -errno on failure */ -static int BMI_tcp_post_send_generic(bmi_op_id_t * id, - bmi_method_addr_p dest, - const void *const *buffer_list, - const bmi_size_t *size_list, - int list_count, - enum bmi_buffer_type buffer_type, - struct tcp_msg_header my_header, - void *user_ptr, - bmi_context_id context_id) +static int tcp_post_send_generic(bmi_op_id_t * id, + bmi_method_addr_p dest, + const void *const *buffer_list, + const bmi_size_t *size_list, + int list_count, + enum bmi_buffer_type buffer_type, + struct tcp_msg_header my_header, + void *user_ptr, + bmi_context_id context_id, + PVFS_hint hints) { struct tcp_addr *tcp_addr_data = dest->method_data; method_op_p query_op = NULL; int ret = -1; + bmi_size_t total_size = 0; bmi_size_t amt_complete = 0; bmi_size_t env_amt_complete = 0; struct op_list_search_key key; int list_index = 0; bmi_size_t cur_index_complete = 0; + PINT_event_id eid = 0; + + if(PINT_EVENT_ENABLED) + { + int i = 0; + for(; i < list_count; ++i) + { + total_size += size_list[i]; + } + } + + PINT_EVENT_START( + bmi_tcp_send_event_id, bmi_tcp_pid, NULL, &eid, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + PINT_HINT_GET_HANDLE(hints), + PINT_HINT_GET_OP_ID(hints), + total_size); /* Three things can happen here: * a) another op is already in queue for the address, so we just @@ -3684,13 +3822,14 
@@ static int BMI_tcp_post_send_generic(bmi_op_id_t * id, query_op = op_list_search(op_list_array[IND_SEND], &key); if (query_op) { - /* queue up operation */ - ret = enqueue_operation(op_list_array[IND_SEND], BMI_SEND, - dest, (void **) buffer_list, - size_list, list_count, 0, 0, - id, BMI_TCP_INPROGRESS, my_header, user_ptr, - my_header.size, 0, - context_id); + /* queue up operation */ + ret = enqueue_operation(op_list_array[IND_SEND], BMI_SEND, + dest, (void **) buffer_list, + size_list, list_count, 0, 0, + id, BMI_TCP_INPROGRESS, my_header, user_ptr, + my_header.size, 0, + context_id, + eid); /* TODO: is this causing deadlocks? See similar call in recv * path for another example. This particular one seems to be an @@ -3722,6 +3861,7 @@ static int BMI_tcp_post_send_generic(bmi_op_id_t * id, gossip_debug(GOSSIP_BMI_DEBUG_TCP, "tcp_sock_init() failure.\n"); /* tcp_sock_init() returns BMI error code */ tcp_forget_addr(dest, 0, ret); + PINT_EVENT_END(bmi_tcp_send_event_id, bmi_tcp_pid, NULL, 0, ret); return (ret); } @@ -3746,7 +3886,8 @@ static int BMI_tcp_post_send_generic(bmi_op_id_t * id, list_count, 0, 0, id, BMI_TCP_INPROGRESS, my_header, user_ptr, my_header.size, 0, - context_id); + context_id, + eid); if(ret < 0) { gossip_err("Error: enqueue_operation() returned: %d\n", ret); @@ -3765,6 +3906,7 @@ static int BMI_tcp_post_send_generic(bmi_op_id_t * id, PVFS_perror_gossip("Error: payload_progress", ret); /* payload_progress() returns BMI error codes */ tcp_forget_addr(dest, 0, ret); + PINT_EVENT_END(bmi_tcp_send_event_id, bmi_tcp_pid, NULL, eid, 0, ret); return (ret); } @@ -3773,21 +3915,23 @@ static int BMI_tcp_post_send_generic(bmi_op_id_t * id, assert(amt_complete <= my_header.size); if (amt_complete == my_header.size && env_amt_complete == TCP_ENC_HDR_SIZE) { - /* we are already done */ - return (1); + /* we are already done */ + PINT_EVENT_END(bmi_tcp_send_event_id, bmi_tcp_pid, + NULL, eid, 0, amt_complete); + return (1); } /* queue up the remainder */ 
ret = enqueue_operation(op_list_array[IND_SEND], BMI_SEND, - dest, (void **) buffer_list, - size_list, list_count, - amt_complete, env_amt_complete, id, - BMI_TCP_INPROGRESS, my_header, user_ptr, - my_header.size, 0, context_id); + dest, (void **) buffer_list, + size_list, list_count, + amt_complete, env_amt_complete, id, + BMI_TCP_INPROGRESS, my_header, user_ptr, + my_header.size, 0, context_id, eid); if(ret < 0) { - gossip_err("Error: enqueue_operation() returned: %d\n", ret); + gossip_err("Error: enqueue_operation() returned: %d\n", ret); } return (ret); } diff --git a/src/io/bmi/bmi_tcp/module.mk.in b/src/io/bmi/bmi_tcp/module.mk.in index 68d2bcc..2d74776 100644 --- a/src/io/bmi/bmi_tcp/module.mk.in +++ b/src/io/bmi/bmi_tcp/module.mk.in @@ -11,13 +11,19 @@ SERVERSRC += \ $(DIR)/bmi-tcp.c \ $(DIR)/sockio.c +LIBBMISRC += \ + $(DIR)/bmi-tcp.c \ + $(DIR)/sockio.c + ifdef BUILD_EPOLL LIBSRC += $(DIR)/socket-collection-epoll.c SERVERSRC += $(DIR)/socket-collection-epoll.c +LIBBMISRC += $(DIR)/socket-collection-epoll.c MODCFLAGS_$(DIR)/bmi-tcp.c := -D__PVFS2_USE_EPOLL__ else LIBSRC += $(DIR)/socket-collection.c SERVERSRC += $(DIR)/socket-collection.c +LIBBMISRC += $(DIR)/socket-collection.c endif endif # BUILD_BMI_TCP diff --git a/src/io/bmi/bmi_tcp/socket-collection-epoll.c b/src/io/bmi/bmi_tcp/socket-collection-epoll.c index e41611d..05f78d4 100644 --- a/src/io/bmi/bmi_tcp/socket-collection-epoll.c +++ b/src/io/bmi/bmi_tcp/socket-collection-epoll.c @@ -65,11 +65,6 @@ socket_collection_p BMI_socket_collection_init(int new_server_socket) return(NULL); } - gen_mutex_init(&tmp_scp->mutex); - gen_mutex_init(&tmp_scp->queue_mutex); - - INIT_QLIST_HEAD(&tmp_scp->remove_queue); - INIT_QLIST_HEAD(&tmp_scp->add_queue); tmp_scp->server_socket = new_server_socket; if(new_server_socket > -1) @@ -82,10 +77,6 @@ socket_collection_p BMI_socket_collection_init(int new_server_socket) if(ret < 0 && errno != EEXIST) { gossip_err("Error: epoll_ctl() failure: %s.\n", strerror(errno)); 
-#if 0 - gen_mutex_destroy(&tmp_scp->mutex); - gen_mutex_destroy(&tmp_scp->queue_mutex); -#endif free(tmp_scp); return(NULL); } @@ -94,48 +85,6 @@ socket_collection_p BMI_socket_collection_init(int new_server_socket) return (tmp_scp); } -/* socket_collection_queue() - * - * queues a tcp method_addr for addition or removal from the collection. - * - * returns 0 on success, -errno on failure. - */ -void BMI_socket_collection_queue(socket_collection_p scp, - bmi_method_addr_p map, struct qlist_head* queue) -{ - struct qlist_head* iterator = NULL; - struct qlist_head* scratch = NULL; - struct tcp_addr* tcp_addr_data = NULL; - - /* make sure that this address isn't already slated for addition/removal */ - qlist_for_each_safe(iterator, scratch, &scp->remove_queue) - { - tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); - if(tcp_addr_data->map == map) - { - qlist_del(&tcp_addr_data->sc_link); - break; - } - } - qlist_for_each_safe(iterator, scratch, &scp->add_queue) - { - tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); - if(tcp_addr_data->map == map) - { - qlist_del(&tcp_addr_data->sc_link); - break; - } - } - - /* add it on to the appropriate queue */ - tcp_addr_data = map->method_data; - /* add to head, we are likely to access it again soon */ - qlist_add(&tcp_addr_data->sc_link, queue); - - return; -} - - /* socket_collection_finalize() * * destroys a socket collection. 
IMPORTANT: It DOES NOT destroy the @@ -146,10 +95,6 @@ void BMI_socket_collection_queue(socket_collection_p scp, */ void BMI_socket_collection_finalize(socket_collection_p scp) { -#if 0 - gen_mutex_destroy(&scp->mutex); - gen_mutex_destroy(&scp->queue_mutex); -#endif free(scp); return; } @@ -170,112 +115,24 @@ int BMI_socket_collection_testglobal(socket_collection_p scp, int *outcount, bmi_method_addr_p * maps, int * status, - int poll_timeout, - gen_mutex_t* external_mutex) + int poll_timeout) { - struct qlist_head* iterator = NULL; - struct qlist_head* scratch = NULL; struct tcp_addr* tcp_addr_data = NULL; int ret = -1; int old_errno; int tmp_count; int i; - int skip_flag; -#ifndef __PVFS2_JOB_THREADED__ - struct epoll_event event; -#endif /* init the outgoing arguments for safety */ *outcount = 0; memset(maps, 0, (sizeof(bmi_method_addr_p) * incount)); memset(status, 0, (sizeof(int) * incount)); - gen_mutex_lock(&scp->mutex); - -#ifndef __PVFS2_JOB_THREADED__ - gen_mutex_lock(&scp->queue_mutex); - - /* look for addresses slated for removal */ - qlist_for_each_safe(iterator, scratch, &scp->remove_queue) + if(incount == 0) { - tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); - qlist_del(&tcp_addr_data->sc_link); - - - /* take out of the epoll set */ - if(tcp_addr_data->sc_index > -1) - { - memset(&event, 0, sizeof(event)); - event.events = 0; - event.data.ptr = tcp_addr_data->map; - - ret = epoll_ctl(scp->epfd, EPOLL_CTL_DEL, tcp_addr_data->socket, - &event); - - if(ret < 0 && errno != ENOENT) - { - /* TODO: error handling */ - gossip_lerr("Error: epoll_ctl() failure: %s\n", - strerror(errno)); - assert(0); - } - - tcp_addr_data->sc_index = -1; - tcp_addr_data->write_ref_count = 0; - } + return(0); } - /* look for addresses slated for addition */ - qlist_for_each_safe(iterator, scratch, &scp->add_queue) - { - tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); - qlist_del(&tcp_addr_data->sc_link); - - if(tcp_addr_data->sc_index > -1) - 
{ - memset(&event, 0, sizeof(event)); - /* update existing entry */ - event.data.ptr = tcp_addr_data->map; - event.events = (EPOLLIN|EPOLLERR|EPOLLHUP); - if(tcp_addr_data->write_ref_count > 0) - event.events |= EPOLLOUT; - - ret = epoll_ctl(scp->epfd, EPOLL_CTL_MOD, tcp_addr_data->socket, - &event); - - if(ret < 0 && errno != ENOENT) - { - /* TODO: error handling */ - gossip_lerr("Error: epoll_ctl() failure: %s\n", - strerror(errno)); - assert(0); - } - } - else - { - /* new entry */ - tcp_addr_data->sc_index = 1; - - memset(&event, 0, sizeof(event)); - event.data.ptr = tcp_addr_data->map; - event.events = (EPOLLIN|EPOLLERR|EPOLLHUP); - if(tcp_addr_data->write_ref_count > 0) - event.events |= EPOLLOUT; - - ret = epoll_ctl(scp->epfd, EPOLL_CTL_ADD, tcp_addr_data->socket, - &event); - if(ret < 0 && errno != EEXIST) - { - /* TODO: error handling */ - gossip_lerr("Error: epoll_ctl() failure: %s\n", - strerror(errno)); - assert(0); - } - } - } - gen_mutex_unlock(&scp->queue_mutex); -#endif - /* actually do the epoll_wait() here */ do { @@ -291,14 +148,12 @@ int BMI_socket_collection_testglobal(socket_collection_p scp, if(ret < 0) { - gen_mutex_unlock(&scp->mutex); return(-old_errno); } /* nothing ready, just return */ if(ret == 0) { - gen_mutex_unlock(&scp->mutex); return(0); } @@ -307,22 +162,6 @@ int BMI_socket_collection_testglobal(socket_collection_p scp, for(i=0; ievent_array[i].events); - skip_flag = 0; - - /* make sure this addr hasn't been removed */ - gen_mutex_lock(&scp->queue_mutex); - qlist_for_each_safe(iterator, scratch, &scp->remove_queue) - { - tcp_addr_data = qlist_entry(iterator, struct tcp_addr, sc_link); - if(tcp_addr_data->map == scp->event_array[i].data.ptr) - { - skip_flag = 1; - break; - } - } - gen_mutex_unlock(&scp->queue_mutex); - if(skip_flag) - continue; if(scp->event_array[i].events & ERRMASK) status[*outcount] |= SC_ERROR_BIT; @@ -351,8 +190,6 @@ int BMI_socket_collection_testglobal(socket_collection_p scp, *outcount = (*outcount) + 1; } 
- gen_mutex_unlock(&scp->mutex); - return (0); } diff --git a/src/io/bmi/bmi_tcp/socket-collection-epoll.h b/src/io/bmi/bmi_tcp/socket-collection-epoll.h index 4c5a238..3795310 100644 --- a/src/io/bmi/bmi_tcp/socket-collection-epoll.h +++ b/src/io/bmi/bmi_tcp/socket-collection-epoll.h @@ -30,14 +30,8 @@ struct socket_collection { - gen_mutex_t mutex; - int epfd; - gen_mutex_t queue_mutex; - struct qlist_head add_queue; - struct qlist_head remove_queue; - struct epoll_event event_array[BMI_EPOLL_MAX_PER_CYCLE]; int server_socket; @@ -52,58 +46,10 @@ enum }; socket_collection_p BMI_socket_collection_init(int new_server_socket); -void BMI_socket_collection_queue(socket_collection_p scp, - bmi_method_addr_p map, struct qlist_head* queue); /* the bmi_tcp code may try to add a socket to the collection before * it is fully connected, just ignore in this case */ -/* TODO: maybe optimize later; with epoll it is safe to add a new descriptor - * while a poll is in progress, so we could skip lock and queue in some - * cases. 
- */ -#ifndef __PVFS2_JOB_THREADED__ - -#define BMI_socket_collection_add(s, m) \ -do { \ - struct tcp_addr* tcp_data = (m)->method_data; \ - if(tcp_data->socket > -1){ \ - gen_mutex_lock(&((s)->queue_mutex)); \ - BMI_socket_collection_queue(s, m, &((s)->add_queue)); \ - gen_mutex_unlock(&((s)->queue_mutex)); \ - } \ -} while(0) - -#define BMI_socket_collection_remove(s, m) \ -do { \ - gen_mutex_lock(&((s)->queue_mutex)); \ - BMI_socket_collection_queue(s, m, &((s)->remove_queue)); \ - gen_mutex_unlock(&((s)->queue_mutex)); \ -} while(0) - -/* we _must_ have a valid socket at this point if we want to write data */ -#define BMI_socket_collection_add_write_bit(s, m) \ -do { \ - struct tcp_addr* tcp_data = (m)->method_data; \ - assert(tcp_data->socket > -1); \ - gen_mutex_lock(&((s)->queue_mutex)); \ - tcp_data->write_ref_count++; \ - BMI_socket_collection_queue((s),(m), &((s)->add_queue)); \ - gen_mutex_unlock(&((s)->queue_mutex)); \ -} while(0) - -#define BMI_socket_collection_remove_write_bit(s, m) \ -do { \ - struct tcp_addr* tcp_data = (m)->method_data; \ - gen_mutex_lock(&((s)->queue_mutex)); \ - tcp_data->write_ref_count--; \ - assert(tcp_data->write_ref_count > -1); \ - BMI_socket_collection_queue((s),(m), &((s)->add_queue)); \ - gen_mutex_unlock(&((s)->queue_mutex)); \ -} while(0) - -#else - #define BMI_socket_collection_add(s, m) \ do { \ struct tcp_addr* tcp_data = (m)->method_data; \ @@ -154,16 +100,13 @@ do { \ }\ } while(0) -#endif - void BMI_socket_collection_finalize(socket_collection_p scp); int BMI_socket_collection_testglobal(socket_collection_p scp, int incount, int *outcount, bmi_method_addr_p * maps, int * status, - int poll_timeout, - gen_mutex_t* external_mutex); + int poll_timeout); #endif /* __SOCKET_COLLECTION_EPOLL_H */ diff --git a/src/io/bmi/bmi_tcp/socket-collection.c b/src/io/bmi/bmi_tcp/socket-collection.c index 1653105..87a3939 100644 --- a/src/io/bmi/bmi_tcp/socket-collection.c +++ b/src/io/bmi/bmi_tcp/socket-collection.c @@ -55,7 
+55,6 @@ socket_collection_p BMI_socket_collection_init(int new_server_socket) memset(tmp_scp, 0, sizeof(struct socket_collection)); - gen_mutex_init(&tmp_scp->mutex); gen_mutex_init(&tmp_scp->queue_mutex); tmp_scp->pollfd_array = (struct @@ -90,17 +89,18 @@ socket_collection_p BMI_socket_collection_init(int new_server_socket) if(new_server_socket > -1) { - tmp_scp->pollfd_array[0].fd = new_server_socket; - tmp_scp->pollfd_array[0].events = POLLIN; - tmp_scp->addr_array[0] = NULL; + tmp_scp->pollfd_array[tmp_scp->array_count].fd = new_server_socket; + tmp_scp->pollfd_array[tmp_scp->array_count].events = POLLIN; + tmp_scp->addr_array[tmp_scp->array_count] = NULL; tmp_scp->array_count++; - /* Add the pipe_fd[0] fd to the poll in set always */ - tmp_scp->pollfd_array[1].fd = tmp_scp->pipe_fd[0]; - tmp_scp->pollfd_array[1].events = POLLIN; - tmp_scp->addr_array[1] = NULL; - tmp_scp->array_count++; } + /* Add the pipe_fd[0] fd to the poll in set always */ + tmp_scp->pollfd_array[tmp_scp->array_count].fd = tmp_scp->pipe_fd[0]; + tmp_scp->pollfd_array[tmp_scp->array_count].events = POLLIN; + tmp_scp->addr_array[tmp_scp->array_count] = NULL; + tmp_scp->array_count++; + return (tmp_scp); } @@ -177,8 +177,7 @@ int BMI_socket_collection_testglobal(socket_collection_p scp, int *outcount, bmi_method_addr_p * maps, int * status, - int poll_timeout, - gen_mutex_t* external_mutex) + int poll_timeout) { struct qlist_head* iterator = NULL; struct qlist_head* scratch = NULL; @@ -202,8 +201,6 @@ int BMI_socket_collection_testglobal(socket_collection_p scp, memset(maps, 0, (sizeof(bmi_method_addr_p) * incount)); memset(status, 0, (sizeof(int) * incount)); - gen_mutex_lock(&scp->mutex); - gen_mutex_lock(&scp->queue_mutex); /* look for addresses slated for removal */ @@ -291,14 +288,12 @@ int BMI_socket_collection_testglobal(socket_collection_p scp, if(ret < 0) { - gen_mutex_unlock(&scp->mutex); return(bmi_tcp_errno_to_pvfs(-old_errno)); } /* nothing ready, just return */ if(ret == 0) { 
- gen_mutex_unlock(&scp->mutex); return(0); } @@ -370,8 +365,6 @@ int BMI_socket_collection_testglobal(socket_collection_p scp, } } - gen_mutex_unlock(&scp->mutex); - /* Under the following conditions (i.e. all of them must be true) we go back to redoing poll * a) There were no outstanding sockets/fds that had data * b) There was a pipe notification that our socket sets have changed diff --git a/src/io/bmi/bmi_tcp/socket-collection.h b/src/io/bmi/bmi_tcp/socket-collection.h index c811f5f..4d03132 100644 --- a/src/io/bmi/bmi_tcp/socket-collection.h +++ b/src/io/bmi/bmi_tcp/socket-collection.h @@ -26,8 +26,6 @@ struct socket_collection { - gen_mutex_t mutex; - struct pollfd* pollfd_array; bmi_method_addr_p* addr_array; int array_max; @@ -53,50 +51,9 @@ socket_collection_p BMI_socket_collection_init(int new_server_socket); void BMI_socket_collection_queue(socket_collection_p scp, bmi_method_addr_p map, struct qlist_head* queue); -#ifndef __PVFS2_JOB_THREADED__ /* the bmi_tcp code may try to add a socket to the collection before * it is fully connected, just ignore in this case */ -#define BMI_socket_collection_add(s, m) \ -do { \ - struct tcp_addr* tcp_data = (m)->method_data; \ - if(tcp_data->socket > -1){ \ - gen_mutex_lock(&((s)->queue_mutex)); \ - BMI_socket_collection_queue(s, m, &((s)->add_queue)); \ - gen_mutex_unlock(&((s)->queue_mutex)); \ - } \ -} while(0) - -#define BMI_socket_collection_remove(s, m) \ -do { \ - gen_mutex_lock(&((s)->queue_mutex)); \ - BMI_socket_collection_queue(s, m, &((s)->remove_queue)); \ - gen_mutex_unlock(&((s)->queue_mutex)); \ -} while(0) - -/* we _must_ have a valid socket at this point if we want to write data */ -#define BMI_socket_collection_add_write_bit(s, m) \ -do { \ - struct tcp_addr* tcp_data = (m)->method_data; \ - assert(tcp_data->socket > -1); \ - gen_mutex_lock(&((s)->queue_mutex)); \ - tcp_data->write_ref_count++; \ - BMI_socket_collection_queue((s),(m), &((s)->add_queue)); \ - gen_mutex_unlock(&((s)->queue_mutex)); 
\ -} while(0) - -#define BMI_socket_collection_remove_write_bit(s, m) \ -do { \ - struct tcp_addr* tcp_data = (m)->method_data; \ - gen_mutex_lock(&((s)->queue_mutex)); \ - tcp_data->write_ref_count--; \ - assert(tcp_data->write_ref_count > -1); \ - BMI_socket_collection_queue((s),(m), &((s)->add_queue)); \ - gen_mutex_unlock(&((s)->queue_mutex)); \ -} while(0) - -#else - /* write a byte on the pipe_fd[1] so that poll breaks out in case it is idling */ #define BMI_socket_collection_add(s, m) \ do { \ @@ -144,17 +101,13 @@ do { \ write(s->pipe_fd[1], &c, 1);\ } while(0) - -#endif - void BMI_socket_collection_finalize(socket_collection_p scp); int BMI_socket_collection_testglobal(socket_collection_p scp, int incount, int *outcount, bmi_method_addr_p * maps, int * status, - int poll_timeout, - gen_mutex_t* external_mutex); + int poll_timeout); #endif /* __SOCKET_COLLECTION_H */ diff --git a/src/io/bmi/bmi_tcp/sockio.c b/src/io/bmi/bmi_tcp/sockio.c index c6ace2e..8245d94 100755 --- a/src/io/bmi/bmi_tcp/sockio.c +++ b/src/io/bmi/bmi_tcp/sockio.c @@ -143,7 +143,7 @@ int BMI_sockio_init_sock(struct sockaddr *saddrp, } ((struct sockaddr_in *) saddrp)->sin_family = AF_INET; ((struct sockaddr_in *) saddrp)->sin_port = htons((u_short) service); - bcopy(hep->h_addr, (char *) &(((struct sockaddr_in *) saddrp)->sin_addr), + memcpy((char *) &(((struct sockaddr_in *) saddrp)->sin_addr), hep->h_addr, hep->h_length); return (0); } @@ -170,7 +170,7 @@ int BMI_sockio_init_sock(struct sockaddr *saddrp, ((struct sockaddr_in *) saddrp)->sin_family = AF_INET; ((struct sockaddr_in *) saddrp)->sin_port = htons((u_short) service); - bcopy(&addr, (char *) &(((struct sockaddr_in *) saddrp)->sin_addr), + memcpy((char *) &(((struct sockaddr_in *) saddrp)->sin_addr), &addr, sizeof(addr)); return 0; @@ -196,10 +196,15 @@ int BMI_sockio_nbrecv(int s, errno = EPIPE; return (-1); } - if (ret == -1 && (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)) + if (ret == -1 && errno == EINTR) { 
goto nbrecv_restart; } + else if (ret == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) + { + /* return what we got so far, this is a nonblocking call */ + return(len-comp); + } else if (ret == -1) { return (-1); @@ -220,34 +225,30 @@ int BMI_sockio_nbrecv(int s, */ int BMI_sockio_nbpeek(int s, void* buf, int len) { - int ret, comp = len; - + int ret; assert(fcntl(s, F_GETFL, 0) & O_NONBLOCK); - while (comp) + nbpeek_restart: + ret = recv(s, buf, len, (MSG_PEEK|DEFAULT_MSG_FLAGS)); + if(ret == 0) { - nbpeek_restart: - ret = recv(s, buf, comp, (MSG_PEEK|DEFAULT_MSG_FLAGS)); - if (!ret) /* socket closed */ - { - errno = EPIPE; - return (-1); - } - if (ret == -1 && errno == EWOULDBLOCK) - { - return (len - comp); /* return amount completed */ - } - if (ret == -1 && errno == EINTR) - { - goto nbpeek_restart; - } - else if (ret == -1) - { - return (-1); - } - comp -= ret; + errno = EPIPE; + return (-1); } - return (len - comp); + else if (ret == -1 && errno == EWOULDBLOCK) + { + return(0); + } + else if (ret == -1 && errno == EINTR) + { + goto nbpeek_restart; + } + else if (ret == -1) + { + return (-1); + } + + return(ret); } diff --git a/src/io/bmi/bmi_zoid/README b/src/io/bmi/bmi_zoid/README new file mode 100644 index 0000000..5e94c82 --- /dev/null +++ b/src/io/bmi/bmi_zoid/README @@ -0,0 +1,214 @@ + + +INTRODUCTION + + +This is a BMI method that runs on top of ZOID, the ZeptoOS I/O Daemon, +running on IBM Blue Gene/P with the ZeptoOS Compute Node Linux. + + +CONFIGURABLE LIMITS + + +- ZOID_MAX_UNEXPECTED_MSG (in zoid.h): defaults to 8192, can be adjusted as + needed, + +- ZOID_MAX_EXPECTED_MSG (in zoid.h): defaults to 128M, can be adjusted as + needed, but is probably already larger than necessary, + +- FIXME ZBMI limits + + +LIMITATIONS + + +This method was specifically developed to address the needs of IOFSL, the +I/O Forwarding Scalability Layer project. Consequently, not all features +of BMI are supported; we focused on those needed by IOFSL. 
+ +Here is a (possibly incomplete) list of the limitations of this method: + +- processes on the compute nodes can only communicate with their I/O nodes + (the compute nodes cannot communicate with each other, neither can the + I/O nodes; communication is limited to each pset), + +- unexpected messages can only be sent from compute nodes to I/O nodes + (sending from I/O nodes to compute nodes will not work), + +- client-side is not multi-thread safe. It is not easy to make it safe, + because the lower-level ZOID client-side forwarding is not multi-thread + safe, + +- CTRL-C might be tricky on the client side, as interrupting a ZOID routine + can deadlock the tree network (this is a ZOID limitation), + +- only one, global context is supported. + +Additional considerations for the users of this method: + +- on the server (I/O node) side, using preallocated (BMI_memalloc) buffers + can significantly improve performance, + +- on both client and server, buffers passed to send/receive routines should + be 16-byte aligned (they normally will be if they were allocated with malloc + or BMI_memalloc), + +- make sure to use a long timeout for BMI test routines, especially those + invoked on the client side, as each such call will result in a + communication with the I/O node (10ms is *way* too short, 1000ms is pretty + short too). + + +ADDRESS FORMAT + + +The only supported address is "zoid://". It denotes the server process +running on the I/O node. + + +IMPLEMENTATION OVERVIEW + + +The implementation is asymmetric; different code paths are used on the +compute node clients and on the I/O node server. The main code can be +found in the "zoid.c" file, which contains the client code and the code to +invoke the server routines. The server routines themselves can be found in +"server.c".
+ +The method source also includes "dlmalloc", a public domain custom memory +pool implementation used to maintain a shared memory pool on the I/O nodes, +and a "zbmi_pool.c" that acts as a wrapper around "dlmalloc". + +Both the compute node client and the I/O node server codes actually act as +clients to the ZOID daemon's "zbmi" plugin, which is the most complex part +of the code. The source of the zbmi plugin is not included here, but is +rather in the ZeptoOS repository, in the +"packages/zoid/src/zbmi/implementation/" directory. Note that the zbmi +plugin is not documented in detail here, but has its own documentation with +its source code. + + +IMPLEMENTATION DETAILS + +CLIENT + +The communication between the compute node clients and the zbmi plugin on +the I/O node is performed using three ZOID-forwarded function calls: +zbmi_send, zbmi_recv, and zbmi_test. + +The zbmi plugin is mostly stateless so far as the compute node clients are +concerned. Specifically, the information on posted, but not immediately +completed expected message sends/receives is stored exclusively on the +client side. + +All BMI send routines end up in zoid_post_send_common. That includes +unexpected messages and list I/O. This routine attempts to forward the +message to the zbmi plugin on the I/O node, using zbmi_send. For +unexpected messages, zbmi_send is normally expected to succeed and result +in an immediate completion; however, if the zbmi plugin is out of memory, +zbmi_send will fail with ENOMEM. The same failure will occur with expected +messages if a matching receive has not been posted on the I/O node side by +the time zbmi_send is invoked. Either failure is recoverable; the send +request is put in the "zoid_ops" queue for another attempt later. For +expected messages, if a matching receive has been posted, the call succeeds +resulting in an immediate completion. 
+ +The way zbmi_send is forwarded by ZOID, the data payload is only +transferred to the I/O nodes if there is a memory buffer there for the +message. So, in spite of how it looks in zoid.c, no bytes are wasted on +the wire. + +All BMI expected receive routines end up in zoid_post_recv_common. This +routine attempts to receive a message waiting in the zbmi plugin, using +zbmi_recv. If a matching message has been posted on the I/O node side, it +is sent to the compute node and zbmi_recv returns 1, resulting in an +immediate completion. Otherwise, the receive request is put in the +"zoid_ops" queue for another attempt later. + +BMI_cancel is very easy to implement thanks to a lack of multi-threading +considerations and because the state is stored on the client-side only: we +just flag a request as canceled. + +All BMI test routines eventually end up in zoid_test_common. The path is +somewhat longer for "testcontext", which first goes through the "zoid_ops" +queue filling in a temporary array with the ids of pending operations +before invoking zoid_test_common. Again, that would not have been correct +were it not for the fact that we don't deal with multi-threading. Anyway, +the routine needs to treat canceled requests specially -- those won't be +sent to the server anymore. For non-canceled requests, it extracts the +message tag, size, and send/recv indicator and forwards those to the server +using zbmi_test. zbmi_test is the only blocking call of the three; it can +block on the server for the specified time if none of the specified +requests is initially ready. zbmi_test returns the number of ready +requests; if it is non-zero, then zoid_test_common next attempts to satisfy +those requests by invoking zbmi_send/zbmi_recv. Those send/recv routines +could still fail in spite of a successful test, if there is no memory, or +if the server-side canceled its matching request; this is recoverable.
+ +SERVER + +The communication between the I/O node BMI server and the zbmi plugin of +the ZOID daemon is carried across two channels. Commands are sent via a +unix domain socket (zbmi plugin is the server; multiple threads of the BMI +server can communicate simultaneously by opening multiple connections to +the socket). Payload is exchanged using a large shared memory segment, +allocated by the zbmi plugin. We make efforts to avoid unnecessary copies +to/from that segment, so BMI_memalloc() on the BMI server side allocates +from that segment, and ZOID-forwarded zbmi_send/recv calls store their +buffers directly into the segment. + +The shared memory segment is split in two: a normally smaller region is +used for unexpected messages and is managed by the zbmi plugin, while a +larger region is used for expected messages and is managed by the BMI +server. + +The communication with the zbmi plugin is established during +BMI_initialize, and terminated during BMI_finalize. The stream protocol is +documented in zbmi's zbmi_protocol.h. + +For BMI testunexpected, we communicate the metadata on the pending received +messages via the socket, and the payload is in the shared memory buffer +which is returned to the user. unexpected_free just sends the buffer +address back to the zbmi plugin, since the unexpected messages memory pool +is managed by the plugin. + +To get the best performance, it is important that the user allocates +buffers using BMI_memalloc on the server side, because that will allocate +them in the shared memory area. If instead an externally allocated buffer +is passed to BMI_send/recv, we will allocate a temporary buffer, which +causes an additional copy overhead. Failures to allocate the temporary +buffer are recoverable: we place the request in the "no_mem" queue and +retry the allocation after every BMI_memfree.
+ +Expected server-side posts, be it sends or receives, are never completed +immediately: we send a message descriptor to the zbmi plugin which +registers it and just sends back a confirmation. When registering we +exchange the internal BMI id and the internal ZOID id, since that +simplifies subsequent testing/canceling. + +Canceling messages is more complex than on the client side. Generally, we +have to send a cancel request to the zbmi plugin to unregister an already +registered message descriptor. Depending on the progress of the zbmi +plugin in handling that registered request, the cancellation request might +be ignored. An exception is when the request has not been registered +because of the lack of memory for a temporary buffer as described earlier; +in that case we cancel it locally and put it in the "error_ops" queue. + +When testing (in zoid_server_test_common), we need to deal with locally +failed/canceled messages separately from the ones registered with the zbmi +plugin. This is actually similar to what we also do on the client side. +Those messages come from the "error_ops" queue and we deal with them first, +since they involve no communication with the zbmi plugin. Unlike on the +client side, where the common test routines sort-of "emulated" testcontext +by first building an array of all pending request ids, on the server side +we have a "native" implementation. Testcontext is recognized by passing an +"incount" of 0, and we forward it to the zbmi plugin so that it knows to +return *any* completed request(s). This is necessary because of +multi-threading constraints, and it is possible because the zbmi plugin +does maintain state for server-side requests. As with the client, the test +is the only command that can block in the zbmi plugin for the specified +time period if no request is initially completed.
Completed requests +require no further handling, with the exceptions of those that used +temporary shared memory buffer, which needs to be released (after being +copied back for receives). Completed requests can also indicate +cancellations, if we previously canceled a registered request. diff --git a/src/io/bmi/bmi_zoid/dlmalloc.c b/src/io/bmi/bmi_zoid/dlmalloc.c new file mode 100644 index 0000000..2608552 --- /dev/null +++ b/src/io/bmi/bmi_zoid/dlmalloc.c @@ -0,0 +1,5701 @@ +/* + This is a version (aka dlmalloc) of malloc/free/realloc written by + Doug Lea and released to the public domain, as explained at + http://creativecommons.org/licenses/publicdomain. Send questions, + comments, complaints, performance data, etc to dl@cs.oswego.edu + +* Version 2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) + + Note: There may be an updated version of this malloc obtainable at + ftp://gee.cs.oswego.edu/pub/misc/malloc.c + Check before installing! + +* Quickstart + + This library is all in one file to simplify the most common usage: + ftp it, compile it (-O3), and link it into another program. All of + the compile-time options default to reasonable values for use on + most platforms. You might later want to step through various + compile-time and dynamic tuning options. + + For convenience, an include file for code using this malloc is at: + ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.4.h + You don't really need this .h file unless you call functions not + defined in your system include files. The .h file contains only the + excerpts from this file needed for using this malloc on ANSI C/C++ + systems, so long as you haven't changed compile-time options about + naming and tuning parameters. If you do, then you can create your + own malloc.h that does include all settings by cutting at the point + indicated below. Note that you may already by default be using a C + library containing a malloc that is based on some version of this + malloc (for example in linux). 
You might still want to use the one + in this file to customize settings or to avoid overheads associated + with library versions. + +* Vital statistics: + + Supported pointer/size_t representation: 4 or 8 bytes + size_t MUST be an unsigned type of the same width as + pointers. (If you are using an ancient system that declares + size_t as a signed type, or need it to be a different width + than pointers, you can use a previous release of this malloc + (e.g. 2.7.2) supporting these.) + + Alignment: 8 bytes (default) + This suffices for nearly all current machines and C compilers. + However, you can define MALLOC_ALIGNMENT to be wider than this + if necessary (up to 128bytes), at the expense of using more space. + + Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes) + 8 or 16 bytes (if 8byte sizes) + Each malloced chunk has a hidden word of overhead holding size + and status information, and additional cross-check word + if FOOTERS is defined. + + Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead) + 8-byte ptrs: 32 bytes (including overhead) + + Even a request for zero bytes (i.e., malloc(0)) returns a + pointer to something of the minimum allocatable size. + The maximum overhead wastage (i.e., number of extra bytes + allocated than were requested in malloc) is less than or equal + to the minimum size, except for requests >= mmap_threshold that + are serviced via mmap(), where the worst case wastage is about + 32 bytes plus the remainder from a system page (the minimal + mmap unit); typically 4096 or 8192 bytes. + + Security: static-safe; optionally more or less + The "security" of malloc refers to the ability of malicious + code to accentuate the effects of errors (for example, freeing + space that is not currently malloc'ed or overwriting past the + ends of chunks) in code that calls malloc. 
This malloc + guarantees not to modify any memory locations below the base of + heap, i.e., static variables, even in the presence of usage + errors. The routines additionally detect most improper frees + and reallocs. All this holds as long as the static bookkeeping + for malloc itself is not corrupted by some other means. This + is only one aspect of security -- these checks do not, and + cannot, detect all possible programming errors. + + If FOOTERS is defined nonzero, then each allocated chunk + carries an additional check word to verify that it was malloced + from its space. These check words are the same within each + execution of a program using malloc, but differ across + executions, so externally crafted fake chunks cannot be + freed. This improves security by rejecting frees/reallocs that + could corrupt heap memory, in addition to the checks preventing + writes to statics that are always on. This may further improve + security at the expense of time and space overhead. (Note that + FOOTERS may also be worth using with MSPACES.) + + By default detected errors cause the program to abort (calling + "abort()"). You can override this to instead proceed past + errors by defining PROCEED_ON_ERROR. In this case, a bad free + has no effect, and a malloc that encounters a bad address + caused by user overwrites will ignore the bad address by + dropping pointers and indices to all known memory. This may + be appropriate for programs that should continue if at all + possible in the face of programming errors, although they may + run out of memory because dropped memory is never reclaimed. + + If you don't like either of these options, you can define + CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything + else. And if if you are sure that your program using malloc has + no errors or vulnerabilities, you can define INSECURE to 1, + which might (or might not) provide a small performance improvement. 
+ + Thread-safety: NOT thread-safe unless USE_LOCKS defined + When USE_LOCKS is defined, each public call to malloc, free, + etc is surrounded with either a pthread mutex or a win32 + spinlock (depending on WIN32). This is not especially fast, and + can be a major bottleneck. It is designed only to provide + minimal protection in concurrent environments, and to provide a + basis for extensions. If you are using malloc in a concurrent + program, consider instead using nedmalloc + (http://www.nedprod.com/programs/portable/nedmalloc/) or + ptmalloc (See http://www.malloc.de), which are derived + from versions of this malloc. + + System requirements: Any combination of MORECORE and/or MMAP/MUNMAP + This malloc can use unix sbrk or any emulation (invoked using + the CALL_MORECORE macro) and/or mmap/munmap or any emulation + (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system + memory. On most unix systems, it tends to work best if both + MORECORE and MMAP are enabled. On Win32, it uses emulations + based on VirtualAlloc. It also uses common C library functions + like memset. + + Compliance: I believe it is compliant with the Single Unix Specification + (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably + others as well. + +* Overview of algorithms + + This is not the fastest, most space-conserving, most portable, or + most tunable malloc ever written. However it is among the fastest + while also being among the most space-conserving, portable and + tunable. Consistent balance across these factors results in a good + general-purpose allocator for malloc-intensive programs. + + In most ways, this malloc is a best-fit allocator. Generally, it + chooses the best-fitting existing chunk for a request, with ties + broken in approximately least-recently-used order. (This strategy + normally maintains low fragmentation.) 
However, for requests less + than 256bytes, it deviates from best-fit when there is not an + exactly fitting available chunk by preferring to use space adjacent + to that used for the previous small request, as well as by breaking + ties in approximately most-recently-used order. (These enhance + locality of series of small allocations.) And for very large requests + (>= 256Kb by default), it relies on system memory mapping + facilities, if supported. (This helps avoid carrying around and + possibly fragmenting memory used only for large chunks.) + + All operations (except malloc_stats and mallinfo) have execution + times that are bounded by a constant factor of the number of bits in + a size_t, not counting any clearing in calloc or copying in realloc, + or actions surrounding MORECORE and MMAP that have times + proportional to the number of non-contiguous regions returned by + system allocation routines, which is often just 1. In real-time + applications, you can optionally suppress segment traversals using + NO_SEGMENT_TRAVERSAL, which assures bounded execution even when + system allocators return non-contiguous spaces, at the typical + expense of carrying around more memory and increased fragmentation. + + The implementation is not very modular and seriously overuses + macros. Perhaps someday all C compilers will do as good a job + inlining modular code as can now be done by brute-force expansion, + but now, enough of them seem not to. + + Some compilers issue a lot of warnings about code that is + dead/unreachable only on some platforms, and also about intentional + uses of negation on unsigned types. All known cases of each can be + ignored. + + For a longer but out of date high-level description, see + http://gee.cs.oswego.edu/dl/html/malloc.html + +* MSPACES + If MSPACES is defined, then in addition to malloc, free, etc., + this file also defines mspace_malloc, mspace_free, etc. 
These + are versions of malloc routines that take an "mspace" argument + obtained using create_mspace, to control all internal bookkeeping. + If ONLY_MSPACES is defined, only these versions are compiled. + So if you would like to use this allocator for only some allocations, + and your system malloc for others, you can compile with + ONLY_MSPACES and then do something like... + static mspace mymspace = create_mspace(0,0); // for example + #define mymalloc(bytes) mspace_malloc(mymspace, bytes) + + (Note: If you only need one instance of an mspace, you can instead + use "USE_DL_PREFIX" to relabel the global malloc.) + + You can similarly create thread-local allocators by storing + mspaces as thread-locals. For example: + static __thread mspace tlms = 0; + void* tlmalloc(size_t bytes) { + if (tlms == 0) tlms = create_mspace(0, 0); + return mspace_malloc(tlms, bytes); + } + void tlfree(void* mem) { mspace_free(tlms, mem); } + + Unless FOOTERS is defined, each mspace is completely independent. + You cannot allocate from one and free to another (although + conformance is only weakly checked, so usage errors are not always + caught). If FOOTERS is defined, then each chunk carries around a tag + indicating its originating mspace, and frees are directed to their + originating spaces. + + ------------------------- Compile-time options --------------------------- + +Be careful in setting #define values for numerical constants of type +size_t. On some systems, literal values are not automatically extended +to size_t precision unless they are explicitly casted. You can also +use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below. + +WIN32 default: defined if _WIN32 defined + Defining WIN32 sets up defaults for MS environment and compilers. + Otherwise defaults are for unix. 
Beware that there seem to be some + cases where this malloc might not be a pure drop-in replacement for + Win32 malloc: Random-looking failures from Win32 GDI API's (eg; + SetDIBits()) may be due to bugs in some video driver implementations + when pixel buffers are malloc()ed, and the region spans more than + one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb) + default granularity, pixel buffers may straddle virtual allocation + regions more often than when using the Microsoft allocator. You can + avoid this by using VirtualAlloc() and VirtualFree() for all pixel + buffers rather than using malloc(). If this is not possible, + recompile this malloc with a larger DEFAULT_GRANULARITY. + +MALLOC_ALIGNMENT default: (size_t)8 + Controls the minimum alignment for malloc'ed chunks. It must be a + power of two and at least 8, even on machines for which smaller + alignments would suffice. It may be defined as larger than this + though. Note however that code and data structures are optimized for + the case of 8-byte alignment. + +MSPACES default: 0 (false) + If true, compile in support for independent allocation spaces. + This is only supported if HAVE_MMAP is true. + +ONLY_MSPACES default: 0 (false) + If true, only compile in mspace versions, not regular versions. + +USE_LOCKS default: 0 (false) + Causes each call to each public routine to be surrounded with + pthread or WIN32 mutex lock/unlock. (If set true, this can be + overridden on a per-mspace basis for mspace versions.) If set to a + non-zero value other than 1, locks are used, but their + implementation is left out, so lock functions must be supplied manually, + as described below. + +USE_SPIN_LOCKS default: 1 iff USE_LOCKS and on x86 using gcc or MSC + If true, uses custom spin locks for locking. This is currently + supported only for x86 platforms using gcc or recent MS compilers. + Otherwise, posix locks or win32 critical sections are used. 
+ +FOOTERS default: 0 + If true, provide extra checking and dispatching by placing + information in the footers of allocated chunks. This adds + space and time overhead. + +INSECURE default: 0 + If true, omit checks for usage errors and heap space overwrites. + +USE_DL_PREFIX default: NOT defined + Causes compiler to prefix all public routines with the string 'dl'. + This can be useful when you only want to use this malloc in one part + of a program, using your regular system malloc elsewhere. + +ABORT default: defined as abort() + Defines how to abort on failed checks. On most systems, a failed + check cannot die with an "assert" or even print an informative + message, because the underlying print routines in turn call malloc, + which will fail again. Generally, the best policy is to simply call + abort(). It's not very useful to do more than this because many + errors due to overwriting will show up as address faults (null, odd + addresses etc) rather than malloc-triggered checks, so will also + abort. Also, most compilers know that abort() does not return, so + can better optimize code conditionally calling it. + +PROCEED_ON_ERROR default: defined as 0 (false) + Controls whether detected bad addresses cause them to bypassed + rather than aborting. If set, detected bad arguments to free and + realloc are ignored. And all bookkeeping information is zeroed out + upon a detected overwrite of freed heap space, thus losing the + ability to ever return it from malloc again, but enabling the + application to proceed. If PROCEED_ON_ERROR is defined, the + static variable malloc_corruption_error_count is compiled in + and can be examined to see if errors have occurred. This option + generates slower code than the default abort policy. + +DEBUG default: NOT defined + The DEBUG setting is mainly intended for people trying to modify + this code or diagnose problems when porting to new platforms. 
+ However, it may also be able to better isolate user errors than just + using runtime checks. The assertions in the check routines spell + out in more detail the assumptions and invariants underlying the + algorithms. The checking is fairly extensive, and will slow down + execution noticeably. Calling malloc_stats or mallinfo with DEBUG + set will attempt to check every non-mmapped allocated and free chunk + in the course of computing the summaries. + +ABORT_ON_ASSERT_FAILURE default: defined as 1 (true) + Debugging assertion failures can be nearly impossible if your + version of the assert macro causes malloc to be called, which will + lead to a cascade of further failures, blowing the runtime stack. + ABORT_ON_ASSERT_FAILURE cause assertions failures to call abort(), + which will usually make debugging easier. + +MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32 + The action to take before "return 0" when malloc fails to be able to + return memory because there is none available. + +HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES + True if this system supports sbrk or an emulation of it. + +MORECORE default: sbrk + The name of the sbrk-style system routine to call to obtain more + memory. See below for guidance on writing custom MORECORE + functions. The type of the argument to sbrk/MORECORE varies across + systems. It cannot be size_t, because it supports negative + arguments, so it is normally the signed type of the same width as + size_t (sometimes declared as "intptr_t"). It doesn't much matter + though. Internally, we only call it with arguments less than half + the max value of a size_t, which should work across all reasonable + possibilities, although sometimes generating compiler warnings. + +MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE + If true, take advantage of fact that consecutive calls to MORECORE + with positive arguments always return contiguous increasing + addresses. This is true of unix sbrk. 
It does not hurt too much to + set it true anyway, since malloc copes with non-contiguities. + Setting it false when definitely non-contiguous saves time + and possibly wasted space it would take to discover this though. + +MORECORE_CANNOT_TRIM default: NOT defined + True if MORECORE cannot release space back to the system when given + negative arguments. This is generally necessary only if you are + using a hand-crafted MORECORE function that cannot handle negative + arguments. + +NO_SEGMENT_TRAVERSAL default: 0 + If non-zero, suppresses traversals of memory segments + returned by either MORECORE or CALL_MMAP. This disables + merging of segments that are contiguous, and selectively + releasing them to the OS if unused, but bounds execution times. + +HAVE_MMAP default: 1 (true) + True if this system supports mmap or an emulation of it. If so, and + HAVE_MORECORE is not true, MMAP is used for all system + allocation. If set and HAVE_MORECORE is true as well, MMAP is + primarily used to directly allocate very large blocks. It is also + used as a backup strategy in cases where MORECORE fails to provide + space from system. Note: A single call to MUNMAP is assumed to be + able to unmap memory that may have been allocated using multiple calls + to MMAP, so long as they are adjacent. + +HAVE_MREMAP default: 1 on linux, else 0 + If true realloc() uses mremap() to re-allocate large blocks and + extend or shrink allocation spaces. + +MMAP_CLEARS default: 1 except on WINCE. + True if mmap clears memory so calloc doesn't need to. This is true + for standard unix mmap using /dev/zero and on WIN32 except for WINCE. + +USE_BUILTIN_FFS default: 0 (i.e., not used) + Causes malloc to use the builtin ffs() function to compute indices. + Some compilers may recognize and intrinsify ffs to be faster than the + supplied C version. Also, the case of x86 using gcc is special-cased + to an asm instruction, so is already as fast as it can be, and so + this setting has no effect.
Similarly for Win32 under recent MS compilers. + (On most x86s, the asm version is only slightly faster than the C version.) + +malloc_getpagesize default: derive from system includes, or 4096. + The system page size. To the extent possible, this malloc manages + memory from the system in page-size units. This may be (and + usually is) a function rather than a constant. This is ignored + if WIN32, where page size is determined using getSystemInfo during + initialization. + +USE_DEV_RANDOM default: 0 (i.e., not used) + Causes malloc to use /dev/random to initialize secure magic seed for + stamping footers. Otherwise, the current time is used. + +NO_MALLINFO default: 0 + If defined, don't compile "mallinfo". This can be a simple way + of dealing with mismatches between system declarations and + those in this file. + +MALLINFO_FIELD_TYPE default: size_t + The type of the fields in the mallinfo struct. This was originally + defined as "int" in SVID etc, but is more usefully defined as + size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set + +REALLOC_ZERO_BYTES_FREES default: not defined + This should be set if a call to realloc with zero bytes should + be the same as a call to free. Some people think it should. Otherwise, + since this malloc returns a unique pointer for malloc(0), so does + realloc(p, 0). + +LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H +LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H +LACKS_STDLIB_H default: NOT defined unless on WIN32 + Define these if your system does not have these header files. + You might need to manually insert some of the declarations they provide. + +DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS, + system_info.dwAllocationGranularity in WIN32, + otherwise 64K. + Also settable using mallopt(M_GRANULARITY, x) + The unit for allocating and deallocating memory from the system. 
On + most systems with contiguous MORECORE, there is no reason to + make this more than a page. However, systems with MMAP tend to + either require or encourage larger granularities. You can increase + this value to prevent system allocation functions to be called so + often, especially if they are slow. The value must be at least one + page and must be a power of two. Setting to 0 causes initialization + to either page size or win32 region size. (Note: In previous + versions of malloc, the equivalent of this option was called + "TOP_PAD") + +DEFAULT_TRIM_THRESHOLD default: 2MB + Also settable using mallopt(M_TRIM_THRESHOLD, x) + The maximum amount of unused top-most memory to keep before + releasing via malloc_trim in free(). Automatic trimming is mainly + useful in long-lived programs using contiguous MORECORE. Because + trimming via sbrk can be slow on some systems, and can sometimes be + wasteful (in cases where programs immediately afterward allocate + more large chunks) the value should be high enough so that your + overall system performance would improve by releasing this much + memory. As a rough guide, you might set to a value close to the + average size of a process (program) running on your system. + Releasing this much memory would allow such a process to run in + memory. Generally, it is worth tuning trim thresholds when a + program undergoes phases where several large chunks are allocated + and released in ways that can reuse each other's storage, perhaps + mixed with phases where there are no such chunks at all. The trim + value must be greater than page size to have any useful effect. To + disable trimming completely, you can set to MAX_SIZE_T. Note that the trick + some people use of mallocing a huge space and then freeing it at + program startup, in an attempt to reserve system memory, doesn't + have the intended effect under automatic trimming, since that memory + will immediately be returned to the system. 
+ +DEFAULT_MMAP_THRESHOLD default: 256K + Also settable using mallopt(M_MMAP_THRESHOLD, x) + The request size threshold for using MMAP to directly service a + request. Requests of at least this size that cannot be allocated + using already-existing space will be serviced via mmap. (If enough + normal freed space already exists it is used instead.) Using mmap + segregates relatively large chunks of memory so that they can be + individually obtained and released from the host system. A request + serviced through mmap is never reused by any other request (at least + not directly; the system may just so happen to remap successive + requests to the same locations). Segregating space in this way has + the benefits that: Mmapped space can always be individually released + back to the system, which helps keep the system level memory demands + of a long-lived program low. Also, mapped memory doesn't become + `locked' between other chunks, as can happen with normally allocated + chunks, which means that even trimming via malloc_trim would not + release them. However, it has the disadvantage that the space + cannot be reclaimed, consolidated, and then used to service later + requests, as happens with normal chunks. The advantages of mmap + nearly always outweigh disadvantages for "large" chunks, but the + value of "large" may vary across systems. The default is an + empirically derived value that works well in most systems. You can + disable mmap by setting to MAX_SIZE_T. + +MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP + The number of consolidated frees between checks to release + unused segments when freeing. When using non-contiguous segments, + especially with multiple mspaces, checking only for topmost space + doesn't always suffice to trigger trimming. 
To compensate for this, + free() will, with a period of MAX_RELEASE_CHECK_RATE (or the + current number of segments, if greater) try to release unused + segments to the OS when freeing chunks that result in + consolidation. The best value for this parameter is a compromise + between slowing down frees with relatively costly checks that + rarely trigger versus holding on to unused memory. To effectively + disable, set to MAX_SIZE_T. This may lead to a very slight speed + improvement at the expense of carrying around more memory. +*/ + +/* Version identifier to allow people to support multiple versions */ +#ifndef DLMALLOC_VERSION +#define DLMALLOC_VERSION 20804 +#endif /* DLMALLOC_VERSION */ + +#ifndef WIN32 +#ifdef _WIN32 +#define WIN32 1 +#endif /* _WIN32 */ +#ifdef _WIN32_WCE +#define LACKS_FCNTL_H +#define WIN32 1 +#endif /* _WIN32_WCE */ +#endif /* WIN32 */ +#ifdef WIN32 +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#define HAVE_MMAP 1 +#define HAVE_MORECORE 0 +#define LACKS_UNISTD_H +#define LACKS_SYS_PARAM_H +#define LACKS_SYS_MMAN_H +#define LACKS_STRING_H +#define LACKS_STRINGS_H +#define LACKS_SYS_TYPES_H +#define LACKS_ERRNO_H +#ifndef MALLOC_FAILURE_ACTION +#define MALLOC_FAILURE_ACTION +#endif /* MALLOC_FAILURE_ACTION */ +#ifdef _WIN32_WCE /* WINCE reportedly does not clear */ +#define MMAP_CLEARS 0 +#else +#define MMAP_CLEARS 1 +#endif /* _WIN32_WCE */ +#endif /* WIN32 */ + +#if defined(DARWIN) || defined(_DARWIN) +/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */ +#ifndef HAVE_MORECORE +#define HAVE_MORECORE 0 +#define HAVE_MMAP 1 +/* OSX allocators provide 16 byte alignment */ +#ifndef MALLOC_ALIGNMENT +#define MALLOC_ALIGNMENT ((size_t)16U) +#endif +#endif /* HAVE_MORECORE */ +#endif /* DARWIN */ + +#ifndef LACKS_SYS_TYPES_H +#include <sys/types.h> /* For size_t */ +#endif /* LACKS_SYS_TYPES_H */ + +#if (defined(__GNUC__) && ((defined(__i386__) || defined(__x86_64__)))) || (defined(_MSC_VER) && _MSC_VER>=1310) +#define SPIN_LOCKS_AVAILABLE 1 +#else
+#define SPIN_LOCKS_AVAILABLE 0 +#endif + +/* The maximum possible size_t value has all bits set */ +#define MAX_SIZE_T (~(size_t)0) + +#ifndef ONLY_MSPACES +#define ONLY_MSPACES 0 /* define to a value */ +#else +#define ONLY_MSPACES 1 +#endif /* ONLY_MSPACES */ +#ifndef MSPACES +#if ONLY_MSPACES +#define MSPACES 1 +#else /* ONLY_MSPACES */ +#define MSPACES 0 +#endif /* ONLY_MSPACES */ +#endif /* MSPACES */ +#ifndef MALLOC_ALIGNMENT +#define MALLOC_ALIGNMENT ((size_t)8U) +#endif /* MALLOC_ALIGNMENT */ +#ifndef FOOTERS +#define FOOTERS 0 +#endif /* FOOTERS */ +#ifndef ABORT +#define ABORT abort() +#endif /* ABORT */ +#ifndef ABORT_ON_ASSERT_FAILURE +#define ABORT_ON_ASSERT_FAILURE 1 +#endif /* ABORT_ON_ASSERT_FAILURE */ +#ifndef PROCEED_ON_ERROR +#define PROCEED_ON_ERROR 0 +#endif /* PROCEED_ON_ERROR */ +#ifndef USE_LOCKS +#define USE_LOCKS 0 +#endif /* USE_LOCKS */ +#ifndef USE_SPIN_LOCKS +#if USE_LOCKS && SPIN_LOCKS_AVAILABLE +#define USE_SPIN_LOCKS 1 +#else +#define USE_SPIN_LOCKS 0 +#endif /* USE_LOCKS && SPIN_LOCKS_AVAILABLE. 
*/ +#endif /* USE_SPIN_LOCKS */ +#ifndef INSECURE +#define INSECURE 0 +#endif /* INSECURE */ +#ifndef HAVE_MMAP +#define HAVE_MMAP 1 +#endif /* HAVE_MMAP */ +#ifndef MMAP_CLEARS +#define MMAP_CLEARS 1 +#endif /* MMAP_CLEARS */ +#ifndef HAVE_MREMAP +#ifdef linux +#define HAVE_MREMAP 1 +#else /* linux */ +#define HAVE_MREMAP 0 +#endif /* linux */ +#endif /* HAVE_MREMAP */ +#ifndef MALLOC_FAILURE_ACTION +#define MALLOC_FAILURE_ACTION errno = ENOMEM; +#endif /* MALLOC_FAILURE_ACTION */ +#ifndef HAVE_MORECORE +#if ONLY_MSPACES +#define HAVE_MORECORE 0 +#else /* ONLY_MSPACES */ +#define HAVE_MORECORE 1 +#endif /* ONLY_MSPACES */ +#endif /* HAVE_MORECORE */ +#if !HAVE_MORECORE +#define MORECORE_CONTIGUOUS 0 +#else /* !HAVE_MORECORE */ +#define MORECORE_DEFAULT sbrk +#ifndef MORECORE_CONTIGUOUS +#define MORECORE_CONTIGUOUS 1 +#endif /* MORECORE_CONTIGUOUS */ +#endif /* HAVE_MORECORE */ +#ifndef DEFAULT_GRANULARITY +#if (MORECORE_CONTIGUOUS || defined(WIN32)) +#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */ +#else /* MORECORE_CONTIGUOUS */ +#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) +#endif /* MORECORE_CONTIGUOUS */ +#endif /* DEFAULT_GRANULARITY */ +#ifndef DEFAULT_TRIM_THRESHOLD +#ifndef MORECORE_CANNOT_TRIM +#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) +#else /* MORECORE_CANNOT_TRIM */ +#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T +#endif /* MORECORE_CANNOT_TRIM */ +#endif /* DEFAULT_TRIM_THRESHOLD */ +#ifndef DEFAULT_MMAP_THRESHOLD +#if HAVE_MMAP +#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U) +#else /* HAVE_MMAP */ +#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T +#endif /* HAVE_MMAP */ +#endif /* DEFAULT_MMAP_THRESHOLD */ +#ifndef MAX_RELEASE_CHECK_RATE +#if HAVE_MMAP +#define MAX_RELEASE_CHECK_RATE 4095 +#else +#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T +#endif /* HAVE_MMAP */ +#endif /* MAX_RELEASE_CHECK_RATE */ +#ifndef USE_BUILTIN_FFS +#define USE_BUILTIN_FFS 0 +#endif /* USE_BUILTIN_FFS 
*/ +#ifndef USE_DEV_RANDOM +#define USE_DEV_RANDOM 0 +#endif /* USE_DEV_RANDOM */ +#ifndef NO_MALLINFO +#define NO_MALLINFO 0 +#endif /* NO_MALLINFO */ +#ifndef MALLINFO_FIELD_TYPE +#define MALLINFO_FIELD_TYPE size_t +#endif /* MALLINFO_FIELD_TYPE */ +#ifndef NO_SEGMENT_TRAVERSAL +#define NO_SEGMENT_TRAVERSAL 0 +#endif /* NO_SEGMENT_TRAVERSAL */ + +/* + mallopt tuning options. SVID/XPG defines four standard parameter + numbers for mallopt, normally defined in malloc.h. None of these + are used in this malloc, so setting them has no effect. But this + malloc does support the following options. +*/ + +#define M_TRIM_THRESHOLD (-1) +#define M_GRANULARITY (-2) +#define M_MMAP_THRESHOLD (-3) + +/* ------------------------ Mallinfo declarations ------------------------ */ + +#if !NO_MALLINFO +/* + This version of malloc supports the standard SVID/XPG mallinfo + routine that returns a struct containing usage properties and + statistics. It should work on any system that has a + /usr/include/malloc.h defining struct mallinfo. The main + declaration needed is the mallinfo struct that is returned (by-copy) + by mallinfo(). The mallinfo struct contains a bunch of fields that + are not even meaningful in this version of malloc. These fields are + instead filled by mallinfo() with other numbers that might be of + interest. + + HAVE_USR_INCLUDE_MALLOC_H should be set if you have a + /usr/include/malloc.h file that includes a declaration of struct + mallinfo. If so, it is included; else a compliant version is + declared below. These must be precisely the same for mallinfo() to + work. The original SVID version of this struct, defined on most + systems with mallinfo, declares all fields as ints. But some others + define as unsigned long. If your system defines the fields using a + type of different width than listed here, you MUST #include your + system version and #define HAVE_USR_INCLUDE_MALLOC_H.
+*/ + +/* #define HAVE_USR_INCLUDE_MALLOC_H */ + +#ifdef HAVE_USR_INCLUDE_MALLOC_H +#include "/usr/include/malloc.h" +#else /* HAVE_USR_INCLUDE_MALLOC_H */ +#ifndef STRUCT_MALLINFO_DECLARED +#define STRUCT_MALLINFO_DECLARED 1 +struct mallinfo { + MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ + MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ + MALLINFO_FIELD_TYPE smblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ + MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ + MALLINFO_FIELD_TYPE fsmblks; /* always 0 */ + MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ + MALLINFO_FIELD_TYPE fordblks; /* total free space */ + MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ +}; +#endif /* STRUCT_MALLINFO_DECLARED */ +#endif /* HAVE_USR_INCLUDE_MALLOC_H */ +#endif /* NO_MALLINFO */ + +/* + Try to persuade compilers to inline. The most critical functions for + inlining are defined as macros, so these aren't used for them. 
+*/ + +#ifndef FORCEINLINE + #if defined(__GNUC__) +#define FORCEINLINE __inline __attribute__ ((always_inline)) + #elif defined(_MSC_VER) + #define FORCEINLINE __forceinline + #endif +#endif +#ifndef NOINLINE + #if defined(__GNUC__) + #define NOINLINE __attribute__ ((noinline)) + #elif defined(_MSC_VER) + #define NOINLINE __declspec(noinline) + #else + #define NOINLINE + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#ifndef FORCEINLINE + #define FORCEINLINE inline +#endif +#endif /* __cplusplus */ +#ifndef FORCEINLINE + #define FORCEINLINE +#endif + +#if !ONLY_MSPACES + +/* ------------------- Declarations of public routines ------------------- */ + +#ifndef USE_DL_PREFIX +#define dlcalloc calloc +#define dlfree free +#define dlmalloc malloc +#define dlmemalign memalign +#define dlrealloc realloc +#define dlvalloc valloc +#define dlpvalloc pvalloc +#define dlmallinfo mallinfo +#define dlmallopt mallopt +#define dlmalloc_trim malloc_trim +#define dlmalloc_stats malloc_stats +#define dlmalloc_usable_size malloc_usable_size +#define dlmalloc_footprint malloc_footprint +#define dlmalloc_max_footprint malloc_max_footprint +#define dlindependent_calloc independent_calloc +#define dlindependent_comalloc independent_comalloc +#endif /* USE_DL_PREFIX */ + + +/* + malloc(size_t n) + Returns a pointer to a newly allocated chunk of at least n bytes, or + null if no space is available, in which case errno is set to ENOMEM + on ANSI C systems. + + If n is zero, malloc returns a minimum-sized chunk. (The minimum + size is 16 bytes on most 32bit systems, and 32 bytes on 64bit + systems.) Note that size_t is an unsigned type, so calls with + arguments that would be negative if signed are interpreted as + requests for huge amounts of space, which will often fail. The + maximum supported value of n differs across systems, but is in all + cases less than the maximum representable value of a size_t. 
+*/ +void* dlmalloc(size_t); + +/* + free(void* p) + Releases the chunk of memory pointed to by p, that had been previously + allocated using malloc or a related routine such as realloc. + It has no effect if p is null. If p was not malloced or already + freed, free(p) will by default cause the current program to abort. +*/ +void dlfree(void*); + +/* + calloc(size_t n_elements, size_t element_size); + Returns a pointer to n_elements * element_size bytes, with all locations + set to zero. +*/ +void* dlcalloc(size_t, size_t); + +/* + realloc(void* p, size_t n) + Returns a pointer to a chunk of size n that contains the same data + as does chunk p up to the minimum of (n, p's size) bytes, or null + if no space is available. + + The returned pointer may or may not be the same as p. The algorithm + prefers extending p in most cases when possible, otherwise it + employs the equivalent of a malloc-copy-free sequence. + + If p is null, realloc is equivalent to malloc. + + If space is not available, realloc returns null, errno is set (if on + ANSI) and p is NOT freed. + + if n is for fewer bytes than already held by p, the newly unused + space is lopped off and freed if possible. realloc with a size + argument of zero (re)allocates a minimum-sized chunk. + + The old unix realloc convention of allowing the last-free'd chunk + to be used as an argument to realloc is not supported. +*/ + +void* dlrealloc(void*, size_t); + +/* + memalign(size_t alignment, size_t n); + Returns a pointer to a newly allocated chunk of n bytes, aligned + in accord with the alignment argument. + + The alignment argument should be a power of two. If the argument is + not a power of two, the nearest greater power is used. + 8-byte alignment is guaranteed by normal malloc calls, so don't + bother calling memalign with an argument of 8 or less. + + Overreliance on memalign is a sure way to fragment space. 
+*/ +void* dlmemalign(size_t, size_t); + +/* + valloc(size_t n); + Equivalent to memalign(pagesize, n), where pagesize is the page + size of the system. If the pagesize is unknown, 4096 is used. +*/ +void* dlvalloc(size_t); + +/* + mallopt(int parameter_number, int parameter_value) + Sets tunable parameters. The format is to provide a + (parameter-number, parameter-value) pair. mallopt then sets the + corresponding parameter to the argument value if it can (i.e., so + long as the value is meaningful), and returns 1 if successful else + 0. To workaround the fact that mallopt is specified to use int, + not size_t parameters, the value -1 is specially treated as the + maximum unsigned size_t value. + + SVID/XPG/ANSI defines four standard param numbers for mallopt, + normally defined in malloc.h. None of these are used in this malloc, + so setting them has no effect. But this malloc also supports other + options in mallopt. See below for details. Briefly, supported + parameters are as follows (listed defaults are for "typical" + configurations). + + Symbol param # default allowed param values + M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables) + M_GRANULARITY -2 page size any power of 2 >= page size + M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) +*/ +int dlmallopt(int, int); + +/* + malloc_footprint(); + Returns the number of bytes obtained from the system. The total + number of bytes allocated by malloc, realloc etc., is less than this + value. Unlike mallinfo, this function returns only a precomputed + result, so can be called frequently to monitor memory consumption. + Even if locks are otherwise defined, this function does not use them, + so results might not be up to date. +*/ +size_t dlmalloc_footprint(void); + +/* + malloc_max_footprint(); + Returns the maximum number of bytes obtained from the system. This + value will be greater than current footprint if deallocated space + has been reclaimed by the system.
The peak number of bytes allocated + by malloc, realloc etc., is less than this value. Unlike mallinfo, + this function returns only a precomputed result, so can be called + frequently to monitor memory consumption. Even if locks are + otherwise defined, this function does not use them, so results might + not be up to date. +*/ +size_t dlmalloc_max_footprint(void); + +#if !NO_MALLINFO +/* + mallinfo() + Returns (by copy) a struct containing various summary statistics: + + arena: current total non-mmapped bytes allocated from system + ordblks: the number of free chunks + smblks: always zero. + hblks: current number of mmapped regions + hblkhd: total bytes held in mmapped regions + usmblks: the maximum total allocated space. This will be greater + than current total if trimming has occurred. + fsmblks: always zero + uordblks: current total allocated space (normal or mmapped) + fordblks: total free space + keepcost: the maximum number of bytes that could ideally be released + back to system via malloc_trim. ("ideally" means that + it ignores page restrictions etc.) + + Because these fields are ints, but internal bookkeeping may + be kept as longs, the reported values may wrap around zero and + thus be inaccurate. +*/ +struct mallinfo dlmallinfo(void); +#endif /* NO_MALLINFO */ + +/* + independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); + + independent_calloc is similar to calloc, but instead of returning a + single cleared space, it returns an array of pointers to n_elements + independent elements that can hold contents of size elem_size, each + of which starts out cleared, and can be independently freed, + realloc'ed etc. The elements are guaranteed to be adjacently + allocated (this is not guaranteed to occur with multiple callocs or + mallocs), which may also improve cache locality in some + applications. + + The "chunks" argument is optional (i.e., may be null, which is + probably the most typical usage). 
If it is null, the returned array + is itself dynamically allocated and should also be freed when it is + no longer needed. Otherwise, the chunks array must be of at least + n_elements in length. It is filled in with the pointers to the + chunks. + + In either case, independent_calloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and "chunks" + is null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use regular calloc and assign pointers into this + space to represent elements. (In this case though, you cannot + independently free elements.) + + independent_calloc simplifies and speeds up implementations of many + kinds of pools. It may also be useful when constructing large data + structures that initially have a fixed number of fixed-sized nodes, + but the number is not known at compile time, and some of the nodes + may later need to be freed. For example: + + struct Node { int item; struct Node* next; }; + + struct Node* build_list() { + struct Node** pool; + int n = read_number_of_nodes_needed(); + if (n <= 0) return 0; + pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0)); + if (pool == 0) die(); + // organize into a linked list... + struct Node* first = pool[0]; + for (int i = 0; i < n-1; ++i) + pool[i]->next = pool[i+1]; + free(pool); // Can now free the array (or not, if it is needed later) + return first; + } +*/ +void** dlindependent_calloc(size_t, size_t, void**); + +/* + independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); + + independent_comalloc allocates, all at once, a set of n_elements + chunks with sizes indicated in the "sizes" array. It returns + an array of pointers to these elements, each of which can be + independently freed, realloc'ed etc.
The elements are guaranteed to + be adjacently allocated (this is not guaranteed to occur with + multiple callocs or mallocs), which may also improve cache locality + in some applications. + + The "chunks" argument is optional (i.e., may be null). If it is null + the returned array is itself dynamically allocated and should also + be freed when it is no longer needed. Otherwise, the chunks array + must be of at least n_elements in length. It is filled in with the + pointers to the chunks. + + In either case, independent_comalloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and chunks is + null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use a single regular malloc, and assign pointers at + particular offsets in the aggregate space. (In this case though, you + cannot independently free elements.) + + independent_comalloc differs from independent_calloc in that each + element may have a different size, and also that it does not + automatically clear elements. + + independent_comalloc can be used to speed up allocation in cases + where several structs or objects must always be allocated at the + same time. For example: + + struct Head { ... } + struct Foot { ... } + + void send_message(char* msg) { + int msglen = strlen(msg); + size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; + void* chunks[3]; + if (independent_comalloc(3, sizes, chunks) == 0) + die(); + struct Head* head = (struct Head*)(chunks[0]); + char* body = (char*)(chunks[1]); + struct Foot* foot = (struct Foot*)(chunks[2]); + // ... + } + + In general though, independent_comalloc is worth using only for + larger values of n_elements. For small values, you probably won't + detect enough difference from series of malloc calls to bother.
+ + Overuse of independent_comalloc can increase overall memory usage, + since it cannot reuse existing noncontiguous small chunks that + might be available for some of the elements. +*/ +void** dlindependent_comalloc(size_t, size_t*, void**); + + +/* + pvalloc(size_t n); + Equivalent to valloc(minimum-page-that-holds(n)), that is, + round up n to nearest pagesize. + */ +void* dlpvalloc(size_t); + +/* + malloc_trim(size_t pad); + + If possible, gives memory back to the system (via negative arguments + to sbrk) if there is unused memory at the `high' end of the malloc + pool or in unused MMAP segments. You can call this after freeing + large blocks of memory to potentially reduce the system-level memory + requirements of a program. However, it cannot guarantee to reduce + memory. Under some allocation patterns, some large free blocks of + memory will be locked between two used chunks, so they cannot be + given back to the system. + + The `pad' argument to malloc_trim represents the amount of free + trailing space to leave untrimmed. If this argument is zero, only + the minimum amount of memory to maintain internal data structures + will be left. Non-zero arguments can be supplied to maintain enough + trailing space to service future expected allocations without having + to re-obtain memory from the system. + + Malloc_trim returns 1 if it actually released any memory, else 0. +*/ +int dlmalloc_trim(size_t); + +/* + malloc_stats(); + Prints on stderr the amount of space obtained from the system (both + via sbrk and mmap), the maximum amount (which may be more than + current if malloc_trim and/or munmap got called), and the current + number of bytes allocated via malloc (or realloc, etc) but not yet + freed. Note that this is the number of bytes allocated, not the + number requested. It will be larger than the number requested + because of alignment and bookkeeping overhead. 
Because it includes + alignment wastage as being in use, this figure may be greater than + zero even when no user-level chunks are allocated. + + The reported current and maximum system memory can be inaccurate if + a program makes other calls to system memory allocation functions + (normally sbrk) outside of malloc. + + malloc_stats prints only the most commonly interesting statistics. + More information can be obtained by calling mallinfo. +*/ +void dlmalloc_stats(void); + +#endif /* ONLY_MSPACES */ + +/* + malloc_usable_size(void* p); + + Returns the number of bytes you can actually use in + an allocated chunk, which may be more than you requested (although + often not) due to alignment and minimum size constraints. + You can use this many bytes without worrying about + overwriting other allocated objects. This is not a particularly great + programming practice. malloc_usable_size can be more useful in + debugging and assertions, for example: + + p = malloc(n); + assert(malloc_usable_size(p) >= 256); +*/ +size_t dlmalloc_usable_size(void*); + + +#if MSPACES + +/* + mspace is an opaque type representing an independent + region of space that supports mspace_malloc, etc. +*/ +typedef void* mspace; + +/* + create_mspace creates and returns a new independent space with the + given initial capacity, or, if 0, the default granularity size. It + returns null if there is no system memory available to create the + space. If argument locked is non-zero, the space uses a separate + lock to control access. The capacity of the space will grow + dynamically as needed to service mspace_malloc requests. You can + control the sizes of incremental increases of this space by + compiling with a different DEFAULT_GRANULARITY or dynamically + setting with mallopt(M_GRANULARITY, value). 
+*/ +mspace create_mspace(size_t capacity, int locked); + +/* + destroy_mspace destroys the given space, and attempts to return all + of its memory back to the system, returning the total number of + bytes freed. After destruction, the results of access to all memory + used by the space become undefined. +*/ +size_t destroy_mspace(mspace msp); + +/* + create_mspace_with_base uses the memory supplied as the initial base + of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this + space is used for bookkeeping, so the capacity must be at least this + large. (Otherwise 0 is returned.) When this initial space is + exhausted, additional memory will be obtained from the system. + Destroying this space will deallocate all additionally allocated + space (if possible) but not the initial base. +*/ +mspace create_mspace_with_base(void* base, size_t capacity, int locked); + +/* + mspace_track_large_chunks controls whether requests for large chunks + are allocated in their own untracked mmapped regions, separate from + others in this mspace. By default large chunks are not tracked, + which reduces fragmentation. However, such chunks are not + necessarily released to the system upon destroy_mspace. Enabling + tracking by setting to true may increase fragmentation, but avoids + leakage when relying on destroy_mspace to release all memory + allocated using this space. The function returns the previous + setting. +*/ +int mspace_track_large_chunks(mspace msp, int enable); + + +/* + mspace_malloc behaves as malloc, but operates within + the given space. +*/ +void* mspace_malloc(mspace msp, size_t bytes); + +/* + mspace_free behaves as free, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_free is not actually needed. + free may be called instead of mspace_free because freed chunks from + any space are handled by their originating spaces. 
+*/ +void mspace_free(mspace msp, void* mem); + +/* + mspace_realloc behaves as realloc, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_realloc is not actually + needed. realloc may be called instead of mspace_realloc because + realloced chunks from any space are handled by their originating + spaces. +*/ +void* mspace_realloc(mspace msp, void* mem, size_t newsize); + +/* + mspace_calloc behaves as calloc, but operates within + the given space. +*/ +void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); + +/* + mspace_memalign behaves as memalign, but operates within + the given space. +*/ +void* mspace_memalign(mspace msp, size_t alignment, size_t bytes); + +/* + mspace_independent_calloc behaves as independent_calloc, but + operates within the given space. +*/ +void** mspace_independent_calloc(mspace msp, size_t n_elements, + size_t elem_size, void* chunks[]); + +/* + mspace_independent_comalloc behaves as independent_comalloc, but + operates within the given space. +*/ +void** mspace_independent_comalloc(mspace msp, size_t n_elements, + size_t sizes[], void* chunks[]); + +/* + mspace_footprint() returns the number of bytes obtained from the + system for this space. +*/ +size_t mspace_footprint(mspace msp); + +/* + mspace_max_footprint() returns the peak number of bytes obtained from the + system for this space. +*/ +size_t mspace_max_footprint(mspace msp); + + +#if !NO_MALLINFO +/* + mspace_mallinfo behaves as mallinfo, but reports properties of + the given space. +*/ +struct mallinfo mspace_mallinfo(mspace msp); +#endif /* NO_MALLINFO */ + +/* + mspace_usable_size(void* mem) behaves the same as malloc_usable_size; +*/ + size_t mspace_usable_size(void* mem); + +/* + mspace_malloc_stats behaves as malloc_stats, but reports + properties of the given space. +*/ +void mspace_malloc_stats(mspace msp); + +/* + mspace_trim behaves as malloc_trim, but + operates within the given space. 
+*/ +int mspace_trim(mspace msp, size_t pad); + +/* + An alias for mallopt. +*/ +int mspace_mallopt(int, int); + +#endif /* MSPACES */ + +#ifdef __cplusplus +}; /* end of extern "C" */ +#endif /* __cplusplus */ + +/* + ======================================================================== + To make a fully customizable malloc.h header file, cut everything + above this line, put into file malloc.h, edit to suit, and #include it + on the next line, as well as in programs that use this malloc. + ======================================================================== +*/ + +/* #include "malloc.h" */ + +/*------------------------------ internal #includes ---------------------- */ + +#ifdef WIN32 +#pragma warning( disable : 4146 ) /* no "unsigned" warnings */ +#endif /* WIN32 */ + +#include <stdio.h> /* for printing in malloc_stats */ + +#ifndef LACKS_ERRNO_H +#include <errno.h> /* for MALLOC_FAILURE_ACTION */ +#endif /* LACKS_ERRNO_H */ +#if FOOTERS || DEBUG +#include <time.h> /* for magic initialization */ +#endif /* FOOTERS */ +#ifndef LACKS_STDLIB_H +#include <stdlib.h> /* for abort() */ +#endif /* LACKS_STDLIB_H */ +#ifdef DEBUG +#if ABORT_ON_ASSERT_FAILURE +#undef assert +#define assert(x) if(!(x)) ABORT +#else /* ABORT_ON_ASSERT_FAILURE */ +#include <assert.h> /* for assert() */ +#endif /* ABORT_ON_ASSERT_FAILURE */ +#else /* DEBUG */ +#ifndef assert +#define assert(x) +#endif +#define DEBUG 0 +#endif /* DEBUG */ +#ifndef LACKS_STRING_H +#include <string.h> /* for memset etc */ +#endif /* LACKS_STRING_H */ +#if USE_BUILTIN_FFS +#ifndef LACKS_STRINGS_H +#include <strings.h> /* for ffs */ +#endif /* LACKS_STRINGS_H */ +#endif /* USE_BUILTIN_FFS */ +#if HAVE_MMAP +#ifndef LACKS_SYS_MMAN_H +/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */ +#if (defined(linux) && !defined(__USE_GNU)) +#define __USE_GNU 1 +#include <sys/mman.h> /* for mmap */ +#undef __USE_GNU +#else +#include <sys/mman.h> /* for mmap */ +#endif /* linux */ +#endif /* LACKS_SYS_MMAN_H */ +#ifndef LACKS_FCNTL_H +#include <fcntl.h> +#endif /* LACKS_FCNTL_H */ +#endif /* HAVE_MMAP */ +#ifndef 
LACKS_UNISTD_H +#include <unistd.h> /* for sbrk, sysconf */ +#else /* LACKS_UNISTD_H */ +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) +extern void* sbrk(ptrdiff_t); +#endif /* FreeBSD etc */ +#endif /* LACKS_UNISTD_H */ + +/* Declarations for locking */ +#if USE_LOCKS +#ifndef WIN32 +#include <pthread.h> +#if defined (__SVR4) && defined (__sun) /* solaris */ +#include <thread.h> +#endif /* solaris */ +#else +#ifndef _M_AMD64 +/* These are already defined on AMD64 builds */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp); +LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value); +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* _M_AMD64 */ +#pragma intrinsic (_InterlockedCompareExchange) +#pragma intrinsic (_InterlockedExchange) +#define interlockedcompareexchange _InterlockedCompareExchange +#define interlockedexchange _InterlockedExchange +#endif /* Win32 */ +#endif /* USE_LOCKS */ + +/* Declarations for bit scanning on win32 */ +#if defined(_MSC_VER) && _MSC_VER>=1300 +#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +unsigned char _BitScanForward(unsigned long *index, unsigned long mask); +unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#define BitScanForward _BitScanForward +#define BitScanReverse _BitScanReverse +#pragma intrinsic(_BitScanForward) +#pragma intrinsic(_BitScanReverse) +#endif /* BitScanForward */ +#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */ + +#ifndef WIN32 +#ifndef malloc_getpagesize +# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */ +# ifndef _SC_PAGE_SIZE +# define _SC_PAGE_SIZE _SC_PAGESIZE +# endif +# endif +# ifdef _SC_PAGE_SIZE +# define malloc_getpagesize sysconf(_SC_PAGE_SIZE) +# else +# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE) + 
extern size_t getpagesize(); +# define malloc_getpagesize getpagesize() +# else +# ifdef WIN32 /* use supplied emulation of getpagesize */ +# define malloc_getpagesize getpagesize() +# else +# ifndef LACKS_SYS_PARAM_H +# include <sys/param.h> +# endif +# ifdef EXEC_PAGESIZE +# define malloc_getpagesize EXEC_PAGESIZE +# else +# ifdef NBPG +# ifndef CLSIZE +# define malloc_getpagesize NBPG +# else +# define malloc_getpagesize (NBPG * CLSIZE) +# endif +# else +# ifdef NBPC +# define malloc_getpagesize NBPC +# else +# ifdef PAGESIZE +# define malloc_getpagesize PAGESIZE +# else /* just guess */ +# define malloc_getpagesize ((size_t)4096U) +# endif +# endif +# endif +# endif +# endif +# endif +# endif +#endif +#endif + + + +/* ------------------- size_t and alignment properties -------------------- */ + +/* The byte and bit size of a size_t */ +#define SIZE_T_SIZE (sizeof(size_t)) +#define SIZE_T_BITSIZE (sizeof(size_t) << 3) + +/* Some constants coerced to size_t */ +/* Annoying but necessary to avoid errors on some platforms */ +#define SIZE_T_ZERO ((size_t)0) +#define SIZE_T_ONE ((size_t)1) +#define SIZE_T_TWO ((size_t)2) +#define SIZE_T_FOUR ((size_t)4) +#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) +#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) +#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) +#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U) + +/* The bit mask value corresponding to MALLOC_ALIGNMENT */ +#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE) + +/* True if address a has acceptable alignment */ +#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0) + +/* the number of bytes to offset an address to align it */ +#define align_offset(A)\ + ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 
0 :\ + ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) + +/* -------------------------- MMAP preliminaries ------------------------- */ + +/* + If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and + checks to fail so compiler optimizer can delete code rather than + using so many "#if"s. +*/ + + +/* MORECORE and MMAP must return MFAIL on failure */ +#define MFAIL ((void*)(MAX_SIZE_T)) +#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */ + +#if HAVE_MMAP + +#ifndef WIN32 +#define MUNMAP_DEFAULT(a, s) munmap((a), (s)) +#define MMAP_PROT (PROT_READ|PROT_WRITE) +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +#define MAP_ANONYMOUS MAP_ANON +#endif /* MAP_ANON */ +#ifdef MAP_ANONYMOUS +#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) +#define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) +#else /* MAP_ANONYMOUS */ +/* + Nearly all versions of mmap support MAP_ANONYMOUS, so the following + is unlikely to be needed, but is supplied just in case. +*/ +#define MMAP_FLAGS (MAP_PRIVATE) +static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ +#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \ + (dev_zero_fd = open("/dev/zero", O_RDWR), \ + mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ + mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) +#endif /* MAP_ANONYMOUS */ + +#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) + +#else /* WIN32 */ + +/* Win32 MMAP via VirtualAlloc */ +static FORCEINLINE void* win32mmap(size_t size) { + void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); + return (ptr != 0)? ptr: MFAIL; +} + +/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ +static FORCEINLINE void* win32direct_mmap(size_t size) { + void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, + PAGE_READWRITE); + return (ptr != 0)? 
ptr: MFAIL; +} + +/* This function supports releasing coalesed segments */ +static FORCEINLINE int win32munmap(void* ptr, size_t size) { + MEMORY_BASIC_INFORMATION minfo; + char* cptr = (char*)ptr; + while (size) { + if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) + return -1; + if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || + minfo.State != MEM_COMMIT || minfo.RegionSize > size) + return -1; + if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) + return -1; + cptr += minfo.RegionSize; + size -= minfo.RegionSize; + } + return 0; +} + +#define MMAP_DEFAULT(s) win32mmap(s) +#define MUNMAP_DEFAULT(a, s) win32munmap((a), (s)) +#define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s) +#endif /* WIN32 */ +#endif /* HAVE_MMAP */ + +#if HAVE_MREMAP +#ifndef WIN32 +#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) +#endif /* WIN32 */ +#endif /* HAVE_MREMAP */ + + +/** + * Define CALL_MORECORE + */ +#if HAVE_MORECORE + #ifdef MORECORE + #define CALL_MORECORE(S) MORECORE(S) + #else /* MORECORE */ + #define CALL_MORECORE(S) MORECORE_DEFAULT(S) + #endif /* MORECORE */ +#else /* HAVE_MORECORE */ + #define CALL_MORECORE(S) MFAIL +#endif /* HAVE_MORECORE */ + +/** + * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP + */ +#if HAVE_MMAP + #define USE_MMAP_BIT (SIZE_T_ONE) + + #ifdef MMAP + #define CALL_MMAP(s) MMAP(s) + #else /* MMAP */ + #define CALL_MMAP(s) MMAP_DEFAULT(s) + #endif /* MMAP */ + #ifdef MUNMAP + #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) + #else /* MUNMAP */ + #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s)) + #endif /* MUNMAP */ + #ifdef DIRECT_MMAP + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) + #else /* DIRECT_MMAP */ + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s) + #endif /* DIRECT_MMAP */ +#else /* HAVE_MMAP */ + #define USE_MMAP_BIT (SIZE_T_ZERO) + + #define MMAP(s) MFAIL + #define MUNMAP(a, s) (-1) + #define DIRECT_MMAP(s) MFAIL + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) + #define CALL_MMAP(s) MMAP(s) + #define 
CALL_MUNMAP(a, s) MUNMAP((a), (s)) +#endif /* HAVE_MMAP */ + +/** + * Define CALL_MREMAP + */ +#if HAVE_MMAP && HAVE_MREMAP + #ifdef MREMAP + #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv)) + #else /* MREMAP */ + #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv)) + #endif /* MREMAP */ +#else /* HAVE_MMAP && HAVE_MREMAP */ + #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL +#endif /* HAVE_MMAP && HAVE_MREMAP */ + +/* mstate bit set if contiguous morecore disabled or failed */ +#define USE_NONCONTIGUOUS_BIT (4U) + +/* segment bit set in create_mspace_with_base */ +#define EXTERN_BIT (8U) + + +/* --------------------------- Lock preliminaries ------------------------ */ + +/* + When locks are defined, there is one global lock, plus + one per-mspace lock. + + The global lock ensures that mparams.magic and other unique + mparams values are initialized only once. It also protects + sequences of calls to MORECORE. In many cases sys_alloc requires + two calls that should not be interleaved with calls by other + threads. This does not protect against direct calls to MORECORE + by other threads not using this lock, so there is still code to + cope as best we can with interference. + + Per-mspace locks surround calls to malloc, free, etc. To enable use + in layered extensions, per-mspace locks are reentrant. + + Because lock-protected regions generally have bounded times, it is + OK to use the supplied simple spinlocks in the custom versions for + x86. Spinlocks are likely to improve performance for lightly + contended applications, but worsen performance under heavy + contention. + + If USE_LOCKS is > 1, the definitions of lock routines here are + bypassed, in which case you will need to define the type MLOCK_T, + and at least INITIAL_LOCK, ACQUIRE_LOCK, RELEASE_LOCK and possibly + TRY_LOCK (which is not used in this malloc, but commonly needed in + extensions.) 
You must also declare a + static MLOCK_T malloc_global_mutex = { initialization values };. + +*/ + +#if USE_LOCKS == 1 + +#if USE_SPIN_LOCKS && SPIN_LOCKS_AVAILABLE +#ifndef WIN32 + +/* Custom pthread-style spin locks on x86 and x64 for gcc */ +struct pthread_mlock_t { + volatile unsigned int l; + unsigned int c; + pthread_t threadid; +}; +#define MLOCK_T struct pthread_mlock_t +#define CURRENT_THREAD pthread_self() +#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0) +#define ACQUIRE_LOCK(sl) pthread_acquire_lock(sl) +#define RELEASE_LOCK(sl) pthread_release_lock(sl) +#define TRY_LOCK(sl) pthread_try_lock(sl) +#define SPINS_PER_YIELD 63 + +static MLOCK_T malloc_global_mutex = { 0, 0, 0}; + +static FORCEINLINE int pthread_acquire_lock (MLOCK_T *sl) { + int spins = 0; + volatile unsigned int* lp = &sl->l; + for (;;) { + if (*lp != 0) { + if (sl->threadid == CURRENT_THREAD) { + ++sl->c; + return 0; + } + } + else { + /* place args to cmpxchgl in locals to evade oddities in some gccs */ + int cmp = 0; + int val = 1; + int ret; + __asm__ __volatile__ ("lock; cmpxchgl %1, %2" + : "=a" (ret) + : "r" (val), "m" (*(lp)), "0"(cmp) + : "memory", "cc"); + if (!ret) { + assert(!sl->threadid); + sl->threadid = CURRENT_THREAD; + sl->c = 1; + return 0; + } + } + if ((++spins & SPINS_PER_YIELD) == 0) { +#if defined (__SVR4) && defined (__sun) /* solaris */ + thr_yield(); +#else +#if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) + sched_yield(); +#else /* no-op yield on unknown systems */ + ; +#endif /* __linux__ || __FreeBSD__ || __APPLE__ */ +#endif /* solaris */ + } + } +} + +static FORCEINLINE void pthread_release_lock (MLOCK_T *sl) { + volatile unsigned int* lp = &sl->l; + assert(*lp != 0); + assert(sl->threadid == CURRENT_THREAD); + if (--sl->c == 0) { + sl->threadid = 0; + int prev = 0; + int ret; + __asm__ __volatile__ ("lock; xchgl %0, %1" + : "=r" (ret) + : "m" (*(lp)), "0"(prev) + : "memory"); + } +} + +static FORCEINLINE int 
pthread_try_lock (MLOCK_T *sl) { + volatile unsigned int* lp = &sl->l; + if (*lp != 0) { + if (sl->threadid == CURRENT_THREAD) { + ++sl->c; + return 1; + } + } + else { + int cmp = 0; + int val = 1; + int ret; + __asm__ __volatile__ ("lock; cmpxchgl %1, %2" + : "=a" (ret) + : "r" (val), "m" (*(lp)), "0"(cmp) + : "memory", "cc"); + if (!ret) { + assert(!sl->threadid); + sl->threadid = CURRENT_THREAD; + sl->c = 1; + return 1; + } + } + return 0; +} + + +#else /* WIN32 */ +/* Custom win32-style spin locks on x86 and x64 for MSC */ +struct win32_mlock_t { + volatile long l; + unsigned int c; + long threadid; +}; + +#define MLOCK_T struct win32_mlock_t +#define CURRENT_THREAD GetCurrentThreadId() +#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0) +#define ACQUIRE_LOCK(sl) win32_acquire_lock(sl) +#define RELEASE_LOCK(sl) win32_release_lock(sl) +#define TRY_LOCK(sl) win32_try_lock(sl) +#define SPINS_PER_YIELD 63 + +static MLOCK_T malloc_global_mutex = { 0, 0, 0}; + +static FORCEINLINE int win32_acquire_lock (MLOCK_T *sl) { + int spins = 0; + for (;;) { + if (sl->l != 0) { + if (sl->threadid == CURRENT_THREAD) { + ++sl->c; + return 0; + } + } + else { + if (!interlockedexchange(&sl->l, 1)) { + assert(!sl->threadid); + sl->threadid = CURRENT_THREAD; + sl->c = 1; + return 0; + } + } + if ((++spins & SPINS_PER_YIELD) == 0) + SleepEx(0, FALSE); + } +} + +static FORCEINLINE void win32_release_lock (MLOCK_T *sl) { + assert(sl->threadid == CURRENT_THREAD); + assert(sl->l != 0); + if (--sl->c == 0) { + sl->threadid = 0; + interlockedexchange (&sl->l, 0); + } +} + +static FORCEINLINE int win32_try_lock (MLOCK_T *sl) { + if (sl->l != 0) { + if (sl->threadid == CURRENT_THREAD) { + ++sl->c; + return 1; + } + } + else { + if (!interlockedexchange(&sl->l, 1)){ + assert(!sl->threadid); + sl->threadid = CURRENT_THREAD; + sl->c = 1; + return 1; + } + } + return 0; +} + +#endif /* WIN32 */ +#else /* USE_SPIN_LOCKS */ + +#ifndef WIN32 +/* pthreads-based locks */ + 
+#define MLOCK_T pthread_mutex_t +#define CURRENT_THREAD pthread_self() +#define INITIAL_LOCK(sl) pthread_init_lock(sl) +#define ACQUIRE_LOCK(sl) pthread_mutex_lock(sl) +#define RELEASE_LOCK(sl) pthread_mutex_unlock(sl) +#define TRY_LOCK(sl) (!pthread_mutex_trylock(sl)) + +static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* Cope with old-style linux recursive lock initialization by adding */ +/* skipped internal declaration from pthread.h */ +#ifdef linux +#ifndef PTHREAD_MUTEX_RECURSIVE +extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr, + int __kind)); +#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP +#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y) +#endif +#endif + +static int pthread_init_lock (MLOCK_T *sl) { + pthread_mutexattr_t attr; + if (pthread_mutexattr_init(&attr)) return 1; + if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) return 1; + if (pthread_mutex_init(sl, &attr)) return 1; + if (pthread_mutexattr_destroy(&attr)) return 1; + return 0; +} + +#else /* WIN32 */ +/* Win32 critical sections */ +/* Each lock macro names its parameter sl and expands only that parameter, */ +/* so the lock passed by the caller is the lock that is operated on. */ +#define MLOCK_T CRITICAL_SECTION +#define CURRENT_THREAD GetCurrentThreadId() +#define INITIAL_LOCK(sl) (!InitializeCriticalSectionAndSpinCount((sl), 0x80000000|4000)) +#define ACQUIRE_LOCK(sl) (EnterCriticalSection(sl), 0) +#define RELEASE_LOCK(sl) LeaveCriticalSection(sl) +#define TRY_LOCK(sl) TryEnterCriticalSection(sl) +#define NEED_GLOBAL_LOCK_INIT + +static MLOCK_T malloc_global_mutex; +static volatile long malloc_global_mutex_status; + +/* Use spin loop to initialize global lock */ +static void init_malloc_global_mutex() { + for (;;) { + long stat = malloc_global_mutex_status; + if (stat > 0) + return; + /* transition to < 0 while initializing, then to > 0) */ + if (stat == 0 && + interlockedcompareexchange(&malloc_global_mutex_status, -1, 0) == 0) { + InitializeCriticalSection(&malloc_global_mutex); + interlockedexchange(&malloc_global_mutex_status,1); + return; + } + 
SleepEx(0, FALSE); + } +} + +#endif /* WIN32 */ +#endif /* USE_SPIN_LOCKS */ +#endif /* USE_LOCKS == 1 */ + +/* ----------------------- User-defined locks ------------------------ */ + +#if USE_LOCKS > 1 +/* Define your own lock implementation here */ +/* #define INITIAL_LOCK(sl) ... */ +/* #define ACQUIRE_LOCK(sl) ... */ +/* #define RELEASE_LOCK(sl) ... */ +/* #define TRY_LOCK(sl) ... */ +/* static MLOCK_T malloc_global_mutex = ... */ +#endif /* USE_LOCKS > 1 */ + +/* ----------------------- Lock-based state ------------------------ */ + +#if USE_LOCKS +#define USE_LOCK_BIT (2U) +#else /* USE_LOCKS */ +#define USE_LOCK_BIT (0U) +#define INITIAL_LOCK(l) +#endif /* USE_LOCKS */ + +#if USE_LOCKS +#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK +#define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex); +#endif +#ifndef RELEASE_MALLOC_GLOBAL_LOCK +#define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex); +#endif +#else /* USE_LOCKS */ +#define ACQUIRE_MALLOC_GLOBAL_LOCK() +#define RELEASE_MALLOC_GLOBAL_LOCK() +#endif /* USE_LOCKS */ + + +/* ----------------------- Chunk representations ------------------------ */ + +/* + (The following includes lightly edited explanations by Colin Plumb.) + + The malloc_chunk declaration below is misleading (but accurate and + necessary). It declares a "view" into memory allowing access to + necessary fields at known offsets from a given base. + + Chunks of memory are maintained using a `boundary tag' method as + originally described by Knuth. (See the paper by Paul Wilson + ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such + techniques.) Sizes of free chunks are stored both in the front of + each chunk and at the end. This makes consolidating fragmented + chunks into bigger chunks fast. The head fields also hold bits + representing whether chunks are free or in use. + + Here are some pictures to make it clearer. 
They are "exploded" to + show that the state of a chunk can be thought of as extending from + the high 31 bits of the head field of its header through the + prev_foot and PINUSE_BIT bit of the following chunk header. + + A chunk that's in use looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk (if P = 0) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| + | Size of this chunk 1| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + +- -+ + | | + +- -+ + | : + +- size - sizeof(size_t) available payload bytes -+ + : | + chunk-> +- -+ + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1| + | Size of next chunk (may or may not be in use) | +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + And if it's free, it looks like this: + + chunk-> +- -+ + | User payload (must be in use, or we would have merged!) 
| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| + | Size of this chunk 0| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Next pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Prev pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | : + +- size - sizeof(struct chunk) unused bytes -+ + : | + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0| + | Size of next chunk (must be in use, or we would have merged)| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | : + +- User payload -+ + : | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |0| + +-+ + Note that since we always merge adjacent free chunks, the chunks + adjacent to a free chunk must be in use. + + Given a pointer to a chunk (which can be derived trivially from the + payload pointer) we can, in O(1) time, find out whether the adjacent + chunks are free, and if so, unlink them from the lists that they + are on and merge them with the current chunk. + + Chunks always begin on even word boundaries, so the mem portion + (which is returned to the user) is also on an even word boundary, and + thus at least double-word aligned. + + The P (PINUSE_BIT) bit, stored in the unused low-order bit of the + chunk size (which is always a multiple of two words), is an in-use + bit for the *previous* chunk. If that bit is *clear*, then the + word before the current chunk size contains the previous chunk + size, and can be used to find the front of the previous chunk. + The very first chunk allocated always has this bit set, preventing + access to non-existent (or non-owned) memory. 
If pinuse is set for + any given chunk, then you CANNOT determine the size of the + previous chunk, and might even get a memory addressing fault when + trying to do so. + + The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of + the chunk size, redundantly records whether the current chunk is + inuse (unless the chunk is mmapped). This redundancy enables usage + checks within free and realloc, and reduces indirection when freeing + and consolidating chunks. + + Each freshly allocated chunk must have both cinuse and pinuse set. + That is, each allocated chunk borders either a previously allocated + and still in-use chunk, or the base of its memory arena. This is + ensured by making all allocations from the `lowest' part of any + found chunk. Further, no free chunk physically borders another one, + so each free chunk is known to be preceded and followed by either + inuse chunks or the ends of memory. + + Note that the `foot' of the current chunk is actually represented + as the prev_foot of the NEXT chunk. This makes it easier to + deal with alignments etc but can be very confusing when trying + to extend or adapt this code. + + The exceptions to all this are + + 1. The special chunk `top' is the top-most available chunk (i.e., + the one bordering the end of available memory). It is treated + specially. Top is never included in any bin, is used only if + no other chunk is available, and is released back to the + system if it is very large (see M_TRIM_THRESHOLD). In effect, + the top chunk is treated as larger (and thus less well + fitting) than any other available chunk. The top chunk + doesn't update its trailing size field since there is no next + contiguous chunk that would have to index off it. However, + space is still allocated for it (TOP_FOOT_SIZE) to enable + separation or merging when space is extended. + + 2. Chunks allocated via mmap have both cinuse and pinuse bits + cleared in their head fields. 
Because they are allocated + one-by-one, each must carry its own prev_foot field, which is + also used to hold the offset this chunk has within its mmapped + region, which is needed to preserve alignment. Each mmapped + chunk is trailed by the first two fields of a fake next-chunk + for sake of usage checks. + +*/ + +struct malloc_chunk { + size_t prev_foot; /* Size of previous chunk (if free). */ + size_t head; /* Size and inuse bits. */ + struct malloc_chunk* fd; /* double links -- used only if free. */ + struct malloc_chunk* bk; +}; + +typedef struct malloc_chunk mchunk; +typedef struct malloc_chunk* mchunkptr; +typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */ +typedef unsigned int bindex_t; /* Described below */ +typedef unsigned int binmap_t; /* Described below */ +typedef unsigned int flag_t; /* The type of various bit flag sets */ + +/* ------------------- Chunks sizes and alignments ----------------------- */ + +#define MCHUNK_SIZE (sizeof(mchunk)) + +#if FOOTERS +#define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) +#else /* FOOTERS */ +#define CHUNK_OVERHEAD (SIZE_T_SIZE) +#endif /* FOOTERS */ + +/* MMapped chunks need a second word of overhead ... */ +#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) +/* ... and additional padding for fake next-chunk at foot */ +#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES) + +/* The smallest size we can malloc is an aligned minimal chunk */ +#define MIN_CHUNK_SIZE\ + ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +/* conversion from malloc headers to user pointers, and back */ +#define chunk2mem(p) ((void*)((char*)(p) + TWO_SIZE_T_SIZES)) +#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES)) +/* chunk associated with aligned address A */ +#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A))) + +/* Bounds on request (not chunk) sizes. 
*/ +#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2) +#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE) + +/* pad request bytes into a usable size */ +#define pad_request(req) \ + (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +/* pad request, checking for minimum (but not maximum) */ +#define request2size(req) \ + (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req)) + + +/* ------------------ Operations on head and foot fields ----------------- */ + +/* + The head field of a chunk is or'ed with PINUSE_BIT when previous + adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in + use, unless mmapped, in which case both bits are cleared. + + FLAG4_BIT is not used by this malloc, but might be useful in extensions. +*/ + +#define PINUSE_BIT (SIZE_T_ONE) +#define CINUSE_BIT (SIZE_T_TWO) +#define FLAG4_BIT (SIZE_T_FOUR) +#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT) +#define FLAG_BITS (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT) + +/* Head value for fenceposts */ +#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE) + +/* extraction of fields from head words */ +#define cinuse(p) ((p)->head & CINUSE_BIT) +#define pinuse(p) ((p)->head & PINUSE_BIT) +#define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT) +#define is_mmapped(p) (((p)->head & INUSE_BITS) == 0) + +#define chunksize(p) ((p)->head & ~(FLAG_BITS)) + +#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT) + +/* Treat space at ptr +/- offset as a chunk */ +#define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) +#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s))) + +/* Ptr to next or previous physical malloc_chunk. 
*/ +#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS))) +#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) )) + +/* extract next chunk's pinuse bit */ +#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT) + +/* Get/set size at footer */ +#define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot) +#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s)) + +/* Set size, pinuse bit, and foot */ +#define set_size_and_pinuse_of_free_chunk(p, s)\ + ((p)->head = (s|PINUSE_BIT), set_foot(p, s)) + +/* Set size, pinuse bit, foot, and clear next pinuse */ +#define set_free_with_pinuse(p, s, n)\ + (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s)) + +/* Get the internal overhead associated with chunk p */ +#define overhead_for(p)\ + (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD) + +/* Return true if malloced space is not necessarily cleared */ +#if MMAP_CLEARS +#define calloc_must_clear(p) (!is_mmapped(p)) +#else /* MMAP_CLEARS */ +#define calloc_must_clear(p) (1) +#endif /* MMAP_CLEARS */ + +/* ---------------------- Overlaid data structures ----------------------- */ + +/* + When chunks are not in use, they are treated as nodes of either + lists or trees. + + "Small" chunks are stored in circular doubly-linked lists, and look + like this: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space (may be 0 bytes long) . + . . + . 
| +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Larger chunks are kept in a form of bitwise digital trees (aka + tries) keyed on chunksizes. Because malloc_tree_chunks are only for + free chunks greater than 256 bytes, their size doesn't impose any + constraints on user chunk sizes. Each node looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to left child (child[0]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to right child (child[1]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to parent | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | bin index of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Each tree holding treenodes is a tree of unique chunk sizes. Chunks + of the same size are arranged in a circularly-linked list, with only + the oldest chunk (the next to be used, in our FIFO ordering) + actually in the tree. (Tree members are distinguished by a non-null + parent pointer.) 
If a chunk with the same size as an existing node + is inserted, it is linked off the existing node using pointers that + work in the same way as fd/bk pointers of small chunks. + + Each tree contains a power of 2 sized range of chunk sizes (the + smallest is 0x100 <= x < 0x180), which is divided in half at each + tree level, with the chunks in the smaller half of the range (0x100 + <= x < 0x140 for the top node) in the left subtree and the larger + half (0x140 <= x < 0x180) in the right subtree. This is, of course, + done by inspecting individual bits. + + Using these rules, each node's left subtree contains all smaller + sizes than its right subtree. However, the node at the root of each + subtree has no particular ordering relationship to either. (The + dividing line between the subtree sizes is based on trie relation.) + If we remove the last chunk of a given size from the interior of the + tree, we need to replace it with a leaf node. The tree ordering + rules permit a node to be replaced by any leaf below it. + + The smallest chunk in a tree (a common operation in a best-fit + allocator) can be found by walking a path to the leftmost leaf in + the tree. Unlike a usual binary tree, where we follow left child + pointers until we reach a null, here we follow the right child + pointer any time the left one is null, until we reach a leaf with + both child pointers null. The smallest chunk in the tree will be + somewhere along that path. + + The worst case number of steps to add, find, or remove a node is + bounded by the number of bits differentiating chunks within + bins. Under current bin calculations, this ranges from 6 up to 21 + (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case + is of course much better.
+*/ + +struct malloc_tree_chunk { + /* The first four fields must be compatible with malloc_chunk */ + size_t prev_foot; + size_t head; + struct malloc_tree_chunk* fd; + struct malloc_tree_chunk* bk; + + struct malloc_tree_chunk* child[2]; + struct malloc_tree_chunk* parent; + bindex_t index; +}; + +typedef struct malloc_tree_chunk tchunk; +typedef struct malloc_tree_chunk* tchunkptr; +typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */ + +/* A little helper macro for trees */ +#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1]) + +/* ----------------------------- Segments -------------------------------- */ + +/* + Each malloc space may include non-contiguous segments, held in a + list headed by an embedded malloc_segment record representing the + top-most space. Segments also include flags holding properties of + the space. Large chunks that are directly allocated by mmap are not + included in this list. They are instead independently created and + destroyed without otherwise keeping track of them. + + Segment management mainly comes into play for spaces allocated by + MMAP. Any call to MMAP might or might not return memory that is + adjacent to an existing segment. MORECORE normally contiguously + extends the current space, so this space is almost always adjacent, + which is simpler and faster to deal with. (This is why MORECORE is + used preferentially to MMAP when both are available -- see + sys_alloc.) When allocating using MMAP, we don't use any of the + hinting mechanisms (inconsistently) supported in various + implementations of unix mmap, or distinguish reserving from + committing memory. Instead, we just ask for space, and exploit + contiguity when we get it. It is probably possible to do + better than this on some systems, but no general scheme seems + to be significantly better. 
+ + Management entails a simpler variant of the consolidation scheme + used for chunks to reduce fragmentation -- new adjacent memory is + normally prepended or appended to an existing segment. However, + there are limitations compared to chunk consolidation that mostly + reflect the fact that segment processing is relatively infrequent + (occurring only when getting memory from system) and that we + don't expect to have huge numbers of segments: + + * Segments are not indexed, so traversal requires linear scans. (It + would be possible to index these, but is not worth the extra + overhead and complexity for most programs on most platforms.) + * New segments are only appended to old ones when holding top-most + memory; if they cannot be prepended to others, they are held in + different segments. + + Except for the top-most segment of an mstate, each segment record + is kept at the tail of its segment. Segments are added by pushing + segment records onto the list headed by &mstate.seg for the + containing mstate. + + Segment flags control allocation/merge/deallocation policies: + * If EXTERN_BIT set, then we did not allocate this segment, + and so should not try to deallocate or merge with others. + (This currently holds only for the initial segment passed + into create_mspace_with_base.) + * If USE_MMAP_BIT set, the segment may be merged with + other surrounding mmapped segments and trimmed/de-allocated + using munmap. + * If neither bit is set, then the segment was obtained using + MORECORE so can be merged with surrounding MORECORE'd segments + and deallocated/trimmed using MORECORE with negative arguments. 
+*/ + +struct malloc_segment { + char* base; /* base address */ + size_t size; /* allocated size */ + struct malloc_segment* next; /* ptr to next segment */ + flag_t sflags; /* mmap and extern flag */ +}; + +#define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT) +#define is_extern_segment(S) ((S)->sflags & EXTERN_BIT) + +typedef struct malloc_segment msegment; +typedef struct malloc_segment* msegmentptr; + +/* ---------------------------- malloc_state ----------------------------- */ + +/* + A malloc_state holds all of the bookkeeping for a space. + The main fields are: + + Top + The topmost chunk of the currently active segment. Its size is + cached in topsize. The actual size of topmost space is + topsize+TOP_FOOT_SIZE, which includes space reserved for adding + fenceposts and segment records if necessary when getting more + space from the system. The size at which to autotrim top is + cached from mparams in trim_check, except that it is disabled if + an autotrim fails. + + Designated victim (dv) + This is the preferred chunk for servicing small requests that + don't have exact fits. It is normally the chunk split off most + recently to service another small request. Its size is cached in + dvsize. The link fields of this chunk are not maintained since it + is not kept in a bin. + + SmallBins + An array of bin headers for free chunks. These bins hold chunks + with sizes less than MIN_LARGE_SIZE bytes. Each bin contains + chunks of all the same size, spaced 8 bytes apart. To simplify + use in double-linked lists, each bin header acts as a malloc_chunk + pointing to the real first node, if it exists (else pointing to + itself). This avoids special-casing for headers. But to avoid + waste, we allocate only the fd/bk pointers of bins, and then use + repositioning tricks to treat these as the fields of a chunk. + + TreeBins + Treebins are pointers to the roots of trees holding a range of + sizes. 
There are 2 equally spaced treebins for each power of two + from TREEBIN_SHIFT to TREEBIN_SHIFT+16. The last bin holds anything + larger. + + Bin maps + There is one bit map for small bins ("smallmap") and one for + treebins ("treemap"). Each bin sets its bit when non-empty, and + clears the bit when empty. Bit operations are then used to avoid + bin-by-bin searching -- nearly all "search" is done without ever + looking at bins that won't be selected. The bit maps + conservatively use 32 bits per map word, even on 64-bit systems. + For a good description of some of the bit-based techniques used + here, see Henry S. Warren Jr's book "Hacker's Delight" (and + supplement at http://hackersdelight.org/). Many of these are + intended to reduce the branchiness of paths through malloc etc, as + well as to reduce the number of memory locations read or written. + + Segments + A list of segments headed by an embedded malloc_segment record + representing the initial space. + + Address check support + The least_addr field is the least address ever obtained from + MORECORE or MMAP. Attempted frees and reallocs of any address less + than this are trapped (unless INSECURE is defined). + + Magic tag + A cross-check field that should always hold the same value as mparams.magic. + + Flags + Bits recording whether to use MMAP, locks, or contiguous MORECORE + + Statistics + Each space keeps track of current and maximum system memory + obtained via MORECORE or MMAP. + + Trim support + Fields holding the amount of unused topmost memory that should trigger + trimming, and a counter to force periodic scanning to release unused + non-topmost segments. + + Locking + If USE_LOCKS is defined, the "mutex" lock is acquired and released + around every public call using this mspace. + + Extension support + A void* pointer and a size_t field that can be used to help implement + extensions to this malloc.
+*/ + +/* Bin types, widths and sizes */ +#define NSMALLBINS (32U) +#define NTREEBINS (32U) +#define SMALLBIN_SHIFT (3U) +#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT) +#define TREEBIN_SHIFT (8U) +#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT) +#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE) +#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD) + +struct malloc_state { + binmap_t smallmap; + binmap_t treemap; + size_t dvsize; + size_t topsize; + char* least_addr; + mchunkptr dv; + mchunkptr top; + size_t trim_check; + size_t release_checks; + size_t magic; + mchunkptr smallbins[(NSMALLBINS+1)*2]; + tbinptr treebins[NTREEBINS]; + size_t footprint; + size_t max_footprint; + flag_t mflags; +#if USE_LOCKS + MLOCK_T mutex; /* locate lock among fields that rarely change */ +#endif /* USE_LOCKS */ + msegment seg; + void* extp; /* Unused but available for extensions */ + size_t exts; +}; + +typedef struct malloc_state* mstate; + +/* ------------- Global malloc_state and malloc_params ------------------- */ + +/* + malloc_params holds global properties, including those that can be + dynamically set using mallopt. There is a single instance, mparams, + initialized in init_mparams. Note that the non-zeroness of "magic" + also serves as an initialization flag. 
+*/ + +struct malloc_params { + volatile size_t magic; + size_t page_size; + size_t granularity; + size_t mmap_threshold; + size_t trim_threshold; + flag_t default_mflags; +}; + +static struct malloc_params mparams; + +/* Ensure mparams initialized */ +#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams()) + +#if !ONLY_MSPACES + +/* The global malloc_state used for all non-"mspace" calls */ +static struct malloc_state _gm_; +#define gm (&_gm_) +#define is_global(M) ((M) == &_gm_) + +#endif /* !ONLY_MSPACES */ + +#define is_initialized(M) ((M)->top != 0) + +/* -------------------------- system alloc setup ------------------------- */ + +/* Operations on mflags */ + +#define use_lock(M) ((M)->mflags & USE_LOCK_BIT) +#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT) +#define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT) + +#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT) +#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT) +#define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT) + +#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT) +#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT) + +#define set_lock(M,L)\ + ((M)->mflags = (L)?\ + ((M)->mflags | USE_LOCK_BIT) :\ + ((M)->mflags & ~USE_LOCK_BIT)) + +/* page-align a size */ +#define page_align(S)\ + (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE)) + +/* granularity-align a size */ +#define granularity_align(S)\ + (((S) + (mparams.granularity - SIZE_T_ONE))\ + & ~(mparams.granularity - SIZE_T_ONE)) + + +/* For mmap, use granularity alignment on windows, else page-align */ +#ifdef WIN32 +#define mmap_align(S) granularity_align(S) +#else +#define mmap_align(S) page_align(S) +#endif + +/* For sys_alloc, enough padding to ensure can malloc request on success */ +#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT) + +#define is_page_aligned(S)\ + (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0) +#define 
is_granularity_aligned(S)\ + (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0) + +/* True if segment S holds address A */ +#define segment_holds(S, A)\ + ((char*)(A) >= S->base && (char*)(A) < S->base + S->size) + +/* Return segment holding given address */ +static msegmentptr segment_holding(mstate m, char* addr) { + msegmentptr sp = &m->seg; + for (;;) { + if (addr >= sp->base && addr < sp->base + sp->size) + return sp; + if ((sp = sp->next) == 0) + return 0; + } +} + +/* Return true if segment contains a segment link */ +static int has_segment_link(mstate m, msegmentptr ss) { + msegmentptr sp = &m->seg; + for (;;) { + if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size) + return 1; + if ((sp = sp->next) == 0) + return 0; + } +} + +#ifndef MORECORE_CANNOT_TRIM +#define should_trim(M,s) ((s) > (M)->trim_check) +#else /* MORECORE_CANNOT_TRIM */ +#define should_trim(M,s) (0) +#endif /* MORECORE_CANNOT_TRIM */ + +/* + TOP_FOOT_SIZE is padding at the end of a segment, including space + that may be needed to place segment records and fenceposts when new + noncontiguous segments are added. +*/ +#define TOP_FOOT_SIZE\ + (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) + + +/* ------------------------------- Hooks -------------------------------- */ + +/* + PREACTION should be defined to return 0 on success, and nonzero on + failure. If you are not using locking, you can redefine these to do + anything you like. +*/ + +#if USE_LOCKS + +#define PREACTION(M) ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0) +#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); } +#else /* USE_LOCKS */ + +#ifndef PREACTION +#define PREACTION(M) (0) +#endif /* PREACTION */ + +#ifndef POSTACTION +#define POSTACTION(M) +#endif /* POSTACTION */ + +#endif /* USE_LOCKS */ + +/* + CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses. + USAGE_ERROR_ACTION is triggered on detected bad frees and + reallocs. 
The argument p is an address that might have triggered the + fault. It is ignored by the two predefined actions, but might be + useful in custom actions that try to help diagnose errors. +*/ + +#if PROCEED_ON_ERROR + +/* A count of the number of corruption errors causing resets */ +int malloc_corruption_error_count; + +/* default corruption action */ +static void reset_on_error(mstate m); + +#define CORRUPTION_ERROR_ACTION(m) reset_on_error(m) +#define USAGE_ERROR_ACTION(m, p) + +#else /* PROCEED_ON_ERROR */ + +#ifndef CORRUPTION_ERROR_ACTION +#define CORRUPTION_ERROR_ACTION(m) ABORT +#endif /* CORRUPTION_ERROR_ACTION */ + +#ifndef USAGE_ERROR_ACTION +#define USAGE_ERROR_ACTION(m,p) ABORT +#endif /* USAGE_ERROR_ACTION */ + +#endif /* PROCEED_ON_ERROR */ + +/* -------------------------- Debugging setup ---------------------------- */ + +#if ! DEBUG + +#define check_free_chunk(M,P) +#define check_inuse_chunk(M,P) +#define check_malloced_chunk(M,P,N) +#define check_mmapped_chunk(M,P) +#define check_malloc_state(M) +#define check_top_chunk(M,P) + +#else /* DEBUG */ +#define check_free_chunk(M,P) do_check_free_chunk(M,P) +#define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P) +#define check_top_chunk(M,P) do_check_top_chunk(M,P) +#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N) +#define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P) +#define check_malloc_state(M) do_check_malloc_state(M) + +static void do_check_any_chunk(mstate m, mchunkptr p); +static void do_check_top_chunk(mstate m, mchunkptr p); +static void do_check_mmapped_chunk(mstate m, mchunkptr p); +static void do_check_inuse_chunk(mstate m, mchunkptr p); +static void do_check_free_chunk(mstate m, mchunkptr p); +static void do_check_malloced_chunk(mstate m, void* mem, size_t s); +static void do_check_tree(mstate m, tchunkptr t); +static void do_check_treebin(mstate m, bindex_t i); +static void do_check_smallbin(mstate m, bindex_t i); +static void do_check_malloc_state(mstate m); 
+static int bin_find(mstate m, mchunkptr x); +static size_t traverse_and_check(mstate m); +#endif /* DEBUG */ + +/* ---------------------------- Indexing Bins ---------------------------- */ + +#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) +#define small_index(s) ((s) >> SMALLBIN_SHIFT) +#define small_index2size(i) ((i) << SMALLBIN_SHIFT) +#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) + +/* addressing by index. See above about smallbin repositioning */ +#define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1]))) +#define treebin_at(M,i) (&((M)->treebins[i])) + +/* assign tree index for size S to variable I. Use x86 asm if possible */ +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#define compute_tree_index(S, I)\ +{\ + unsigned int X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K;\ + __asm__("bsrl\t%1, %0\n\t" : "=r" (K) : "g" (X));\ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#elif defined (__INTEL_COMPILER) +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K = _bit_scan_reverse (X); \ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#elif defined(_MSC_VER) && _MSC_VER>=1300 +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K;\ + _BitScanReverse((DWORD *) &K, X);\ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#else /* GNUC */ +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int Y = (unsigned int)X;\ + unsigned int N = ((Y - 0x100) >> 16) & 8;\ + unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\ + N 
+= K;\ + N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\ + K = 14 - N + ((Y <<= K) >> 15);\ + I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\ + }\ +} +#endif /* GNUC */ + +/* Bit representing maximum resolved size in a treebin at i */ +#define bit_for_tree_index(i) \ + (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2) + +/* Shift placing maximum resolved bit in a treebin at i as sign bit */ +#define leftshift_for_tree_index(i) \ + ((i == NTREEBINS-1)? 0 : \ + ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2))) + +/* The size of the smallest chunk held in bin with index i */ +#define minsize_for_tree_index(i) \ + ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \ + (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1))) + + +/* ------------------------ Operations on bin maps ----------------------- */ + +/* bit corresponding to given index */ +#define idx2bit(i) ((binmap_t)(1) << (i)) + +/* Mark/Clear bits with given index */ +#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i)) +#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i)) +#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i)) + +#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i)) +#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i)) +#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i)) + +/* isolate the least set bit of a bitmap */ +#define least_bit(x) ((x) & -(x)) + +/* mask with all bits to left of least bit of x on */ +#define left_bits(x) ((x<<1) | -(x<<1)) + +/* mask with all bits to left of or equal to least bit of x on */ +#define same_or_left_bits(x) ((x) | -(x)) + +/* index corresponding to given bit. 
Use x86 asm if possible */ + +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + __asm__("bsfl\t%1, %0\n\t" : "=r" (J) : "g" (X));\ + I = (bindex_t)J;\ +} + +#elif defined (__INTEL_COMPILER) +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + J = _bit_scan_forward (X); \ + I = (bindex_t)J;\ +} + +#elif defined(_MSC_VER) && _MSC_VER>=1300 +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + _BitScanForward((DWORD *) &J, X);\ + I = (bindex_t)J;\ +} + +#elif USE_BUILTIN_FFS +#define compute_bit2idx(X, I) I = ffs(X)-1 + +#else +#define compute_bit2idx(X, I)\ +{\ + unsigned int Y = X - 1;\ + unsigned int K = Y >> (16-4) & 16;\ + unsigned int N = K; Y >>= K;\ + N += K = Y >> (8-3) & 8; Y >>= K;\ + N += K = Y >> (4-2) & 4; Y >>= K;\ + N += K = Y >> (2-1) & 2; Y >>= K;\ + N += K = Y >> (1-0) & 1; Y >>= K;\ + I = (bindex_t)(N + Y);\ +} +#endif /* GNUC */ + + +/* ----------------------- Runtime Check Support ------------------------- */ + +/* + For security, the main invariant is that malloc/free/etc never + writes to a static address other than malloc_state, unless static + malloc_state itself has been corrupted, which cannot occur via + malloc (because of these checks). In essence this means that we + believe all pointers, sizes, maps etc held in malloc_state, but + check all of those linked or offsetted from other embedded data + structures. These checks are interspersed with main code in a way + that tends to minimize their run-time cost. + + When FOOTERS is defined, in addition to range checking, we also + verify footer fields of inuse chunks, which can be used guarantee + that the mstate controlling malloc/free is intact. 
This is a + streamlined version of the approach described by William Robertson + et al in "Run-time Detection of Heap-based Overflows" LISA'03 + http://www.usenix.org/events/lisa03/tech/robertson.html The footer + of an inuse chunk holds the xor of its mstate and a random seed, + that is checked upon calls to free() and realloc(). This is + (probablistically) unguessable from outside the program, but can be + computed by any code successfully malloc'ing any chunk, so does not + itself provide protection against code that has already broken + security through some other means. Unlike Robertson et al, we + always dynamically check addresses of all offset chunks (previous, + next, etc). This turns out to be cheaper than relying on hashes. +*/ + +#if !INSECURE +/* Check if address a is at least as high as any from MORECORE or MMAP */ +#define ok_address(M, a) ((char*)(a) >= (M)->least_addr) +/* Check if address of next chunk n is higher than base chunk p */ +#define ok_next(p, n) ((char*)(p) < (char*)(n)) +/* Check if p has inuse status */ +#define ok_inuse(p) is_inuse(p) +/* Check if p has its pinuse bit on */ +#define ok_pinuse(p) pinuse(p) + +#else /* !INSECURE */ +#define ok_address(M, a) (1) +#define ok_next(b, n) (1) +#define ok_inuse(p) (1) +#define ok_pinuse(p) (1) +#endif /* !INSECURE */ + +#if (FOOTERS && !INSECURE) +/* Check if (alleged) mstate m has expected magic field */ +#define ok_magic(M) ((M)->magic == mparams.magic) +#else /* (FOOTERS && !INSECURE) */ +#define ok_magic(M) (1) +#endif /* (FOOTERS && !INSECURE) */ + + +/* In gcc, use __builtin_expect to minimize impact of checks */ +#if !INSECURE +#if defined(__GNUC__) && __GNUC__ >= 3 +#define RTCHECK(e) __builtin_expect(e, 1) +#else /* GNUC */ +#define RTCHECK(e) (e) +#endif /* GNUC */ +#else /* !INSECURE */ +#define RTCHECK(e) (1) +#endif /* !INSECURE */ + +/* macros to set up inuse chunks with or without footers */ + +#if !FOOTERS + +#define mark_inuse_foot(M,p,s) + +/* Macros for setting head/foot 
of non-mmapped chunks */ + +/* Set cinuse bit and pinuse bit of next chunk */ +#define set_inuse(M,p,s)\ + ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ + ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set cinuse and pinuse of this chunk and pinuse of next chunk */ +#define set_inuse_and_pinuse(M,p,s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set size, cinuse and pinuse bit of this chunk */ +#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT)) + +#else /* FOOTERS */ + +/* Set foot of inuse chunk to be xor of mstate and seed */ +#define mark_inuse_foot(M,p,s)\ + (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic)) + +#define get_mstate_for(p)\ + ((mstate)(((mchunkptr)((char*)(p) +\ + (chunksize(p))))->prev_foot ^ mparams.magic)) + +#define set_inuse(M,p,s)\ + ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ + (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \ + mark_inuse_foot(M,p,s)) + +#define set_inuse_and_pinuse(M,p,s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\ + mark_inuse_foot(M,p,s)) + +#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + mark_inuse_foot(M, p, s)) + +#endif /* !FOOTERS */ + +/* ---------------------------- setting mparams -------------------------- */ + +/* Initialize mparams */ +static int init_mparams(void) { +#ifdef NEED_GLOBAL_LOCK_INIT + if (malloc_global_mutex_status <= 0) + init_malloc_global_mutex(); +#endif + + ACQUIRE_MALLOC_GLOBAL_LOCK(); + if (mparams.magic == 0) { + size_t magic; + size_t psize; + size_t gsize; + +#ifndef WIN32 + psize = malloc_getpagesize; + gsize = ((DEFAULT_GRANULARITY != 0)? 
DEFAULT_GRANULARITY : psize); +#else /* WIN32 */ + { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + psize = system_info.dwPageSize; + gsize = ((DEFAULT_GRANULARITY != 0)? + DEFAULT_GRANULARITY : system_info.dwAllocationGranularity); + } +#endif /* WIN32 */ + + /* Sanity-check configuration: + size_t must be unsigned and as wide as pointer type. + ints must be at least 4 bytes. + alignment must be at least 8. + Alignment, min chunk size, and page size must all be powers of 2. + */ + if ((sizeof(size_t) != sizeof(char*)) || + (MAX_SIZE_T < MIN_CHUNK_SIZE) || + (sizeof(int) < 4) || + (MALLOC_ALIGNMENT < (size_t)8U) || + ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) || + ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) || + ((gsize & (gsize-SIZE_T_ONE)) != 0) || + ((psize & (psize-SIZE_T_ONE)) != 0)) + ABORT; + + mparams.granularity = gsize; + mparams.page_size = psize; + mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD; + mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD; +#if MORECORE_CONTIGUOUS + mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT; +#else /* MORECORE_CONTIGUOUS */ + mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT; +#endif /* MORECORE_CONTIGUOUS */ + +#if !ONLY_MSPACES + /* Set up lock for main malloc area */ + gm->mflags = mparams.default_mflags; + INITIAL_LOCK(&gm->mutex); +#endif + + { +#if USE_DEV_RANDOM + int fd; + unsigned char buf[sizeof(size_t)]; + /* Try to use /dev/urandom, else fall back on using time */ + if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 && + read(fd, buf, sizeof(buf)) == sizeof(buf)) { + magic = *((size_t *) buf); + close(fd); + } + else +#endif /* USE_DEV_RANDOM */ +#ifdef WIN32 + magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U); +#else + magic = (size_t)(time(0) ^ (size_t)0x55555555U); +#endif + magic |= (size_t)8U; /* ensure nonzero */ + magic &= ~(size_t)7U; /* improve chances of fault for bad values */ + mparams.magic = magic; + } + } + + RELEASE_MALLOC_GLOBAL_LOCK(); 
+ return 1; +} + +/* support for mallopt */ +static int change_mparam(int param_number, int value) { + size_t val; + ensure_initialization(); + val = (value == -1)? MAX_SIZE_T : (size_t)value; + switch(param_number) { + case M_TRIM_THRESHOLD: + mparams.trim_threshold = val; + return 1; + case M_GRANULARITY: + if (val >= mparams.page_size && ((val & (val-1)) == 0)) { + mparams.granularity = val; + return 1; + } + else + return 0; + case M_MMAP_THRESHOLD: + mparams.mmap_threshold = val; + return 1; + default: + return 0; + } +} + +#if DEBUG +/* ------------------------- Debugging Support --------------------------- */ + +/* Check properties of any chunk, whether free, inuse, mmapped etc */ +static void do_check_any_chunk(mstate m, mchunkptr p) { + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); +} + +/* Check properties of top chunk */ +static void do_check_top_chunk(mstate m, mchunkptr p) { + msegmentptr sp = segment_holding(m, (char*)p); + size_t sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! 
*/ + assert(sp != 0); + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); + assert(sz == m->topsize); + assert(sz > 0); + assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE); + assert(pinuse(p)); + assert(!pinuse(chunk_plus_offset(p, sz))); +} + +/* Check properties of (inuse) mmapped chunks */ +static void do_check_mmapped_chunk(mstate m, mchunkptr p) { + size_t sz = chunksize(p); + size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD); + assert(is_mmapped(p)); + assert(use_mmap(m)); + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); + assert(!is_small(sz)); + assert((len & (mparams.page_size-SIZE_T_ONE)) == 0); + assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD); + assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0); +} + +/* Check properties of inuse chunks */ +static void do_check_inuse_chunk(mstate m, mchunkptr p) { + do_check_any_chunk(m, p); + assert(is_inuse(p)); + assert(next_pinuse(p)); + /* If not pinuse and not mmapped, previous chunk has OK offset */ + assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p); + if (is_mmapped(p)) + do_check_mmapped_chunk(m, p); +} + +/* Check properties of free chunks */ +static void do_check_free_chunk(mstate m, mchunkptr p) { + size_t sz = chunksize(p); + mchunkptr next = chunk_plus_offset(p, sz); + do_check_any_chunk(m, p); + assert(!is_inuse(p)); + assert(!next_pinuse(p)); + assert (!is_mmapped(p)); + if (p != m->dv && p != m->top) { + if (sz >= MIN_CHUNK_SIZE) { + assert((sz & CHUNK_ALIGN_MASK) == 0); + assert(is_aligned(chunk2mem(p))); + assert(next->prev_foot == sz); + assert(pinuse(p)); + assert (next == m->top || is_inuse(next)); + assert(p->fd->bk == p); + assert(p->bk->fd == p); + } + else /* markers are always of size SIZE_T_SIZE */ + assert(sz == SIZE_T_SIZE); + } +} + +/* Check properties of malloced chunks at the point they are malloced */ +static void do_check_malloced_chunk(mstate 
m, void* mem, size_t s) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + size_t sz = p->head & ~INUSE_BITS; + do_check_inuse_chunk(m, p); + assert((sz & CHUNK_ALIGN_MASK) == 0); + assert(sz >= MIN_CHUNK_SIZE); + assert(sz >= s); + /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */ + assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE)); + } +} + +/* Check a tree and its subtrees. */ +static void do_check_tree(mstate m, tchunkptr t) { + tchunkptr head = 0; + tchunkptr u = t; + bindex_t tindex = t->index; + size_t tsize = chunksize(t); + bindex_t idx; + compute_tree_index(tsize, idx); + assert(tindex == idx); + assert(tsize >= MIN_LARGE_SIZE); + assert(tsize >= minsize_for_tree_index(idx)); + assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1)))); + + do { /* traverse through chain of same-sized nodes */ + do_check_any_chunk(m, ((mchunkptr)u)); + assert(u->index == tindex); + assert(chunksize(u) == tsize); + assert(!is_inuse(u)); + assert(!next_pinuse(u)); + assert(u->fd->bk == u); + assert(u->bk->fd == u); + if (u->parent == 0) { + assert(u->child[0] == 0); + assert(u->child[1] == 0); + } + else { + assert(head == 0); /* only one node on chain has parent */ + head = u; + assert(u->parent != u); + assert (u->parent->child[0] == u || + u->parent->child[1] == u || + *((tbinptr*)(u->parent)) == u); + if (u->child[0] != 0) { + assert(u->child[0]->parent == u); + assert(u->child[0] != u); + do_check_tree(m, u->child[0]); + } + if (u->child[1] != 0) { + assert(u->child[1]->parent == u); + assert(u->child[1] != u); + do_check_tree(m, u->child[1]); + } + if (u->child[0] != 0 && u->child[1] != 0) { + assert(chunksize(u->child[0]) < chunksize(u->child[1])); + } + } + u = u->fd; + } while (u != t); + assert(head != 0); +} + +/* Check all the chunks in a treebin. 
*/ +static void do_check_treebin(mstate m, bindex_t i) { /* a null root must have its treemap bit clear; a marked bin gets its whole tree checked */ + tbinptr* tb = treebin_at(m, i); + tchunkptr t = *tb; + int empty = (m->treemap & (1U << i)) == 0; + if (t == 0) + assert(empty); + if (!empty) + do_check_tree(m, t); +} + +/* Check all the chunks in smallbin i: an empty list must have its smallmap bit clear, and every listed chunk must be free, map to index i, and be followed by an inuse chunk or a fencepost. */ +static void do_check_smallbin(mstate m, bindex_t i) { + sbinptr b = smallbin_at(m, i); + mchunkptr p = b->bk; + unsigned int empty = (m->smallmap & (1U << i)) == 0; + if (p == b) + assert(empty); + if (!empty) { + for (; p != b; p = p->bk) { + size_t size = chunksize(p); + mchunkptr q; + /* each chunk claims to be free */ + do_check_free_chunk(m, p); + /* chunk belongs in bin */ + assert(small_index(size) == i); + assert(p->bk == b || chunksize(p->bk) == chunksize(p)); + /* chunk is followed by an inuse chunk */ + q = next_chunk(p); + if (q->head != FENCEPOST_HEAD) + do_check_inuse_chunk(m, q); + } + } +} + +/* Find x in a bin. Used in other check functions. Returns 1 if x is on the smallbin list for its size, or on the fd chain of the tree node whose size equals x's; returns 0 otherwise. */ +static int bin_find(mstate m, mchunkptr x) { + size_t size = chunksize(x); + if (is_small(size)) { + bindex_t sidx = small_index(size); + sbinptr b = smallbin_at(m, sidx); + if (smallmap_is_marked(m, sidx)) { + mchunkptr p = b; + do { + if (p == x) + return 1; + } while ((p = p->fd) != b); + } + } + else { + bindex_t tidx; + compute_tree_index(size, tidx); + if (treemap_is_marked(m, tidx)) { + tchunkptr t = *treebin_at(m, tidx); + size_t sizebits = size << leftshift_for_tree_index(tidx); + while (t != 0 && chunksize(t) != size) { + t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; + sizebits <<= 1; + } + if (t != 0) { + tchunkptr u = t; + do { + if (u == (tchunkptr)x) + return 1; + } while ((u = u->fd) != t); + } + } + } + return 0; +} + +/* Traverse each chunk of every segment and check it; return total bytes counted (topsize plus TOP_FOOT_SIZE plus the size of every traversed chunk), or 0 if m is not initialized */ +static size_t traverse_and_check(mstate m) { + size_t sum = 0; + if (is_initialized(m)) { + msegmentptr s = &m->seg; + sum += m->topsize + TOP_FOOT_SIZE; + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + mchunkptr lastq = 0; +
assert(pinuse(q)); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + sum += chunksize(q); + if (is_inuse(q)) { + assert(!bin_find(m, q)); + do_check_inuse_chunk(m, q); + } + else { + assert(q == m->dv || bin_find(m, q)); + assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */ + do_check_free_chunk(m, q); + } + lastq = q; + q = next_chunk(q); + } + s = s->next; + } + } + return sum; +} + +/* Check all properties of malloc_state: every smallbin and treebin, the dv chunk, the top chunk, and that the traversed total fits within footprint <= max_footprint. */ +static void do_check_malloc_state(mstate m) { + bindex_t i; + size_t total; + /* check bins */ + for (i = 0; i < NSMALLBINS; ++i) + do_check_smallbin(m, i); + for (i = 0; i < NTREEBINS; ++i) + do_check_treebin(m, i); + + if (m->dvsize != 0) { /* check dv chunk */ + do_check_any_chunk(m, m->dv); + assert(m->dvsize == chunksize(m->dv)); + assert(m->dvsize >= MIN_CHUNK_SIZE); + assert(bin_find(m, m->dv) == 0); + } + + if (m->top != 0) { /* check top chunk */ + do_check_top_chunk(m, m->top); + /*assert(m->topsize == chunksize(m->top)); redundant */ + assert(m->topsize > 0); + assert(bin_find(m, m->top) == 0); + } + + total = traverse_and_check(m); + assert(total <= m->footprint); + assert(m->footprint <= m->max_footprint); +} +#endif /* DEBUG */ + +/* ----------------------------- statistics ------------------------------ */ + +#if !NO_MALLINFO +static struct mallinfo internal_mallinfo(mstate m) { /* walks every segment between PREACTION and POSTACTION; the result stays all-zero if PREACTION fails or m is uninitialized */ + struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + ensure_initialization(); + if (!PREACTION(m)) { + check_malloc_state(m); + if (is_initialized(m)) { + size_t nfree = SIZE_T_ONE; /* top always free */ + size_t mfree = m->topsize + TOP_FOOT_SIZE; + size_t sum = mfree; + msegmentptr s = &m->seg; + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + size_t sz = chunksize(q); + sum += sz; + if (!is_inuse(q)) { + mfree += sz; + ++nfree; + } + q = next_chunk(q); + } + s = s->next; + } + + nm.arena = sum; + nm.ordblks
= nfree; + nm.hblkhd = m->footprint - sum; + nm.usmblks = m->max_footprint; + nm.uordblks = m->footprint - mfree; + nm.fordblks = mfree; + nm.keepcost = m->topsize; + } + + POSTACTION(m); + } + return nm; +} +#endif /* !NO_MALLINFO */ + +static void internal_malloc_stats(mstate m) { /* prints max footprint, footprint and in-use bytes to stderr; in-use is footprint minus top (with TOP_FOOT_SIZE) minus every free chunk */ + ensure_initialization(); + if (!PREACTION(m)) { + size_t maxfp = 0; + size_t fp = 0; + size_t used = 0; + check_malloc_state(m); + if (is_initialized(m)) { + msegmentptr s = &m->seg; + maxfp = m->max_footprint; + fp = m->footprint; + used = fp - (m->topsize + TOP_FOOT_SIZE); + + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + if (!is_inuse(q)) + used -= chunksize(q); + q = next_chunk(q); + } + s = s->next; + } + } + + fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp)); + fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(fp)); + fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(used)); + + POSTACTION(m); + } +} + +/* ----------------------- Operations on smallbins ----------------------- */ + +/* + Various forms of linking and unlinking are defined as macros. Even + the ones for trees, which are very long but have very short typical + paths. This is ugly but reduces reliance on inlining support of + compilers.
+*/ + +/* Link free chunk P of size S at the front of its smallbin (directly after the bin header), marking the bin in the smallmap if it was unmarked */ +#define insert_small_chunk(M, P, S) {\ + bindex_t I = small_index(S);\ + mchunkptr B = smallbin_at(M, I);\ + mchunkptr F = B;\ + assert(S >= MIN_CHUNK_SIZE);\ + if (!smallmap_is_marked(M, I))\ + mark_smallmap(M, I);\ + else if (RTCHECK(ok_address(M, B->fd)))\ + F = B->fd;\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + B->fd = P;\ + F->bk = P;\ + P->fd = F;\ + P->bk = B;\ +} + +/* Unlink chunk P of size S from its smallbin. When P's fd and bk coincide only the smallmap bit is cleared and the neighbor links are left untouched; otherwise fd and bk are joined to each other. */ +#define unlink_small_chunk(M, P, S) {\ + mchunkptr F = P->fd;\ + mchunkptr B = P->bk;\ + bindex_t I = small_index(S);\ + assert(P != B);\ + assert(P != F);\ + assert(chunksize(P) == small_index2size(I));\ + if (F == B)\ + clear_smallmap(M, I);\ + else if (RTCHECK((F == smallbin_at(M,I) || ok_address(M, F)) &&\ + (B == smallbin_at(M,I) || ok_address(M, B)))) {\ + F->bk = B;\ + B->fd = F;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ +} + +/* Unlink P, the first chunk of the smallbin with header B and index I; clears the smallmap bit when P's fd is the header itself */ +#define unlink_first_small_chunk(M, B, P, I) {\ + mchunkptr F = P->fd;\ + assert(P != B);\ + assert(P != F);\ + assert(chunksize(P) == small_index2size(I));\ + if (B == F)\ + clear_smallmap(M, I);\ + else if (RTCHECK(ok_address(M, F))) {\ + B->fd = F;\ + F->bk = B;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ +} + + + +/* Replace dv node with chunk P of size S, binning the old one (if any) into its smallbin */ +/* Used only when dvsize known to be small */ +#define replace_dv(M, P, S) {\ + size_t DVS = M->dvsize;\ + if (DVS != 0) {\ + mchunkptr DV = M->dv;\ + assert(is_small(DVS));\ + insert_small_chunk(M, DV, DVS);\ + }\ + M->dvsize = S;\ + M->dv = P;\ +} + +/* ------------------------- Operations on trees ------------------------- */ + +/* Insert chunk X of size S into its treebin: X becomes the root of an unmarked bin, a new leaf reached by following the bits of S, or a parentless member of the fd/bk chain of an existing node of the same size */ +#define insert_large_chunk(M, X, S) {\ + tbinptr* H;\ + bindex_t I;\ + compute_tree_index(S, I);\ + H = treebin_at(M, I);\ + X->index = I;\ + X->child[0] = X->child[1] = 0;\ + if (!treemap_is_marked(M, I)) {\ + mark_treemap(M, I);\ + *H = X;\ + X->parent = (tchunkptr)H;\ + X->fd = X->bk = X;\ + }\ +
else {\ + tchunkptr T = *H;\ + size_t K = S << leftshift_for_tree_index(I);\ + for (;;) {\ + if (chunksize(T) != S) {\ + tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\ + K <<= 1;\ + if (*C != 0)\ + T = *C;\ + else if (RTCHECK(ok_address(M, C))) {\ + *C = X;\ + X->parent = T;\ + X->fd = X->bk = X;\ + break;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + break;\ + }\ + }\ + else {\ + tchunkptr F = T->fd;\ + if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\ + T->fd = F->bk = X;\ + X->fd = F;\ + X->bk = T;\ + X->parent = 0;\ + break;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + break;\ + }\ + }\ + }\ + }\ +} + +/* + Unlink steps: + + 1. If x is a chained node, unlink it from its same-sized fd/bk links + and choose its bk node as its replacement. + 2. If x was the last node of its size, but not a leaf node, it must + be replaced with a leaf node (not merely one with an open left or + right), to make sure that lefts and rights of descendents + correspond properly to bit masks. We use the rightmost descendent + of x. We could use any other leaf, but this is easy to locate and + tends to counteract removal of leftmosts elsewhere, and so keeps + paths shorter than minimally guaranteed. This doesn't loop much + because on average a node in a tree is near the bottom. + 3. If x is the base of a chain (i.e., has parent links) relink + x's parent and children to x's replacement (or null if none).
+*/ + +#define unlink_large_chunk(M, X) {\ + tchunkptr XP = X->parent;\ + tchunkptr R;\ + if (X->bk != X) {\ + tchunkptr F = X->fd;\ + R = X->bk;\ + if (RTCHECK(ok_address(M, F))) {\ + F->bk = R;\ + R->fd = F;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else {\ + tchunkptr* RP;\ + if (((R = *(RP = &(X->child[1]))) != 0) ||\ + ((R = *(RP = &(X->child[0]))) != 0)) {\ + tchunkptr* CP;\ + while ((*(CP = &(R->child[1])) != 0) ||\ + (*(CP = &(R->child[0])) != 0)) {\ + R = *(RP = CP);\ + }\ + if (RTCHECK(ok_address(M, RP)))\ + *RP = 0;\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + }\ + if (XP != 0) {\ + tbinptr* H = treebin_at(M, X->index);\ + if (X == *H) {\ + if ((*H = R) == 0) \ + clear_treemap(M, X->index);\ + }\ + else if (RTCHECK(ok_address(M, XP))) {\ + if (XP->child[0] == X) \ + XP->child[0] = R;\ + else \ + XP->child[1] = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + if (R != 0) {\ + if (RTCHECK(ok_address(M, R))) {\ + tchunkptr C0, C1;\ + R->parent = XP;\ + if ((C0 = X->child[0]) != 0) {\ + if (RTCHECK(ok_address(M, C0))) {\ + R->child[0] = C0;\ + C0->parent = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + if ((C1 = X->child[1]) != 0) {\ + if (RTCHECK(ok_address(M, C1))) {\ + R->child[1] = C1;\ + C1->parent = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ +} + +/* Relays to large vs small bin operations: chunks whose size S satisfies is_small go to the smallbin macros, all others are treated as tree chunks */ + +#define insert_chunk(M, P, S)\ + if (is_small(S)) insert_small_chunk(M, P, S)\ + else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); } + +#define unlink_chunk(M, P, S)\ + if (is_small(S)) unlink_small_chunk(M, P, S)\ + else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); } + + +/* Relays to internal calls to malloc/free from realloc, memalign etc. internal_malloc always expands to a single parenthesized expression so that a cast or operator applied by the caller binds to the whole result rather than to the (m == gm) test. */ + +#if ONLY_MSPACES +#define internal_malloc(m, b) mspace_malloc(m, b) +#define internal_free(m, mem) mspace_free(m,mem); +#else /* ONLY_MSPACES */ +#if MSPACES +#define internal_malloc(m,
b)\ + ((m == gm)? dlmalloc(b) : mspace_malloc(m, b)) +#define internal_free(m, mem)\ + if (m == gm) dlfree(mem); else mspace_free(m,mem); +#else /* MSPACES */ +#define internal_malloc(m, b) dlmalloc(b) +#define internal_free(m, mem) dlfree(mem) +#endif /* MSPACES */ +#endif /* ONLY_MSPACES */ + +/* ----------------------- Direct-mmapping chunks ----------------------- */ + +/* + Directly mmapped chunks are set up with an offset to the start of + the mmapped region stored in the prev_foot field of the chunk. This + allows reconstruction of the required argument to MUNMAP when freed, + and also allows adjustment of the returned chunk to meet alignment + requirements (especially in memalign). +*/ + +/* Malloc using mmap: maps a dedicated region for an nb-byte chunk, terminates it with a fencepost pair, and updates least_addr and the footprint counters. Returns the payload pointer, or 0 if the padded size wraps or the mapping fails. */ +static void* mmap_alloc(mstate m, size_t nb) { + size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + if (mmsize > nb) { /* Check for wrap around 0 */ + char* mm = (char*)(CALL_DIRECT_MMAP(mmsize)); + if (mm != CMFAIL) { + size_t offset = align_offset(chunk2mem(mm)); + size_t psize = mmsize - offset - MMAP_FOOT_PAD; + mchunkptr p = (mchunkptr)(mm + offset); + p->prev_foot = offset; + p->head = psize; + mark_inuse_foot(m, p, psize); + chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; + + if (m->least_addr == 0 || mm < m->least_addr) + m->least_addr = mm; + if ((m->footprint += mmsize) > m->max_footprint) + m->max_footprint = m->footprint; + assert(is_aligned(chunk2mem(p))); + check_mmapped_chunk(m, p); + return chunk2mem(p); + } + } + return 0; +} + +/* Realloc using mmap: returns oldp unchanged when it already fits nb without too much slack, the remapped chunk when CALL_MREMAP succeeds, and 0 when nb is small or the remap fails */ +static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) { + size_t oldsize = chunksize(oldp); + if (is_small(nb)) /* Can't shrink mmap regions below small size */ + return 0; + /* Keep old chunk if big enough but not too big */ + if (oldsize >= nb + SIZE_T_SIZE && + (oldsize - nb) <= (mparams.granularity << 1)) + return oldp; + else { + size_t offset = oldp->prev_foot; + size_t oldmmsize = oldsize
+ offset + MMAP_FOOT_PAD; + size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + char* cp = (char*)CALL_MREMAP((char*)oldp - offset, + oldmmsize, newmmsize, 1); + if (cp != CMFAIL) { + mchunkptr newp = (mchunkptr)(cp + offset); + size_t psize = newmmsize - offset - MMAP_FOOT_PAD; + newp->head = psize; + mark_inuse_foot(m, newp, psize); + chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; + + if (cp < m->least_addr) + m->least_addr = cp; + if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint) + m->max_footprint = m->footprint; + check_mmapped_chunk(m, newp); + return newp; + } + } + return 0; +} + +/* -------------------------- mspace management -------------------------- */ + +/* Initialize top chunk and its size: p is first advanced to an aligned position and psize reduced by the same amount */ +static void init_top(mstate m, mchunkptr p, size_t psize) { + /* Ensure alignment */ + size_t offset = align_offset(chunk2mem(p)); + p = (mchunkptr)((char*)p + offset); + psize -= offset; + + m->top = p; + m->topsize = psize; + p->head = psize | PINUSE_BIT; + /* set size of fake trailing chunk holding overhead space only once */ + chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE; + m->trim_check = mparams.trim_threshold; /* reset on each update */ +} + +/* Initialize bins for a new mstate that is otherwise zeroed out */ +static void init_bins(mstate m) { + /* Establish circular links for smallbins */ + bindex_t i; + for (i = 0; i < NSMALLBINS; ++i) { + sbinptr bin = smallbin_at(m,i); + bin->fd = bin->bk = bin; + } +} + +#if PROCEED_ON_ERROR + +/* default corruption action: count the error, then forget all memory by clearing the bin bitmaps, dv, top, the segment record and every bin */ +static void reset_on_error(mstate m) { + int i; + ++malloc_corruption_error_count; + /* Reinitialize fields to forget about all memory */ + m->smallmap = m->treemap = 0; /* clear the bin bitmaps; the bin arrays themselves are reset below */ + m->dvsize = m->topsize = 0; + m->seg.base = 0; + m->seg.size = 0; + m->seg.next = 0; + m->top = m->dv = 0; + for (i = 0; i < NTREEBINS; ++i) + *treebin_at(m, i) = 0; + init_bins(m); +} +#endif /* PROCEED_ON_ERROR */ +
+/* Allocate a chunk of nb bytes at the start of the new space at newbase and prepend the remainder to the chunk that begins the old space at oldbase: the remainder is merged into top or dv when that first chunk is one of them, otherwise it is coalesced with it if free and binned. Returns the payload pointer of the allocated chunk. */ +static void* prepend_alloc(mstate m, char* newbase, char* oldbase, + size_t nb) { + mchunkptr p = align_as_chunk(newbase); + mchunkptr oldfirst = align_as_chunk(oldbase); + size_t psize = (char*)oldfirst - (char*)p; + mchunkptr q = chunk_plus_offset(p, nb); + size_t qsize = psize - nb; + set_size_and_pinuse_of_inuse_chunk(m, p, nb); + + assert((char*)oldfirst > (char*)q); + assert(pinuse(oldfirst)); + assert(qsize >= MIN_CHUNK_SIZE); + + /* consolidate remainder with first chunk of old base */ + if (oldfirst == m->top) { + size_t tsize = m->topsize += qsize; + m->top = q; + q->head = tsize | PINUSE_BIT; + check_top_chunk(m, q); + } + else if (oldfirst == m->dv) { + size_t dsize = m->dvsize += qsize; + m->dv = q; + set_size_and_pinuse_of_free_chunk(q, dsize); + } + else { + if (!is_inuse(oldfirst)) { + size_t nsize = chunksize(oldfirst); + unlink_chunk(m, oldfirst, nsize); + oldfirst = chunk_plus_offset(oldfirst, nsize); + qsize += nsize; + } + set_free_with_pinuse(q, qsize, oldfirst); + insert_chunk(m, q, qsize); + check_free_chunk(m, q); + } + + check_malloced_chunk(m, chunk2mem(p), nb); + return chunk2mem(p); +} + +/* Add a segment to hold a new noncontiguous region: top moves to the new space, the previous segment record is saved in a chunk carved from the end of the old top's segment, fenceposts are written up to the old segment end, and whatever is left of the old top is binned as a free chunk */ +static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) { + /* Determine locations and sizes of segment, fenceposts, old top */ + char* old_top = (char*)m->top; + msegmentptr oldsp = segment_holding(m, old_top); + char* old_end = oldsp->base + oldsp->size; + size_t ssize = pad_request(sizeof(struct malloc_segment)); + char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + size_t offset = align_offset(chunk2mem(rawsp)); + char* asp = rawsp + offset; + char* csp = (asp < (old_top + MIN_CHUNK_SIZE))?
old_top : asp; + mchunkptr sp = (mchunkptr)csp; + msegmentptr ss = (msegmentptr)(chunk2mem(sp)); + mchunkptr tnext = chunk_plus_offset(sp, ssize); + mchunkptr p = tnext; + int nfences = 0; + + /* reset top to new space */ + init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); + + /* Set up segment record */ + assert(is_aligned(ss)); + set_size_and_pinuse_of_inuse_chunk(m, sp, ssize); + *ss = m->seg; /* Push current record */ + m->seg.base = tbase; + m->seg.size = tsize; + m->seg.sflags = mmapped; + m->seg.next = ss; + + /* Insert trailing fenceposts */ + for (;;) { + mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE); + p->head = FENCEPOST_HEAD; + ++nfences; + if ((char*)(&(nextp->head)) < old_end) + p = nextp; + else + break; + } + assert(nfences >= 2); + + /* Insert the rest of old top into a bin as an ordinary free chunk */ + if (csp != old_top) { + mchunkptr q = (mchunkptr)old_top; + size_t psize = csp - old_top; + mchunkptr tn = chunk_plus_offset(q, psize); + set_free_with_pinuse(q, psize, tn); + insert_chunk(m, q, psize); + } + + check_top_chunk(m, m->top); +} + +/* -------------------------- System allocation -------------------------- */ + +/* Get memory from system using MORECORE or MMAP and allocate an nb-byte chunk from it. Returns the payload pointer, or 0 (after MALLOC_FAILURE_ACTION) if no memory could be obtained or the new top is still not larger than nb. */ +static void* sys_alloc(mstate m, size_t nb) { + char* tbase = CMFAIL; + size_t tsize = 0; + flag_t mmap_flag = 0; + + ensure_initialization(); + + /* Directly map large chunks, but only if already initialized */ + if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) { + void* mem = mmap_alloc(m, nb); + if (mem != 0) + return mem; + } + + /* + Try getting memory in any of three ways (in most-preferred to + least-preferred order): + 1. A call to MORECORE that can normally contiguously extend memory. + (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE + or main space is mmapped or a previous contiguous call failed) + 2. A call to MMAP new space (disabled if not HAVE_MMAP).
+ Note that under the default settings, if MORECORE is unable to + fulfill a request, and HAVE_MMAP is true, then mmap is + used as a noncontiguous system allocator. This is a useful backup + strategy for systems with holes in address spaces -- in this case + sbrk cannot contiguously expand the heap, but mmap may be able to + find space. + 3. A call to MORECORE that cannot usually contiguously extend memory. + (disabled if not HAVE_MORECORE) + + In all cases, we need to request enough bytes from system to ensure + we can malloc nb bytes upon success, so pad with enough space for + top_foot, plus alignment-pad to make sure we don't lose bytes if + not on boundary, and round this up to a granularity unit. + */ + + if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) { + char* br = CMFAIL; + msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top); + size_t asize = 0; + ACQUIRE_MALLOC_GLOBAL_LOCK(); + + if (ss == 0) { /* First time through or recovery */ + char* base = (char*)CALL_MORECORE(0); + if (base != CMFAIL) { + asize = granularity_align(nb + SYS_ALLOC_PADDING); + /* Adjust to end on a page boundary */ + if (!is_page_aligned(base)) + asize += (page_align((size_t)base) - (size_t)base); + /* Can't call MORECORE if size is negative when treated as signed */ + if (asize < HALF_MAX_SIZE_T && + (br = (char*)(CALL_MORECORE(asize))) == base) { + tbase = base; + tsize = asize; + } + } + } + else { + /* Subtract out existing available top space from MORECORE request.
*/ + asize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING); + /* Use mem here only if it did contiguously extend old space */ + if (asize < HALF_MAX_SIZE_T && + (br = (char*)(CALL_MORECORE(asize))) == ss->base+ss->size) { + tbase = br; + tsize = asize; + } + } + + if (tbase == CMFAIL) { /* Cope with partial failure */ + if (br != CMFAIL) { /* Try to use/extend the space we did get */ + if (asize < HALF_MAX_SIZE_T && + asize < nb + SYS_ALLOC_PADDING) { + size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - asize); + if (esize < HALF_MAX_SIZE_T) { + char* end = (char*)CALL_MORECORE(esize); + if (end != CMFAIL) + asize += esize; + else { /* Can't use; try to release */ + (void) CALL_MORECORE(-asize); + br = CMFAIL; + } + } + } + } + if (br != CMFAIL) { /* Use the space we did get */ + tbase = br; + tsize = asize; + } + else + disable_contiguous(m); /* Don't try contiguous path in the future */ + } + + RELEASE_MALLOC_GLOBAL_LOCK(); + } + + if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */ + size_t rsize = granularity_align(nb + SYS_ALLOC_PADDING); + if (rsize > nb) { /* Fail if wraps around zero */ + char* mp = (char*)(CALL_MMAP(rsize)); + if (mp != CMFAIL) { + tbase = mp; + tsize = rsize; + mmap_flag = USE_MMAP_BIT; + } + } + } + + if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */ + size_t asize = granularity_align(nb + SYS_ALLOC_PADDING); + if (asize < HALF_MAX_SIZE_T) { + char* br = CMFAIL; + char* end = CMFAIL; + ACQUIRE_MALLOC_GLOBAL_LOCK(); + br = (char*)(CALL_MORECORE(asize)); + end = (char*)(CALL_MORECORE(0)); + RELEASE_MALLOC_GLOBAL_LOCK(); + if (br != CMFAIL && end != CMFAIL && br < end) { + size_t ssize = end - br; + if (ssize > nb + TOP_FOOT_SIZE) { + tbase = br; + tsize = ssize; + } + } + } + } + + if (tbase != CMFAIL) { + + if ((m->footprint += tsize) > m->max_footprint) + m->max_footprint = m->footprint; + + if (!is_initialized(m)) { /* first-time initialization */ + if (m->least_addr == 0 || tbase < m->least_addr) +
m->least_addr = tbase; + m->seg.base = tbase; + m->seg.size = tsize; + m->seg.sflags = mmap_flag; + m->magic = mparams.magic; + m->release_checks = MAX_RELEASE_CHECK_RATE; + init_bins(m); +#if !ONLY_MSPACES + if (is_global(m)) + init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); + else +#endif + { + /* Offset top by embedded malloc_state */ + mchunkptr mn = next_chunk(mem2chunk(m)); + init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE); + } + } + + else { + /* Try to merge with an existing segment */ + msegmentptr sp = &m->seg; + /* Only consider most recent segment if traversal suppressed */ + while (sp != 0 && tbase != sp->base + sp->size) + sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; + if (sp != 0 && + !is_extern_segment(sp) && + (sp->sflags & USE_MMAP_BIT) == mmap_flag && + segment_holds(sp, m->top)) { /* append */ + sp->size += tsize; + init_top(m, m->top, m->topsize + tsize); + } + else { + if (tbase < m->least_addr) + m->least_addr = tbase; + sp = &m->seg; + while (sp != 0 && sp->base != tbase + tsize) + sp = (NO_SEGMENT_TRAVERSAL) ?
0 : sp->next; + if (sp != 0 && + !is_extern_segment(sp) && + (sp->sflags & USE_MMAP_BIT) == mmap_flag) { + char* oldbase = sp->base; + sp->base = tbase; + sp->size += tsize; + return prepend_alloc(m, tbase, oldbase, nb); + } + else + add_segment(m, tbase, tsize, mmap_flag); + } + } + + if (nb < m->topsize) { /* Allocate from new or extended top space */ + size_t rsize = m->topsize -= nb; + mchunkptr p = m->top; + mchunkptr r = m->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(m, p, nb); + check_top_chunk(m, m->top); + check_malloced_chunk(m, chunk2mem(p), nb); + return chunk2mem(p); + } + } + + MALLOC_FAILURE_ACTION; + return 0; +} + +/* ----------------------- system deallocation -------------------------- */ + +/* Unmap and unlink any mmapped segments that don't contain used chunks; the head segment record m->seg itself is never examined. Returns the number of bytes released. */ +static size_t release_unused_segments(mstate m) { + size_t released = 0; + int nsegs = 0; + msegmentptr pred = &m->seg; + msegmentptr sp = pred->next; + while (sp != 0) { + char* base = sp->base; + size_t size = sp->size; + msegmentptr next = sp->next; + ++nsegs; + if (is_mmapped_segment(sp) && !is_extern_segment(sp)) { + mchunkptr p = align_as_chunk(base); + size_t psize = chunksize(p); + /* Can unmap if first chunk holds entire segment and not pinned */ + if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) { + tchunkptr tp = (tchunkptr)p; + assert(segment_holds(sp, (char*)sp)); + if (p == m->dv) { + m->dv = 0; + m->dvsize = 0; + } + else { + unlink_large_chunk(m, tp); + } + if (CALL_MUNMAP(base, size) == 0) { + released += size; + m->footprint -= size; + /* unlink obsoleted record */ + sp = pred; + sp->next = next; + } + else { /* back out if cannot unmap */ + insert_large_chunk(m, tp, psize); + } + } + } + if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */ + break; + pred = sp; + sp = next; + } + /* Reset check counter to the larger of the segment count and MAX_RELEASE_CHECK_RATE */ + m->release_checks = ((nsegs > MAX_RELEASE_CHECK_RATE)?
+ nsegs : MAX_RELEASE_CHECK_RATE); + return released; +} + +static int sys_trim(mstate m, size_t pad) { /* shrinks top in granularity units while leaving more than pad + TOP_FOOT_SIZE bytes in it, then releases unused mmapped segments; returns 1 if anything was released, else 0 */ + size_t released = 0; + ensure_initialization(); + if (pad < MAX_REQUEST && is_initialized(m)) { + pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ + + if (m->topsize > pad) { + /* Shrink top space in granularity-size units, keeping at least one */ + size_t unit = mparams.granularity; + size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - + SIZE_T_ONE) * unit; + msegmentptr sp = segment_holding(m, (char*)m->top); + + if (!is_extern_segment(sp)) { + if (is_mmapped_segment(sp)) { + if (HAVE_MMAP && + sp->size >= extra && + !has_segment_link(m, sp)) { /* can't shrink if pinned */ +#if HAVE_MMAP + size_t newsize = sp->size - extra; +#endif + /* Prefer mremap, fall back to munmap */ + if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) || + (CALL_MUNMAP(sp->base + newsize, extra) == 0)) { + released = extra; + } + } + } + else if (HAVE_MORECORE) { + if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */ + extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit; + ACQUIRE_MALLOC_GLOBAL_LOCK(); + { + /* Make sure end of memory is where we last set it. */ + char* old_br = (char*)(CALL_MORECORE(0)); + if (old_br == sp->base + sp->size) { + char* rel_br = (char*)(CALL_MORECORE(-extra)); + char* new_br = (char*)(CALL_MORECORE(0)); + if (rel_br != CMFAIL && new_br < old_br) + released = old_br - new_br; + } + } + RELEASE_MALLOC_GLOBAL_LOCK(); + } + } + + if (released != 0) { + sp->size -= released; + m->footprint -= released; + init_top(m, m->top, m->topsize - released); + check_top_chunk(m, m->top); + } + } + + /* Unmap any unused mmapped segments */ + if (HAVE_MMAP) + released += release_unused_segments(m); + + /* On failure, disable autotrim to avoid repeated failed future calls */ + if (released == 0 && m->topsize > m->trim_check) + m->trim_check = MAX_SIZE_T; + } + + return (released != 0)?
1 : 0; +} + + +/* ---------------------------- malloc support --------------------------- */ + +/* allocate a large request from the best fitting chunk in a treebin; returns 0 when no tree chunk is found or when dv fits at least as well */ +static void* tmalloc_large(mstate m, size_t nb) { + tchunkptr v = 0; + size_t rsize = -nb; /* Unsigned negation */ + tchunkptr t; + bindex_t idx; + compute_tree_index(nb, idx); + if ((t = *treebin_at(m, idx)) != 0) { + /* Traverse tree for this bin looking for node with size == nb */ + size_t sizebits = nb << leftshift_for_tree_index(idx); + tchunkptr rst = 0; /* The deepest untaken right subtree */ + for (;;) { + tchunkptr rt; + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + v = t; + if ((rsize = trem) == 0) + break; + } + rt = t->child[1]; + t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; + if (rt != 0 && rt != t) + rst = rt; + if (t == 0) { + t = rst; /* set t to least subtree holding sizes > nb */ + break; + } + sizebits <<= 1; + } + } + if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ + binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; + if (leftbits != 0) { + bindex_t i; + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + t = *treebin_at(m, i); + } + } + + while (t != 0) { /* find smallest of tree or subtree */ + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + t = leftmost_child(t); + } + + /* If dv is a better fit, return 0 so malloc will use it */ + if (v != 0 && rsize < (size_t)(m->dvsize - nb)) { + if (RTCHECK(ok_address(m, v))) { /* split */ + mchunkptr r = chunk_plus_offset(v, nb); + assert(chunksize(v) == rsize + nb); + if (RTCHECK(ok_next(v, r))) { + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(m, v, (rsize + nb)); + else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + insert_chunk(m, r, rsize); + } + return chunk2mem(v); + } + } + CORRUPTION_ERROR_ACTION(m); + } + return 0; +} + +/*
allocate a small request from the best fitting chunk in a treebin */ +static void* tmalloc_small(mstate m, size_t nb) { + tchunkptr t, v; + size_t rsize; + bindex_t i; + binmap_t leastbit = least_bit(m->treemap); + compute_bit2idx(leastbit, i); + v = t = *treebin_at(m, i); + rsize = chunksize(t) - nb; + + while ((t = leftmost_child(t)) != 0) { + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + } + + if (RTCHECK(ok_address(m, v))) { + mchunkptr r = chunk_plus_offset(v, nb); + assert(chunksize(v) == rsize + nb); + if (RTCHECK(ok_next(v, r))) { + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(m, v, (rsize + nb)); + else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(m, r, rsize); + } + return chunk2mem(v); + } + } + + CORRUPTION_ERROR_ACTION(m); + return 0; +} + +/* --------------------------- realloc support --------------------------- */ + +static void* internal_realloc(mstate m, void* oldmem, size_t bytes) { + if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + return 0; + } + if (!PREACTION(m)) { + mchunkptr oldp = mem2chunk(oldmem); + size_t oldsize = chunksize(oldp); + mchunkptr next = chunk_plus_offset(oldp, oldsize); + mchunkptr newp = 0; + void* extra = 0; + + /* Try to either shrink or extend into top. 
Else malloc-copy-free */ + + if (RTCHECK(ok_address(m, oldp) && ok_inuse(oldp) && + ok_next(oldp, next) && ok_pinuse(next))) { + size_t nb = request2size(bytes); + if (is_mmapped(oldp)) + newp = mmap_resize(m, oldp, nb); + else if (oldsize >= nb) { /* already big enough */ + size_t rsize = oldsize - nb; + newp = oldp; + if (rsize >= MIN_CHUNK_SIZE) { + mchunkptr remainder = chunk_plus_offset(newp, nb); + set_inuse(m, newp, nb); + set_inuse_and_pinuse(m, remainder, rsize); + extra = chunk2mem(remainder); + } + } + else if (next == m->top && oldsize + m->topsize > nb) { + /* Expand into top */ + size_t newsize = oldsize + m->topsize; + size_t newtopsize = newsize - nb; + mchunkptr newtop = chunk_plus_offset(oldp, nb); + set_inuse(m, oldp, nb); + newtop->head = newtopsize |PINUSE_BIT; + m->top = newtop; + m->topsize = newtopsize; + newp = oldp; + } + } + else { + USAGE_ERROR_ACTION(m, oldmem); + POSTACTION(m); + return 0; + } +#if DEBUG + if (newp != 0) { + check_inuse_chunk(m, newp); /* Check requires lock */ + } +#endif + + POSTACTION(m); + + if (newp != 0) { + if (extra != 0) { + internal_free(m, extra); + } + return chunk2mem(newp); + } + else { + void* newmem = internal_malloc(m, bytes); + if (newmem != 0) { + size_t oc = oldsize - overhead_for(oldp); + memcpy(newmem, oldmem, (oc < bytes)? 
oc : bytes); + internal_free(m, oldmem); + } + return newmem; + } + } + return 0; +} + +/* --------------------------- memalign support -------------------------- */ + +static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { + if (alignment <= MALLOC_ALIGNMENT) /* Can just use malloc */ + return internal_malloc(m, bytes); + if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ + alignment = MIN_CHUNK_SIZE; + if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */ + size_t a = MALLOC_ALIGNMENT << 1; + while (a < alignment) a <<= 1; + alignment = a; + } + + if (bytes >= MAX_REQUEST - alignment) { + if (m != 0) { /* Test isn't needed but avoids compiler warning */ + MALLOC_FAILURE_ACTION; + } + } + else { + size_t nb = request2size(bytes); + size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD; + char* mem = (char*)internal_malloc(m, req); + if (mem != 0) { + void* leader = 0; + void* trailer = 0; + mchunkptr p = mem2chunk(mem); + + if (PREACTION(m)) return 0; + if ((((size_t)(mem)) % alignment) != 0) { /* misaligned */ + /* + Find an aligned spot inside chunk. Since we need to give + back leading space in a chunk of at least MIN_CHUNK_SIZE, if + the first calculation places us at a spot with less than + MIN_CHUNK_SIZE leader, we can move to the next aligned spot. + We've allocated enough total room so that this is always + possible. + */ + char* br = (char*)mem2chunk((size_t)(((size_t)(mem + + alignment - + SIZE_T_ONE)) & + -alignment)); + char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)? 
+ br : br+alignment; + mchunkptr newp = (mchunkptr)pos; + size_t leadsize = pos - (char*)(p); + size_t newsize = chunksize(p) - leadsize; + + if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */ + newp->prev_foot = p->prev_foot + leadsize; + newp->head = newsize; + } + else { /* Otherwise, give back leader, use the rest */ + set_inuse(m, newp, newsize); + set_inuse(m, p, leadsize); + leader = chunk2mem(p); + } + p = newp; + } + + /* Give back spare room at the end */ + if (!is_mmapped(p)) { + size_t size = chunksize(p); + if (size > nb + MIN_CHUNK_SIZE) { + size_t remainder_size = size - nb; + mchunkptr remainder = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + set_inuse(m, remainder, remainder_size); + trailer = chunk2mem(remainder); + } + } + + assert (chunksize(p) >= nb); + assert((((size_t)(chunk2mem(p))) % alignment) == 0); + check_inuse_chunk(m, p); + POSTACTION(m); + if (leader != 0) { + internal_free(m, leader); + } + if (trailer != 0) { + internal_free(m, trailer); + } + return chunk2mem(p); + } + } + return 0; +} + +/* ------------------------ comalloc/coalloc support --------------------- */ + +static void** ialloc(mstate m, + size_t n_elements, + size_t* sizes, + int opts, + void* chunks[]) { + /* + This provides common support for independent_X routines, handling + all of the combinations that can result. 
+ + The opts arg has: + bit 0 set if all elements are same size (using sizes[0]) + bit 1 set if elements should be zeroed + */ + + size_t element_size; /* chunksize of each element, if all same */ + size_t contents_size; /* total size of elements */ + size_t array_size; /* request size of pointer array */ + void* mem; /* malloced aggregate space */ + mchunkptr p; /* corresponding chunk */ + size_t remainder_size; /* remaining bytes while splitting */ + void** marray; /* either "chunks" or malloced ptr array */ + mchunkptr array_chunk; /* chunk for malloced ptr array */ + flag_t was_enabled; /* to disable mmap */ + size_t size; + size_t i; + + ensure_initialization(); + /* compute array length, if needed */ + if (chunks != 0) { + if (n_elements == 0) + return chunks; /* nothing to do */ + marray = chunks; + array_size = 0; + } + else { + /* if empty req, must still return chunk representing empty array */ + if (n_elements == 0) + return (void**)internal_malloc(m, 0); + marray = 0; + array_size = request2size(n_elements * (sizeof(void*))); + } + + /* compute total element size */ + if (opts & 0x1) { /* all-same-size */ + element_size = request2size(*sizes); + contents_size = n_elements * element_size; + } + else { /* add up all the sizes */ + element_size = 0; + contents_size = 0; + for (i = 0; i != n_elements; ++i) + contents_size += request2size(sizes[i]); + } + + size = contents_size + array_size; + + /* + Allocate the aggregate chunk. First disable direct-mmapping so + malloc won't use it, since we would not be able to later + free/realloc space internal to a segregated mmap region. 
+ */ + was_enabled = use_mmap(m); + disable_mmap(m); + mem = internal_malloc(m, size - CHUNK_OVERHEAD); + if (was_enabled) + enable_mmap(m); + if (mem == 0) + return 0; + + if (PREACTION(m)) return 0; + p = mem2chunk(mem); + remainder_size = chunksize(p); + + assert(!is_mmapped(p)); + + if (opts & 0x2) { /* optionally clear the elements */ + memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size); + } + + /* If not provided, allocate the pointer array as final part of chunk */ + if (marray == 0) { + size_t array_chunk_size; + array_chunk = chunk_plus_offset(p, contents_size); + array_chunk_size = remainder_size - contents_size; + marray = (void**) (chunk2mem(array_chunk)); + set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size); + remainder_size = contents_size; + } + + /* split out elements */ + for (i = 0; ; ++i) { + marray[i] = chunk2mem(p); + if (i != n_elements-1) { + if (element_size != 0) + size = element_size; + else + size = request2size(sizes[i]); + remainder_size -= size; + set_size_and_pinuse_of_inuse_chunk(m, p, size); + p = chunk_plus_offset(p, size); + } + else { /* the final element absorbs any overallocation slop */ + set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size); + break; + } + } + +#if DEBUG + if (marray != chunks) { + /* final element must have exactly exhausted chunk */ + if (element_size != 0) { + assert(remainder_size == element_size); + } + else { + assert(remainder_size == request2size(sizes[i])); + } + check_inuse_chunk(m, mem2chunk(marray)); + } + for (i = 0; i != n_elements; ++i) + check_inuse_chunk(m, mem2chunk(marray[i])); + +#endif /* DEBUG */ + + POSTACTION(m); + return marray; +} + + +/* -------------------------- public routines ---------------------------- */ + +#if !ONLY_MSPACES + +void* dlmalloc(size_t bytes) { + /* + Basic algorithm: + If a small request (< 256 bytes minus per-chunk overhead): + 1. If one exists, use a remainderless chunk in associated smallbin. 
+ (Remainderless means that there are too few excess bytes to + represent as a chunk.) + 2. If it is big enough, use the dv chunk, which is normally the + chunk adjacent to the one used for the most recent small request. + 3. If one exists, split the smallest available chunk in a bin, + saving remainder in dv. + 4. If it is big enough, use the top chunk. + 5. If available, get memory from system and use it + Otherwise, for a large request: + 1. Find the smallest available binned chunk that fits, and use it + if it is better fitting than dv chunk, splitting if necessary. + 2. If better fitting than any binned chunk, use the dv chunk. + 3. If it is big enough, use the top chunk. + 4. If request size >= mmap threshold, try to directly mmap this chunk. + 5. If available, get memory from system and use it + + The ugly goto's here ensure that postaction occurs along all paths. + */ + +#if USE_LOCKS + ensure_initialization(); /* initialize in sys_alloc if not using locks */ +#endif + + if (!PREACTION(gm)) { + void* mem; + size_t nb; + if (bytes <= MAX_SMALL_REQUEST) { + bindex_t idx; + binmap_t smallbits; + nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); + idx = small_index(nb); + smallbits = gm->smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. 
*/ + mchunkptr b, p; + idx += ~smallbits & 1; /* Uses next bin if idx empty */ + b = smallbin_at(gm, idx); + p = b->fd; + assert(chunksize(p) == small_index2size(idx)); + unlink_first_small_chunk(gm, b, p, idx); + set_inuse_and_pinuse(gm, p, small_index2size(idx)); + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (nb > gm->dvsize) { + if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ + mchunkptr b, p, r; + size_t rsize; + bindex_t i; + binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + b = smallbin_at(gm, i); + p = b->fd; + assert(chunksize(p) == small_index2size(i)); + unlink_first_small_chunk(gm, b, p, i); + rsize = small_index2size(i) - nb; + /* Fit here cannot be remainderless if 4byte sizes */ + if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(gm, p, small_index2size(i)); + else { + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + r = chunk_plus_offset(p, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(gm, r, rsize); + } + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) { + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + } + } + else if (bytes >= MAX_REQUEST) + nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ + else { + nb = pad_request(bytes); + if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) { + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + } + + if (nb <= gm->dvsize) { + size_t rsize = gm->dvsize - nb; + mchunkptr p = gm->dv; + if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ + mchunkptr r = gm->dv = chunk_plus_offset(p, nb); + gm->dvsize = rsize; + set_size_and_pinuse_of_free_chunk(r, rsize); + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + } + else { /* exhaust dv */ + size_t dvs = gm->dvsize; + gm->dvsize = 0; + gm->dv = 0; + set_inuse_and_pinuse(gm, p, dvs); + } + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (nb < gm->topsize) { /* Split top */ + size_t rsize = gm->topsize -= nb; + mchunkptr p = gm->top; + mchunkptr r = gm->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + mem = chunk2mem(p); + check_top_chunk(gm, gm->top); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + mem = sys_alloc(gm, nb); + + postaction: + POSTACTION(gm); + return mem; + } + + return 0; +} + +void dlfree(void* mem) { + /* + Consolidate freed chunks with preceeding or succeeding bordering + free chunks, if they exist, and then place in a bin. Intermixed + with special cases for top, dv, mmapped chunks, and usage errors. 
+ */ + + if (mem != 0) { + mchunkptr p = mem2chunk(mem); +#if FOOTERS + mstate fm = get_mstate_for(p); + if (!ok_magic(fm)) { + USAGE_ERROR_ACTION(fm, p); + return; + } +#else /* FOOTERS */ +#define fm gm +#endif /* FOOTERS */ + if (!PREACTION(fm)) { + check_inuse_chunk(fm, p); + if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { + size_t psize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + size_t prevsize = p->prev_foot; + if (is_mmapped(p)) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + fm->footprint -= psize; + goto postaction; + } + else { + mchunkptr prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ + if (p != fm->dv) { + unlink_chunk(fm, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + fm->dvsize = psize; + set_free_with_pinuse(p, psize, next); + goto postaction; + } + } + else + goto erroraction; + } + } + + if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == fm->top) { + size_t tsize = fm->topsize += psize; + fm->top = p; + p->head = tsize | PINUSE_BIT; + if (p == fm->dv) { + fm->dv = 0; + fm->dvsize = 0; + } + if (should_trim(fm, tsize)) + sys_trim(fm, 0); + goto postaction; + } + else if (next == fm->dv) { + size_t dsize = fm->dvsize += psize; + fm->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + goto postaction; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(fm, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == fm->dv) { + fm->dvsize = psize; + goto postaction; + } + } + } + else + set_free_with_pinuse(p, psize, next); + + if (is_small(psize)) { + insert_small_chunk(fm, p, psize); + check_free_chunk(fm, p); + } + else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(fm, tp, psize); + check_free_chunk(fm, p); + if 
(--fm->release_checks == 0) + release_unused_segments(fm); + } + goto postaction; + } + } + erroraction: + USAGE_ERROR_ACTION(fm, p); + postaction: + POSTACTION(fm); + } + } +#if !FOOTERS +#undef fm +#endif /* FOOTERS */ +} + +void* dlcalloc(size_t n_elements, size_t elem_size) { + void* mem; + size_t req = 0; + if (n_elements != 0) { + req = n_elements * elem_size; + if (((n_elements | elem_size) & ~(size_t)0xffff) && + (req / n_elements != elem_size)) + req = MAX_SIZE_T; /* force downstream failure on overflow */ + } + mem = dlmalloc(req); + if (mem != 0 && calloc_must_clear(mem2chunk(mem))) + memset(mem, 0, req); + return mem; +} + +void* dlrealloc(void* oldmem, size_t bytes) { + if (oldmem == 0) + return dlmalloc(bytes); +#ifdef REALLOC_ZERO_BYTES_FREES + if (bytes == 0) { + dlfree(oldmem); + return 0; + } +#endif /* REALLOC_ZERO_BYTES_FREES */ + else { +#if ! FOOTERS + mstate m = gm; +#else /* FOOTERS */ + mstate m = get_mstate_for(mem2chunk(oldmem)); + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + return internal_realloc(m, oldmem, bytes); + } +} + +void* dlmemalign(size_t alignment, size_t bytes) { + return internal_memalign(gm, alignment, bytes); +} + +void** dlindependent_calloc(size_t n_elements, size_t elem_size, + void* chunks[]) { + size_t sz = elem_size; /* serves as 1-element array */ + return ialloc(gm, n_elements, &sz, 3, chunks); +} + +void** dlindependent_comalloc(size_t n_elements, size_t sizes[], + void* chunks[]) { + return ialloc(gm, n_elements, sizes, 0, chunks); +} + +void* dlvalloc(size_t bytes) { + size_t pagesz; + ensure_initialization(); + pagesz = mparams.page_size; + return dlmemalign(pagesz, bytes); +} + +void* dlpvalloc(size_t bytes) { + size_t pagesz; + ensure_initialization(); + pagesz = mparams.page_size; + return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE)); +} + +int dlmalloc_trim(size_t pad) { + int result = 0; + ensure_initialization(); + if 
(!PREACTION(gm)) { + result = sys_trim(gm, pad); + POSTACTION(gm); + } + return result; +} + +size_t dlmalloc_footprint(void) { + return gm->footprint; +} + +size_t dlmalloc_max_footprint(void) { + return gm->max_footprint; +} + +#if !NO_MALLINFO +struct mallinfo dlmallinfo(void) { + return internal_mallinfo(gm); +} +#endif /* NO_MALLINFO */ + +void dlmalloc_stats() { + internal_malloc_stats(gm); +} + +int dlmallopt(int param_number, int value) { + return change_mparam(param_number, value); +} + +#endif /* !ONLY_MSPACES */ + +size_t dlmalloc_usable_size(void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (is_inuse(p)) + return chunksize(p) - overhead_for(p); + } + return 0; +} + +/* ----------------------------- user mspaces ---------------------------- */ + +#if MSPACES + +static mstate init_user_mstate(char* tbase, size_t tsize) { + size_t msize = pad_request(sizeof(struct malloc_state)); + mchunkptr mn; + mchunkptr msp = align_as_chunk(tbase); + mstate m = (mstate)(chunk2mem(msp)); + memset(m, 0, msize); + INITIAL_LOCK(&m->mutex); + msp->head = (msize|INUSE_BITS); + m->seg.base = m->least_addr = tbase; + m->seg.size = m->footprint = m->max_footprint = tsize; + m->magic = mparams.magic; + m->release_checks = MAX_RELEASE_CHECK_RATE; + m->mflags = mparams.default_mflags; + m->extp = 0; + m->exts = 0; + disable_contiguous(m); + init_bins(m); + mn = next_chunk(mem2chunk(m)); + init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE); + check_top_chunk(m, m->top); + return m; +} + +mspace create_mspace(size_t capacity, int locked) { + mstate m = 0; + size_t msize; + ensure_initialization(); + msize = pad_request(sizeof(struct malloc_state)); + if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { + size_t rs = ((capacity == 0)? 
mparams.granularity : + (capacity + TOP_FOOT_SIZE + msize)); + size_t tsize = granularity_align(rs); + char* tbase = (char*)(CALL_MMAP(tsize)); + if (tbase != CMFAIL) { + m = init_user_mstate(tbase, tsize); + m->seg.sflags = USE_MMAP_BIT; + set_lock(m, locked); + } + } + return (mspace)m; +} + +mspace create_mspace_with_base(void* base, size_t capacity, int locked) { + mstate m = 0; + size_t msize; + ensure_initialization(); + msize = pad_request(sizeof(struct malloc_state)); + if (capacity > msize + TOP_FOOT_SIZE && + capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { + m = init_user_mstate((char*)base, capacity); + m->seg.sflags = EXTERN_BIT; + set_lock(m, locked); + } + return (mspace)m; +} + +int mspace_track_large_chunks(mspace msp, int enable) { + int ret = 0; + mstate ms = (mstate)msp; + if (!PREACTION(ms)) { + if (!use_mmap(ms)) + ret = 1; + if (!enable) + enable_mmap(ms); + else + disable_mmap(ms); + POSTACTION(ms); + } + return ret; +} + +size_t destroy_mspace(mspace msp) { + size_t freed = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + msegmentptr sp = &ms->seg; + while (sp != 0) { +#if HAVE_MMAP + char* base = sp->base; +#endif + size_t size = sp->size; + flag_t flag = sp->sflags; + sp = sp->next; + if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) && + CALL_MUNMAP(base, size) == 0) + freed += size; + } + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return freed; +} + +/* + mspace versions of routines are near-clones of the global + versions. This is not so nice but better than the alternatives. +*/ + + +void* mspace_malloc(mspace msp, size_t bytes) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (!PREACTION(ms)) { + void* mem; + size_t nb; + if (bytes <= MAX_SMALL_REQUEST) { + bindex_t idx; + binmap_t smallbits; + nb = (bytes < MIN_REQUEST)? 
MIN_CHUNK_SIZE : pad_request(bytes); + idx = small_index(nb); + smallbits = ms->smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */ + mchunkptr b, p; + idx += ~smallbits & 1; /* Uses next bin if idx empty */ + b = smallbin_at(ms, idx); + p = b->fd; + assert(chunksize(p) == small_index2size(idx)); + unlink_first_small_chunk(ms, b, p, idx); + set_inuse_and_pinuse(ms, p, small_index2size(idx)); + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (nb > ms->dvsize) { + if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ + mchunkptr b, p, r; + size_t rsize; + bindex_t i; + binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + b = smallbin_at(ms, i); + p = b->fd; + assert(chunksize(p) == small_index2size(i)); + unlink_first_small_chunk(ms, b, p, i); + rsize = small_index2size(i) - nb; + /* Fit here cannot be remainderless if 4byte sizes */ + if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(ms, p, small_index2size(i)); + else { + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + r = chunk_plus_offset(p, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(ms, r, rsize); + } + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) { + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + } + } + else if (bytes >= MAX_REQUEST) + nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ + else { + nb = pad_request(bytes); + if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) { + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + } + + if (nb <= ms->dvsize) { + size_t rsize = ms->dvsize - nb; + mchunkptr p = ms->dv; + if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ + mchunkptr r = ms->dv = chunk_plus_offset(p, nb); + ms->dvsize = rsize; + set_size_and_pinuse_of_free_chunk(r, rsize); + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + } + else { /* exhaust dv */ + size_t dvs = ms->dvsize; + ms->dvsize = 0; + ms->dv = 0; + set_inuse_and_pinuse(ms, p, dvs); + } + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (nb < ms->topsize) { /* Split top */ + size_t rsize = ms->topsize -= nb; + mchunkptr p = ms->top; + mchunkptr r = ms->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + mem = chunk2mem(p); + check_top_chunk(ms, ms->top); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + mem = sys_alloc(ms, nb); + + postaction: + POSTACTION(ms); + return mem; + } + + return 0; +} + +void mspace_free(mspace msp, void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); +#if FOOTERS + mstate fm = get_mstate_for(p); + msp = msp; /* placate people compiling -Wunused */ +#else /* FOOTERS */ + mstate fm = (mstate)msp; +#endif /* FOOTERS */ + if (!ok_magic(fm)) { + USAGE_ERROR_ACTION(fm, p); + return; + } + if (!PREACTION(fm)) { + check_inuse_chunk(fm, p); + if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { + size_t psize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + size_t prevsize = p->prev_foot; + if (is_mmapped(p)) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + fm->footprint -= psize; + goto postaction; + } + else { + mchunkptr prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if 
(RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ + if (p != fm->dv) { + unlink_chunk(fm, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + fm->dvsize = psize; + set_free_with_pinuse(p, psize, next); + goto postaction; + } + } + else + goto erroraction; + } + } + + if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == fm->top) { + size_t tsize = fm->topsize += psize; + fm->top = p; + p->head = tsize | PINUSE_BIT; + if (p == fm->dv) { + fm->dv = 0; + fm->dvsize = 0; + } + if (should_trim(fm, tsize)) + sys_trim(fm, 0); + goto postaction; + } + else if (next == fm->dv) { + size_t dsize = fm->dvsize += psize; + fm->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + goto postaction; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(fm, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == fm->dv) { + fm->dvsize = psize; + goto postaction; + } + } + } + else + set_free_with_pinuse(p, psize, next); + + if (is_small(psize)) { + insert_small_chunk(fm, p, psize); + check_free_chunk(fm, p); + } + else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(fm, tp, psize); + check_free_chunk(fm, p); + if (--fm->release_checks == 0) + release_unused_segments(fm); + } + goto postaction; + } + } + erroraction: + USAGE_ERROR_ACTION(fm, p); + postaction: + POSTACTION(fm); + } + } +} + +void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) { + void* mem; + size_t req = 0; + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (n_elements != 0) { + req = n_elements * elem_size; + if (((n_elements | elem_size) & ~(size_t)0xffff) && + (req / n_elements != elem_size)) + req = MAX_SIZE_T; /* force downstream failure on overflow */ + } + mem = internal_malloc(ms, req); + if (mem != 0 && calloc_must_clear(mem2chunk(mem))) + memset(mem, 0, req); + return mem; +} + +void* 
mspace_realloc(mspace msp, void* oldmem, size_t bytes) { + if (oldmem == 0) + return mspace_malloc(msp, bytes); +#ifdef REALLOC_ZERO_BYTES_FREES + if (bytes == 0) { + mspace_free(msp, oldmem); + return 0; + } +#endif /* REALLOC_ZERO_BYTES_FREES */ + else { +#if FOOTERS + mchunkptr p = mem2chunk(oldmem); + mstate ms = get_mstate_for(p); +#else /* FOOTERS */ + mstate ms = (mstate)msp; +#endif /* FOOTERS */ + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return internal_realloc(ms, oldmem, bytes); + } +} + +void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return internal_memalign(ms, alignment, bytes); +} + +void** mspace_independent_calloc(mspace msp, size_t n_elements, + size_t elem_size, void* chunks[]) { + size_t sz = elem_size; /* serves as 1-element array */ + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return ialloc(ms, n_elements, &sz, 3, chunks); +} + +void** mspace_independent_comalloc(mspace msp, size_t n_elements, + size_t sizes[], void* chunks[]) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return ialloc(ms, n_elements, sizes, 0, chunks); +} + +int mspace_trim(mspace msp, size_t pad) { + int result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + if (!PREACTION(ms)) { + result = sys_trim(ms, pad); + POSTACTION(ms); + } + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +void mspace_malloc_stats(mspace msp) { + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + internal_malloc_stats(ms); + } + else { + USAGE_ERROR_ACTION(ms,ms); + } +} + +size_t mspace_footprint(mspace msp) { + size_t result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + result = ms->footprint; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + + +size_t mspace_max_footprint(mspace msp) { + size_t result 
= 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + result = ms->max_footprint; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + + +#if !NO_MALLINFO +struct mallinfo mspace_mallinfo(mspace msp) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + } + return internal_mallinfo(ms); +} +#endif /* NO_MALLINFO */ + +size_t mspace_usable_size(void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (is_inuse(p)) + return chunksize(p) - overhead_for(p); + } + return 0; +} + +int mspace_mallopt(int param_number, int value) { + return change_mparam(param_number, value); +} + +#endif /* MSPACES */ + + +/* -------------------- Alternative MORECORE functions ------------------- */ + +/* + Guidelines for creating a custom version of MORECORE: + + * For best performance, MORECORE should allocate in multiples of pagesize. + * MORECORE may allocate more memory than requested. (Or even less, + but this will usually result in a malloc failure.) + * MORECORE must not allocate memory when given argument zero, but + instead return one past the end address of memory from previous + nonzero call. + * For best performance, consecutive calls to MORECORE with positive + arguments should return increasing addresses, indicating that + space has been contiguously extended. + * Even though consecutive calls to MORECORE need not return contiguous + addresses, it must be OK for malloc'ed chunks to span multiple + regions in those cases where they do happen to be contiguous. + * MORECORE need not handle negative arguments -- it may instead + just return MFAIL when given negative arguments. + Negative arguments are always multiples of pagesize. MORECORE + must not misinterpret negative args as large positive unsigned + args. You can suppress all such calls from even occurring by defining + MORECORE_CANNOT_TRIM, + + As an example alternative MORECORE, here is a custom allocator + kindly contributed for pre-OSX macOS. 
It uses virtually but not + necessarily physically contiguous non-paged memory (locked in, + present and won't get swapped out). You can use it by uncommenting + this section, adding some #includes, and setting up the appropriate + defines above: + + #define MORECORE osMoreCore + + There is also a shutdown routine that should somehow be called for + cleanup upon program exit. + + #define MAX_POOL_ENTRIES 100 + #define MINIMUM_MORECORE_SIZE (64 * 1024U) + static int next_os_pool; + void *our_os_pools[MAX_POOL_ENTRIES]; + + void *osMoreCore(int size) + { + void *ptr = 0; + static void *sbrk_top = 0; + + if (size > 0) + { + if (size < MINIMUM_MORECORE_SIZE) + size = MINIMUM_MORECORE_SIZE; + if (CurrentExecutionLevel() == kTaskLevel) + ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); + if (ptr == 0) + { + return (void *) MFAIL; + } + // save ptrs so they can be freed during cleanup + our_os_pools[next_os_pool] = ptr; + next_os_pool++; + ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); + sbrk_top = (char *) ptr + size; + return ptr; + } + else if (size < 0) + { + // we don't currently support shrink behavior + return (void *) MFAIL; + } + else + { + return sbrk_top; + } + } + + // cleanup any allocated memory pools + // called as last thing before shutting down driver + + void osCleanupMem(void) + { + void **ptr; + + for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) + if (*ptr) + { + PoolDeallocate(*ptr); + *ptr = 0; + } + } + +*/ + + +/* ----------------------------------------------------------------------- +History: + V2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) + * Use zeros instead of prev foot for is_mmapped + * Add mspace_track_large_chunks; thanks to Jean Brouwers + * Fix set_inuse in internal_realloc; thanks to Jean Brouwers + * Fix insufficient sys_alloc padding when using 16byte alignment + * Fix bad error check in mspace_footprint + * Adaptations for ptmalloc; thanks to Wolfram Gloger. 
+ * Reentrant spin locks; thanks to Earl Chew and others + * Win32 improvements; thanks to Niall Douglas and Earl Chew + * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options + * Extension hook in malloc_state + * Various small adjustments to reduce warnings on some compilers + * Various configuration extensions/changes for more platforms. Thanks + to all who contributed these. + + V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee) + * Add max_footprint functions + * Ensure all appropriate literals are size_t + * Fix conditional compilation problem for some #define settings + * Avoid concatenating segments with the one provided + in create_mspace_with_base + * Rename some variables to avoid compiler shadowing warnings + * Use explicit lock initialization. + * Better handling of sbrk interference. + * Simplify and fix segment insertion, trimming and mspace_destroy + * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x + * Thanks especially to Dennis Flanagan for help on these. + + V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee) + * Fix memalign brace error. + + V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee) + * Fix improper #endif nesting in C++ + * Add explicit casts needed for C++ + + V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee) + * Use trees for large bins + * Support mspaces + * Use segments to unify sbrk-based and mmap-based system allocation, + removing need for emulation on most platforms without sbrk. + * Default safety checks + * Optional footer checks. Thanks to William Robertson for the idea. + * Internal code refactoring + * Incorporate suggestions and platform-specific changes. + Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas, + Aaron Bachmann, Emery Berger, and others. + * Speed up non-fastbin processing enough to remove fastbins. + * Remove useless cfree() to avoid conflicts with other apps. + * Remove internal memcpy, memset. Compilers handle builtins better. 
+ * Remove some options that no one ever used and rename others. + + V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) + * Fix malloc_state bitmap array misdeclaration + + V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee) + * Allow tuning of FIRST_SORTED_BIN_SIZE + * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte. + * Better detection and support for non-contiguousness of MORECORE. + Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger + * Bypass most of malloc if no frees. Thanks to Emery Berger. + * Fix freeing of old top non-contiguous chunk in sysmalloc. + * Raised default trim and map thresholds to 256K. + * Fix mmap-related #defines. Thanks to Lubos Lunak. + * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield. + * Branch-free bin calculation + * Default trim and mmap thresholds now 256K. + + V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) + * Introduce independent_comalloc and independent_calloc. + Thanks to Michael Pachos for motivation and help. + * Make optional .h file available + * Allow > 2GB requests on 32bit systems. + * new WIN32 sbrk, mmap, munmap, lock code from . + Thanks also to Andreas Mueller , + and Anonymous. + * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for + helping test this.) + * memalign: check alignment arg + * realloc: don't try to shift chunks backwards, since this + leads to more fragmentation in some programs and doesn't + seem to help in any others. + * Collect all cases in malloc requiring system memory into sysmalloc + * Use mmap as backup to sbrk + * Place all internal state in malloc_state + * Introduce fastbins (although similar to 2.5.1) + * Many minor tunings and cosmetic improvements + * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK + * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS + Thanks to Tony E. Bennett and others. + * Include errno.h to support default failure action.
+ + V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee) + * return null for negative arguments + * Added Several WIN32 cleanups from Martin C. Fong + * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h' + (e.g. WIN32 platforms) + * Cleanup header file inclusion for WIN32 platforms + * Cleanup code to avoid Microsoft Visual C++ compiler complaints + * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing + memory allocation routines + * Set 'malloc_getpagesize' for WIN32 platforms (needs more work) + * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to + usage of 'assert' in non-WIN32 code + * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to + avoid infinite loop + * Always call 'fREe()' rather than 'free()' + + V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee) + * Fixed ordering problem with boundary-stamping + + V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee) + * Added pvalloc, as recommended by H.J. Liu + * Added 64bit pointer support mainly from Wolfram Gloger + * Added anonymously donated WIN32 sbrk emulation + * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen + * malloc_extend_top: fix mask error that caused wastage after + foreign sbrks + * Add linux mremap support code from HJ Liu + + V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee) + * Integrated most documentation with the code. + * Add support for mmap, with help from + Wolfram Gloger (Gloger@lrz.uni-muenchen.de). + * Use last_remainder in more cases. + * Pack bins using idea from colin@nyx10.cs.du.edu + * Use ordered bins instead of best-fit threshhold + * Eliminate block-local decls to simplify tracing and debugging. + * Support another case of realloc via move into top + * Fix error occuring when initial sbrk_base not word-aligned. + * Rely on page size for units instead of SBRK_UNIT to + avoid surprises about sbrk alignment conventions. + * Add mallinfo, mallopt. 
Thanks to Raymond Nijssen + (raymond@es.ele.tue.nl) for the suggestion. + * Add `pad' argument to malloc_trim and top_pad mallopt parameter. + * More precautions for cases where other routines call sbrk, + courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de). + * Added macros etc., allowing use in linux libc from + H.J. Lu (hjl@gnu.ai.mit.edu) + * Inverted this history list + + V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee) + * Re-tuned and fixed to behave more nicely with V2.6.0 changes. + * Removed all preallocation code since under current scheme + the work required to undo bad preallocations exceeds + the work saved in good cases for most test programs. + * No longer use return list or unconsolidated bins since + no scheme using them consistently outperforms those that don't + given above changes. + * Use best fit for very large chunks to prevent some worst-cases. + * Added some support for debugging + + V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee) + * Removed footers when chunks are in use. Thanks to + Paul Wilson (wilson@cs.texas.edu) for the suggestion. + + V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee) + * Added malloc_trim, with help from Wolfram Gloger + (wmglo@Dent.MED.Uni-Muenchen.DE). 
+ + V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g) + + V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g) + * realloc: try to expand in both directions + * malloc: swap order of clean-bin strategy; + * realloc: only conditionally expand backwards + * Try not to scavenge used bins + * Use bin counts as a guide to preallocation + * Occasionally bin return list chunks in first scan + * Add a few optimizations from colin@nyx10.cs.du.edu + + V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g) + * faster bin computation & slightly different binning + * merged all consolidations to one part of malloc proper + (eliminating old malloc_find_space & malloc_clean_bin) + * Scan 2 returns chunks (not just 1) + * Propagate failure in realloc if malloc returns 0 + * Add stuff to allow compilation on non-ANSI compilers + from kpv@research.att.com + + V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu) + * removed potential for odd address access in prev_chunk + * removed dependency on getpagesize.h + * misc cosmetics and a bit more internal documentation + * anticosmetics: mangled names in macros to evade debugger strangeness + * tested on sparc, hp-700, dec-mips, rs6000 + with gcc & native cc (hp, dec only) allowing + Detlefs & Zorn comparison study (in SIGPLAN Notices.) + + Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) + * Based loosely on libg++-1.2X malloc. (It retains some of the overall + structure of old version, but most details differ.) + +*/ + diff --git a/src/io/bmi/bmi_zoid/dlmalloc.h b/src/io/bmi/bmi_zoid/dlmalloc.h new file mode 100644 index 0000000..12c7d4c --- /dev/null +++ b/src/io/bmi/bmi_zoid/dlmalloc.h @@ -0,0 +1,560 @@ +/* + Default header file for malloc-2.8.x, written by Doug Lea + and released to the public domain, as explained at + http://creativecommons.org/licenses/publicdomain. + + last update: Wed May 27 14:25:17 2009 Doug Lea (dl at gee) + + This header is for ANSI C/C++ only. 
You can set any of + the following #defines before including: + + * If USE_DL_PREFIX is defined, it is assumed that malloc.c + was also compiled with this option, so all routines + have names starting with "dl". + + * If HAVE_USR_INCLUDE_MALLOC_H is defined, it is assumed that this + file will be #included AFTER <malloc.h>. This is needed only if + your system defines a struct mallinfo that is incompatible with the + standard one declared here. Otherwise, you can include this file + INSTEAD of your system <malloc.h>. At least on ANSI, all + declarations should be compatible with system versions + + * If MSPACES is defined, declarations for mspace versions are included. +*/ + +#ifndef MALLOC_280_H +#define MALLOC_280_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include /* for size_t */ + +#ifndef ONLY_MSPACES +#define ONLY_MSPACES 0 /* define to a value */ +#endif /* ONLY_MSPACES */ +#ifndef NO_MALLINFO +#define NO_MALLINFO 0 +#endif /* NO_MALLINFO */ + + +#if !ONLY_MSPACES + +#ifndef USE_DL_PREFIX +#define dlcalloc calloc +#define dlfree free +#define dlmalloc malloc +#define dlmemalign memalign +#define dlrealloc realloc +#define dlvalloc valloc +#define dlpvalloc pvalloc +#define dlmallinfo mallinfo +#define dlmallopt mallopt +#define dlmalloc_trim malloc_trim +#define dlmalloc_stats malloc_stats +#define dlmalloc_usable_size malloc_usable_size +#define dlmalloc_footprint malloc_footprint +#define dlindependent_calloc independent_calloc +#define dlindependent_comalloc independent_comalloc +#endif /* USE_DL_PREFIX */ +#if !NO_MALLINFO +#ifndef HAVE_USR_INCLUDE_MALLOC_H +#ifndef _MALLOC_H +#ifndef MALLINFO_FIELD_TYPE +#define MALLINFO_FIELD_TYPE size_t +#endif /* MALLINFO_FIELD_TYPE */ +#ifndef STRUCT_MALLINFO_DECLARED +#define STRUCT_MALLINFO_DECLARED 1 +struct mallinfo { + MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ + MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ + MALLINFO_FIELD_TYPE smblks; /* always 0 */ + MALLINFO_FIELD_TYPE
hblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ + MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ + MALLINFO_FIELD_TYPE fsmblks; /* always 0 */ + MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ + MALLINFO_FIELD_TYPE fordblks; /* total free space */ + MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ +}; +#endif /* STRUCT_MALLINFO_DECLARED */ +#endif /* _MALLOC_H */ +#endif /* HAVE_USR_INCLUDE_MALLOC_H */ +#endif /* !NO_MALLINFO */ + +/* + malloc(size_t n) + Returns a pointer to a newly allocated chunk of at least n bytes, or + null if no space is available, in which case errno is set to ENOMEM + on ANSI C systems. + + If n is zero, malloc returns a minimum-sized chunk. (The minimum + size is 16 bytes on most 32bit systems, and 32 bytes on 64bit + systems.) Note that size_t is an unsigned type, so calls with + arguments that would be negative if signed are interpreted as + requests for huge amounts of space, which will often fail. The + maximum supported value of n differs across systems, but is in all + cases less than the maximum representable value of a size_t. +*/ +void* dlmalloc(size_t); + +/* + free(void* p) + Releases the chunk of memory pointed to by p, that had been previously + allocated using malloc or a related routine such as realloc. + It has no effect if p is null. If p was not malloced or already + freed, free(p) will by default cause the current program to abort. +*/ +void dlfree(void*); + +/* + calloc(size_t n_elements, size_t element_size); + Returns a pointer to n_elements * element_size bytes, with all locations + set to zero. +*/ +void* dlcalloc(size_t, size_t); + +/* + realloc(void* p, size_t n) + Returns a pointer to a chunk of size n that contains the same data + as does chunk p up to the minimum of (n, p's size) bytes, or null + if no space is available. + + The returned pointer may or may not be the same as p.
The algorithm + prefers extending p in most cases when possible, otherwise it + employs the equivalent of a malloc-copy-free sequence. + + If p is null, realloc is equivalent to malloc. + + If space is not available, realloc returns null, errno is set (if on + ANSI) and p is NOT freed. + + if n is for fewer bytes than already held by p, the newly unused + space is lopped off and freed if possible. realloc with a size + argument of zero (re)allocates a minimum-sized chunk. + + The old unix realloc convention of allowing the last-free'd chunk + to be used as an argument to realloc is not supported. +*/ + +void* dlrealloc(void*, size_t); + +/* + memalign(size_t alignment, size_t n); + Returns a pointer to a newly allocated chunk of n bytes, aligned + in accord with the alignment argument. + + The alignment argument should be a power of two. If the argument is + not a power of two, the nearest greater power is used. + 8-byte alignment is guaranteed by normal malloc calls, so don't + bother calling memalign with an argument of 8 or less. + + Overreliance on memalign is a sure way to fragment space. +*/ +void* dlmemalign(size_t, size_t); + +/* + valloc(size_t n); + Equivalent to memalign(pagesize, n), where pagesize is the page + size of the system. If the pagesize is unknown, 4096 is used. +*/ +void* dlvalloc(size_t); + +/* + mallopt(int parameter_number, int parameter_value) + Sets tunable parameters. The format is to provide a + (parameter-number, parameter-value) pair. mallopt then sets the + corresponding parameter to the argument value if it can (i.e., so + long as the value is meaningful), and returns 1 if successful else + 0. SVID/XPG/ANSI defines four standard param numbers for mallopt, + normally defined in malloc.h. None of these are used in this malloc, + so setting them has no effect.
But this malloc also supports other + options in mallopt: + + Symbol param # default allowed param values + M_TRIM_THRESHOLD -1 2*1024*1024 any (-1U disables trimming) + M_GRANULARITY -2 page size any power of 2 >= page size + M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) +*/ +int dlmallopt(int, int); + +#define M_TRIM_THRESHOLD (-1) +#define M_GRANULARITY (-2) +#define M_MMAP_THRESHOLD (-3) + + +/* + malloc_footprint(); + Returns the number of bytes obtained from the system. The total + number of bytes allocated by malloc, realloc etc., is less than this + value. Unlike mallinfo, this function returns only a precomputed + result, so can be called frequently to monitor memory consumption. + Even if locks are otherwise defined, this function does not use them, + so results might not be up to date. +*/ +size_t dlmalloc_footprint(); + +#if !NO_MALLINFO +/* + mallinfo() + Returns (by copy) a struct containing various summary statistics: + + arena: current total non-mmapped bytes allocated from system + ordblks: the number of free chunks + smblks: always zero. + hblks: current number of mmapped regions + hblkhd: total bytes held in mmapped regions + usmblks: the maximum total allocated space. This will be greater + than current total if trimming has occurred. + fsmblks: always zero + uordblks: current total allocated space (normal or mmapped) + fordblks: total free space + keepcost: the maximum number of bytes that could ideally be released + back to system via malloc_trim. ("ideally" means that + it ignores page restrictions etc.) + + Because these fields are ints, but internal bookkeeping may + be kept as longs, the reported values may wrap around zero and + thus be inaccurate. 
+*/ + +struct mallinfo dlmallinfo(void); +#endif /* NO_MALLINFO */ + +/* + independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); + + independent_calloc is similar to calloc, but instead of returning a + single cleared space, it returns an array of pointers to n_elements + independent elements that can hold contents of size elem_size, each + of which starts out cleared, and can be independently freed, + realloc'ed etc. The elements are guaranteed to be adjacently + allocated (this is not guaranteed to occur with multiple callocs or + mallocs), which may also improve cache locality in some + applications. + + The "chunks" argument is optional (i.e., may be null, which is + probably the most typical usage). If it is null, the returned array + is itself dynamically allocated and should also be freed when it is + no longer needed. Otherwise, the chunks array must be of at least + n_elements in length. It is filled in with the pointers to the + chunks. + + In either case, independent_calloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and "chunks" + is null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use regular calloc and assign pointers into this + space to represent elements. (In this case though, you cannot + independently free elements.) + + independent_calloc simplifies and speeds up implementations of many + kinds of pools. It may also be useful when constructing large data + structures that initially have a fixed number of fixed-sized nodes, + but the number is not known at compile time, and some of the nodes + may later need to be freed. 
For example: + + struct Node { int item; struct Node* next; }; + + struct Node* build_list() { + struct Node** pool; + int n = read_number_of_nodes_needed(); + if (n <= 0) return 0; + pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); + if (pool == 0) die(); + // organize into a linked list... + struct Node* first = pool[0]; + for (i = 0; i < n-1; ++i) + pool[i]->next = pool[i+1]; + free(pool); // Can now free the array (or not, if it is needed later) + return first; + } +*/ +void** dlindependent_calloc(size_t, size_t, void**); + +/* + independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); + + independent_comalloc allocates, all at once, a set of n_elements + chunks with sizes indicated in the "sizes" array. It returns + an array of pointers to these elements, each of which can be + independently freed, realloc'ed etc. The elements are guaranteed to + be adjacently allocated (this is not guaranteed to occur with + multiple callocs or mallocs), which may also improve cache locality + in some applications. + + The "chunks" argument is optional (i.e., may be null). If it is null + the returned array is itself dynamically allocated and should also + be freed when it is no longer needed. Otherwise, the chunks array + must be of at least n_elements in length. It is filled in with the + pointers to the chunks. + + In either case, independent_comalloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and chunks is + null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use a single regular malloc, and assign pointers at + particular offsets in the aggregate space. (In this case though, you + cannot independently free elements.) 
+ + independent_comalloc differs from independent_calloc in that each + element may have a different size, and also that it does not + automatically clear elements. + + independent_comalloc can be used to speed up allocation in cases + where several structs or objects must always be allocated at the + same time. For example: + + struct Head { ... } + struct Foot { ... } + + void send_message(char* msg) { + int msglen = strlen(msg); + size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; + void* chunks[3]; + if (independent_comalloc(3, sizes, chunks) == 0) + die(); + struct Head* head = (struct Head*)(chunks[0]); + char* body = (char*)(chunks[1]); + struct Foot* foot = (struct Foot*)(chunks[2]); + // ... + } + + In general though, independent_comalloc is worth using only for + larger values of n_elements. For small values, you probably won't + detect enough difference from series of malloc calls to bother. + + Overuse of independent_comalloc can increase overall memory usage, + since it cannot reuse existing noncontiguous small chunks that + might be available for some of the elements. +*/ +void** dlindependent_comalloc(size_t, size_t*, void**); + + +/* + pvalloc(size_t n); + Equivalent to valloc(minimum-page-that-holds(n)), that is, + round up n to nearest pagesize. + */ +void* dlpvalloc(size_t); + +/* + malloc_trim(size_t pad); + + If possible, gives memory back to the system (via negative arguments + to sbrk) if there is unused memory at the `high' end of the malloc + pool or in unused MMAP segments. You can call this after freeing + large blocks of memory to potentially reduce the system-level memory + requirements of a program. However, it cannot guarantee to reduce + memory. Under some allocation patterns, some large free blocks of + memory will be locked between two used chunks, so they cannot be + given back to the system. + + The `pad' argument to malloc_trim represents the amount of free + trailing space to leave untrimmed.
If this argument is zero, only + the minimum amount of memory to maintain internal data structures + will be left. Non-zero arguments can be supplied to maintain enough + trailing space to service future expected allocations without having + to re-obtain memory from the system. + + Malloc_trim returns 1 if it actually released any memory, else 0. +*/ +int dlmalloc_trim(size_t); + +/* + malloc_stats(); + Prints on stderr the amount of space obtained from the system (both + via sbrk and mmap), the maximum amount (which may be more than + current if malloc_trim and/or munmap got called), and the current + number of bytes allocated via malloc (or realloc, etc) but not yet + freed. Note that this is the number of bytes allocated, not the + number requested. It will be larger than the number requested + because of alignment and bookkeeping overhead. Because it includes + alignment wastage as being in use, this figure may be greater than + zero even when no user-level chunks are allocated. + + The reported current and maximum system memory can be inaccurate if + a program makes other calls to system memory allocation functions + (normally sbrk) outside of malloc. + + malloc_stats prints only the most commonly interesting statistics. + More information can be obtained by calling mallinfo. +*/ +void dlmalloc_stats(); + +#endif /* !ONLY_MSPACES */ + +/* + malloc_usable_size(void* p); + + Returns the number of bytes you can actually use in + an allocated chunk, which may be more than you requested (although + often not) due to alignment and minimum size constraints. + You can use this many bytes without worrying about + overwriting other allocated objects. This is not a particularly great + programming practice. 
malloc_usable_size can be more useful in + debugging and assertions, for example: + + p = malloc(n); + assert(malloc_usable_size(p) >= 256); +*/ +size_t dlmalloc_usable_size(void*); + + +#if MSPACES + +/* + mspace is an opaque type representing an independent + region of space that supports mspace_malloc, etc. +*/ +typedef void* mspace; + +/* + create_mspace creates and returns a new independent space with the + given initial capacity, or, if 0, the default granularity size. It + returns null if there is no system memory available to create the + space. If argument locked is non-zero, the space uses a separate + lock to control access. The capacity of the space will grow + dynamically as needed to service mspace_malloc requests. You can + control the sizes of incremental increases of this space by + compiling with a different DEFAULT_GRANULARITY or dynamically + setting with mallopt(M_GRANULARITY, value). +*/ +mspace create_mspace(size_t capacity, int locked); + +/* + destroy_mspace destroys the given space, and attempts to return all + of its memory back to the system, returning the total number of + bytes freed. After destruction, the results of access to all memory + used by the space become undefined. +*/ +size_t destroy_mspace(mspace msp); + +/* + create_mspace_with_base uses the memory supplied as the initial base + of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this + space is used for bookkeeping, so the capacity must be at least this + large. (Otherwise 0 is returned.) When this initial space is + exhausted, additional memory will be obtained from the system. + Destroying this space will deallocate all additionally allocated + space (if possible) but not the initial base. +*/ +mspace create_mspace_with_base(void* base, size_t capacity, int locked); + +/* + mspace_track_large_chunks controls whether requests for large chunks + are allocated in their own untracked mmapped regions, separate from + others in this mspace. 
By default large chunks are not tracked, + which reduces fragmentation. However, such chunks are not + necessarily released to the system upon destroy_mspace. Enabling + tracking by setting to true may increase fragmentation, but avoids + leakage when relying on destroy_mspace to release all memory + allocated using this space. The function returns the previous + setting. +*/ +int mspace_track_large_chunks(mspace msp, int enable); + +/* + mspace_malloc behaves as malloc, but operates within + the given space. +*/ +void* mspace_malloc(mspace msp, size_t bytes); + +/* + mspace_free behaves as free, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_free is not actually needed. + free may be called instead of mspace_free because freed chunks from + any space are handled by their originating spaces. +*/ +void mspace_free(mspace msp, void* mem); + +/* + mspace_realloc behaves as realloc, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_realloc is not actually + needed. realloc may be called instead of mspace_realloc because + realloced chunks from any space are handled by their originating + spaces. +*/ +void* mspace_realloc(mspace msp, void* mem, size_t newsize); + +/* + mspace_calloc behaves as calloc, but operates within + the given space. +*/ +void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); + +/* + mspace_memalign behaves as memalign, but operates within + the given space. +*/ +void* mspace_memalign(mspace msp, size_t alignment, size_t bytes); + +/* + mspace_independent_calloc behaves as independent_calloc, but + operates within the given space. +*/ +void** mspace_independent_calloc(mspace msp, size_t n_elements, + size_t elem_size, void* chunks[]); + +/* + mspace_independent_comalloc behaves as independent_comalloc, but + operates within the given space. 
+*/ +void** mspace_independent_comalloc(mspace msp, size_t n_elements, + size_t sizes[], void* chunks[]); + +/* + mspace_footprint() returns the number of bytes obtained from the + system for this space. +*/ +size_t mspace_footprint(mspace msp); + + +#if !NO_MALLINFO +/* + mspace_mallinfo behaves as mallinfo, but reports properties of + the given space. +*/ +struct mallinfo mspace_mallinfo(mspace msp); +#endif /* NO_MALLINFO */ + +/* + mspace_usable_size(void* mem) behaves the same as malloc_usable_size; +*/ + size_t mspace_usable_size(void* mem); + +/* + mspace_malloc_stats behaves as malloc_stats, but reports + properties of the given space. +*/ +void mspace_malloc_stats(mspace msp); + +/* + mspace_trim behaves as malloc_trim, but + operates within the given space. +*/ +int mspace_trim(mspace msp, size_t pad); + +/* + An alias for mallopt. +*/ +int mspace_mallopt(int, int); + +#endif /* MSPACES */ + +#ifdef __cplusplus +}; /* end of extern "C" */ +#endif + +#endif /* MALLOC_280_H */ diff --git a/src/io/bmi/bmi_zoid/module.mk.in b/src/io/bmi/bmi_zoid/module.mk.in new file mode 100644 index 0000000..1f40866 --- /dev/null +++ b/src/io/bmi/bmi_zoid/module.mk.in @@ -0,0 +1,30 @@ +# +# Makefile stub for bmi_zoid. +# +# See COPYING in top-level directory. +# + +# only do any of this if configure decided to use ZOID +ifneq (,$(BUILD_ZOID)) + +# +# Local definitions. +# +DIR := src/io/bmi/bmi_zoid +cfiles := zoid.c server.c zbmi_pool.c + +# +# Export these to the top Makefile to tell it what to build. +# +src := $(patsubst %,$(DIR)/%,$(cfiles)) +LIBSRC += $(src) +SERVERSRC += $(src) +LIBBMISRC += $(src) + +# +# Extra cflags for files in this directory.
+# +MODCFLAGS_$(DIR) := -I@ZOID_SRCDIR@/include -I@ZOID_SRCDIR@/zbmi \ + -I@ZOID_SRCDIR@/zbmi/implementation + +endif # BUILD_ZOID diff --git a/src/io/bmi/bmi_zoid/server.c b/src/io/bmi/bmi_zoid/server.c new file mode 100644 index 0000000..872bd39 --- /dev/null +++ b/src/io/bmi/bmi_zoid/server.c @@ -0,0 +1,1169 @@ +/* ZOID implementation of a BMI method -- I/O node server side */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "zoid.h" +#include "zbmi_pool.h" +#include "zbmi_protocol.h" + +/* method_op_p's method_data points to this structure on the server side. */ +struct ZoidServerMethodData +{ + void* tmp_buffer; /* Used with BMI_EXT_ALLOC to store the address of the + temporary shared memory buffer, NULL otherwise. */ + int zoid_buf_id; /* 0 if the operation has not yet been sent to ZOID. */ +}; + +/* Describes a request with BMI_EXT_ALLOC pending for a temporary memory + buffer. */ +struct NoMemDescriptor +{ + struct NoMemDescriptor* next; + struct NoMemDescriptor* prev; + + bmi_size_t total_size; + method_op_p op; +}; + +/* Command streams to the zbmi plugin in the ZOID daemon. */ +#define ZBMI_SOCKETS_LEN_INIT 10 +static pthread_mutex_t zbmi_sockets_mutex = PTHREAD_MUTEX_INITIALIZER; +static int* zbmi_sockets = NULL; +static char* zbmi_sockets_inuse = NULL; /* Whether a particular zbmi_sockets + entry is currently in use. */ +static int zbmi_sockets_len = 0; /* Length of zbmi_sockets and + zbmi_sockets_inuse. */ +static int zbmi_sockets_used = 0; /* Count of initialized zbmi_sockets + entries. */ + +/* An array of client addresses. */ +#define CLIENTS_LEN_INC 10 +static pthread_mutex_t clients_mutex = PTHREAD_MUTEX_INITIALIZER; +static bmi_method_addr_p* clients_addr = NULL; +static int clients_len = 0; + +/* Shared memory buffer between the ZBMI plugin an us. 
*/ +static void* zbmi_shm = NULL; +static void *zbmi_shm_unexp, *zbmi_shm_exp; + +static int zbmi_shm_size_total; +static int zbmi_shm_size_unexp; + +/* Queue of operations with BMI_EXT_ALLOC buffers pending for a temporary + memory buffer, sorted by descending total_size. */ +static struct NoMemDescriptor *no_mem_queue_first = NULL; +/* Only valid if no_mem_queue_first != NULL. */ +static struct NoMemDescriptor *no_mem_queue_last; +/* Protects access to the above queue. */ +static pthread_mutex_t no_mem_queue_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* Queue of failed/canceled operations (that the ZOID server doesn't know + about). */ +static op_list_p error_ops; +static pthread_mutex_t error_ops_mutex = PTHREAD_MUTEX_INITIALIZER; + + +static ssize_t socket_read(int fd, void* buf, size_t count); +static ssize_t socket_write(int fd, const void* buf, size_t count); +static int get_zoid_socket(int* release_token); +static void release_zoid_socket(int release_token); +static bmi_method_addr_p get_client_addr(int zoid_addr); +static int enqueue_no_mem(method_op_p op, bmi_size_t total_size); +static int send_post_cmd(method_op_p op); + + +/* These symbols come from external libraries that would need to be linked + even on the client-side, even though the client never actually invokes + them. */ +typeof(shm_open) shm_open __attribute__((weak)); +typeof(shm_unlink) shm_unlink __attribute__((weak)); + + +/* Invoked on BMI_initialize. */ +int +BMI_zoid_server_initialize(void) +{ + int hdr; + struct ZBMIControlInitResp init_resp; + int shm_fd; + int zoid_fd, zoid_release; + + /* Connect to the ZBMI plugin in the ZOID daemon. This will initialize + all the socket structures first. */ + + if ((zoid_fd = get_zoid_socket(&zoid_release)) < 0) + return zoid_fd; + + /* Initial handshake. 
*/ + + hdr = ZBMI_CONTROL_INIT; + + if (socket_write(zoid_fd, &hdr, sizeof(hdr)) != sizeof(hdr)) + { + perror("write"); + release_zoid_socket(zoid_release); + return -BMI_EINVAL; + } + + if (socket_read(zoid_fd, &init_resp, sizeof(init_resp)) != + sizeof(init_resp)) + { + perror("read"); + release_zoid_socket(zoid_release); + return -BMI_EINVAL; + } + + release_zoid_socket(zoid_release); + + zbmi_shm_size_total = init_resp.shm_size_total; + zbmi_shm_size_unexp = init_resp.shm_size_unexp; + + /* Open the shared memory area. */ + + if ((shm_fd = shm_open(ZBMI_SHM_NAME, O_RDWR, 0)) < 0) + { + perror("ZBMI shm_open"); + return -BMI_ENOMEM; + } + + if ((zbmi_shm = mmap(NULL, zbmi_shm_size_total, PROT_READ | PROT_WRITE, + MAP_SHARED, shm_fd, 0)) == MAP_FAILED) + { + perror("mmap"); + close(shm_fd); + return -BMI_ENOMEM; + } + + close(shm_fd); + + /* The shared memory buffer starts with an unexpected section, which is + managed by the ZBMI ZOID plugin. */ + zbmi_shm_unexp = zbmi_shm; + /* The expected buffers part is initialized and managed by us. */ + zbmi_shm_exp = zbmi_shm + zbmi_shm_size_unexp; + zbmi_pool_init(zbmi_shm_exp, zbmi_shm_size_total - zbmi_shm_size_unexp); + + if (!(error_ops = op_list_new())) + return -BMI_ENOMEM; + + return 0; +} + +/* Invoked on BMI_finalize. */ +int +BMI_zoid_server_finalize(void) +{ + int i; + + if (error_ops) + { + op_list_cleanup(error_ops); + error_ops = NULL; + } + + zbmi_pool_fini(); + + munmap(zbmi_shm, zbmi_shm_size_total); + + /* FIXME! Send some sort of a FINI message first? */ + for (i = 0; i < zbmi_sockets_used; i++) + close(zbmi_sockets[i]); + free(zbmi_sockets_inuse); + free(zbmi_sockets); + zbmi_sockets_len = zbmi_sockets_used = 0; + + return 0; +} + +/* Invoked on BMI_memalloc. Because of the shared memory buffer, it is + important for the performance that applications use it. */ +void* +BMI_zoid_server_memalloc(bmi_size_t size) +{ + return zbmi_pool_malloc(size); +} + +/* Invoked on BMI_memfree. 
*/ +void +BMI_zoid_server_memfree(void* buffer) +{ + struct NoMemDescriptor* desc; + + zbmi_pool_free(buffer); + + /* Once some memory has been freed, go over the queue of requests waiting + for memory and try to satisfy any of them. */ + pthread_mutex_lock(&no_mem_queue_mutex); + + for (desc = no_mem_queue_first; desc;) + { + struct NoMemDescriptor* desc_next = desc->next; + void* buf; + + if ((buf = BMI_zoid_server_memalloc(desc->total_size))) + { + method_op_p op = desc->op; + + ((struct ZoidServerMethodData*)op->method_data)->tmp_buffer = buf; + + if (op->send_recv == BMI_SEND) + { + /* Copy the data to the temporary buffer. */ + void *buf_cur = buf; + int i; + + for (i = 0; i < op->list_count; i++) + { + memcpy(buf_cur, op->buffer_list[i], op->size_list[i]); + buf_cur += op->size_list[i]; + } + } + + /* Remove the request from the list, as we have succeeded in + allocating memory for it. */ + if (desc->prev) + desc->prev->next = desc->next; + if (desc->next) + desc->next->prev = desc->prev; + if (no_mem_queue_first == desc) + no_mem_queue_first = no_mem_queue_first->next; + if (no_mem_queue_last == desc) + no_mem_queue_last = no_mem_queue_last->prev; + free(desc); + + if ((op->error_code = -send_post_cmd(op))) + { + pthread_mutex_lock(&error_ops_mutex); + op_list_add(error_ops, op); + pthread_mutex_unlock(&error_ops_mutex); + } + } + + desc = desc_next; + } + + pthread_mutex_unlock(&no_mem_queue_mutex); +} + +/* Invoked on BMI_unexpected_free. 
*/ +int +BMI_zoid_server_unexpected_free(void* buffer) +{ + int zoid_fd, zoid_release; + int hdr; + struct ZBMIControlUnexpFreeCmd cmd; + + if (buffer < zbmi_shm_unexp || + buffer >= zbmi_shm_unexp + zbmi_shm_size_unexp) + return -BMI_EINVAL; + + if ((zoid_fd = get_zoid_socket(&zoid_release)) < 0) + return zoid_fd; + + hdr = ZBMI_CONTROL_UNEXP_FREE; + cmd.buffer = buffer - zbmi_shm_unexp; + + if (socket_write(zoid_fd, &hdr, sizeof(hdr)) != sizeof(hdr) || + socket_write(zoid_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) + { + perror("write"); + release_zoid_socket(zoid_release); + return -BMI_EINVAL; + } + + release_zoid_socket(zoid_release); + return 0; +} + +/* Invoked on BMI_testunexpected. */ +int +BMI_zoid_server_testunexpected(int incount, int* outcount, + struct bmi_method_unexpected_info* info, + int max_idle_time_ms) +{ + int zoid_fd, zoid_release; + int hdr; + int i; + struct ZBMIControlUnexpTestCmd cmd; + struct ZBMIControlUnexpTestResp resp; + struct ZBMIControlBufDesc* buf_descs = NULL; + + if ((zoid_fd = get_zoid_socket(&zoid_release)) < 0) + return zoid_fd; + + hdr = ZBMI_CONTROL_UNEXP_TEST; + cmd.incount = incount; + cmd.max_idle_time_ms = max_idle_time_ms; + + if (socket_write(zoid_fd, &hdr, sizeof(hdr)) != sizeof(hdr) || + socket_write(zoid_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) + { + perror("write"); + release_zoid_socket(zoid_release); + return -BMI_EINVAL; + } + + if (socket_read(zoid_fd, &resp, offsetof(typeof(resp), buffers)) != + offsetof(typeof(resp), buffers)) + { + perror("read"); + release_zoid_socket(zoid_release); + return -BMI_EINVAL; + } + if (resp.outcount_bytes > 0) + { + buf_descs = alloca(resp.outcount_bytes); + + if (socket_read(zoid_fd, buf_descs, resp.outcount_bytes) != + resp.outcount_bytes) + { + perror("read"); + release_zoid_socket(zoid_release); + return -BMI_EINVAL; + } + } + + release_zoid_socket(zoid_release); + + *outcount = resp.outcount; + for (i = 0; i < resp.outcount; i++) + { + info[i].error_code = 0; + + if 
(!(info[i].addr = get_client_addr(buf_descs->addr))) + return -BMI_ENOMEM; + + if (buf_descs->list_count != 1) + return -BMI_EINVAL; + + info[i].buffer = zbmi_shm_unexp + buf_descs->list[0].buffer; + info[i].size = buf_descs->list[0].size; + info[i].tag = buf_descs->tag; + + buf_descs = (struct ZBMIControlBufDesc*) + (((char*)buf_descs) + offsetof(typeof(*buf_descs), list) + + buf_descs->list_count * sizeof(buf_descs->list[0])); + } + + return 0; +} + +/* A common send routine used for all expected messages. */ +int +zoid_server_send_common(bmi_op_id_t* id, bmi_method_addr_p dest, + const void*const* buffer_list, + const bmi_size_t* size_list, int list_count, + bmi_size_t total_size, enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, void* user_ptr, + bmi_context_id context_id, PVFS_hint hints) +{ + method_op_p new_op; + + /* Server-side sends are never immediate, so we start by allocating a + method op. */ + + if (!(new_op = bmi_alloc_method_op(sizeof(struct ZoidServerMethodData)))) + return -BMI_ENOMEM; + *id = new_op->op_id; + new_op->addr = dest; + new_op->send_recv = BMI_SEND; + new_op->user_ptr = user_ptr; + new_op->msg_tag = tag; + new_op->list_count = list_count; + new_op->actual_size = total_size; + if (list_count == 1) + { + new_op->buffer = (void*)buffer_list[0]; + new_op->buffer_list = &new_op->buffer; + new_op->size_list = &new_op->actual_size; + } + else + { + new_op->buffer = NULL; + new_op->buffer_list = (void*const*)buffer_list; + new_op->size_list = size_list; + } + new_op->error_code = 0; + ((struct ZoidServerMethodData*)new_op->method_data)->tmp_buffer = NULL; + ((struct ZoidServerMethodData*)new_op->method_data)->zoid_buf_id = 0; + + if (buffer_type == BMI_EXT_ALLOC) + { + /* Copy to shared memory area. */ + + void *buf, *buf_cur; + int i; + + if (!(buf = buf_cur = BMI_zoid_server_memalloc(total_size))) + { + /* No memory for the temporary buffer. This is not considered + a fatal error; we will retry when some memory is returned. 
*/ + return enqueue_no_mem(new_op, total_size); + } + + ((struct ZoidServerMethodData*)new_op->method_data)->tmp_buffer = buf; + + for (i = 0; i < list_count; i++) + { + memcpy(buf_cur, buffer_list[i], size_list[i]); + buf_cur += size_list[i]; + } + } + else + { + /* Verify that the buffer is actually allocated by us. */ + int i; + for (i = 0; i < list_count; i++) + if (buffer_list[i] < zbmi_shm_exp || + buffer_list[i] + size_list[i] > zbmi_shm_exp + + zbmi_shm_size_total - zbmi_shm_size_unexp) + { + return -BMI_EINVAL; + } + } + + return send_post_cmd(new_op); +} + +/* A common receive routine used for all expected messages. */ +int +zoid_server_recv_common(bmi_op_id_t* id, bmi_method_addr_p src, + void *const* buffer_list, const bmi_size_t* size_list, + int list_count, bmi_size_t total_expected_size, + bmi_size_t* total_actual_size, + enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, + void* user_ptr, bmi_context_id context_id, + PVFS_hint hints) +{ + method_op_p new_op; + + /* Server-side receives are never immediate, so we start by allocating a + method op. */ + + if (!(new_op = bmi_alloc_method_op(sizeof(struct ZoidServerMethodData)))) + return -BMI_ENOMEM; + *id = new_op->op_id; + new_op->addr = src; + new_op->send_recv = BMI_RECV; + new_op->user_ptr = user_ptr; + new_op->msg_tag = tag; + new_op->list_count = list_count; + new_op->expected_size = total_expected_size; + if (list_count == 1) + { + new_op->buffer = (void*)buffer_list[0]; + new_op->buffer_list = &new_op->buffer; + new_op->size_list = &new_op->expected_size; + } + else + { + new_op->buffer = NULL; + new_op->buffer_list = buffer_list; + new_op->size_list = size_list; + } + new_op->error_code = 0; + ((struct ZoidServerMethodData*)new_op->method_data)->tmp_buffer = NULL; + ((struct ZoidServerMethodData*)new_op->method_data)->zoid_buf_id = 0; + + if (buffer_type == BMI_EXT_ALLOC) + { + /* Allocate a shared memory area. 
*/ + void* buf; + + if (!(buf = BMI_zoid_server_memalloc(total_expected_size))) + { + /* No memory for the temporary buffer. This is not considered + a fatal error; we will retry when some memory is returned. */ + return enqueue_no_mem(new_op, total_expected_size); + } + + ((struct ZoidServerMethodData*)new_op->method_data)->tmp_buffer = buf; + } + else + { + /* Verify that the buffer is actually allocated by us. */ + int i; + for (i = 0; i < list_count; i++) + if (buffer_list[i] < zbmi_shm_exp || + buffer_list[i] + size_list[i] > zbmi_shm_exp + + zbmi_shm_size_total - zbmi_shm_size_unexp) + { + return -BMI_EINVAL; + } + } + + return send_post_cmd(new_op); +} + +/* A common test routine used for all expected messages. "incount" is 0 + for testcontext; in that case "id_array" is an output argument. */ +int +zoid_server_test_common(int incount, bmi_op_id_t* id_array, + int outcount_max, int* outcount, int* index_array, + bmi_error_code_t* error_code_array, + bmi_size_t* actual_size_array, + void** user_ptr_array, int max_idle_time_ms, + bmi_context_id context_id) +{ + int zoid_fd, zoid_release; + int hdr; + struct ZBMIControlTestCmd* cmd; + int cmd_len; + struct ZBMIControlTestResp resp; + int i; + int outcount_used = 0; /* Counter of already used output entries. */ + int incount_fwd = incount; /* Counter of how many input entries to + forward to the ZBMI plugin. */ + + /* We start by checking if there are any local failed/canceled operations + and taking care of those first. 
*/ + pthread_mutex_lock(&error_ops_mutex); + if (incount) + { + for (i = 0; i < incount; i++) + { + method_op_p op = (method_op_p)id_gen_fast_lookup(id_array[i]); + + if (op->error_code) + { + if (outcount_used >= outcount_max) + break; + + if (index_array) + index_array[outcount_used] = i; + else + assert(outcount_used == i); + error_code_array[i] = op->error_code; + actual_size_array[i] = 0; + if (user_ptr_array) + user_ptr_array[i] = op->user_ptr; + outcount_used++; + + op_list_remove(op); + /* Note: we will dealloc a little later. */ + } + } + + if (outcount_used > 0) + { + incount_fwd = incount - outcount_used; + if (index_array) + index_array += outcount_used; + } + } + else + { + /* Testcontext. */ + method_op_p op, tmp; + + qlist_for_each_entry_safe(op, tmp, error_ops, op_list_entry) + { + if (outcount_used >= outcount_max) + break; + + id_array[outcount_used] = op->op_id; + error_code_array[outcount_used] = op->error_code; + actual_size_array[outcount_used] = 0; + if (user_ptr_array) + user_ptr_array[outcount_used] = op->user_ptr; + outcount_used++; + + op_list_remove(op); + bmi_dealloc_method_op(op); + } + + if (outcount_used > 0) + { + id_array += outcount_used; + error_code_array += outcount_used; + actual_size_array += outcount_used; + user_ptr_array += outcount_used; + } + } + pthread_mutex_unlock(&error_ops_mutex); + + hdr = ZBMI_CONTROL_TEST; + cmd_len = offsetof(typeof(*cmd), zoid_ids) + + incount_fwd * sizeof(cmd->zoid_ids[0]); + cmd = alloca(cmd_len); + + cmd->timeout_ms = max_idle_time_ms; + + /* incount_fwd == 0 indicates "testcontext". We still need to communicate + the max. count of outputs we are prepared to handle. */ + cmd->count = (incount_fwd ? 
incount_fwd : -outcount_max); + for (i = 0; i < incount; i++) + { + method_op_p op = (method_op_p)id_gen_fast_lookup(id_array[i]); + if (op->error_code) + { + bmi_dealloc_method_op(op); + continue; + } + cmd->zoid_ids[i] = ((struct ZoidServerMethodData*)op->method_data)-> + zoid_buf_id; + } + + if (outcount_used > 0) + { + outcount_max -= outcount_used; + if (outcount_max == 0 || (incount > 0 && incount_fwd == 0)) + { + *outcount = outcount_used; + return 0; + } + } + + /* Note: this is shifted later than usual in the function body so that + we can invoke bmi_dealloc_method_op above as appropriate. */ + if ((zoid_fd = get_zoid_socket(&zoid_release)) < 0) + return zoid_fd; + + if (socket_write(zoid_fd, &hdr, sizeof(hdr)) != sizeof(hdr) || + socket_write(zoid_fd, cmd, cmd_len) != cmd_len) + { + perror("write"); + release_zoid_socket(zoid_release); + return -BMI_EINVAL; + } + + if (socket_read(zoid_fd, &resp, offsetof(typeof(resp), list)) != + offsetof(typeof(resp), list)) + { + perror("read"); + release_zoid_socket(zoid_release); + return -BMI_EINVAL; + } + assert(resp.count <= outcount_max); + *outcount = resp.count; + if (resp.count > 0) + { + struct ZBMIControlTestRespList* resp_list; + int index; + + resp_list = alloca(resp.count * sizeof(*resp_list)); + + if (socket_read(zoid_fd, resp_list, resp.count * sizeof(*resp_list)) != + resp.count * sizeof(*resp_list)) + { + perror("read"); + release_zoid_socket(zoid_release); + return -BMI_EINVAL; + } + + for (i = 0, index = 0; i < resp.count; i++, index++) + { + method_op_p op = (method_op_p)id_gen_fast_lookup(resp_list[i]. 
+ bmi_id); + if (incount_fwd) + { + for (; index < incount_fwd; index++) + { + if (cmd->zoid_ids[index] == ((struct ZoidServerMethodData*) + op->method_data)->zoid_buf_id) + break; + } + assert(index < incount_fwd); + + if (index_array) + index_array[i] = index; + else + assert(i == index); + } + else /* testcontext */ + id_array[i] = resp_list[i].bmi_id; + + if (resp_list[i].length < 0) /* Most likely BMI_ECANCEL */ + { + error_code_array[index] = -resp_list[i].length; + actual_size_array[index] = 0; + } + else + { + actual_size_array[index] = resp_list[i].length; + error_code_array[index] = 0; + } + + if (user_ptr_array) + user_ptr_array[index] = op->user_ptr; + + /* We are done with this message. Clean up. */ + if (((struct ZoidServerMethodData*)op->method_data)->tmp_buffer) + { + if (op->send_recv == BMI_RECV) + { + /* Copy the memory back to the user buffer(s). */ + int j, size_remaining = resp_list[i].length; + void *buf_cur = ((struct ZoidServerMethodData*)op-> + method_data)->tmp_buffer; + j = 0; + while (size_remaining > 0) + { + int tocopy = (op->size_list[j] < size_remaining ? + op->size_list[j] : size_remaining); + + memcpy(op->buffer_list[j], buf_cur, tocopy); + buf_cur += tocopy; + size_remaining -= tocopy; + j++; + } + } + + BMI_zoid_server_memfree(((struct ZoidServerMethodData*)op-> + method_data)->tmp_buffer); + } + + bmi_dealloc_method_op(op); + } /* for (i) */ + } /* if (resp.count > 0) */ + + release_zoid_socket(zoid_release); + + return 0; +} + +/* Invoked on BMI_cancel. */ +int +BMI_zoid_server_cancel(bmi_op_id_t id, bmi_context_id context_id) +{ + int zoid_fd, zoid_release; + int hdr; + struct ZBMIControlCancelCmd cmd; + method_op_p op; + + op = (method_op_p)id_gen_fast_lookup(id); + + /* We have to distinguish here between requests that have been registered + with the ZBMI plugin (we need to unregister those) and those that + have not, most likely because of a lack of memory (those can be handled + locally). 
*/ + if (!((struct ZoidServerMethodData*)op->method_data)->zoid_buf_id) + { + pthread_mutex_lock(&no_mem_queue_mutex); + + /* Test again, now with mutex properly locked. */ + if (!((struct ZoidServerMethodData*)op->method_data)->zoid_buf_id) + { + if (!op->error_code) + { + /* It must be an out-of-memory case on no_mem_queue. */ + struct NoMemDescriptor* desc; + + op->error_code = BMI_ECANCEL; + + for (desc = no_mem_queue_first; desc;) + if (desc->op == op) + { + if (desc->prev) + desc->prev->next = desc->next; + if (desc->next) + desc->next->prev = desc->prev; + if (no_mem_queue_first == desc) + no_mem_queue_first = no_mem_queue_first->next; + if (no_mem_queue_last == desc) + no_mem_queue_last = no_mem_queue_last->prev; + free(desc); + break; + } + assert(desc); + + pthread_mutex_lock(&error_ops_mutex); + op_list_add(error_ops, op); + pthread_mutex_unlock(&error_ops_mutex); + } + + pthread_mutex_unlock(&no_mem_queue_mutex); + + return 0; + } + + pthread_mutex_unlock(&no_mem_queue_mutex); + } + + if ((zoid_fd = get_zoid_socket(&zoid_release)) < 0) + return zoid_fd; + + hdr = ZBMI_CONTROL_CANCEL; + cmd.zoid_id = ((struct ZoidServerMethodData*)op->method_data)-> + zoid_buf_id; + + if (socket_write(zoid_fd, &hdr, sizeof(hdr)) != sizeof(hdr) || + socket_write(zoid_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) + { + perror("write"); + release_zoid_socket(zoid_release); + return -BMI_EINVAL; + } + + return 0; +} + +/* + * A more robust version of read(2). + */ +static ssize_t +socket_read(int fd, void* buf, size_t count) +{ + size_t already_read = 0; + + while (already_read < count) + { + ssize_t n; + + n = read(fd, buf + already_read, count - already_read); + + if (n == -1) + { + if (errno == EINTR || errno == EAGAIN) + continue; + return -1; + } + else if (n == 0) + return already_read; + else + already_read += n; + } + + return already_read; +} + +/* + * A more robust version of write(2). 
+ */ +static ssize_t +socket_write(int fd, const void* buf, size_t count) +{ + size_t already_written = 0; + + while (already_written < count) + { + ssize_t n; + + n = write(fd, buf + already_written, count - already_written); + + if (n == -1) + { + if (errno == EINTR || errno == EAGAIN) + continue; + return -1; + } + else + already_written += n; + } + + return already_written; +} + +/* An internal routine used to obtain a socket to the ZBMI plugin. */ +static int +get_zoid_socket(int* release_token) +{ + int i; + + pthread_mutex_lock(&zbmi_sockets_mutex); + + for (i = 0; i < zbmi_sockets_used; i++) + if (!zbmi_sockets_inuse[i]) + break; + + if (i == zbmi_sockets_used) + { + /* All open sockets are currently in use. Open a new one. */ + struct sockaddr_un addr; + + if (zbmi_sockets_used == zbmi_sockets_len) + { + /* Enlarge the arrays first. */ + int j; + int* zbmi_sockets_new; + char* zbmi_sockets_inuse_new; + + if (zbmi_sockets_len == 0) + zbmi_sockets_len = ZBMI_SOCKETS_LEN_INIT; + else + zbmi_sockets_len *= 2; + zbmi_sockets_new = realloc(zbmi_sockets, zbmi_sockets_len * + sizeof(*zbmi_sockets)); + if (!zbmi_sockets_new) + { + pthread_mutex_unlock(&zbmi_sockets_mutex); + return -BMI_ENOMEM; + } + zbmi_sockets = zbmi_sockets_new; + zbmi_sockets_inuse_new = realloc(zbmi_sockets_inuse, + zbmi_sockets_len * + sizeof(*zbmi_sockets_inuse)); + if (!zbmi_sockets_inuse_new) + { + pthread_mutex_unlock(&zbmi_sockets_mutex); + return -BMI_ENOMEM; + } + zbmi_sockets_inuse = zbmi_sockets_inuse_new; + + for (j = zbmi_sockets_used; j < zbmi_sockets_len; j++) + zbmi_sockets_inuse[j] = 0; + } + + if ((zbmi_sockets[i] = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) + { + perror("ZBMI control socket"); + pthread_mutex_unlock(&zbmi_sockets_mutex); + return -BMI_EINVAL; + } + + addr.sun_family = AF_UNIX; + strcpy(addr.sun_path, ZBMI_SOCKET_NAME); +#if 0 + if (bind(zbmi_sockets[i], (struct sockaddr*)&addr, sizeof(addr)) < 0) + { + perror("bind " ZBMI_SOCKET_NAME); + close(zbmi_sockets[i]); + 
pthread_mutex_unlock(&zbmi_sockets_mutex); + return -BMI_EINVAL; + } +#endif + while (connect(zbmi_sockets[i], (struct sockaddr*)&addr, sizeof(addr)) + < 0) + { + if (errno == ENOENT || errno == ECONNREFUSED) + { + /* ZOID server not running yet or too many requests? Wait + a little... */ + sleep(1); + } + else + { + perror("connect to ZOID"); + close(zbmi_sockets[i]); + pthread_mutex_unlock(&zbmi_sockets_mutex); + return -BMI_EINVAL; + } + } + + zbmi_sockets_used++; + } + + zbmi_sockets_inuse[i] = 1; + + pthread_mutex_unlock(&zbmi_sockets_mutex); + + *release_token = i; + + return zbmi_sockets[i]; +} + +/* Releases the socket obtained with get_zoid_socket. */ +static void +release_zoid_socket(int release_token) +{ + assert(release_token >= 0 && release_token < zbmi_sockets_used); + + pthread_mutex_lock(&zbmi_sockets_mutex); + + assert(zbmi_sockets_inuse[release_token]); + zbmi_sockets_inuse[release_token] = 0; + + pthread_mutex_unlock(&zbmi_sockets_mutex); +} + +/* Translates a ZOID address to a BMI address, allocating a new one if + necessary. */ +static bmi_method_addr_p +get_client_addr(int zoid_addr) +{ + bmi_method_addr_p ret; + + assert(zoid_addr >= 0); + + pthread_mutex_lock(&clients_mutex); + + if (zoid_addr >= clients_len) + { + /* Enlarge the array first. 
*/ + + bmi_method_addr_p* clients_addr_new; + int i; + + if (!(clients_addr_new = realloc(clients_addr, + (zoid_addr + CLIENTS_LEN_INC) * + sizeof(*clients_addr)))) + { + pthread_mutex_unlock(&clients_mutex); + return NULL; + } + clients_addr = clients_addr_new; + + for (i = clients_len; i < zoid_addr + CLIENTS_LEN_INC; i++) + clients_addr[i] = NULL; + + clients_len = zoid_addr + CLIENTS_LEN_INC; + } + + if (!clients_addr[zoid_addr]) + { + if ((clients_addr[zoid_addr] = + bmi_alloc_method_addr(zoid_method_id, sizeof(struct zoid_addr)))) + { + ((struct zoid_addr*)clients_addr[zoid_addr]->method_data)->pid = + zoid_addr; + bmi_method_addr_reg_callback(clients_addr[zoid_addr]); + } + } + + ret = clients_addr[zoid_addr]; + + pthread_mutex_unlock(&clients_mutex); + + return ret; +} + +/* Releases a no longer needed client address. */ +void +zoid_server_free_client_addr(bmi_method_addr_p addr) +{ + pthread_mutex_lock(&clients_mutex); + + assert(((struct zoid_addr*)addr->method_data)->pid < clients_len && + clients_addr[((struct zoid_addr*)addr->method_data)->pid] == addr); + clients_addr[((struct zoid_addr*)addr->method_data)->pid] = NULL; + + pthread_mutex_unlock(&clients_mutex); +} + +/* Puts an out-of-temporary-buffer-memory operation on the "no_mem" list. */ +static int +enqueue_no_mem(method_op_p op, bmi_size_t total_size) +{ + struct NoMemDescriptor *nomemdesc, *desc; + + if (!(nomemdesc = malloc(sizeof(*nomemdesc)))) + return -BMI_ENOMEM; + + nomemdesc->total_size = total_size; + nomemdesc->op = op; + + /* no_mem_queue is sorted in descending size order. + Look for an appropriate spot to insert a new entry. */ + pthread_mutex_lock(&no_mem_queue_mutex); + + for (desc = no_mem_queue_first; desc; desc = desc->next) + if (total_size > desc->total_size) + break; + + if (!desc) + { + /* Insert at the end (or no_mem_queue is empty). 
*/ + if (no_mem_queue_first) + { + no_mem_queue_last->next = nomemdesc; + nomemdesc->prev = no_mem_queue_last; + } + else + { + no_mem_queue_first = nomemdesc; + nomemdesc->prev = NULL; + } + + no_mem_queue_last = nomemdesc; + nomemdesc->next = NULL; + } + else if (desc->prev) + { + /* Insert before desc. */ + nomemdesc->next = desc; + nomemdesc->prev = desc->prev; + desc->prev->next = nomemdesc; + desc->prev = nomemdesc; + } + else + { + /* Insert as the first element of the queue. */ + nomemdesc->next = no_mem_queue_first; + no_mem_queue_first->prev = nomemdesc; + + nomemdesc->prev = NULL; + + no_mem_queue_first = nomemdesc; + } + + pthread_mutex_unlock(&no_mem_queue_mutex); + + return 0; +} + +/* A common internal posting routine for send and receive requests. */ +static int +send_post_cmd(method_op_p op) +{ + int zoid_fd, zoid_release; + int cmd_len, i; + int hdr; + struct ZBMIControlPostCmd* cmd; + struct ZBMIControlPostResp resp; + int list_count_zoid; + + if ((zoid_fd = get_zoid_socket(&zoid_release)) < 0) + return zoid_fd; + + hdr = (op->send_recv == BMI_SEND ? ZBMI_CONTROL_POST_SEND : + ZBMI_CONTROL_POST_RECV); + list_count_zoid = (((struct ZoidServerMethodData*)op->method_data)-> + tmp_buffer ? 1 : op->list_count); + cmd_len = offsetof(typeof(*cmd), buf.list) + + list_count_zoid * sizeof(cmd->buf.list[0]); + cmd = alloca(cmd_len); + cmd->bmi_id = op->op_id; + cmd->buf.addr = ((struct zoid_addr*)op->addr->method_data)->pid; + cmd->buf.tag = op->msg_tag; + cmd->buf.list_count = list_count_zoid; + if (((struct ZoidServerMethodData*)op->method_data)->tmp_buffer) + { + cmd->buf.list[0].buffer = ((struct ZoidServerMethodData*)op-> + method_data)->tmp_buffer - zbmi_shm_exp; + cmd->buf.list[0].size = (op->send_recv == BMI_SEND ? 
op->actual_size : + op->expected_size); + } + else + for (i = 0; i < op->list_count; i++) + { + cmd->buf.list[i].buffer = op->buffer_list[i] - zbmi_shm_exp; + cmd->buf.list[i].size = op->size_list[i]; + } + + if (socket_write(zoid_fd, &hdr, sizeof(hdr)) != sizeof(hdr) || + socket_write(zoid_fd, cmd, cmd_len) != cmd_len) + { + perror("write"); + release_zoid_socket(zoid_release); + return -BMI_EINVAL; + } + + if (socket_read(zoid_fd, &resp, sizeof(resp)) != sizeof(resp)) + { + perror("read"); + release_zoid_socket(zoid_release); + return -BMI_EINVAL; + } + + release_zoid_socket(zoid_release); + + if (!resp.zoid_id) + return -BMI_ENOMEM; + + ((struct ZoidServerMethodData*)op->method_data)->zoid_buf_id = resp.zoid_id; + + return 0; +} diff --git a/src/io/bmi/bmi_zoid/zbmi_pool.c b/src/io/bmi/bmi_zoid/zbmi_pool.c new file mode 100644 index 0000000..bdf5097 --- /dev/null +++ b/src/io/bmi/bmi_zoid/zbmi_pool.c @@ -0,0 +1,56 @@ +#define ONLY_MSPACES 1 +#define MSPACES 1 +#define MALLOC_ALIGNMENT 16 +#define USE_LOCKS 1 +/* HAVE_MORECORE defaults to 0 if ONLY_MSPACES is used. 
*/ +#define HAVE_MMAP 0 + +/* + * Avoid duplicate symbol errors when compiling + * with ZeptoOS mpi compilers + */ +#pragma weak destroy_mspace +#pragma weak mspace_independent_comalloc +#pragma weak mspace_memalign +#pragma weak mspace_mallinfo +#pragma weak mspace_calloc +#pragma weak mspace_max_footprint +#pragma weak mspace_free +#pragma weak mspace_mallopt +#pragma weak mspace_independent_calloc +#pragma weak create_mspace_with_base +#pragma weak mspace_realloc +#pragma weak mspace_malloc +#pragma weak mspace_trim +#pragma weak mspace_malloc_stats +#pragma weak create_mspace +#pragma weak mspace_footprint + +//#include "dlmalloc.h" +#include "dlmalloc.c" + +static mspace pool = NULL; + +void +zbmi_pool_init(void* start, size_t len) +{ + pool = create_mspace_with_base(start, len, 1); +} + +void zbmi_pool_fini(void) +{ + destroy_mspace(pool); + pool = NULL; +} + +void* +zbmi_pool_malloc(size_t bytes) +{ + return mspace_malloc(pool, bytes); +} + +void +zbmi_pool_free(void* mem) +{ + mspace_free(pool, mem); +} diff --git a/src/io/bmi/bmi_zoid/zbmi_pool.h b/src/io/bmi/bmi_zoid/zbmi_pool.h new file mode 100644 index 0000000..6ace204 --- /dev/null +++ b/src/io/bmi/bmi_zoid/zbmi_pool.h @@ -0,0 +1,10 @@ +#ifndef ZBMI_POOL_H +#define ZBMI_POOL_H + +void zbmi_pool_init(void* start, size_t len); +void zbmi_pool_fini(void); + +void* zbmi_pool_malloc(size_t bytes); +void zbmi_pool_free(void* mem); + +#endif diff --git a/src/io/bmi/bmi_zoid/zoid.c b/src/io/bmi/bmi_zoid/zoid.c new file mode 100644 index 0000000..8d9d26a --- /dev/null +++ b/src/io/bmi/bmi_zoid/zoid.c @@ -0,0 +1,827 @@ +/* ZOID implementation of a BMI method */ + +/* + Limitations: + + - unexpected messages limited to 8K (easy to increase) + - expected messages limited to 128M (easy to increase, but even the current + value is not safe given the memory consideration on IONs) + - compute nodes can only communicate with the ION, not with each other + - unexpected messages can only be sent by a CN and can only be 
received + by an ION + - only one string address is supported, "zoid://", which denotes the server + - only one, global context is supported + - multithreading is not supported on the compute node client side +*/ + +#include +#include +#include + +#include +#include +#include + +#include "zbmi.h" +#include "zoid_api.h" +#include "zoid.h" + +#define CLIENT 0 +#define SERVER 1 + +/* [Compute node] CLIENT or [I/O node] SERVER */ +static int zoid_node_type; + +/* As passed to the initialize routine. */ +int zoid_method_id; + +/* A queue of all non-immediate posted message sends/receives. Only used + on the client side. We don't allocate a custom method data for the + entries; rather, we reuse the method_data pointer itself to indicate if + we are dealing with an unexpected message. */ +static op_list_p zoid_ops; + + +static int zoid_err_to_bmi(int err); + +/* A common send routine used for both expected and unexpected messages. */ +static int +zoid_post_send_common(bmi_op_id_t* id, bmi_method_addr_p dest, + const void*const* buffer_list, + const bmi_size_t* size_list, int list_count, + bmi_size_t total_size, enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, void* user_ptr, + bmi_context_id context_id, PVFS_hint hints, + int unexpected) +{ + int ret, err; + method_op_p new_op; + + if (unexpected) + { + /* We only support unexpected messages from clients. */ + if (zoid_node_type == SERVER) + abort(); + + if (total_size > ZOID_MAX_UNEXPECTED_MSG) + return -BMI_EMSGSIZE; + } + else /* expected */ + { + if (total_size > ZOID_MAX_EXPECTED_MSG) + return -BMI_EMSGSIZE; + } + + if (zoid_node_type == CLIENT) + { + size_t* size_list_cp; + + /* We only support communication for the compute nodes to the I/O + node server. */ + assert(((struct zoid_addr*)dest->method_data)->pid == + ZOID_ADDR_SERVER_PID); + + /* In principle, for expected messages we should start with a + handshake (post_test). 
However, because of how input userbuf is + implemented in ZOID, there is an implicit ZOID handshake that we + can take advantage of, so we attempt to send immediately. */ + + /* bmi_size_t is 64-bit, which is not supported by ZOID. */ + size_list_cp = alloca(list_count * sizeof(*size_list_cp)); + { + int i; + for (i = 0; i < list_count; i++) + size_list_cp[i] = size_list[i]; + } + + ret = zbmi_send(buffer_list, size_list_cp, list_count, tag, unexpected); + if ((err = __zoid_error())) + { + if (err == ENOMEM) + { + /* Indicates that there was no memory on the server side + for the message. Could happen if this is an expected + message and no matching receive has been posted, or if + we sent too many unexpected messages without the server-side + receiving anything. */ + + if (!(new_op = bmi_alloc_method_op(0))) + return -BMI_ENOMEM; + *id = new_op->op_id; + new_op->addr = dest; + new_op->send_recv = BMI_SEND; + new_op->user_ptr = user_ptr; + new_op->msg_tag = tag; + new_op->list_count = list_count; + new_op->actual_size = total_size; + if (list_count == 1) + { + /* Our buffer_list and size_list pointers might be + temporary (see, e.g., BMI_zoid_post_send), so we + prefer to copy the data over to someplace more + permanent. */ + new_op->buffer = (void*)buffer_list[0]; + new_op->buffer_list = &new_op->buffer; + new_op->size_list = &new_op->actual_size; + } + else + { + new_op->buffer_list = (void*const*)buffer_list; + new_op->size_list = size_list; + } + new_op->error_code = 0; + new_op->method_data = (void*)unexpected; + + op_list_add(zoid_ops, new_op); + + return 0; /* Non-immediate completion. */ + } + else + return zoid_err_to_bmi(err); + } + + assert (ret == 1); + + return 1; /* Immediate completion. */ + } + + /* Server code. */ + + return zoid_server_send_common(id, dest, buffer_list, size_list, + list_count, total_size, buffer_type, + tag, user_ptr, context_id, hints); +} + +/* A common send routine used for all expected messages. 
*/ +static int +zoid_post_recv_common(bmi_op_id_t* id, bmi_method_addr_p src, + void *const* buffer_list, const bmi_size_t* size_list, + int list_count, bmi_size_t total_expected_size, + bmi_size_t* total_actual_size, + enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, + void* user_ptr, bmi_context_id context_id, + PVFS_hint hints) +{ + int ret, err; + method_op_p new_op; + + if (total_expected_size > ZOID_MAX_EXPECTED_MSG) + return -BMI_EMSGSIZE; + + if (zoid_node_type == CLIENT) + { + size_t* size_list_cp; + + /* We only support communication for the compute nodes to the I/O + node server. */ + assert(((struct zoid_addr*)src->method_data)->pid == + ZOID_ADDR_SERVER_PID); + + /* In principle, for expected messages we should start with a + handshake (post_test). However, we can take a shortcut and + immediately try a receive instead, which will first do a test + on the server side. */ + + /* Try immediate completion first. */ + + size_list_cp = alloca(list_count * sizeof(*size_list_cp)); + { + int i; + for (i = 0; i < list_count; i++) + size_list_cp[i] = size_list[i]; + } + + ret = zbmi_recv(buffer_list, size_list_cp, list_count, tag); + if ((err = __zoid_error())) + return zoid_err_to_bmi(err); + + if (ret == 1) /* Immediate completion succeeded! */ + { + int i; + + *total_actual_size = 0; + for (i = 0; i < list_count; i++) + *total_actual_size += size_list_cp[i]; + + return 1; + } + else if (ret < 0) + { + /* Error. */ + return -BMI_EPROTO; + } + + /* No matching send. Queue the message. */ + + if (!(new_op = bmi_alloc_method_op(0))) + return -BMI_ENOMEM; + *id = new_op->op_id; + new_op->addr = src; + new_op->send_recv = BMI_RECV; + new_op->user_ptr = user_ptr; + new_op->msg_tag = tag; + new_op->list_count = list_count; + new_op->expected_size = total_expected_size; + if (list_count == 1) + { + /* See zoid_post_send_common for an explanation. 
*/ + new_op->buffer = buffer_list[0]; + new_op->buffer_list = &new_op->buffer; + new_op->size_list = &new_op->expected_size; + } + else + { + new_op->buffer_list = buffer_list; + new_op->size_list = size_list; + } + new_op->error_code = 0; + new_op->method_data = (void*)0; + + op_list_add(zoid_ops, new_op); + + return 0; /* Non-immediate completion. */ + } + + /* Server code. */ + + return zoid_server_recv_common(id, src, buffer_list, size_list, list_count, + total_expected_size, total_actual_size, + buffer_type, tag, user_ptr, context_id, + hints); +} + +/* A common test routine used for all pending messages. */ +static int +zoid_test_common(int incount, bmi_op_id_t* id_array, int outcount_max, + int* outcount, int* index_array, + bmi_error_code_t* error_code_array, + bmi_size_t* actual_size_array, void** user_ptr_array, + int max_idle_time_ms, bmi_context_id context_id) +{ + int i, ret, err, out; +#if 0 + fprintf(stderr, "zoid_test_common incount %d, outcount_max %d, timeout %d\n", incount, outcount_max, max_idle_time_ms); + for (i = 0; i < incount; i++) + fprintf(stderr, "id[%d]: %lld\n", i, id_array[i]); +#endif + if (zoid_node_type == CLIENT) + { + bmi_msg_tag_t* tags = alloca(incount * sizeof(*tags)); + enum bmi_op_type* ops = alloca(incount * sizeof(*ops)); + ssize_t* unexp_sizes = alloca(incount * sizeof(*unexp_sizes)); + int* ready = alloca(incount * sizeof(*ready)); + int incount_fwd, canceled, out_total; + + /* First do a test to see what is ready on the server side. */ + + incount_fwd = 0; + canceled = 0; + for (i = 0; i < incount; i++) + { + method_op_p op = (method_op_p)id_gen_fast_lookup(id_array[i]); + + /* Canceled messages are not queried. 
*/ + if (op->error_code == BMI_ECANCEL) + { + canceled++; + continue; + } + + tags[incount_fwd] = op->msg_tag; + ops[incount_fwd] = op->send_recv; + if ((int)op->method_data) + unexp_sizes[incount_fwd] = op->actual_size; + else + unexp_sizes[incount_fwd] = -1; + incount_fwd++; + } + + ret = zbmi_test(tags, ops, unexp_sizes, incount_fwd, ready, + max_idle_time_ms); + if ((err = __zoid_error())) + return zoid_err_to_bmi(err); + + /* Now that we know where we stand, we can perform the actual + sends/receives. */ +#if 0 + fprintf(stderr, "zbmi_test returned %d\n", ret); +#endif + out_total = ret + canceled; + + for (i = 0, out = 0; i < incount && out < out_total; i++) + { + method_op_p op; + + if (out == outcount_max) + break; + + op = (method_op_p)id_gen_fast_lookup(id_array[i]); + + if (op->error_code == BMI_ECANCEL) + { + actual_size_array[out] = 0; + error_code_array[out] = BMI_ECANCEL; + } + else if (ready[i]) + { + size_t* size_list_cp; +#if 0 + fprintf(stderr, "op %d is ready!\n", i); +#endif + size_list_cp = alloca(op->list_count * sizeof(*size_list_cp)); + { + int j; + for (j = 0; j < op->list_count; j++) + size_list_cp[j] = op->size_list[j]; + } + + if (op->send_recv == BMI_SEND) + { + ret = zbmi_send((const void*const*)op->buffer_list, + size_list_cp, op->list_count, op->msg_tag, + (int)op->method_data); + if ((err = __zoid_error())) + { +#if 0 + fprintf(stderr, "zbmi_send returned err %d\n", err); +#endif + if (err == ENOMEM) + { + /* This is unexpected, but not impossible. The + server side might have issued a cancel. Or, for + unexpected messages, another client might have + sent a message and used up the space that we + wanted to use. 
*/ + continue; + } + return zoid_err_to_bmi(err); + } + + assert (ret == 1); + + actual_size_array[out] = op->actual_size; + } + else /* BMI_RECV */ + { + int j; + + ret = zbmi_recv(op->buffer_list, size_list_cp, + op->list_count, op->msg_tag); + if ((err = __zoid_error())) + return zoid_err_to_bmi(err); + + if (ret != 1) + { + /* This is unexpected, but not impossible. The + server side might have issued a cancel. */ + continue; + } + + actual_size_array[out] = 0; + for (j = 0; j < op->list_count; j++) + actual_size_array[out] += size_list_cp[j]; + } + error_code_array[out] = 0; + } + else /* not ready */ + continue; + + if (index_array) + index_array[out] = i; + if (user_ptr_array) + user_ptr_array[out] = op->user_ptr; + + op_list_remove(op); + bmi_dealloc_method_op(op); + + out++; + } + *outcount = out; + + return 0; + } + + /* Server code. */ + + return zoid_server_test_common(incount, id_array, outcount_max, outcount, + index_array, error_code_array, + actual_size_array, user_ptr_array, + max_idle_time_ms, context_id); +} + +/* Internal routine to translate the few POSIX errors used by ZOID to + their BMI equivalents. */ +static int +zoid_err_to_bmi(int err) +{ + if (err == ENOMEM) + return -BMI_ENOMEM; + else if (err == E2BIG) + return -BMI_EMSGSIZE; + else if (err == ENOSYS) + return -BMI_ENOSYS; + else /* Some undefined error. */ + return -1; +} + +/* Invoked on BMI_initialize. */ +static int +BMI_zoid_initialize(bmi_method_addr_p listen_addr, int method_id, + int init_flags) +{ +#if 0 + fprintf(stderr, "Invoked zoid_initialize\n"); +#endif + zoid_node_type = (init_flags & BMI_INIT_SERVER) ? SERVER : CLIENT; + + zoid_method_id = method_id; + + if (!(zoid_ops = op_list_new())) + return -BMI_ENOMEM; + + if (zoid_node_type == CLIENT) + { + if (__zoid_init()) + return -BMI_EINVAL; + } + else /* SERVER */ + return BMI_zoid_server_initialize(); + + return 0; +} + +/* Invoked on BMI_finalize. 
*/ +static int +BMI_zoid_finalize(void) +{ + if (zoid_ops) + { + op_list_cleanup(zoid_ops); + zoid_ops = NULL; + } + + if (zoid_node_type == CLIENT) + { + /* Nothing to do, maybe free some internal memory buffers... */ + } + else /* SERVER */ + return BMI_zoid_server_finalize(); + + return 0; +} + +/* Invoked on BMI_set_info. The only important case seems to be an internal + invocation to release a no longer needed address. */ +static int +BMI_zoid_set_info(int option, void* inout_parameter) +{ + switch (option) + { + case BMI_DROP_ADDR: + if (zoid_node_type == SERVER) + zoid_server_free_client_addr(inout_parameter); + bmi_dealloc_method_addr(inout_parameter); + break; + } + + return 0; +} + +/* Invoked on BMI_get_info. */ +static int +BMI_zoid_get_info(int option, void* inout_parameter) +{ + switch (option) + { + case BMI_CHECK_MAXSIZE: + *(int*)inout_parameter = ZOID_MAX_EXPECTED_MSG; + break; + + case BMI_GET_UNEXP_SIZE: + *(int*)inout_parameter = ZOID_MAX_UNEXPECTED_MSG; + break; + + default: + return -BMI_ENOSYS; + } + + return 0; +} + +/* Invoked on BMI_memalloc. Important on the server, not so much on the + client. */ +static void* +BMI_zoid_memalloc(bmi_size_t size, enum bmi_op_type send_recv) +{ + if (zoid_node_type == CLIENT) + { + void* ptr; + + /* Ordinary malloc() is also aligned to 16 bytes on BG, but let's be + explicit here... */ + if (posix_memalign(&ptr, 16, size)) + return NULL; + + return ptr; + } + else /* SERVER */ + return BMI_zoid_server_memalloc(size); +} + +/* Invoked on BMI_memfree. */ +static int +BMI_zoid_memfree(void* buffer, bmi_size_t size, enum bmi_op_type send_recv) +{ + if (zoid_node_type == CLIENT) + free(buffer); + else /* SERVER */ + BMI_zoid_server_memfree(buffer); + return 0; +} + +/* Invoked on BMI_unexpected_free. We only support in on the server. */ +static int +BMI_zoid_unexpected_free(void* buffer) +{ + if (zoid_node_type == CLIENT) + { + /* We only support unexpected messages from clients to the server. 
*/ + abort(); + } + else + return BMI_zoid_server_unexpected_free(buffer); +} + +/* Invoked on BMI_post_send. */ +static int +BMI_zoid_post_send(bmi_op_id_t* id, bmi_method_addr_p dest, + const void* buffer, bmi_size_t size, + enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, + void* user_ptr, bmi_context_id context_id, PVFS_hint hints) +{ + return zoid_post_send_common(id, dest, &buffer, &size, 1, size, buffer_type, + tag, user_ptr, context_id, hints, 0); +} + +/* Invoked on BMI_post_sendunexpected. We only support it on clients. */ +static int +BMI_zoid_post_sendunexpected(bmi_op_id_t* id, bmi_method_addr_p dest, + const void* buffer, bmi_size_t size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, void* user_ptr, + bmi_context_id context_id, PVFS_hint hints) +{ + return zoid_post_send_common(id, dest, &buffer, &size, 1, size, buffer_type, + tag, user_ptr, context_id, hints, 1); +} + +/* Invoked on BMI_post_recv. */ +static int +BMI_zoid_post_recv(bmi_op_id_t* id, bmi_method_addr_p src, void* buffer, + bmi_size_t expected_size, bmi_size_t* actual_size, + enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, + void* user_ptr, bmi_context_id context_id, PVFS_hint hints) +{ + return zoid_post_recv_common(id, src, &buffer, &expected_size, 1, + expected_size, actual_size, buffer_type, tag, + user_ptr, context_id, hints); +} + +/* Invoked on BMI_post_test. */ +static int +BMI_zoid_test(bmi_op_id_t id, int* outcount, bmi_error_code_t* error_code, + bmi_size_t* actual_size, void** user_ptr, int max_idle_time_ms, + bmi_context_id context_id) +{ +#if 0 + fprintf(stderr, "BMI_zoid_test invoked\n"); +#endif + return zoid_test_common(1, &id, 1, outcount, NULL, error_code, + actual_size, user_ptr, max_idle_time_ms, + context_id); +} + +/* Invoked on BMI_post_testsome. 
*/ +static int +BMI_zoid_testsome(int incount, bmi_op_id_t* id_array, int* outcount, + int* index_array, bmi_error_code_t* error_code_array, + bmi_size_t* actual_size_array, void** user_ptr_array, + int max_idle_time_ms, bmi_context_id context_id) +{ + return zoid_test_common(incount, id_array, incount, outcount, index_array, + error_code_array, actual_size_array, + user_ptr_array, max_idle_time_ms, context_id); +} + +/* Invoked on BMI_testcontext. */ +static int +BMI_zoid_testcontext(int incount, bmi_op_id_t* out_id_array, int* outcount, + bmi_error_code_t* error_code_array, + bmi_size_t* actual_size_array, void** user_ptr_array, + int max_idle_time_ms, bmi_context_id context_id) +{ + if (zoid_node_type == CLIENT) + { + /* We scan zoid_ops for pending request and invoke zoid_test_common + on them. This is OK because this code is not expected to be + multi-thread safe (otherwise, a testcontext in one thread followed + by a post_send/recv in another, would not take that post into + account). */ + int ret, i; + int pending_count = op_list_count(zoid_ops); + bmi_op_id_t* tmp_id_array = alloca(pending_count * + sizeof(*tmp_id_array)); + int* tmp_index_array = alloca(incount * sizeof(*tmp_index_array)); + method_op_p met; + + i = 0; + qlist_for_each_entry(met, zoid_ops, op_list_entry) + tmp_id_array[i++] = met->op_id; + + ret = zoid_test_common(pending_count, tmp_id_array, incount, outcount, + tmp_index_array, error_code_array, + actual_size_array, user_ptr_array, + max_idle_time_ms, context_id); + + for (i = 0; i < *outcount; i++) + out_id_array[i] = tmp_id_array[tmp_index_array[i]]; + + return ret; + } + + /* Server code. */ + + return zoid_server_test_common(0, out_id_array, incount, outcount, NULL, + error_code_array, actual_size_array, + user_ptr_array, max_idle_time_ms, + context_id); +} + +/* Invoked on BMI_testunexpected. We only support in on the server. 
*/ +static int +BMI_zoid_testunexpected(int incount, int* outcount, + struct bmi_method_unexpected_info* info, + int max_idle_time_ms) +{ + if (zoid_node_type == CLIENT) + abort(); + + /* Server code. */ + + return BMI_zoid_server_testunexpected(incount, outcount, info, + max_idle_time_ms); +} + +/* Invoked on BMI_addr_lookup, also part of BMI_intialize. The only address + we support in the string form is zoid://, which denotes the server. */ +static struct bmi_method_addr* +BMI_zoid_method_addr_lookup(const char *id) +{ + static bmi_method_addr_p new_addr = NULL; +#if 0 + fprintf(stderr, "Invoked method_addr_lookup with id %s\n", id); +#endif + if (strcmp(id, "zoid://")) + return NULL; + + if (!new_addr) + { + /* Note: zoid_method_id will not be initialized here if we are a server. + Any solution? */ + if (!(new_addr = bmi_alloc_method_addr(zoid_method_id, + sizeof(struct zoid_addr)))) + return NULL; + + ((struct zoid_addr*)new_addr->method_data)->pid = ZOID_ADDR_SERVER_PID; + } + + return new_addr; +} + +/* Invoked on BMI_post_send_list. */ +static int +BMI_zoid_post_send_list(bmi_op_id_t* id, bmi_method_addr_p dest, + const void*const* buffer_list, + const bmi_size_t* size_list, int list_count, + bmi_size_t total_size, enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, void* user_ptr, + bmi_context_id context_id, PVFS_hint hints) +{ + return zoid_post_send_common(id, dest, buffer_list, size_list, list_count, + total_size, buffer_type, tag, user_ptr, + context_id, hints, 0); +} + +/* Invoked on BMI_post_recv_list. 
*/ +static int +BMI_zoid_post_recv_list(bmi_op_id_t* id, bmi_method_addr_p src, + void *const* buffer_list, const bmi_size_t* size_list, + int list_count, bmi_size_t total_expected_size, + bmi_size_t* total_actual_size, + enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, + void* user_ptr, bmi_context_id context_id, + PVFS_hint hints) +{ + return zoid_post_recv_common(id, src, buffer_list, size_list, list_count, + total_expected_size, total_actual_size, + buffer_type, tag, user_ptr, context_id, hints); +} + +/* Invoked on BMI_post_sendunexpected_list. We only support it on clients. */ +static int +BMI_zoid_post_sendunexpected_list(bmi_op_id_t* id, bmi_method_addr_p dest, + const void*const* buffer_list, + const bmi_size_t* size_list, int list_count, + bmi_size_t total_size, + enum bmi_buffer_type buffer_type, + bmi_msg_tag_t tag, void* user_ptr, + bmi_context_id context_id, PVFS_hint hints) +{ + return zoid_post_send_common(id, dest, buffer_list, size_list, list_count, + total_size, buffer_type, tag, user_ptr, + context_id, hints, 1); +} + +/* Invoked on BMI_open_context. We only support one, global context. */ +static int +BMI_zoid_open_context(bmi_context_id context_id) +{ + return 0; +} + +/* Invoked on BMI_close_context. We only support one, global context. */ +static void +BMI_zoid_close_context(bmi_context_id context_id) +{ +} + +/* Invoked on BMI_cancel. */ +static int +BMI_zoid_cancel(bmi_op_id_t id, bmi_context_id context_id) +{ + if (zoid_node_type == CLIENT) + { + /* Because of a lack of multi-threading considerations and the fact + that the server holds no state about pending requests, this is very + easy on the client side. */ + method_op_p op = (method_op_p)id_gen_fast_lookup(id); + + op->error_code = BMI_ECANCEL; + + return 0; + } + + /* Server code. */ + + return BMI_zoid_server_cancel(id, context_id); +} + +/* Invoked on BMI_rev_lookup_unexpected. 
*/ +static const char* +BMI_zoid_rev_lookup_unexpected(bmi_method_addr_p map) +{ + /* No idea what the purpose of this one is, so we don't implement it. */ + fprintf(stderr, "zoid_rev_lookup_unexpected invoked!\n"); + /* FIXME! */ + + return NULL; +} + +const struct bmi_method_ops bmi_zoid_ops = +{ + .method_name = "bmi_zoid", + + .flags = BMI_METHOD_FLAG_NO_POLLING, + + .initialize = BMI_zoid_initialize, + .finalize = BMI_zoid_finalize, + + .set_info = BMI_zoid_set_info, + .get_info = BMI_zoid_get_info, + + .memalloc = BMI_zoid_memalloc, + .memfree = BMI_zoid_memfree, + .unexpected_free = BMI_zoid_unexpected_free, + + .post_send = BMI_zoid_post_send, + .post_sendunexpected = BMI_zoid_post_sendunexpected, + .post_recv = BMI_zoid_post_recv, + + .test = BMI_zoid_test, + .testsome = BMI_zoid_testsome, + .testcontext = BMI_zoid_testcontext, + .testunexpected = BMI_zoid_testunexpected, + + .method_addr_lookup = BMI_zoid_method_addr_lookup, + + .post_send_list = BMI_zoid_post_send_list, + .post_recv_list = BMI_zoid_post_recv_list, + .post_sendunexpected_list = BMI_zoid_post_sendunexpected_list, + + .open_context = BMI_zoid_open_context, + .close_context = BMI_zoid_close_context, + + .cancel = BMI_zoid_cancel, + + .rev_lookup_unexpected = BMI_zoid_rev_lookup_unexpected, +}; diff --git a/src/io/bmi/bmi_zoid/zoid.h b/src/io/bmi/bmi_zoid/zoid.h new file mode 100644 index 0000000..3583614 --- /dev/null +++ b/src/io/bmi/bmi_zoid/zoid.h @@ -0,0 +1,49 @@ +#ifndef ZOID_H +#define ZOID_H + +#define ZOID_MAX_EXPECTED_MSG (128 * 1024 * 1024) +#define ZOID_MAX_UNEXPECTED_MSG 8192 + +#define ZOID_ADDR_SERVER_PID -1 + +struct zoid_addr +{ + int pid; +}; + +/* zoid.c */ + +extern int zoid_method_id; + +/* server.c */ + +int BMI_zoid_server_initialize(void); +int BMI_zoid_server_finalize(void); +void* BMI_zoid_server_memalloc(bmi_size_t size); +void BMI_zoid_server_memfree(void* buffer); +int BMI_zoid_server_unexpected_free(void* buffer); +int BMI_zoid_server_testunexpected(int incount, 
int* outcount, + struct bmi_method_unexpected_info* info, + int max_idle_time_ms); +int zoid_server_send_common(bmi_op_id_t* id, bmi_method_addr_p dest, + const void*const* buffer_list, + const bmi_size_t* size_list, int list_count, + bmi_size_t total_size, enum bmi_buffer_type + buffer_type, bmi_msg_tag_t tag, void* user_ptr, + bmi_context_id context_id, PVFS_hint hints); +int zoid_server_recv_common(bmi_op_id_t* id, bmi_method_addr_p src, + void *const* buffer_list, const bmi_size_t* + size_list, int list_count, bmi_size_t + total_expected_size, bmi_size_t* total_actual_size, + enum bmi_buffer_type buffer_type, bmi_msg_tag_t tag, + void* user_ptr, bmi_context_id context_id, + PVFS_hint hints); +int zoid_server_test_common(int incount, bmi_op_id_t* id_array, + int outcount_max, int* outcount, int* index_array, + bmi_error_code_t* error_code_array, + bmi_size_t* actual_size_array, + void** user_ptr_array, int max_idle_time_ms, + bmi_context_id context_id); +int BMI_zoid_server_cancel(bmi_op_id_t id, bmi_context_id context_id); +void zoid_server_free_client_addr(bmi_method_addr_p addr); +#endif diff --git a/src/io/bmi/module.mk.in b/src/io/bmi/module.mk.in index e33bbdd..03ac29c 100644 --- a/src/io/bmi/module.mk.in +++ b/src/io/bmi/module.mk.in @@ -9,3 +9,8 @@ SERVERSRC += \ $(DIR)/bmi-method-support.c \ $(DIR)/op-list.c \ $(DIR)/reference-list.c +LIBBMISRC += \ + $(DIR)/bmi.c \ + $(DIR)/bmi-method-support.c \ + $(DIR)/op-list.c \ + $(DIR)/reference-list.c diff --git a/src/io/bmi/reference-list.c b/src/io/bmi/reference-list.c index 3ad9d43..0cef773 100755 --- a/src/io/bmi/reference-list.c +++ b/src/io/bmi/reference-list.c @@ -14,16 +14,22 @@ #include #include #include +#include #include "reference-list.h" #include "gossip.h" #include "id-generator.h" +#include "quickhash.h" +static struct qhash_table* str_table = NULL; +#define STR_TABLE_SIZE 137 /*************************************************************** * Visible functions */ +static int 
ref_list_compare_key_entry(void* key, struct qhash_head* link); + /* * ref_list_new() * @@ -36,11 +42,31 @@ ref_list_p ref_list_new(void) ref_list_p tmp_list = NULL; + /* There is currently never more than one reference list in BMI. If we + * ever have a need for more, then this hash table should be moved from + * a static global to actually be part of the ref_list_p. + */ + assert(str_table == NULL); + + str_table = qhash_init( + ref_list_compare_key_entry, + quickhash_string_hash, + STR_TABLE_SIZE); + + if(!str_table) + { + return(NULL); + } + tmp_list = (ref_list_p) malloc(sizeof(struct qlist_head)); - if (tmp_list) + if(!tmp_list) { - INIT_QLIST_HEAD(tmp_list); + qhash_finalize(str_table); + str_table = NULL; + return(NULL); } + + INIT_QLIST_HEAD(tmp_list); return (tmp_list); } @@ -54,6 +80,11 @@ ref_list_p ref_list_new(void) void ref_list_add(ref_list_p rlp, ref_st_p rsp) { + if(rsp->id_string) + { + qhash_add(str_table, rsp->id_string, &rsp->hash_link); + } + qlist_add(&(rsp->list_link), rlp); } @@ -61,24 +92,14 @@ void ref_list_add(ref_list_p rlp, * ref_list_search_addr() * * looks for a reference structure in the list that matches the given - * PVFS_BMI_addr_t. + * BMI_addr_t. * * returns a pointer to the structure on success, a NULL on failure. 
*/ ref_st_p ref_list_search_addr(ref_list_p rlp, - PVFS_BMI_addr_t my_addr) + BMI_addr_t my_addr) { - ref_list_p tmp_link = NULL; - ref_st_p tmp_entry = NULL; - - qlist_for_each(tmp_link, rlp) - { - tmp_entry = qlist_entry(tmp_link, struct ref_st, - list_link); - if (tmp_entry->bmi_addr == my_addr) - return (tmp_entry); - } - return (NULL); + return(id_gen_safe_lookup(my_addr)); } @@ -93,16 +114,7 @@ ref_st_p ref_list_search_addr(ref_list_p rlp, ref_st_p ref_list_search_method_addr(ref_list_p rlp, bmi_method_addr_p map) { - ref_list_p tmp_link = NULL; - ref_st_p tmp_entry = NULL; - - qlist_for_each(tmp_link, rlp) - { - tmp_entry = qlist_entry(tmp_link, struct ref_st, list_link); - if (tmp_entry->method_addr == map) - return (tmp_entry); - } - return (NULL); + return(map->parent); } /* @@ -116,17 +128,16 @@ ref_st_p ref_list_search_method_addr(ref_list_p rlp, ref_st_p ref_list_search_str(ref_list_p rlp, const char *idstring) { - ref_list_p tmp_link = NULL; - ref_st_p tmp_entry = NULL; - qlist_for_each(tmp_link, rlp) + struct qhash_head* tmp_link; + + tmp_link = qhash_search(str_table, (char*)idstring); + if(!tmp_link) { - tmp_entry = qlist_entry(tmp_link, struct ref_st, - list_link); - if (tmp_entry->id_string && !strcmp(tmp_entry->id_string, idstring)) - return (tmp_entry); + return(NULL); } - return (NULL); + + return(qlist_entry(tmp_link, ref_st, hash_link)); } /* @@ -137,22 +148,22 @@ ref_st_p ref_list_search_str(ref_list_p rlp, * returns a pointer to the structure on success, a NULL on failure. 
*/ ref_st_p ref_list_rem(ref_list_p rlp, - PVFS_BMI_addr_t my_addr) + BMI_addr_t my_addr) { - ref_list_p tmp_link = NULL; - ref_list_p scratch = NULL; - ref_st_p tmp_entry = NULL; + ref_st_p tmp_entry; + + tmp_entry = id_gen_safe_lookup(my_addr); - qlist_for_each_safe(tmp_link, scratch, rlp) + if(tmp_entry) { - tmp_entry = qlist_entry(tmp_link, struct ref_st, list_link); - if (tmp_entry->bmi_addr == my_addr) - { - qlist_del(&tmp_entry->list_link); - return (tmp_entry); - } + qlist_del(&tmp_entry->list_link); + + if(tmp_entry->id_string) + { + qhash_del(&tmp_entry->hash_link); + } } - return (NULL); + return (tmp_entry); } @@ -177,6 +188,9 @@ void ref_list_cleanup(ref_list_p rlp) dealloc_ref_st(tmp_entry); } + qhash_finalize(str_table); + str_table = NULL; + free(rlp); return; } @@ -203,7 +217,7 @@ ref_st_p alloc_ref_st(void) memset(new_ref, 0, ssize); /* we can go ahead and set the bmi_addr here */ - id_gen_fast_register(&(new_ref->bmi_addr), new_ref); + id_gen_safe_register(&(new_ref->bmi_addr), new_ref); return (new_ref); } @@ -234,11 +248,26 @@ void dealloc_ref_st(ref_st_p deadref) deadref->interface->set_info(BMI_DROP_ADDR, deadref->method_addr); } - id_gen_fast_unregister(deadref->bmi_addr); + id_gen_safe_unregister(deadref->bmi_addr); free(deadref); } +static int ref_list_compare_key_entry(void* key, struct qhash_head* link) +{ + char* key_string = (char*)key; + ref_st_p tmp_entry = NULL; + + tmp_entry = qhash_entry(link, ref_st, hash_link); + assert(tmp_entry); + + if(strcmp(tmp_entry->id_string, key_string) == 0) + { + return(1); + } + return(0); +} + /* * Local variables: * c-indent-level: 4 diff --git a/src/io/bmi/reference-list.h b/src/io/bmi/reference-list.h index ec54efb..1bd63fd 100644 --- a/src/io/bmi/reference-list.h +++ b/src/io/bmi/reference-list.h @@ -7,7 +7,7 @@ /* * Header file for reference list management functions. 
Reference structures - * are used to maintain the mapping between PVFS_BMI_addr_t and + * are used to maintain the mapping between BMI_addr_t and * method_addr_p addresses. */ @@ -17,6 +17,13 @@ #include "bmi-types.h" #include "bmi-method-support.h" #include "quicklist.h" +#include "quickhash.h" + +#ifdef WIN32 +/* interface is a macro on Windows */ +#undef interface +#define interface _interface +#endif typedef struct qlist_head *ref_list_p; @@ -27,7 +34,7 @@ typedef struct qlist_head *ref_list_p; */ struct ref_st { - PVFS_BMI_addr_t bmi_addr; /* the identifier passed out of the BMI layer */ + BMI_addr_t bmi_addr; /* the identifier passed out of the BMI layer */ char *id_string; /* the id string that represents this reference */ bmi_method_addr_p method_addr; /* address structure used by the method */ @@ -37,6 +44,7 @@ struct ref_st /* linked list entry */ struct qlist_head list_link; int ref_count; + struct qhash_head hash_link; }; typedef struct ref_st ref_st, *ref_st_p; @@ -49,9 +57,9 @@ ref_list_p ref_list_new(void); void ref_list_add(ref_list_p rlp, ref_st_p rsp); ref_st_p ref_list_search_addr(ref_list_p rlp, - PVFS_BMI_addr_t my_addr); + BMI_addr_t my_addr); ref_st_p ref_list_rem(ref_list_p rlp, - PVFS_BMI_addr_t my_addr); + BMI_addr_t my_addr); ref_st_p ref_list_search_method_addr(ref_list_p rlp, bmi_method_addr_p map); ref_st_p ref_list_search_str(ref_list_p rlp, diff --git a/src/io/buffer/ncac-trove.c b/src/io/buffer/ncac-trove.c index 3a08ded..544181d 100644 --- a/src/io/buffer/ncac-trove.c +++ b/src/io/buffer/ncac-trove.c @@ -117,7 +117,7 @@ int NCAC_aio_read_ext( PVFS_fs_id coll_id, PVFS_handle handle, NULL, /* vtag */ user_ptr_array, context, - &op_id); + &op_id, NULL); if (ret < 0) { NCAC_error("trove listio read failed\n"); @@ -212,7 +212,7 @@ int NCAC_aio_write( PVFS_fs_id coll_id, NULL, /* vtag */ user_ptr_array, context, - &op_id); + &op_id, NULL); if (ret < 0) { NCAC_error("trove listio read failed\n"); @@ -247,7 +247,7 @@ int 
do_read_for_rmw(PVFS_fs_id coll_id, PVFS_handle handle, ret = trove_bstream_read_at(coll_id, handle, buf, &inout_size, 0, 0, NULL, NULL, - context, &op_id); + context, &op_id, NULL); DPRINT("do_read_for_rmw; req=%lld\n", op_id); @@ -392,7 +392,7 @@ int init_io_read( PVFS_fs_id coll_id, PVFS_handle handle, NULL, /* vtag */ user_ptr_array, context, - ioreq); + ioreq, NULL); if (ret < 0) { NCAC_error("trove read at failed\n"); diff --git a/src/io/description/dist-basic.c b/src/io/description/dist-basic.c index c3193e1..2129705 100644 --- a/src/io/description/dist-basic.c +++ b/src/io/description/dist-basic.c @@ -4,11 +4,9 @@ * See COPYING in top-level directory. */ -#ifndef __KERNEL__ #include #include #include -#endif #include "pint-distribution.h" #include "pint-dist-utils.h" #include "pvfs2-types.h" @@ -62,6 +60,11 @@ static int get_num_dfiles(void* params, return 1; } +static PVFS_size get_blksize(void* params) +{ + /* this is arbitrary; all data is on one server */ + return CONTIGBLOCKSZ; +} static void encode_lebf(char **pptr, void* params) { @@ -75,6 +78,10 @@ static void registration_init(void* params) { } +static void unregister(void) +{ +} + static char *params_string(void *params) { return strdup("none"); @@ -91,12 +98,23 @@ static PINT_dist_methods basic_methods = { logical_file_size, get_num_dfiles, PINT_dist_default_set_param, + get_blksize, encode_lebf, decode_lebf, registration_init, + unregister, params_string }; +#ifdef WIN32 +PINT_dist basic_dist = { + PVFS_DIST_BASIC_NAME, + roundup8(PVFS_DIST_BASIC_NAME_SIZE), /* name size */ + 0, /* param size */ + &basic_params, + &basic_methods +}; +#else PINT_dist basic_dist = { .dist_name = PVFS_DIST_BASIC_NAME, .name_size = roundup8(PVFS_DIST_BASIC_NAME_SIZE), /* name size */ @@ -104,6 +122,7 @@ PINT_dist basic_dist = { .params = &basic_params, .methods = &basic_methods }; +#endif /* * Local variables: diff --git a/src/io/description/dist-simple-stripe.c b/src/io/description/dist-simple-stripe.c index 
46c6dbb..cd12b02 100644 --- a/src/io/description/dist-simple-stripe.c +++ b/src/io/description/dist-simple-stripe.c @@ -4,11 +4,9 @@ * See COPYING in top-level directory. */ -#ifndef __KERNEL__ #include #include #include -#endif #define __PINT_REQPROTO_ENCODE_FUNCS_C #include "pint-distribution.h" #include "pint-dist-utils.h" @@ -180,6 +178,11 @@ static void registration_init(void* params) } +static void unregister(void) +{ + PINT_dist_unregister_param(PVFS_DIST_SIMPLE_STRIPE_NAME, "strip_size"); +} + static char *params_string(void *params) { char param_string[1024]; @@ -193,6 +196,13 @@ static PVFS_simple_stripe_params simple_stripe_params = { PVFS_DIST_SIMPLE_STRIPE_DEFAULT_STRIP_SIZE /* strip size */ }; +static PVFS_size get_blksize(void* params) +{ + PVFS_simple_stripe_params* dparam = (PVFS_simple_stripe_params*)params; + /* report the strip size as the block size */ + return(dparam->strip_size); +} + static PINT_dist_methods simple_stripe_methods = { logical_to_physical_offset, physical_to_logical_offset, @@ -201,12 +211,23 @@ static PINT_dist_methods simple_stripe_methods = { logical_file_size, PINT_dist_default_get_num_dfiles, PINT_dist_default_set_param, + get_blksize, encode_lebf, decode_lebf, registration_init, + unregister, params_string }; +#ifdef WIN32 +PINT_dist simple_stripe_dist = { + PVFS_DIST_SIMPLE_STRIPE_NAME, + roundup8(PVFS_DIST_SIMPLE_STRIPE_NAME_SIZE), /* name size */ + roundup8(sizeof(PVFS_simple_stripe_params)), /* param size */ + &simple_stripe_params, + &simple_stripe_methods +}; +#else PINT_dist simple_stripe_dist = { .dist_name = PVFS_DIST_SIMPLE_STRIPE_NAME, .name_size = roundup8(PVFS_DIST_SIMPLE_STRIPE_NAME_SIZE), /* name size */ @@ -214,6 +235,7 @@ PINT_dist simple_stripe_dist = { .params = &simple_stripe_params, .methods = &simple_stripe_methods }; +#endif /* diff --git a/src/io/description/dist-twod-stripe.c b/src/io/description/dist-twod-stripe.c index f15e676..e99ac62 100644 --- a/src/io/description/dist-twod-stripe.c +++ 
b/src/io/description/dist-twod-stripe.c @@ -4,6 +4,15 @@ * * See COPYING in top-level directory. */ +/* twod-stripe will take all of the servers in the filesystem and + * partition them into num_groups groups. Data will then be striped to + * each group before we move onto the next group. The strip_factor will + * determine how many chunks of strip_size are written to each server + * in each group before we transition to the next group. + * The striping on the group level is done round-robin in the same + * fashion as simple-stripe + */ + #include #include #include @@ -62,13 +71,16 @@ static PVFS_offset logical_to_physical_offset(void* params, num_groups = dparam->num_groups; strip_size = dparam->strip_size; - if(num_groups > server_ct || num_groups == 0 || server_ct == 0 ) + if( num_groups == 0 || server_ct == 0 ) { gossip_err("%s: Invalid num_groups/server_ct options: " "gr:%d server:%d\n", __func__, num_groups, server_ct); } + if(num_groups > server_ct ) + num_groups = server_ct; + /* size of all groups that are of equal size: all groups * except when server_ct doesnt divide evenly into num_groups */ small_group_size = server_ct / num_groups; @@ -181,7 +193,7 @@ static PVFS_offset physical_to_logical_offset(void* params, PVFS_size global_stripes = 0; uint32_t num_groups = dparam->num_groups; - if(num_groups > server_ct || num_groups == 0 || server_ct == 0 ) + if( num_groups == 0 || server_ct == 0 ) { gossip_err( "%s: Invalid num_groups/server_ct options: " @@ -189,6 +201,9 @@ static PVFS_offset physical_to_logical_offset(void* params, __func__,num_groups,server_ct); } + if(num_groups > server_ct) + num_groups = server_ct; + /* if we are a server in the last group, make sure things are happy */ if(server_nr >= (num_groups-1)*(small_group_size)) { @@ -325,7 +340,7 @@ static PVFS_offset next_mapped_offset(void* params, return physical_to_logical_offset(params,fd,0); } - if(num_groups > server_ct || num_groups == 0 || server_ct == 0 ) + if( num_groups == 0 || 
server_ct == 0 ) { gossip_err("%s: Invalid num_groups/server_ct options: " "gr:%d server:%d\n", @@ -333,7 +348,8 @@ static PVFS_offset next_mapped_offset(void* params, num_groups, server_ct); } - + if(num_groups > server_ct) + num_groups = server_ct; total_stripes += global_stripes * factor; /* if we are a server in the last group, make sure things are happy */ @@ -433,29 +449,29 @@ static int set_param(const char* dist_name, void* params, } else if(strcmp(param_name, "num_groups")==0) { - if(*(uint32_t*)value <= 0) + if(*(int64_t*)value <= 0) { gossip_err("ERROR: num_groups param <= 0!\n"); } else { gossip_debug(GOSSIP_DIST_DEBUG, - "%s: num_groups: %d\n", - __func__, *(uint32_t*)value); - dparam->num_groups = *(uint32_t*)value; + "%s: num_groups: %lld\n", + __func__, lld(*(int64_t*)value)); + dparam->num_groups = *(int64_t*)value; } } else if(strcmp(param_name, "group_strip_factor")==0) { - if(*(uint32_t*)value <= 0) + if(*(int64_t*)value <= 0) gossip_err("ERROR: group_strip_factor param <= 0!\n"); else { gossip_debug(GOSSIP_DIST_DEBUG, - "%s: group_strip_factor: %d\n", - __func__,*(uint32_t*)value); + "%s: group_strip_factor: %lld\n", + __func__,lld(*(int64_t*)value)); - dparam->group_strip_factor = *(uint32_t*)value; + dparam->group_strip_factor = *(int64_t*)value; } } else @@ -470,7 +486,7 @@ static void encode_params(char **pptr, void* params) PVFS_twod_stripe_params *dparam =(PVFS_twod_stripe_params*)params; encode_uint32_t(pptr,&dparam->num_groups); encode_PVFS_size(pptr, &dparam->strip_size); - encode_int32_t(pptr,&dparam->group_strip_factor); + encode_uint32_t(pptr,&dparam->group_strip_factor); } @@ -492,6 +508,14 @@ static void registration_init(void* params) PVFS_twod_stripe_params, group_strip_factor); } +static void unregister(void) +{ + PINT_dist_unregister_param(PVFS_DIST_TWOD_STRIPE_NAME, "num_groups"); + PINT_dist_unregister_param(PVFS_DIST_TWOD_STRIPE_NAME, "strip_size"); + PINT_dist_unregister_param(PVFS_DIST_TWOD_STRIPE_NAME, + 
"group_strip_factor"); +} + static char *params_string(void *params) { char param_string[1024]; @@ -502,6 +526,13 @@ static char *params_string(void *params) return strdup(param_string); } +static PVFS_size get_blksize(void* params) +{ + PVFS_twod_stripe_params* dparam = (PVFS_twod_stripe_params*)params; + /* report the strip size as the block size */ + return(dparam->strip_size); +} + /* default twod_stripe_params */ static PVFS_twod_stripe_params twod_stripe_params = { PVFS_DIST_TWOD_STRIPE_DEFAULT_GROUPS, /* num_groups */ @@ -517,12 +548,23 @@ static PINT_dist_methods twod_stripe_methods = { logical_file_size, PINT_dist_default_get_num_dfiles, set_param, + get_blksize, encode_params, decode_params, registration_init, + unregister, params_string }; +#ifdef WIN32 +PINT_dist twod_stripe_dist = { + PVFS_DIST_TWOD_STRIPE_NAME, + roundup8(PVFS_DIST_TWOD_STRIPE_NAME_SIZE), /* name size */ + roundup8(sizeof(PVFS_twod_stripe_params)), /* param size */ + &twod_stripe_params, + &twod_stripe_methods +}; +#else PINT_dist twod_stripe_dist = { .dist_name = PVFS_DIST_TWOD_STRIPE_NAME, .name_size = roundup8(PVFS_DIST_TWOD_STRIPE_NAME_SIZE), /* name size */ @@ -530,6 +572,7 @@ PINT_dist twod_stripe_dist = { .params = &twod_stripe_params, .methods = &twod_stripe_methods }; +#endif /* * Local variables: diff --git a/src/io/description/dist-varstrip-parser.c b/src/io/description/dist-varstrip-parser.c index a9c01ab..142d676 100644 --- a/src/io/description/dist-varstrip-parser.c +++ b/src/io/description/dist-varstrip-parser.c @@ -7,6 +7,7 @@ #include "dist-varstrip-parser.h" #include "pvfs2-dist-varstrip.h" +#include "gossip.h" #include #include @@ -70,7 +71,11 @@ static int strips_parse_elem( s_size = strtok(NULL, ";"); if (s_size != NULL) { +#ifdef WIN32 + i_size = _atoi64(s_size); +#else i_size = atoll(s_size); +#endif if (i_size > 0) { if (strlen(s_size) > 1) @@ -145,6 +150,12 @@ int PINT_dist_strips_parse( *count = 0; *strips = 0; + if(!input || strlen(input) == 0) + { + 
gossip_err("Error: missing manditory parameters to varstrip_dist distribution.\n"); + return(-1); + } + if (strlen(input) < PVFS_DIST_VARSTRIP_MAX_STRIPS_STRING_LENGTH - 1) { strcpy(inp, input); @@ -152,6 +163,7 @@ int PINT_dist_strips_parse( else { /* input string too long, abort */ + gossip_err("Error: varstrip_dist distribution parameters too long.\n"); return -1; } @@ -160,6 +172,7 @@ int PINT_dist_strips_parse( if (!(*strips)) { /* allocation failed, abort */ + gossip_err("Error: unable to parse varstrip_dist distribution parameters.\n"); return -1; } diff --git a/src/io/description/dist-varstrip.c b/src/io/description/dist-varstrip.c index 750ccb3..347bf91 100644 --- a/src/io/description/dist-varstrip.c +++ b/src/io/description/dist-varstrip.c @@ -405,6 +405,11 @@ static void registration_init(void* params) PVFS_varstrip_params, strips); } +static void unregister(void) +{ + PINT_dist_unregister_param(PVFS_DIST_VARSTRIP_NAME, "strips"); +} + static char *params_string(void *params) { PVFS_varstrip_params* dparam = (PVFS_varstrip_params*)params; @@ -412,6 +417,26 @@ static char *params_string(void *params) return strdup(dparam->strips); } +static PVFS_size get_blksize(void* params) +{ + PVFS_varstrip_params* varstrip_params = (PVFS_varstrip_params*)params; + PINT_dist_strips *strips; + uint32_t ui_count; + PVFS_size blksize; + + if (PINT_dist_strips_parse( + varstrip_params->strips, &strips, &ui_count) == -1) + { + return -1; + } + + /* report the first trip size in the set as the block size */ + blksize = strips[0].size; + + PINT_dist_strips_free_mem(&strips); + + return(blksize); +} static PVFS_varstrip_params varstrip_params = { "\0" }; @@ -423,12 +448,23 @@ static PINT_dist_methods varstrip_methods = { logical_file_size, get_num_dfiles, set_param, + get_blksize, encode_params, decode_params, registration_init, + unregister, params_string }; +#ifdef WIN32 +PINT_dist varstrip_dist = { + PVFS_DIST_VARSTRIP_NAME, + roundup8(PVFS_DIST_VARSTRIP_NAME_SIZE), /* 
name size */ + roundup8(sizeof(PVFS_varstrip_params)), /* param size */ + &varstrip_params, + &varstrip_methods +}; +#else PINT_dist varstrip_dist = { .dist_name = PVFS_DIST_VARSTRIP_NAME, .name_size = roundup8(PVFS_DIST_VARSTRIP_NAME_SIZE), /* name size */ @@ -436,6 +472,7 @@ PINT_dist varstrip_dist = { .params = &varstrip_params, .methods = &varstrip_methods }; +#endif /* * Local variables: diff --git a/src/io/description/pint-dist-utils.c b/src/io/description/pint-dist-utils.c index 2f366d0..eb3ab97 100644 --- a/src/io/description/pint-dist-utils.c +++ b/src/io/description/pint-dist-utils.c @@ -3,23 +3,20 @@ * * See COPYING in top-level directory. */ -#include "pvfs2-dist-simple-stripe.h" -#ifndef __KERNEL__ + #include #include #include +#include "pvfs2-dist-simple-stripe.h" #include "pvfs2-dist-varstrip.h" #include "pvfs2-dist-twod-stripe.h" -#endif #include "pint-dist-utils.h" /* Default distributions */ extern PINT_dist basic_dist; extern PINT_dist simple_stripe_dist; -#ifndef __KERNEL__ extern PINT_dist varstrip_dist; extern PINT_dist twod_stripe_dist; -#endif /* Struct for determining how to set a distribution parameter by name */ typedef struct PINT_dist_param_offset_s @@ -65,36 +62,32 @@ int PINT_dist_initialize(server_configuration_s* server_config) int ret = 0; /* Register the basic distribution */ - PINT_register_distribution(&basic_dist); - - /* Register the simple stripe distribution */ - PINT_register_distribution(&simple_stripe_dist); + PINT_register_distribution(&basic_dist); -#ifndef __KERNEL__ - /* Register the varstrip distribution */ PINT_register_distribution(&varstrip_dist); + + /* Register the simple stripe distribution */ + PINT_register_distribution(&simple_stripe_dist); /* Register the twod stripe distribution */ PINT_register_distribution(&twod_stripe_dist); -#endif - - - + /* add an associated unregister to any new distributions */ return ret; } /* PINT_dist_finalize implementation */ void PINT_dist_finalize(void) { - int i; - for (i = 
0; i < PINT_dist_param_table_entries; i++) - { - pint_free(PINT_dist_param_table[i].dist_name); - pint_free(PINT_dist_param_table[i].param_name); - } - pint_free(PINT_dist_param_table); + PINT_unregister_distribution(basic_dist.dist_name); + PINT_unregister_distribution(varstrip_dist.dist_name); + PINT_unregister_distribution(simple_stripe_dist.dist_name); + PINT_unregister_distribution(twod_stripe_dist.dist_name); + + free(PINT_dist_param_table); + PINT_dist_param_table = 0; + PINT_dist_param_table_size = 0; } /* PINT_dist_default_get_num_dfiles implementation */ @@ -147,7 +140,7 @@ int PINT_dist_register_param_offset(const char* dist_name, int new_table_size = PINT_dist_param_table_size + PINT_dist_param_table_alloc_inc; - buf = pint_malloc(new_table_size * sizeof(PINT_dist_param_offset)); + buf = malloc(new_table_size * sizeof(PINT_dist_param_offset)); if (0 != buf) { if(PINT_dist_param_table_size) @@ -160,7 +153,7 @@ int PINT_dist_register_param_offset(const char* dist_name, if(PINT_dist_param_table_size) { - pint_free(PINT_dist_param_table); + free(PINT_dist_param_table); } PINT_dist_param_table_size = new_table_size; PINT_dist_param_table = buf; @@ -175,14 +168,14 @@ int PINT_dist_register_param_offset(const char* dist_name, dist_len = strlen(dist_name) + 1; param_len = strlen(param_name) + 1; PINT_dist_param_table[PINT_dist_param_table_entries].dist_name = - pint_malloc(dist_len); - if (NULL == PINT_dist_param_table[PINT_dist_param_table_entries].dist_name) + malloc(dist_len); + if (0 == PINT_dist_param_table[PINT_dist_param_table_entries].dist_name) { return -1; } PINT_dist_param_table[PINT_dist_param_table_entries].param_name = - pint_malloc(param_len); + malloc(param_len); if (0 == PINT_dist_param_table[PINT_dist_param_table_entries].param_name) { return -1; @@ -199,6 +192,46 @@ int PINT_dist_register_param_offset(const char* dist_name, return 0; } +int PINT_dist_unregister_param_offset(const char *dist_name, + const char *param_name) +{ + int i = 0, 
dlen, plen; + + if( !dist_name || !param_name ) + { + return -EINVAL; + } + + dlen = strlen(dist_name) + 1; + plen = strlen(param_name) + 1; + + for( i = 0; i < PINT_dist_param_table_entries; i++ ) + { + if((strncmp(PINT_dist_param_table[i].dist_name, dist_name, dlen)==0) && + (strncmp(PINT_dist_param_table[i].param_name, param_name, plen)==0)) + { + /* found dist and param to unregister */ + if( PINT_dist_param_table[i].dist_name ) + { + free(PINT_dist_param_table[i].dist_name); + } + if( PINT_dist_param_table[i].param_name ) + { + free(PINT_dist_param_table[i].param_name); + } + + /* bubble up, not sure I like this but it is just an array */ + --PINT_dist_param_table_entries; + for( ; i < PINT_dist_param_table_entries; i++ ) + { + PINT_dist_param_table[i] = PINT_dist_param_table[i+1]; + } + } + } + + return 0; +} + /* * Local variables: * mode: c diff --git a/src/io/description/pint-dist-utils.h b/src/io/description/pint-dist-utils.h index a297bcf..df93bb4 100644 --- a/src/io/description/pint-dist-utils.h +++ b/src/io/description/pint-dist-utils.h @@ -7,22 +7,9 @@ #ifndef __PINT_DIST_UTILS_H #define __PINT_DIST_UTILS_H -#include "pvfs2-types.h" #include "pint-distribution.h" #include "server-config.h" -#ifdef __KERNEL__ -#include -#include -#include - -#define pint_malloc(size) kmalloc(size, GFP_KERNEL) -#define pint_free(ptr) kfree(ptr) -#else -#define pint_malloc(size) malloc(size) -#define pint_free(ptr) free(ptr) -#endif - /** * Perform initialization tasks for distributions * - register the default distributions @@ -66,6 +53,14 @@ int PINT_dist_register_param_offset(const char* dist_name, size_t offset, size_t field_size); +/** + * Unregister the parameter offset. + * + * Just helper to make register/unregister look complete + */ +int PINT_dist_unregister_param_offset(const char* dist_name, + const char* param_name); + /** * Wrapper macro to make adding parameter fields easy. 
* @@ -78,6 +73,8 @@ int PINT_dist_register_param_offset(const char* dist_name, (size_t)&((param_type*)0)->param_member, \ sizeof(((param_type*)0)->param_member)) +#define PINT_dist_unregister_param(dname, pname) \ + PINT_dist_unregister_param_offset(dname, pname) #endif diff --git a/src/io/description/pint-distribution.c b/src/io/description/pint-distribution.c index 2d3c828..832f179 100644 --- a/src/io/description/pint-distribution.c +++ b/src/io/description/pint-distribution.c @@ -4,18 +4,17 @@ * See COPYING in top-level directory. */ -#ifndef __KERNEL__ #include #include #include -#endif +#include #define __PINT_REQPROTO_ENCODE_FUNCS_C #include "pvfs2-types.h" #include "pvfs2-debug.h" #include "gossip.h" #include "pint-distribution.h" -#include "pint-dist-utils.h" + /* global size of dist table */ @@ -58,13 +57,17 @@ int PINT_unregister_distribution(char *dist_name) return -1; for (d = 0; d < PINT_Dist_count && PINT_Dist_table[d]; d++) { - if (!strncmp(dist_name, PINT_Dist_table[d]->dist_name, - PINT_DIST_NAME_SZ)) + if (strncmp(dist_name, PINT_Dist_table[d]->dist_name, + PINT_DIST_NAME_SZ) == 0) { + PINT_Dist_table[d]->methods->unregister(); /* bubble up */ --PINT_Dist_count; for (; dparams = (void *)(new_dist->dist_name + roundup8(new_dist->name_size)); + /* after lookup there must be enough room to hold name */ + assert(old_dist.name_size >= (strlen(old_dist.dist_name) + 1)); + /* copy using length of string passed in by caller + * rather than rounded up name_size used for distribution packing + */ memcpy(new_dist->dist_name, old_dist.dist_name, - old_dist.name_size); + (strlen(old_dist.dist_name) + 1)); memcpy(new_dist->params, old_dist.params, old_dist.param_size); /* leave methods pointing to same static functions */ } @@ -111,7 +119,7 @@ int PINT_dist_free(PINT_dist *dist) if (dist) { /* assumes this is a dist created from above */ - pint_free(dist); + free(dist); return 0; } return -1; @@ -127,7 +135,7 @@ PINT_dist* PINT_dist_copy(const PINT_dist *dist) 
return NULL; } dist_size = PINT_DIST_PACK_SIZE(dist); - new_dist = (PINT_dist *)pint_malloc(dist_size); + new_dist = (PINT_dist *)malloc(dist_size); if (new_dist) { memcpy(new_dist, dist, dist_size); diff --git a/src/io/description/pint-distribution.h b/src/io/description/pint-distribution.h index 7846a0b..b373467 100644 --- a/src/io/description/pint-distribution.h +++ b/src/io/description/pint-distribution.h @@ -51,6 +51,9 @@ typedef struct PINT_dist_methods_s int (*set_param)(const char* dist_name, void* params, const char* param_name, void* value); + /* Retrieves a blocksize value suitable to report in stat() */ + PVFS_size (*get_blksize)(void* params); + /* Stores parameters in lebf memory at pptr */ void (*encode_lebf)(char **pptr, void* params); @@ -59,6 +62,9 @@ typedef struct PINT_dist_methods_s /* Called when the distribution is registered */ void (*registration_init)(void* params); + + /* Called when the distribution is unregisterd */ + void (*unregister)(void); char *(*params_string)(void *params); } PINT_dist_methods; @@ -82,7 +88,7 @@ typedef struct PINT_dist_s { encode_string(pptr, &px->dist_name); \ if (!px->methods) { \ gossip_err("%s: encode_PINT_dist: methods is null\n", __func__); \ - assert(0); \ + exit(1); \ } \ (px->methods->encode_lebf) (pptr, px->params); \ align8(pptr); \ @@ -95,7 +101,7 @@ typedef struct PINT_dist_s { PINT_dist_lookup(&tmp_dist); \ if (!tmp_dist.methods) { \ gossip_err("%s: decode_PINT_dist: methods is null\n", __func__); \ - assert(0); \ + exit(1); \ } \ /* later routines assume dist is a big contiguous thing, do so */ \ *(x) = px = decode_malloc(PINT_DIST_PACK_SIZE(&tmp_dist)); \ diff --git a/src/io/description/pint-request-encode.h b/src/io/description/pint-request-encode.h index 7fdb4b4..25987c0 100644 --- a/src/io/description/pint-request-encode.h +++ b/src/io/description/pint-request-encode.h @@ -6,7 +6,6 @@ #define PVFS_REQ_LIMIT_PINT_REQUEST_NUM 100 #include "gossip.h" - /* linearize the PINT_Request tree into a 
contiguous array */ inline static int linearize_PVFS_Request( @@ -37,7 +36,7 @@ linearize_PVFS_Request( ret = PINT_request_commit(linreq, req); if(ret < 0) { - decode_free(linreq); + free(linreq); return ret; } @@ -45,7 +44,7 @@ linearize_PVFS_Request( ret = PINT_request_encode(linreq); if(ret < 0) { - decode_free(linreq); + free(linreq); return ret; } diff --git a/src/io/description/pint-request.c b/src/io/description/pint-request.c index e9d3df9..13bbba5 100644 --- a/src/io/description/pint-request.c +++ b/src/io/description/pint-request.c @@ -4,22 +4,19 @@ * See COPYING in top-level directory. */ -#ifndef __KERNEL__ #include #include #include #include -#else -#include -#include -#endif - #include #include #include #include #include "pvfs2-internal.h" -#include "pint-dist-utils.h" + +#ifdef WIN32 +typedef uint32_t u_int32_t; +#endif static PVFS_offset PINT_request_disp(PINT_Request *request); @@ -115,12 +112,12 @@ int PINT_process_request(PINT_Request_state *req, gossip_debug(GOSSIP_REQUEST_DEBUG, "\tsize request - copying state, hold on to your hat! dp %d\n", req->cur->rqbase->depth); - temp_space = (void *)pint_malloc(sizeof(PINT_Request_state)+ + temp_space = (void *)malloc(sizeof(PINT_Request_state)+ (sizeof(PINT_reqstack)*req->cur->rqbase->depth)); - if(!temp_space) - { - return -PVFS_ENOMEM; - } + if(!temp_space) + { + return -PVFS_ENOMEM; + } memcpy(temp_space,req,sizeof(PINT_Request_state)); req = (PINT_Request_state *)temp_space; @@ -171,7 +168,7 @@ int PINT_process_request(PINT_Request_state *req, { /* do we allow external setting of LOGICAL_SKIP */ /* what about backwards skipping, as in seeking? 
*/ - } + } /* we should be ready to begin */ /* zero retval indicates everything flowing successfully */ @@ -180,7 +177,7 @@ int PINT_process_request(PINT_Request_state *req, while(!retval) { if (req->cur[req->lvl].rq) - { + { /* print the current state of the decoding process */ gossip_debug(GOSSIP_REQUEST_DEBUG,"\tDo seq of %lld ne %d st %lld nb %d " "ub %lld lb %lld as %lld co %llu\n", @@ -195,13 +192,13 @@ int PINT_process_request(PINT_Request_state *req, gossip_debug(GOSSIP_REQUEST_DEBUG,"\t\tto %lld ta %lld fi %lld\n", lld(req->type_offset), lld(req->target_offset), lld(req->final_offset)); - if (mem) /* if a mem type is specified print its state */ - { + if (mem) /* if a mem type is specified print its state */ + { gossip_debug(GOSSIP_REQUEST_DEBUG,"\t\tmto %lld mta %lld mfi %lld\n", lld(mem->type_offset), lld(mem->target_offset), lld(mem->final_offset)); - } - } + } + } /* NULL type indicates packed data - handle directly */ if (req->cur[req->lvl].rq == NULL) { @@ -429,7 +426,7 @@ int PINT_process_request(PINT_Request_state *req, if (PINT_EQ_CKSIZE(mode)) /* must be exact here */ { /* restore request state */ - pint_free(temp_space); + free(temp_space); } if (!PINT_IS_MEMREQ(mode)) gossip_debug(GOSSIP_REQUEST_DEBUG, @@ -475,10 +472,10 @@ struct PINT_Request_state *PINT_new_request_states(PINT_Request *request, int n) rqdepth = 1; } - reqs = pint_malloc(n * (sizeof(*reqs) + rqdepth * sizeof(*reqs->cur))); + reqs = malloc(n * (sizeof(*reqs) + rqdepth * sizeof(*reqs->cur))); if (!reqs) { - gossip_lerr("%s: pint_malloc failed\n", __func__); + gossip_lerr("%s: malloc failed\n", __func__); return NULL; } @@ -508,12 +505,12 @@ struct PINT_Request_state *PINT_new_request_states(PINT_Request *request, int n) /* This function frees request state structures */ void PINT_free_request_state(PINT_Request_state *req) { - pint_free(req); + free(req); } void PINT_free_request_states(PINT_Request_state *reqs) { - pint_free(reqs); + free(reqs); } /** @@ -584,14 +581,22 @@ 
PVFS_size PINT_distribute(PVFS_offset offset, if (!rfdata || !rfdata->dist || !rfdata->dist->methods || !rfdata->dist->params) { - if (!rfdata) + if (!rfdata) + { gossip_debug(GOSSIP_REQUEST_DEBUG,"rfdata is NULL\n"); + } else if (!rfdata->dist) + { gossip_debug(GOSSIP_REQUEST_DEBUG,"rfdata->dist is NULL\n"); + } else if (!rfdata->dist->methods) + { gossip_debug(GOSSIP_REQUEST_DEBUG,"rfdata->dist->methods is NULL\n"); + } else if (!rfdata->dist->params) + { gossip_debug(GOSSIP_REQUEST_DEBUG,"rfdata->dist->params is NULL\n"); + } gossip_lerr("Bad Distribution! Bailing out!\n"); return 0; } diff --git a/src/io/description/pint-request.h b/src/io/description/pint-request.h index 92ae2a6..e2d2be2 100644 --- a/src/io/description/pint-request.h +++ b/src/io/description/pint-request.h @@ -65,7 +65,7 @@ typedef struct PINT_Request { #define PVFS_REQUEST_ENCODED_SIZE \ ((sizeof(PVFS_offset) * 3) + (sizeof(PVFS_size) * 2) + \ - (sizeof(int32_t) * 7) + (sizeof(uint32_t) * 2) + 4) + (sizeof(int32_t) * 6) + (sizeof(uint32_t) * 2) + (sizeof(uint64_t)) + 4) typedef struct PINT_reqstack { int64_t el; /* number of element being processed */ diff --git a/src/io/description/pvfs-request.c b/src/io/description/pvfs-request.c index 952e025..4db4530 100644 --- a/src/io/description/pvfs-request.c +++ b/src/io/description/pvfs-request.c @@ -5,19 +5,17 @@ * See COPYING in top-level directory. 
*/ -#ifndef __KERNEL__ #include #include +#ifndef WIN32 #include -#include #endif - +#include "string.h" #include "pvfs2-types.h" #include "pint-request.h" #include "pvfs2-request.h" #include "pvfs2-debug.h" #include "gossip.h" -#include "pint-dist-utils.h" #define PVFS_SUCCESS 0 #define PVFS_ERR_REQ -1 @@ -174,7 +172,7 @@ int PVFS_Request_hvector(int32_t count, return PVFS_ERR_REQ; PVFS_Request_extent(oldreq, &oldext); // PINT_REQUEST_REFINC(oldreq); - *newreq = (PINT_Request *) pint_malloc(sizeof(struct PINT_Request)); + *newreq = (PINT_Request *) malloc(sizeof(struct PINT_Request)); (*newreq)->sreq = NULL; PINT_subreq(0, blocklength, stride, count, oldreq, oldext, newreq); /* calculate statistics like ub, lb, depth, etc. */ @@ -232,7 +230,7 @@ int PVFS_Request_indexed(int32_t count, while (count--) { dt = *newreq; - *newreq = (PINT_Request *) pint_malloc(sizeof(struct PINT_Request)); + *newreq = (PINT_Request *) malloc(sizeof(struct PINT_Request)); (*newreq)->sreq = dt; PINT_subreq(displacements[count] * oldext, blocklengths[count], 0, 1, oldreq, oldext, newreq); @@ -242,6 +240,7 @@ int PVFS_Request_indexed(int32_t count, return PVFS_SUCCESS; } +#ifndef WIN32 static int PVFS_Request_indexed_block(int32_t count, int32_t blocklength, PVFS_size * displacements, @@ -254,16 +253,15 @@ static int PVFS_Request_indexed_block(int32_t count, PVFS_Request oldreq, PVFS_Request * newreq) { - int i, ret; + int i; int32_t *blocklengths; - blocklengths = pint_malloc(count * sizeof(int32_t)); + blocklengths = alloca(count * sizeof(int32_t)); for (i = 0; i < count; i++) blocklengths[i] = blocklength; - ret = PVFS_Request_indexed(count, blocklengths, displacements, - oldreq, newreq); - pint_free(blocklengths); - return ret; + return PVFS_Request_indexed(count, blocklengths, displacements, + oldreq, newreq); } +#endif int PVFS_Request_hindexed(int32_t count, int32_t * blocklengths, @@ -281,7 +279,7 @@ int PVFS_Request_hindexed(int32_t count, while (count--) { dt = *newreq; - 
*newreq = (PINT_Request *) pint_malloc(sizeof(struct PINT_Request)); + *newreq = (PINT_Request *) malloc(sizeof(struct PINT_Request)); (*newreq)->sreq = dt; PINT_subreq(displacements[count], blocklengths[count], 0, 1, oldreq, oldext, newreq); @@ -308,7 +306,7 @@ int PVFS_Request_struct(int32_t count, return PVFS_ERR_REQ; PVFS_Request_extent(oldreqs[count], &oldext); dt = *newreq; - *newreq = (PINT_Request *) pint_malloc(sizeof(struct PINT_Request)); + *newreq = (PINT_Request *) malloc(sizeof(struct PINT_Request)); (*newreq)->sreq = dt; PINT_subreq(displacements[count], blocklengths[count], 0, 1, oldreqs[count], oldext, newreq); @@ -418,7 +416,7 @@ int PVFS_Request_commit(PVFS_Request * reqp) /* Allocate memory for contiguous region */ if (PINT_REQUEST_NEST_SIZE(req) > 0) { - region = (PVFS_Request) pint_malloc(PINT_REQUEST_PACK_SIZE(req)); + region = (PVFS_Request) malloc(PINT_REQUEST_PACK_SIZE(req)); if (region == NULL) { gossip_lerr("PVFS_Request_commit: Memory cannot be allocated\n"); @@ -464,7 +462,7 @@ int PVFS_Request_free(PVFS_Request * req) if (PINT_REQUEST_IS_PACKED(*req)) { /* these are contiguous and have no external refs */ - pint_free(*req); + free(*req); *req = NULL; gossip_debug(GOSSIP_REQUEST_DEBUG, "free packed request\n"); return PVFS_SUCCESS; @@ -477,13 +475,13 @@ int PVFS_Request_free(PVFS_Request * req) PVFS_Request_free(&(reqp->ereq)); /* this is a little awkward but it works */ reqp_next = reqp->sreq; - pint_free(reqp); + free(reqp); gossip_debug(GOSSIP_REQUEST_DEBUG, "free sreq linked request\n"); reqp = reqp_next; } /* now deal with the main struct */ PVFS_Request_free(&((*req)->ereq)); - pint_free(*req); + free(*req); *req = NULL; gossip_debug(GOSSIP_REQUEST_DEBUG, "free unpacked request\n"); return PVFS_SUCCESS; diff --git a/src/io/dev/pint-dev-shared.h b/src/io/dev/pint-dev-shared.h index 0c6a1ef..82b063b 100644 --- a/src/io/dev/pint-dev-shared.h +++ b/src/io/dev/pint-dev-shared.h @@ -15,8 +15,10 @@ #ifdef __KERNEL__ #include #else 
+#ifndef WIN32 #include /* needed for constructing the _IO macros */ #endif +#endif /* version number for use in communicating between kernel space and user * space diff --git a/src/io/dev/pint-dev.c b/src/io/dev/pint-dev.c index 42791ee..a7181f6 100644 --- a/src/io/dev/pint-dev.c +++ b/src/io/dev/pint-dev.c @@ -8,25 +8,44 @@ */ #include +#ifndef WIN32 #include +#endif #include #include #include #include #include #include +#ifndef WIN32 #include #include #include #include +#endif #include +#ifndef WIN32 +#include +#endif + +#ifdef WIN32 +#include + +/* define our own iovec */ +struct iovec { + void *iov_base; + size_t iov_len; +}; +#endif #include "pint-mem.h" #include "pvfs2-types.h" #include "pvfs2-debug.h" #include "gossip.h" #include "pint-dev.h" +#ifndef WIN32 #include "pvfs2-dev-proto.h" +#endif #include "pvfs2-internal.h" #ifdef WITH_LINUX_KMOD @@ -39,10 +58,12 @@ static int parse_devices( int *majornum); #endif /* WITH_LINUX_KMOD */ + static int pdev_fd = -1; static int32_t pdev_magic; #ifdef WITH_LINUX_KMOD static int32_t pdev_max_upsize; + #endif /* WITH_LINUX_KMOD */ static int32_t pdev_max_downsize; @@ -62,7 +83,13 @@ int PINT_dev_initialize( #ifdef WITH_LINUX_KMOD int ret = -1; char *debug_string = getenv("PVFS2_KMODMASK"); - int32_t debug_mask = 0; + uint64_t debug_mask = 0; + dev_mask_info_t mask_info; + + if (!debug_string) + { + debug_string = "none"; + } /* we have to be root to access the device */ if ((getuid() != 0) && (geteuid() != 0)) @@ -118,16 +145,33 @@ int PINT_dev_initialize( return(-(PVFS_ENODEV|PVFS_ERROR_DEV)); } - /* set the debug mask through an ioctl */ - if (!debug_string) - return 0; + /* push the kernel debug mask into the kernel, set gossip_debug_mask in the + * kernel and initialize the kernel debug string used by + * /proc/sys/pvfs2/kernel-debug. 
+ */ + mask_info.mask_type = KERNEL_MASK; + mask_info.mask_value = PVFS_kmod_eventlog_to_mask(debug_string); + ret = ioctl(pdev_fd, PVFS_DEV_DEBUG, &mask_info); + if (ret < 0) + { + gossip_err("Error: ioctl() PVFS_DEV_DEBUG failure (kernel debug mask to" + " %x)\n" + ,(unsigned int)debug_mask); + close(pdev_fd); + return -(PVFS_ENODEV|PVFS_ERROR_DEV); + } - /* truncate it to a 32 bit mask since we dont have a need for more than that anyways */ - debug_mask = (int32_t) PVFS_kmod_eventlog_to_mask(debug_string); - ret = ioctl(pdev_fd, PVFS_DEV_DEBUG, &debug_mask); + /* push the client debug mask into the kernel and initialize the client + * debug string used by /proc/sys/pvfs2/client-debug. + */ + mask_info.mask_type = CLIENT_MASK; + mask_info.mask_value = gossip_debug_mask; + ret = ioctl(pdev_fd, PVFS_DEV_DEBUG, &mask_info); if (ret < 0) { - gossip_err("Error: ioctl() PVFS_DEV_DEBUG failure (debug mask to %x)\n", debug_mask); + gossip_err("Error: ioctl() PVFS_DEV_DEBUG failure (client debug mask to" + " %x)\n" + ,(unsigned int)gossip_debug_mask); close(pdev_fd); return -(PVFS_ENODEV|PVFS_ERROR_DEV); } @@ -145,7 +189,11 @@ void PINT_dev_finalize(void) { if (pdev_fd > -1) { +#ifdef WIN32 + _close(pdev_fd); +#else close(pdev_fd); +#endif pdev_fd = -1; } } @@ -204,6 +252,16 @@ int PINT_dev_get_mapped_regions(int ndesc, struct PVFS_dev_map_desc *desc, desc[i].ptr = NULL; break; } + + /* fixes a corruption issue on linux 2.4 kernels where the buffers are + * not being pinned in memory properly + */ + if(mlock( (const char *) ptr, total_size) != 0) + { + gossip_err("Error: FAILED to mlock shared buffer\n"); + break; + } + desc[i].ptr = ptr; desc[i].total_size = total_size; desc[i].size = params[i].dev_buffer_size; @@ -261,6 +319,15 @@ void PINT_dev_put_mapped_regions(int ndesc, struct PVFS_dev_map_desc *desc) ptr = (void *) desc[i].ptr; assert(ptr); + /* fixes a corruption issue on linux 2.4 kernels where the buffers are + * not being pinned in memory properly + */ +#ifndef 
WIN32 + if(munlock( (const char *) ptr, desc[i].total_size) != 0) + { + gossip_err("Error: FAILED to munlock shared buffer\n"); + } +#endif PINT_mem_aligned_free(ptr); } } @@ -320,6 +387,11 @@ int PINT_dev_test_unexpected( void *buffer = NULL; pvfs2_upcall_t *upc = NULL; + if(incount < 1) + { + return(-PVFS_EINVAL); + } + /* prepare to read max upcall size (magic nr and tag included) */ int read_size = pdev_max_upsize; @@ -557,11 +629,15 @@ int PINT_dev_write_list( enum PINT_dev_buffer_type buffer_type, PVFS_id_gen_t tag) { - struct iovec io_array[8]; + struct iovec io_array[10]; int io_count = 3; int i; int ret = -1; int32_t proto_ver = PVFS_KERNEL_PROTO_VERSION; +#ifdef WIN32 + char *buffer, *b; + size_t bsize = 0; +#endif /* lets be reasonable about list size :) */ /* two vecs are taken up by magic nr and tag */ @@ -598,8 +674,35 @@ int PINT_dev_write_list( io_count++; } - ret = writev(pdev_fd, io_array, io_count); +#ifdef WIN32 + /* we must write one buffer on Windows */ + /* compute buffer size */ + for (i = 0; i < io_count; i++) + { + bsize += io_array[i].iov_len; + } + /* allocate buffer */ + buffer = (char *) malloc(bsize); + if (buffer == NULL) + { + return (-PVFS_ENOMEM); + } + + /* copy multiple vectors into buffer */ + for (i = 0, b = buffer; i < io_count; i++) + { + memcpy(b, io_array[i].iov_base, io_array[i].iov_len); + b += io_array[i].iov_len; + } + + /* write the buffer */ + ret = _write(pdev_fd, buffer, bsize); + + free(buffer); +#else + ret = writev(pdev_fd, io_array, io_count); +#endif return ((ret < 0) ? 
-(PVFS_EIO|PVFS_ERROR_DEV) : 0); } diff --git a/src/io/dev/pint-dev.h b/src/io/dev/pint-dev.h index 7aa4000..516928c 100644 --- a/src/io/dev/pint-dev.h +++ b/src/io/dev/pint-dev.h @@ -9,6 +9,20 @@ #include "pvfs2-types.h" #include "pint-dev-shared.h" + +/* parameter structure used in PVFS_DEV_DEBUG ioctl command */ +typedef struct +{ + enum + { + KERNEL_MASK, + CLIENT_MASK, + } mask_type; + uint64_t mask_value; +} dev_mask_info_t; + + + enum pvfs_bufmap_type { BM_IO = 0, BM_READDIR = 1, diff --git a/src/io/flow/flow.c b/src/io/flow/flow.c index 671830c..54726f5 100644 --- a/src/io/flow/flow.c +++ b/src/io/flow/flow.c @@ -8,8 +8,10 @@ /* (see flow.h) */ #include +#ifndef WIN32 #include #include +#endif #include #include diff --git a/src/io/flow/flow.h b/src/io/flow/flow.h index fcfde92..f283ec3 100644 --- a/src/io/flow/flow.h +++ b/src/io/flow/flow.h @@ -99,7 +99,7 @@ struct flow_descriptor /* fields that can be set publicly before posting */ /* function to be triggered upon completion */ - void(*callback)(struct flow_descriptor* flow_d); + void(*callback)(struct flow_descriptor* flow_d, int cancel_path); struct flow_endpoint src; /* src endpoint */ struct flow_endpoint dest; /* dest endpoint */ @@ -143,6 +143,8 @@ struct flow_descriptor PINT_Request_state *file_req_state; PINT_Request_state *mem_req_state; PINT_Request_result result; + + PVFS_hint hints; }; typedef struct flow_descriptor flow_descriptor; diff --git a/src/io/flow/flowproto-bmi-cache/flowproto-bmi-cache-server.c b/src/io/flow/flowproto-bmi-cache/flowproto-bmi-cache-server.c index 8b18ec2..1b10cb1 100644 --- a/src/io/flow/flowproto-bmi-cache/flowproto-bmi-cache-server.c +++ b/src/io/flow/flowproto-bmi-cache/flowproto-bmi-cache-server.c @@ -221,7 +221,7 @@ int fp_bmi_cache_initialize(int flowproto_id) ret = PINT_thread_mgr_bmi_start(); if(ret < 0) return(ret); - PINT_thread_mgr_bmi_getcontext(&global_bmi_context); + PINT_thread_mgr_bmi_getcontext((PVFS_context_id *)&global_bmi_context); return(0); } @@ 
-312,7 +312,6 @@ int fp_bmi_cache_post(flow_descriptor * flow_d) fprintf(stderr, "TROVE_open_context() failure.\n"); return (-1); } - fprintf(stderr, "collid:%d, trove_context:%d\n", flow_d->dest.u.trove.coll_id, global_trove_context); ret = cache_init(&info); if ( ret < 0 ) @@ -490,7 +489,7 @@ int bmi_cache_request_init(struct fp_private_data *flow_data, int direction) flow_d->state = FLOW_COMPLETE; free(flow_data); flow_d->release(flow_d); - flow_d->callback(flow_d); + flow_d->callback(flow_d, 0); fprintf(stderr, "bmi_cache_request_init: exit with return 1. zero request.\n"); return(1); } @@ -801,7 +800,7 @@ static void bmi_recv_callback_fn(void *user_ptr, free(flow_data); flow_d->state = FLOW_COMPLETE; flow_d->release(flow_d); - flow_d->callback(flow_d); + flow_d->callback(flow_d, 0); fprintf(stderr, "bmi_recv_callback_fn: request is done\n"); return; } @@ -870,7 +869,8 @@ static void cache_write_callback_fn(void *user_ptr, q_item->cache_req.buffer_type, q_item->parent->tag, &q_item->bmi_callback, - global_bmi_context); + global_bmi_context, + q_item->parent->hints); /* TODO: error handling */ assert(ret >= 0); @@ -941,7 +941,8 @@ static void cache_read_callback_fn(void *user_ptr, q_item->cache_req.buffer_type, q_item->parent->tag, &q_item->bmi_callback, - global_bmi_context); + global_bmi_context, + (bmi_hint)q_item->parent->hints); /* TODO: error handling */ assert(ret >= 0); diff --git a/src/io/flow/flowproto-bmi-trove/flowproto-multiqueue.c b/src/io/flow/flowproto-bmi-trove/flowproto-multiqueue.c index 1f4332f..ef700bc 100644 --- a/src/io/flow/flowproto-bmi-trove/flowproto-multiqueue.c +++ b/src/io/flow/flowproto-bmi-trove/flowproto-multiqueue.c @@ -9,8 +9,10 @@ #include #include #include +#ifndef WIN32 #include #include +#endif #include "gossip.h" #include "quicklist.h" @@ -30,7 +32,7 @@ #define MAX_REGIONS 64 -#define FLOW_CLEANUP(__flow_data) \ +#define FLOW_CLEANUP_CANCEL_PATH(__flow_data, __cancel_path) \ do { \ struct flow_descriptor *__flow_d = 
(__flow_data)->parent; \ gossip_debug(GOSSIP_FLOW_PROTO_DEBUG, "flowproto completing %p\n",\ @@ -39,9 +41,11 @@ do { \ __flow_d = (__flow_data)->parent; \ free(__flow_data); \ __flow_d->release(__flow_d); \ - __flow_d->callback(__flow_d); \ + __flow_d->callback(__flow_d, __cancel_path); \ } while(0) +#define FLOW_CLEANUP(___flow_data) FLOW_CLEANUP_CANCEL_PATH(___flow_data, 0) + struct result_chain_entry { PVFS_id_gen_t posted_id; @@ -106,7 +110,8 @@ static void handle_io_error( static int cancel_pending_bmi( struct qlist_head *list); static int cancel_pending_trove( - struct qlist_head *list); + struct qlist_head *list, + TROVE_coll_id coll_id); #ifdef __PVFS2_TROVE_SUPPORT__ typedef struct @@ -473,11 +478,12 @@ int fp_multiqueue_cancel(flow_descriptor *flow_d) /* NOTE: set flow error class bit so that system interface understands * that this may be a retry-able error */ + gossip_err("%s: I/O error occurred\n", __func__); handle_io_error(-(PVFS_ECANCEL|PVFS_ERROR_FLOW), NULL, flow_data); if(flow_data->parent->state == FLOW_COMPLETE) { gen_mutex_unlock(&flow_data->parent->flow_mutex); - FLOW_CLEANUP(flow_data); + FLOW_CLEANUP_CANCEL_PATH(flow_data, 1); return(0); } } @@ -636,7 +642,6 @@ int fp_multiqueue_post(flow_descriptor *flow_d) bmi_send_callback_fn(&(flow_data->prealloc_array[i]), 0, 0, 1); if(flow_data->dest_last_posted) { - flow_data->initial_posts = 0; break; } } @@ -710,6 +715,7 @@ static void bmi_recv_callback_fn(void *user_ptr, PVFS_size bytes_processed = 0; void *tmp_buffer; void *tmp_user_ptr; + int sync_mode = 0; gossip_debug(GOSSIP_FLOW_PROTO_DEBUG, "flowproto-multiqueue bmi_recv_callback_fn, error code: %d, flow: %p.\n", @@ -719,6 +725,7 @@ static void bmi_recv_callback_fn(void *user_ptr, if(error_code != 0 || flow_data->parent->error_code != 0) { + gossip_err("%s: I/O error occurred\n", __func__); handle_io_error(error_code, q_item, flow_data); return; } @@ -738,6 +745,15 @@ static void bmi_recv_callback_fn(void *user_ptr, tmp_user_ptr = 
result_tmp; assert(result_tmp->result.bytes); + if(PINT_REQUEST_DONE(q_item->parent->file_req_state)) + { + /* This is the last write operation for this flow. Set sync + * flag if needed + */ + sync_mode = get_data_sync_mode( + q_item->parent->dest.u.trove.coll_id); + } + ret = trove_bstream_write_list( q_item->parent->dest.u.trove.coll_id, q_item->parent->dest.u.trove.handle, @@ -748,16 +764,18 @@ static void bmi_recv_callback_fn(void *user_ptr, result_tmp->result.size_array, result_tmp->result.segs, &q_item->out_size, - get_data_sync_mode(q_item->parent->dest.u.trove.coll_id), + sync_mode, NULL, &result_tmp->trove_callback, global_trove_context, - &result_tmp->posted_id); + &result_tmp->posted_id, + q_item->parent->hints); result_tmp = result_tmp->next; if(ret < 0) { + gossip_err("%s: I/O error occurred\n", __func__); handle_io_error(ret, q_item, flow_data); return; } @@ -783,7 +801,8 @@ static void bmi_recv_callback_fn(void *user_ptr, if(!q_item->buffer) { /* if the q_item has not been used, allocate a buffer */ - q_item->buffer = BMI_memalloc(q_item->parent->src.u.bmi.address, + q_item->buffer = BMI_memalloc( + q_item->parent->src.u.bmi.address, q_item->parent->buffer_size, BMI_RECV); /* TODO: error handling */ assert(q_item->buffer); @@ -851,19 +870,26 @@ static void bmi_recv_callback_fn(void *user_ptr, flow_data->total_bytes_processed += bytes_processed; + gossip_debug(GOSSIP_DIRECTIO_DEBUG, + "offset %llu, buffer ptr: %p\n", + llu(q_item->result_chain.result.offset_array[0]), + q_item->buffer); + /* TODO: what if we recv less than expected? 
*/ ret = BMI_post_recv(&q_item->posted_id, - q_item->parent->src.u.bmi.address, - q_item->buffer, - flow_data->parent->buffer_size, - &tmp_actual_size, + q_item->parent->src.u.bmi.address, + ((char *)q_item->buffer), + flow_data->parent->buffer_size, + &tmp_actual_size, BMI_PRE_ALLOC, q_item->parent->tag, &q_item->bmi_callback, - global_bmi_context); - + global_bmi_context, + (bmi_hint)q_item->parent->hints); + if(ret < 0) { + gossip_err("%s: I/O error occurred\n", __func__); handle_io_error(ret, q_item, flow_data); return; } @@ -874,7 +900,7 @@ static void bmi_recv_callback_fn(void *user_ptr, bmi_recv_callback_fn(q_item, tmp_actual_size, 0); } } - + return; } @@ -906,6 +932,7 @@ static void trove_read_callback_fn(void *user_ptr, if(error_code != 0 || flow_data->parent->error_code != 0) { + gossip_err("%s: I/O error occurred\n", __func__); handle_io_error(error_code, q_item, flow_data); return; } @@ -953,10 +980,14 @@ static void trove_read_callback_fn(void *user_ptr, BMI_PRE_ALLOC, q_item->parent->tag, &q_item->bmi_callback, - global_bmi_context); + global_bmi_context, + (bmi_hint)q_item->parent->hints); flow_data->next_seq_to_send++; if(q_item->last) + { + flow_data->initial_posts = 0; flow_data->dest_last_posted = 1; + } gossip_debug(GOSSIP_FLOW_PROTO_DEBUG, "%s: (post send time) ini posts: %d, pending: %d, last: %d\n", __func__, @@ -971,6 +1002,7 @@ static void trove_read_callback_fn(void *user_ptr, if(ret < 0) { + gossip_err("%s: I/O error occurred\n", __func__); handle_io_error(ret, q_item, flow_data); return; } @@ -1024,6 +1056,7 @@ static int bmi_send_callback_fn(void *user_ptr, if(error_code != 0 || flow_data->parent->error_code != 0) { + gossip_err("%s: I/O error occurred\n", __func__); handle_io_error(error_code, q_item, flow_data); if(flow_data->parent->state == FLOW_COMPLETE) return(1); @@ -1035,6 +1068,10 @@ static int bmi_send_callback_fn(void *user_ptr, PINT_PERF_READ, actual_size, PINT_PERF_ADD); + PINT_perf_count(PINT_server_pc, + 
PINT_PERF_FLOW_READ, + actual_size, + PINT_PERF_ADD); flow_data->parent->total_transferred += actual_size; @@ -1089,7 +1126,8 @@ static int bmi_send_callback_fn(void *user_ptr, else { /* if the q_item has not been used, allocate a buffer */ - q_item->buffer = BMI_memalloc(q_item->parent->dest.u.bmi.address, + q_item->buffer = BMI_memalloc( + q_item->parent->dest.u.bmi.address, q_item->parent->buffer_size, BMI_SEND); /* TODO: error handling */ assert(q_item->buffer); @@ -1170,7 +1208,10 @@ static int bmi_send_callback_fn(void *user_ptr, * is no work to do, trigger manually */ if(flow_data->total_bytes_processed == 0) + { + flow_data->initial_posts = 0; flow_data->dest_last_posted = 1; + } } if(bytes_processed == 0) @@ -1214,6 +1255,7 @@ static int bmi_send_callback_fn(void *user_ptr, * to prevent further trying to start other qitems from being * posted */ + flow_data->initial_posts = 0; flow_data->dest_last_posted = 1; return 0; } @@ -1248,12 +1290,14 @@ static int bmi_send_callback_fn(void *user_ptr, NULL, &result_tmp->trove_callback, global_trove_context, - &result_tmp->posted_id); + &result_tmp->posted_id, + flow_data->parent->hints); result_tmp = result_tmp->next; if(ret < 0) { + gossip_err("%s: I/O error occurred\n", __func__); handle_io_error(ret, q_item, flow_data); if(flow_data->parent->state == FLOW_COMPLETE) return(1); @@ -1298,6 +1342,7 @@ static void trove_write_callback_fn(void *user_ptr, if(error_code != 0 || flow_data->parent->error_code != 0) { + gossip_err("%s: I/O error occurred\n", __func__); handle_io_error(error_code, q_item, flow_data); return; } @@ -1312,11 +1357,14 @@ static void trove_write_callback_fn(void *user_ptr, result_tmp = &q_item->result_chain; do{ q_item->parent->total_transferred += result_tmp->result.bytes; - PINT_perf_count( - PINT_server_pc, - PINT_PERF_WRITE, - result_tmp->result.bytes, - PINT_PERF_ADD); + PINT_perf_count( PINT_server_pc, + PINT_PERF_WRITE, + result_tmp->result.bytes, + PINT_PERF_ADD); + PINT_perf_count( 
PINT_server_pc, + PINT_PERF_FLOW_WRITE, + result_tmp->result.bytes, + PINT_PERF_ADD); old_result_tmp = result_tmp; result_tmp = result_tmp->next; if(old_result_tmp != &q_item->result_chain) @@ -1353,7 +1401,8 @@ static void trove_write_callback_fn(void *user_ptr, else { /* if the q_item has not been used, allocate a buffer */ - q_item->buffer = BMI_memalloc(q_item->parent->src.u.bmi.address, + q_item->buffer = BMI_memalloc( + q_item->parent->src.u.bmi.address, q_item->parent->buffer_size, BMI_RECV); /* TODO: error handling */ assert(q_item->buffer); @@ -1437,19 +1486,25 @@ static void trove_write_callback_fn(void *user_ptr, return; } + gossip_debug(GOSSIP_DIRECTIO_DEBUG, + "offset %llu, buffer ptr: %p\n", + llu(q_item->result_chain.result.offset_array[0]), + q_item->buffer); /* TODO: what if we recv less than expected? */ ret = BMI_post_recv(&q_item->posted_id, q_item->parent->src.u.bmi.address, - q_item->buffer, + ((char *)q_item->buffer), flow_data->parent->buffer_size, &tmp_actual_size, BMI_PRE_ALLOC, q_item->parent->tag, &q_item->bmi_callback, - global_bmi_context); - + global_bmi_context, + (bmi_hint)q_item->parent->hints); + if(ret < 0) { + gossip_err("%s: I/O error occurred\n", __func__); handle_io_error(ret, q_item, flow_data); return; } @@ -1489,10 +1544,10 @@ static void cleanup_buffers(struct fp_private_data *flow_data) { if(flow_data->prealloc_array[i].buffer) { - BMI_memfree(flow_data->parent->src.u.bmi.address, - flow_data->prealloc_array[i].buffer, - flow_data->parent->buffer_size, - BMI_RECV); + BMI_memfree(flow_data->parent->src.u.bmi.address, + flow_data->prealloc_array[i].buffer, + flow_data->parent->buffer_size, + BMI_RECV); } result_tmp = &(flow_data->prealloc_array[i].result_chain); do{ @@ -1577,6 +1632,7 @@ static void mem_to_bmi_callback_fn(void *user_ptr, if(error_code != 0 || flow_data->parent->error_code != 0) { + gossip_err("%s: I/O error occurred\n", __func__); handle_io_error(error_code, q_item, flow_data); return; } @@ -1626,7 +1682,8 
@@ static void mem_to_bmi_callback_fn(void *user_ptr, { flow_data->intermediate = BMI_memalloc( flow_data->parent->dest.u.bmi.address, - flow_data->parent->buffer_size, BMI_SEND); + flow_data->parent->buffer_size, + BMI_SEND); /* TODO: error handling */ assert(flow_data->intermediate); } @@ -1712,10 +1769,12 @@ static void mem_to_bmi_callback_fn(void *user_ptr, buffer_type, q_item->parent->tag, &q_item->bmi_callback, - global_bmi_context); + global_bmi_context, + (bmi_hint)q_item->parent->hints); if(ret < 0) { + gossip_err("%s: I/O error occurred\n", __func__); handle_io_error(ret, q_item, flow_data); return; } @@ -1760,6 +1819,7 @@ static void bmi_to_mem_callback_fn(void *user_ptr, if(error_code != 0 || flow_data->parent->error_code != 0) { + gossip_err("%s: I/O error occurred\n", __func__); handle_io_error(error_code, q_item, flow_data); return; } @@ -1858,7 +1918,8 @@ static void bmi_to_mem_callback_fn(void *user_ptr, { flow_data->intermediate = BMI_memalloc( flow_data->parent->src.u.bmi.address, - flow_data->parent->buffer_size, BMI_RECV); + flow_data->parent->buffer_size, + BMI_RECV); /* TODO: error handling */ assert(flow_data->intermediate); } @@ -1909,10 +1970,12 @@ static void bmi_to_mem_callback_fn(void *user_ptr, buffer_type, q_item->parent->tag, &q_item->bmi_callback, - global_bmi_context); + global_bmi_context, + (bmi_hint)q_item->parent->hints); if(ret < 0) { + gossip_err("%s: I/O error occurred\n", __func__); handle_io_error(ret, q_item, flow_data); return; } @@ -1941,6 +2004,7 @@ static void handle_io_error( struct fp_private_data *flow_data) { int ret; + char buf[64] = {0}; gossip_debug(GOSSIP_FLOW_PROTO_DEBUG, "flowproto-multiqueue handle_io_error() called for flow %p.\n", @@ -1950,8 +2014,9 @@ static void handle_io_error( if(flow_data->parent->error_code == 0) { enum flow_endpoint_type src, dest; - - gossip_err("%s: flow proto error cleanup started on %p, error_code: %d\n", __func__, flow_data->parent, error_code); + + PVFS_strerror_r(error_code, 
buf, 64); + gossip_err("%s: flow proto error cleanup started on %p: %s\n", __func__, flow_data->parent, buf); flow_data->parent->error_code = error_code; if(q_item) @@ -1980,7 +2045,7 @@ static void handle_io_error( } else if (src == TROVE_ENDPOINT && dest == BMI_ENDPOINT) { - ret = cancel_pending_trove(&flow_data->src_list); + ret = cancel_pending_trove(&flow_data->src_list, flow_data->parent->src.u.trove.coll_id); flow_data->cleanup_pending_count += ret; gossip_debug(GOSSIP_FLOW_PROTO_DEBUG, "flowproto-multiqueue canceled %d trove-bmi Trove ops.\n", ret); @@ -1995,7 +2060,7 @@ static void handle_io_error( gossip_debug(GOSSIP_FLOW_PROTO_DEBUG, "flowproto-multiqueue canceled %d bmi-trove BMI ops.\n", ret); flow_data->cleanup_pending_count += ret; - ret = cancel_pending_trove(&flow_data->dest_list); + ret = cancel_pending_trove(&flow_data->dest_list, flow_data->parent->dest.u.trove.coll_id); gossip_debug(GOSSIP_FLOW_PROTO_DEBUG, "flowproto-multiqueue canceled %d bmi-trove Trove ops.\n", ret); flow_data->cleanup_pending_count += ret; @@ -2021,8 +2086,9 @@ static void handle_io_error( if(flow_data->cleanup_pending_count == 0) { - gossip_err("%s: flow proto %p error cleanup finished, error_code: %d\n", - __func__, flow_data->parent, flow_data->parent->error_code); + PVFS_strerror_r(flow_data->parent->error_code, buf, 64); + gossip_err("%s: flow proto %p error cleanup finished: %s\n", + __func__, flow_data->parent, buf); /* we are finished, make sure error is marked and state is set */ assert(flow_data->parent->error_code); @@ -2077,7 +2143,7 @@ static int cancel_pending_bmi(struct qlist_head *list) * * returns the number of operations that were canceled */ -static int cancel_pending_trove(struct qlist_head *list) +static int cancel_pending_trove(struct qlist_head *list, TROVE_coll_id coll_id) { struct qlist_head *tmp_link; struct fp_queue_item *q_item = NULL; @@ -2102,7 +2168,7 @@ static int cancel_pending_trove(struct qlist_head *list) count++; ret = 
PINT_thread_mgr_trove_cancel( old_result_tmp->posted_id, - q_item->parent->src.u.trove.coll_id, + coll_id, &old_result_tmp->trove_callback); if(ret < 0) { diff --git a/src/io/job/job-desc-queue.c b/src/io/job/job-desc-queue.c index 1fa8db6..fcf0f72 100644 --- a/src/io/job/job-desc-queue.c +++ b/src/io/job/job-desc-queue.c @@ -16,6 +16,11 @@ #include "job-desc-queue.h" #include "gossip.h" #include "id-generator.h" +#include "pint-util.h" + +#ifdef WIN32 +typedef enum job_type job_type_t; +#endif /*************************************************************** * Visible functions @@ -40,7 +45,12 @@ struct job_desc *alloc_job_desc(int type) id_gen_safe_register(&(jd->job_id), jd); +#ifdef WIN32 + jd->type = (job_type_t) type; +#else jd->type = type; +#endif + return (jd); }; @@ -92,7 +102,7 @@ void job_desc_q_cleanup(job_desc_q_p jdqp) qlist_for_each_safe(iterator, scratch, jdqp) { tmp_job_desc = qlist_entry(iterator, struct job_desc, - job_desc_q_link); + job_desc_q_link); /* qlist_for_each_safe lets us iterate and remove nodes. 
no * need to adjust pointers as we are freeing everything */ free(tmp_job_desc); @@ -208,6 +218,9 @@ void job_desc_q_dump(job_desc_q_p jdqp) case JOB_NULL: gossip_err(" type: JOB_NULL.\n"); break; + case JOB_PRECREATE_POOL: + gossip_err(" type: JOB_PRECREATE_POOL.\n"); + break; } } diff --git a/src/io/job/job-desc-queue.h b/src/io/job/job-desc-queue.h index d980315..e796031 100644 --- a/src/io/job/job-desc-queue.h +++ b/src/io/job/job-desc-queue.h @@ -17,7 +17,6 @@ #include "trove-types.h" #include "src/server/request-scheduler/request-scheduler.h" #include "thread-mgr.h" -#include "pvfs2-event.h" /* describes BMI operations */ struct bmi_desc @@ -31,7 +30,7 @@ struct bmi_desc struct trove_desc { TROVE_op_id id; - PVFS_size actual_size; + PVFS_size *out_size_p; PVFS_vtag *vtag; PVFS_fs_id fsid; PVFS_error state; @@ -42,6 +41,31 @@ struct trove_desc int count; }; +/* describes precreate pool operations */ +struct precreate_pool_desc +{ + PVFS_handle precreate_pool; + PVFS_fs_id fsid; + PVFS_handle* precreate_handle_array; + int precreate_handle_count; + int precreate_handle_index; + int posted_count; + const char** servers; + struct qlist_head* current_pool; + int trove_pending; + int low_threshold; + void* data; + int first_callback_flag; + TROVE_keyval_s* key_array; + PVFS_ds_flags flags; + PVFS_ds_position position; + PVFS_ds_position pool_index; + int count; + PVFS_ds_type type; /* ds type of operation */ + + PVFS_error error_code; +}; + /* describes unexpected BMI operations */ struct bmi_unexp_desc { @@ -85,6 +109,7 @@ enum job_type JOB_REQ_SCHED, JOB_DEV_UNEXP, JOB_REQ_SCHED_TIMER, + JOB_PRECREATE_POOL, JOB_NULL }; @@ -99,7 +124,7 @@ struct job_desc job_context_id context_id; /* context */ struct PINT_thread_mgr_bmi_callback bmi_callback; /* callback information */ struct PINT_thread_mgr_trove_callback trove_callback; /* callback information */ - enum PVFS_event_op event_type; + PVFS_hint hints; /* union of information for lower level interfaces */ union @@ 
-111,6 +136,7 @@ struct job_desc struct req_sched_desc req_sched; struct dev_unexp_desc dev_unexp; struct null_info_desc null_info; + struct precreate_pool_desc precreate_pool; } u; diff --git a/src/io/job/job-time-mgr.c b/src/io/job/job-time-mgr.c index 3444b07..7f36a79 100644 --- a/src/io/job/job-time-mgr.c +++ b/src/io/job/job-time-mgr.c @@ -4,7 +4,9 @@ * See COPYING in top-level directory. */ +#ifndef WIN32 #include +#endif #include #include @@ -198,6 +200,9 @@ void job_time_mgr_rem(struct job_desc* jd) tmp_bucket = (struct time_bucket*)jd->time_bucket; + /* this item needs to be removed before the test. Otherwise, the removal + * of the bucket_link will never happen and the list will grow forever */ + qlist_del(&jd->job_time_link); if(qlist_empty(&tmp_bucket->jd_queue)) { /* no need for this bucket any longer; it is empty */ @@ -205,7 +210,6 @@ void job_time_mgr_rem(struct job_desc* jd) INIT_QLIST_HEAD(&tmp_bucket->jd_queue); free(tmp_bucket); } - qlist_del(&jd->job_time_link); jd->time_bucket = NULL; gen_mutex_unlock(&bucket_mutex); diff --git a/src/io/job/job.c b/src/io/job/job.c index 9db968f..e95057d 100644 --- a/src/io/job/job.c +++ b/src/io/job/job.c @@ -7,8 +7,10 @@ /* this file contains a skeleton implementation of the job interface */ #include +#ifndef WIN32 #include #include +#endif #include #include #include @@ -23,19 +25,10 @@ #include "trove.h" #include "gossip.h" #include "id-generator.h" -#include "pint-event.h" #include "job-time-mgr.h" - +#include "pvfs2-internal.h" #include "src/client/sysint/osd.h" -#define JOB_EVENT_START(__op, __id) \ - PINT_event_timestamp(PVFS_EVENT_API_JOB, __op, 0, __id, \ - PVFS_EVENT_FLAG_START) - -#define JOB_EVENT_END(__op, __size, __id) \ - PINT_event_timestamp(PVFS_EVENT_API_JOB, __op, __size, __id, \ - PVFS_EVENT_FLAG_END) - /* contexts for use within the job interface */ static bmi_context_id global_bmi_context = -1; #ifdef __PVFS2_TROVE_SUPPORT__ @@ -71,6 +64,50 @@ enum thread_wait_timeout = 10000 /* usecs */ 
}; +/* cap how many keys we dump into trove at once when filling precreate pools + * so that it doesn't clog up trove queues + */ +#define PRECREATE_POOL_MAX_KEYS 32 + +#ifdef __PVFS2_TROVE_SUPPORT__ + +static gen_mutex_t precreate_pool_mutex = GEN_MUTEX_INITIALIZER; +static QLIST_HEAD(precreate_pool_check_level_list); +static QLIST_HEAD(precreate_pool_get_handles_list); +static QLIST_HEAD(precreate_pool_fs_list); + +struct precreate_pool +{ + struct qlist_head list_link; + char* host; + PVFS_handle pool_handle; + uint32_t pool_count; + PVFS_ds_type pool_type; /* ds type of pool */ +}; + +struct fs_pool +{ + struct qlist_head list_link; + PVFS_fs_id fsid; + /* store the batch count parameter inside the struct. this lets us + * return einval if we get called with a type that has a batch count of 0 */ + uint32_t type_batch_count[PVFS_DS_TYPE_COUNT]; + struct qlist_head precreate_pool_list; + struct qlist_head* precreate_pool_initial; +}; + +struct precreate_pool_get_trove +{ + struct job_desc* jd; /* parent job descriptor */ + /* variables needed per keyval_iterate_keys() call */ + PVFS_ds_position pos; + PVFS_ds_keyval key; + int count; + struct PINT_thread_mgr_trove_callback trove_callback; + struct precreate_pool* pool; +}; +#endif /* __PVFS2_TROVE_SUPPORT__ */ + /******************************************************** * function prototypes */ @@ -99,11 +136,27 @@ static void bmi_thread_mgr_unexp_handler(struct BMI_unexpected_info* unexp); static void dev_thread_mgr_unexp_handler(struct PINT_dev_unexp_info* unexp); static void trove_thread_mgr_callback(void* data, PVFS_error error_code); -static void flow_callback(flow_descriptor* flow_d); +static void flow_callback(flow_descriptor* flow_d, int cancel_path); #ifndef __PVFS2_JOB_THREADED__ static gen_mutex_t work_cycle_mutex = GEN_MUTEX_INITIALIZER; static void do_one_work_cycle_all(int idle_time_ms); #endif +#ifdef __PVFS2_TROVE_SUPPORT__ +static void precreate_pool_get_thread_mgr_callback( + void* data, + 
PVFS_error error_code); +static void precreate_pool_get_thread_mgr_callback_unlocked( + void* data, + PVFS_error error_code); +static void precreate_pool_fill_thread_mgr_callback( + void* data, + PVFS_error error_code); +static void precreate_pool_iterate_callback( + void* data, + PVFS_error error_code); +static void precreate_pool_get_handles_try_post(struct job_desc* jd); +static struct fs_pool* find_fs(PVFS_fs_id fsid); +#endif /******************************************************** * public interface @@ -132,7 +185,7 @@ int job_initialize(int flags) teardown_queues(); return (-ret); } - ret = PINT_thread_mgr_bmi_getcontext(&global_bmi_context); + ret = PINT_thread_mgr_bmi_getcontext((PVFS_context_id *)&global_bmi_context); /* this should never fail if the thread startup succeeded */ assert(ret == 0); @@ -325,7 +378,8 @@ int job_bmi_send(PVFS_BMI_addr_t addr, job_status_s * out_status_p, job_id_t * id, job_context_id context_id, - int timeout_sec) + int timeout_sec, + PVFS_hint hints) { /* post a bmi send. If it completes (or fails) immediately, then * return and fill in the status structure. 
If it needs to be tested @@ -342,7 +396,8 @@ int job_bmi_send(PVFS_BMI_addr_t addr, jd = alloc_job_desc(JOB_BMI); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ERROR_CODE(errno); + return 1; } jd->job_user_ptr = user_ptr; jd->u.bmi.actual_size = size; @@ -351,20 +406,22 @@ int job_bmi_send(PVFS_BMI_addr_t addr, jd->bmi_callback.fn = bmi_thread_mgr_callback; jd->bmi_callback.data = (void*)jd; user_ptr_internal = &jd->bmi_callback; - JOB_EVENT_START(PVFS_EVENT_BMI_SEND, jd->job_id); + + jd->hints = hints; /* post appropriate type of send */ if (!send_unexpected) { ret = BMI_post_send(&(jd->u.bmi.id), addr, buffer, size, - buffer_type, tag, user_ptr_internal, - global_bmi_context); + buffer_type, tag, user_ptr_internal, + global_bmi_context, jd->hints); } else { ret = BMI_post_sendunexpected(&(jd->u.bmi.id), addr, buffer, size, buffer_type, tag, - user_ptr_internal, global_bmi_context); + user_ptr_internal, global_bmi_context, + jd->hints); } if (ret < 0) @@ -372,7 +429,6 @@ int job_bmi_send(PVFS_BMI_addr_t addr, /* error posting */ out_status_p->error_code = ret; out_status_p->status_user_tag = status_user_tag; - JOB_EVENT_END(PVFS_EVENT_BMI_SEND, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (1); @@ -384,18 +440,16 @@ int job_bmi_send(PVFS_BMI_addr_t addr, out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; out_status_p->actual_size = size; - JOB_EVENT_END(PVFS_EVENT_BMI_SEND, size, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); } /* if we fall to this point, the job did not immediately complete and - * we must queue up to test it later + * we must queue up to test it later */ *id = jd->job_id; bmi_pending_count++; - jd->event_type = PVFS_EVENT_BMI_SEND; return(job_time_mgr_add(jd, timeout_sec)); } @@ -421,7 +475,8 @@ int job_bmi_send_list(PVFS_BMI_addr_t addr, job_status_s * out_status_p, job_id_t * id, job_context_id context_id, - int timeout_sec) + int timeout_sec, + PVFS_hint hints) { /* post a 
bmi send. If it completes (or fails) immediately, then * return and fill in the status structure. If it needs to be tested @@ -438,7 +493,8 @@ int job_bmi_send_list(PVFS_BMI_addr_t addr, jd = alloc_job_desc(JOB_BMI); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ERROR_CODE(errno); + return 1; } jd->job_user_ptr = user_ptr; jd->u.bmi.actual_size = total_size; @@ -447,7 +503,8 @@ int job_bmi_send_list(PVFS_BMI_addr_t addr, jd->bmi_callback.fn = bmi_thread_mgr_callback; jd->bmi_callback.data = (void*)jd; user_ptr_internal = &jd->bmi_callback; - JOB_EVENT_START(PVFS_EVENT_BMI_SEND, jd->job_id); + + jd->hints = hints; /* post appropriate type of send */ if (!send_unexpected) @@ -455,7 +512,7 @@ int job_bmi_send_list(PVFS_BMI_addr_t addr, ret = BMI_post_send_list(&(jd->u.bmi.id), addr, (const void **) buffer_list, size_list, list_count, total_size, buffer_type, - tag, user_ptr_internal, global_bmi_context); + tag, user_ptr_internal, global_bmi_context, hints); } else { @@ -463,7 +520,7 @@ int job_bmi_send_list(PVFS_BMI_addr_t addr, (const void **) buffer_list, size_list, list_count, total_size, buffer_type, tag, - user_ptr_internal, global_bmi_context); + user_ptr_internal, global_bmi_context, hints); } if (ret < 0) @@ -471,7 +528,6 @@ int job_bmi_send_list(PVFS_BMI_addr_t addr, /* error posting */ out_status_p->error_code = ret; out_status_p->status_user_tag = status_user_tag; - JOB_EVENT_END(PVFS_EVENT_BMI_SEND, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (1); @@ -483,18 +539,16 @@ int job_bmi_send_list(PVFS_BMI_addr_t addr, out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; out_status_p->actual_size = total_size; - JOB_EVENT_END(PVFS_EVENT_BMI_SEND, total_size, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); } /* if we fall to this point, the job did not immediately complete and - * we must queue up to test it later + * we must queue up to test it later */ *id = jd->job_id; bmi_pending_count++; - 
jd->event_type = PVFS_EVENT_BMI_SEND; return(job_time_mgr_add(jd, timeout_sec)); } @@ -515,7 +569,8 @@ int job_bmi_recv(PVFS_BMI_addr_t addr, job_status_s * out_status_p, job_id_t * id, job_context_id context_id, - int timeout_sec) + int timeout_sec, + PVFS_hint hints) { /* post a bmi recv. If it completes (or fails) immediately, then * return and fill in the status structure. If it needs to be tested @@ -531,26 +586,28 @@ int job_bmi_recv(PVFS_BMI_addr_t addr, jd = alloc_job_desc(JOB_BMI); if (!jd) { - return (-ENOMEM); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->context_id = context_id; jd->status_user_tag = status_user_tag; jd->bmi_callback.fn = bmi_thread_mgr_callback; jd->bmi_callback.data = (void*)jd; user_ptr_internal = &jd->bmi_callback; - JOB_EVENT_START(PVFS_EVENT_BMI_RECV, jd->job_id); + ret = BMI_post_recv(&(jd->u.bmi.id), addr, buffer, size, - &(jd->u.bmi.actual_size), buffer_type, tag, - user_ptr_internal, - global_bmi_context); + &(jd->u.bmi.actual_size), buffer_type, tag, + user_ptr_internal, + global_bmi_context, + hints); if (ret < 0) { /* error posting */ out_status_p->error_code = ret; out_status_p->status_user_tag = status_user_tag; - JOB_EVENT_END(PVFS_EVENT_BMI_RECV, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (1); @@ -562,19 +619,16 @@ int job_bmi_recv(PVFS_BMI_addr_t addr, out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; out_status_p->actual_size = jd->u.bmi.actual_size; - JOB_EVENT_END(PVFS_EVENT_BMI_RECV, out_status_p->actual_size, - jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); } /* if we fall to this point, the job did not immediately complete and - * we must queue up to test it later + * we must queue up to test it later */ *id = jd->job_id; bmi_pending_count++; - jd->event_type = PVFS_EVENT_BMI_RECV; return(job_time_mgr_add(jd, timeout_sec)); } @@ -599,7 +653,8 @@ int job_bmi_recv_list(PVFS_BMI_addr_t addr, 
job_status_s * out_status_p, job_id_t * id, job_context_id context_id, - int timeout_sec) + int timeout_sec, + PVFS_hint hints) { /* post a bmi recv. If it completes (or fails) immediately, then @@ -617,27 +672,27 @@ int job_bmi_recv_list(PVFS_BMI_addr_t addr, jd = alloc_job_desc(JOB_BMI); if (!jd) { - return (-ENOMEM); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->context_id = context_id; jd->status_user_tag = status_user_tag; jd->bmi_callback.fn = bmi_thread_mgr_callback; jd->bmi_callback.data = (void*)jd; user_ptr_internal = &jd->bmi_callback; - JOB_EVENT_START(PVFS_EVENT_BMI_RECV, jd->job_id); ret = BMI_post_recv_list(&(jd->u.bmi.id), addr, buffer_list, size_list, list_count, total_expected_size, &(jd->u.bmi.actual_size), buffer_type, tag, - user_ptr_internal, global_bmi_context); + user_ptr_internal, global_bmi_context, hints); if (ret < 0) { /* error posting */ out_status_p->error_code = ret; out_status_p->status_user_tag = status_user_tag; - JOB_EVENT_END(PVFS_EVENT_BMI_RECV, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (1); @@ -649,19 +704,16 @@ int job_bmi_recv_list(PVFS_BMI_addr_t addr, out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; out_status_p->actual_size = jd->u.bmi.actual_size; - JOB_EVENT_END(PVFS_EVENT_BMI_RECV, out_status_p->actual_size, - jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); } /* if we fall to this point, the job did not immediately complete and - * we must queue up to test it later + * we must queue up to test it later */ *id = jd->job_id; bmi_pending_count++; - jd->event_type = PVFS_EVENT_BMI_RECV; return(job_time_mgr_add(jd, timeout_sec)); } @@ -696,7 +748,8 @@ int job_bmi_unexp(struct BMI_unexpected_info *bmi_unexp_d, jd = alloc_job_desc(JOB_BMI_UNEXP); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->u.bmi_unexp.info = bmi_unexp_d; @@ -723,7 
+776,8 @@ int job_bmi_unexp(struct BMI_unexpected_info *bmi_unexp_d, /* error testing */ dealloc_job_desc(jd); jd = NULL; - return (ret); + out_status_p->error_code = ret; + return 1; } if (outcount == 1) @@ -751,6 +805,33 @@ int job_bmi_unexp(struct BMI_unexpected_info *bmi_unexp_d, return (0); } +int job_bmi_unexp_cancel(job_id_t id) +{ + struct job_desc *jd; + + gen_mutex_lock(&bmi_unexp_mutex); + jd = id_gen_safe_lookup(id); + job_desc_q_remove(jd); + bmi_unexp_pending_count--; + gen_mutex_unlock(&bmi_unexp_mutex); + + gen_mutex_lock(&completion_mutex); + /* set completed flag while holding queue lock */ + jd->completed_flag = 1; + if (completion_queue_array[jd->context_id]) + { + job_desc_q_add(completion_queue_array[jd->context_id], jd); + } + +#ifdef __PVFS2_JOB_THREADED__ + /* wake up anyone waiting for completion */ + pthread_cond_signal(&completion_cond); +#endif + gen_mutex_unlock(&completion_mutex); + + return 0; +} + #ifndef __PVFS2_SERVER__ /* * Post a job that does an OSD command submit using bmi_osd. 
@@ -861,7 +942,8 @@ int job_dev_unexp( jd = alloc_job_desc(JOB_DEV_UNEXP); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->u.dev_unexp.info = dev_unexp_d; @@ -879,7 +961,8 @@ int job_dev_unexp( /* error testing */ dealloc_job_desc(jd); jd = NULL; - return (ret); + out_status_p->error_code = ret; + return 1; } if (outcount == 1) @@ -1009,8 +1092,8 @@ int job_dev_write_list(void** buffer_list, int job_req_sched_post(enum PVFS_server_op op, PVFS_fs_id fs_id, PVFS_handle handle, - int read_only, - int schedule, + enum PINT_server_req_access_type access_type, + enum PINT_server_sched_policy sched_policy, void *user_ptr, job_aint status_user_tag, job_status_s * out_status_p, @@ -1033,7 +1116,8 @@ int job_req_sched_post(enum PVFS_server_op op, jd = alloc_job_desc(JOB_REQ_SCHED); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->u.req_sched.post_flag = 1; @@ -1041,7 +1125,7 @@ int job_req_sched_post(enum PVFS_server_op op, jd->status_user_tag = status_user_tag; ret = PINT_req_sched_post( - op, fs_id, handle, read_only, schedule, jd, &(jd->u.req_sched.id)); + op, fs_id, handle, access_type, sched_policy, jd, &(jd->u.req_sched.id)); if (ret < 0) { @@ -1084,7 +1168,8 @@ int job_req_sched_change_mode(enum PVFS_server_mode mode, jd = alloc_job_desc(JOB_REQ_SCHED); if(!jd) { - return (errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->u.req_sched.post_flag = 1; @@ -1142,7 +1227,8 @@ int job_req_sched_post_timer(int msecs, jd = alloc_job_desc(JOB_REQ_SCHED); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->context_id = context_id; @@ -1205,7 +1291,8 @@ int job_req_sched_release(job_id_t in_completed_id, jd = alloc_job_desc(JOB_REQ_SCHED); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } 
jd->job_user_ptr = user_ptr; jd->context_id = context_id; @@ -1230,15 +1317,13 @@ int job_req_sched_release(job_id_t in_completed_id, dealloc_job_desc(match_jd); match_jd = NULL; - /* NOTE: I am letting the return value propigate here, rather - * than just setting the status. Failure here is bad... - */ if (ret < 0) { /* error posting */ dealloc_job_desc(jd); jd = NULL; - return (ret); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } if (ret == 1) @@ -1275,7 +1360,8 @@ int job_flow(flow_descriptor * flow_d, job_status_s * out_status_p, job_id_t * id, job_context_id context_id, - int timeout_sec) + int timeout_sec, + PVFS_hint hints) { struct job_desc *jd = NULL; int ret = -1; @@ -1284,8 +1370,11 @@ int job_flow(flow_descriptor * flow_d, jd = alloc_job_desc(JOB_FLOW); if (!jd) { - return (-ENOMEM); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; + flow_d->hints = hints; jd->job_user_ptr = user_ptr; jd->u.flow.flow_d = flow_d; jd->context_id = context_id; @@ -1293,15 +1382,12 @@ int job_flow(flow_descriptor * flow_d, flow_d->user_ptr = jd; flow_d->callback = flow_callback; - JOB_EVENT_START(PVFS_EVENT_FLOW, jd->job_id); - /* post the flow */ ret = PINT_flow_post(flow_d); if (ret < 0) { out_status_p->error_code = ret; out_status_p->status_user_tag = status_user_tag; - JOB_EVENT_END(PVFS_EVENT_FLOW, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (1); @@ -1312,7 +1398,6 @@ int job_flow(flow_descriptor * flow_d, out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; out_status_p->actual_size = flow_d->total_transferred; - JOB_EVENT_END(PVFS_EVENT_FLOW, flow_d->total_transferred, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (1); @@ -1321,7 +1406,6 @@ int job_flow(flow_descriptor * flow_d, /* queue up the job desc. 
for later completion */ *id = jd->job_id; flow_pending_count++; - jd->event_type = PVFS_EVENT_FLOW; gossip_debug(GOSSIP_FLOW_DEBUG, "Job flows in progress (post time): %d\n", flow_pending_count); @@ -1364,114 +1448,23 @@ int job_flow_cancel(job_id_t id, job_context_id context_id) return(ret); } -/* job_trove_bstream_write_at() - * - * storage byte stream write - * - * returns 0 on success, 1 on immediate completion, and -errno on - * failure - */ -int job_trove_bstream_write_at(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_offset offset, - void *buffer, - PVFS_size size, - PVFS_ds_flags flags, - PVFS_vtag * vtag, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id) -{ - /* post a trove write. If it completes (or fails) immediately, then - * return and fill in the status structure. If it needs to be tested - * for completion later, then queue up a job_desc structure. - */ - int ret = -1; - struct job_desc *jd = NULL; - void* user_ptr_internal; - - /* create the job desc first, even though we may not use it. 
This - * gives us somewhere to store the BMI id and user ptr - */ - jd = alloc_job_desc(JOB_TROVE); - if (!jd) - { - return (-errno); - } - jd->job_user_ptr = user_ptr; - jd->u.trove.actual_size = size; - jd->u.trove.vtag = vtag; - jd->context_id = context_id; - jd->status_user_tag = status_user_tag; - jd->trove_callback.fn = trove_thread_mgr_callback; - jd->trove_callback.data = (void*)jd; - user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_WRITE_AT, jd->job_id); - -#ifdef __PVFS2_TROVE_SUPPORT__ - ret = trove_bstream_write_at(coll_id, handle, buffer, - &jd->u.trove.actual_size, offset, flags, - jd->u.trove.vtag, user_ptr_internal, - global_trove_context, - &(jd->u.trove.id)); -#else - gossip_err("Error: Trove support not enabled.\n"); - ret = -ENOSYS; -#endif - - if (ret < 0) - { - /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_WRITE_AT, 0, jd->job_id); - dealloc_job_desc(jd); - jd = NULL; - out_status_p->error_code = ret; - out_status_p->status_user_tag = status_user_tag; - return (1); - } - - if (ret == 1) - { - /* immediate completion */ - out_status_p->error_code = 0; - out_status_p->status_user_tag = status_user_tag; - out_status_p->actual_size = jd->u.trove.actual_size; - out_status_p->vtag = jd->u.trove.vtag; - JOB_EVENT_END(PVFS_EVENT_TROVE_WRITE_AT, out_status_p->actual_size, - jd->job_id); - dealloc_job_desc(jd); - jd = NULL; - return (ret); - } - - /* if we fall through to this point, the job did not - * immediately complete and we must queue up to test later - */ - *id = jd->job_id; - trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_WRITE_AT; - - return (0); -} - int job_trove_bstream_write_list(TROVE_coll_id coll_id, TROVE_handle handle, - char **mem_offset_array, + char **mem_offset_array, TROVE_size *mem_size_array, int mem_count, - TROVE_offset *stream_offset_array, + TROVE_offset *stream_offset_array, TROVE_size *stream_size_array, int stream_count, TROVE_size *out_size_p, - TROVE_ds_flags 
flags, + TROVE_ds_flags flags, TROVE_vtag_s *vtag, void * user_ptr, job_aint status_user_tag, job_status_s *out_status_p, job_id_t *id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a trove write. If it completes (or fails) immediately, then * return and fill in the status structure. If it needs to be tested @@ -1487,19 +1480,21 @@ int job_trove_bstream_write_list(TROVE_coll_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; + jd->hints = hints; jd->u.trove.vtag = vtag; + jd->u.trove.out_size_p = out_size_p; jd->context_id = context_id; jd->status_user_tag = status_user_tag; jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_WRITE_LIST, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ - ret = trove_bstream_write_list(coll_id, handle, + ret = trove_bstream_write_list(coll_id, handle, mem_offset_array, mem_size_array, mem_count, stream_offset_array, stream_size_array, @@ -1509,7 +1504,7 @@ int job_trove_bstream_write_list(TROVE_coll_id coll_id, vtag, user_ptr_internal, global_trove_context, - &(jd->u.trove.id)); + &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -1518,7 +1513,6 @@ int job_trove_bstream_write_list(TROVE_coll_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_WRITE_LIST, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -1531,10 +1525,8 @@ int job_trove_bstream_write_list(TROVE_coll_id coll_id, /* immediate completion */ out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; - out_status_p->actual_size = jd->u.trove.actual_size; + out_status_p->actual_size = *out_size_p; out_status_p->vtag = jd->u.trove.vtag; - 
JOB_EVENT_END(PVFS_EVENT_TROVE_WRITE_LIST, out_status_p->actual_size, - jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); @@ -1545,104 +1537,10 @@ int job_trove_bstream_write_list(TROVE_coll_id coll_id, */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_WRITE_LIST; return (0); } - -/* job_trove_bstream_read_at() - * - * storage byte stream read - * - * returns 0 on success, 1 on immediate completion, and -errno on - * failure - */ -int job_trove_bstream_read_at(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_offset offset, - void *buffer, - PVFS_size size, - PVFS_ds_flags flags, - PVFS_vtag * vtag, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id) -{ - /* post a trove read. If it completes (or fails) immediately, then - * return and fill in the status structure. If it needs to be tested - * for completion later, then queue up a job_desc structure. - */ - int ret = -1; - struct job_desc *jd = NULL; - void* user_ptr_internal; - - /* create the job desc first, even though we may not use it. 
This - * gives us somewhere to store the BMI id and user ptr - */ - jd = alloc_job_desc(JOB_TROVE); - if (!jd) - { - return (-errno); - } - jd->job_user_ptr = user_ptr; - jd->u.trove.actual_size = size; - jd->u.trove.vtag = vtag; - jd->context_id = context_id; - jd->status_user_tag = status_user_tag; - jd->trove_callback.fn = trove_thread_mgr_callback; - jd->trove_callback.data = (void*)jd; - user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_READ_AT, jd->job_id); - -#ifdef __PVFS2_TROVE_SUPPORT__ - ret = trove_bstream_read_at(coll_id, handle, buffer, - &jd->u.trove.actual_size, offset, flags, - jd->u.trove.vtag, user_ptr_internal, - global_trove_context, - &(jd->u.trove.id)); -#else - gossip_err("Error: Trove support not enabled.\n"); - ret = -ENOSYS; -#endif - - if (ret < 0) - { - /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_READ_AT, 0, jd->job_id); - dealloc_job_desc(jd); - jd = NULL; - out_status_p->error_code = ret; - out_status_p->status_user_tag = status_user_tag; - return (1); - } - - if (ret == 1) - { - /* immediate completion */ - out_status_p->error_code = 0; - out_status_p->status_user_tag = status_user_tag; - out_status_p->actual_size = jd->u.trove.actual_size; - out_status_p->vtag = jd->u.trove.vtag; - JOB_EVENT_END(PVFS_EVENT_TROVE_READ_AT, out_status_p->actual_size, - jd->job_id); - dealloc_job_desc(jd); - jd = NULL; - return (ret); - } - - /* if we fall through to this point, the job did not - * immediately complete and we must queue up to test later - */ - *id = jd->job_id; - trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_READ_AT; - - return (0); -} - int job_trove_bstream_read_list(PVFS_fs_id coll_id, PVFS_handle handle, char **mem_offset_array, @@ -1658,7 +1556,8 @@ int job_trove_bstream_read_list(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a trove read. 
If it completes (or fails) immediately, then * return and fill in the status structure. If it needs to be tested @@ -1674,19 +1573,21 @@ int job_trove_bstream_read_list(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->u.trove.vtag = vtag; + jd->u.trove.out_size_p = out_size_p; jd->context_id = context_id; jd->status_user_tag = status_user_tag; jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_READ_LIST, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ - ret = trove_bstream_read_list(coll_id, handle, + ret = trove_bstream_read_list(coll_id, handle, mem_offset_array, mem_size_array, mem_count, stream_offset_array, stream_size_array, @@ -1694,7 +1595,7 @@ int job_trove_bstream_read_list(PVFS_fs_id coll_id, out_size_p, flags, jd->u.trove.vtag, user_ptr_internal, global_trove_context, - &(jd->u.trove.id)); + &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -1703,7 +1604,6 @@ int job_trove_bstream_read_list(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_READ_LIST, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -1716,10 +1616,8 @@ int job_trove_bstream_read_list(PVFS_fs_id coll_id, /* immediate completion */ out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; - out_status_p->actual_size = jd->u.trove.actual_size; + out_status_p->actual_size = *out_size_p; out_status_p->vtag = jd->u.trove.vtag; - JOB_EVENT_END(PVFS_EVENT_TROVE_READ_LIST, out_status_p->actual_size, - jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); @@ -1730,7 +1628,6 @@ int job_trove_bstream_read_list(PVFS_fs_id coll_id, */ *id = jd->job_id; trove_pending_count++; - 
jd->event_type = PVFS_EVENT_TROVE_READ_LIST; return (0); } @@ -1749,8 +1646,9 @@ int job_trove_bstream_flush(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) - + job_context_id context_id, + PVFS_hint hints) + { int ret = -1; struct job_desc *jd = NULL; @@ -1762,7 +1660,8 @@ int job_trove_bstream_flush(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->context_id = context_id; @@ -1770,11 +1669,10 @@ int job_trove_bstream_flush(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_BSTREAM_FLUSH, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ ret = trove_bstream_flush(coll_id, handle, flags, user_ptr_internal, - global_trove_context, &(jd->u.trove.id)); + global_trove_context, &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -1783,7 +1681,6 @@ int job_trove_bstream_flush(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_BSTREAM_FLUSH, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -1795,7 +1692,6 @@ int job_trove_bstream_flush(PVFS_fs_id coll_id, /* immediate completion */ out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; - JOB_EVENT_END(PVFS_EVENT_TROVE_BSTREAM_FLUSH, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); @@ -1805,7 +1701,6 @@ int job_trove_bstream_flush(PVFS_fs_id coll_id, */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_BSTREAM_FLUSH; return (0); } @@ -1827,11 +1722,12 @@ int job_trove_keyval_read(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + 
job_context_id context_id, + PVFS_hint hints) { /* post a trove keyval read. If it completes (or fails) - * immediately, then return and fill in the status structure. - * If it needs to be tested for completion later, then queue + * immediately, then return and fill in the status structure. + * If it needs to be tested for completion later, then queue * up a job_desc structure. */ int ret = -1; struct job_desc *jd = NULL; @@ -1843,8 +1739,10 @@ int job_trove_keyval_read(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->u.trove.vtag = vtag; jd->context_id = context_id; @@ -1852,12 +1750,11 @@ int job_trove_keyval_read(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_KEYVAL_READ, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ ret = trove_keyval_read(coll_id, handle, key_p, val_p, flags, - jd->u.trove.vtag, user_ptr_internal, - global_trove_context, &(jd->u.trove.id)); + jd->u.trove.vtag, user_ptr_internal, + global_trove_context, &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -1866,7 +1763,6 @@ int job_trove_keyval_read(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_READ, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -1880,7 +1776,6 @@ int job_trove_keyval_read(PVFS_fs_id coll_id, out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; out_status_p->vtag = jd->u.trove.vtag; - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_READ, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); @@ -1891,7 +1786,6 @@ int job_trove_keyval_read(PVFS_fs_id coll_id, */ *id = jd->job_id; trove_pending_count++; - jd->event_type = 
PVFS_EVENT_TROVE_KEYVAL_READ; return (0); } @@ -1915,11 +1809,12 @@ int job_trove_keyval_read_list(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a trove keyval read. If it completes (or fails) - * immediately, then return and fill in the status structure. - * If it needs to be tested for completion later, then queue + * immediately, then return and fill in the status structure. + * If it needs to be tested for completion later, then queue * up a job_desc structure. */ int ret = -1; struct job_desc *jd = NULL; @@ -1931,8 +1826,10 @@ int job_trove_keyval_read_list(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->u.trove.vtag = vtag; jd->context_id = context_id; @@ -1940,13 +1837,12 @@ int job_trove_keyval_read_list(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_KEYVAL_READ_LIST, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ - ret = trove_keyval_read_list(coll_id, handle, key_array, val_array, - err_array, count, flags, jd->u.trove.vtag, + ret = trove_keyval_read_list(coll_id, handle, key_array, val_array, + err_array, count, flags, jd->u.trove.vtag, user_ptr_internal, - global_trove_context, &(jd->u.trove.id)); + global_trove_context, &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -1955,7 +1851,6 @@ int job_trove_keyval_read_list(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_READ_LIST, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -1969,7 +1864,6 @@ int job_trove_keyval_read_list(PVFS_fs_id coll_id, 
out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; out_status_p->vtag = jd->u.trove.vtag; - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_READ_LIST, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); @@ -1980,14 +1874,13 @@ int job_trove_keyval_read_list(PVFS_fs_id coll_id, */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_KEYVAL_READ_LIST; return (0); } /* job_trove_keyval_write() * - * storage key/value write + * storage key/value write * * returns 0 on success, 1 on immediate completion, and -errno on * failure @@ -2002,11 +1895,12 @@ int job_trove_keyval_write(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a trove keyval write. If it completes (or fails) - * immediately, then return and fill in the status structure. - * If it needs to be tested for completion later, then queue + * immediately, then return and fill in the status structure. + * If it needs to be tested for completion later, then queue * up a job_desc structure. 
*/ int ret = -1; struct job_desc *jd = NULL; @@ -2018,8 +1912,10 @@ int job_trove_keyval_write(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->u.trove.vtag = vtag; jd->context_id = context_id; @@ -2027,13 +1923,12 @@ int job_trove_keyval_write(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_KEYVAL_WRITE, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ ret = trove_keyval_write(coll_id, handle, key_p, val_p, flags, - jd->u.trove.vtag, user_ptr_internal, + jd->u.trove.vtag, user_ptr_internal, global_trove_context, - &(jd->u.trove.id)); + &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -2042,7 +1937,6 @@ int job_trove_keyval_write(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_WRITE, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -2056,7 +1950,6 @@ int job_trove_keyval_write(PVFS_fs_id coll_id, out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; out_status_p->vtag = jd->u.trove.vtag; - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_WRITE, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); @@ -2067,14 +1960,13 @@ int job_trove_keyval_write(PVFS_fs_id coll_id, */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_KEYVAL_WRITE; return (0); } /* job_trove_keyval_write_list() * - * storage key/value list write + * storage key/value list write * * returns 0 on success, 1 on immediate completion, and -errno on * failure @@ -2090,11 +1982,12 @@ int job_trove_keyval_write_list(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + 
job_context_id context_id, + PVFS_hint hints) { /* post a trove keyval write. If it completes (or fails) - * immediately, then return and fill in the status structure. - * If it needs to be tested for completion later, then queue + * immediately, then return and fill in the status structure. + * If it needs to be tested for completion later, then queue * up a job_desc structure. */ int ret = -1; struct job_desc *jd = NULL; @@ -2106,8 +1999,10 @@ int job_trove_keyval_write_list(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->u.trove.vtag = vtag; jd->context_id = context_id; @@ -2115,15 +2010,15 @@ int job_trove_keyval_write_list(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_KEYVAL_WRITE_LIST, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ + gossip_debug(GOSSIP_JOB_DEBUG, "job_trove_keyval_write_list() posting trove_keyval_write_list()\n"); ret = trove_keyval_write_list(coll_id, handle, key_array, val_array, count, flags, - jd->u.trove.vtag, user_ptr_internal, + jd->u.trove.vtag, user_ptr_internal, global_trove_context, - &(jd->u.trove.id)); + &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -2132,7 +2027,6 @@ int job_trove_keyval_write_list(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_WRITE_LIST, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -2146,7 +2040,6 @@ int job_trove_keyval_write_list(PVFS_fs_id coll_id, out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; out_status_p->vtag = jd->u.trove.vtag; - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_WRITE_LIST, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return 
(ret); @@ -2157,18 +2050,97 @@ int job_trove_keyval_write_list(PVFS_fs_id coll_id, */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_KEYVAL_WRITE_LIST; return (0); } -/* job_trove_keyval_flush() - * - * ask the storage layer to flush keyvals to disk - * - * returns 0 on success, 1 on immediate completion, and -errno on failure - */ - +int job_trove_keyval_remove_list(PVFS_fs_id coll_id, + PVFS_handle handle, + PVFS_ds_keyval * key_array, + PVFS_ds_keyval * val_array, + int * error_array, + int count, + PVFS_ds_flags flags, + PVFS_vtag * vtag, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints) +{ + int ret = -1; + struct job_desc *jd = NULL; + void* user_ptr_internal; + + /* create the job desc first, even though we may not use it. This + * gives us somewhere to store the BMI id and user ptr + */ + jd = alloc_job_desc(JOB_TROVE); + if (!jd) + { + out_status_p->error_code = -PVFS_ENOMEM; + return 1; + } + jd->job_user_ptr = user_ptr; + jd->u.trove.vtag = vtag; + jd->context_id = context_id; + jd->status_user_tag = status_user_tag; + jd->trove_callback.fn = trove_thread_mgr_callback; + jd->trove_callback.data = (void*)jd; + user_ptr_internal = &jd->trove_callback; + +#ifdef __PVFS2_TROVE_SUPPORT__ + ret = trove_keyval_remove_list(coll_id, handle, + key_array, val_array, error_array, + count, flags, + jd->u.trove.vtag, user_ptr_internal, + global_trove_context, + &(jd->u.trove.id), + hints); +#else + gossip_err("Error: Trove support not enabled.\n"); + ret = -ENOSYS; +#endif + + if (ret < 0) + { + /* error posting trove operation */ + dealloc_job_desc(jd); + jd = NULL; + out_status_p->error_code = ret; + out_status_p->status_user_tag = status_user_tag; + return (1); + } + + if (ret == 1) + { + /* immediate completion */ + out_status_p->error_code = 0; + out_status_p->status_user_tag = status_user_tag; + out_status_p->vtag = jd->u.trove.vtag; + 
dealloc_job_desc(jd); + jd = NULL; + return (ret); + } + + /* if we fall through to this point, the job did not + * immediately complete and we must queue up to test later + */ + *id = jd->job_id; + trove_pending_count++; + + return (0); +} + + +/* job_trove_keyval_flush() + * + * ask the storage layer to flush keyvals to disk + * + * returns 0 on success, 1 on immediate completion, and -errno on failure + */ + int job_trove_keyval_flush(PVFS_fs_id coll_id, PVFS_handle handle, PVFS_ds_flags flags, @@ -2176,7 +2148,8 @@ int job_trove_keyval_flush(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { int ret = -1; struct job_desc *jd = NULL; @@ -2188,7 +2161,8 @@ int job_trove_keyval_flush(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->context_id = context_id; @@ -2196,11 +2170,10 @@ int job_trove_keyval_flush(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_KEYVAL_FLUSH, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ ret = trove_keyval_flush(coll_id, handle, flags, user_ptr_internal, - global_trove_context, &(jd->u.trove.id)); + global_trove_context, &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -2209,7 +2182,6 @@ int job_trove_keyval_flush(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_FLUSH, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -2222,7 +2194,6 @@ int job_trove_keyval_flush(PVFS_fs_id coll_id, /* immediate completion */ out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; - 
JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_FLUSH, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); @@ -2233,7 +2204,6 @@ int job_trove_keyval_flush(PVFS_fs_id coll_id, */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_KEYVAL_FLUSH; return (0); } @@ -2246,7 +2216,8 @@ int job_trove_keyval_get_handle_info(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a trove operation keyval get handle info. If it completes (or * fails) immediately, then return and fill in the status @@ -2264,24 +2235,27 @@ int job_trove_keyval_get_handle_info(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->context_id = context_id; jd->status_user_tag = status_user_tag; jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_KEYVAL_GET_HANDLE_INFO, jd->job_id); + + #ifdef __PVFS2_TROVE_SUPPORT__ ret = trove_keyval_get_handle_info( coll_id, handle, - flags, + flags, info, - user_ptr_internal, - global_trove_context, &(jd->u.trove.id)); + user_ptr_internal, + global_trove_context, &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -2290,7 +2264,6 @@ int job_trove_keyval_get_handle_info(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_GET_HANDLE_INFO, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -2303,18 +2276,16 @@ int job_trove_keyval_get_handle_info(PVFS_fs_id coll_id, /* immediate completion */ out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_GET_HANDLE_INFO, 0, 
jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); } /* if we fall to this point, the job did not immediately complete and - * we must queue up to test it later + * we must queue up to test it later */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_KEYVAL_GET_HANDLE_INFO; return (0); } @@ -2322,7 +2293,7 @@ int job_trove_keyval_get_handle_info(PVFS_fs_id coll_id, /* job_trove_dspace_getattr() * - * read generic dspace attributes + * read generic dspace attributes * * returns 0 on success, 1 on immediate completion, and -errno on * failure @@ -2334,7 +2305,8 @@ int job_trove_dspace_getattr(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s *out_status_p, job_id_t *id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a trove operation dspace get attr. If it completes (or * fails) immediately, then return and fill in the status @@ -2352,7 +2324,8 @@ int job_trove_dspace_getattr(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->context_id = context_id; @@ -2360,13 +2333,14 @@ int job_trove_dspace_getattr(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_DSPACE_GETATTR, jd->job_id); + + #ifdef __PVFS2_TROVE_SUPPORT__ ret = trove_dspace_getattr(coll_id, handle, out_ds_attr_ptr, 0 /* flags */ , - user_ptr_internal, - global_trove_context, &(jd->u.trove.id)); + user_ptr_internal, + global_trove_context, &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -2375,7 +2349,6 @@ int job_trove_dspace_getattr(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_DSPACE_GETATTR, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; 
out_status_p->error_code = ret; @@ -2388,18 +2361,16 @@ int job_trove_dspace_getattr(PVFS_fs_id coll_id, /* immediate completion */ out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; - JOB_EVENT_END(PVFS_EVENT_TROVE_DSPACE_GETATTR, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); } /* if we fall to this point, the job did not immediately complete and - * we must queue up to test it later + * we must queue up to test it later */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_DSPACE_GETATTR; return (0); } @@ -2420,7 +2391,8 @@ int job_trove_dspace_getattr_list(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s *out_status_p, job_id_t *id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a trove operation dspace get attr list. If it completes (or * fails) immediately, then return and fill in the status @@ -2438,15 +2410,18 @@ int job_trove_dspace_getattr_list(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->context_id = context_id; jd->status_user_tag = status_user_tag; jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_DSPACE_GETATTR_LIST, jd->job_id); + + #ifdef __PVFS2_TROVE_SUPPORT__ ret = trove_dspace_getattr_list(coll_id, @@ -2454,8 +2429,8 @@ int job_trove_dspace_getattr_list(PVFS_fs_id coll_id, handle_array, out_ds_attr_ptr, out_error_array, 0 /* flags */ , - user_ptr_internal, - global_trove_context, &(jd->u.trove.id)); + user_ptr_internal, + global_trove_context, &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -2464,7 +2439,6 @@ int job_trove_dspace_getattr_list(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - 
JOB_EVENT_END(PVFS_EVENT_TROVE_DSPACE_GETATTR_LIST, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -2477,25 +2451,23 @@ int job_trove_dspace_getattr_list(PVFS_fs_id coll_id, /* immediate completion */ out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; - JOB_EVENT_END(PVFS_EVENT_TROVE_DSPACE_GETATTR_LIST, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); } /* if we fall to this point, the job did not immediately complete and - * we must queue up to test it later + * we must queue up to test it later */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_DSPACE_GETATTR_LIST; return (0); } /* job_trove_dspace_setattr() * - * write generic dspace attributes + * write generic dspace attributes * * returns 0 on success, 1 on immediate completion, and -errno on * failure @@ -2508,7 +2480,8 @@ int job_trove_dspace_setattr(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a trove operation dspace set attr. 
If it completes (or * fails) immediately, then return and fill in the status @@ -2526,7 +2499,8 @@ int job_trove_dspace_setattr(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->context_id = context_id; @@ -2534,13 +2508,12 @@ int job_trove_dspace_setattr(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_DSPACE_SETATTR, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ ret = trove_dspace_setattr(coll_id, handle, ds_attr_p, flags, - user_ptr_internal, global_trove_context, - &(jd->u.trove.id)); + user_ptr_internal, global_trove_context, + &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -2549,7 +2522,6 @@ int job_trove_dspace_setattr(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_DSPACE_SETATTR, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -2562,25 +2534,23 @@ int job_trove_dspace_setattr(PVFS_fs_id coll_id, /* immediate completion */ out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; - JOB_EVENT_END(PVFS_EVENT_TROVE_DSPACE_SETATTR, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); } /* if we fall to this point, the job did not immediately complete and - * we must queue up to test it later + * we must queue up to test it later */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_DSPACE_SETATTR; return (0); } /* job_trove_bstream_resize() * - * resize (truncate or preallocate) a storage byte stream + * resize (truncate or preallocate) a storage byte stream * * returns 0 on success, 1 on immediate completion, and -errno on * failure @@ -2594,7 +2564,8 @@ int job_trove_bstream_resize(PVFS_fs_id coll_id, 
job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a resize trove operation. If it completes (or * fails) immediately, then return and fill in the status @@ -2612,7 +2583,8 @@ int job_trove_bstream_resize(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->context_id = context_id; @@ -2620,13 +2592,12 @@ int job_trove_bstream_resize(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_BSTREAM_RESIZE, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ ret = trove_bstream_resize(coll_id, handle, &size, flags, - vtag, user_ptr_internal, global_trove_context, - &(jd->u.trove.id)); + vtag, user_ptr_internal, global_trove_context, + &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -2635,7 +2606,6 @@ int job_trove_bstream_resize(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_BSTREAM_RESIZE, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -2648,25 +2618,23 @@ int job_trove_bstream_resize(PVFS_fs_id coll_id, /* immediate completion */ out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; - JOB_EVENT_END(PVFS_EVENT_TROVE_BSTREAM_RESIZE, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); } /* if we fall to this point, the job did not immediately complete and - * we must queue up to test it later + * we must queue up to test it later */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_BSTREAM_RESIZE; return (0); } /* job_trove_bstream_validate() * - * check consistency of a bytestream for a given vtag + * check consistency of 
a bytestream for a given vtag * * returns 0 on success, 1 on immediate completion, and -errno on * failure @@ -2678,15 +2646,17 @@ int job_trove_bstream_validate(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { gossip_lerr("Error: unimplemented.\n"); - return (-ENOSYS); + out_status_p->error_code = -PVFS_ENOSYS; + return 1; } /* job_trove_keyval_remove() * - * remove a key/value entry + * remove a key/value entry * * returns 0 on success, 1 on immediate completion, and -errno on * failure @@ -2701,11 +2671,12 @@ int job_trove_keyval_remove(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a trove keyval remove. If it completes (or fails) - * immediately, then return and fill in the status structure. - * If it needs to be tested for completion later, then queue + * immediately, then return and fill in the status structure. + * If it needs to be tested for completion later, then queue * up a job_desc structure. 
*/ int ret = -1; struct job_desc *jd = NULL; @@ -2717,8 +2688,10 @@ int job_trove_keyval_remove(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->u.trove.vtag = vtag; jd->context_id = context_id; @@ -2726,12 +2699,11 @@ int job_trove_keyval_remove(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_KEYVAL_REMOVE, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ ret = trove_keyval_remove(coll_id, handle, key_p, val_p, flags, - jd->u.trove.vtag, user_ptr_internal, - global_trove_context, &(jd->u.trove.id)); + jd->u.trove.vtag, user_ptr_internal, + global_trove_context, &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -2740,7 +2712,6 @@ int job_trove_keyval_remove(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_REMOVE, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -2754,7 +2725,6 @@ int job_trove_keyval_remove(PVFS_fs_id coll_id, out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; out_status_p->vtag = jd->u.trove.vtag; - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_REMOVE, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); @@ -2765,14 +2735,13 @@ int job_trove_keyval_remove(PVFS_fs_id coll_id, */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_KEYVAL_REMOVE; return (0); } /* job_trove_keyval_validate() * - * check consistency of a key/value pair for a given vtag + * check consistency of a key/value pair for a given vtag * * returns 0 on success, 1 on immediate completion, and -errno on * failure @@ -2784,15 +2753,17 @@ int job_trove_keyval_validate(PVFS_fs_id coll_id, job_aint 
status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { gossip_lerr("Error: unimplemented.\n"); - return (-ENOSYS); + out_status_p->error_code = -PVFS_ENOSYS; + return 1; } /* job_trove_keyval_iterate() * - * iterate through all of the key/value pairs for a data space + * iterate through all of the key/value pairs for a data space * * returns 0 on success, 1 on immediate completion, and -errno on * failure @@ -2809,11 +2780,12 @@ int job_trove_keyval_iterate(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a trove keyval iterate. If it completes (or fails) - * immediately, then return and fill in the status structure. - * If it needs to be tested for completion later, then queue + * immediately, then return and fill in the status structure. + * If it needs to be tested for completion later, then queue * up a job_desc structure. 
*/ int ret = -1; struct job_desc *jd = NULL; @@ -2825,8 +2797,10 @@ int job_trove_keyval_iterate(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->u.trove.vtag = vtag; jd->u.trove.position = position; @@ -2836,14 +2810,13 @@ int job_trove_keyval_iterate(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_KEYVAL_ITERATE, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ ret = trove_keyval_iterate(coll_id, handle, &(jd->u.trove.position), key_array, val_array, &(jd->u.trove.count), flags, jd->u.trove.vtag, - user_ptr_internal, - global_trove_context, &(jd->u.trove.id)); + user_ptr_internal, + global_trove_context, &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -2852,7 +2825,6 @@ int job_trove_keyval_iterate(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_ITERATE, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -2868,7 +2840,6 @@ int job_trove_keyval_iterate(PVFS_fs_id coll_id, out_status_p->vtag = jd->u.trove.vtag; out_status_p->position = jd->u.trove.position; out_status_p->count = jd->u.trove.count; - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_ITERATE, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); @@ -2879,14 +2850,13 @@ int job_trove_keyval_iterate(PVFS_fs_id coll_id, */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_KEYVAL_ITERATE; return (0); } /* job_trove_keyval_iterate_keys() * - * iterate through all of the keys for a data space + * iterate through all of the keys for a data space * * returns 0 on success, 1 on immediate completion, and -errno on * failure @@ -2902,11 +2872,12 @@ int 
job_trove_keyval_iterate_keys(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a trove keyval iterate_keys. If it completes (or fails) - * immediately, then return and fill in the status structure. - * If it needs to be tested for completion later, then queue + * immediately, then return and fill in the status structure. + * If it needs to be tested for completion later, then queue * up a job_desc structure. */ int ret = -1; struct job_desc *jd = NULL; @@ -2918,8 +2889,10 @@ int job_trove_keyval_iterate_keys(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->u.trove.vtag = vtag; jd->u.trove.position = position; @@ -2929,14 +2902,13 @@ int job_trove_keyval_iterate_keys(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_KEYVAL_ITERATE_KEYS, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ ret = trove_keyval_iterate_keys(coll_id, handle, - &(jd->u.trove.position), key_array, + &(jd->u.trove.position), key_array, &(jd->u.trove.count), flags, jd->u.trove.vtag, - user_ptr_internal, - global_trove_context, &(jd->u.trove.id)); + user_ptr_internal, + global_trove_context, &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -2945,7 +2917,6 @@ int job_trove_keyval_iterate_keys(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_ITERATE_KEYS, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -2961,7 +2932,6 @@ int job_trove_keyval_iterate_keys(PVFS_fs_id coll_id, out_status_p->vtag = jd->u.trove.vtag; out_status_p->position = 
jd->u.trove.position; out_status_p->count = jd->u.trove.count; - JOB_EVENT_END(PVFS_EVENT_TROVE_KEYVAL_ITERATE_KEYS, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); @@ -2972,7 +2942,6 @@ int job_trove_keyval_iterate_keys(PVFS_fs_id coll_id, */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_KEYVAL_ITERATE_KEYS; return (0); } @@ -2996,8 +2965,8 @@ int job_trove_dspace_iterate_handles(PVFS_fs_id coll_id, job_context_id context_id) { /* post a trove keyval iterate_handles. If it completes (or fails) - * immediately, then return and fill in the status structure. - * If it needs to be tested for completion later, then queue + * immediately, then return and fill in the status structure. + * If it needs to be tested for completion later, then queue * up a job_desc structure. */ int ret = -1; struct job_desc *jd = NULL; @@ -3009,7 +2978,8 @@ int job_trove_dspace_iterate_handles(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->u.trove.vtag = vtag; @@ -3020,13 +2990,12 @@ int job_trove_dspace_iterate_handles(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_DSPACE_ITERATE_HANDLES, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ ret = trove_dspace_iterate_handles(coll_id, &(jd->u.trove.position), handle_array, &(jd->u.trove.count), flags, jd->u.trove.vtag, - user_ptr_internal, + user_ptr_internal, global_trove_context, &(jd->u.trove.id)); #else gossip_err("Error: Trove support not enabled.\n"); @@ -3036,7 +3005,6 @@ int job_trove_dspace_iterate_handles(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_DSPACE_ITERATE_HANDLES, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -3052,7 +3020,6 @@ int 
job_trove_dspace_iterate_handles(PVFS_fs_id coll_id, out_status_p->vtag = jd->u.trove.vtag; out_status_p->position = jd->u.trove.position; out_status_p->count = jd->u.trove.count; - JOB_EVENT_END(PVFS_EVENT_TROVE_DSPACE_ITERATE_HANDLES, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); @@ -3063,7 +3030,6 @@ int job_trove_dspace_iterate_handles(PVFS_fs_id coll_id, */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_DSPACE_ITERATE_HANDLES; return (0); } @@ -3071,7 +3037,7 @@ int job_trove_dspace_iterate_handles(PVFS_fs_id coll_id, /* job_trove_dspace_create() * - * create a new data space object + * create a new data space object * * returns 0 on success, 1 on immediate completion, and -errno on * failure @@ -3085,7 +3051,8 @@ int job_trove_dspace_create(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a dspace create. If it completes (or fails) immediately, then * return and fill in the status structure. 
If it needs to be tested @@ -3101,8 +3068,10 @@ int job_trove_dspace_create(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->u.trove.handle = PVFS_HANDLE_NULL; jd->context_id = context_id; @@ -3110,7 +3079,8 @@ int job_trove_dspace_create(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_DSPACE_CREATE, jd->job_id); + + #ifdef __PVFS2_TROVE_SUPPORT__ ret = trove_dspace_create(coll_id, @@ -3118,8 +3088,8 @@ int job_trove_dspace_create(PVFS_fs_id coll_id, &(jd->u.trove.handle), type, hint, flags, - user_ptr_internal, - global_trove_context, &(jd->u.trove.id)); + user_ptr_internal, + global_trove_context, &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -3128,7 +3098,6 @@ int job_trove_dspace_create(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_DSPACE_CREATE, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -3142,7 +3111,6 @@ int job_trove_dspace_create(PVFS_fs_id coll_id, out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; out_status_p->handle = jd->u.trove.handle; - JOB_EVENT_END(PVFS_EVENT_TROVE_DSPACE_CREATE, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); @@ -3153,28 +3121,32 @@ int job_trove_dspace_create(PVFS_fs_id coll_id, */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_DSPACE_CREATE; return (0); } -/* job_trove_dspace_remove() +/* job_trove_dspace_create_list() * - * remove an entire data space object (byte stream and key/value) + * create a new data space object * * returns 0 on success, 1 on immediate completion, and -errno on * failure */ -int 
job_trove_dspace_remove(PVFS_fs_id coll_id, - PVFS_handle handle, +int job_trove_dspace_create_list(PVFS_fs_id coll_id, + PVFS_handle_extent_array *handle_extent_array, + PVFS_handle* out_handle_array, + int count, + PVFS_ds_type type, + void *hint, PVFS_ds_flags flags, void *user_ptr, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { - /* post a dspace remove. If it completes (or fails) immediately, then + /* post a dspace create list. If it completes (or fails) immediately, then * return and fill in the status structure. If it needs to be tested * for completion later, then queue up a job_desc structure. */ @@ -3188,21 +3160,27 @@ int job_trove_dspace_remove(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; + jd->u.trove.handle = PVFS_HANDLE_NULL; jd->context_id = context_id; jd->status_user_tag = status_user_tag; jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_DSPACE_REMOVE, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ - ret = trove_dspace_remove(coll_id, - handle, flags, + ret = trove_dspace_create_list(coll_id, + handle_extent_array, + out_handle_array, + count, + type, + hint, flags, user_ptr_internal, - global_trove_context, &(jd->u.trove.id)); + global_trove_context, &(jd->u.trove.id), + hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -3211,7 +3189,6 @@ int job_trove_dspace_remove(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_DSPACE_REMOVE, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; out_status_p->error_code = ret; @@ -3224,7 +3201,6 @@ int job_trove_dspace_remove(PVFS_fs_id coll_id, /* immediate completion */ out_status_p->error_code 
= 0; out_status_p->status_user_tag = status_user_tag; - JOB_EVENT_END(PVFS_EVENT_TROVE_DSPACE_REMOVE, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); @@ -3235,28 +3211,30 @@ int job_trove_dspace_remove(PVFS_fs_id coll_id, */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_DSPACE_REMOVE; return (0); } -/* job_trove_dspace_verify() +/* job_trove_dspace_remove_list() * - * verify that a given dataspace exists and discover its type + * remove a list of data space objects (byte stream and key/value) * * returns 0 on success, 1 on immediate completion, and -errno on * failure */ -int job_trove_dspace_verify(PVFS_fs_id coll_id, - PVFS_handle handle, +int job_trove_dspace_remove_list(PVFS_fs_id coll_id, + PVFS_handle* handle_array, + PVFS_error *out_error_array, + int count, PVFS_ds_flags flags, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id) + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints) { - /* post a dspace verify. If it completes (or fails) immediately, then + /* post a dspace remove_list. If it completes (or fails) immediately, then * return and fill in the status structure. If it needs to be tested * for completion later, then queue up a job_desc structure. 
*/ @@ -3270,7 +3248,8 @@ int job_trove_dspace_verify(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->context_id = context_id; @@ -3278,13 +3257,16 @@ int job_trove_dspace_verify(PVFS_fs_id coll_id, jd->trove_callback.fn = trove_thread_mgr_callback; jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; - JOB_EVENT_START(PVFS_EVENT_TROVE_DSPACE_VERIFY, jd->job_id); #ifdef __PVFS2_TROVE_SUPPORT__ - ret = trove_dspace_verify(coll_id, - handle, &jd->u.trove.type, + ret = trove_dspace_remove_list(coll_id, + handle_array, + out_error_array, + count, flags, - user_ptr_internal, global_trove_context, &(jd->u.trove.id)); + user_ptr_internal, + global_trove_context, &(jd->u.trove.id), + hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -3293,11 +3275,8 @@ int job_trove_dspace_verify(PVFS_fs_id coll_id, if (ret < 0) { /* error posting trove operation */ - JOB_EVENT_END(PVFS_EVENT_TROVE_DSPACE_VERIFY, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; - /* the trove_method will determine what value is returned in immediate - * completion case */ out_status_p->error_code = ret; out_status_p->status_user_tag = status_user_tag; return (1); @@ -3308,7 +3287,6 @@ int job_trove_dspace_verify(PVFS_fs_id coll_id, /* immediate completion */ out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; - JOB_EVENT_END(PVFS_EVENT_TROVE_DSPACE_VERIFY, 0, jd->job_id); dealloc_job_desc(jd); jd = NULL; return (ret); @@ -3319,62 +3297,30 @@ int job_trove_dspace_verify(PVFS_fs_id coll_id, */ *id = jd->job_id; trove_pending_count++; - jd->event_type = PVFS_EVENT_TROVE_DSPACE_VERIFY; return (0); } -/* job_trove_dspace_cancel() - * - * used to cancel a trove dspace operation in progress - * - * returns 0 on success, 1 on immediate completion, and -errno on - * failure - */ -int 
job_trove_dspace_cancel(PVFS_fs_id coll_id, - job_id_t id, - job_context_id context_id) -{ - struct job_desc* query = NULL; - int ret = -1; - - gen_mutex_lock(&completion_mutex); - - query = id_gen_safe_lookup(id); - if (!query || query->completed_flag) - { - /* job has already completed, no cancellation needed */ - gen_mutex_unlock(&completion_mutex); - return(0); - } - - /* tell thread mgr to cancel operation. This will result in normal - * completion path through thread mgr callbacks; no more work to do here */ - ret = PINT_thread_mgr_trove_cancel( - query->u.trove.id, coll_id, &(query->trove_callback)); - - gen_mutex_unlock(&completion_mutex); - - return(ret); -} -/* job_trove_fs_create() +/* job_trove_dspace_remove() * - * create a new file system + * remove an entire data space object (byte stream and key/value) * * returns 0 on success, 1 on immediate completion, and -errno on * failure */ -int job_trove_fs_create(char *collname, - PVFS_fs_id new_coll_id, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id) +int job_trove_dspace_remove(PVFS_fs_id coll_id, + PVFS_handle handle, + PVFS_ds_flags flags, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints) { - /* post an fs create. If it completes (or fails) immediately, then + /* post a dspace remove. If it completes (or fails) immediately, then * return and fill in the status structure. If it needs to be tested * for completion later, then queue up a job_desc structure. 
*/ @@ -3388,7 +3334,8 @@ int job_trove_fs_create(char *collname, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->context_id = context_id; @@ -3397,9 +3344,13 @@ int job_trove_fs_create(char *collname, jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; + + #ifdef __PVFS2_TROVE_SUPPORT__ - ret = trove_collection_create(collname, new_coll_id, user_ptr_internal, - &(jd->u.trove.id)); + ret = trove_dspace_remove(coll_id, + handle, flags, + user_ptr_internal, + global_trove_context, &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -3434,39 +3385,24 @@ int job_trove_fs_create(char *collname, return (0); } -/* job_trove_fs_remove() - * - * remove an existing file system - * - * returns 0 on success, 1 on immediate completion, and -errno on - * failure - */ -int job_trove_fs_remove(char *collname, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id) -{ - gossip_lerr("Error: unimplemented.\n"); - return (-ENOSYS); -} - -/* job_trove_fs_lookup() +/* job_trove_dspace_verify() * - * lookup a file system based on a string name + * verify that a given dataspace exists and discover its type * * returns 0 on success, 1 on immediate completion, and -errno on * failure */ -int job_trove_fs_lookup(char *collname, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id) +int job_trove_dspace_verify(PVFS_fs_id coll_id, + PVFS_handle handle, + PVFS_ds_flags flags, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints) { - /* post a collection lookup. If it completes (or fails) immediately, then + /* post a dspace verify. 
If it completes (or fails) immediately, then * return and fill in the status structure. If it needs to be tested * for completion later, then queue up a job_desc structure. */ @@ -3480,7 +3416,8 @@ int job_trove_fs_lookup(char *collname, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->context_id = context_id; @@ -3489,11 +3426,13 @@ int job_trove_fs_lookup(char *collname, jd->trove_callback.data = (void*)jd; user_ptr_internal = &jd->trove_callback; + + #ifdef __PVFS2_TROVE_SUPPORT__ - ret = trove_collection_lookup( - TROVE_METHOD_DBPF, - collname, &(jd->u.trove.fsid), - user_ptr_internal, &(jd->u.trove.id)); + ret = trove_dspace_verify(coll_id, + handle, &jd->u.trove.type, + flags, + user_ptr_internal, global_trove_context, &(jd->u.trove.id), hints); #else gossip_err("Error: Trove support not enabled.\n"); ret = -ENOSYS; @@ -3504,6 +3443,8 @@ int job_trove_fs_lookup(char *collname, /* error posting trove operation */ dealloc_job_desc(jd); jd = NULL; + /* the trove_method will determine what value is returned in immediate + * completion case */ out_status_p->error_code = ret; out_status_p->status_user_tag = status_user_tag; return (1); @@ -3514,16 +3455,224 @@ int job_trove_fs_lookup(char *collname, /* immediate completion */ out_status_p->error_code = 0; out_status_p->status_user_tag = status_user_tag; - out_status_p->coll_id = jd->u.trove.fsid; dealloc_job_desc(jd); jd = NULL; return (ret); } - /* there is no way we can test on this if we don't know the coll_id */ - gossip_lerr("Error: trove_collection_lookup() returned 0 ???\n"); - - return (-EINVAL); + /* if we fall through to this point, the job did not + * immediately complete and we must queue up to test later + */ + *id = jd->job_id; + trove_pending_count++; + + return (0); +} + +/* job_trove_dspace_cancel() + * + * used to cancel a trove dspace operation in progress + * + * returns 0 on success, 1 on 
immediate completion, and -errno on + * failure + */ +int job_trove_dspace_cancel(PVFS_fs_id coll_id, + job_id_t id, + job_context_id context_id) +{ + struct job_desc* query = NULL; + int ret = -1; + + gen_mutex_lock(&completion_mutex); + + query = id_gen_safe_lookup(id); + if (!query || query->completed_flag) + { + /* job has already completed, no cancellation needed */ + gen_mutex_unlock(&completion_mutex); + return(0); + } + + /* tell thread mgr to cancel operation. This will result in normal + * completion path through thread mgr callbacks; no more work to do here */ + ret = PINT_thread_mgr_trove_cancel( + query->u.trove.id, coll_id, &(query->trove_callback)); + + gen_mutex_unlock(&completion_mutex); + + return(ret); +} + + +/* job_trove_fs_create() + * + * create a new file system + * + * returns 0 on success, 1 on immediate completion, and -errno on + * failure + */ +int job_trove_fs_create(char *collname, + PVFS_fs_id new_coll_id, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id) +{ + /* post an fs create. If it completes (or fails) immediately, then + * return and fill in the status structure. If it needs to be tested + * for completion later, then queue up a job_desc structure. + */ + int ret = -1; + struct job_desc *jd = NULL; + void* user_ptr_internal; + + /* create the job desc first, even though we may not use it. 
This + * gives us somewhere to store the BMI id and user ptr + */ + jd = alloc_job_desc(JOB_TROVE); + if (!jd) + { + out_status_p->error_code = -PVFS_ENOMEM; + return 1; + } + jd->job_user_ptr = user_ptr; + jd->context_id = context_id; + jd->status_user_tag = status_user_tag; + jd->trove_callback.fn = trove_thread_mgr_callback; + jd->trove_callback.data = (void*)jd; + user_ptr_internal = &jd->trove_callback; + +#ifdef __PVFS2_TROVE_SUPPORT__ + ret = trove_collection_create(collname, new_coll_id, user_ptr_internal, + &(jd->u.trove.id)); +#else + gossip_err("Error: Trove support not enabled.\n"); + ret = -ENOSYS; +#endif + + if (ret < 0) + { + /* error posting trove operation */ + dealloc_job_desc(jd); + jd = NULL; + out_status_p->error_code = ret; + out_status_p->status_user_tag = status_user_tag; + return (1); + } + + if (ret == 1) + { + /* immediate completion */ + out_status_p->error_code = 0; + out_status_p->status_user_tag = status_user_tag; + dealloc_job_desc(jd); + jd = NULL; + return (ret); + } + + /* if we fall through to this point, the job did not + * immediately complete and we must queue up to test later + */ + *id = jd->job_id; + trove_pending_count++; + + return (0); +} + +/* job_trove_fs_remove() + * + * remove an existing file system + * + * returns 0 on success, 1 on immediate completion, and -errno on + * failure + */ +int job_trove_fs_remove(char *collname, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id) +{ + gossip_lerr("Error: unimplemented.\n"); + out_status_p->error_code = -PVFS_ENOSYS; + return 1; +} + +/* job_trove_fs_lookup() + * + * lookup a file system based on a string name + * + * returns 0 on success, 1 on immediate completion, and -errno on + * failure + */ +int job_trove_fs_lookup(char *collname, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id) +{ + /* post a collection lookup. 
If it completes (or fails) immediately, then + * return and fill in the status structure. If it needs to be tested + * for completion later, then queue up a job_desc structure. + */ + int ret = -1; + struct job_desc *jd = NULL; + void* user_ptr_internal; + + /* create the job desc first, even though we may not use it. This + * gives us somewhere to store the BMI id and user ptr + */ + jd = alloc_job_desc(JOB_TROVE); + if (!jd) + { + out_status_p->error_code = -PVFS_ENOMEM; + return 1; + } + jd->job_user_ptr = user_ptr; + jd->context_id = context_id; + jd->status_user_tag = status_user_tag; + jd->trove_callback.fn = trove_thread_mgr_callback; + jd->trove_callback.data = (void*)jd; + user_ptr_internal = &jd->trove_callback; + +#ifdef __PVFS2_TROVE_SUPPORT__ + ret = trove_collection_lookup( + TROVE_METHOD_DBPF, + collname, &(jd->u.trove.fsid), + user_ptr_internal, &(jd->u.trove.id)); +#else + gossip_err("Error: Trove support not enabled.\n"); + ret = -ENOSYS; +#endif + + if (ret < 0) + { + /* error posting trove operation */ + dealloc_job_desc(jd); + jd = NULL; + out_status_p->error_code = ret; + out_status_p->status_user_tag = status_user_tag; + return (1); + } + + if (ret == 1) + { + /* immediate completion */ + out_status_p->error_code = 0; + out_status_p->status_user_tag = status_user_tag; + out_status_p->coll_id = jd->u.trove.fsid; + dealloc_job_desc(jd); + jd = NULL; + return (ret); + } + + /* there is no way we can test on this if we don't know the coll_id */ + gossip_lerr("Error: trove_collection_lookup() returned 0 ???\n"); + + out_status_p->error_code = -PVFS_EINVAL; + return 1; } /* job_trove_fs_set_eattr() @@ -3541,11 +3690,12 @@ int job_trove_fs_seteattr(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a trove collection set eattr. If it completes (or fails) - * immediately, then return and fill in the status structure. 
- * If it needs to be tested for completion later, then queue + * immediately, then return and fill in the status structure. + * If it needs to be tested for completion later, then queue * up a job_desc structure. */ int ret = -1; struct job_desc *jd = NULL; @@ -3557,8 +3707,10 @@ int job_trove_fs_seteattr(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->context_id = context_id; jd->status_user_tag = status_user_tag; @@ -3619,11 +3771,12 @@ int job_trove_fs_geteattr(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id) + job_context_id context_id, + PVFS_hint hints) { /* post a trove collection get eattr. If it completes (or fails) - * immediately, then return and fill in the status structure. - * If it needs to be tested for completion later, then queue + * immediately, then return and fill in the status structure. + * If it needs to be tested for completion later, then queue * up a job_desc structure. 
*/ int ret = -1; struct job_desc *jd = NULL; @@ -3635,7 +3788,8 @@ int job_trove_fs_geteattr(PVFS_fs_id coll_id, jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } jd->job_user_ptr = user_ptr; jd->context_id = context_id; @@ -3682,43 +3836,125 @@ int job_trove_fs_geteattr(PVFS_fs_id coll_id, return (0); } -/* job_null() + +/* job_trove_fs_del_eattr() * - * post null job; can be used to trigger asynchronous state transitions - * without doing any underlying work + * delete extended attribute for a file system * - * returns 0 on success, -PVFS_error on failure - * NOTE: immediate completion not allowed here + * returns 0 on success, 1 on immediate completion, and -errno on + * failure */ -int job_null( - int error_code, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id) +int job_trove_fs_deleattr(PVFS_fs_id coll_id, + PVFS_ds_keyval * key_p, + PVFS_ds_flags flags, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints) { + /* post a trove collection del eattr. If it completes (or fails) + * immediately, then return and fill in the status structure. + * If it needs to be tested for completion later, then queue + * up a job_desc structure. */ + int ret = -1; struct job_desc *jd = NULL; + void* user_ptr_internal; - jd = alloc_job_desc(JOB_NULL); + /* create the job desc first, even though we may not use it. 
This + * gives us somewhere to store the BMI id and user ptr + */ + jd = alloc_job_desc(JOB_TROVE); if (!jd) { - return (-errno); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; } + jd->hints = hints; jd->job_user_ptr = user_ptr; jd->context_id = context_id; jd->status_user_tag = status_user_tag; - jd->u.null_info.error_code = error_code; + jd->trove_callback.fn = trove_thread_mgr_callback; + jd->trove_callback.data = (void*)jd; + user_ptr_internal = &jd->trove_callback; - gen_mutex_lock(&completion_mutex); - job_desc_q_add(completion_queue_array[jd->context_id], - jd); - /* set completed flag while holding queue lock */ - jd->completed_flag = 1; -#ifdef __PVFS2_JOB_THREADED__ - /* wake up anyone waiting for completion */ - pthread_cond_signal(&completion_cond); -#endif +#ifdef __PVFS2_TROVE_SUPPORT__ + ret = trove_collection_deleattr(coll_id, key_p, flags, + user_ptr_internal, global_trove_context, + &(jd->u.trove.id)); +#else + gossip_err("%s: error: Trove support not enabled.\n", __func__); + ret = -ENOSYS; +#endif + + if (ret < 0) + { + /* error posting trove operation */ + dealloc_job_desc(jd); + jd = NULL; + out_status_p->error_code = ret; + out_status_p->status_user_tag = status_user_tag; + return (1); + } + + if (ret == 1) + { + /* immediate completion */ + out_status_p->error_code = 0; + out_status_p->status_user_tag = status_user_tag; + dealloc_job_desc(jd); + jd = NULL; + return (ret); + } + + /* if we fall through to this point, the job did not + * immediately complete and we must queue up to test later + */ + *id = jd->job_id; + trove_pending_count++; + + return (0); +} + +/* job_null() + * + * post null job; can be used to trigger asynchronous state transitions + * without doing any underlying work + * + * returns 0 on success, -PVFS_error on failure + * NOTE: immediate completion not allowed here + */ +int job_null( + int error_code, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id 
context_id) +{ + struct job_desc *jd = NULL; + + jd = alloc_job_desc(JOB_NULL); + if (!jd) + { + out_status_p->error_code = -PVFS_ENOMEM; + return 1; + } + jd->job_user_ptr = user_ptr; + jd->context_id = context_id; + jd->status_user_tag = status_user_tag; + jd->u.null_info.error_code = error_code; + + gen_mutex_lock(&completion_mutex); + job_desc_q_add(completion_queue_array[jd->context_id], + jd); + /* set completed flag while holding queue lock */ + jd->completed_flag = 1; +#ifdef __PVFS2_JOB_THREADED__ + /* wake up anyone waiting for completion */ + pthread_cond_signal(&completion_cond); +#endif gen_mutex_unlock(&completion_mutex); return(0); @@ -3728,7 +3964,7 @@ int job_null( /* job_test() * * check for completion of a particular job, don't return until - * either job completes or timeout expires + * either job completes or timeout expires * * returns 0 if nothing done, 1 if something done, -errno on failure */ @@ -3756,7 +3992,7 @@ int job_test(job_id_t id, /* job_testsome() * * check for completion of a set of jobs, don't return until - * either all jobs complete or timeout expires + * either all jobs complete or timeout expires * * returns 0 on success, -errno on failure */ @@ -3813,7 +4049,7 @@ int job_testsome(job_id_t * id_array, if(timeout_ms > 0) { - pthread_ret = pthread_cond_timedwait(&completion_cond, + pthread_ret = pthread_cond_timedwait(&completion_cond, &completion_mutex, &pthread_timeout); } @@ -3838,7 +4074,7 @@ int job_testsome(job_id_t * id_array, EINVAL) && pthread_ret != ETIMEDOUT) { /* pthread_cond_wait() gave a weird return code; pass along to - * caller + * caller */ ret = pthread_ret; } @@ -3852,7 +4088,7 @@ int job_testsome(job_id_t * id_array, /* job_testsome() * * check for completion of a set of jobs, don't return until - * either all jobs complete or timeout expires + * either all jobs complete or timeout expires * * returns 0 if nothing done, 1 if something done, -errno on failure */ @@ -3959,7 +4195,7 @@ int 
job_testsome(job_id_t * id_array, time_exhaust_flag = 1; } else if(timeout_ms < 0) - { + { time_exhaust_flag = 0; } else @@ -4052,7 +4288,7 @@ int job_testcontext(job_id_t * out_id_array_p, if(timeout_ms > 0) { - pthread_ret = pthread_cond_timedwait(&completion_cond, + pthread_ret = pthread_cond_timedwait(&completion_cond, &completion_mutex, &pthread_timeout); } @@ -4077,7 +4313,7 @@ int job_testcontext(job_id_t * out_id_array_p, EINVAL) && pthread_ret != ETIMEDOUT) { /* pthread_cond_wait() gave a weird return code; pass along to - * caller + * caller */ ret = pthread_ret; } @@ -4197,7 +4433,7 @@ int job_testcontext(job_id_t * out_id_array_p, time_exhaust_flag = 1; } else if(timeout_ms < 0) - { + { time_exhaust_flag = 0; } else @@ -4288,20 +4524,21 @@ static void teardown_queues(void) return; } -/* trove_thread_mgr_callback() +#ifdef __PVFS2_TROVE_SUPPORT__ + +/* precreate_pool_get_thread_mgr_callback_unlocked() * - * callback function executed by the thread manager for Trove when a Trove - * job completes + * callback function executed by the thread manager for precreate pool get + * when a trove operation completes * * no return value */ -static void trove_thread_mgr_callback( +static void precreate_pool_get_thread_mgr_callback_unlocked( void* data, PVFS_error error_code) { - struct job_desc* tmp_desc = (struct job_desc*)data; - assert(tmp_desc); - + struct precreate_pool_get_trove* tmp_trove = data; + gen_mutex_lock(&initialized_mutex); if(initialized == 0) { @@ -4311,40 +4548,59 @@ static void trove_thread_mgr_callback( } gen_mutex_unlock(&initialized_mutex); - gen_mutex_lock(&completion_mutex); - if (tmp_desc->completed_flag == 0) + if(error_code == 0) + { + gossip_debug(GOSSIP_JOB_DEBUG, + "Got precreated handle: %llu\n", + llu(*((PVFS_handle*)tmp_trove->key.buffer))); + } + + trove_pending_count--; + tmp_trove->jd->u.precreate_pool.trove_pending--; + + /* don't overwrite error codes from other trove ops */ + if(tmp_trove->jd->u.precreate_pool.error_code == 
0) + { + tmp_trove->jd->u.precreate_pool.error_code = error_code; + } + + /* is this job done? */ + if(tmp_trove->jd->u.precreate_pool.trove_pending == 0) { + gen_mutex_lock(&completion_mutex); + /* set job descriptor fields and put into completion queue */ - tmp_desc->u.trove.state = error_code; - job_desc_q_add(completion_queue_array[tmp_desc->context_id], - tmp_desc); + tmp_trove->jd->u.precreate_pool.error_code = 0; + job_desc_q_add(completion_queue_array[tmp_trove->jd->context_id], + tmp_trove->jd); /* set completed flag while holding queue lock */ - tmp_desc->completed_flag = 1; - - trove_pending_count--; + tmp_trove->jd->completed_flag = 1; #ifdef __PVFS2_JOB_THREADED__ /* wake up anyone waiting for completion */ pthread_cond_signal(&completion_cond); #endif + free(tmp_trove->jd->u.precreate_pool.data); + gen_mutex_unlock(&completion_mutex); + return; } - gen_mutex_unlock(&completion_mutex); + + return; } -/* bmi_thread_mgr_callback() + +/* precreate_pool_iterate_callback() * - * callback function executed by the thread manager for BMI when a BMI - * job completes + * callback function executed by the thread mgr when a trove iterate + * completes * * no return value */ -static void bmi_thread_mgr_callback( +static void precreate_pool_iterate_callback( void* data, - PVFS_size actual_size, PVFS_error error_code) -{ - struct job_desc* tmp_desc = (struct job_desc*)data; - assert(tmp_desc); +{ + struct job_desc* tmp_desc = (struct job_desc*)data; gen_mutex_lock(&initialized_mutex); if(initialized == 0) @@ -4359,14 +4615,14 @@ static void bmi_thread_mgr_callback( if (tmp_desc->completed_flag == 0) { /* set job descriptor fields and put into completion queue */ - tmp_desc->u.bmi.error_code = error_code; - tmp_desc->u.bmi.actual_size = actual_size; - job_desc_q_add(completion_queue_array[tmp_desc->context_id], + tmp_desc->u.precreate_pool.error_code = error_code; + free(tmp_desc->u.precreate_pool.key_array); + 
job_desc_q_add(completion_queue_array[tmp_desc->context_id], tmp_desc); /* set completed flag while holding queue lock */ tmp_desc->completed_flag = 1; - bmi_pending_count--; + trove_pending_count--; #ifdef __PVFS2_JOB_THREADED__ /* wake up anyone waiting for completion */ @@ -4374,19 +4630,52 @@ static void bmi_thread_mgr_callback( #endif } gen_mutex_unlock(&completion_mutex); + + return; } -/* bmi_thread_mgr_unexp_handler() +/* precreate_pool_get_thread_mgr_callback() * - * callback function executed by the thread manager for BMI when an unexpected - * BMI message arrives + * callback function executed by the thread manager for precreate pool get + * when a trove operation completes * * no return value */ -static void bmi_thread_mgr_unexp_handler( - struct BMI_unexpected_info* unexp) +static void precreate_pool_get_thread_mgr_callback( + void* data, + PVFS_error error_code) +{ + gen_mutex_lock(&precreate_pool_mutex); + precreate_pool_get_thread_mgr_callback_unlocked(data, error_code); + gen_mutex_unlock(&precreate_pool_mutex); +} + +/* precreate_pool_fill_thread_mgr_callback() + * + * callback function executed by the thread manager for precreate pool fill + * when a trove operation completes + * + * no return value + */ +static void precreate_pool_fill_thread_mgr_callback( + void* data, + PVFS_error error_code) { - struct job_desc* tmp_desc = NULL; + struct job_desc* jd = (struct job_desc*)data; + struct job_desc* jd_checker; + int ret; + int count = 0; + int i; + struct qlist_head* iterator; + struct qlist_head* scratch; + struct precreate_pool* pool; + int awoken_count = 0; + QLIST_HEAD(tmp_list); + job_id_t tmp_id; + int extra_trove_flags = 0; + struct fs_pool* fs; + + assert(jd); gen_mutex_lock(&initialized_mutex); if(initialized == 0) @@ -4397,98 +4686,409 @@ static void bmi_thread_mgr_unexp_handler( } gen_mutex_unlock(&initialized_mutex); - gen_mutex_lock(&bmi_unexp_mutex); - /* remove the operation from the pending bmi_unexp queue */ - tmp_desc = 
job_desc_q_shownext(bmi_unexp_queue); - assert(tmp_desc != NULL); /* TODO: fix this */ - if (tmp_desc->completed_flag == 0) + if(error_code != 0) { - job_desc_q_remove(tmp_desc); - bmi_unexp_pending_count--; - gen_mutex_unlock(&bmi_unexp_mutex); - /* set appropriate fields and store in completed queue */ - *(tmp_desc->u.bmi_unexp.info) = *unexp; + gossip_err("Error: unable to write all precreated handles to pool.\n"); + gossip_err("Warning: fsck may be needed to recover stranded handles.\n"); + free(jd->u.precreate_pool.key_array); gen_mutex_lock(&completion_mutex); + + /* set job descriptor fields and put into completion queue */ + jd->u.precreate_pool.error_code = error_code; + job_desc_q_add(completion_queue_array[jd->context_id], jd); /* set completed flag while holding queue lock */ - tmp_desc->completed_flag = 1; - if (completion_queue_array[tmp_desc->context_id]) + jd->completed_flag = 1; +#ifdef __PVFS2_JOB_THREADED__ + /* wake up anyone waiting for completion */ + pthread_cond_signal(&completion_cond); +#endif + gen_mutex_unlock(&completion_mutex); + return; + } + + if(jd->u.precreate_pool.first_callback_flag == 1) + { + /* this is the first post */ + gossip_debug(GOSSIP_JOB_DEBUG, "precreate_pool_fill_thread_mgr_callback() first post.\n"); + jd->u.precreate_pool.first_callback_flag = 0; + } + else + { + gossip_debug(GOSSIP_JOB_DEBUG, "precreate_pool_fill_thread_mgr_callback() completed trove op.\n"); + /* a trove operation completed successfully */ + jd->u.precreate_pool.precreate_handle_index += + jd->u.precreate_pool.posted_count; + trove_pending_count--; + + /* increment in-memory count for this pool */ + gen_mutex_lock(&precreate_pool_mutex); + fs = find_fs(jd->u.precreate_pool.fsid); + assert(fs); + + qlist_for_each(iterator, &fs->precreate_pool_list) { - job_desc_q_add(completion_queue_array[tmp_desc->context_id], - tmp_desc); + pool = qlist_entry(iterator, struct precreate_pool, + list_link); + if(pool->pool_handle == 
jd->u.precreate_pool.precreate_pool) + { + pool->pool_count += jd->u.precreate_pool.posted_count; + gossip_debug(GOSSIP_JOB_DEBUG, + "Pool count for handle %llu (type %u) incremented to %d\n", + llu(pool->pool_handle), pool->pool_type, + pool->pool_count); + break; + } + } + + /* find out if anyone was sleeping because a pool was empty */ + gossip_debug(GOSSIP_JOB_DEBUG, "checking for get_handles() sleepers\n"); + qlist_for_each_safe(iterator, scratch, + &precreate_pool_get_handles_list) + { + jd_checker = qlist_entry(iterator, struct job_desc, + job_desc_q_link); + + awoken_count++; + /* put them on a new local queue */ + qlist_del(&jd_checker->job_desc_q_link); + qlist_add(&jd_checker->job_desc_q_link, &tmp_list); + gossip_debug(GOSSIP_JOB_DEBUG, "Found someone waiting to get handles from precreate pool\n"); + + if(awoken_count == jd->u.precreate_pool.posted_count) + { + /* that's as many as we should wake up right now */ + break; + } + } + gen_mutex_unlock(&precreate_pool_mutex); + + /* now that we have collected the sleepers into our own private + * queue, we can push them without the precreate_pool_mutex held + */ + gossip_debug(GOSSIP_JOB_DEBUG, "About to push on get_handles() sleepers.\n"); + qlist_for_each_safe(iterator, scratch, &tmp_list) + { + jd_checker = qlist_entry(iterator, struct job_desc, + job_desc_q_link); + qlist_del(&jd_checker->job_desc_q_link); + gossip_debug(GOSSIP_JOB_DEBUG, "Pushing get_handles() sleeper for jd: %p.\n", jd_checker); + precreate_pool_get_handles_try_post(jd_checker); } + } + + /* are we done? 
*/ + if(jd->u.precreate_pool.precreate_handle_index >= + jd->u.precreate_pool.precreate_handle_count) + { + free(jd->u.precreate_pool.key_array); + gen_mutex_lock(&completion_mutex); + + /* set job descriptor fields and put into completion queue */ + jd->u.precreate_pool.error_code = 0; + job_desc_q_add(completion_queue_array[jd->context_id], + jd); + /* set completed flag while holding queue lock */ + jd->completed_flag = 1; #ifdef __PVFS2_JOB_THREADED__ /* wake up anyone waiting for completion */ pthread_cond_signal(&completion_cond); #endif gen_mutex_unlock(&completion_mutex); + return; } - else + + /* fill in information for next keyval write */ + for(i=jd->u.precreate_pool.precreate_handle_index; + (i < jd->u.precreate_pool.precreate_handle_count && + (i < (jd->u.precreate_pool.precreate_handle_index + + PRECREATE_POOL_MAX_KEYS))); + i++) { - gen_mutex_unlock(&bmi_unexp_mutex); + jd->u.precreate_pool.key_array[count].buffer = + &jd->u.precreate_pool.precreate_handle_array[i]; + jd->u.precreate_pool.key_array[count].buffer_sz = sizeof(PVFS_handle); + count++; + + /* always leave the values zeroed out */ } -} -/* dev_thread_mgr_unexp_handler() - * - * callback function executed by the thread manager for dev when an unexpected - * device message arrives - * - * no return value - */ -static void dev_thread_mgr_unexp_handler(struct PINT_dev_unexp_info* unexp) -{ - struct job_desc* tmp_desc = NULL; + jd->u.precreate_pool.posted_count = count; - gen_mutex_lock(&dev_unexp_mutex); - /* remove the operation from the pending dev_unexp queue */ - tmp_desc = job_desc_q_shownext(dev_unexp_queue); - assert(tmp_desc != NULL); /* TODO: fix this */ - if (tmp_desc->completed_flag == 0) + if((jd->u.precreate_pool.posted_count + + jd->u.precreate_pool.precreate_handle_index) + >= jd->u.precreate_pool.precreate_handle_count) { - job_desc_q_remove(tmp_desc); - dev_unexp_pending_count--; - gen_mutex_unlock(&dev_unexp_mutex); - /* set appropriate fields and store in completed queue */ 
- *(tmp_desc->u.dev_unexp.info) = *unexp; + /* this will be the last set written; sync db */ + extra_trove_flags |= TROVE_SYNC; + } + + gossip_debug(GOSSIP_JOB_DEBUG, "job_precreate_pool_fill() posting trove_keyval_write_list()\n"); + ret = trove_keyval_write_list(jd->u.precreate_pool.fsid, + jd->u.precreate_pool.precreate_pool, + jd->u.precreate_pool.key_array, + NULL, + count, + (TROVE_BINARY_KEY|TROVE_NOOVERWRITE| + TROVE_KEYVAL_HANDLE_COUNT|extra_trove_flags), + NULL, + &jd->trove_callback, + global_trove_context, + &tmp_id, + jd->hints); + + trove_pending_count++; + + if(ret < 0) + { + gossip_err("Error: unable to write all precreated handles to pool.\n"); + gossip_err("Warning: fsck may be needed to recover stranded handles.\n"); gen_mutex_lock(&completion_mutex); - /* set completed flag while holding queue lock */ - tmp_desc->completed_flag = 1; - if (completion_queue_array[tmp_desc->context_id]) - { - job_desc_q_add(completion_queue_array[tmp_desc->context_id], - tmp_desc); - } + /* set job descriptor fields and put into completion queue */ + jd->u.precreate_pool.error_code = ret; + job_desc_q_add(completion_queue_array[jd->context_id], jd); + /* set completed flag while holding queue lock */ + jd->completed_flag = 1; #ifdef __PVFS2_JOB_THREADED__ /* wake up anyone waiting for completion */ pthread_cond_signal(&completion_cond); #endif gen_mutex_unlock(&completion_mutex); + return; + } + else if(ret == 1) + { + gossip_debug(GOSSIP_JOB_DEBUG, "trove_keyval_write_list() immediate completion\n"); + precreate_pool_fill_thread_mgr_callback(jd, 0); } else { - gen_mutex_unlock(&dev_unexp_mutex); + gossip_debug(GOSSIP_JOB_DEBUG, "trove_keyval_write_list() returned zero\n"); } + + return; } +#endif /* __PVFS2_TROVE_SUPPORT__ */ -/* fill_status() + +/* trove_thread_mgr_callback() * - * fills in the completion status based on the given job descriptor + * callback function executed by the thread manager for Trove when a Trove + * job completes * * no return value */ 
-static void fill_status(struct job_desc *jd, - void **returned_user_ptr_p, - job_status_s * status) +static void trove_thread_mgr_callback( + void* data, + PVFS_error error_code) { - assert(jd); - assert(status); - - status->status_user_tag = jd->status_user_tag; + struct job_desc* tmp_desc = (struct job_desc*)data; + assert(tmp_desc); - if (returned_user_ptr_p) - { + gen_mutex_lock(&initialized_mutex); + if(initialized == 0) + { + /* The job interface has been shutdown. Silently ignore callback. */ + gen_mutex_unlock(&initialized_mutex); + return; + } + gen_mutex_unlock(&initialized_mutex); + + gen_mutex_lock(&completion_mutex); + if (tmp_desc->completed_flag == 0) + { + /* set job descriptor fields and put into completion queue */ + tmp_desc->u.trove.state = error_code; + job_desc_q_add(completion_queue_array[tmp_desc->context_id], + tmp_desc); + /* set completed flag while holding queue lock */ + tmp_desc->completed_flag = 1; + +/* the value of trove_pending_count is only used in the non-threaded + * situation. so, to prevent reported data races from helgrind, we + * will only modify it's value in the non-threaded case. +*/ +#ifndef __PVFS2_JOB_THREADED__ + trove_pending_count--; +#endif + +#ifdef __PVFS2_JOB_THREADED__ + /* wake up anyone waiting for completion */ + pthread_cond_signal(&completion_cond); +#endif + } + gen_mutex_unlock(&completion_mutex); +} + +/* bmi_thread_mgr_callback() + * + * callback function executed by the thread manager for BMI when a BMI + * job completes + * + * no return value + */ +static void bmi_thread_mgr_callback( + void* data, + PVFS_size actual_size, + PVFS_error error_code) +{ + struct job_desc* tmp_desc = (struct job_desc*)data; + assert(tmp_desc); + + gen_mutex_lock(&initialized_mutex); + if(initialized == 0) + { + /* The job interface has been shutdown. Silently ignore callback. 
*/ + gen_mutex_unlock(&initialized_mutex); + return; + } + gen_mutex_unlock(&initialized_mutex); + + gen_mutex_lock(&completion_mutex); + if (tmp_desc->completed_flag == 0) + { + /* set job descriptor fields and put into completion queue */ + tmp_desc->u.bmi.error_code = error_code; + tmp_desc->u.bmi.actual_size = actual_size; + job_desc_q_add(completion_queue_array[tmp_desc->context_id], + tmp_desc); + /* set completed flag while holding queue lock */ + tmp_desc->completed_flag = 1; + + bmi_pending_count--; + +#ifdef __PVFS2_JOB_THREADED__ + /* wake up anyone waiting for completion */ + pthread_cond_signal(&completion_cond); +#endif + } + gen_mutex_unlock(&completion_mutex); +} + +/* bmi_thread_mgr_unexp_handler() + * + * callback function executed by the thread manager for BMI when an unexpected + * BMI message arrives + * + * no return value + */ +static void bmi_thread_mgr_unexp_handler( + struct BMI_unexpected_info* unexp) +{ + struct job_desc* tmp_desc = NULL; + + gen_mutex_lock(&initialized_mutex); + if(initialized == 0) + { + /* The job interface has been shutdown. Silently ignore callback. 
*/ + gen_mutex_unlock(&initialized_mutex); + return; + } + gen_mutex_unlock(&initialized_mutex); + + gen_mutex_lock(&bmi_unexp_mutex); + + /* remove the operation from the pending bmi_unexp queue */ + tmp_desc = job_desc_q_shownext(bmi_unexp_queue); + assert(tmp_desc != NULL); + if (tmp_desc->completed_flag == 0) + { + job_desc_q_remove(tmp_desc); + bmi_unexp_pending_count--; + gen_mutex_unlock(&bmi_unexp_mutex); + /* set appropriate fields and store in completed queue */ + *(tmp_desc->u.bmi_unexp.info) = *unexp; + gen_mutex_lock(&completion_mutex); + /* set completed flag while holding queue lock */ + tmp_desc->completed_flag = 1; + if (completion_queue_array[tmp_desc->context_id]) + { + job_desc_q_add(completion_queue_array[tmp_desc->context_id], + tmp_desc); + } + +#ifdef __PVFS2_JOB_THREADED__ + /* wake up anyone waiting for completion */ + pthread_cond_signal(&completion_cond); +#endif + gen_mutex_unlock(&completion_mutex); + } + else + { + gen_mutex_unlock(&bmi_unexp_mutex); + } +} + +/* dev_thread_mgr_unexp_handler() + * + * callback function executed by the thread manager for dev when an unexpected + * device message arrives + * + * no return value + */ +static void dev_thread_mgr_unexp_handler(struct PINT_dev_unexp_info* unexp) +{ + struct job_desc* tmp_desc = NULL; + + gen_mutex_lock(&dev_unexp_mutex); + /* remove the operation from the pending dev_unexp queue */ + tmp_desc = job_desc_q_shownext(dev_unexp_queue); + /* if the thread mgr accounting is accurate, then there _must_ be a + * dev_unexp job posted for us to hit this point. 
+ */ + assert(tmp_desc != NULL); + if (tmp_desc->completed_flag == 0) + { + job_desc_q_remove(tmp_desc); + dev_unexp_pending_count--; + gen_mutex_unlock(&dev_unexp_mutex); + /* set appropriate fields and store in completed queue */ + *(tmp_desc->u.dev_unexp.info) = *unexp; + gen_mutex_lock(&completion_mutex); + /* set completed flag while holding queue lock */ + tmp_desc->completed_flag = 1; + if (completion_queue_array[tmp_desc->context_id]) + { + job_desc_q_add(completion_queue_array[tmp_desc->context_id], + tmp_desc); + } + +#ifdef __PVFS2_JOB_THREADED__ + /* wake up anyone waiting for completion */ + pthread_cond_signal(&completion_cond); +#endif + gen_mutex_unlock(&completion_mutex); + } + else + { + gen_mutex_unlock(&dev_unexp_mutex); + } +} + +/* fill_status() + * + * fills in the completion status based on the given job descriptor + * + * no return value + */ +static void fill_status(struct job_desc *jd, + void **returned_user_ptr_p, + job_status_s * status) +{ + assert(jd); + assert(status); + +#if 0 + gossip_debug(GOSSIP_JOB_DEBUG, + "job fill_status() for id: %llu, type: %d\n", + llu(jd->job_id), jd->type); +#endif + + status->status_user_tag = jd->status_user_tag; + + if (returned_user_ptr_p) + { *returned_user_ptr_p = jd->job_user_ptr; } switch (jd->type) @@ -4511,11 +5111,9 @@ static void fill_status(struct job_desc *jd, status->error_code = jd->u.req_sched.error_code; break; case JOB_TROVE: - /* TODO: make this work out for whatever type of trove - * operation this is... 
- */ status->error_code = jd->u.trove.state; - status->actual_size = jd->u.trove.actual_size; + if(jd->u.trove.out_size_p) + status->actual_size = *jd->u.trove.out_size_p; status->vtag = jd->u.trove.vtag; status->coll_id = jd->u.trove.fsid; status->handle = jd->u.trove.handle; @@ -4533,18 +5131,21 @@ static void fill_status(struct job_desc *jd, case JOB_NULL: status->error_code = jd->u.null_info.error_code; break; + case JOB_PRECREATE_POOL: + status->error_code = jd->u.precreate_pool.error_code; + status->count = jd->u.precreate_pool.count; + status->position = jd->u.precreate_pool.pool_index << 32; + status->position |= jd->u.precreate_pool.position; + break; } - if(jd->event_type) - JOB_EVENT_END(jd->event_type, status->actual_size, jd->job_id); - return; } /* do_one_test_cycle_req_sched() * * tests the request scheduler to see if anything has completed. - * Does not block at all. + * Does not block at all. * * returns 0 on success, -errno on failure */ @@ -4566,7 +5167,7 @@ static int do_one_test_cycle_req_sched(void) { /* critical failure */ /* TODO: can I clean up anything else here? 
*/ - gossip_lerr("Error: critical BMI failure.\n"); + gossip_lerr("Error: critical request scheduler failure.\n"); return (ret); } @@ -4579,7 +5180,7 @@ static int do_one_test_cycle_req_sched(void) gen_mutex_lock(&completion_mutex); /* set completed flag while holding queue lock */ tmp_desc->completed_flag = 1; - job_desc_q_add(completion_queue_array[tmp_desc->context_id], + job_desc_q_add(completion_queue_array[tmp_desc->context_id], tmp_desc); gen_mutex_unlock(&completion_mutex); } @@ -4677,14 +5278,19 @@ static int completion_query_some(job_id_t * id_array, } /* we better not have lost any ops since the first loop through the job - * list + * list */ assert((*inout_count_p) == done_count); return(1); } -/* TODO: fill in comment */ +/* completion_query_context() + * + * retrieves completed jobs from specified context + * + * returns 1 if anything completed, 0 otherwise + */ static int completion_query_context(job_id_t * out_id_array_p, int *inout_count_p, void **returned_user_ptr_array, @@ -4779,10 +5385,14 @@ static void do_one_work_cycle_all(int idle_time_ms) * don't have a single thing to do. Sleep here to prevent busy * spins. 
*/ - struct timespec ts; - ts.tv_sec = idle_time_ms/1000; - ts.tv_nsec = (idle_time_ms%1000)*1000*1000; - nanosleep(&ts, NULL); +#ifdef WIN32 + Sleep(idle_time_ms); +#else + struct timespec ts; + ts.tv_sec = idle_time_ms/1000; + ts.tv_nsec = (idle_time_ms%1000)*1000*1000; + nanosleep(&ts, NULL); +#endif } gen_mutex_unlock(&work_cycle_mutex); @@ -4796,9 +5406,9 @@ static void do_one_work_cycle_all(int idle_time_ms) * * no return value */ -static void flow_callback(flow_descriptor* flow_d) +static void flow_callback(flow_descriptor* flow_d, int cancel_path) { - struct job_desc* tmp_desc = (struct job_desc*)flow_d->user_ptr; + struct job_desc* tmp_desc = (struct job_desc*)flow_d->user_ptr; gen_mutex_lock(&initialized_mutex); if(initialized == 0) @@ -4810,8 +5420,13 @@ static void flow_callback(flow_descriptor* flow_d) gen_mutex_unlock(&initialized_mutex); /* set job descriptor fields and put into completion queue */ - gen_mutex_lock(&completion_mutex); - job_desc_q_add(completion_queue_array[tmp_desc->context_id], + + /* if this is being triggered directly from PINT_flow_cancel(), then the + * completion mutex is already held by the caller; skip the mutex. + */ + if(!cancel_path) + gen_mutex_lock(&completion_mutex); + job_desc_q_add(completion_queue_array[tmp_desc->context_id], tmp_desc); /* set completed flag while holding queue lock */ tmp_desc->completed_flag = 1; @@ -4824,11 +5439,942 @@ static void flow_callback(flow_descriptor* flow_d) /* wake up anyone waiting for completion */ pthread_cond_signal(&completion_cond); #endif - gen_mutex_unlock(&completion_mutex); + if(!cancel_path) + gen_mutex_unlock(&completion_mutex); return; } +#ifdef __PVFS2_TROVE_SUPPORT__ + +/* job_precreate_pool_fill_signal_error() + * + * used for the entity responsible for filling the pool to indicate when + * there are errors preventing it from making progress. 
The error_code will + * be propigated to get_handles() callers that are sleeping if the pool is + * empty + * + * returns 0 on success, 1 on immediate completion, and -PVFS_errno on + * failure + */ +int job_precreate_pool_fill_signal_error( + PVFS_handle precreate_pool, + PVFS_fs_id fsid, + int error_code, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id) +{ + struct job_desc* jd_checker; + struct qlist_head* iterator; + struct qlist_head* scratch; + + gossip_debug(GOSSIP_FLOW_DEBUG, "job_precreate_pool_fill_signal_error() called.\n"); + /* note: this function always processes immediately (returns 1) */ + + gen_mutex_lock(&precreate_pool_mutex); + /* see if anyone is waiting on pool handles */ + qlist_for_each_safe(iterator, scratch, &precreate_pool_get_handles_list) + { + jd_checker = qlist_entry(iterator, struct job_desc, + job_desc_q_link); + + qlist_del(&jd_checker->job_desc_q_link); + + gossip_debug(GOSSIP_FLOW_DEBUG, "job_precreate_pool_fill_signal_error() waking up a get_handles() caller.\n"); + gen_mutex_lock(&completion_mutex); + + /* set job descriptor fields and put into completion queue */ + jd_checker->u.precreate_pool.error_code = error_code; + job_desc_q_add(completion_queue_array[jd_checker->context_id], + jd_checker); + /* set completed flag while holding queue lock */ + jd_checker->completed_flag = 1; + +#ifdef __PVFS2_JOB_THREADED__ + /* wake up anyone waiting for completion */ + pthread_cond_signal(&completion_cond); +#endif + gen_mutex_unlock(&completion_mutex); + } + gen_mutex_unlock(&precreate_pool_mutex); + + out_status_p->error_code = 0; + return(1); +} + +/* job_precreate_pool_fill() + * + * fills in handles for a precreate pool + * + * returns 0 on success, 1 on immediate completion, and -PVFS_errno on + * failure + */ +int job_precreate_pool_fill( + PVFS_handle precreate_pool, + PVFS_fs_id fsid, + PVFS_handle* precreate_handle_array, + int precreate_handle_count, + 
void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints) +{ + struct job_desc *jd = NULL; + + gossip_debug(GOSSIP_JOB_DEBUG, "job_precreate_pool_fill() called.\n"); + + /* create the job desc first, even though we may not use it. This + * gives us somewhere to store information + */ + jd = alloc_job_desc(JOB_PRECREATE_POOL); + if (!jd) + { + return (-errno); + } + jd->job_user_ptr = user_ptr; + jd->context_id = context_id; + jd->status_user_tag = status_user_tag; + jd->hints = hints; + jd->trove_callback.fn = precreate_pool_fill_thread_mgr_callback; + jd->trove_callback.data = (void*)jd; + jd->u.precreate_pool.precreate_pool = precreate_pool; + jd->u.precreate_pool.precreate_handle_array = precreate_handle_array; + jd->u.precreate_pool.precreate_handle_count = precreate_handle_count; + jd->u.precreate_pool.precreate_handle_index = 0; + jd->u.precreate_pool.first_callback_flag = 1; + jd->u.precreate_pool.fsid = fsid; + jd->u.precreate_pool.key_array = + malloc(PRECREATE_POOL_MAX_KEYS*sizeof(TROVE_keyval_s)); + if(!jd->u.precreate_pool.key_array) + { + dealloc_job_desc(jd); + out_status_p->error_code = -PVFS_ENOMEM; + return(1); + } + + /* reuse the logic for trove op completion to get this started */ + precreate_pool_fill_thread_mgr_callback(jd, 0); + + /* for the moment, this type of job cannot immediately complete */ + + *id = jd->job_id; + return (0); +} + +/* job_precreate_pool_lookup_server() + * + * resolves a string hostname into a pool handle + */ +int job_precreate_pool_lookup_server( + const char* host, + PVFS_ds_type type, + PVFS_fs_id fsid, + PVFS_handle* pool_handle) +{ + struct precreate_pool* pool; + struct qlist_head* iterator; + struct fs_pool* fs; + + gen_mutex_lock(&precreate_pool_mutex); + + fs = find_fs(fsid); + assert(fs); + + /* check pool list, go back to sleep if any are empty */ + qlist_for_each(iterator, &fs->precreate_pool_list) + { + pool = 
qlist_entry(iterator, struct precreate_pool, + list_link); + /* only sleep for pools of the type we need to fulfill the request */ + if(!strcmp(pool->host, host) && (pool->pool_type == type) ) + { + *pool_handle = pool->pool_handle; + gen_mutex_unlock(&precreate_pool_mutex); + return(0); + } + } + gen_mutex_unlock(&precreate_pool_mutex); + + return(-PVFS_ENOENT); +} + +/* job_precreate_pool_set_index() + * + * used to assign a unique offset into the list of servers on each daemon in + * the file system, so that load is balanced evenly + */ +void job_precreate_pool_set_index( + int server_index) +{ + struct qlist_head* iterator; + struct qlist_head* iterator2; + int num_pools = 0; + int pool_index = 0; + int current_index = 0; + struct fs_pool* fs; + + gen_mutex_lock(&precreate_pool_mutex); + + qlist_for_each(iterator2, &precreate_pool_fs_list) + { + num_pools = 0; + pool_index = 0; + current_index = 0; + + fs = qlist_entry(iterator2, struct fs_pool, list_link); + + qlist_for_each(iterator, &fs->precreate_pool_list) + { + num_pools++; + } + + if(num_pools == 0) + { + pool_index = 0; + } + else + { + pool_index = server_index % num_pools; + } + + qlist_for_each(iterator, &fs->precreate_pool_list) + { + if(current_index == pool_index) + { + fs->precreate_pool_initial = iterator; + break; + } + current_index++; + } + + /* safety check, should not hit this case */ + if(!fs->precreate_pool_initial) + { + fs->precreate_pool_initial = fs->precreate_pool_list.next; + } + } + + gen_mutex_unlock(&precreate_pool_mutex); + + return; +} + +int job_precreate_pool_register_server( + const char* host, + PVFS_ds_type type, + PVFS_fs_id fsid, + PVFS_handle pool_handle, + int count, + uint32_t *batch_count) +{ + struct precreate_pool* tmp_pool; + struct fs_pool* fs; + + /* create a little struct to track the pool information for this peer + * server + */ + tmp_pool = malloc(sizeof(*tmp_pool)); + if(!tmp_pool) + { + return(-ENOMEM); + } + + tmp_pool->host = strdup(host); + 
if(!tmp_pool->host) + { + free(tmp_pool); + return(-ENOMEM); + } + + tmp_pool->pool_handle = pool_handle; + tmp_pool->pool_count = count; + tmp_pool->pool_type = type; + gossip_debug(GOSSIP_JOB_DEBUG, + "Pool count for handle %llu (type %u) initially set to %d\n", + llu(tmp_pool->pool_handle), tmp_pool->pool_type, + tmp_pool->pool_count); + + gossip_debug(GOSSIP_JOB_DEBUG, + "Initial pool count for host %s, fsid %d, type %u: %d\n", host, + (int)fsid, tmp_pool->pool_type, count); + + /* search through file systems to see if we have registered anything for + * this fsid yet + */ + fs = find_fs(fsid); + if(!fs) + { + /* allocate a new structure for this fsid */ + fs = malloc(sizeof(*fs)); + if(!fs) + { + free(tmp_pool->host); + free(tmp_pool); + return(-ENOMEM); + } + memset(fs, 0, sizeof(*fs)); + fs->fsid = fsid; + + /* copy batch counts we are given into fs_pool struct */ + memcpy(fs->type_batch_count, batch_count, + sizeof(uint32_t)*PVFS_DS_TYPE_COUNT); + int i = 0; + for( i=0; i < PVFS_DS_TYPE_COUNT; i++ ) + { + gossip_debug(GOSSIP_JOB_DEBUG, "%s: fs_pool %p, storing batch " + "count at index %d: %u\n", __func__, fs, i, + fs->type_batch_count[i]); + } + + + fs->precreate_pool_initial = NULL; + INIT_QLIST_HEAD(&fs->precreate_pool_list); + qlist_add(&fs->list_link, &precreate_pool_fs_list); + } + + /* stash the info where we can search and find it later */ + qlist_add(&tmp_pool->list_link, &fs->precreate_pool_list); + + return(1); +} + +/* job_precreate_pool_check_level() + * + * checks to see if the current pool level is below a specified threshold + * + * returns 1 on immediate completion, 0 if level is not low enough yet + */ +int job_precreate_pool_check_level( + PVFS_handle precreate_pool, + PVFS_fs_id fsid, + int low_threshold, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id) +{ + struct qlist_head* iterator; + struct precreate_pool* pool; + struct job_desc *jd = NULL; + struct fs_pool* 
fs; + + gen_mutex_lock(&precreate_pool_mutex); + + fs = find_fs(fsid); + assert(fs); + + qlist_for_each(iterator, &fs->precreate_pool_list) + { + pool = qlist_entry(iterator, struct precreate_pool, + list_link); + if(pool->pool_handle == precreate_pool) + { + if(pool->pool_count < low_threshold) + { + /* handle count is below the low threshold */ + out_status_p->error_code = 0; + gen_mutex_unlock(&precreate_pool_mutex); + gossip_debug(GOSSIP_JOB_DEBUG, "found pool count low for " + "for pool handle %llu.\n", llu(pool->pool_handle)); + return(1); + } + else + { + /* we are above threshold right now; queue up until it drops */ + jd = alloc_job_desc(JOB_PRECREATE_POOL); + if (!jd) + { + out_status_p->error_code = -PVFS_ENOMEM; + gen_mutex_unlock(&precreate_pool_mutex); + return(1); + } + jd->job_user_ptr = user_ptr; + jd->context_id = context_id; + jd->status_user_tag = status_user_tag; + jd->u.precreate_pool.precreate_pool = precreate_pool; + jd->u.precreate_pool.fsid = fsid; + jd->u.precreate_pool.low_threshold = low_threshold; + *id = jd->job_id; + + qlist_add(&jd->job_desc_q_link, &precreate_pool_check_level_list); + gen_mutex_unlock(&precreate_pool_mutex); + gossip_debug(GOSSIP_JOB_DEBUG, "found pool count high for pool " + "handle %llu.\n", llu(pool->pool_handle) ); + return(0); + } + break; + } + } + gen_mutex_unlock(&precreate_pool_mutex); + + return(-PVFS_EINVAL); +} + +/* job_precreate_pool_get_handles() + * + * Retrieves a set of datafile handles from one or more precreate pools. + * Servers may be specified using bmi addresses in the servers array. If + * servers is NULL, then it will provide handles from pools in round robin + * manner. 
+ * + * returns 0 on success, 1 on immediate completion, and -PVFS_errno on failure + */ +int job_precreate_pool_get_handles( + PVFS_fs_id fsid, + int count, + PVFS_ds_type type, + const char** servers, + PVFS_handle* handle_array, + PVFS_ds_flags flags, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints) +{ + struct job_desc *jd = NULL; + struct fs_pool* fs; + int index = 0; + + if(count < 0) + { + out_status_p->error_code = -PVFS_EINVAL; + return(1); + } + + gossip_debug(GOSSIP_JOB_DEBUG, "%s: requesting %d handles of type %u\n", + __func__, count, type); + jd = alloc_job_desc(JOB_PRECREATE_POOL); + if (!jd) + { + out_status_p->error_code = -PVFS_ENOMEM; + return(1); + } + jd->job_user_ptr = user_ptr; + jd->context_id = context_id; + jd->hints = hints; + jd->status_user_tag = status_user_tag; + jd->u.precreate_pool.precreate_handle_array = handle_array; + jd->u.precreate_pool.precreate_handle_count = count; + jd->u.precreate_pool.precreate_handle_index = 0; + jd->u.precreate_pool.fsid = fsid; + jd->u.precreate_pool.servers = servers; + jd->u.precreate_pool.trove_pending = 0; + jd->u.precreate_pool.flags = flags; + jd->u.precreate_pool.type = type; + + /* rotate to use a different starting server in the pool next time */ + gen_mutex_lock(&precreate_pool_mutex); + fs = find_fs(fsid); + assert(fs); + + /* make sure the requested type is actually trying to get handles (i.e. has + * a batch count bigger than 0). 
if not, return einval */ + PVFS_ds_type_to_int(type, &index); + assert(fs->type_batch_count); + if( fs->type_batch_count[index] < 1 ) + { + gen_mutex_unlock(&precreate_pool_mutex); + out_status_p->error_code = -PVFS_EINVAL; + return 1; + } + + jd->u.precreate_pool.current_pool = fs->precreate_pool_initial; + fs->precreate_pool_initial = fs->precreate_pool_initial->next; + gen_mutex_unlock(&precreate_pool_mutex); + + precreate_pool_get_handles_try_post(jd); + + /* for the moment, this type of job cannot immediately complete */ + *id = jd->job_id; + return(0); +} + +/* precreate_pool_get_handles_try_post() + * + * Internal function used by job_precreate_pool_get_handles(). This + * function will check to see if all pools are ready (at least one handle + * available) and then post all required trove operations + * + * no return value + */ +static void precreate_pool_get_handles_try_post(struct job_desc* jd) +{ + struct precreate_pool* pool; + TROVE_op_id tmp_id; + int ret; + struct precreate_pool_get_trove* tmp_trove_array; + struct qlist_head* iterator; + struct qlist_head* scratch; + struct job_desc* jd_checker; + int i, total_pool_count=0, j=0; + struct fs_pool* fs; + + gossip_debug(GOSSIP_JOB_DEBUG, "precreate_pool_get_handles_try_post\n"); + + gen_mutex_lock(&precreate_pool_mutex); + + fs = find_fs(jd->u.precreate_pool.fsid); + assert(fs); + + /* check pool list, go back to sleep if any are empty */ + qlist_for_each(iterator, &fs->precreate_pool_list) + { + pool = qlist_entry(iterator, struct precreate_pool, + list_link); + /* only queue up for the type the call is looking for. no reason to + * to wait on a type we don't need. 
it should get filled later */ + if((pool->pool_count < 1) && + (jd->u.precreate_pool.type == pool->pool_type) ) + { + /* queue up until the count for this pool increases */ + qlist_add(&jd->job_desc_q_link, &precreate_pool_get_handles_list); + gossip_debug(GOSSIP_JOB_DEBUG, "Found empty precreate pool %llu\n", + llu(pool->pool_handle)); + gen_mutex_unlock(&precreate_pool_mutex); + return; + } + } + + /* if we get to this point, set up necessary information for all trove + * operations needed to service job + */ + tmp_trove_array = malloc(jd->u.precreate_pool.precreate_handle_count * + sizeof(struct precreate_pool_get_trove)); + if(!tmp_trove_array) + { + gen_mutex_unlock(&precreate_pool_mutex); + gen_mutex_lock(&completion_mutex); + jd->u.precreate_pool.error_code = -PVFS_ENOMEM; + job_desc_q_add(completion_queue_array[jd->context_id], jd); + jd->completed_flag = 1; +#ifdef __PVFS2_JOB_THREADED__ + /* wake up anyone waiting for completion */ + pthread_cond_signal(&completion_cond); +#endif + gen_mutex_unlock(&completion_mutex); + return; + + } + jd->u.precreate_pool.data = tmp_trove_array; + + /* translate reqested servers and set up necessary fields to post + * trove operations + */ + for(i=0; iu.precreate_pool.precreate_handle_count; i++) + { + if(jd->u.precreate_pool.servers) + { + /* caller wanted specific servers ; search through list and + * set current pool to appropriate entry for this server + */ + jd->u.precreate_pool.current_pool = NULL; /* sentinal */ + qlist_for_each(iterator, &fs->precreate_pool_list) + { + pool = qlist_entry(iterator, struct precreate_pool, + list_link); + /* in addition to matching host name, now we also make + * sure it's the correct type of pool for the specified + * server */ + if( (!strcmp(pool->host, jd->u.precreate_pool.servers[i])) && + (pool->pool_type == jd->u.precreate_pool.type) ) + { + jd->u.precreate_pool.current_pool = iterator; + break; + } + } + if(!jd->u.precreate_pool.current_pool) + { + gossip_err("Error: 
get_handles(): unknown server: %s\n", + jd->u.precreate_pool.servers[i]); + + free(tmp_trove_array); + gen_mutex_unlock(&precreate_pool_mutex); + + gen_mutex_lock(&completion_mutex); + jd->u.precreate_pool.error_code = -PVFS_EINVAL; + job_desc_q_add(completion_queue_array[jd->context_id], jd); + jd->completed_flag = 1; + #ifdef __PVFS2_JOB_THREADED__ + /* wake up anyone waiting for completion */ + pthread_cond_signal(&completion_cond); + #endif + gen_mutex_unlock(&completion_mutex); + return; + } + } + else + { + /* caller wants whatever we hand out so we only need to check + * that the pool is of the correct type. use the original code + * to either start the process or move to the next. afterwards, + * we cycle until we get to the right type */ + if(jd->u.precreate_pool.current_pool == NULL || + jd->u.precreate_pool.current_pool->next == &fs->precreate_pool_list) + { + /* either we are just starting, or we have wrapped around */ + jd->u.precreate_pool.current_pool = fs->precreate_pool_list.next; + } + else + { + /* normal case; cycle to next pool */ + jd->u.precreate_pool.current_pool = + jd->u.precreate_pool.current_pool->next; + } + + /* ensure we don't loop forever, we want to look through at most + * total_pool_count pools no matter the place in the list we start + * at. if we don't find a pool, then there isn't a pool with the + * requested type. we'll call that einval below.*/ + j = 0; + total_pool_count = qlist_count( &fs->precreate_pool_list ); + gossip_debug( GOSSIP_SERVER_DEBUG, "%s: total pool count %d\n", + __func__, total_pool_count); + + /* maybe too succinct? get the pool entry from the qlist, then + * if we haven't looked through too many items and we actually + * got a pool item, see if it matches types. if not, assign the next + * pool and do it again. 
*/ + do + { + pool = qlist_entry(jd->u.precreate_pool.current_pool, + struct precreate_pool, list_link); + } + while( ( j++ < total_pool_count ) && ( pool != NULL ) && + ( pool->pool_type != jd->u.precreate_pool.type ) && + ( jd->u.precreate_pool.current_pool = + jd->u.precreate_pool.current_pool->next) ); + + /* either we got something null, we iterated through pool count + * items or, hopefully, we found a pool of the correct type! + * look at the pool's type, if it's wrong, exit */ + if(pool->pool_type != jd->u.precreate_pool.type) + { + gossip_err("Error %s : could not find pool of " + "type %u\n", __func__, jd->u.precreate_pool.type); + + free(tmp_trove_array); + gen_mutex_unlock(&precreate_pool_mutex); + + gen_mutex_lock(&completion_mutex); + jd->u.precreate_pool.error_code = -PVFS_EINVAL; + job_desc_q_add(completion_queue_array[jd->context_id], jd); + jd->completed_flag = 1; + #ifdef __PVFS2_JOB_THREADED__ + /* wake up anyone waiting for completion */ + pthread_cond_signal(&completion_cond); + #endif + gen_mutex_unlock(&completion_mutex); + return; + } + } + + tmp_trove_array[i].pool = qlist_entry(jd->u.precreate_pool.current_pool, + struct precreate_pool, list_link); + + tmp_trove_array[i].jd = jd; + tmp_trove_array[i].pos = PVFS_ITERATE_START; + tmp_trove_array[i].count = 1; + tmp_trove_array[i].key.buffer + = &jd->u.precreate_pool.precreate_handle_array[i]; + tmp_trove_array[i].key.buffer_sz = sizeof(PVFS_handle); + tmp_trove_array[i].trove_callback.fn + = precreate_pool_get_thread_mgr_callback; + tmp_trove_array[i].trove_callback.data + = &tmp_trove_array[i]; + } + + /* post all trove operations at once */ + for(i=0; iu.precreate_pool.precreate_handle_count; i++) + { + /* go ahead and decrement count to avoid races with other consumers */ + tmp_trove_array[i].pool->pool_count--; + gossip_debug(GOSSIP_JOB_DEBUG, + "Pool count for handle %llu (type %u) decremented to %d\n", + llu(tmp_trove_array[i].pool->pool_handle), + tmp_trove_array[i].pool->pool_type, 
+ tmp_trove_array[i].pool->pool_count); + + /* is anyone waiting to check the count of this pool? */ + if(!qlist_empty(&precreate_pool_check_level_list)) + { + qlist_for_each_safe(iterator, scratch, + &precreate_pool_check_level_list) + { + jd_checker = qlist_entry(iterator, struct job_desc, + job_desc_q_link); + if(jd_checker->u.precreate_pool.precreate_pool == + tmp_trove_array[i].pool->pool_handle && + tmp_trove_array[i].pool->pool_count < + jd_checker->u.precreate_pool.low_threshold) + { + /* the pool level is low */ + gossip_debug(GOSSIP_JOB_DEBUG, "Pool count low, waking up waiter for handle %llu.\n", llu(jd_checker->u.precreate_pool.precreate_pool)); + qlist_del(&jd_checker->job_desc_q_link); + + /* move waiting job to completion queue */ + gen_mutex_lock(&completion_mutex); + job_desc_q_add(completion_queue_array[jd->context_id], jd_checker); + jd->completed_flag = 1; +#ifdef __PVFS2_JOB_THREADED__ + /* wake up anyone waiting for completion */ + pthread_cond_signal(&completion_cond); +#endif + gen_mutex_unlock(&completion_mutex); + } + } + } + + /* post trove operation to pull out a handle */ + ret = trove_keyval_iterate_keys( + fs->fsid, + tmp_trove_array[i].pool->pool_handle, + &tmp_trove_array[i].pos, + &tmp_trove_array[i].key, + &tmp_trove_array[i].count, + tmp_trove_array[i].jd->u.precreate_pool.flags| + TROVE_BINARY_KEY| + TROVE_KEYVAL_HANDLE_COUNT| + TROVE_KEYVAL_ITERATE_REMOVE, + NULL, + &tmp_trove_array[i].trove_callback, + global_trove_context, + &tmp_id, + jd->hints); + if(ret < 0) + { + precreate_pool_get_thread_mgr_callback_unlocked( + &tmp_trove_array[i], ret); + } + else if(ret == 1) + { + precreate_pool_get_thread_mgr_callback_unlocked( + &tmp_trove_array[i], 0); + } + else + { + /* callback will be triggered later */ + trove_pending_count++; + jd->u.precreate_pool.trove_pending++; + } + } + gen_mutex_unlock(&precreate_pool_mutex); +} + +/* job_precreate_pool_iterate_handles() + * + * similar to the trove iterate handles function, but 
returns all handles + * stored in the precreate pools, including the handles for the pool objects + * themselves. + * mtmoore: need to expose types through this interface + */ +int job_precreate_pool_iterate_handles( + PVFS_fs_id fsid, + PVFS_ds_position position, + PVFS_handle* handle_array, + int count, + PVFS_ds_flags flags, + PVFS_vtag* vtag, + void* user_ptr, + job_aint status_user_tag, + job_status_s* out_status_p, + job_id_t* id, + job_context_id context_id, + PVFS_hint hints) +{ + PVFS_ds_position local_position; + PVFS_ds_position pool_index; + struct qlist_head* iterator; + PVFS_ds_position tmp_index = 1; + struct precreate_pool* pool = NULL; + int ret; + struct job_desc *jd = NULL; + void* user_ptr_internal; + TROVE_op_id tmp_id; + int i; + struct fs_pool* fs; + + /* low order bits are the trove iterate position */ + local_position = position & 0xffffffff; + /* high order bits tell us which pool we are on */ + pool_index = position >> 32; + + /* we start indexing at one and reserve 0 for the special start and end + * values for the entire set of pools + */ + if(pool_index == 0) + { + if(local_position == PVFS_ITERATE_START) + { + pool_index = 1; + } + else + { + gossip_err("Error: invalid position given to job_precreate_pool_iterate_handles().\n"); + out_status_p->error_code = -PVFS_EINVAL; + return(1); + } + } + + gen_mutex_lock(&precreate_pool_mutex); + + fs = find_fs(fsid); + if(!fs) + { + /* no precreate pools available for the requested fs; stop iteration + * right here + */ + gen_mutex_unlock(&precreate_pool_mutex); + out_status_p->error_code = 0; + out_status_p->count = 0; + out_status_p->position = PVFS_ITERATE_END; + return(1); + } + + qlist_for_each(iterator, &fs->precreate_pool_list) + { + if(tmp_index == pool_index) + { + pool = qlist_entry(iterator, struct precreate_pool, + list_link); + break; + } + tmp_index++; + } + + if(!pool) + { + /* we ran out of pools; iteration is done */ + gen_mutex_unlock(&precreate_pool_mutex); + 
out_status_p->error_code = 0; + out_status_p->count = 0; + out_status_p->position = PVFS_ITERATE_END; + return(1); + } + + if(local_position == PVFS_ITERATE_END) + { + /* we got all of the handles out of the pool */ + /* pass back pool handle by itself and go to next pool */ + handle_array[0] = pool->pool_handle; + /* skip to next pool */ + pool_index++; + out_status_p->position = pool_index << 32; + out_status_p->position |= PVFS_ITERATE_START; + out_status_p->count = 1; + out_status_p->error_code = 0; + gen_mutex_unlock(&precreate_pool_mutex); + return(1); + } + + /* get ready to post a job to trove to find handles */ + jd = alloc_job_desc(JOB_PRECREATE_POOL); + if (!jd) + { + gen_mutex_unlock(&precreate_pool_mutex); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; + } + jd->u.precreate_pool.key_array = malloc(count * sizeof(*jd->u.precreate_pool.key_array)); + if(!jd->u.precreate_pool.key_array) + { + gen_mutex_unlock(&precreate_pool_mutex); + dealloc_job_desc(jd); + out_status_p->error_code = -PVFS_ENOMEM; + return 1; + } + for(i=0; iu.precreate_pool.key_array[i].buffer = &handle_array[i]; + jd->u.precreate_pool.key_array[i].buffer_sz = sizeof(handle_array[i]); + } + jd->job_user_ptr = user_ptr; + jd->hints = hints; + jd->u.precreate_pool.position = local_position; + jd->u.precreate_pool.count = count; + jd->u.precreate_pool.precreate_handle_array = handle_array; + jd->u.precreate_pool.pool_index = pool_index; + jd->context_id = context_id; + jd->status_user_tag = status_user_tag; + jd->trove_callback.fn = precreate_pool_iterate_callback; + jd->trove_callback.data = (void*)jd; + user_ptr_internal = &jd->trove_callback; + +#ifdef __PVFS2_TROVE_SUPPORT__ + ret = trove_keyval_iterate_keys(fsid, pool->pool_handle, + &(jd->u.precreate_pool.position), + jd->u.precreate_pool.key_array, + &(jd->u.precreate_pool.count), flags, NULL, + user_ptr_internal, + global_trove_context, &tmp_id, jd->hints); +#else + gossip_err("Error: Trove support not enabled.\n"); + ret = 
-ENOSYS; +#endif + + if (ret < 0) + { + /* error posting trove operation */ + free(jd->u.precreate_pool.key_array); + dealloc_job_desc(jd); + jd = NULL; + out_status_p->error_code = ret; + out_status_p->status_user_tag = status_user_tag; + gen_mutex_unlock(&precreate_pool_mutex); + return (1); + } + + if (ret == 1) + { + /* immediate completion */ + out_status_p->error_code = 0; + out_status_p->status_user_tag = status_user_tag; + out_status_p->position = pool_index << 32; + out_status_p->position |= jd->u.precreate_pool.position; + out_status_p->count = jd->u.precreate_pool.count; + free(jd->u.precreate_pool.key_array); + dealloc_job_desc(jd); + jd = NULL; + gen_mutex_unlock(&precreate_pool_mutex); + return (ret); + } + + /* if we fall through to this point, the job did not + * immediately complete and we must queue up to test later + */ + *id = jd->job_id; + trove_pending_count++; + gen_mutex_unlock(&precreate_pool_mutex); + + return (0); +} + +static struct fs_pool* find_fs(PVFS_fs_id fsid) +{ + struct fs_pool* fs; + struct qlist_head* iterator; + + qlist_for_each(iterator, &precreate_pool_fs_list) + { + fs = qlist_entry(iterator, struct fs_pool, list_link); + if(fs->fsid == fsid) + { + return(fs); + } + } + return(NULL); +} + + +#endif /* __PVFS2_TROVE_SUPPORT__ */ + /* * Local variables: * c-indent-level: 4 diff --git a/src/io/job/job.h b/src/io/job/job.h index 7736718..5b8ac9e 100644 --- a/src/io/job/job.h +++ b/src/io/job/job.h @@ -9,7 +9,11 @@ #ifndef __JOB_H #define __JOB_H +#ifdef WIN32 +#include "wincommon.h" +#else #include +#endif #include "src/io/flow/flow.h" #include "bmi.h" @@ -17,6 +21,7 @@ #include "pvfs2-storage.h" #include "pvfs2-req-proto.h" #include "pint-dev.h" +#include "src/server/request-scheduler/request-scheduler.h" typedef PVFS_id_gen_t job_id_t; typedef PVFS_context_id job_context_id; @@ -31,7 +36,7 @@ typedef struct job_status /* the comments indicate which type of job will fill in which fields */ job_aint status_user_tag; /* tag 
supplied by caller */ int error_code; /* returned by all operations */ - PVFS_size actual_size; /* read_at, write_at, resize, bmi_recv */ + PVFS_size actual_size; /* resize, bmi_recv */ PVFS_vtag *vtag; /* most trove operations */ PVFS_ds_position position; /* iterate, iterate_keys, iterate_handles */ PVFS_handle handle; /* dspace_create */ @@ -46,10 +51,11 @@ enum job_flags JOB_NO_IMMED_COMPLETE = 1 }; + #define JOB_TIMEOUT_INF (-1) /****************************************************************** - * management functions + * management functions */ int job_initialize(int flags); @@ -63,66 +69,54 @@ void job_close_context(job_context_id context_id); int job_reset_timeout(job_id_t id, int timeout_sec); /****************************************************************** - * job posting functions + * job posting functions */ /* network send */ int job_bmi_send(PVFS_BMI_addr_t addr, - void *buffer, - bmi_size_t size, - bmi_msg_tag_t tag, - enum bmi_buffer_type buffer_type, - int send_unexpected, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id, - int timeout_sec); + void *buffer, + bmi_size_t size, + bmi_msg_tag_t tag, + enum bmi_buffer_type buffer_type, + int send_unexpected, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + int timeout_sec, + PVFS_hint hints); /* network send (list of buffers) */ int job_bmi_send_list(PVFS_BMI_addr_t addr, - void **buffer_list, - bmi_size_t * size_list, - int list_count, - bmi_size_t total_size, - bmi_msg_tag_t tag, - enum bmi_buffer_type buffer_type, - int send_unexpected, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id, - int timeout_sec); + void **buffer_list, + bmi_size_t * size_list, + int list_count, + bmi_size_t total_size, + bmi_msg_tag_t tag, + enum bmi_buffer_type buffer_type, + int send_unexpected, + 
void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + int timeout_sec, + PVFS_hint hints); /* network receive */ int job_bmi_recv(PVFS_BMI_addr_t addr, - void *buffer, - bmi_size_t size, - bmi_msg_tag_t tag, - enum bmi_buffer_type buffer_type, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id, - int timeout_sec); - -/* network receive (list of buffers) */ -int job_bmi_recv_list(PVFS_BMI_addr_t addr, - void **buffer_list, - bmi_size_t * size_list, - int list_count, - bmi_size_t total_expected_size, - bmi_msg_tag_t tag, - enum bmi_buffer_type buffer_type, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id, - int timeout_sec); + void *buffer, + bmi_size_t size, + bmi_msg_tag_t tag, + enum bmi_buffer_type buffer_type, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + int timeout_sec, + PVFS_hint hints); /* OSD message submit */ struct osd_command; @@ -130,57 +124,75 @@ int job_bmi_osd_submit(PVFS_BMI_addr_t addr, struct osd_command *command, job_aint status_user_tag, job_context_id context_id, void *job_user_ptr, int timeout_sec); +/* network receive (list of buffers) */ +int job_bmi_recv_list(PVFS_BMI_addr_t addr, + void **buffer_list, + bmi_size_t * size_list, + int list_count, + bmi_size_t total_expected_size, + bmi_msg_tag_t tag, + enum bmi_buffer_type buffer_type, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + int timeout_sec, + PVFS_hint hints); + /* unexpected network receive */ int job_bmi_unexp(struct BMI_unexpected_info *bmi_unexp_d, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - enum job_flags flags, - job_context_id context_id); + void *user_ptr, + 
job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + enum job_flags flags, + job_context_id context_id); + +int job_bmi_unexp_cancel(job_id_t id); int job_bmi_cancel(job_id_t id, - job_context_id context_id); + job_context_id context_id); /* unexpected device receive */ int job_dev_unexp(struct PINT_dev_unexp_info* dev_unexp_d, - void* user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t* id, - enum job_flags flags, - job_context_id context_id); + void* user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t* id, + enum job_flags flags, + job_context_id context_id); /* device write */ int job_dev_write(void* buffer, - int size, - PVFS_id_gen_t tag, - enum PINT_dev_buffer_type buffer_type, - void* user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + int size, + PVFS_id_gen_t tag, + enum PINT_dev_buffer_type buffer_type, + void* user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id); /* device write list */ int job_dev_write_list(void** buffer_list, - int* size_list, - int list_count, - int total_size, - PVFS_id_gen_t tag, - enum PINT_dev_buffer_type buffer_type, - void* user_ptr, - job_aint status_user_tag, - job_status_s* out_status_p, - job_id_t* id, - job_context_id context_id); + int* size_list, + int list_count, + int total_size, + PVFS_id_gen_t tag, + enum PINT_dev_buffer_type buffer_type, + void* user_ptr, + job_aint status_user_tag, + job_status_s* out_status_p, + job_id_t* id, + job_context_id context_id); /* request scheduler post */ int job_req_sched_post(enum PVFS_server_op op, PVFS_fs_id fs_id, PVFS_handle handle, - int readonly, - int schedule, + enum PINT_server_req_access_type access_type, + enum PINT_server_sched_policy sched_policy, void *user_ptr, job_aint status_user_tag, job_status_s * out_status_p, @@ -196,46 +208,34 @@ int 
job_req_sched_change_mode(enum PVFS_server_mode mode, job_context_id context_id); int job_req_sched_post_timer(int msecs, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id); /* request scheduler release */ int job_req_sched_release(job_id_t in_completed_id, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * out_id, - job_context_id context_id); + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * out_id, + job_context_id context_id); /* complex I/O operation (disk, net, or mem) */ int job_flow(flow_descriptor * flow_d, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id, - int timeout_sec); + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + int timeout_sec, + PVFS_hint hints); int job_flow_cancel(job_id_t id, job_context_id context_id); /* storage byte stream write */ -int job_trove_bstream_write_at(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_offset offset, - void *buffer, - PVFS_size size, - PVFS_ds_flags flags, - PVFS_vtag * vtag, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); - int job_trove_bstream_write_list(PVFS_fs_id coll_id, PVFS_handle handle, char **mem_offset_array, @@ -251,22 +251,11 @@ int job_trove_bstream_write_list(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id); + job_context_id context_id, + PVFS_hint hints); /* storage byte stream read */ -int job_trove_bstream_read_at(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_offset offset, - void *buffer, - PVFS_size size, - PVFS_ds_flags flags, 
- PVFS_vtag * vtag, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); int job_trove_bstream_read_list(PVFS_fs_id coll_id, PVFS_handle handle, @@ -283,82 +272,89 @@ int job_trove_bstream_read_list(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id); + job_context_id context_id, + PVFS_hint hints); /* byte stream flush to storage */ int job_trove_bstream_flush(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_ds_flags flags, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); - + PVFS_handle handle, + PVFS_ds_flags flags, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); + /* storage key/value read */ int job_trove_keyval_read(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_ds_keyval * key_p, - PVFS_ds_keyval * val_p, - PVFS_ds_flags flags, - PVFS_vtag * vtag, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + PVFS_handle handle, + PVFS_ds_keyval * key_p, + PVFS_ds_keyval * val_p, + PVFS_ds_flags flags, + PVFS_vtag * vtag, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); /* storage key/value read list */ int job_trove_keyval_read_list(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_ds_keyval * key_array, - PVFS_ds_keyval * val_array, + PVFS_handle handle, + PVFS_ds_keyval * key_array, + PVFS_ds_keyval * val_array, PVFS_error * err_array, - int count, - PVFS_ds_flags flags, - PVFS_vtag * vtag, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + int count, + PVFS_ds_flags flags, + PVFS_vtag * vtag, + void *user_ptr, + job_aint 
status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); /* storage key/value write */ int job_trove_keyval_write(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_ds_keyval * key_p, - PVFS_ds_keyval * val_p, - PVFS_ds_flags flags, - PVFS_vtag * vtag, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + PVFS_handle handle, + PVFS_ds_keyval * key_p, + PVFS_ds_keyval * val_p, + PVFS_ds_flags flags, + PVFS_vtag * vtag, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); /* storage key/value write list */ int job_trove_keyval_write_list(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_ds_keyval * key_array, - PVFS_ds_keyval * val_array, - int count, - PVFS_ds_flags flags, - PVFS_vtag * vtag, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + PVFS_handle handle, + PVFS_ds_keyval * key_array, + PVFS_ds_keyval * val_array, + int count, + PVFS_ds_flags flags, + PVFS_vtag * vtag, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); /* flush keyval data to storage */ int job_trove_keyval_flush(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_ds_flags flags, - void * user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + PVFS_handle handle, + PVFS_ds_flags flags, + void * user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); /* get handle info for a keyval */ int job_trove_keyval_get_handle_info(PVFS_fs_id coll_id, @@ -369,131 +365,172 @@ int job_trove_keyval_get_handle_info(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t 
* id, - job_context_id context_id); + job_context_id context_id, + PVFS_hint hints); /* read generic dspace attributes */ int job_trove_dspace_getattr(PVFS_fs_id coll_id, - PVFS_handle handle, - void *user_ptr, + PVFS_handle handle, + void *user_ptr, PVFS_ds_attributes *out_ds_attr_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); /* read generic dspace attributes for a set of handles */ int job_trove_dspace_getattr_list(PVFS_fs_id coll_id, - int nhandles, - PVFS_handle *handle_array, - void *user_ptr, - PVFS_error *out_error_array, - PVFS_ds_attributes *out_ds_attr_ptr, - job_aint status_user_tag, - job_status_s *out_status_p, - job_id_t *id, - job_context_id context_id); + int nhandles, + PVFS_handle *handle_array, + void *user_ptr, + PVFS_error *out_error_array, + PVFS_ds_attributes *out_ds_attr_ptr, + job_aint status_user_tag, + job_status_s *out_status_p, + job_id_t *id, + job_context_id context_id, + PVFS_hint hints); /* write generic dspace attributes */ int job_trove_dspace_setattr(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_ds_attributes * ds_attr_p, + PVFS_handle handle, + PVFS_ds_attributes * ds_attr_p, PVFS_ds_flags flags, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); /* resize (truncate or preallocate) a storage byte stream */ int job_trove_bstream_resize(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_size size, - PVFS_ds_flags flags, - PVFS_vtag * vtag, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + PVFS_handle handle, + PVFS_size size, + PVFS_ds_flags flags, + 
PVFS_vtag * vtag, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); /* check consistency of a bytestream for a given vtag */ int job_trove_bstream_validate(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_vtag * vtag, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + PVFS_handle handle, + PVFS_vtag * vtag, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); /* remove a key/value entry */ int job_trove_keyval_remove(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_ds_keyval * key_p, + PVFS_handle handle, + PVFS_ds_keyval * key_p, PVFS_ds_keyval * val_p, - PVFS_ds_flags flags, - PVFS_vtag * vtag, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + PVFS_ds_flags flags, + PVFS_vtag * vtag, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); + +/* remove a list of key/value entries */ +int job_trove_keyval_remove_list(PVFS_fs_id coll_id, + PVFS_handle handle, + PVFS_ds_keyval * key_a, + PVFS_ds_keyval * val_a, + int * error_a, + int count, + PVFS_ds_flags flags, + PVFS_vtag * vtag, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); /* check consistency of a key/value pair for a given vtag */ int job_trove_keyval_validate(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_vtag * vtag, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + PVFS_handle handle, + PVFS_vtag * vtag, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id 
context_id, + PVFS_hint hints); /* iterate through all of the key/value pairs for a data space */ int job_trove_keyval_iterate(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_ds_position position, - PVFS_ds_keyval * key_array, - PVFS_ds_keyval * val_array, - int count, - PVFS_ds_flags flags, - PVFS_vtag * vtag, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + PVFS_handle handle, + PVFS_ds_position position, + PVFS_ds_keyval * key_array, + PVFS_ds_keyval * val_array, + int count, + PVFS_ds_flags flags, + PVFS_vtag * vtag, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); /* iterate through all of the keys for a data space */ int job_trove_keyval_iterate_keys(PVFS_fs_id coll_id, - PVFS_handle handle, - PVFS_ds_position position, - PVFS_ds_keyval * key_array, - int count, - PVFS_ds_flags flags, - PVFS_vtag * vtag, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + PVFS_handle handle, + PVFS_ds_position position, + PVFS_ds_keyval * key_array, + int count, + PVFS_ds_flags flags, + PVFS_vtag * vtag, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); /* iterates through all handles in a collection */ int job_trove_dspace_iterate_handles(PVFS_fs_id coll_id, - PVFS_ds_position position, - PVFS_handle* handle_array, - int count, - PVFS_ds_flags flags, - PVFS_vtag* vtag, - void* user_ptr, - job_aint status_user_tag, - job_status_s* out_status_p, - job_id_t* id, - job_context_id context_id); + PVFS_ds_position position, + PVFS_handle* handle_array, + int count, + PVFS_ds_flags flags, + PVFS_vtag* vtag, + void* user_ptr, + job_aint status_user_tag, + job_status_s* out_status_p, + job_id_t* id, + job_context_id context_id); /* create a new data 
space object */ int job_trove_dspace_create(PVFS_fs_id coll_id, + PVFS_handle_extent_array *handle_extent_array, + PVFS_ds_type type, + void *hint, + PVFS_ds_flags flags, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); + +/* create a set of new data space objects */ +int job_trove_dspace_create_list(PVFS_fs_id coll_id, PVFS_handle_extent_array *handle_extent_array, + PVFS_handle* out_handle_arry, + int count, PVFS_ds_type type, void *hint, PVFS_ds_flags flags, @@ -501,78 +538,107 @@ int job_trove_dspace_create(PVFS_fs_id coll_id, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id); + job_context_id context_id, + PVFS_hint hints); /* remove an entire data space object (byte stream and key/value) */ int job_trove_dspace_remove(PVFS_fs_id coll_id, - PVFS_handle handle, + PVFS_handle handle, + PVFS_ds_flags flags, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); + +/* remove a list of data space objects (byte stream and key/value) */ +int job_trove_dspace_remove_list(PVFS_fs_id coll_id, + PVFS_handle* handle_array, + PVFS_error *out_error_array, + int count, PVFS_ds_flags flags, void *user_ptr, job_aint status_user_tag, job_status_s * out_status_p, job_id_t * id, - job_context_id context_id); + job_context_id context_id, + PVFS_hint hints); /* verify that a given dataspace exists and discover its type */ int job_trove_dspace_verify(PVFS_fs_id coll_id, - PVFS_handle handle, + PVFS_handle handle, PVFS_ds_flags flags, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); int job_trove_dspace_cancel(PVFS_fs_id coll_id, - job_id_t 
id, - job_context_id context_id); + job_id_t id, + job_context_id context_id); /* create a new file system */ int job_trove_fs_create(char *collname, - PVFS_fs_id new_coll_id, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + PVFS_fs_id new_coll_id, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id); /* remove an existing file system */ int job_trove_fs_remove(char *collname, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id); /* lookup a file system based on a string name */ int job_trove_fs_lookup(char *collname, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id); /* set extended attributes for a file system */ int job_trove_fs_seteattr(PVFS_fs_id coll_id, - PVFS_ds_keyval * key_p, - PVFS_ds_keyval * val_p, - PVFS_ds_flags flags, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + PVFS_ds_keyval * key_p, + PVFS_ds_keyval * val_p, + PVFS_ds_flags flags, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); /* read extended attributes for a file system */ int job_trove_fs_geteattr(PVFS_fs_id coll_id, - PVFS_ds_keyval * key_p, - PVFS_ds_keyval * val_p, - PVFS_ds_flags flags, - void *user_ptr, - job_aint status_user_tag, - job_status_s * out_status_p, - job_id_t * id, - job_context_id context_id); + PVFS_ds_keyval * key_p, + PVFS_ds_keyval * val_p, + PVFS_ds_flags flags, + void 
*user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); + +/* delete extended attributes for a file system */ +int job_trove_fs_deleattr(PVFS_fs_id coll_id, + PVFS_ds_keyval * key_p, + PVFS_ds_flags flags, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); int job_null( int error_code, @@ -582,31 +648,108 @@ int job_null( job_id_t * id, job_context_id context_id); +int job_precreate_pool_fill( + PVFS_handle precreate_pool, + PVFS_fs_id fsid, + PVFS_handle* precreate_handle_array, + int precreate_handle_count, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); + +int job_precreate_pool_fill_signal_error( + PVFS_handle precreate_pool, + PVFS_fs_id fsid, + int error_code, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id); + +int job_precreate_pool_check_level( + PVFS_handle precreate_pool, + PVFS_fs_id fsid, + int low_threshold, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id); + +int job_precreate_pool_iterate_handles( + PVFS_fs_id fsid, + PVFS_ds_position position, + PVFS_handle* handle_array, + int count, + PVFS_ds_flags flags, + PVFS_vtag* vtag, + void* user_ptr, + job_aint status_user_tag, + job_status_s* out_status_p, + job_id_t* id, + job_context_id context_id, + PVFS_hint hints); + +int job_precreate_pool_get_handles( + PVFS_fs_id fsid, + int count, + PVFS_ds_type type, + const char** servers, + PVFS_handle* handle_array, + PVFS_ds_flags flags, + void *user_ptr, + job_aint status_user_tag, + job_status_s * out_status_p, + job_id_t * id, + job_context_id context_id, + PVFS_hint hints); + +int job_precreate_pool_register_server( + const char* host, + 
PVFS_ds_type type, + PVFS_fs_id fsid, + PVFS_handle pool_handle, + int count, + uint32_t *batch_count); + +int job_precreate_pool_lookup_server( + const char* host, + PVFS_ds_type type, + PVFS_fs_id fsid, + PVFS_handle* pool_handle); + +void job_precreate_pool_set_index( + int server_index); + /****************************************************************** - * job test/wait for completion functions + * job test/wait for completion functions */ int job_test(job_id_t id, - int *out_count_p, - void **returned_user_ptr_p, - job_status_s * out_status_p, - int timeout_ms, - job_context_id context_id); + int *out_count_p, + void **returned_user_ptr_p, + job_status_s * out_status_p, + int timeout_ms, + job_context_id context_id); int job_testsome(job_id_t * id_array, - int *inout_count_p, - int *out_index_array, - void **returned_user_ptr_array, - job_status_s * out_status_array_p, - int timeout_ms, - job_context_id context_id); + int *inout_count_p, + int *out_index_array, + void **returned_user_ptr_array, + job_status_s * out_status_array_p, + int timeout_ms, + job_context_id context_id); int job_testcontext(job_id_t * out_id_array_p, - int *inout_count_p, - void **returned_user_ptr_array, - job_status_s * out_status_array_p, - int timeout_ms, - job_context_id context_id); + int *inout_count_p, + void **returned_user_ptr_array, + job_status_s * out_status_array_p, + int timeout_ms, + job_context_id context_id); #endif /* __JOB_H */ diff --git a/src/io/job/thread-mgr.c b/src/io/job/thread-mgr.c index 86f7e48..5e8761f 100644 --- a/src/io/job/thread-mgr.c +++ b/src/io/job/thread-mgr.c @@ -16,6 +16,9 @@ #include "trove.h" #include "pvfs2-internal.h" +#include "pint-event.h" +#include + #define THREAD_MGR_TEST_COUNT 5 #define THREAD_MGR_TEST_TIMEOUT 10 static int thread_mgr_test_timeout = THREAD_MGR_TEST_TIMEOUT; @@ -53,6 +56,7 @@ static pthread_t dev_thread_id; static pthread_cond_t bmi_test_cond = PTHREAD_COND_INITIALIZER; static pthread_cond_t trove_test_cond = 
PTHREAD_COND_INITIALIZER; +static pthread_cond_t dev_unexp_test_cond = PTHREAD_COND_INITIALIZER; #endif /* __PVFS2_JOB_THREADED__ */ /* used to indicate that a bmi testcontext is in progress; we can't simply @@ -71,6 +75,8 @@ static int bmi_thread_running = 0; static int trove_thread_running = 0; static int dev_thread_running = 0; +static gen_mutex_t bmi_thread_running_mutex = GEN_MUTEX_INITIALIZER; + /* trove_thread_function() * * function executed by the thread in charge of trove @@ -83,6 +89,7 @@ static void *trove_thread_function(void *ptr) int timeout = thread_mgr_test_timeout; #ifdef __PVFS2_JOB_THREADED__ + PINT_event_thread_start("TROVE"); while (trove_thread_running) #endif { @@ -142,10 +149,12 @@ static void *trove_thread_function(void *ptr) stat_trove_error_code_array[i]); } } +#ifdef __PVFS2_JOB_THREADED__ + PINT_event_thread_stop(); +#endif return (NULL); } - /* bmi_thread_function() * * function executed by the thread in charge of BMI @@ -158,11 +167,16 @@ static void *bmi_thread_function(void *ptr) int i=0; int test_timeout = thread_mgr_test_timeout; struct PINT_thread_mgr_bmi_callback *tmp_callback; + int thread_running=0; + gen_mutex_lock(&bmi_thread_running_mutex); + thread_running = bmi_thread_running; + gen_mutex_unlock(&bmi_thread_running_mutex); #ifdef __PVFS2_JOB_THREADED__ - while (bmi_thread_running) + PINT_event_thread_start("BMI"); + while (thread_running) #endif - { + {/*start block*/ gen_mutex_lock(&bmi_mutex); if(bmi_unexp_count) { @@ -179,6 +193,7 @@ static void *bmi_thread_function(void *ptr) #ifdef __PVFS2_JOB_THREADED__ continue; #endif + return NULL; } @@ -206,9 +221,6 @@ static void *bmi_thread_function(void *ptr) gen_mutex_unlock(&bmi_mutex); } -#ifdef __PVFS2_CLIENT__ - test_timeout = 0; /* hack for 5ms delays in TCP when also OSD */ -#else /* decide how long we are willing to wait on the main test call */ if(quick_flag) { @@ -219,7 +231,6 @@ static void *bmi_thread_function(void *ptr) { test_timeout = thread_mgr_test_timeout; 
} -#endif /* indicate that a test is in progress */ gen_mutex_lock(&bmi_test_mutex); @@ -258,6 +269,7 @@ static void *bmi_thread_function(void *ptr) gossip_err("bmi_thread_function thread terminating\n"); break; #endif + return NULL; } @@ -281,8 +293,14 @@ static void *bmi_thread_function(void *ptr) stat_bmi_actual_size_array[i], stat_bmi_error_code_array[i]); } - } + gen_mutex_lock(&bmi_thread_running_mutex); + thread_running = bmi_thread_running; + gen_mutex_unlock(&bmi_thread_running_mutex); + } /*end block*/ +#ifdef __PVFS2_JOB_THREADED__ + PINT_event_thread_stop(); +#endif return (NULL); } @@ -303,8 +321,21 @@ static void *dev_thread_function(void *ptr) { gen_mutex_lock(&dev_mutex); incount = dev_unexp_count; + while(incount == 0) + { + /* we need to wait until more unexp dev operations are posted */ +#ifdef __PVFS2_JOB_THREADED__ + pthread_cond_wait(&dev_unexp_test_cond, &dev_mutex); + incount = dev_unexp_count; +#else + gen_mutex_unlock(&dev_mutex); + return(NULL); +#endif + } if(incount > THREAD_MGR_TEST_COUNT) + { incount = THREAD_MGR_TEST_COUNT; + } gen_mutex_unlock(&dev_mutex); ret = PINT_dev_test_unexpected( @@ -461,14 +492,18 @@ int PINT_thread_mgr_bmi_start(void) return(ret); } + gen_mutex_lock(&bmi_thread_running_mutex); bmi_thread_running = 1; + gen_mutex_unlock(&bmi_thread_running_mutex); #ifdef __PVFS2_JOB_THREADED__ ret = pthread_create(&bmi_thread_id, NULL, bmi_thread_function, NULL); if(ret != 0) { BMI_close_context(global_bmi_context); gen_mutex_unlock(&bmi_mutex); + gen_mutex_lock(&bmi_thread_running_mutex); bmi_thread_running = 0; + gen_mutex_unlock(&bmi_thread_running_mutex); /* TODO: convert error code */ return(-ret); } @@ -493,8 +528,8 @@ int PINT_thread_mgr_dev_stop(void) { assert(dev_thread_ref_count == 0); /* sanity check */ dev_thread_running = 0; -#ifdef __PVFS2_JOB_THREADED__ gen_mutex_unlock(&dev_mutex); +#ifdef __PVFS2_JOB_THREADED__ pthread_join(dev_thread_id, NULL); #endif } @@ -584,8 +619,8 @@ int 
PINT_thread_mgr_trove_stop(void) { assert(trove_thread_ref_count == 0); /* sanity check */ trove_thread_running = 0; -#ifdef __PVFS2_JOB_THREADED__ gen_mutex_unlock(&trove_mutex); +#ifdef __PVFS2_JOB_THREADED__ pthread_join(trove_thread_id, NULL); #endif #ifdef __PVFS2_TROVE_SUPPORT__ @@ -615,9 +650,11 @@ int PINT_thread_mgr_bmi_stop(void) if(bmi_thread_ref_count <= 0) { assert(bmi_thread_ref_count == 0); /* sanity check */ + gen_mutex_lock(&bmi_thread_running_mutex); bmi_thread_running = 0; -#ifdef __PVFS2_JOB_THREADED__ + gen_mutex_unlock(&bmi_thread_running_mutex); gen_mutex_unlock(&bmi_mutex); +#ifdef __PVFS2_JOB_THREADED__ pthread_join(bmi_thread_id, NULL); #endif BMI_close_context(global_bmi_context); @@ -751,6 +788,13 @@ int PINT_thread_mgr_dev_unexp_handler( } dev_unexp_fn = fn; dev_unexp_count++; + if(dev_unexp_count == 1) + { + /* signal worker thread that may have been waiting for more ops */ +#ifdef __PVFS2_JOB_THREADED__ + pthread_cond_signal(&dev_unexp_test_cond); +#endif + } gen_mutex_unlock(&dev_mutex); return(0); } diff --git a/src/io/trove/module.mk.in b/src/io/trove/module.mk.in index 1e60920..bdf151f 100644 --- a/src/io/trove/module.mk.in +++ b/src/io/trove/module.mk.in @@ -2,6 +2,7 @@ DIR := src/io/trove SERVERSRC += \ $(DIR)/trove-mgmt.c \ $(DIR)/trove-error.c \ + $(DIR)/trove-migrate.c \ $(DIR)/trove.c # Autogenerated code has been disabled. 
diff --git a/src/io/trove/pvfs2-storage.h b/src/io/trove/pvfs2-storage.h index 53dbc94..3eb163c 100644 --- a/src/io/trove/pvfs2-storage.h +++ b/src/io/trove/pvfs2-storage.h @@ -20,6 +20,14 @@ enum PVFS_coll_getinfo_options_e }; typedef enum PVFS_coll_getinfo_options_e PVFS_coll_getinfo_options; +struct PVFS_vtag_s +{ + /* undefined */ +#ifdef WIN32 + int field; +#endif +}; +typedef struct PVFS_vtag_s PVFS_vtag; /* key/value descriptor definition moved to include/pvfs2-types.h */ #if 0 /* key/value descriptors */ @@ -35,12 +43,21 @@ struct PVFS_ds_keyval_s typedef struct PVFS_ds_keyval_s PVFS_ds_keyval; #endif -/* vtag; contents not yet defined */ -struct PVFS_vtag_s +struct PVFS_ds_metadata_attr_s { - /* undefined */ + uint32_t dfile_count; + uint32_t dist_size; +}; + +struct PVFS_ds_datafile_attr_s +{ + PVFS_size b_size; /* bstream size */ +}; + +struct PVFS_ds_dirdata_attr_s +{ + uint64_t count; }; -typedef struct PVFS_vtag_s PVFS_vtag; /* dataspace attributes that are not explicitly stored within the * dataspace itself. @@ -50,8 +67,7 @@ typedef struct PVFS_vtag_s PVFS_vtag; * across the wire/to the user, so some translation is done. * * PVFS_object_attr attributes are what the users and the server deal - * with. Trove only deals with *_ds_storedattr objects (trove on disk - * formats) and *_ds_attributes (trove in memory format). + * with. Trove deals with TROVE_ds_attributes (trove on disk and in-memory format). 
* * Trove version 0.0.1 and version 0.0.2 differ in this aspect, since * many members have been moved, added to make this structure friendlier @@ -67,37 +83,22 @@ struct PVFS_ds_attributes_s PVFS_uid uid; PVFS_gid gid; PVFS_permissions mode; + PVFS_handle cid; int32_t __pad1; PVFS_time ctime; PVFS_time mtime; PVFS_time atime; - uint32_t dfile_count; - uint32_t dist_size; - /* non-stored attributes need to be below here */ - PVFS_size b_size; /* bstream size */ + union + { + struct PVFS_ds_metadata_attr_s metafile; + struct PVFS_ds_datafile_attr_s datafile; + struct PVFS_ds_dirdata_attr_s dirdata; + } u; } ; typedef struct PVFS_ds_attributes_s PVFS_ds_attributes; -struct PVFS_ds_storedattr_s -{ - PVFS_ds_type type; - PVFS_fs_id fs_id; - PVFS_handle handle; - PVFS_uid uid; - PVFS_gid gid; - PVFS_permissions mode; - int32_t __pad1; - - PVFS_time ctime; - PVFS_time mtime; - PVFS_time atime; - uint32_t dfile_count; - uint32_t dist_size; -}; -typedef struct PVFS_ds_storedattr_s PVFS_ds_storedattr; - #define PVFS_ds_init_time(__dsa) \ do { \ (__dsa)->ctime = time(NULL); \ @@ -105,17 +106,6 @@ do { \ (__dsa)->mtime = time(NULL); \ } while (0) -#define PVFS_ds_attr_to_stored(__from, __to) \ -do { \ - (__to) = * ((PVFS_ds_storedattr *) &(__from)); \ -} while (0) - -#define PVFS_ds_stored_to_attr(__from, __to, __b_size) \ -do { \ - memcpy(&(__to), &(__from), sizeof(PVFS_ds_storedattr)); \ - (__to).b_size = (__b_size); \ -} while (0) - #define PVFS_ds_attr_to_object_attr(__dsa, __oa) \ do { \ (__oa)->owner = (__dsa)->uid; \ @@ -125,21 +115,23 @@ do { \ (__oa)->mtime = (__dsa)->mtime; \ (__oa)->atime = (__dsa)->atime; \ (__oa)->objtype = (__dsa)->type; \ - (__oa)->u.meta.dfile_count = (__dsa)->dfile_count; \ - (__oa)->u.meta.dist_size = (__dsa)->dist_size; \ + (__oa)->cid = (__dsa)->cid; \ + (__oa)->u.meta.dfile_count = (__dsa)->u.metafile.dfile_count; \ + (__oa)->u.meta.dist_size = (__dsa)->u.metafile.dist_size; \ } while(0) -#define PVFS_object_attr_to_ds_attr(__oa, __dsa) \ 
-do { \ - (__dsa)->uid = (__oa)->owner; \ - (__dsa)->gid = (__oa)->group; \ - (__dsa)->mode = (__oa)->perms; \ - (__dsa)->ctime = (__oa)->ctime; \ - (__dsa)->mtime = (__oa)->mtime; \ - (__dsa)->atime = (__oa)->atime; \ - (__dsa)->type = (__oa)->objtype; \ - (__dsa)->dfile_count = (__oa)->u.meta.dfile_count; \ - (__dsa)->dist_size = (__oa)->u.meta.dist_size; \ +#define PVFS_object_attr_to_ds_attr(__oa, __dsa) \ + do { \ + (__dsa)->uid = (__oa)->owner; \ + (__dsa)->gid = (__oa)->group; \ + (__dsa)->mode = (__oa)->perms; \ + (__dsa)->ctime = (__oa)->ctime; \ + (__dsa)->mtime = (__oa)->mtime; \ + (__dsa)->atime = (__oa)->atime; \ + (__dsa)->type = (__oa)->objtype; \ + (__dsa)->cid = (__oa)->cid; \ + (__dsa)->u.metafile.dfile_count = (__oa)->u.meta.dfile_count; \ + (__dsa)->u.metafile.dist_size = (__oa)->u.meta.dist_size; \ } while(0) #define PVFS_object_attr_overwrite_setable(dest, src) \ @@ -150,6 +142,8 @@ do { \ (dest)->group = (src)->group; \ if ((src)->mask & PVFS_ATTR_COMMON_PERM) \ (dest)->perms = (src)->perms; \ + if ((src)->mask & PVFS_ATTR_COMMON_CID) \ + (dest)->cid = (src)->cid; \ if ((src)->mask & PVFS_ATTR_COMMON_ATIME) \ { \ if ((src)->mask & PVFS_ATTR_COMMON_ATIME_SET) \ @@ -169,7 +163,7 @@ do { \ } \ else \ { \ - (dest)->mtime = time(NULL); \ + (dest)->mtime = PINT_util_mktime_version(time(NULL)); \ } \ } \ if ((src)->mask & PVFS_ATTR_COMMON_CTIME) \ diff --git a/src/io/trove/trove-dbpf/README b/src/io/trove/trove-dbpf/README index 2b1d729..3813ceb 100644 --- a/src/io/trove/trove-dbpf/README +++ b/src/io/trove/trove-dbpf/README @@ -53,6 +53,8 @@ dbpf_collection_seteattr() dbpf_collection_geteattr() +dbpf_collection_deleattr() + dbpf_initialize() -- called once before DBPF interface is used (?) 
dbpf_finalize() diff --git a/src/io/trove/trove-dbpf/dbpf-alt-aio.c b/src/io/trove/trove-dbpf/dbpf-alt-aio.c index dacd74e..b6bf260 100644 --- a/src/io/trove/trove-dbpf/dbpf-alt-aio.c +++ b/src/io/trove/trove-dbpf/dbpf-alt-aio.c @@ -36,17 +36,19 @@ int alt_lio_listio(int mode, struct aiocb * const list[], int ret, i; pthread_t *tids; pthread_attr_t attr; + pthread_t master_tid; tids = (pthread_t *)malloc(sizeof(pthread_t) * nent); if(!tids) { return (-1); } + memset(tids,0,sizeof(pthread_t) * nent); for(i = 0; i < nent; ++i) { int spawnmode= PTHREAD_CREATE_JOINABLE; - tmp_item = (struct alt_aio_item*)malloc(sizeof(struct alt_aio_item)*nent); + tmp_item = (struct alt_aio_item*)malloc(sizeof(struct alt_aio_item)); if(!tmp_item) { return (-1); @@ -95,7 +97,19 @@ int alt_lio_listio(int mode, struct aiocb * const list[], } /* create thread to perform I/O and trigger callback */ - ret = pthread_create(&tids[i], &attr, alt_lio_thread, tmp_item); + if(mode == LIO_NOWAIT && i == (nent - 1)) + { + /* note: in this case don't store the master's tid in the array; + * some thread implementations may allow the the + * alt_lio_thread() to complete (and try to free the array) + * before pthread_create() finishes execution here. 
+ */ + ret = pthread_create(&master_tid, &attr, alt_lio_thread, tmp_item); + } + else + { + ret = pthread_create(&tids[i], &attr, alt_lio_thread, tmp_item); + } if(ret != 0) { int j = 0; @@ -113,10 +127,13 @@ int alt_lio_listio(int mode, struct aiocb * const list[], errno = ret; return(-1); } - gossip_debug(GOSSIP_BSTREAM_DEBUG, - "[alt-aio]: pthread_create completed:" - " id: %d, thread_id: %p\n", - i, (void *)tids[i]); + if (tids) + { + gossip_debug(GOSSIP_BSTREAM_DEBUG, + "[alt-aio]: pthread_create completed:" + " id: %d, thread_id: %p\n", + i, (void *)tids[i]); + } } ret = 0; @@ -270,68 +287,72 @@ static void* alt_lio_thread(void* foo) static int alt_aio_bstream_read_list(TROVE_coll_id coll_id, TROVE_handle handle, - char **mem_offset_array, + char **mem_offset_array, TROVE_size *mem_size_array, int mem_count, - TROVE_offset *stream_offset_array, + TROVE_offset *stream_offset_array, TROVE_size *stream_size_array, int stream_count, TROVE_size *out_size_p, - TROVE_ds_flags flags, + TROVE_ds_flags flags, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { return dbpf_bstream_rw_list(coll_id, handle, - mem_offset_array, + mem_offset_array, mem_size_array, mem_count, - stream_offset_array, + stream_offset_array, stream_size_array, stream_count, out_size_p, - flags, + flags, vtag, user_ptr, context_id, out_op_id_p, LIO_READ, - &alt_aio_ops); + &alt_aio_ops, + hints); } static int alt_aio_bstream_write_list(TROVE_coll_id coll_id, TROVE_handle handle, - char **mem_offset_array, + char **mem_offset_array, TROVE_size *mem_size_array, int mem_count, - TROVE_offset *stream_offset_array, + TROVE_offset *stream_offset_array, TROVE_size *stream_size_array, int stream_count, TROVE_size *out_size_p, - TROVE_ds_flags flags, + TROVE_ds_flags flags, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { 
return dbpf_bstream_rw_list(coll_id, handle, - mem_offset_array, + mem_offset_array, mem_size_array, mem_count, - stream_offset_array, + stream_offset_array, stream_size_array, stream_count, out_size_p, - flags, + flags, vtag, user_ptr, context_id, out_op_id_p, LIO_WRITE, - &alt_aio_ops); + &alt_aio_ops, + hints); } static struct dbpf_aio_ops alt_aio_ops = @@ -354,7 +375,8 @@ struct TROVE_bstream_ops alt_aio_bstream_ops = dbpf_bstream_validate, alt_aio_bstream_read_list, alt_aio_bstream_write_list, - dbpf_bstream_flush + dbpf_bstream_flush, + NULL }; /* diff --git a/src/io/trove/trove-dbpf/dbpf-attr-cache.c b/src/io/trove/trove-dbpf/dbpf-attr-cache.c index 4299cb7..3d79341 100644 --- a/src/io/trove/trove-dbpf/dbpf-attr-cache.c +++ b/src/io/trove/trove-dbpf/dbpf-attr-cache.c @@ -140,7 +140,7 @@ int dbpf_attr_cache_initialize( int dbpf_attr_cache_finalize(void) { - int ret = -1, i = 0; + int ret = -1, i = 0, j = 0; struct qlist_head *hash_link = NULL; dbpf_attr_cache_elem_t *cache_elem = NULL; @@ -156,6 +156,13 @@ int dbpf_attr_cache_finalize(void) { cache_elem = qhash_entry( hash_link, dbpf_attr_cache_elem_t, hash_link); + for( j = 0; j < cache_elem->num_keyval_pairs; j++ ) + { + /* any elements still existing need their keyval pairs + * data pointer free'd before the element is free'd */ + if( cache_elem->keyval_pairs[j].data ) + free(cache_elem->keyval_pairs[j].data); + } free(cache_elem); s_current_num_cache_elems--; } @@ -237,7 +244,7 @@ int dbpf_attr_cache_ds_attr_update_cached_data_bsize( { if (cache_elem) { - cache_elem->attr.b_size = b_size; + cache_elem->attr.u.datafile.b_size = b_size; gossip_debug(GOSSIP_DBPF_ATTRCACHE_DEBUG, "Updating " "cached b_size for key %llu\n", llu(key.handle)); @@ -347,6 +354,11 @@ int dbpf_attr_cache_keyval_pair_fetch_cached_data( { if (cache_elem && keyval_pair) { + if(*target_data_sz < keyval_pair->data_sz) + { + /* cached value is too big for buffer */ + return(-TROVE_EINVAL); + } memcpy(target_data, keyval_pair->data, 
keyval_pair->data_sz); *target_data_sz = keyval_pair->data_sz; ret = 0; @@ -456,7 +468,7 @@ int dbpf_attr_cache_insert( GOSSIP_DBPF_ATTRCACHE_DEBUG, "dbpf_attr_cache_insert: inserting %llu " "(b_size is %llu)\n", llu(key.handle), - llu(cache_elem->attr.b_size)); + llu(cache_elem->attr.u.datafile.b_size)); } ret = 0; } diff --git a/src/io/trove/trove-dbpf/dbpf-bstream-direct.c b/src/io/trove/trove-dbpf/dbpf-bstream-direct.c new file mode 100644 index 0000000..dc9b782 --- /dev/null +++ b/src/io/trove/trove-dbpf/dbpf-bstream-direct.c @@ -0,0 +1,1757 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_MALLOC_H +#include +#endif +#include +#include +#include + +#include "gossip.h" +#include "pvfs2-debug.h" +#include "trove.h" +#include "trove-internal.h" +#include "dbpf.h" +#include "dbpf-op.h" +#include "dbpf-op-queue.h" +#include "dbpf-attr-cache.h" +#include "dbpf-bstream.h" +#include "dbpf-sync.h" +#include "pint-mem.h" +#include "pint-mgmt.h" +#include "pint-context.h" +#include "pint-op.h" + +static gen_mutex_t dbpf_update_size_lock = GEN_MUTEX_INITIALIZER; +static gen_mutex_t grow_bstream_table_lock = GEN_MUTEX_INITIALIZER; + +typedef struct +{ + char *buffer; + TROVE_size size; + TROVE_offset offset; +} dbpf_stream_extents_t; + +struct qhash_table *grow_bstream_table = NULL; + +struct grow_bstream_handle +{ + struct qlist_head hash_link; + gen_mutex_t handle_lock; + gen_mutex_t refcount_lock; + PVFS_handle handle; + int refcount; +}; + +static int dbpf_bstream_get_extents( + char **mem_offset_array, + TROVE_size *mem_size_array, + int mem_count, + TROVE_offset *stream_offset_array, + TROVE_size *stream_size_array, + int stream_count, + int *ext_count, + dbpf_stream_extents_t *extents); + +static int hash_handle_compare( + void *key, + struct qlist_head *link); + +static int hash_handle( + void *handle, + int 
table_size); + +static int grow_bstream_handle_table_init( int size ); +static int grow_bstream_handle_acquire_lock( TROVE_object_ref ref ); +static int grow_bstream_handle_release_lock( TROVE_object_ref ref ); + +static size_t direct_aligned_write(int fd, + void *buf, + off_t buf_offset, + size_t size, + off_t write_offset, + off_t stream_size); + +static size_t direct_locked_write(int fd, + void * buf, + off_t buf_offset, + size_t size, + off_t write_offset, + off_t stream_size); + +#if 0 +static size_t new_direct_write(int fd, + void * buf, + off_t buf_offset, + size_t size, + off_t write_offset, + off_t stream_size); +#endif + +static size_t direct_write(int fd, + void * buf, + off_t buf_offset, + size_t size, + off_t write_offset, + off_t stream_size); + +static size_t direct_aligned_read(int fd, + void *buf, + off_t buf_offset, + size_t size, + off_t file_offset, + off_t stream_size); + +static size_t direct_locked_read(int fd, + void * buf, + off_t buf_offset, + size_t size, + off_t file_offset, + off_t stream_size); + +static size_t direct_read(int fd, + void * buf, + off_t buf_offset, + size_t size, + off_t file_offset, + off_t stream_size); + +#define BLOCK_SIZE 4096 + +/* compute the mask of 1s that allows us to essentially throw away + * all bits less than the block size. 
+ */ +#define BLOCK_MULTIPLES_MASK (~((uintptr_t) BLOCK_SIZE - 1)) + +/* calculate the max offset that is a multiple of the block size but still + * less than or equal to requested offset passed in + */ +#define ALIGNED_OFFSET(__offset) (__offset & BLOCK_MULTIPLES_MASK) + +/* calculate the minimum size that is a multiple of the block size and + * still greater than or equal to the requested size + */ +#define ALIGNED_SIZE(__offset, __size) \ + (((__offset + __size + BLOCK_SIZE - 1) \ + & BLOCK_MULTIPLES_MASK) - ALIGNED_OFFSET(__offset)) + +#define IS_ALIGNED_PTR(__ptr) \ + ((((uintptr_t)__ptr) & BLOCK_MULTIPLES_MASK) == (uintptr_t)__ptr) + +extern PINT_manager_t io_thread_mgr; +extern PINT_worker_id io_worker_id; +extern PINT_queue_id io_queue_id; + +#if 0 +struct aligned_block +{ + void *ptr; + struct qlist_head link; +}; +static struct aligned_block *blocks; +static void *aligned_blocks_buffer; +static QLIST_HEAD(aligned_blocks_unused); +static QLIST_HEAD(aligned_blocks_used); +static gen_mutex_t aligned_blocks_mutex = GEN_MUTEX_INITIALIZER; +static int used_count; + +int dbpf_aligned_blocks_init(void); +void * dbpf_aligned_block_get(void); +int dbpf_aligned_block_put(void *ptr); +int dbpf_aligned_blocks_finalize(void); +#endif + +/** + * Perform an write in direct mode (no buffering). + * + * @param fd - The file descriptor of the bstream to do the write on. THe + * file descriptor is required to opened with O_DIRECT. In debug mode, + * the O_DIRECT option is checked. + * + * @param buf - the buffer containing the bytes to write to the bstream. The + * buffer is required to be allocated with the correct alignment (to a block + * size of 512) + * + * @param buf_offset - the offset into the buffer that the write should start + * + * @param size - the size of bytes to write from the buffer to + * the file. 
+ * + * @param write_offset - the offset into the bstream to start the write + * + * @param stream_size - the actual size of the bstream (might be stored + * elsewhere) + * + * @returns bytes written, otherwise a negative errno error code + */ +static size_t direct_aligned_write(int fd, + void *buf, + off_t buf_offset, + size_t size, + off_t write_offset, + off_t stream_size) +{ + int ret; + +#ifndef NDEBUG + /* if debug is enabled, check that fd was opened with O_DIRECT */ +#ifdef HAVE_OPEN_O_DIRECT + if(!(fcntl(fd, F_GETFL) & O_DIRECT)) + { + return -EINVAL; + } +#endif + +#ifdef HAVE_FNCTL_F_NOCACHE + if (!(fcntl(fd, F_GETFL) & F_NOCACHE)) + { + return -EINVAL; + } +#endif +#endif + + /* verify that the buffer is aligned properly */ + assert(IS_ALIGNED_PTR(buf)); + + /* verify that the offset is aligned as well */ + assert(ALIGNED_OFFSET(buf_offset) == buf_offset); + + /* and the size */ + assert(ALIGNED_SIZE(write_offset, size) == size); + + /* and the offset into the file */ + assert(ALIGNED_OFFSET(write_offset) == write_offset); + + ret = dbpf_pwrite(fd, (((char *)buf) + buf_offset), size, write_offset); + if(ret < 0) + { + gossip_err( + "dbpf_direct_write: failed to perform aligned write\n"); + return ret; + } + + return ret; +} + +/* static int writes_outstanding = 0; +gen_mutex_t writes_lock = GEN_MUTEX_INITIALIZER; */ + +static size_t direct_locked_write(int fd, + void * buf, + off_t buf_offset, + size_t size, + off_t write_offset, + off_t stream_size) +{ + struct flock writelock; + int ret, write_ret; +/* struct timeval start, end; */ + + writelock.l_type = F_WRLCK; + writelock.l_whence = SEEK_SET; + writelock.l_start = (off_t)ALIGNED_OFFSET(write_offset); + writelock.l_len = (off_t)ALIGNED_SIZE(write_offset, size); + ret = fcntl(fd, F_SETLKW, &writelock); + if(ret < 0 && errno == EINTR) + { + gossip_err("%s: failed to lock flock before writing\n", __func__); + return -trove_errno_to_trove_error(errno); + } + writelock.l_type = F_UNLCK; + + write_ret = 
direct_write( + fd, buf, buf_offset, size, write_offset, stream_size); + + ret = fcntl(fd, F_SETLK, &writelock); + if (ret < 0) + { + gossip_err("%s: failed to unlock flock after writing\n", __func__); + return -trove_errno_to_trove_error (errno); + } + +#if 0 + if(write_ret > 0) + { + if((write_offset + size) > stream_size) + { + ret = DBPF_RESIZE(fd, (write_offset + size)); + if(ret < 0) + { + gossip_err("failed ftruncate of O_DIRECT fd to size: %d\n", + (write_offset + size)); + return -trove_errno_to_trove_error(errno); + } + } + } +#endif + + return write_ret; +} + +#if 0 +static size_t new_direct_write(int fd, + void * buf, + off_t buf_offset, + size_t size, + off_t write_offset, + off_t stream_size) +{ + size_t ret; + void *aligned_buf; + size_t aligned_size; + off_t aligned_offset, end_offset, aligned_end_offset; + + aligned_size = ALIGNED_SIZE(write_offset, size); + aligned_offset = ALIGNED_OFFSET(write_offset); + + /* if the buffer passed in, the offsets, and the size are all + * aligned properly, just pass through directly + */ + if(IS_ALIGNED_PTR(buf) && + ALIGNED_OFFSET(buf_offset) == buf_offset && + aligned_size == size) + { + return direct_aligned_write(fd, buf, buf_offset, + size, write_offset, stream_size); + } + + gossip_debug(GOSSIP_DIRECTIO_DEBUG, + "requested write is not aligned, doing memcpy:\n\t" + "buf: %p, " + "buf_offset: %llu, " + "size: %zu, \n\t" + "write_offset: %llu, " + "stream_size: %zu\n", + buf, + llu(buf_offset), + size, + llu(write_offset), + stream_size); + + aligned_buf = dbpf_aligned_block_get(); + if(!aligned_buf) + { + return -ENOMEM; + } + + /* Do read-modify-write on the ends of the buffer if + * the offsets and sizes aren't aligned properly + */ + if(aligned_offset < write_offset) + { + ret = 0; + if(ALIGNED_SIZE(0, stream_size) > aligned_offset) + { + gossip_debug(GOSSIP_DIRECTIO_DEBUG, "Doing RMW at front\n"); + /* read the first block */ + ret = dbpf_pread(fd, aligned_buf, BLOCK_SIZE, aligned_offset); + if(ret < 0) + 
{ + int pread_errno = errno; + gossip_err( + "direct_memcpy_write: RMW failed at " + "beginning of request\n"); + dbpf_aligned_block_put(aligned_buf); + + return -trove_errno_to_trove_error(pread_errno); + } + } + else + { + memset(aligned_buf, 0, BLOCK_SIZE); + } + + memcpy(((char *)buf) - (write_offset - aligned_offset), + aligned_buf, (write_offset - aligned_offset)); + } + + end_offset = write_offset + size; + aligned_end_offset = aligned_offset + aligned_size; + + if(aligned_end_offset > end_offset) + { + ret = 0; + if(ALIGNED_SIZE(0, stream_size) >= aligned_end_offset) + { + gossip_debug(GOSSIP_DIRECTIO_DEBUG, "Doing RMW at end\n"); + ret = dbpf_pread(fd, + aligned_buf, + BLOCK_SIZE, + aligned_end_offset - BLOCK_SIZE); + if(ret < 0) + { + int pread_errno = errno; + gossip_err( + "direct_memcpy_write: RMW failed at end of request\n"); + dbpf_aligned_block_put(aligned_buf); + + return -trove_errno_to_trove_error(pread_errno); + } + } + else + { + memset(aligned_buf, 0, BLOCK_SIZE); + } + + memcpy(((char *)buf) + size, + ((char *)aligned_buf) + (end_offset % BLOCK_SIZE), + (aligned_end_offset - end_offset)); + } + + ret = direct_aligned_write( + fd, + ((char *)buf) - (write_offset - aligned_offset), 0, + aligned_size, aligned_offset, stream_size); + + dbpf_aligned_block_put(aligned_buf); + + return size; +} +#endif + +static size_t direct_write(int fd, + void * buf, + off_t buf_offset, + size_t size, + off_t write_offset, + off_t stream_size) +{ + size_t ret; + void * aligned_buf; + size_t aligned_size; + off_t aligned_offset, end_offset, aligned_end_offset; + + aligned_size = ALIGNED_SIZE(write_offset, size); + aligned_offset = ALIGNED_OFFSET(write_offset); + + /* if the buffer passed in, the offsets, and the size are all + * aligned properly, just pass through directly + */ + if(IS_ALIGNED_PTR(buf) && + ALIGNED_OFFSET(buf_offset) == buf_offset && + aligned_size == size) + { + return direct_aligned_write(fd, buf, buf_offset, + size, write_offset, stream_size); 
+ } + + gossip_debug(GOSSIP_DIRECTIO_DEBUG, + "requested write is not aligned, doing memcpy:\n\t" + "buf: %p, " + "buf_offset: %llu, " + "size: %zu, \n\t" + "write_offset: %llu, " + "stream_size: %llu\n", + buf, + llu(buf_offset), + size, + llu(write_offset), + llu(stream_size)); + + aligned_buf = PINT_mem_aligned_alloc(aligned_size, BLOCK_SIZE); + if(!aligned_buf) + { + return -ENOMEM; + } + + /* Do read-modify-write on the ends of the buffer if + * the offsets and sizes aren't aligned properly + */ + if(aligned_offset < write_offset) + { + ret = 0; + if(ALIGNED_SIZE(0, stream_size) > aligned_offset) + { + /* read the first block */ + gossip_debug(GOSSIP_DIRECTIO_DEBUG, "Doing RMW at front\n"); + ret = dbpf_pread(fd, aligned_buf, BLOCK_SIZE, aligned_offset); + if(ret < 0) + { + int pread_errno = errno; + gossip_err( + "direct_memcpy_write: RMW failed at " + "beginning of request\n"); + PINT_mem_aligned_free(aligned_buf); + + return -trove_errno_to_trove_error(pread_errno); + } + } + else + { + memset(aligned_buf, 0, BLOCK_SIZE); + } + } + + end_offset = write_offset + size; + aligned_end_offset = aligned_offset + aligned_size; + + if(aligned_end_offset > end_offset) + { + ret = 0; + if(ALIGNED_SIZE(0, stream_size) >= aligned_end_offset) + { + gossip_debug(GOSSIP_DIRECTIO_DEBUG, "Doing RMW at end\n"); + ret = dbpf_pread( + fd, + ((char *)aligned_buf) + aligned_size - BLOCK_SIZE, + BLOCK_SIZE, + aligned_end_offset - BLOCK_SIZE); + if(ret < 0) + { + int pread_errno = errno; + gossip_err( + "direct_memcpy_write: RMW failed at end of request\n"); + PINT_mem_aligned_free(aligned_buf); + + return -trove_errno_to_trove_error(pread_errno); + } + } + else + { + memset(((char *)aligned_buf) + aligned_size - BLOCK_SIZE, + 0, BLOCK_SIZE); + } + } + + /* now we're read to memcpy the actual (unaligned) request into the + * aligned buffer + */ + memcpy(((char *)aligned_buf) + (write_offset - aligned_offset), + ((char *)buf) + buf_offset, size); + + ret = direct_aligned_write(fd, 
aligned_buf, 0, + aligned_size, aligned_offset, stream_size); + + PINT_mem_aligned_free(aligned_buf); + + return (ret < 0) ? ret : size; +} + +/** + * Perform a read in direct mode (no buffering). + * + * @param fd - The file descriptor of the bstream to do the read from. The + * file descriptor is required to be opened with O_DIRECT. In debug mode, + * the O_DIRECT option is checked, and if it doesn't exist on the open file + * descriptor, EINVAL is returned. + * + * @param buf - The buffer to read data into. This function assumes that + * the buffer has been allocated with the correct alignment (i.e. to a block + * size of 512, using posix_memalign or such). + * + * @param buf_offset - The offset into the buffer that data is read. + * + * @param buf_size - The available size of the buffer + * + * @param file_offset - offset into the file to start the read + * + * @param request_size - number of bytes to read from the file + * + * @param stream_size - size of the file + * + * @return number of bytes read + */ +static size_t direct_aligned_read(int fd, + void * buf, + off_t buf_offset, + size_t size, + off_t file_offset, + off_t stream_size) +{ + int ret; + + if(file_offset >= stream_size) + { + /* the offset is past EOF, return 0 bytes read */ + return 0; + } + +#ifndef NDEBUG + /* if debug is enabled, check that fd was opened with O_DIRECT */ +#ifdef HAVE_OPEN_O_DIRECT + if(!(fcntl(fd, F_GETFL) & O_DIRECT)) +#elif defined(HAVE_FNCTL_F_NOCACHE) + if (!(fcntl(fd, F_GETFL) & F_NOCACHE)) +#else + if (0) //TODO: error? fall back to madvise? 
+#endif + { + gossip_err("dbpf_direct_read: trying to do direct IO but file wasn't " + "opened with O_DIRECT\n"); + return -EINVAL; + } +#endif + + /* verify that stuff is aligned properly */ + assert(IS_ALIGNED_PTR(buf)); + assert(ALIGNED_OFFSET(buf_offset) == buf_offset); + assert(ALIGNED_SIZE(file_offset, size) == size); + assert(ALIGNED_OFFSET(file_offset) == file_offset); + + ret = dbpf_pread(fd, (((char *)buf) + buf_offset), size, file_offset); + if(ret < 0) + { + gossip_err("dbpf_direct_read: failed to perform aligned read\n"); + return -trove_errno_to_trove_error(errno); + } + + return ret; +} + +static size_t direct_locked_read(int fd, + void * buf, + off_t buf_offset, + size_t size, + off_t file_offset, + off_t stream_size) +{ + int ret, read_ret; + struct flock readlock; + + readlock.l_type = F_RDLCK; + readlock.l_whence = SEEK_SET; + readlock.l_start = (off_t)ALIGNED_OFFSET(file_offset); + readlock.l_len = (off_t)ALIGNED_SIZE(file_offset, size); + ret = fcntl(fd, F_SETLKW, &readlock); + if(ret < 0 && errno == EINTR) + { + return -trove_errno_to_trove_error(errno); + } + readlock.l_type = F_UNLCK; + + read_ret = direct_read(fd, buf, buf_offset, size, file_offset, stream_size); + + ret = fcntl(fd, F_SETLK, &readlock); + if(ret < 0) + { + return -trove_errno_to_trove_error(errno); + } + + return read_ret; +} + +static size_t direct_read(int fd, + void * buf, + off_t buf_offset, + size_t size, + off_t file_offset, + off_t stream_size) +{ + void * aligned_buf; + off_t aligned_offset; + size_t aligned_size, read_size; + size_t ret; + + if(file_offset > stream_size) + { + return 0; + } + + read_size = size; + if(stream_size < (file_offset + size)) + { + read_size = stream_size - file_offset; + } + + aligned_offset = ALIGNED_OFFSET(file_offset); + aligned_size = ALIGNED_SIZE(file_offset, read_size); + + if(IS_ALIGNED_PTR(buf) && + ALIGNED_OFFSET(buf_offset) == buf_offset && + aligned_size == read_size) + { + return direct_aligned_read(fd, buf, buf_offset, 
read_size, + file_offset, stream_size); + } + + aligned_buf = PINT_mem_aligned_alloc(aligned_size, BLOCK_SIZE); + if(!aligned_buf) + { + return -ENOMEM; + } + + ret = direct_aligned_read(fd, aligned_buf, 0, aligned_size, + aligned_offset, stream_size); + if(ret < 0) + { + PINT_mem_aligned_free(aligned_buf); + + return ret; + } + + memcpy(((char *)buf) + buf_offset, + ((char *)aligned_buf) + (file_offset - aligned_offset), + read_size); + + PINT_mem_aligned_free(aligned_buf); + + return ret; +} + +static int dbpf_bstream_direct_read_op_svc(void *ptr, PVFS_hint hint) +{ + int ret = -TROVE_EINVAL; + TROVE_object_ref ref; + TROVE_ds_attributes attr; + dbpf_queued_op_t *qop_p; + struct dbpf_bstream_rw_list_op *rw_op; + dbpf_stream_extents_t *stream_extents = NULL; + int i, extent_count; + + rw_op = (struct dbpf_bstream_rw_list_op *)ptr; + qop_p = (dbpf_queued_op_t *)rw_op->queued_op_ptr; + + ref.fs_id = qop_p->op.coll_p->coll_id; + ref.handle = qop_p->op.handle; + + /* not in attribute cache. 
get the size from dspace */ + ret = dbpf_dspace_attr_get(qop_p->op.coll_p, ref, &attr); + if(ret != 0) + { + gossip_err("%s: failed to get size in dspace attr: (error=%d)\n", + __func__, ret); + goto done; + } + + ret = dbpf_bstream_get_extents( + rw_op->mem_offset_array, + rw_op->mem_size_array, + rw_op->mem_array_count, + rw_op->stream_offset_array, + rw_op->stream_size_array, + rw_op->stream_array_count, + &extent_count, + NULL); + if(ret != 0) + { + gossip_err("%s: failed to get bstream extents from offset/sizes: " + "(error=%d)\n", __func__, ret); + goto done; + } + + stream_extents = malloc(sizeof(*stream_extents) * extent_count); + if(!stream_extents) + { + return -TROVE_ENOMEM; + } + + ret = dbpf_bstream_get_extents( + rw_op->mem_offset_array, + rw_op->mem_size_array, + rw_op->mem_array_count, + rw_op->stream_offset_array, + rw_op->stream_size_array, + rw_op->stream_array_count, + &extent_count, + stream_extents); + if(ret != 0) + { + gossip_err("%s: failed to get bstream extents from offset/sizes: " + "(error=%d)\n", __func__, ret); + goto done; + } + + for(i = 0; i < extent_count; ++ i) + { + ret = direct_locked_read(rw_op->open_ref.fd, + stream_extents[i].buffer, + 0, + stream_extents[i].size, + stream_extents[i].offset, + attr.u.datafile.b_size); + if(ret < 0) + { + ret = -trove_errno_to_trove_error(-ret); + gossip_err("%s: direct_locked_read failed: (error=%d)\n", __func__, + ret); + goto done; + } + } + + ret = DBPF_OP_COMPLETE; + +done: + if(stream_extents) + { + free(stream_extents); + } + dbpf_open_cache_put(&rw_op->open_ref); + return ret; +} + +static int dbpf_bstream_direct_write_op_svc(void *ptr, PVFS_hint hint) +{ + int ret = -TROVE_EINVAL; + TROVE_object_ref ref; + TROVE_ds_attributes attr; + dbpf_stream_extents_t *stream_extents = NULL; + int i, extent_count; + struct dbpf_bstream_rw_list_op *rw_op; + dbpf_queued_op_t *qop_p; + PVFS_size eor = -1; + int sync_required = 0; + + rw_op = (struct dbpf_bstream_rw_list_op *)ptr; + qop_p = 
(dbpf_queued_op_t *)rw_op->queued_op_ptr; + + ref.fs_id = qop_p->op.coll_p->coll_id; + ref.handle = qop_p->op.handle; + + ret = dbpf_bstream_get_extents( + rw_op->mem_offset_array, + rw_op->mem_size_array, + rw_op->mem_array_count, + rw_op->stream_offset_array, + rw_op->stream_size_array, + rw_op->stream_array_count, + &extent_count, + NULL); + if(ret != 0) + { + gossip_err("%s: failed to count extents from stream offset/sizes: " + "(error=%d)\n", __func__, ret); + goto cache_put; + } + + stream_extents = malloc(sizeof(*stream_extents) * extent_count); + if(!stream_extents) + { + ret = -TROVE_ENOMEM; + goto cache_put; + } + + ret = dbpf_bstream_get_extents( + rw_op->mem_offset_array, + rw_op->mem_size_array, + rw_op->mem_array_count, + rw_op->stream_offset_array, + rw_op->stream_size_array, + rw_op->stream_array_count, + &extent_count, + stream_extents); + if(ret != 0) + { + gossip_err("%s: failed to get stream extents from stream offset/sizes: " + "(error=%d)\n", __func__, ret); + goto cache_put; + } + + if( grow_bstream_table == NULL ) + { + ret = grow_bstream_handle_table_init( 1021 ); + if( ret != 0 ) + { + gossip_err("%s: failed to create grow_bstream_handle_table\n", + __func__); + goto cache_put; + } + } + + /* acquire a lock on this handle prior to getting the size to prevent + * a race condition between multiple writes getting the wrong size */ + grow_bstream_handle_acquire_lock( ref ); + + ret = dbpf_dspace_attr_get(qop_p->op.coll_p, ref, &attr); + if(ret != 0) + { + gossip_err("%s: failed to get dspace attr for bstream: (error=%d)\n", + __func__, ret); + grow_bstream_handle_release_lock( ref ); + goto cache_put; + } + + /* prior to writes see if we are growing the file, if not, release the + * lock for growing file size since we won't be updating the file size + * below */ + for(i = 0; i < extent_count; ++ i) + { + if(eor < stream_extents[i].offset + stream_extents[i].size) + { + eor = stream_extents[i].offset + stream_extents[i].size; + } + } + if(eor 
<= attr.u.datafile.b_size) + { + /* file size is not growing so we do not need to hold the lock + * since we won't update the size attribute below */ + grow_bstream_handle_release_lock( ref ); + } + + *rw_op->out_size_p = 0; + + for(i = 0; i < extent_count; ++ i) + { + ret = direct_locked_write(rw_op->open_ref.fd, + stream_extents[i].buffer, + 0, + stream_extents[i].size, + stream_extents[i].offset, + attr.u.datafile.b_size); + if(ret < 0) + { + gossip_err("%s: failed to perform direct locked write: " + "(error=%d)\n", __func__, ret); + if(eor > attr.u.datafile.b_size) + { + grow_bstream_handle_release_lock( ref ); + } + goto cache_put; + } + /* did this calculation above + * if(eor < stream_extents[i].offset + stream_extents[i].size) + * { + * eor = stream_extents[i].offset + stream_extents[i].size; + * } + */ + *rw_op->out_size_p += ret; + } + + if(eor > attr.u.datafile.b_size) + { + int outcount; + + gen_mutex_lock(&dbpf_update_size_lock); + ret = dbpf_dspace_attr_get(qop_p->op.coll_p, ref, &attr); + if(ret != 0) + { + gossip_err("%s: failed to get size from dspace attr: (error=%d)\n", + __func__, ret); + gen_mutex_unlock(&dbpf_update_size_lock); + grow_bstream_handle_release_lock( ref ); + goto cache_put; + } + + if(eor > attr.u.datafile.b_size) + { + /* set the size of the file */ + attr.u.datafile.b_size = eor; + ret = dbpf_dspace_attr_set(qop_p->op.coll_p, ref, &attr); + if(ret != 0) + { + gossip_err("%s: failed to update size in dspace attr: " + "(error=%d)\n", __func__, ret); + gen_mutex_unlock(&dbpf_update_size_lock); + grow_bstream_handle_release_lock( ref ); + goto cache_put; + } + sync_required = 1; + } + gen_mutex_unlock(&dbpf_update_size_lock); + + if(sync_required == 1) + { + gossip_debug(GOSSIP_DIRECTIO_DEBUG, + "directio updating size for handle %llu\n", llu(ref.handle)); + + dbpf_open_cache_put(&rw_op->open_ref); + + /* If we updated the size, then convert cur_op into a setattr. + * Note that we are not actually going to perform a setattr. 
+ * We just want the coalescing path to treat it like a setattr + * so that the size update is synced before we complete. + */ + dbpf_queued_op_init(qop_p, + DSPACE_SETATTR, + ref.handle, + qop_p->op.coll_p, + dbpf_dspace_setattr_op_svc, + qop_p->op.user_ptr, + TROVE_SYNC, + qop_p->op.context_id); + qop_p->op.state = OP_IN_SERVICE; + ret = dbpf_sync_coalesce(qop_p, 0, &outcount); + if(ret < 0) + { + gossip_err("%s: failed to coalesce size update in dspace " + "attr: (error=%d)\n", __func__, ret); + grow_bstream_handle_release_lock( ref ); + goto done; + } + + ret = grow_bstream_handle_release_lock( ref ); + if( ret != 0 ) + { + gossip_debug(GOSSIP_DIRECTIO_DEBUG, "%s: failed to release " + "grow_bstream_handle lock when not updating " + "file size\n", __func__ ); + } + + ret = PINT_MGMT_OP_CONTINUE; + goto done; + } + else + { + /* still need to release the lock even thought we didn't update + * the size because the size calc prior to doing the writes told + * use we would */ + ret = grow_bstream_handle_release_lock( ref ); + if( ret != 0 ) + { + gossip_debug(GOSSIP_DIRECTIO_DEBUG, "%s: failed to release " + "grow_bstream_handle lock when not updating " + "file size\n", __func__ ); + } + } + } + /* if we don't try to update the size then we already released the + * handle grow lock above */ + + ret = PINT_MGMT_OP_COMPLETED; + +cache_put: + dbpf_open_cache_put(&rw_op->open_ref); +done: + if(stream_extents) + { + free(stream_extents); + } + return ret; +} + +static int dbpf_bstream_direct_read_at(TROVE_coll_id coll_id, + TROVE_handle handle, + void *buffer, + TROVE_size *inout_size_p, + TROVE_offset offset, + TROVE_ds_flags flags, + TROVE_vtag_s *vtag, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints) +{ + return -TROVE_ENOSYS; +} + +static int dbpf_bstream_direct_write_at(TROVE_coll_id coll_id, + TROVE_handle handle, + void *buffer, + TROVE_size *inout_size_p, + TROVE_offset offset, + TROVE_ds_flags flags, + TROVE_vtag_s 
*vtag, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints) +{ + return -TROVE_ENOSYS; +} + +static int dbpf_bstream_direct_read_list(TROVE_coll_id coll_id, + TROVE_handle handle, + char **mem_offset_array, + TROVE_size *mem_size_array, + int mem_count, + TROVE_offset *stream_offset_array, + TROVE_size *stream_size_array, + int stream_count, + TROVE_size *out_size_p, + TROVE_ds_flags flags, + TROVE_vtag_s *vtag, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints) +{ + + dbpf_queued_op_t *q_op_p = NULL; + struct dbpf_bstream_rw_list_op *op; + struct dbpf_collection *coll_p = NULL; + int ret; + + coll_p = dbpf_collection_find_registered(coll_id); + if (coll_p == NULL) + { + gossip_err("%s: failed to find collection with fsid %d\n", + __func__, coll_id); + return -TROVE_EINVAL; + } + + q_op_p = dbpf_queued_op_alloc(); + if (q_op_p == NULL) + { + return -TROVE_ENOMEM; + } + + /* initialize all the common members */ + dbpf_queued_op_init(q_op_p, + BSTREAM_READ_LIST, + handle, + coll_p, + NULL, + user_ptr, + flags, + context_id); + op = (struct dbpf_bstream_rw_list_op *)&q_op_p->op.u.b_rw_list; + + /* initialize the op-specific members */ + op->stream_array_count = stream_count; + op->stream_offset_array = stream_offset_array; + op->stream_size_array = stream_size_array; + op->out_size_p = out_size_p; + + op->mem_array_count = mem_count; + op->mem_offset_array = mem_offset_array; + op->mem_size_array = mem_size_array; + op->queued_op_ptr = q_op_p; + + ret = dbpf_open_cache_get( + coll_id, handle, + DBPF_FD_DIRECT_READ, + &op->open_ref); + if(ret < 0) + { + if(ret == -TROVE_ENOENT) + { + /* We create the bstream lazily, so here we'll just assume the read + * was done before writes to this bstream occured, and return + * a successful read of size 0. 
+ */ + *out_size_p = 0; + ret = DBPF_OP_COMPLETE; + } + dbpf_queued_op_free(q_op_p); + return ret; + } + + *out_op_id_p = q_op_p->op.id; + ret = PINT_manager_id_post( + io_thread_mgr, q_op_p, &q_op_p->mgr_op_id, + dbpf_bstream_direct_read_op_svc, op, NULL, io_queue_id); + if(ret < 0) + { + gossip_err("%s: failed to post direct read op: (error=%d)\n", + __func__, ret); + return ret; + } + + return DBPF_OP_CONTINUE; +} + +static int dbpf_bstream_direct_write_list(TROVE_coll_id coll_id, + TROVE_handle handle, + char **mem_offset_array, + TROVE_size *mem_size_array, + int mem_count, + TROVE_offset *stream_offset_array, + TROVE_size *stream_size_array, + int stream_count, + TROVE_size *out_size_p, + TROVE_ds_flags flags, + TROVE_vtag_s *vtag, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints) +{ + + dbpf_queued_op_t *q_op_p = NULL; + struct dbpf_bstream_rw_list_op *op; + struct dbpf_collection *coll_p = NULL; + int ret; + + coll_p = dbpf_collection_find_registered(coll_id); + if (coll_p == NULL) + { + return -TROVE_EINVAL; + } + + q_op_p = dbpf_queued_op_alloc(); + if(!q_op_p) + { + return -TROVE_ENOMEM; + } + dbpf_queued_op_init(q_op_p, + BSTREAM_WRITE_LIST, + handle, + coll_p, + NULL, + user_ptr, + TROVE_SYNC, + context_id); + + op = &q_op_p->op.u.b_rw_list; + + /* initialize the op-specific members */ + op->stream_array_count = stream_count; + op->stream_offset_array = stream_offset_array; + op->stream_size_array = stream_size_array; + op->out_size_p = out_size_p; + + op->mem_array_count = mem_count; + op->mem_offset_array = mem_offset_array; + op->mem_size_array = mem_size_array; + op->queued_op_ptr = q_op_p; + + ret = dbpf_open_cache_get( + coll_id, handle, + DBPF_FD_DIRECT_WRITE, + &op->open_ref); + if(ret < 0) + { + dbpf_queued_op_free(q_op_p); + return ret; + } + + *out_op_id_p = q_op_p->op.id; + + gossip_debug(GOSSIP_DIRECTIO_DEBUG, "%s: queuing direct write operation\n", + __func__); + PINT_manager_id_post( + 
io_thread_mgr, q_op_p, &q_op_p->mgr_op_id, + dbpf_bstream_direct_write_op_svc, op, NULL, io_queue_id); + + return DBPF_OP_CONTINUE; +} + +static int dbpf_bstream_direct_resize_op_svc(struct dbpf_op *op_p) +{ + int ret; + TROVE_ds_attributes attr; + TROVE_object_ref ref; + dbpf_queued_op_t *q_op_p; + struct open_cache_ref open_ref; + PVFS_size tmpsize; + + q_op_p = (dbpf_queued_op_t *)op_p->u.b_resize.queued_op_ptr; + ref.fs_id = op_p->coll_p->coll_id; + ref.handle = op_p->handle; + + gen_mutex_lock(&dbpf_update_size_lock); + ret = dbpf_dspace_attr_get(op_p->coll_p, ref, &attr); + if(ret != 0) + { + gen_mutex_unlock(&dbpf_update_size_lock); + return ret; + } + + tmpsize = op_p->u.b_resize.size; + attr.u.datafile.b_size = tmpsize; + + ret = dbpf_dspace_attr_set(op_p->coll_p, ref, &attr); + if(ret < 0) + { + gen_mutex_unlock(&dbpf_update_size_lock); + return ret; + } + gen_mutex_unlock(&dbpf_update_size_lock); + + /* setup op for sync coalescing */ + dbpf_queued_op_init(q_op_p, + DSPACE_SETATTR, + ref.handle, + q_op_p->op.coll_p, + dbpf_dspace_setattr_op_svc, + q_op_p->op.user_ptr, + TROVE_SYNC, + q_op_p->op.context_id); + q_op_p->op.state = OP_IN_SERVICE; + + /* truncate file after attributes are set */ + ret = dbpf_open_cache_get( + op_p->coll_p->coll_id, op_p->handle, + DBPF_FD_DIRECT_WRITE, + &open_ref); + if(ret < 0) + { + return ret; + } + + ret = DBPF_RESIZE(open_ref.fd, tmpsize); + if(ret < 0) + { + return(ret); + } + + dbpf_open_cache_put(&open_ref); + + return DBPF_OP_COMPLETE; +} + +static int dbpf_bstream_direct_resize(TROVE_coll_id coll_id, + TROVE_handle handle, + TROVE_size *inout_size_p, + TROVE_ds_flags flags, + TROVE_vtag_s *vtag, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints) +{ + dbpf_queued_op_t *q_op_p = NULL; + struct dbpf_collection *coll_p = NULL; + + coll_p = dbpf_collection_find_registered(coll_id); + if (coll_p == NULL) + { + return -TROVE_EINVAL; + } + + q_op_p = dbpf_queued_op_alloc(); + if 
(q_op_p == NULL) + { + return -TROVE_ENOMEM; + } + + /* initialize all the common members */ + dbpf_queued_op_init(q_op_p, + BSTREAM_RESIZE, + handle, + coll_p, + dbpf_bstream_direct_resize_op_svc, + user_ptr, + flags, + context_id); + + /* initialize the op-specific members */ + q_op_p->op.u.b_resize.size = *inout_size_p; + q_op_p->op.u.b_resize.queued_op_ptr = q_op_p; + *out_op_id_p = dbpf_queued_op_queue(q_op_p); + + return 0; +} + +static int dbpf_bstream_direct_validate(TROVE_coll_id coll_id, + TROVE_handle handle, + TROVE_ds_flags flags, + TROVE_vtag_s *vtag, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints) +{ + return -TROVE_ENOSYS; +} + +static int dbpf_bstream_direct_flush(TROVE_coll_id coll_id, + TROVE_handle handle, + TROVE_ds_flags flags, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints) +{ + return DBPF_OP_COMPLETE; +} + +static int dbpf_bstream_direct_cancel( + TROVE_coll_id coll_id, + TROVE_op_id cancel_id, + TROVE_context_id context_id) +{ + dbpf_queued_op_t *op; + int ret; + + op = id_gen_fast_lookup(cancel_id); + if(!op) + { + gossip_lerr("Invalid op-id to cancel\n"); + return -TROVE_EINVAL; + } + + ret = PINT_manager_cancel(io_thread_mgr, op->mgr_op_id); + if(ret < 0) + { + return ret|PVFS_ERROR_TROVE; + } + + return ret; +} + +struct TROVE_bstream_ops dbpf_bstream_direct_ops = +{ + dbpf_bstream_direct_read_at, + dbpf_bstream_direct_write_at, + dbpf_bstream_direct_resize, + dbpf_bstream_direct_validate, + dbpf_bstream_direct_read_list, + dbpf_bstream_direct_write_list, + dbpf_bstream_direct_flush, + dbpf_bstream_direct_cancel +}; + +static int dbpf_bstream_get_extents( + char **mem_offset_array, + TROVE_size *mem_size_array, + int mem_count, + TROVE_offset *stream_offset_array, + TROVE_size *stream_size_array, + int stream_count, + int *ext_count, + dbpf_stream_extents_t *extents) +{ + int mct = 0, sct = 0, act = 0; + int oom = 0, oos = 0; + TROVE_size 
cur_mem_size = 0; + char *cur_mem_off = NULL; + char *ext_ptr = NULL; + TROVE_size ext_size = 0, cur_stream_size = 0; + TROVE_offset ext_off = 0, cur_stream_off = 0; + + cur_mem_size = mem_size_array[mct]; + cur_mem_off = mem_offset_array[mct]; + + cur_stream_size = stream_size_array[sct]; + cur_stream_off = stream_offset_array[sct]; + + while (1) + { + /* + determine if we're either out of memory (oom) regions, or + out of stream (oos) regions + */ + /* in many (all?) cases mem_count is 1, so oom will end up being 1 */ + oom = (((mct + 1) < mem_count) ? 0 : 1); + oos = (((sct + 1) < stream_count) ? 0 : 1); + + if (cur_mem_size == cur_stream_size) + { + /* consume both mem and stream regions */ + ext_size = cur_mem_size; + ext_ptr = cur_mem_off; + ext_off = cur_stream_off; + + if (!oom) + { + cur_mem_size = mem_size_array[++mct]; + cur_mem_off = mem_offset_array[mct]; + } + else + { + cur_mem_size = 0; + } + if (!oos) + { + cur_stream_size = stream_size_array[++sct]; + cur_stream_off = stream_offset_array[sct]; + } + else + { + cur_stream_size = 0; + } + } + else if (cur_mem_size < cur_stream_size) + { + /* consume mem region and update stream region */ + ext_size = cur_mem_size; + ext_ptr = cur_mem_off; + ext_off = cur_stream_off; + + cur_stream_size -= cur_mem_size; + cur_stream_off += cur_mem_size; + + if (!oom) + { + cur_mem_size = mem_size_array[++mct]; + cur_mem_off = mem_offset_array[mct]; + } + else + { + cur_mem_size = 0; + } + } + else /* cur_mem_size > cur_stream_size */ + { + /* consume stream region and update mem region */ + ext_size = cur_stream_size; + ext_ptr = cur_mem_off; + ext_off = cur_stream_off; + + cur_mem_size -= cur_stream_size; + cur_mem_off += cur_stream_size; + + if (!oos) + { + cur_stream_size = stream_size_array[++sct]; + cur_stream_off = stream_offset_array[sct]; + } + else + { + cur_stream_size = 0; + } + } + + if(extents) + { + extents[act].buffer = ext_ptr; + extents[act].offset = ext_off; + extents[act].size = ext_size; + } + 
act++; + + /* process until there are no bytes left in the current piece */ + if ((oom && cur_mem_size == 0) || (oos && cur_stream_size == 0)) + { + break; + } + } + + /* return the number actually used */ + *ext_count = act; + return 0; +} + +/* grow_bstream_handle_table_init() + * + * initialize the grow_bstream_table + * + * size: prime number of hash table size + */ +static int grow_bstream_handle_table_init( int size ) +{ + gen_mutex_lock( &grow_bstream_table_lock ); + if( grow_bstream_table == NULL ) + { + grow_bstream_table = qhash_init(hash_handle_compare, hash_handle, size); + if( grow_bstream_table == NULL ) + { + return -PVFS_ENOMEM; + } + } + gen_mutex_unlock( &grow_bstream_table_lock ); + return 0; +} + +/* obtains a per-handle lock by locking an existing entry in the + * grow_bstream_table or creating an entry and grabbing the lock. + * + * returns 0 on success and the handle lock held or an error + */ +static int grow_bstream_handle_acquire_lock( TROVE_object_ref ref ) +{ + struct qlist_head *hash_link = NULL; + struct grow_bstream_handle *grow_handle = NULL; + + gen_mutex_lock( &grow_bstream_table_lock ); + if( grow_bstream_table == NULL ) + { + gen_mutex_unlock( &grow_bstream_table_lock ); + return -PVFS_EINVAL; + } + + hash_link = qhash_search(grow_bstream_table, &(ref.handle) ); + if( hash_link ) + { + grow_handle = qlist_entry( hash_link, struct grow_bstream_handle, + hash_link); + } + else + { + grow_handle = calloc( 1, sizeof( struct grow_bstream_handle )); + if( grow_handle == NULL ) + { + gen_mutex_unlock( &grow_bstream_table_lock ); + gossip_err( "%s: failed to alloc memory\n", __func__); + return -PVFS_ENOMEM; + } + grow_handle->handle = ref.handle; + gen_mutex_init( &(grow_handle->handle_lock) ); + gen_mutex_init( &(grow_handle->refcount_lock) ); + + /* we're safe adding it and waiting on grabbing the lock because + * we still have the lock on the table so no one else + * should access this new member */ + qhash_add( grow_bstream_table, 
&(grow_handle->handle), + &(grow_handle->hash_link) ); + } + + /* increment the number of things using the hash member */ + gen_mutex_lock( &(grow_handle->refcount_lock) ); + grow_handle->refcount++; + gen_mutex_unlock( &(grow_handle->refcount_lock) ); + + gen_mutex_unlock( &grow_bstream_table_lock ); + + gen_mutex_lock( &(grow_handle->handle_lock)); + + return 0; +} + +static int grow_bstream_handle_release_lock( TROVE_object_ref ref ) +{ + struct qlist_head *hash_link = NULL; + struct grow_bstream_handle *grow_handle = NULL; + int rcount = 0; + + gen_mutex_lock( &grow_bstream_table_lock ); + if( grow_bstream_table == NULL ) + { + gen_mutex_unlock( &grow_bstream_table_lock ); + return -PVFS_EINVAL; + } + + hash_link = qhash_search(grow_bstream_table, &(ref.handle) ); + if( hash_link ) + { + grow_handle = qlist_entry(hash_link, struct grow_bstream_handle, + hash_link); + + gen_mutex_lock( &(grow_handle->refcount_lock) ); + rcount = --grow_handle->refcount; + gen_mutex_unlock( &(grow_handle->refcount_lock) ); + + /* if we're the last reference remove it from the hash and free the + * memory. 
may want to optimize this so we aren't continuously + * alloc/free for each read */ + if( rcount == 0 ) + { + gen_mutex_unlock( &(grow_handle->handle_lock)); + qhash_del(hash_link); + gen_mutex_destroy( &(grow_handle->handle_lock) ); + gen_mutex_destroy( &(grow_handle->refcount_lock) ); + free( grow_handle ); + } + else + { + gen_mutex_unlock( &(grow_handle->handle_lock)); + } + } + else + { + /* should have an entry, but if not just report it for debugging */ + gossip_debug(GOSSIP_DIRECTIO_DEBUG, "%s: no grow_handle entry when " + "trying to remove with refcount %d\n", __func__, rcount); + } + gen_mutex_unlock( &grow_bstream_table_lock ); + return 0; +} + +/* hash_handle() + * + * hash function for handles added to table + * taken from src/server/request-scheduler/request-scheduler.c + * + * returns integer offset into table + */ +static int hash_handle( + void *handle, + int table_size) +{ + /* TODO: update this later with a better hash function, + * depending on what handles look like, for now just modding + * + */ + unsigned long tmp = 0; + PVFS_handle *real_handle = handle; + + tmp += (*(real_handle)); + tmp = tmp % table_size; + + return ((int) tmp); +} + +/* hash_handle_compare() + * + * performs a comparison of a hash table entry to a given key + * (used for searching) + * taken from src/server/request-scheduler/request-scheduler.c + * + * returns 1 if match found, 0 otherwise + */ +static int hash_handle_compare( + void *key, + struct qlist_head *link) +{ + struct grow_bstream_handle *my_handle; + PVFS_handle *real_handle = key; + + my_handle = qlist_entry(link, struct grow_bstream_handle, hash_link); + if (my_handle->handle == *real_handle) + { + return (1); + } + + return (0); +} + +#if 0 +int dbpf_aligned_blocks_init(void) +{ + int i; + + aligned_blocks_buffer = PINT_mem_aligned_alloc(BLOCK_SIZE*256, BLOCK_SIZE); + blocks = malloc(sizeof(*blocks) * 256); + used_count = 0; + gen_mutex_lock(&aligned_blocks_mutex); + for(i = 0; i < 256; ++i) + { + 
blocks[i].ptr = ((char *)aligned_blocks_buffer) + (i*BLOCK_SIZE); + qlist_add_tail(&(blocks[i].link), &aligned_blocks_unused); + } + gen_mutex_unlock(&aligned_blocks_mutex); + return 0; +} + +int dbpf_aligned_blocks_finalize(void) +{ + free(blocks); + PINT_mem_aligned_free(aligned_blocks_buffer); + return 0; +} + +void *dbpf_aligned_block_get(void) +{ + void *ptr; + struct aligned_block *ablock; + gen_mutex_lock(&aligned_blocks_mutex); + if(used_count > 255) + { + gossip_debug(GOSSIP_DIRECTIO_DEBUG, "ran out of aligned blocks: %d\n", + used_count); + gen_mutex_unlock(&aligned_blocks_mutex); + return NULL; + } + if(qlist_empty(&aligned_blocks_unused)) + { + gossip_debug(GOSSIP_DIRECTIO_DEBUG, + "aligned_block_get: unused list empty.\n"); + gen_mutex_unlock(&aligned_blocks_mutex); + return NULL; + } + + ablock = qlist_entry(aligned_blocks_unused.next, struct aligned_block, + link); + qlist_del(&ablock->link); + ptr = ablock->ptr; + ablock->ptr = NULL; + qlist_add_tail(&ablock->link, &aligned_blocks_used); + ++used_count; + gen_mutex_unlock(&aligned_blocks_mutex); + return ptr; +} + +int dbpf_aligned_block_put(void *ptr) +{ + struct aligned_block *ablock; + + gen_mutex_lock(&aligned_blocks_mutex); + ablock = qlist_entry(aligned_blocks_used.next, struct aligned_block, link); + qlist_del(&ablock->link); + ablock->ptr = ptr; + qlist_add_tail((&(ablock->link)), &aligned_blocks_unused); + --used_count; + gen_mutex_unlock(&aligned_blocks_mutex); + return 0; +} +#endif + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/trove/trove-dbpf/dbpf-bstream-direct.h b/src/io/trove/trove-dbpf/dbpf-bstream-direct.h new file mode 100644 index 0000000..fd8a065 --- /dev/null +++ b/src/io/trove/trove-dbpf/dbpf-bstream-direct.h @@ -0,0 +1,15 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#ifndef __DBPF_BSTREAM_DIRECT_H +#define __DBPF_BSTREAM_DIRECT_H + +#include "trove-types.h" + +int dbpf_bstream_direct_read_op_svc(void *ptr, TROVE_hint *hints); +int dbpf_bstream_direct_write_op_svc(void *ptr, TROVE_hint *hints); + +#endif diff --git a/src/io/trove/trove-dbpf/dbpf-bstream.c b/src/io/trove/trove-dbpf/dbpf-bstream.c index 8a8bff6..781d165 100644 --- a/src/io/trove/trove-dbpf/dbpf-bstream.c +++ b/src/io/trove/trove-dbpf/dbpf-bstream.c @@ -26,6 +26,7 @@ #include "dbpf-attr-cache.h" #include "pint-event.h" #include "dbpf-open-cache.h" +#include "dbpf-sync.h" #include "dbpf-alt-aio.h" @@ -37,6 +38,7 @@ extern int TROVE_max_concurrent_io; static int s_dbpf_ios_in_progress = 0; static dbpf_op_queue_p s_dbpf_io_ready_queue = NULL; static gen_mutex_t s_dbpf_io_mutex = GEN_MUTEX_INITIALIZER; +static gen_mutex_t dbpf_update_size_lock = GEN_MUTEX_INITIALIZER; static struct dbpf_aio_ops aio_ops; @@ -54,13 +56,10 @@ static char *list_proc_state_strings[] __attribute__((unused)) = { }; #endif -static int dbpf_bstream_read_at_op_svc(struct dbpf_op *op_p); -static int dbpf_bstream_write_at_op_svc(struct dbpf_op *op_p); #ifndef __PVFS2_TROVE_AIO_THREADED__ static int dbpf_bstream_rw_list_op_svc(struct dbpf_op *op_p); #endif static int dbpf_bstream_flush_op_svc(struct dbpf_op *op_p); -static int dbpf_bstream_resize_op_svc(struct dbpf_op *op_p); #ifdef __PVFS2_TROVE_AIO_THREADED__ #include "dbpf-thread.h" @@ -76,6 +75,11 @@ static void aio_progress_notification(union sigval sig) struct dbpf_op *op_p = NULL; int ret, i, aiocb_inuse_count, state = 0; struct aiocb *aiocb_p = NULL, *aiocb_ptr_array[AIOCB_ARRAY_SZ] = {0}; + PVFS_size eor = -1; + int j; + TROVE_ds_attributes attr; + TROVE_object_ref ref; + int sync_required = 0; cur_op = (dbpf_queued_op_t *)sig.sival_ptr; assert(cur_op); @@ -141,26 +145,108 @@ static void aio_progress_notification(union sigval sig) ret = 0; final_threaded_aio_cleanup: + if ((op_p->type == BSTREAM_WRITE_AT) || (op_p->type == 
BSTREAM_WRITE_LIST)) { DBPF_AIO_SYNC_IF_NECESSARY( op_p, op_p->u.b_rw_list.fd, ret); + + /* TODO: need similar logic for non-threaded aio case too */ + + /* calculate end of request */ + for(j=0; j<op_p->u.b_rw_list.stream_array_count; j++) + { + if(eor < op_p->u.b_rw_list.stream_offset_array[j] + + op_p->u.b_rw_list.stream_size_array[j]) + { + eor = op_p->u.b_rw_list.stream_offset_array[j] + + op_p->u.b_rw_list.stream_size_array[j]; + } + } + + ref.fs_id = op_p->coll_p->coll_id; + ref.handle = op_p->handle; + + gen_mutex_lock(&dbpf_update_size_lock); + ret = dbpf_dspace_attr_get(op_p->coll_p, ref, &attr); + if(ret != 0) + { + gen_mutex_unlock(&dbpf_update_size_lock); + goto error_in_cleanup; + } + + if(eor > attr.u.datafile.b_size) + { + /* set the size of the file */ + attr.u.datafile.b_size = eor; + ret = dbpf_dspace_attr_set(op_p->coll_p, ref, &attr); + if(ret != 0) + { + gen_mutex_unlock(&dbpf_update_size_lock); + goto error_in_cleanup; + } + if(op_p->flags & TROVE_SYNC) + { + sync_required = 1; + } + } + gen_mutex_unlock(&dbpf_update_size_lock); } +error_in_cleanup: + dbpf_open_cache_put(&op_p->u.b_rw_list.open_ref); op_p->u.b_rw_list.fd = -1; - gossip_debug(GOSSIP_TROVE_DEBUG, "*** starting delayed ops if any " - "(state is %s)\n", - list_proc_state_strings[ - op_p->u.b_rw_list.list_proc_state]); - cur_op->state = ret; /* this is a macro defined in dbpf-thread.h */ - dbpf_queued_op_complete( - cur_op, - ((ret == -TROVE_ECANCEL) ? OP_CANCELED : OP_COMPLETED)); + + if(sync_required) + { + int outcount; + + gossip_debug(GOSSIP_TROVE_DEBUG, + "aio updating size for handle %llu\n", llu(ref.handle)); + + /* If we updated the size, then convert cur_op into a setattr. + * Note that we are not actually going to perform a setattr. + * We just want the coalescing path to treat it like a setattr + * so that the size update is synced before we complete. 
+ */ + + /* We need to free the aiocb_array in this case, since the + * dbpf_queued_op_free function won't know to do that anymore + */ + free(cur_op->op.u.b_rw_list.aiocb_array); + cur_op->op.u.b_rw_list.aiocb_array = NULL; + + dbpf_queued_op_init(cur_op, + DSPACE_SETATTR, + ref.handle, + cur_op->op.coll_p, + dbpf_dspace_setattr_op_svc, + cur_op->op.user_ptr, + TROVE_SYNC, + cur_op->op.context_id); + cur_op->op.state = OP_IN_SERVICE; + dbpf_sync_coalesce(cur_op, 0, &outcount); + } + else + { + dbpf_queued_op_complete(cur_op, OP_COMPLETED); + } + + /* if sync is not required, then dbpf_queued_op_complete executes and will issue a cond_signal. if + * the signal'd thread executes before the following gossip_debug statement, then cur_op is un- + * defined, causing the gossip_debug statement to seg fault. So, we check for existence first! + */ + + /* if the signal'd thread executes op_p can also go away + * causing the list_proc_state access to segfault. there isn't really + * much debugging information to be had by accessing cur_op or + * op_p, the key is that delayed ops are starting. 
*/ + gossip_debug(GOSSIP_TROVE_DEBUG,"*** starting delayed ops if any.\n"); start_delayed_ops_if_any(1); } @@ -423,9 +509,12 @@ static int issue_or_delay_io_operation( dbpf_open_cache_put(&cur_op->op.u.b_rw_list.open_ref); return -trove_errno_to_trove_error(errno); } - gossip_debug(GOSSIP_TROVE_DEBUG, "%s: lio_listio posted %p " - "(handle %llu, ret %d)\n", __func__, cur_op, - llu(cur_op->op.handle), ret); + if ( cur_op ) + { + gossip_debug(GOSSIP_TROVE_DEBUG, "%s: lio_listio posted %p " + "(handle %llu, ret %d)\n", __func__, cur_op, + llu(cur_op->op.handle), ret); + } } return 0; } @@ -436,92 +525,13 @@ int dbpf_bstream_read_at(TROVE_coll_id coll_id, TROVE_size *inout_size_p, TROVE_offset offset, TROVE_ds_flags flags, - TROVE_vtag_s *vtag, + TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { - dbpf_queued_op_t *q_op_p = NULL; - struct dbpf_collection *coll_p = NULL; - - coll_p = dbpf_collection_find_registered(coll_id); - if (coll_p == NULL) - { - return -TROVE_EINVAL; - } - - q_op_p = dbpf_queued_op_alloc(); - if (q_op_p == NULL) - { - return -TROVE_ENOMEM; - } - - /* initialize all the common members */ - dbpf_queued_op_init(q_op_p, - BSTREAM_READ_AT, - handle, - coll_p, - dbpf_bstream_read_at_op_svc, - user_ptr, - flags, - context_id); - - /* initialize the op-specific members */ - q_op_p->op.u.b_read_at.offset = offset; - q_op_p->op.u.b_read_at.size = *inout_size_p; - q_op_p->op.u.b_read_at.buffer = buffer; - - *out_op_id_p = dbpf_queued_op_queue(q_op_p); - - return 0; -} - -/* dbpf_bstream_read_at_op_svc() - * - * Returns 1 on completion, -TROVE_errno on error, 0 on not done. 
- */ -static int dbpf_bstream_read_at_op_svc(struct dbpf_op *op_p) -{ - int ret = -TROVE_EINVAL, got_fd = 0; - struct open_cache_ref tmp_ref; - - ret = dbpf_open_cache_get( - op_p->coll_p->coll_id, op_p->handle, - DBPF_FD_BUFFERED_READ, &tmp_ref); - if (ret < 0) - { - goto return_error; - } - got_fd = 1; - - ret = DBPF_LSEEK(tmp_ref.fd, - op_p->u.b_read_at.offset, SEEK_SET); - if (ret < 0) - { - ret = -trove_errno_to_trove_error(errno); - goto return_error; - } - - ret = DBPF_READ(tmp_ref.fd, op_p->u.b_read_at.buffer, - op_p->u.b_read_at.size); - if (ret < 0) - { - ret = -trove_errno_to_trove_error(errno); - goto return_error; - } - - dbpf_open_cache_put(&tmp_ref); - - gossip_debug(GOSSIP_TROVE_DEBUG, "read %d bytes.\n", ret); - - return 1; - - return_error: - if (got_fd) - { - dbpf_open_cache_put(&tmp_ref); - } - return ret; + return -TROVE_ENOSYS; } int dbpf_bstream_write_at(TROVE_coll_id coll_id, @@ -533,93 +543,10 @@ int dbpf_bstream_write_at(TROVE_coll_id coll_id, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { - dbpf_queued_op_t *q_op_p = NULL; - struct dbpf_collection *coll_p = NULL; - - coll_p = dbpf_collection_find_registered(coll_id); - if (coll_p == NULL) - { - return -TROVE_EINVAL; - } - - q_op_p = dbpf_queued_op_alloc(); - if (q_op_p == NULL) - { - return -TROVE_ENOMEM; - } - - /* initialize all the common members */ - dbpf_queued_op_init(q_op_p, - BSTREAM_WRITE_AT, - handle, - coll_p, - dbpf_bstream_write_at_op_svc, - user_ptr, - flags, - context_id); - - /* initialize the op-specific members */ - q_op_p->op.u.b_write_at.offset = offset; - q_op_p->op.u.b_write_at.size = *inout_size_p; - q_op_p->op.u.b_write_at.buffer = buffer; - - *out_op_id_p = dbpf_queued_op_queue(q_op_p); - - return 0; -} - -static int dbpf_bstream_write_at_op_svc(struct dbpf_op *op_p) -{ - int ret = -TROVE_EINVAL, got_fd = 0; - struct open_cache_ref tmp_ref; - TROVE_object_ref ref = 
{op_p->handle, op_p->coll_p->coll_id}; - - ret = dbpf_open_cache_get( - op_p->coll_p->coll_id, op_p->handle, - DBPF_FD_BUFFERED_WRITE, &tmp_ref); - if (ret < 0) - { - goto return_error; - } - got_fd = 1; - - ret = DBPF_LSEEK(tmp_ref.fd, - op_p->u.b_write_at.offset, SEEK_SET); - if (ret < 0) - { - ret = -trove_errno_to_trove_error(errno); - goto return_error; - } - - ret = DBPF_WRITE(tmp_ref.fd, op_p->u.b_write_at.buffer, - op_p->u.b_write_at.size); - if (ret < 0) - { - ret = -trove_errno_to_trove_error(errno); - goto return_error; - } - - /* remove cached attribute for this handle if it's present */ - gen_mutex_lock(&dbpf_attr_cache_mutex); - dbpf_attr_cache_remove(ref); - gen_mutex_unlock(&dbpf_attr_cache_mutex); - - DBPF_ERROR_SYNC_IF_NECESSARY(op_p, tmp_ref.fd); - - dbpf_open_cache_put(&tmp_ref); - - gossip_debug(GOSSIP_TROVE_DEBUG, "wrote %d bytes.\n", ret); - - return 1; - - return_error: - if (got_fd) - { - dbpf_open_cache_put(&tmp_ref); - } - return ret; + return -TROVE_ENOSYS; } int dbpf_bstream_flush(TROVE_coll_id coll_id, @@ -627,7 +554,8 @@ int dbpf_bstream_flush(TROVE_coll_id coll_id, TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; struct dbpf_collection *coll_p = NULL; @@ -653,7 +581,7 @@ int dbpf_bstream_flush(TROVE_coll_id coll_id, user_ptr, flags, context_id); - + q_op_p->op.hints = hints; *out_op_id_p = dbpf_queued_op_queue(q_op_p); return 0; } @@ -665,7 +593,7 @@ static int dbpf_bstream_flush_op_svc(struct dbpf_op *op_p) struct open_cache_ref tmp_ref; ret = dbpf_open_cache_get( - op_p->coll_p->coll_id, op_p->handle, + op_p->coll_p->coll_id, op_p->handle, DBPF_FD_BUFFERED_WRITE, &tmp_ref); if (ret < 0) { @@ -690,167 +618,86 @@ static int dbpf_bstream_flush_op_svc(struct dbpf_op *op_p) return ret; } -int dbpf_bstream_resize(TROVE_coll_id coll_id, - TROVE_handle handle, - TROVE_size *inout_size_p, - TROVE_ds_flags flags, - 
TROVE_vtag_s *vtag, - void *user_ptr, - TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) -{ - dbpf_queued_op_t *q_op_p = NULL; - struct dbpf_collection *coll_p = NULL; - - coll_p = dbpf_collection_find_registered(coll_id); - if (coll_p == NULL) - { - return -TROVE_EINVAL; - } - - q_op_p = dbpf_queued_op_alloc(); - if (q_op_p == NULL) - { - return -TROVE_ENOMEM; - } - - /* initialize all the common members */ - dbpf_queued_op_init(q_op_p, - BSTREAM_RESIZE, - handle, - coll_p, - dbpf_bstream_resize_op_svc, - user_ptr, - flags, - context_id); - - /* initialize the op-specific members */ - q_op_p->op.u.b_resize.size = *inout_size_p; - - *out_op_id_p = dbpf_queued_op_queue(q_op_p); - - return 0; -} - -static int dbpf_bstream_resize_op_svc(struct dbpf_op *op_p) -{ - int ret = -TROVE_EINVAL, got_fd = 0; - struct open_cache_ref tmp_ref; - TROVE_object_ref ref = {op_p->handle, op_p->coll_p->coll_id}; - - ret = dbpf_open_cache_get( - op_p->coll_p->coll_id, op_p->handle, - DBPF_FD_BUFFERED_WRITE, &tmp_ref); - if (ret < 0) - { - goto return_error; - } - got_fd = 1; - - ret = DBPF_RESIZE(tmp_ref.fd, op_p->u.b_resize.size); - if (ret != 0) - { - ret = -trove_errno_to_trove_error(errno); - goto return_error; - } - - gossip_debug(GOSSIP_TROVE_DEBUG, " RESIZED bstream %llu [fd = %d] " - "to %lld \n", llu(op_p->handle), tmp_ref.fd, - lld(op_p->u.b_resize.size)); - - dbpf_open_cache_put(&tmp_ref); - - /* adjust size in cached attribute element, if present */ - gen_mutex_lock(&dbpf_attr_cache_mutex); - dbpf_attr_cache_ds_attr_update_cached_data_bsize( - ref, op_p->u.b_resize.size); - gen_mutex_unlock(&dbpf_attr_cache_mutex); - - return 1; - -return_error: - if (got_fd) - { - dbpf_open_cache_put(&tmp_ref); - } - return ret; -} - int dbpf_bstream_validate(TROVE_coll_id coll_id, TROVE_handle handle, TROVE_ds_flags flags, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { return 
-TROVE_ENOSYS; } static int dbpf_bstream_read_list(TROVE_coll_id coll_id, TROVE_handle handle, - char **mem_offset_array, + char **mem_offset_array, TROVE_size *mem_size_array, int mem_count, - TROVE_offset *stream_offset_array, + TROVE_offset *stream_offset_array, TROVE_size *stream_size_array, int stream_count, TROVE_size *out_size_p, - TROVE_ds_flags flags, + TROVE_ds_flags flags, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { return dbpf_bstream_rw_list(coll_id, handle, - mem_offset_array, + mem_offset_array, mem_size_array, mem_count, - stream_offset_array, + stream_offset_array, stream_size_array, stream_count, out_size_p, - flags, + flags, vtag, user_ptr, context_id, out_op_id_p, LIO_READ, - &aio_ops); + &aio_ops, + hints); } static int dbpf_bstream_write_list(TROVE_coll_id coll_id, TROVE_handle handle, - char **mem_offset_array, + char **mem_offset_array, TROVE_size *mem_size_array, int mem_count, - TROVE_offset *stream_offset_array, + TROVE_offset *stream_offset_array, TROVE_size *stream_size_array, int stream_count, TROVE_size *out_size_p, - TROVE_ds_flags flags, + TROVE_ds_flags flags, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { return dbpf_bstream_rw_list(coll_id, handle, - mem_offset_array, + mem_offset_array, mem_size_array, mem_count, - stream_offset_array, + stream_offset_array, stream_size_array, stream_count, out_size_p, - flags, + flags, vtag, user_ptr, context_id, out_op_id_p, LIO_WRITE, - &aio_ops); + &aio_ops, + hints); } /* dbpf_bstream_rw_list() @@ -861,26 +708,29 @@ static int dbpf_bstream_write_list(TROVE_coll_id coll_id, */ inline int dbpf_bstream_rw_list(TROVE_coll_id coll_id, TROVE_handle handle, - char **mem_offset_array, + char **mem_offset_array, TROVE_size *mem_size_array, int mem_count, TROVE_offset *stream_offset_array, TROVE_size 
*stream_size_array, int stream_count, TROVE_size *out_size_p, - TROVE_ds_flags flags, + TROVE_ds_flags flags, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, TROVE_op_id *out_op_id_p, int opcode, - struct dbpf_aio_ops * aio_ops) + struct dbpf_aio_ops * aio_ops, + PVFS_hint hints) { int ret = -TROVE_EINVAL; dbpf_queued_op_t *q_op_p = NULL; struct dbpf_collection *coll_p = NULL; enum dbpf_op_type tmp_type; - int event_type, i; + PINT_event_type event_type; + int i; + PVFS_size count_mem; #ifdef __PVFS2_TROVE_AIO_THREADED__ struct dbpf_op *op_p = NULL; int aiocb_inuse_count = 0; @@ -902,12 +752,12 @@ inline int dbpf_bstream_rw_list(TROVE_coll_id coll_id, if (opcode == LIO_READ) { tmp_type = BSTREAM_READ_LIST; - event_type = PVFS_EVENT_TROVE_READ_LIST; + event_type = trove_dbpf_read_event_id; } else { tmp_type = BSTREAM_WRITE_LIST; - event_type = PVFS_EVENT_TROVE_WRITE_LIST; + event_type = trove_dbpf_write_event_id; } /* initialize all the common members */ @@ -924,11 +774,30 @@ inline int dbpf_bstream_rw_list(TROVE_coll_id coll_id, flags, context_id); - DBPF_EVENT_START(event_type, q_op_p->op.id); + if(PINT_EVENT_ENABLED) + { + count_mem = 0; + for(i = 0; i < mem_count; ++i) + { + count_mem += mem_size_array[i]; + } + } + + q_op_p->event_type = event_type; + + PINT_EVENT_START(event_type, dbpf_pid, NULL, &q_op_p->event_id, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + PINT_HINT_GET_HANDLE(hints), + handle, + PINT_HINT_GET_OP_ID(hints), + count_mem); if(gossip_debug_enabled(GOSSIP_TROVE_DEBUG)) { - PVFS_size count_mem = 0, count_stream = 0; + PVFS_size count_stream = 0; + count_mem = 0; gossip_debug(GOSSIP_TROVE_DEBUG, "dbpf_bstream_rw_list: mem_count: %d, stream_count: %d\n", mem_count, @@ -969,6 +838,7 @@ inline int dbpf_bstream_rw_list(TROVE_coll_id coll_id, q_op_p->op.u.b_rw_list.stream_array_count = stream_count; q_op_p->op.u.b_rw_list.stream_offset_array = stream_offset_array; 
q_op_p->op.u.b_rw_list.stream_size_array = stream_size_array; + q_op_p->op.hints = hints; q_op_p->op.u.b_rw_list.aio_ops = aio_ops; /* initialize the out size to 0 */ @@ -1114,6 +984,100 @@ inline int dbpf_bstream_rw_list(TROVE_coll_id coll_id, return 0; } +static int dbpf_bstream_cancel( + TROVE_coll_id coll_id, + TROVE_op_id cancel_id, + TROVE_context_id context_id) +{ + dbpf_queued_op_t *cur_op = NULL; + int state, ret; + + cur_op = id_gen_fast_lookup(cancel_id); + if (cur_op == NULL) + { + gossip_err("Invalid operation to test against\n"); + return -TROVE_EINVAL; + } + + /* check the state of the current op to see if it's completed */ + gen_mutex_lock(&cur_op->mutex); + state = cur_op->op.state; + gen_mutex_unlock(&cur_op->mutex); + + gossip_debug(GOSSIP_TROVE_DEBUG, "got cur_op %p\n", cur_op); + + switch(state) + { + case OP_QUEUED: + { + gossip_debug(GOSSIP_TROVE_DEBUG, + "op %p is queued: handling\n", cur_op); + + /* dequeue and complete the op in canceled state */ + cur_op->op.state = OP_IN_SERVICE; + dbpf_queued_op_put_and_dequeue(cur_op); + assert(cur_op->op.state == OP_DEQUEUED); + + cur_op->state = 0; + /* this is a macro defined in dbpf-thread.h */ + dbpf_queued_op_complete(cur_op, OP_CANCELED); + + gossip_debug( + GOSSIP_TROVE_DEBUG, "op %p is canceled\n", cur_op); + ret = 0; + } + break; + case OP_IN_SERVICE: + { + /* + for bstream i/o op, try an aio_cancel. for other ops, + there's not much we can do other than let the op + complete normally + */ + if (((cur_op->op.type == BSTREAM_READ_LIST) || + (cur_op->op.type == BSTREAM_WRITE_LIST)) && + (cur_op->op.u.b_rw_list.aio_ops != NULL)) + { +#if 0 + ret = aio_cancel(cur_op->op.u.b_rw_list.fd, + cur_op->op.u.b_rw_list.aiocb_array); +#endif + ret = cur_op->op.u.b_rw_list.aio_ops->aio_cancel( + cur_op->op.u.b_rw_list.fd, + cur_op->op.u.b_rw_list.aiocb_array); + gossip_debug( + GOSSIP_TROVE_DEBUG, "aio_cancel returned %s\n", + ((ret == AIO_CANCELED) ? 
"CANCELED" : + "NOT CANCELED")); + /* + NOTE: the normal aio notification method takes care + of completing the op and moving it to the completion + queue + */ + } + else + { + gossip_debug( + GOSSIP_TROVE_DEBUG, "op is in service: ignoring " + "operation type %d\n", cur_op->op.type); + } + ret = 0; + } + break; + case OP_COMPLETED: + case OP_CANCELED: + /* easy cancelation case; do nothing */ + gossip_debug( + GOSSIP_TROVE_DEBUG, "op is completed: ignoring\n"); + ret = 0; + break; + default: + gossip_err("Invalid dbpf_op state found (%d)\n", state); + assert(0); + } + return ret; +} + /* dbpf_bstream_rw_list_op_svc() * * This function is used to service both read_list and write_list @@ -1326,8 +1290,52 @@ static int dbpf_bstream_rw_list_op_svc(struct dbpf_op *op_p) return 0; } } + #endif +inline int dbpf_pread(int fd, void *buf, size_t count, off_t offset) +{ + int ret = 0; + int ret_size = 0; + + do + { + ret = pread(fd, ((char *)buf) + ret_size, + count - ret_size, offset + ret_size); + if (ret) + { + ret_size += ret; + } + } while( (ret == -1 && errno == EINTR) || (ret_size < count && ret > 0) ); + + if(ret < 0) + { + return ret; + } + return ret_size; +} + +inline int dbpf_pwrite(int fd, const void *buf, size_t count, off_t offset) +{ + int ret = 0; + int ret_size = 0; + do + { + ret = pwrite(fd, ((char *)buf) + ret_size, + count - ret_size, offset + ret_size); + if (ret) + { + ret_size += ret; + } + } while( (ret == -1 && errno == EINTR) || (ret_size < count && ret > 0) ); + + if(ret < 0) + { + return -trove_errno_to_trove_error(errno); + } + return ret_size; +} + static struct dbpf_aio_ops aio_ops = { aio_read, @@ -1340,6 +1348,114 @@ static struct dbpf_aio_ops aio_ops = aio_fsync }; +static int dbpf_bstream_resize_op_svc(struct dbpf_op *op_p) +{ + int ret; + TROVE_ds_attributes attr; + TROVE_object_ref ref; + dbpf_queued_op_t *q_op_p; + struct open_cache_ref open_ref; + PVFS_size tmpsize; + + q_op_p = (dbpf_queued_op_t *)op_p->u.b_resize.queued_op_ptr; + + 
ref.fs_id = op_p->coll_p->coll_id; + ref.handle = op_p->handle; + + gen_mutex_lock(&dbpf_update_size_lock); + ret = dbpf_dspace_attr_get(op_p->coll_p, ref, &attr); + if(ret != 0) + { + gen_mutex_unlock(&dbpf_update_size_lock); + return ret; + } + + tmpsize = op_p->u.b_resize.size; + attr.u.datafile.b_size = tmpsize; + + ret = dbpf_dspace_attr_set(op_p->coll_p, ref, &attr); + if(ret < 0) + { + gen_mutex_unlock(&dbpf_update_size_lock); + return ret; + } + gen_mutex_unlock(&dbpf_update_size_lock); + + /* setup op for sync coalescing */ + dbpf_queued_op_init(q_op_p, + DSPACE_SETATTR, + ref.handle, + q_op_p->op.coll_p, + dbpf_dspace_setattr_op_svc, + q_op_p->op.user_ptr, + TROVE_SYNC, + q_op_p->op.context_id); + q_op_p->op.state = OP_IN_SERVICE; + + /* truncate file after attributes are set */ + ret = dbpf_open_cache_get( + op_p->coll_p->coll_id, op_p->handle, + DBPF_FD_BUFFERED_WRITE, + &open_ref); + if(ret < 0) + { + return ret; + } + + ret = DBPF_RESIZE(open_ref.fd, tmpsize); + if(ret < 0) + { + return(ret); + } + + dbpf_open_cache_put(&open_ref); + + return DBPF_OP_COMPLETE; +} + +int dbpf_bstream_resize(TROVE_coll_id coll_id, + TROVE_handle handle, + TROVE_size *inout_size_p, + TROVE_ds_flags flags, + TROVE_vtag_s *vtag, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints) +{ + dbpf_queued_op_t *q_op_p = NULL; + struct dbpf_collection *coll_p = NULL; + + coll_p = dbpf_collection_find_registered(coll_id); + if (coll_p == NULL) + { + return -TROVE_EINVAL; + } + + q_op_p = dbpf_queued_op_alloc(); + if (q_op_p == NULL) + { + return -TROVE_ENOMEM; + } + + /* initialize all the common members */ + dbpf_queued_op_init(q_op_p, + BSTREAM_RESIZE, + handle, + coll_p, + dbpf_bstream_resize_op_svc, + user_ptr, + flags, + context_id); + + /* initialize the op-specific members */ + q_op_p->op.u.b_resize.size = *inout_size_p; + q_op_p->op.u.b_resize.queued_op_ptr = q_op_p; + *out_op_id_p = dbpf_queued_op_queue(q_op_p); + + return 0; +} + 
struct TROVE_bstream_ops dbpf_bstream_ops = { dbpf_bstream_read_at, @@ -1348,7 +1464,8 @@ struct TROVE_bstream_ops dbpf_bstream_ops = dbpf_bstream_validate, dbpf_bstream_read_list, dbpf_bstream_write_list, - dbpf_bstream_flush + dbpf_bstream_flush, + dbpf_bstream_cancel }; /* diff --git a/src/io/trove/trove-dbpf/dbpf-collection.c b/src/io/trove/trove-dbpf/dbpf-collection.c index 48abb02..db707e2 100644 --- a/src/io/trove/trove-dbpf/dbpf-collection.c +++ b/src/io/trove/trove-dbpf/dbpf-collection.c @@ -80,58 +80,6 @@ void dbpf_collection_deregister(struct dbpf_collection *entry) } } -void dbpf_collection_clear_registered(void) -{ - int ret = -TROVE_EINVAL; - struct dbpf_collection *ptr = root_coll_p, *free_ptr = NULL; - - while (ptr != NULL) - { - free_ptr = ptr; - ptr = ptr->next_p; - - if ((ret = free_ptr->coll_attr_db->sync( - free_ptr->coll_attr_db, 0)) != 0) - { - gossip_err("db_sync(coll_attr_db): %s\n", db_strerror(ret)); - } - - if ((ret = db_close(free_ptr->coll_attr_db)) != 0) - { - gossip_lerr("db_close(coll_attr_db): %s\n", db_strerror(ret)); - } - - if ((ret = free_ptr->ds_db->sync(free_ptr->ds_db, 0)) != 0) - { - gossip_err("db_sync(coll_ds_db): %s\n", db_strerror(ret)); - } - - if ((ret = db_close(free_ptr->ds_db)) != 0) - { - gossip_lerr("db_close(coll_ds_db): %s\n", db_strerror(ret)); - } - - if ((ret = free_ptr->keyval_db->sync(free_ptr->keyval_db, 0)) != 0) - { - gossip_err("db_sync(coll_keyval_db): %s\n", db_strerror(ret)); - } - - if ((ret = db_close(free_ptr->keyval_db)) != 0) - { - gossip_lerr("db_close(coll_keyval_db): %s\n", db_strerror(ret)); - } - - - dbpf_putdb_env(free_ptr->coll_env, free_ptr->path_name); - free(free_ptr->name); - free(free_ptr->path_name); - PINT_dbpf_keyval_pcache_finalize(free_ptr->pcache); - - free(free_ptr); - } - root_coll_p = NULL; -} - /* * Local variables: * c-indent-level: 4 diff --git a/src/io/trove/trove-dbpf/dbpf-dspace.c b/src/io/trove/trove-dbpf/dbpf-dspace.c index 51fae51..23c7afa 100644 --- 
a/src/io/trove/trove-dbpf/dbpf-dspace.c +++ b/src/io/trove/trove-dbpf/dbpf-dspace.c @@ -53,6 +53,9 @@ extern gen_mutex_t dbpf_attr_cache_mutex; int64_t s_dbpf_metadata_writes = 0, s_dbpf_metadata_reads = 0; +extern TROVE_method_callback global_trove_method_callback; +extern struct TROVE_bstream_ops *bstream_method_table[]; + static inline void organize_post_op_statistics( enum dbpf_op_type op_type, TROVE_op_id op_id) { @@ -77,28 +80,30 @@ static inline void organize_post_op_statistics( UPDATE_PERF_METADATA_READ(); break; case BSTREAM_READ_LIST: - DBPF_EVENT_END(PVFS_EVENT_TROVE_READ_LIST, op_id); break; case BSTREAM_WRITE_LIST: - DBPF_EVENT_END(PVFS_EVENT_TROVE_WRITE_LIST, op_id); - break; - default: break; case DSPACE_CREATE: UPDATE_PERF_METADATA_WRITE(); - DBPF_EVENT_END(PVFS_EVENT_TROVE_DSPACE_CREATE, op_id); + break; + case DSPACE_CREATE_LIST: + UPDATE_PERF_METADATA_WRITE(); + break; + default: break; } } -static int dbpf_dspace_remove_keyval( - void * args, TROVE_handle handle, TROVE_keyval_s *key, TROVE_keyval_s *val); - +static int dbpf_dspace_create_store_handle( + struct dbpf_collection* coll_p, + TROVE_ds_type type, + TROVE_handle new_handle); static int dbpf_dspace_iterate_handles_op_svc(struct dbpf_op *op_p); static int dbpf_dspace_create_op_svc(struct dbpf_op *op_p); +static int dbpf_dspace_create_list_op_svc(struct dbpf_op *op_p); static int dbpf_dspace_remove_op_svc(struct dbpf_op *op_p); +static int dbpf_dspace_remove_list_op_svc(struct dbpf_op *op_p); static int dbpf_dspace_verify_op_svc(struct dbpf_op *op_p); -static int dbpf_dspace_setattr_op_svc(struct dbpf_op *op_p); static int dbpf_dspace_getattr_op_svc(struct dbpf_op *op_p); static int dbpf_dspace_getattr_list_op_svc(struct dbpf_op *op_p); @@ -110,13 +115,17 @@ static int dbpf_dspace_create(TROVE_coll_id coll_id, TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; 
struct dbpf_op op; struct dbpf_op *op_p; struct dbpf_collection *coll_p = NULL; int ret; + PINT_event_type event_type; + PINT_event_id event_id = 0; + coll_p = dbpf_collection_find_registered(coll_id); if (coll_p == NULL) @@ -146,7 +155,12 @@ static int dbpf_dspace_create(TROVE_coll_id coll_id, return -TROVE_EINVAL; } - DBPF_EVENT_START(PVFS_EVENT_TROVE_DSPACE_CREATE, op_p->id); + event_type = trove_dbpf_dspace_create_event_id; + DBPF_EVENT_START(coll_p, q_op_p, event_type, &event_id, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + PINT_HINT_GET_OP_ID(hints)); /* this array is freed in dbpf-op.c:dbpf_queued_op_free, or * in dbpf_queue_or_service in the case of immediate completion */ @@ -154,6 +168,7 @@ static int dbpf_dspace_create(TROVE_coll_id coll_id, extent_array->extent_count; op_p->u.d_create.extent_array.extent_array = malloc(extent_array->extent_count * sizeof(TROVE_extent)); + op_p->hints = hints; if (op_p->u.d_create.extent_array.extent_array == NULL) { @@ -170,21 +185,15 @@ static int dbpf_dspace_create(TROVE_coll_id coll_id, PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_DSPACE_OPS, 1, PINT_PERF_ADD); - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, + event_type, event_id); } static int dbpf_dspace_create_op_svc(struct dbpf_op *op_p) { int ret = -TROVE_EINVAL; - TROVE_ds_storedattr_s s_attr; - TROVE_ds_attributes attr; TROVE_handle new_handle = TROVE_HANDLE_NULL; - DBT key, data; TROVE_extent cur_extent; - TROVE_object_ref ref = {TROVE_HANDLE_NULL, op_p->coll_p->coll_id}; - char filename[PATH_MAX + 1]; - - memset(filename, 0, PATH_MAX + 1); cur_extent = op_p->u.d_create.extent_array.extent_array[0]; @@ -240,102 +249,217 @@ static int dbpf_dspace_create_op_svc(struct dbpf_op *op_p) if (new_handle == TROVE_HANDLE_NULL) { gossip_err("Error: handle allocator returned a zero handle.\n"); - ret = -TROVE_ENOSPC; - goto 
return_error; + return(-TROVE_ENOSPC); } - memset(&s_attr, 0, sizeof(TROVE_ds_storedattr_s)); - s_attr.type = op_p->u.d_create.type; + ret = dbpf_dspace_create_store_handle(op_p->coll_p, op_p->u.d_create.type, + new_handle); + if(ret < 0) + { + trove_handle_free(op_p->coll_p->coll_id, new_handle); + return(ret); + } - memset(&key, 0, sizeof(key)); - key.data = &new_handle; - key.size = key.ulen = sizeof(new_handle); + PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_DSPACE_OPS, + 1, PINT_PERF_SUB); - memset(&data, 0, sizeof(data)); - data.data = &s_attr; - data.size = data.ulen = sizeof(TROVE_ds_storedattr_s); - data.flags |= DB_DBT_USERMEM; + *op_p->u.d_create.out_handle_p = new_handle; + return DBPF_OP_COMPLETE; +} - /* check to see if handle is already used */ - ret = op_p->coll_p->ds_db->get(op_p->coll_p->ds_db, NULL, &key, &data, 0); - if (ret == 0) +static int dbpf_dspace_create_list(TROVE_coll_id coll_id, + TROVE_handle_extent_array *extent_array, + TROVE_handle *handle_array_p, + int count, + TROVE_ds_type type, + TROVE_keyval_s *hint, + TROVE_ds_flags flags, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints) +{ + dbpf_queued_op_t *q_op_p = NULL; + struct dbpf_op op; + struct dbpf_op *op_p; + struct dbpf_collection *coll_p = NULL; + int ret; + PINT_event_type event_type; + PINT_event_id event_id = 0; + + coll_p = dbpf_collection_find_registered(coll_id); + if (coll_p == NULL) { - gossip_debug(GOSSIP_TROVE_DEBUG, "handle (%llu) already exists.\n", - llu(new_handle)); - ret = -TROVE_EEXIST; - goto return_error; + return -TROVE_EINVAL; } - else if ((ret != DB_NOTFOUND) && (ret != DB_KEYEMPTY)) + + if (flags & TROVE_FORCE_REQUESTED_HANDLE || + extent_array->extent_array[0].first == TROVE_HANDLE_NULL) { - gossip_err("error in dspace create (db_p->get failed).\n"); - ret = -dbpf_db_error_to_trove_error(ret); - goto return_error; + gossip_err("Error: dbpf_dspace_create_list() does not support forced handles or empty 
extent specifier.\n"); + return(-TROVE_EINVAL); } - - /* check for old bstream files (these should not exist, but it is - * possible if the db gets out of sync with the rest of the collection - * somehow - */ - DBPF_GET_BSTREAM_FILENAME(filename, PATH_MAX, my_storage_p->name, - op_p->coll_p->coll_id, llu(new_handle)); - ret = access(filename, F_OK); - if(ret == 0) + + ret = dbpf_op_init_queued_or_immediate( + &op, + &q_op_p, + DSPACE_CREATE, + coll_p, + TROVE_HANDLE_NULL, + dbpf_dspace_create_list_op_svc, + flags, + NULL, + user_ptr, + context_id, + &op_p); + if(ret < 0) { - char new_filename[PATH_MAX+1]; - memset(new_filename, 0, PATH_MAX+1); + return ret; + } - gossip_err("Warning: found old bstream file %s; " - "moving to stranded-bstreams.\n", - filename); - - DBPF_GET_STRANDED_BSTREAM_FILENAME(new_filename, PATH_MAX, - my_storage_p->name, - op_p->coll_p->coll_id, - llu(new_handle)); - /* an old file exists. Move it to the stranded subdirectory */ - ret = rename(filename, new_filename); - if(ret != 0) - { - ret = -trove_errno_to_trove_error(errno); - gossip_err("Error: trove failed to rename stranded bstream: %s\n", - filename); - goto return_error; - } + if (!extent_array || (extent_array->extent_count < 1)) + { + return -TROVE_EINVAL; } - - memset(&data, 0, sizeof(data)); - data.data = &s_attr; - data.size = sizeof(s_attr); - - /* create new dataspace entry */ - ret = op_p->coll_p->ds_db->put(op_p->coll_p->ds_db, NULL, &key, &data, 0); - if (ret != 0) + + event_type = trove_dbpf_dspace_create_event_id; + DBPF_EVENT_START(coll_p, q_op_p, event_type, &event_id, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + PINT_HINT_GET_OP_ID(hints)); + + /* this array is freed in dbpf-op.c:dbpf_queued_op_free, or + * in dbpf_queue_or_service in the case of immediate completion */ + op_p->u.d_create_list.extent_array.extent_count = + extent_array->extent_count; + op_p->u.d_create_list.extent_array.extent_array = + 
malloc(extent_array->extent_count * sizeof(TROVE_extent)); + + if (op_p->u.d_create_list.extent_array.extent_array == NULL) { - gossip_err("error in dspace create (db_p->put failed).\n"); - ret = -dbpf_db_error_to_trove_error(ret); - goto return_error; + return -TROVE_ENOMEM; } - trove_ds_stored_to_attr(s_attr, attr, 0); + memcpy(op_p->u.d_create_list.extent_array.extent_array, + extent_array->extent_array, + extent_array->extent_count * sizeof(TROVE_extent)); - /* add retrieved ds_attr to dbpf_attr cache here */ - ref.handle = new_handle; - gen_mutex_lock(&dbpf_attr_cache_mutex); - dbpf_attr_cache_insert(ref, &attr); - gen_mutex_unlock(&dbpf_attr_cache_mutex); + op_p->u.d_create_list.out_handle_array_p = handle_array_p; + op_p->u.d_create_list.count = count; + op_p->u.d_create_list.type = type; + + /* memset handle array for safety if we have to clean up later */ + memset(handle_array_p, 0, count*sizeof(TROVE_handle)); PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_DSPACE_OPS, - 1, PINT_PERF_SUB); + 1, PINT_PERF_ADD); + + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, + event_type, event_id); +} + +static int dbpf_dspace_create_list_op_svc(struct dbpf_op *op_p) +{ + int ret = -TROVE_EINVAL; + TROVE_handle new_handle = TROVE_HANDLE_NULL; + DBT key; + int i; + int j; + + for(i=0; iu.d_create_list.count; i++) + { + + /* + try to allocate a handle from the specified range that we're given + */ + new_handle = trove_handle_alloc_from_range( + op_p->coll_p->coll_id, &op_p->u.d_create_list.extent_array); + + /* + if we got a zero handle, we're either completely out of handles + -- or else something terrible has happened + */ + if (new_handle == TROVE_HANDLE_NULL) + { + gossip_err("Error: handle allocator returned a zero handle.\n"); + return(-TROVE_ENOSPC); + } + + ret = dbpf_dspace_create_store_handle(op_p->coll_p, + op_p->u.d_create.type, + new_handle); + if(ret < 0) + { + /* release any handles we grabbed so far */ + for(j=0; j<=i; j++) + { + 
if(op_p->u.d_create_list.out_handle_array_p[j] + != TROVE_HANDLE_NULL) + { + memset(&key, 0, sizeof(key)); + key.data = &op_p->u.d_create_list.out_handle_array_p[j]; + key.size = key.ulen = sizeof(TROVE_handle); + op_p->coll_p->ds_db->del(op_p->coll_p->ds_db, + NULL, &key, 0); + + trove_handle_free(op_p->coll_p->coll_id, + op_p->u.d_create_list.out_handle_array_p[j]); + } + } + return(ret); + } + + op_p->u.d_create_list.out_handle_array_p[i] = new_handle; + } + + PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_DSPACE_OPS, + 1, PINT_PERF_SUB); - *op_p->u.d_create.out_handle_p = new_handle; return DBPF_OP_COMPLETE; +} -return_error: - if (new_handle != TROVE_HANDLE_NULL) +static int dbpf_dspace_remove_list(TROVE_coll_id coll_id, + TROVE_handle* handle_array, + TROVE_ds_state *error_array, + int count, + TROVE_ds_flags flags, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p) +{ + dbpf_queued_op_t *q_op_p = NULL; + struct dbpf_collection *coll_p = NULL; + + coll_p = dbpf_collection_find_registered(coll_id); + if (coll_p == NULL) { - trove_handle_free(op_p->coll_p->coll_id, new_handle); + return -TROVE_EINVAL; } - return ret; + q_op_p = dbpf_queued_op_alloc(); + if (q_op_p == NULL) + { + return -TROVE_ENOMEM; + } + + dbpf_queued_op_init( + q_op_p, + DSPACE_REMOVE_LIST, + TROVE_HANDLE_NULL, + coll_p, + dbpf_dspace_remove_list_op_svc, + user_ptr, + flags, + context_id); + + /* initialize op-specific members */ + q_op_p->op.u.d_remove_list.count = count; + q_op_p->op.u.d_remove_list.handle_array = handle_array; + q_op_p->op.u.d_remove_list.error_p = error_array; + + *out_op_id_p = dbpf_queued_op_queue(q_op_p); + + return 0; } static int dbpf_dspace_remove(TROVE_coll_id coll_id, @@ -343,7 +467,8 @@ static int dbpf_dspace_remove(TROVE_coll_id coll_id, TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; struct dbpf_op op; 
@@ -372,39 +497,44 @@ static int dbpf_dspace_remove(TROVE_coll_id coll_id, { return ret; } + op_p->hints = hints; PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_DSPACE_OPS, 1, PINT_PERF_ADD); - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, 0, 0); } -static int dbpf_dspace_remove_op_svc(struct dbpf_op *op_p) +static int remove_one_handle( + TROVE_object_ref ref, + struct dbpf_collection* coll_p) { int count = 0; int ret = -TROVE_EINVAL; DBT key; - TROVE_object_ref ref = {op_p->handle, op_p->coll_p->coll_id}; memset(&key, 0, sizeof(key)); - key.data = &op_p->handle; + key.data = &ref.handle; key.size = sizeof(TROVE_handle); - ret = op_p->coll_p->ds_db->del(op_p->coll_p->ds_db, NULL, &key, 0); + ret = coll_p->ds_db->del(coll_p->ds_db, NULL, &key, 0); switch (ret) { case DB_NOTFOUND: gossip_err("tried to remove non-existant dataspace\n"); +/* ret = -TROVE_ENOENT; goto return_error; +*/ + break; default: - op_p->coll_p->ds_db->err( - op_p->coll_p->ds_db, ret, "dbpf_dspace_remove"); + coll_p->ds_db->err( + coll_p->ds_db, ret, "dbpf_dspace_remove"); ret = -dbpf_db_error_to_trove_error(ret); goto return_error; case 0: gossip_debug(GOSSIP_TROVE_DEBUG, "removed dataspace with " - "handle %llu\n", llu(op_p->handle)); + "handle %llu\n", llu(ref.handle)); break; } @@ -416,7 +546,7 @@ static int dbpf_dspace_remove_op_svc(struct dbpf_op *op_p) /* remove bstream if it exists. Not a fatal * error if this fails (may not have ever been created) */ - ret = dbpf_open_cache_remove(op_p->coll_p->coll_id, op_p->handle); + ret = dbpf_open_cache_remove(coll_p->coll_id, ref.handle); /* remove the keyval entries for this handle if any exist. * this way seems a bit messy to me, i.e. we're operating @@ -425,40 +555,94 @@ static int dbpf_dspace_remove_op_svc(struct dbpf_op *op_p) * of a handle without having to post more operations though. 
*/ ret = PINT_dbpf_keyval_iterate( - op_p->coll_p->keyval_db, - op_p->handle, - op_p->coll_p->pcache, + coll_p->keyval_db, + ref.handle, + coll_p->pcache, NULL, NULL, &count, TROVE_ITERATE_START, - dbpf_dspace_remove_keyval); + PINT_dbpf_dspace_remove_keyval); if(ret != 0 && ret != -TROVE_ENOENT) { goto return_error; } + /* return handle to free list */ + trove_handle_free(coll_p->coll_id, ref.handle); + return 0; + +return_error: + return ret; +} + + +static int dbpf_dspace_remove_list_op_svc(struct dbpf_op *op_p) +{ + TROVE_object_ref ref = {op_p->handle, op_p->coll_p->coll_id}; + int ret = -TROVE_EINVAL; + int i; + + for(i=0; iu.d_remove_list.count; i++) + { + ref.handle = op_p->u.d_remove_list.handle_array[i]; + ref.fs_id = op_p->coll_p->coll_id; + + /* if error_p is NULL, assume that the caller is ignoring errors */ + if(op_p->u.d_remove_list.error_p) + { + op_p->u.d_remove_list.error_p[i] = + remove_one_handle(ref, op_p->coll_p); + } + else + { + remove_one_handle(ref, op_p->coll_p); + } + } + /* we still do a non-coalesced sync of the keyval db here * because we're in a dspace operation */ DBPF_DB_SYNC_IF_NECESSARY(op_p, op_p->coll_p->keyval_db, ret); if(ret < 0) { - goto return_error; + return(ret); } PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_DSPACE_OPS, 1, PINT_PERF_SUB); - /* return handle to free list */ - trove_handle_free(op_p->coll_p->coll_id,op_p->handle); return DBPF_OP_COMPLETE; +} -return_error: - return ret; + +static int dbpf_dspace_remove_op_svc(struct dbpf_op *op_p) +{ + TROVE_object_ref ref = {op_p->handle, op_p->coll_p->coll_id}; + int ret = -TROVE_EINVAL; + + ret = remove_one_handle(ref, op_p->coll_p); + if(ret < 0) + { + return(ret); + } + + /* we still do a non-coalesced sync of the keyval db here + * because we're in a dspace operation + */ + DBPF_DB_SYNC_IF_NECESSARY(op_p, op_p->coll_p->keyval_db, ret); + if(ret < 0) + { + return(ret); + } + + PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_DSPACE_OPS, + 1, PINT_PERF_SUB); + 
+ return DBPF_OP_COMPLETE; } -static int dbpf_dspace_remove_keyval( +int PINT_dbpf_dspace_remove_keyval( void * args, TROVE_handle handle, TROVE_keyval_s *key, TROVE_keyval_s *val) { int ret; @@ -517,7 +701,7 @@ static int dbpf_dspace_iterate_handles(TROVE_coll_id coll_id, op_p->u.d_iterate_handles.position_p = position_p; op_p->u.d_iterate_handles.count_p = inout_count_p; - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, 0, 0); } static int dbpf_dspace_iterate_handles_op_svc(struct dbpf_op *op_p) @@ -526,14 +710,15 @@ static int dbpf_dspace_iterate_handles_op_svc(struct dbpf_op *op_p) DBC *dbc_p = NULL; DBT key, data; void * multiples_buffer = NULL; - TROVE_ds_storedattr_s s_attr; TROVE_handle dummy_handle; + TROVE_handle aligned_handle; size_t sizeof_handle = 0, sizeof_attr = 0; int start_size; void *tmp_ptr; void *tmp_handle; void *tmp_attr; uint32_t dbpagesize = TROVE_DEFAULT_DB_PAGESIZE; + TROVE_ds_attributes attr; if (*op_p->u.d_iterate_handles.position_p == TROVE_ITERATE_END) { @@ -575,8 +760,8 @@ static int dbpf_dspace_iterate_handles_op_svc(struct dbpf_op *op_p) key.flags |= DB_DBT_USERMEM; memset(&data, 0, sizeof(data)); - data.data = &s_attr; - data.size = data.ulen = sizeof(s_attr); + data.data = &attr; + data.size = data.ulen = sizeof(attr); data.flags |= DB_DBT_USERMEM; ret = dbc_p->c_get(dbc_p, &key, &data, DB_SET_RANGE); @@ -600,8 +785,8 @@ static int dbpf_dspace_iterate_handles_op_svc(struct dbpf_op *op_p) key.flags |= DB_DBT_USERMEM; memset(&data, 0, sizeof(data)); - data.data = &s_attr; - data.size = data.ulen = sizeof(s_attr); + data.data = &attr; + data.size = data.ulen = sizeof(attr); data.flags |= DB_DBT_USERMEM; ret = dbc_p->c_get(dbc_p, &key, &data, DB_FIRST); @@ -621,7 +806,7 @@ static int dbpf_dspace_iterate_handles_op_svc(struct dbpf_op *op_p) op_p->u.d_iterate_handles.handle_array[i] = dummy_handle; ++i; - start_size = ((sizeof(TROVE_handle) + 
sizeof(s_attr)) * + start_size = ((sizeof(TROVE_handle) + sizeof(attr)) * (*op_p->u.d_iterate_handles.count_p - 1)); /* round up to the nearest 1024 */ start_size = (start_size + 1023) & (~(unsigned long)1023); @@ -693,6 +878,7 @@ static int dbpf_dspace_iterate_handles_op_svc(struct dbpf_op *op_p) * pointer value of tmp_buffer actually changes, and it * must be derefenced to get the handle value. */ + DB_MULTIPLE_KEY_NEXT(tmp_ptr, &data, tmp_handle, sizeof_handle, tmp_attr, sizeof_attr); @@ -703,15 +889,25 @@ static int dbpf_dspace_iterate_handles_op_svc(struct dbpf_op *op_p) /* verify sizes are correct */ if(sizeof_handle != sizeof(TROVE_handle) || - sizeof_attr != sizeof(s_attr)) + sizeof_attr != sizeof(attr)) { - /* something is wrong with the result */ - ret = -TROVE_EINVAL; - goto return_error; + gossip_err("Warning: got invalid handle or key size in dbpf_dspace_iterate_handles().\n"); + gossip_err("Warning: skipping entry.\n"); + i--; + continue; } - op_p->u.d_iterate_handles.handle_array[i] = - *(TROVE_handle *)tmp_handle; + /* check for duplicates */ + memcpy(&aligned_handle, tmp_handle, sizeof(TROVE_handle)); + if(i > 0 && aligned_handle == op_p->u.d_iterate_handles.handle_array[i-1]) + { + gossip_err("Warning: got duplicate handle %llu.\n", llu(aligned_handle)); + gossip_err("Warning: skipping entry.\n"); + i--; + continue; + } + + op_p->u.d_iterate_handles.handle_array[i] = aligned_handle; } } @@ -723,24 +919,37 @@ static int dbpf_dspace_iterate_handles_op_svc(struct dbpf_op *op_p) * the position to the next handle after the last one we * return */ - DB_MULTIPLE_KEY_NEXT(tmp_ptr, &data, - tmp_handle, sizeof_handle, - tmp_attr, sizeof_attr); - if(!tmp_ptr) + sizeof_handle = sizeof(TROVE_handle); + sizeof_attr = sizeof(attr); + do { - goto get_next; - } + /* verify sizes are correct */ + if(sizeof_handle != sizeof(TROVE_handle) || + sizeof_attr != sizeof(attr)) + { + gossip_err("Warning: got invalid handle or key size in dbpf_dspace_iterate_handles().\n"); 
+ gossip_err("Warning: skipping entry.\n"); + } + DB_MULTIPLE_KEY_NEXT(tmp_ptr, &data, + tmp_handle, sizeof_handle, + tmp_attr, sizeof_attr); + if(!tmp_ptr) + { + goto get_next; + } - /* verify sizes are correct */ - if(sizeof_handle != sizeof(TROVE_handle) || - sizeof_attr != sizeof(s_attr)) - { - /* something is wrong with the result */ - ret = -TROVE_EINVAL; - goto return_error; - } + memcpy(&aligned_handle, tmp_handle, sizeof(TROVE_handle)); + if(aligned_handle == op_p->u.d_iterate_handles.handle_array[*op_p->u.d_iterate_handles.count_p]) + { + gossip_err("Warning: found duplicate handle: %llu\n", llu(aligned_handle)); + gossip_err("Warning: skipping entry.\n"); + } - *op_p->u.d_iterate_handles.position_p = *(TROVE_handle *)tmp_handle; + } while (sizeof_handle != sizeof(TROVE_handle) || + sizeof_attr != sizeof(attr) || + aligned_handle == op_p->u.d_iterate_handles.handle_array[*op_p->u.d_iterate_handles.count_p]); + + *op_p->u.d_iterate_handles.position_p = aligned_handle; goto return_ok; } @@ -755,8 +964,8 @@ static int dbpf_dspace_iterate_handles_op_svc(struct dbpf_op *op_p) key.flags |= DB_DBT_USERMEM; memset(&data, 0, sizeof(data)); - data.data = &s_attr; - data.size = data.ulen = sizeof(s_attr); + data.data = &attr; + data.size = data.ulen = sizeof(attr); data.flags |= DB_DBT_USERMEM; ret = dbc_p->c_get(dbc_p, &key, &data, DB_NEXT); @@ -770,6 +979,13 @@ static int dbpf_dspace_iterate_handles_op_svc(struct dbpf_op *op_p) "failure @ recno\n"); ret = -dbpf_db_error_to_trove_error(ret); } + if(*op_p->u.d_iterate_handles.count_p > 0 && + dummy_handle == op_p->u.d_iterate_handles.handle_array[*op_p->u.d_iterate_handles.count_p]) + { + gossip_err("Warning: found duplicate handle: %llu\n", llu(dummy_handle)); + gossip_err("Warning: skipping entry.\n"); + (*op_p->u.d_iterate_handles.count_p)--; + } *op_p->u.d_iterate_handles.position_p = dummy_handle; return_ok: @@ -817,7 +1033,8 @@ static int dbpf_dspace_verify(TROVE_coll_id coll_id, TROVE_ds_flags flags, void 
*user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; struct dbpf_op op; @@ -849,24 +1066,26 @@ static int dbpf_dspace_verify(TROVE_coll_id coll_id, } /* initialize op-specific members */ + op_p->hints = hints; op_p->u.d_verify.type_p = type_p; - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, 0, 0); } static int dbpf_dspace_verify_op_svc(struct dbpf_op *op_p) { int ret = -TROVE_EINVAL; DBT key, data; - TROVE_ds_storedattr_s s_attr; + TROVE_ds_attributes attr; memset(&key, 0, sizeof(key)); key.data = &op_p->handle; key.size = key.ulen = sizeof(TROVE_handle); + key.flags = DB_DBT_USERMEM; memset(&data, 0, sizeof(data)); - data.data = &s_attr; - data.size = data.ulen = sizeof(s_attr); + data.data = &attr; + data.size = data.ulen = sizeof(attr); data.flags |= DB_DBT_USERMEM; /* check to see if dspace handle is used (ie. 
object exists) */ @@ -889,7 +1108,7 @@ static int dbpf_dspace_verify_op_svc(struct dbpf_op *op_p) } /* copy type value back into user's memory */ - *op_p->u.d_verify.type_p = s_attr.type; + *op_p->u.d_verify.type_p = attr.type; return 1; @@ -903,7 +1122,8 @@ static int dbpf_dspace_getattr(TROVE_coll_id coll_id, TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; struct dbpf_op op; @@ -911,6 +1131,8 @@ static int dbpf_dspace_getattr(TROVE_coll_id coll_id, struct dbpf_collection *coll_p = NULL; TROVE_object_ref ref = {handle, coll_id}; int ret; + PINT_event_id event_id = 0; + PINT_event_type event_type; /* fast path cache hit; skips queueing */ gen_mutex_lock(&dbpf_attr_cache_mutex); @@ -926,10 +1148,26 @@ static int dbpf_dspace_getattr(TROVE_coll_id coll_id, (int)ds_attr_p->dist_size); #endif gossip_debug(GOSSIP_DBPF_ATTRCACHE_DEBUG, "dspace_getattr fast " - "path attr cache hit on %llu\n (dfile_count=%d | " - "dist_size=%d | data_size=%lld)\n", llu(handle), - ds_attr_p->dfile_count, ds_attr_p->dist_size, - lld(ds_attr_p->b_size)); + "path attr cache hit on %llu\n", llu(handle)); + if(ds_attr_p->type == PVFS_TYPE_METAFILE) + { + gossip_debug(GOSSIP_DBPF_ATTRCACHE_DEBUG, + "(dfile_count=%d, dist_size=%d)", + ds_attr_p->u.metafile.dfile_count, + ds_attr_p->u.metafile.dist_size); + } + else if(ds_attr_p->type == PVFS_TYPE_DATAFILE) + { + gossip_debug(GOSSIP_DBPF_ATTRCACHE_DEBUG, + "(bstream_size=%lld)\n", + lld(ds_attr_p->u.datafile.b_size)); + } + else if(ds_attr_p->type == PVFS_TYPE_DIRDATA) + { + gossip_debug(GOSSIP_DBPF_ATTRCACHE_DEBUG, + "(dir_count=%llu)\n", + llu(ds_attr_p->u.dirdata.count)); + } UPDATE_PERF_METADATA_READ(); gen_mutex_unlock(&dbpf_attr_cache_mutex); @@ -959,10 +1197,20 @@ static int dbpf_dspace_getattr(TROVE_coll_id coll_id, return ret; } + event_type = trove_dbpf_dspace_getattr_event_id; + DBPF_EVENT_START(coll_p, q_op_p, 
event_type, &event_id, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + handle, + PINT_HINT_GET_OP_ID(hints)); + /* initialize op-specific members */ op_p->u.d_getattr.attr_p = ds_attr_p; + op_p->hints = hints; - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, + event_type, event_id); } static int dbpf_dspace_getattr_list(TROVE_coll_id coll_id, @@ -973,15 +1221,17 @@ static int dbpf_dspace_getattr_list(TROVE_coll_id coll_id, TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; struct dbpf_collection *coll_p = NULL; TROVE_object_ref ref; int i; + int cache_hits = 0; - /* fast path cache hit; skips queueing */ gen_mutex_lock(&dbpf_attr_cache_mutex); + /* go ahead and try to hit attr cache for all handles up front */ for (i = 0; i < nhandles; i++) { ref.handle = handle_array[i]; @@ -998,21 +1248,49 @@ static int dbpf_dspace_getattr_list(TROVE_coll_id coll_id, (int)ds_attr_p->type, (int)ds_attr_p->dfile_count, (int)ds_attr_p->dist_size); #endif - gossip_debug(GOSSIP_DBPF_ATTRCACHE_DEBUG, "dspace_getattr fast " - "path attr cache hit on %llu\n (dfile_count=%d | " - "dist_size=%d | data_size=%lld)\n", llu(handle_array[i]), - ds_attr_p->dfile_count, ds_attr_p->dist_size, - lld(ds_attr_p->b_size)); + gossip_debug( + GOSSIP_TROVE_DEBUG, "dspace_getattr_list fast " + "path attr cache hit on %llu, uid=%d, mode=%d, type=%d\n", + llu(handle_array[i]), (int)ds_attr_p[i].uid, (int)ds_attr_p[i].mode, + (int)ds_attr_p[i].type); + if(ds_attr_p[i].type == PVFS_TYPE_METAFILE) + { + gossip_debug(GOSSIP_TROVE_DEBUG, + "\tdfile_count = %d, dist_size = %d\n", + ds_attr_p[i].u.metafile.dfile_count, + ds_attr_p[i].u.metafile.dist_size); + } + else if(ds_attr_p[i].type == PVFS_TYPE_DATAFILE) + { + gossip_debug(GOSSIP_TROVE_DEBUG, + 
"\tbstream_size = %llu\n", + llu(ds_attr_p[i].u.datafile.b_size)); + } + else if(ds_attr_p[i].type == PVFS_TYPE_DIRDATA) + { + gossip_debug(GOSSIP_TROVE_DEBUG, + "\tcount = %llu\n", + llu(ds_attr_p[i].u.dirdata.count)); + } UPDATE_PERF_METADATA_READ(); error_array[i] = 0; - continue; + cache_hits++; + } + else + { + /* no hit; mark attr entry so that we can detect that in the + * service routine + */ + ds_attr_p[i].type = PVFS_TYPE_NONE; } - break; } gen_mutex_unlock(&dbpf_attr_cache_mutex); + /* All handles hit in the cache, return */ - if (i == nhandles) { + if (cache_hits == nhandles) + { + gossip_debug(GOSSIP_DBPF_ATTRCACHE_DEBUG, "dspace_getattr_list serviced entirely from attr cache.\n"); return 1; } @@ -1038,10 +1316,10 @@ static int dbpf_dspace_getattr_list(TROVE_coll_id coll_id, context_id); /* initialize op-specific members */ - q_op_p->op.u.d_getattr_list.count = (nhandles - i); - q_op_p->op.u.d_getattr_list.handle_array = &handle_array[i]; - q_op_p->op.u.d_getattr_list.attr_p = &ds_attr_p[i]; - q_op_p->op.u.d_getattr_list.error_p = &error_array[i]; + q_op_p->op.u.d_getattr_list.count = nhandles; + q_op_p->op.u.d_getattr_list.handle_array = handle_array; + q_op_p->op.u.d_getattr_list.attr_p = ds_attr_p; + q_op_p->op.u.d_getattr_list.error_p = error_array; *out_op_id_p = dbpf_queued_op_queue(q_op_p); @@ -1055,13 +1333,16 @@ static int dbpf_dspace_setattr(TROVE_coll_id coll_id, TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; struct dbpf_op op; struct dbpf_op *op_p; struct dbpf_collection *coll_p = NULL; int ret; + PINT_event_id event_id = 0; + PINT_event_type event_type; coll_p = dbpf_collection_find_registered(coll_id); if (coll_p == NULL) @@ -1085,243 +1366,170 @@ static int dbpf_dspace_setattr(TROVE_coll_id coll_id, return ret; } + event_type = trove_dbpf_dspace_setattr_event_id; + DBPF_EVENT_START(coll_p, q_op_p, event_type, 
&event_id, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + handle, + PINT_HINT_GET_OP_ID(hints)); + /* initialize op-specific members */ op_p->u.d_setattr.attr_p = ds_attr_p; + op_p->hints = hints; PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_DSPACE_OPS, 1, PINT_PERF_ADD); - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, + event_type, event_id); } -static int dbpf_dspace_setattr_op_svc(struct dbpf_op *op_p) +int dbpf_dspace_attr_set(struct dbpf_collection *coll_p, + TROVE_object_ref ref, + TROVE_ds_attributes *attr) { - int ret = -TROVE_EINVAL; + int ret; DBT key, data; - TROVE_ds_storedattr_s s_attr; - TROVE_object_ref ref = {op_p->handle, op_p->coll_p->coll_id}; memset(&key, 0, sizeof(key)); - key.data = &op_p->handle; + key.data = &ref.handle; key.size = sizeof(TROVE_handle); - - memset(&data, 0, sizeof(data)); - data.data = &s_attr; - data.size = sizeof(s_attr); - - trove_ds_attr_to_stored((*op_p->u.d_setattr.attr_p), s_attr); -#if 0 - gossip_debug(GOSSIP_TROVE_DEBUG, "ATTRIB: dspace_setattr storing " - "attributes (2) on key %llu\n uid = %d, mode = %d, " - "type = %d, dfile_count = %d, dist_size = %d\n", - llu(op_p->handle), (int) s_attr.uid, (int) s_attr.mode, - (int) s_attr.type, (int) s_attr.dfile_count, - (int) s_attr.dist_size); -#endif + memset(&data, 0, sizeof(data)); + data.data = attr; + data.size = sizeof(*attr); - ret = op_p->coll_p->ds_db->put( - op_p->coll_p->ds_db, NULL, &key, &data, 0); + ret = coll_p->ds_db->put( + coll_p->ds_db, NULL, &key, &data, 0); if (ret != 0) { - op_p->coll_p->ds_db->err( - op_p->coll_p->ds_db, ret, "dspace_db->put setattr"); - ret = -dbpf_db_error_to_trove_error(ret); - goto return_error; + coll_p->ds_db->err( + coll_p->ds_db, ret, "dspace_db->put setattr"); + return -dbpf_db_error_to_trove_error(ret); } /* now that the disk is updated, update the cache if necessary */ 
gen_mutex_lock(&dbpf_attr_cache_mutex); - dbpf_attr_cache_ds_attr_update_cached_data( - ref, op_p->u.d_setattr.attr_p); + dbpf_attr_cache_ds_attr_update_cached_data(ref, attr); gen_mutex_unlock(&dbpf_attr_cache_mutex); - PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_DSPACE_OPS, - 1, PINT_PERF_SUB); - - return DBPF_OP_COMPLETE; - -return_error: - return ret; + return 0; } -static int dbpf_dspace_getattr_op_svc(struct dbpf_op *op_p) +int dbpf_dspace_setattr_op_svc(struct dbpf_op *op_p) { int ret = -TROVE_EINVAL; - DBT key, data; - TROVE_ds_storedattr_s s_attr; - TROVE_ds_attributes *attr = NULL; - TROVE_size b_size; - struct stat b_stat; TROVE_object_ref ref = {op_p->handle, op_p->coll_p->coll_id}; - struct open_cache_ref tmp_ref; - /* get an fd for the bstream so we can check size */ - ret = dbpf_open_cache_get( - op_p->coll_p->coll_id, op_p->handle, 0, &tmp_ref); - if (ret < 0) - { - b_size = 0; - } - else + ret = dbpf_dspace_attr_set(op_p->coll_p, ref, op_p->u.d_setattr.attr_p); + if(ret != 0) { - ret = DBPF_FSTAT(tmp_ref.fd, &b_stat); - dbpf_open_cache_put(&tmp_ref); - if (ret < 0) - { - ret = -TROVE_EBADF; - goto return_error; - } - b_size = (TROVE_size)b_stat.st_size; + return ret; } + PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_DSPACE_OPS, + 1, PINT_PERF_SUB); + + return DBPF_OP_COMPLETE; +} + +int dbpf_dspace_attr_get(struct dbpf_collection *coll_p, + TROVE_object_ref ref, + TROVE_ds_attributes *attr) +{ + DBT key, data; + int ret; + memset(&key, 0, sizeof(key)); - key.data = &op_p->handle; - key.size = key.ulen = sizeof(TROVE_handle); + key.data = &ref.handle; + key.size = key.ulen = sizeof(ref.handle); + key.flags = DB_DBT_USERMEM; memset(&data, 0, sizeof(data)); - memset(&s_attr, 0, sizeof(TROVE_ds_storedattr_s)); - data.data = &s_attr; - data.size = data.ulen = sizeof(TROVE_ds_storedattr_s); + data.data = attr; + data.size = data.ulen = sizeof(*attr); data.flags |= DB_DBT_USERMEM; - ret = op_p->coll_p->ds_db->get(op_p->coll_p->ds_db, - NULL, 
&key, &data, 0); + ret = coll_p->ds_db->get(coll_p->ds_db, NULL, &key, &data, 0); if (ret != 0) { if(ret != DB_NOTFOUND) { - op_p->coll_p->ds_db->err(op_p->coll_p->ds_db, ret, "DB->get"); + coll_p->ds_db->err(coll_p->ds_db, ret, "DB->get"); } - ret = -dbpf_db_error_to_trove_error(ret); - goto return_error; + return(-dbpf_db_error_to_trove_error(ret)); } gossip_debug( GOSSIP_TROVE_DEBUG, "ATTRIB: retrieved attributes " - "from DISK for key %llu\n\tuid = %d, mode = %d, type = %d, " - "dfile_count = %d, dist_size = %d\n\tb_size = %lld\n", - llu(op_p->handle), (int)s_attr.uid, (int)s_attr.mode, - (int)s_attr.type, (int)s_attr.dfile_count, (int)s_attr.dist_size, - llu(b_size)); - - attr = op_p->u.d_getattr.attr_p; - trove_ds_stored_to_attr(s_attr, *attr, b_size); + "from DISK for key %llu\n\tuid = %d, mode = %d, type = %d\n", + llu(ref.handle), (int)attr->uid, (int)attr->mode, (int)attr->type); + if(attr->type == PVFS_TYPE_METAFILE) + { + gossip_debug(GOSSIP_TROVE_DEBUG, + "\tdfile_count = %d, dist_size = %d\n", + attr->u.metafile.dfile_count, + attr->u.metafile.dist_size); + } + else if(attr->type == PVFS_TYPE_DATAFILE) + { + gossip_debug(GOSSIP_TROVE_DEBUG, + "\tbstream_size = %llu\n", + llu(attr->u.datafile.b_size)); + } + else if(attr->type == PVFS_TYPE_DIRDATA) + { + gossip_debug(GOSSIP_TROVE_DEBUG, + "\tcount = %llu\n", + llu(attr->u.dirdata.count)); + } /* add retrieved ds_attr to dbpf_attr cache here */ gen_mutex_lock(&dbpf_attr_cache_mutex); dbpf_attr_cache_insert(ref, attr); gen_mutex_unlock(&dbpf_attr_cache_mutex); + return 0; +} + +static int dbpf_dspace_getattr_op_svc(struct dbpf_op *op_p) +{ + int ret = -TROVE_EINVAL; + TROVE_object_ref ref = {op_p->handle, op_p->coll_p->coll_id}; + + ret = dbpf_dspace_attr_get(op_p->coll_p, ref, op_p->u.d_getattr.attr_p); + if(ret < 0) + { + return(ret); + } + return 1; - -return_error: - return ret; } static int dbpf_dspace_getattr_list_op_svc(struct dbpf_op *op_p) { int i; + TROVE_object_ref ref; for (i = 0; i < 
op_p->u.d_getattr_list.count; i++) { - int ret; - TROVE_ds_storedattr_s s_attr; - TROVE_ds_attributes *attr = NULL; - struct stat b_stat; - DBT key, data; - struct open_cache_ref tmp_ref; - TROVE_object_ref ref; - TROVE_size b_size = 0, k_size = 0; - DB_BTREE_STAT *k_stat_p = NULL; - - ref.handle = op_p->u.d_getattr_list.handle_array[i]; - ref.fs_id = op_p->coll_p->coll_id; - /* It is still possible that we could hit in the attribute cache because of the way - * we do queueing in the getattr_list operation - */ - if (dbpf_attr_cache_ds_attr_fetch_cached_data(ref, &op_p->u.d_getattr_list.attr_p[i]) == 0) - { - UPDATE_PERF_METADATA_READ(); - op_p->u.d_getattr_list.error_p[i] = 0; - continue; - } - - /* get an fd for the bstream so we can check size */ - ret = dbpf_open_cache_get( - op_p->coll_p->coll_id, op_p->u.d_getattr_list.handle_array[i], 0, &tmp_ref); - if (ret < 0) - { - } - else - { - ret = DBPF_FSTAT(tmp_ref.fd, &b_stat); - dbpf_open_cache_put(&tmp_ref); - if (ret < 0) - { - op_p->u.d_getattr_list.error_p[i] = -TROVE_EBADF; - continue; - } - b_size = (TROVE_size)b_stat.st_size; - } - - ret = op_p->coll_p->ds_db->stat(op_p->coll_p->ds_db, -#ifdef HAVE_TXNID_PARAMETER_TO_DB_STAT - (DB_TXN *) NULL, -#endif - &k_stat_p, -#ifdef HAVE_UNKNOWN_PARAMETER_TO_DB_STAT - NULL, -#endif - 0); - if (ret == 0) + if(op_p->u.d_getattr_list.attr_p[i].type != PVFS_TYPE_NONE) { - k_size = (TROVE_size) k_stat_p->bt_ndata; - free(k_stat_p); - } - else - { - gossip_err("Error: unable to stat handle %llu (%llx).\n", - llu(op_p->handle), llu(op_p->handle)); - op_p->u.d_getattr_list.error_p[i] = -TROVE_EIO; + /* we already serviced this one from the cache at post time; + * skip to the next element + */ + gossip_debug(GOSSIP_TROVE_DEBUG, + "dbpf_dspace_getattr_list_op_svc() skipping " + "element %d resolved from cache.\n", i); continue; } - memset(&key, 0, sizeof(key)); - key.data = &op_p->u.d_getattr_list.handle_array[i]; - key.size = key.ulen = sizeof(TROVE_handle); - - memset(&data, 
0, sizeof(data)); - memset(&s_attr, 0, sizeof(TROVE_ds_storedattr_s)); - data.data = &s_attr; - data.size = data.ulen = sizeof(TROVE_ds_storedattr_s); - data.flags |= DB_DBT_USERMEM; - - ret = op_p->coll_p->ds_db->get(op_p->coll_p->ds_db, - NULL, &key, &data, 0); - if (ret != 0) - { - op_p->coll_p->ds_db->err(op_p->coll_p->ds_db, ret, "DB->get"); - op_p->u.d_getattr_list.error_p[i] = -TROVE_EIO; - continue; - } + ref.handle = op_p->u.d_getattr_list.handle_array[i]; + ref.fs_id = op_p->coll_p->coll_id; - gossip_debug( - GOSSIP_TROVE_DEBUG, "ATTRIB: retrieved attributes " - "from DISK for key %llu\n\tuid = %d, mode = %d, type = %d, " - "dfile_count = %d, dist_size = %d\n\tb_size = %lld, k_size = %lld\n", - llu(op_p->u.d_getattr_list.handle_array[i]), (int)s_attr.uid, (int)s_attr.mode, - (int)s_attr.type, (int)s_attr.dfile_count, (int)s_attr.dist_size, - llu(b_size), llu(k_size)); - - attr = &op_p->u.d_getattr_list.attr_p[i]; - trove_ds_stored_to_attr(s_attr, *attr, b_size); - - /* add retrieved ds_attr to dbpf_attr cache here */ - gen_mutex_lock(&dbpf_attr_cache_mutex); - dbpf_attr_cache_insert(ref, attr); - gen_mutex_unlock(&dbpf_attr_cache_mutex); - op_p->u.d_getattr_list.error_p[i] = 0; + op_p->u.d_getattr_list.error_p[i] = dbpf_dspace_attr_get( + op_p->coll_p, ref, &op_p->u.d_getattr_list.attr_p[i]); } return 1; @@ -1354,6 +1562,33 @@ static int dbpf_dspace_cancel( return -TROVE_EINVAL; } + /* + * for bstream ops, call the bstream cancel instead. 
for other ops, + * there's not much we can do other than let the op + * complete normally + */ + if(cur_op->op.type >= BSTREAM_OP_TYPE && + cur_op->op.type < KEYVAL_OP_TYPE) + { + int method_id = global_trove_method_callback(coll_id); + if(method_id < 0) + { + return -TROVE_EINVAL; + } + + if(bstream_method_table[method_id]->bstream_cancel) + { + return bstream_method_table[method_id]->bstream_cancel( + coll_id, id, context_id); + } + else + { + gossip_debug(GOSSIP_TROVE_DEBUG, "Trove cancellation is not supported for this operation type; ignoring.\n"); + return(0); + + } + } + /* check the state of the current op to see if it's completed */ gen_mutex_lock(&cur_op->mutex); state = cur_op->op.state; @@ -1364,60 +1599,30 @@ static int dbpf_dspace_cancel( switch(state) { case OP_QUEUED: - { - gossip_debug(GOSSIP_TROVE_DEBUG, - "op %p is queued: handling\n", cur_op); + { + gossip_debug(GOSSIP_TROVE_DEBUG, + "op %p is queued: handling\n", cur_op); - /* dequeue and complete the op in canceled state */ - cur_op->op.state = OP_IN_SERVICE; - dbpf_queued_op_put_and_dequeue(cur_op); - assert(cur_op->op.state == OP_DEQUEUED); + /* dequeue and complete the op in canceled state */ + cur_op->op.state = OP_IN_SERVICE; + dbpf_queued_op_put_and_dequeue(cur_op); + assert(cur_op->op.state == OP_DEQUEUED); - cur_op->state = 0; - /* this is a macro defined in dbpf-thread.h */ - dbpf_queued_op_complete(cur_op, OP_CANCELED); + cur_op->state = 0; + /* this is a macro defined in dbpf-thread.h */ + dbpf_queued_op_complete(cur_op, OP_CANCELED); - gossip_debug( - GOSSIP_TROVE_DEBUG, "op %p is canceled\n", cur_op); - ret = 0; - } - break; - case OP_IN_SERVICE: - { - /* - for bstream i/o op, try an aio_cancel. 
for other ops, - there's not much we can do other than let the op - complete normally - */ - if ((cur_op->op.type == BSTREAM_READ_LIST) || - (cur_op->op.type == BSTREAM_WRITE_LIST)) - { -#if 0 - ret = aio_cancel(cur_op->op.u.b_rw_list.fd, - cur_op->op.u.b_rw_list.aiocb_array); -#endif - ret = cur_op->op.u.b_rw_list.aio_ops->aio_cancel( - cur_op->op.u.b_rw_list.fd, - cur_op->op.u.b_rw_list.aiocb_array); - gossip_debug( - GOSSIP_TROVE_DEBUG, "aio_cancel returned %s\n", - ((ret == AIO_CANCELED) ? "CANCELED" : - "NOT CANCELED")); - /* - NOTE: the normal aio notification method takes care - of completing the op and moving it to the completion - queue - */ - } - else - { gossip_debug( - GOSSIP_TROVE_DEBUG, "op is in service: ignoring " - "operation type %d\n", cur_op->op.type); + GOSSIP_TROVE_DEBUG, "op %p is canceled\n", cur_op); + ret = 0; } + break; + case OP_IN_SERVICE: + gossip_debug( + GOSSIP_TROVE_DEBUG, "op is in service: ignoring " + "operation type %d\n", cur_op->op.type); ret = 0; - } - break; + break; case OP_COMPLETED: case OP_CANCELED: /* easy cancelation case; do nothing */ @@ -1427,6 +1632,7 @@ static int dbpf_dspace_cancel( break; default: gossip_err("Invalid dbpf_op state found (%d)\n", state); + gossip_err(" from op type: %d\n", cur_op->op.type); assert(0); } #endif @@ -1678,6 +1884,11 @@ static int dbpf_dspace_testcontext( } ds_id_array[out_count] = cur_op->op.id; + if(cur_op->event_type == trove_dbpf_read_event_id || + cur_op->event_type == trove_dbpf_write_event_id) + { + DBPF_EVENT_END(cur_op->event_type, cur_op->event_id); + } organize_post_op_statistics(cur_op->op.type, cur_op->op.id); dbpf_queued_op_free(cur_op); @@ -1863,41 +2074,141 @@ static int dbpf_dspace_testsome( int PINT_trove_dbpf_ds_attr_compare_reversed( DB * dbp, const DBT * a, const DBT * b) { - const TROVE_handle * handle_a; - const TROVE_handle * handle_b; + TROVE_handle handle_a = 0; + TROVE_handle handle_b = 0; - handle_a = (const TROVE_handle *) a->data; - handle_b = (const 
TROVE_handle *) b->data; + memcpy(&handle_a, a->data, sizeof(TROVE_handle)); + memcpy(&handle_b, b->data, sizeof(TROVE_handle)); - if(*handle_a == *handle_b) + if(handle_a == handle_b) { return 0; } - return (*handle_a < *handle_b) ? -1 : 1; + return (handle_a < handle_b) ? -1 : 1; } int PINT_trove_dbpf_ds_attr_compare( DB * dbp, const DBT * a, const DBT * b) { - const TROVE_handle * handle_a; - const TROVE_handle * handle_b; + TROVE_handle handle_a = 0; + TROVE_handle handle_b = 0; - handle_a = (const TROVE_handle *) a->data; - handle_b = (const TROVE_handle *) b->data; + memcpy(&handle_a, a->data, sizeof(TROVE_handle)); + memcpy(&handle_b, b->data, sizeof(TROVE_handle)); - if(*handle_a == *handle_b) + if(handle_a == handle_b) { return 0; } - return (*handle_a > *handle_b) ? -1 : 1; + return (handle_a > handle_b) ? -1 : 1; +} + +/* dbpf_dspace_create_store_handle() + * + * records persisent record of new dspace within trove + * + * returns 0 on success, -PVFS_error on failure + */ +static int dbpf_dspace_create_store_handle( + struct dbpf_collection* coll_p, + TROVE_ds_type type, + TROVE_handle new_handle) +{ + int ret = -TROVE_EINVAL; + TROVE_ds_attributes attr; + DBT key, data; + TROVE_object_ref ref = {TROVE_HANDLE_NULL, coll_p->coll_id}; + char filename[PATH_MAX + 1] = {0}; + + memset(&attr, 0, sizeof(attr)); + attr.type = type; + + memset(&key, 0, sizeof(key)); + key.data = &new_handle; + key.size = key.ulen = sizeof(new_handle); + key.flags = DB_DBT_USERMEM; + + memset(&data, 0, sizeof(data)); + data.data = &attr; + data.size = data.ulen = sizeof(attr); + data.flags |= DB_DBT_USERMEM; + + /* check to see if handle is already used */ + ret = coll_p->ds_db->get(coll_p->ds_db, NULL, &key, &data, 0); + if (ret == 0) + { + gossip_debug(GOSSIP_TROVE_DEBUG, "handle (%llu) already exists.\n", + llu(new_handle)); + return(-TROVE_EEXIST); + } + else if ((ret != DB_NOTFOUND) && (ret != DB_KEYEMPTY)) + { + gossip_err("error in dspace create (db_p->get failed).\n"); + 
ret = -dbpf_db_error_to_trove_error(ret); + return(ret); + } + + /* check for old bstream files (these should not exist, but it is + * possible if the db gets out of sync with the rest of the collection + * somehow + */ + DBPF_GET_BSTREAM_FILENAME(filename, PATH_MAX, my_storage_p->data_path, + coll_p->coll_id, llu(new_handle)); + ret = access(filename, F_OK); + if(ret == 0) + { + char new_filename[PATH_MAX+1]; + memset(new_filename, 0, PATH_MAX+1); + + gossip_err("Warning: found old bstream file %s; " + "moving to stranded-bstreams.\n", + filename); + + DBPF_GET_STRANDED_BSTREAM_FILENAME(new_filename, PATH_MAX, + my_storage_p->data_path, + coll_p->coll_id, + llu(new_handle)); + /* an old file exists. Move it to the stranded subdirectory */ + ret = rename(filename, new_filename); + if(ret != 0) + { + ret = -trove_errno_to_trove_error(errno); + gossip_err("Error: trove failed to rename stranded bstream: %s\n", + filename); + return(ret); + } + } + + memset(&data, 0, sizeof(data)); + data.data = &attr; + data.size = sizeof(attr); + + /* create new dataspace entry */ + ret = coll_p->ds_db->put(coll_p->ds_db, NULL, &key, &data, 0); + if (ret != 0) + { + gossip_err("error in dspace create (db_p->put failed).\n"); + ret = -dbpf_db_error_to_trove_error(ret); + return(ret); + } + + /* add retrieved ds_attr to dbpf_attr cache here */ + ref.handle = new_handle; + gen_mutex_lock(&dbpf_attr_cache_mutex); + dbpf_attr_cache_insert(ref, &attr); + gen_mutex_unlock(&dbpf_attr_cache_mutex); + + return(0); } struct TROVE_dspace_ops dbpf_dspace_ops = { dbpf_dspace_create, + dbpf_dspace_create_list, dbpf_dspace_remove, + dbpf_dspace_remove_list, dbpf_dspace_iterate_handles, dbpf_dspace_verify, dbpf_dspace_getattr, diff --git a/src/io/trove/trove-dbpf/dbpf-keyval-pcache.c b/src/io/trove/trove-dbpf/dbpf-keyval-pcache.c index d175042..ccabb0a 100644 --- a/src/io/trove/trove-dbpf/dbpf-keyval-pcache.c +++ b/src/io/trove/trove-dbpf/dbpf-keyval-pcache.c @@ -174,8 +174,8 @@ int 
PINT_dbpf_keyval_pcache_lookup( gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, "Trove KeyVal pcache lookup succeeded: " - "handle: %llu, pos: %llu, key: %*s\n", - llu(handle), llu(pos), *length, (char *) *keyname); + "handle: %llu, pos: %llu\n", + llu(handle), llu(pos)); return 0; } @@ -224,8 +224,8 @@ int PINT_dbpf_keyval_pcache_insert( { gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, "Trove KeyVal pcache insert failed: (error: %d) " - "handle: %llu, pos: %llu: key: %*s\n", - ret, llu(handle), llu(pos), length, keyname); + "handle: %llu, pos: %llu\n", + ret, llu(handle), llu(pos)); gen_mutex_unlock(&pcache->mutex); free(entry); @@ -235,8 +235,8 @@ int PINT_dbpf_keyval_pcache_insert( gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, "Trove KeyVal pcache insert succeeded: " - "handle: %llu, pos: %llu: key: %*s\n", - llu(handle), llu(pos), length, keyname); + "handle: %llu, pos: %llu\n", + llu(handle), llu(pos)); return 0; } diff --git a/src/io/trove/trove-dbpf/dbpf-keyval.c b/src/io/trove/trove-dbpf/dbpf-keyval.c index d8bb5e7..6f1b4cb 100644 --- a/src/io/trove/trove-dbpf/dbpf-keyval.c +++ b/src/io/trove/trove-dbpf/dbpf-keyval.c @@ -119,6 +119,7 @@ static int dbpf_keyval_read_list_op_svc(struct dbpf_op *op_p); static int dbpf_keyval_write_op_svc(struct dbpf_op *op_p); static int dbpf_keyval_write_list_op_svc(struct dbpf_op *op_p); static int dbpf_keyval_remove_op_svc(struct dbpf_op *op_p); +static int dbpf_keyval_remove_list_op_svc(struct dbpf_op *op_p); static int dbpf_keyval_iterate_op_svc(struct dbpf_op *op_p); static int dbpf_keyval_iterate_keys_op_svc(struct dbpf_op *op_p); static int dbpf_keyval_flush_op_svc(struct dbpf_op *op_p); @@ -162,10 +163,11 @@ static int dbpf_keyval_read(TROVE_coll_id coll_id, TROVE_keyval_s *key_p, TROVE_keyval_s *val_p, TROVE_ds_flags flags, - TROVE_vtag_s *vtag, + TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { int ret; dbpf_queued_op_t *q_op_p = NULL; @@ -174,24 
+176,33 @@ static int dbpf_keyval_read(TROVE_coll_id coll_id, struct dbpf_collection *coll_p = NULL; dbpf_attr_cache_elem_t *cache_elem = NULL; TROVE_object_ref ref = {handle, coll_id}; + PINT_event_id event_id = 0; + PINT_event_type event_type; gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, "*** Trove KeyVal Read " "of %s\n", (char *)key_p->buffer); gen_mutex_lock(&dbpf_attr_cache_mutex); cache_elem = dbpf_attr_cache_elem_lookup(ref); - if (cache_elem) + if (cache_elem && (!(flags & TROVE_BINARY_KEY))) { dbpf_keyval_pair_cache_elem_t *keyval_pair = dbpf_attr_cache_elem_get_data_based_on_key( cache_elem, key_p->buffer); if (keyval_pair) { - dbpf_attr_cache_keyval_pair_fetch_cached_data( - cache_elem, keyval_pair, val_p->buffer, - &val_p->buffer_sz); val_p->read_sz = val_p->buffer_sz; + /* note: dbpf_attr_cache_keyval_pair_fetch_cached_data() will + * update read_sz appropriately + */ + ret = dbpf_attr_cache_keyval_pair_fetch_cached_data( + cache_elem, keyval_pair, val_p->buffer, + &val_p->read_sz); gen_mutex_unlock(&dbpf_attr_cache_mutex); + if(ret < 0) + { + return ret; + } return 1; } } @@ -219,11 +230,21 @@ static int dbpf_keyval_read(TROVE_coll_id coll_id, return ret; } + event_type = trove_dbpf_keyval_read_event_id; + DBPF_EVENT_START(coll_p, q_op_p, event_type, &event_id, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + handle, + PINT_HINT_GET_OP_ID(hints)); + /* initialize the op-specific members */ op_p->u.k_read.key = key_p; op_p->u.k_read.val = val_p; + op_p->hints = hints; - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, + event_type, event_id); } static int dbpf_keyval_read_op_svc(struct dbpf_op *op_p) @@ -242,6 +263,7 @@ static int dbpf_keyval_read_op_svc(struct dbpf_op *op_p) key.data = &key_entry; key.size = key.ulen = DBPF_KEYVAL_DB_ENTRY_TOTAL_SIZE( op_p->u.k_read.key->buffer_sz); + key.flags = DB_DBT_USERMEM; memset(&data, 
0, sizeof(data)); data.data = op_p->u.k_read.val->buffer; @@ -275,27 +297,30 @@ static int dbpf_keyval_read_op_svc(struct dbpf_op *op_p) op_p->u.k_read.val->read_sz = data.size; /* cache this data in the attr cache if we can */ - gen_mutex_lock(&dbpf_attr_cache_mutex); - if (dbpf_attr_cache_elem_set_data_based_on_key( - ref, key_entry.key, - op_p->u.k_read.val->buffer, data.size)) + if(!(op_p->flags & TROVE_BINARY_KEY)) { - /* - * NOTE: this can happen if the keyword isn't registered, or if - * there is no associated cache_elem for this key - */ - gossip_debug( - GOSSIP_DBPF_ATTRCACHE_DEBUG,"** CANNOT cache data retrieved " - "(key is %s)\n", (char *)key_entry.key); - } - else - { - gossip_debug( - GOSSIP_DBPF_ATTRCACHE_DEBUG,"*** cached keyval data " - "retrieved (key is %s)\n", - (char *)key_entry.key); + gen_mutex_lock(&dbpf_attr_cache_mutex); + if (dbpf_attr_cache_elem_set_data_based_on_key( + ref, key_entry.key, + op_p->u.k_read.val->buffer, data.size)) + { + /* + * NOTE: this can happen if the keyword isn't registered, or if + * there is no associated cache_elem for this key + */ + gossip_debug( + GOSSIP_DBPF_ATTRCACHE_DEBUG,"** CANNOT cache data retrieved " + "(key is %s)\n", (char *)key_entry.key); + } + else + { + gossip_debug( + GOSSIP_DBPF_ATTRCACHE_DEBUG,"*** cached keyval data " + "retrieved (key is %s)\n", + (char *)key_entry.key); + } + gen_mutex_unlock(&dbpf_attr_cache_mutex); } - gen_mutex_unlock(&dbpf_attr_cache_mutex); return 1; @@ -311,13 +336,16 @@ static int dbpf_keyval_write(TROVE_coll_id coll_id, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; struct dbpf_op op; struct dbpf_op *op_p; struct dbpf_collection *coll_p = NULL; int ret; + PINT_event_id event_id = 0; + PINT_event_type event_type; coll_p = dbpf_collection_find_registered(coll_id); if (coll_p == NULL) @@ -341,14 +369,24 @@ static int 
dbpf_keyval_write(TROVE_coll_id coll_id, return ret; } + event_type = trove_dbpf_keyval_write_event_id; + DBPF_EVENT_START(coll_p, q_op_p, event_type, &event_id, + PINT_HINT_GET_CLIENT_ID(hints), + PINT_HINT_GET_REQUEST_ID(hints), + PINT_HINT_GET_RANK(hints), + handle, + PINT_HINT_GET_OP_ID(hints)); + /* initialize the op-specific members */ op_p->u.k_write.key = *key_p; op_p->u.k_write.val = *val_p; + op_p->hints = hints; PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_KEYVAL_OPS, 1, PINT_PERF_ADD); - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, + event_type, event_id); } static int dbpf_keyval_write_op_svc(struct dbpf_op *op_p) @@ -360,11 +398,14 @@ static int dbpf_keyval_write_op_svc(struct dbpf_op *op_p) u_int32_t dbflags = 0; struct dbpf_keyval_db_entry key_entry; - gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, - "dbpf_keyval_write_op_svc: handle: %llu, key: %*s\n", - llu(op_p->handle), - op_p->u.k_write.key.buffer_sz, - (char *)op_p->u.k_write.key.buffer); + if(!(op_p->flags & TROVE_BINARY_KEY)) + { + gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, + "dbpf_keyval_write_op_svc: handle: %llu, key: %*s\n", + llu(op_p->handle), + op_p->u.k_write.key.buffer_sz, + (char *)op_p->u.k_write.key.buffer); + } key_entry.handle = op_p->handle; @@ -378,6 +419,7 @@ static int dbpf_keyval_write_op_svc(struct dbpf_op *op_p) key.data = &key_entry; key.size = key.ulen = DBPF_KEYVAL_DB_ENTRY_TOTAL_SIZE( op_p->u.k_write.key.buffer_sz); + key.flags = DB_DBT_USERMEM; data.data = op_p->u.k_write.val.buffer; data.size = op_p->u.k_write.val.buffer_sz; @@ -426,13 +468,16 @@ static int dbpf_keyval_write_op_svc(struct dbpf_op *op_p) } } - gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, - "keyval_db->put(handle= %llu, key= %*s (%d)) size=%d\n", - llu(key_entry.handle), - op_p->u.k_write.key.buffer_sz, - key_entry.key, - op_p->u.k_write.key.buffer_sz, - key.size); + if(!(op_p->flags & TROVE_BINARY_KEY)) + { + 
gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, + "keyval_db->put(handle= %llu, key= %*s (%d)) size=%d\n", + llu(key_entry.handle), + op_p->u.k_write.key.buffer_sz, + key_entry.key, + op_p->u.k_write.key.buffer_sz, + key.size); + } ret = op_p->coll_p->keyval_db->put( op_p->coll_p->keyval_db, NULL, &key, &data, dbflags); @@ -449,8 +494,11 @@ static int dbpf_keyval_write_op_svc(struct dbpf_op *op_p) goto return_error; } - gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, "*** Trove KeyVal Write " - "of %s\n", (char *)key_entry.key); + if(!(op_p->flags & TROVE_BINARY_KEY)) + { + gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, "*** Trove KeyVal Write " + "of %s\n", (char *)key_entry.key); + } if(op_p->flags & TROVE_NOOVERWRITE) { @@ -466,33 +514,35 @@ static int dbpf_keyval_write_op_svc(struct dbpf_op *op_p) * now that the data is written to disk, update the cache if it's * an attr keyval we manage. */ - gen_mutex_lock(&dbpf_attr_cache_mutex); - cache_elem = dbpf_attr_cache_elem_lookup(ref); - if (cache_elem) + if(!(op_p->flags & TROVE_BINARY_KEY)) { - if (dbpf_attr_cache_elem_set_data_based_on_key( - ref, key_entry.key, - op_p->u.k_write.val.buffer, data.size)) - { - /* - * NOTE: this can happen if the keyword isn't registered, - * or if there is no associated cache_elem for this key - */ - gossip_debug( - GOSSIP_DBPF_ATTRCACHE_DEBUG,"** CANNOT cache data written " - "(key is %s)\n", (char *)key_entry.key); - } - else + gen_mutex_lock(&dbpf_attr_cache_mutex); + cache_elem = dbpf_attr_cache_elem_lookup(ref); + if (cache_elem) { - gossip_debug( - GOSSIP_DBPF_ATTRCACHE_DEBUG,"*** cached keyval data " - "written (key is %s)\n", - (char *)key_entry.key); + if (dbpf_attr_cache_elem_set_data_based_on_key( + ref, key_entry.key, + op_p->u.k_write.val.buffer, data.size)) + { + /* + * NOTE: this can happen if the keyword isn't registered, + * or if there is no associated cache_elem for this key + */ + gossip_debug( + GOSSIP_DBPF_ATTRCACHE_DEBUG,"** CANNOT cache data written " + "(key is %s)\n", (char 
*)key_entry.key); + } + else + { + gossip_debug( + GOSSIP_DBPF_ATTRCACHE_DEBUG,"*** cached keyval data " + "written (key is %s)\n", + (char *)key_entry.key); + } } + gen_mutex_unlock(&dbpf_attr_cache_mutex); } - gen_mutex_unlock(&dbpf_attr_cache_mutex); - ret = DBPF_OP_COMPLETE; PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_KEYVAL_OPS, 1, PINT_PERF_SUB); @@ -509,7 +559,8 @@ static int dbpf_keyval_remove(TROVE_coll_id coll_id, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; struct dbpf_op op; @@ -540,6 +591,7 @@ static int dbpf_keyval_remove(TROVE_coll_id coll_id, } /* initialize op-specific members */ + op_p->hints = hints; op_p->u.k_remove.key = *key_p; if(val_p) { @@ -553,18 +605,21 @@ static int dbpf_keyval_remove(TROVE_coll_id coll_id, PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_KEYVAL_OPS, 1, PINT_PERF_ADD); - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, 0, 0); } static int dbpf_keyval_remove_op_svc(struct dbpf_op *op_p) { int ret = -TROVE_EINVAL; - gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, - "dbpf_keyval_remove_op_svc: handle: %llu, key: %*s\n", - llu(op_p->handle), - op_p->u.k_remove.key.buffer_sz, - (char *)op_p->u.k_remove.key.buffer); + if(!(op_p->flags & TROVE_BINARY_KEY)) + { + gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, + "dbpf_keyval_remove_op_svc: handle: %llu, key: %*s\n", + llu(op_p->handle), + op_p->u.k_remove.key.buffer_sz, + (char *)op_p->u.k_remove.key.buffer); + } ret = dbpf_keyval_do_remove(op_p->coll_p->keyval_db, op_p->handle, @@ -589,13 +644,144 @@ static int dbpf_keyval_remove_op_svc(struct dbpf_op *op_p) return ret; } +static int dbpf_keyval_remove_list(TROVE_coll_id coll_id, + TROVE_handle handle, + TROVE_keyval_s *key_array, + TROVE_keyval_s *val_array, + int *error_array, + int count, + TROVE_ds_flags flags, + 
TROVE_vtag_s *vtag, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints) +{ + dbpf_queued_op_t *q_op_p = NULL; + struct dbpf_op op; + struct dbpf_op *op_p; + struct dbpf_collection *coll_p = NULL; + int ret; + + coll_p = dbpf_collection_find_registered(coll_id); + if (coll_p == NULL) + { + return -TROVE_EINVAL; + } + + ret = dbpf_op_init_queued_or_immediate( + &op, &q_op_p, + KEYVAL_WRITE_LIST, + coll_p, + handle, + dbpf_keyval_remove_list_op_svc, + flags, + NULL, + user_ptr, + context_id, + &op_p); + if(ret < 0) + { + return ret; + } + + /* initialize the op-specific members */ + op_p->u.k_remove_list.key_array = key_array; + op_p->u.k_remove_list.val_array = val_array; + op_p->u.k_remove_list.error_array = error_array; + op_p->u.k_remove_list.count = count; + + PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_KEYVAL_OPS, + 1, PINT_PERF_ADD); + + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, 0, 0); +} + +static int dbpf_keyval_remove_list_op_svc(struct dbpf_op *op_p) +{ + int ret = -TROVE_EINVAL; + DBT key, data; + int k; + TROVE_keyval_handle_info info; + struct dbpf_keyval_db_entry key_entry; + int remove_count = 0; + + /* read each key to see if it is present */ + for (k = 0; k < op_p->u.k_remove_list.count; k++) + { + ret = dbpf_keyval_do_remove(op_p->coll_p->keyval_db, + op_p->handle, + &op_p->u.k_remove_list.key_array[k], + &op_p->u.k_remove_list.val_array[k]); + if(ret != 0) + { + op_p->u.k_remove_list.error_array[k] = ret; + } + else + { + remove_count++; + } + } + + if(op_p->flags & TROVE_KEYVAL_HANDLE_COUNT) + { + key_entry.handle = op_p->handle; + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.flags = DB_DBT_USERMEM; + key.data = &key_entry; + key.size = key.ulen = DBPF_KEYVAL_DB_ENTRY_TOTAL_SIZE(0); + data.data = &info; + data.ulen = sizeof(TROVE_keyval_handle_info); + data.flags = DB_DBT_USERMEM; + + ret = op_p->coll_p->keyval_db->get( + op_p->coll_p->keyval_db, 
NULL, &key, &data, 0); + if(ret == DB_NOTFOUND) + { + /* doesn't exist yet so we can set to 0 */ + memset(&info, 0, sizeof(TROVE_keyval_handle_info)); + data.size = sizeof(TROVE_keyval_handle_info); + } + else if(ret != 0) + { + op_p->coll_p->keyval_db->err( + op_p->coll_p->keyval_db, ret, "DB->get"); + return -dbpf_db_error_to_trove_error(ret); + } + + info.count -= remove_count; + + gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, + "[DBPF KEYVAL]: handle_info keyval_remove_list: handle: %llu, count: %d\n", + llu(op_p->handle), info.count); + + ret = op_p->coll_p->keyval_db->put( + op_p->coll_p->keyval_db, NULL, &key, &data, 0); + if(ret != 0) + { + op_p->coll_p->keyval_db->err( + op_p->coll_p->keyval_db, ret, + "keyval_db->put keyval handle info ops"); + return -dbpf_db_error_to_trove_error(ret); + } + } + + ret = DBPF_OP_COMPLETE; + PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_KEYVAL_OPS, + 1, PINT_PERF_SUB); + + return ret; +} + static int dbpf_keyval_validate(TROVE_coll_id coll_id, TROVE_handle handle, TROVE_ds_flags flags, TROVE_vtag_s *vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { return -TROVE_ENOSYS; } @@ -610,7 +796,8 @@ static int dbpf_keyval_iterate(TROVE_coll_id coll_id, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; struct dbpf_op op; @@ -645,8 +832,9 @@ static int dbpf_keyval_iterate(TROVE_coll_id coll_id, op_p->u.k_iterate.val_array = val_array; op_p->u.k_iterate.position_p = position_p; op_p->u.k_iterate.count_p = inout_count_p; + op_p->hints = hints; - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, 0, 0); } /* dbpf_keyval_iterate_op_svc() @@ -673,6 +861,8 @@ static int dbpf_keyval_iterate_op_svc(struct dbpf_op *op_p) { int count, ret; uint64_t tmp_pos = 0; + 
PINT_dbpf_keyval_iterate_callback tmp_callback = NULL; + int i; assert(*op_p->u.k_iterate.count_p > 0); @@ -695,6 +885,11 @@ static int dbpf_keyval_iterate_op_svc(struct dbpf_op *op_p) return 1; } + if(op_p->flags & TROVE_KEYVAL_ITERATE_REMOVE) + { + tmp_callback = PINT_dbpf_dspace_remove_keyval; + } + ret = PINT_dbpf_keyval_iterate(op_p->coll_p->keyval_db, op_p->handle, op_p->coll_p->pcache, @@ -702,7 +897,7 @@ static int dbpf_keyval_iterate_op_svc(struct dbpf_op *op_p) op_p->u.k_iterate.val_array, &count, *op_p->u.k_iterate.position_p, - NULL); + tmp_callback); if (ret == -TROVE_ENOENT) { *op_p->u.k_iterate.position_p = TROVE_ITERATE_END; @@ -738,6 +933,18 @@ static int dbpf_keyval_iterate_op_svc(struct dbpf_op *op_p) op_p->u.k_iterate.key_array[count-1].buffer, op_p->u.k_iterate.key_array[count-1].read_sz); } + + if(op_p->flags & TROVE_KEYVAL_ITERATE_REMOVE) + { + for(i=0; iu.k_iterate.count_p = count; @@ -759,7 +966,8 @@ static int dbpf_keyval_iterate_keys(TROVE_coll_id coll_id, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; struct dbpf_op op; @@ -793,8 +1001,9 @@ static int dbpf_keyval_iterate_keys(TROVE_coll_id coll_id, op_p->u.k_iterate_keys.key_array = key_array; op_p->u.k_iterate_keys.position_p = position_p; op_p->u.k_iterate_keys.count_p = inout_count_p; + op_p->hints = hints; - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, 0, 0); } /* dbpf_keyval_iterate_keys_op_svc() @@ -820,6 +1029,8 @@ static int dbpf_keyval_iterate_keys(TROVE_coll_id coll_id, static int dbpf_keyval_iterate_keys_op_svc(struct dbpf_op *op_p) { int count, ret; + PINT_dbpf_keyval_iterate_callback tmp_callback = NULL; + int i; count = *op_p->u.k_iterate_keys.count_p; @@ -833,6 +1044,11 @@ static int dbpf_keyval_iterate_keys_op_svc(struct dbpf_op *op_p) return 1; } + 
if(op_p->flags & TROVE_KEYVAL_ITERATE_REMOVE) + { + tmp_callback = PINT_dbpf_dspace_remove_keyval; + } + ret = PINT_dbpf_keyval_iterate(op_p->coll_p->keyval_db, op_p->handle, op_p->coll_p->pcache, @@ -841,7 +1057,7 @@ static int dbpf_keyval_iterate_keys_op_svc(struct dbpf_op *op_p) NULL, &count, *op_p->u.k_iterate_keys.position_p, - NULL); + tmp_callback); if (ret == -TROVE_ENOENT) { *op_p->u.k_iterate_keys.position_p = TROVE_ITERATE_END; @@ -870,6 +1086,17 @@ static int dbpf_keyval_iterate_keys_op_svc(struct dbpf_op *op_p) op_p->u.k_iterate_keys.key_array[count-1].buffer, op_p->u.k_iterate_keys.key_array[count-1].read_sz); } + if(op_p->flags & TROVE_KEYVAL_ITERATE_REMOVE) + { + for(i=0; iu.k_iterate_keys.count_p = count; @@ -887,7 +1114,8 @@ static int dbpf_keyval_read_list(TROVE_coll_id coll_id, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; struct dbpf_op op; @@ -922,8 +1150,9 @@ static int dbpf_keyval_read_list(TROVE_coll_id coll_id, op_p->u.k_read_list.val_array = val_array; op_p->u.k_read_list.err_array = err_array; op_p->u.k_read_list.count = count; + op_p->hints = hints; - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, 0, 0); } static int dbpf_keyval_read_list_op_svc(struct dbpf_op *op_p) @@ -991,7 +1220,8 @@ static int dbpf_keyval_write_list(TROVE_coll_id coll_id, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; struct dbpf_op op; @@ -1025,11 +1255,12 @@ static int dbpf_keyval_write_list(TROVE_coll_id coll_id, op_p->u.k_write_list.key_array = key_array; op_p->u.k_write_list.val_array = val_array; op_p->u.k_write_list.count = count; + op_p->hints = hints; PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_KEYVAL_OPS, 1, 
PINT_PERF_ADD); - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, 0, 0); } static int dbpf_keyval_write_list_op_svc(struct dbpf_op *op_p) @@ -1062,7 +1293,14 @@ static int dbpf_keyval_write_list_op_svc(struct dbpf_op *op_p) ret = op_p->coll_p->keyval_db->get( op_p->coll_p->keyval_db, NULL, &key, &data, 0); - if (ret != 0) + /* check for DB_BUFFER_SMALL in case the key is there but the data + * is simply too big for the temporary data buffer used + */ +#ifdef HAVE_DB_BUFFER_SMALL + if (ret != 0 && ret != DB_BUFFER_SMALL) +#else + if (ret != 0 && ret != ENOMEM) +#endif { if(ret == DB_NOTFOUND && ((op_p->flags & TROVE_NOOVERWRITE) || (!(op_p->flags & TROVE_ONLYOVERWRITE)))) @@ -1093,16 +1331,28 @@ static int dbpf_keyval_write_list_op_svc(struct dbpf_op *op_p) op_p->u.k_write_list.key_array[k].buffer_sz); data.flags = 0; - data.data = op_p->u.k_write_list.val_array[k].buffer; - data.size = data.ulen = op_p->u.k_write_list.val_array[k].buffer_sz; + /* allow NULL val array (writes an empty value to each position */ + if(!op_p->u.k_write_list.val_array) + { + data.data = NULL; + data.size = data.ulen = 0; + } + else + { + data.data = op_p->u.k_write_list.val_array[k].buffer; + data.size = data.ulen = op_p->u.k_write_list.val_array[k].buffer_sz; + } - gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, - "keyval_db->put(handle= %llu, key= %*s (%d)) size=%d\n", - llu(key_entry.handle), - op_p->u.k_write_list.key_array[k].buffer_sz, - key_entry.key, - op_p->u.k_write_list.key_array[k].buffer_sz, - key.size); + if(!(op_p->flags & TROVE_BINARY_KEY)) + { + gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, + "keyval_db->put(handle= %llu, key= %*s (%d)) size=%d\n", + llu(key_entry.handle), + op_p->u.k_write_list.key_array[k].buffer_sz, + key_entry.key, + op_p->u.k_write_list.key_array[k].buffer_sz, + key.size); + } ret = op_p->coll_p->keyval_db->put( op_p->coll_p->keyval_db, NULL, &key, &data, 0); @@ -1115,39 +1365,55 @@ 
static int dbpf_keyval_write_list_op_svc(struct dbpf_op *op_p) goto return_error; } - gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, "*** Trove KeyVal Write " - "of %s\n", (char *)op_p->u.k_write_list.key_array[k].buffer); + if(!(op_p->flags & TROVE_BINARY_KEY)) + { + gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, "*** Trove KeyVal Write " + "of %s\n", (char *)op_p->u.k_write_list.key_array[k].buffer); + } + + if(op_p->flags & TROVE_NOOVERWRITE) + { + ret = dbpf_keyval_handle_info_ops( + op_p, DBPF_KEYVAL_HANDLE_COUNT_INCREMENT); + if(ret != 0) + { + goto return_error; + } + } /* now that the data is written to disk, update the cache if it's an attr keyval we manage. */ - gen_mutex_lock(&dbpf_attr_cache_mutex); - cache_elem = dbpf_attr_cache_elem_lookup(ref); - if (cache_elem) + if(!(op_p->flags & TROVE_BINARY_KEY)) { - if (dbpf_attr_cache_elem_set_data_based_on_key( - ref, key_entry.key, - op_p->u.k_write_list.val_array[k].buffer, data.size)) + gen_mutex_lock(&dbpf_attr_cache_mutex); + cache_elem = dbpf_attr_cache_elem_lookup(ref); + if (cache_elem) { - /* -NOTE: this can happen if the keyword isn't registered, -or if there is no associated cache_elem for this key -*/ - gossip_debug( - GOSSIP_DBPF_ATTRCACHE_DEBUG,"** CANNOT cache data written " - "(key is %s)\n", - (char *)key_entry.key); - } - else - { - gossip_debug( - GOSSIP_DBPF_ATTRCACHE_DEBUG,"*** cached keyval data " - "written (key is %s)\n", - (char *)key_entry.key); + if (dbpf_attr_cache_elem_set_data_based_on_key( + ref, key_entry.key, + data.data, data.size)) + { + /* + NOTE: this can happen if the keyword isn't registered, + or if there is no associated cache_elem for this key + */ + gossip_debug( + GOSSIP_DBPF_ATTRCACHE_DEBUG,"** CANNOT cache data written " + "(key is %s)\n", + (char *)key_entry.key); + } + else + { + gossip_debug( + GOSSIP_DBPF_ATTRCACHE_DEBUG,"*** cached keyval data " + "written (key is %s)\n", + (char *)key_entry.key); + } } + gen_mutex_unlock(&dbpf_attr_cache_mutex); } - 
gen_mutex_unlock(&dbpf_attr_cache_mutex); } ret = DBPF_OP_COMPLETE; @@ -1163,7 +1429,8 @@ static int dbpf_keyval_flush(TROVE_coll_id coll_id, TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; struct dbpf_op op; @@ -1176,7 +1443,7 @@ static int dbpf_keyval_flush(TROVE_coll_id coll_id, { return -TROVE_EINVAL; } - + ret = dbpf_op_init_queued_or_immediate( &op, &q_op_p, KEYVAL_FLUSH, @@ -1192,8 +1459,9 @@ static int dbpf_keyval_flush(TROVE_coll_id coll_id, { return ret; } + op_p->hints = hints; - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, 0, 0); } static int dbpf_keyval_flush_op_svc(struct dbpf_op *op_p) @@ -1328,9 +1596,12 @@ int PINT_dbpf_keyval_iterate( goto return_error; } + #if 0 + /* not safe to print this if binary keys may be present */ gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, "iterate key: %*s, val: %llu\n", key->read_sz, (char *)key->buffer, (val ? 
llu(*(PVFS_handle *)val->buffer) : 0)); + #endif if(callback) { @@ -1366,9 +1637,12 @@ static int dbpf_keyval_do_remove( struct dbpf_keyval_db_entry key_entry; DBT db_key, db_val; + #if 0 + /* not safe to print this if it may be a binary key */ gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, "PINT_dbpf_keyval_remove: handle (%llu), key: (%d) %*s\n", llu(handle), key->buffer_sz, key->buffer_sz, (char *)key->buffer); + #endif key_entry.handle = handle; memcpy(key_entry.key, key->buffer, key->buffer_sz); @@ -1376,6 +1650,7 @@ static int dbpf_keyval_do_remove( memset(&db_key, 0, sizeof(db_key)); db_key.data = &key_entry; db_key.size = db_key.ulen = DBPF_KEYVAL_DB_ENTRY_TOTAL_SIZE(key->buffer_sz); + db_key.flags = DB_DBT_USERMEM; gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, "keyval_db->del(handle= %llu, key= %*s (%d)) size=%d\n", @@ -1464,7 +1739,7 @@ static int dbpf_keyval_iterate_skip_to_position( /* strip the session out of the position; we need to use a true * integer offset if we get past the cache */ - pos = pos & 0xffff; + pos = pos & 0xffffffff; return dbpf_keyval_iterate_step_to_position(handle, pos, dbc_p); } @@ -1618,9 +1893,8 @@ static int dbpf_keyval_iterate_cursor_get( if (ret != 0) { gossip_lerr("Failed to perform cursor get:" - "\n\thandle: %llu\n\ttype: %d\n\tkey: %s\n\tdb error: %s\n", - llu(key_entry.handle), db_flags, - key_entry.key, db_strerror(ret)); + "\n\thandle: %llu\n\ttype: %d\n\tdb error: %s\n", + llu(key_entry.handle), db_flags, db_strerror(ret)); return -dbpf_db_error_to_trove_error(ret); } @@ -1660,7 +1934,8 @@ static int dbpf_keyval_get_handle_info( TROVE_keyval_handle_info *info, void * user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { dbpf_queued_op_t *q_op_p = NULL; struct dbpf_op op; @@ -1691,10 +1966,11 @@ static int dbpf_keyval_get_handle_info( } op_p->u.k_get_handle_info.info = info; + op_p->hints = hints; PINT_perf_count(PINT_server_pc, PINT_PERF_METADATA_KEYVAL_OPS, 1, 
PINT_PERF_ADD); - return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p); + return dbpf_queue_or_service(op_p, q_op_p, coll_p, out_op_id_p, 0, 0); } static int dbpf_keyval_get_handle_info_op_svc(struct dbpf_op * op_p) @@ -1785,6 +2061,13 @@ static int dbpf_keyval_handle_info_ops(struct dbpf_op * op_p, } else if(action == DBPF_KEYVAL_HANDLE_COUNT_DECREMENT) { + if(info.count <= 0) + { + gossip_lerr( + "[DBPF KEYVAL]: ERROR: handle_info " + "count decrement: handle: %llu, value: %d\n", + llu(op_p->handle), info.count); + } assert(info.count > 0); gossip_debug(GOSSIP_DBPF_KEYVAL_DEBUG, @@ -1831,15 +2114,15 @@ static int dbpf_keyval_handle_info_ops(struct dbpf_op * op_p, int PINT_trove_dbpf_keyval_compare( DB * dbp, const DBT * a, const DBT * b) { - const struct dbpf_keyval_db_entry * db_entry_a; - const struct dbpf_keyval_db_entry * db_entry_b; + struct dbpf_keyval_db_entry db_entry_a; + struct dbpf_keyval_db_entry db_entry_b; - db_entry_a = (const struct dbpf_keyval_db_entry *) a->data; - db_entry_b = (const struct dbpf_keyval_db_entry *) b->data; + memcpy(&db_entry_a, a->data, sizeof(struct dbpf_keyval_db_entry)); + memcpy(&db_entry_b, b->data, sizeof(struct dbpf_keyval_db_entry)); - if(db_entry_a->handle != db_entry_b->handle) + if(db_entry_a.handle != db_entry_b.handle) { - return (db_entry_a->handle < db_entry_b->handle) ? -1 : 1; + return (db_entry_a.handle < db_entry_b.handle) ? 
-1 : 1; } if(a->size > b->size) @@ -1853,7 +2136,7 @@ int PINT_trove_dbpf_keyval_compare( } /* must be equal */ - return (strncmp(db_entry_a->key, db_entry_b->key, + return (memcmp(db_entry_a.key, db_entry_b.key, DBPF_KEYVAL_DB_ENTRY_KEY_SIZE(a->size))); } @@ -1862,6 +2145,7 @@ struct TROVE_keyval_ops dbpf_keyval_ops = dbpf_keyval_read, dbpf_keyval_write, dbpf_keyval_remove, + dbpf_keyval_remove_list, dbpf_keyval_validate, dbpf_keyval_iterate, dbpf_keyval_iterate_keys, diff --git a/src/io/trove/trove-dbpf/dbpf-mgmt.c b/src/io/trove/trove-dbpf/dbpf-mgmt.c index c980cc7..98853b9 100644 --- a/src/io/trove/trove-dbpf/dbpf-mgmt.c +++ b/src/io/trove/trove-dbpf/dbpf-mgmt.c @@ -18,6 +18,8 @@ #include #include #include "trove.h" +#include "pint-context.h" +#include "pint-mgmt.h" #ifdef HAVE_MALLOC_H #include @@ -41,6 +43,25 @@ #include "pint-util.h" #include "dbpf-sync.h" +PINT_event_group trove_dbpf_event_group; + +PINT_event_type trove_dbpf_read_event_id; +PINT_event_type trove_dbpf_write_event_id; +PINT_event_type trove_dbpf_keyval_write_event_id; +PINT_event_type trove_dbpf_keyval_read_event_id; +PINT_event_type trove_dbpf_dspace_create_event_id; +PINT_event_type trove_dbpf_dspace_create_list_event_id; +PINT_event_type trove_dbpf_dspace_getattr_event_id; +PINT_event_type trove_dbpf_dspace_setattr_event_id; + +int dbpf_pid; + +PINT_manager_t io_thread_mgr; +PINT_worker_id io_worker_id; +PINT_queue_id io_queue_id; +PINT_context_id io_ctx; +static int directio_threads_started = 0; + extern gen_mutex_t dbpf_attr_cache_mutex; extern int TROVE_db_cache_size_bytes; @@ -49,6 +70,18 @@ extern int TROVE_shm_key_hint; struct dbpf_storage *my_storage_p = NULL; static int db_open_count, db_close_count; static void unlink_db_cache_files(const char* path); +static int start_directio_threads(void); +static int stop_directio_threads(void); + +static int trove_directio_threads_num = 30; +static int trove_directio_ops_per_queue = 10; +static int trove_directio_timeout = 1000; + +static 
int PINT_dbpf_io_completion_callback(PINT_context_id ctx_id, + int count, + PINT_op_id *op_ids, + void **user_ptrs, + PVFS_error *errors); #define COLL_ENV_FLAGS (DB_INIT_MPOOL | DB_CREATE | DB_THREAD) @@ -106,7 +139,7 @@ DB_ENV *dbpf_getdb_env(const char *path, unsigned int env_flags, int *error) retry: ret = db_env_create(&dbenv, 0); - if (ret != 0) + if (ret != 0 || dbenv == NULL) { gossip_err("dbpf_getdb_env: %s\n", db_strerror(ret)); *error = ret; @@ -258,12 +291,11 @@ int dbpf_putdb_env(DB_ENV *dbenv, const char *path) return 0; } -static int dbpf_db_create(const char *sto_path, - char *dbname, +static int dbpf_db_create(char *dbname, DB_ENV *envp, uint32_t flags); static DB *dbpf_db_open( - const char *sto_path, char *dbname, DB_ENV *envp, int *err_p, + char *dbname, DB_ENV *envp, int *err_p, int (*compare_fn) (DB *db, const DBT *dbt1, const DBT *dbt2), uint32_t flags); static int dbpf_mkpath(char *pathname, mode_t mode); @@ -296,7 +328,10 @@ int dbpf_collection_getinfo(TROVE_coll_id coll_id, PINT_statfs_t tmp_statfs; TROVE_statfs *tmp_trove_statfs = (TROVE_statfs *)parameter; - DBPF_GET_STORAGE_DIRNAME(path_name, PATH_MAX, sto_p->name); + /* XXX: this is not entirely accurate when data and metadata + * are stored on different devices. 
+ */ + DBPF_GET_DATA_DIRNAME(path_name, PATH_MAX, sto_p->data_path); ret = PINT_statfs_lookup(path_name, &tmp_statfs); if (ret < 0) { @@ -335,7 +370,6 @@ int dbpf_collection_setinfo(TROVE_method_id method_id, struct dbpf_collection* coll; coll = dbpf_collection_find_registered(coll_id); - assert(coll); switch(option) { @@ -396,6 +430,7 @@ int dbpf_collection_setinfo(TROVE_method_id method_id, gossip_debug(GOSSIP_TROVE_DEBUG, "dbpf collection %d - Setting HIGH_WATERMARK to %d\n", (int) coll_id, *(int *)parameter); + assert(coll); dbpf_queued_op_set_sync_high_watermark(*(int *)parameter, coll); ret = 0; break; @@ -403,6 +438,7 @@ int dbpf_collection_setinfo(TROVE_method_id method_id, gossip_debug(GOSSIP_TROVE_DEBUG, "dbpf collection %d - Setting LOW_WATERMARK to %d\n", (int) coll_id, *(int *)parameter); + assert(coll); dbpf_queued_op_set_sync_low_watermark(*(int *)parameter, coll); ret = 0; break; @@ -411,6 +447,7 @@ int dbpf_collection_setinfo(TROVE_method_id method_id, "dbpf collection %d - %s sync mode\n", (int) coll_id, (*(int *)parameter) ? "Enabling" : "Disabling"); + assert(coll); dbpf_queued_op_set_sync_mode(*(int *)parameter, coll); ret = 0; break; @@ -419,9 +456,22 @@ int dbpf_collection_setinfo(TROVE_method_id method_id, "dbpf collection %d - %s immediate completion\n", (int) coll_id, (*(int *)parameter) ? 
"Enabling" : "Disabling"); + assert(coll); coll->immediate_completion = *(int *)parameter; ret = 0; break; + case TROVE_DIRECTIO_THREADS_NUM: + trove_directio_threads_num = *(int *)parameter; + ret = 0; + break; + case TROVE_DIRECTIO_OPS_PER_QUEUE: + trove_directio_ops_per_queue = *(int *)parameter; + ret = 0; + break; + case TROVE_DIRECTIO_TIMEOUT: + trove_directio_timeout = *(int *)parameter; + ret = 0; + break; } return ret; } @@ -503,6 +553,7 @@ int dbpf_collection_geteattr(TROVE_coll_id coll_id, memset(&db_data, 0, sizeof(db_data)); db_key.data = key_p->buffer; db_key.size = key_p->buffer_sz; + db_key.flags = DB_DBT_USERMEM; db_data.data = val_p->buffer; db_data.ulen = val_p->buffer_sz; @@ -512,7 +563,7 @@ int dbpf_collection_geteattr(TROVE_coll_id coll_id, NULL, &db_key, &db_data, 0); if (ret != 0) { - gossip_lerr("dbpf_collection_geteattr: %s\n", db_strerror(ret)); + gossip_debug(GOSSIP_TROVE_DEBUG, "dbpf_collection_geteattr: %s\n", db_strerror(ret)); return -dbpf_db_error_to_trove_error(ret); } @@ -520,19 +571,164 @@ int dbpf_collection_geteattr(TROVE_coll_id coll_id, return 1; } -static int dbpf_initialize(char *stoname, +int dbpf_collection_deleattr(TROVE_coll_id coll_id, + TROVE_keyval_s *key_p, + TROVE_ds_flags flags, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p) +{ + int ret = -TROVE_EINVAL; + struct dbpf_storage *sto_p = NULL; + struct dbpf_collection *coll_p = NULL; + DBT db_key; + + sto_p = my_storage_p; + if (sto_p == NULL) + { + return ret; + } + coll_p = dbpf_collection_find_registered(coll_id); + if (coll_p == NULL) + { + return ret; + } + + memset(&db_key, 0, sizeof(db_key)); + db_key.data = key_p->buffer; + db_key.size = key_p->buffer_sz; + + ret = coll_p->coll_attr_db->del(coll_p->coll_attr_db, + NULL, &db_key, 0); + if (ret != 0) + { + gossip_lerr("%s: %s\n", __func__, db_strerror(ret)); + return -dbpf_db_error_to_trove_error(ret); + } + + ret = coll_p->coll_attr_db->sync(coll_p->coll_attr_db, 0); + if (ret != 
0) + { + gossip_lerr("%s: %s\n", __func__, db_strerror(ret)); + return -dbpf_db_error_to_trove_error(ret); + } + + return 1; +} + +static int dbpf_initialize(char *data_path, + char *meta_path, TROVE_ds_flags flags) { int ret = -TROVE_EINVAL; struct dbpf_storage *sto_p = NULL; - if (!stoname) + /* initialize events */ + PINT_event_define_group("trove_dbpf", &trove_dbpf_event_group); + + /* Define the read event: + * START: + * (client_id, request_id, rank, metafile_handle, + * datafile_handle, op_id, requested_read_size) + * STOP: (size_read) + */ + PINT_event_define_event(&trove_dbpf_event_group, + "dbpf_read", + "%d%d%d%llu%llu%d%d", + "%llu", + &trove_dbpf_read_event_id); + + /* Define the write event: + * START: + * (client_id, request_id, rank, metafile-handle, datafile-handle, op_id, write size) + * STOP: (size_written) + */ + PINT_event_define_event(&trove_dbpf_event_group, + "dbpf_write", + "%d%d%d%llu%llu%d%d", + "%llu", + &trove_dbpf_write_event_id); + + /* Define the keyval read event: + * START: (client_id, request_id, rank, metafile-handle, op_id) + * STOP: (none) + */ + PINT_event_define_event(&trove_dbpf_event_group, + "dbpf_keyval_read", + "%d%d%d%llu%d", + "", + &trove_dbpf_keyval_read_event_id); + + /* Define the keyval write event: + * START: + * (client_id, request_id, rank, metafile-handle, op_id) + * STOP: (none) + */ + PINT_event_define_event(&trove_dbpf_event_group, + "dbpf_keyval_write", + "%d%d%d%llu%d", + "", + &trove_dbpf_keyval_write_event_id); + + /* Define the dspace create event: + * START: + * (client_id, request_id, rank, op_id) + * STOP: (new-handle) + */ + PINT_event_define_event(&trove_dbpf_event_group, + "dbpf_dspace_create", + "%d%d%d%d", + "%llu", + &trove_dbpf_dspace_create_event_id); + + /* Define the dspace create list event: + * START: + * (client_id, request_id, rank, op_id) + * STOP: (new-handle) + */ + PINT_event_define_event(&trove_dbpf_event_group, + "dbpf_dspace_create_list", + "%d%d%d%d", + "%llu", + 
&trove_dbpf_dspace_create_list_event_id); + + /* Define the dspace getattr event: + * START: + * (client_id, request_id, rank, metafile-handle, op_id) + * STOP: (none) + */ + PINT_event_define_event(&trove_dbpf_event_group, + "dbpf_dspace_getattr", + "%d%d%d%llu%d", + "", + &trove_dbpf_dspace_getattr_event_id); + + /* Define the dspace setattr event: + * START: + * (client_id, request_id, rank, metafile-handle, op_id) + * STOP: (none) + */ + PINT_event_define_event(&trove_dbpf_event_group, + "dbpf_dspace_setattr", + "%d%d%d%llu%d", + "", + &trove_dbpf_dspace_setattr_event_id); + + dbpf_pid = getpid(); + + if (!data_path) { - gossip_err("dbpf_initialize failure: invalid storage name\n"); + gossip_err("dbpf_initialize failure: invalid data storage path\n"); return ret; } - sto_p = dbpf_storage_lookup(stoname, &ret, flags); + if (!meta_path) + { + gossip_err("dbpf_initialize failure: invalid metadata storage path\n"); + return ret; + } + + sto_p = dbpf_storage_lookup(data_path, meta_path, &ret, flags); if (sto_p == NULL) { gossip_debug( @@ -548,6 +744,114 @@ static int dbpf_initialize(char *stoname, return dbpf_thread_initialize(); } +static int start_directio_threads(void) +{ + int ret; + PINT_worker_attr_t io_worker_attrs; + + if(directio_threads_started) + { + /* already running */ + return(0); + } + + ret = PINT_open_context(&io_ctx, PINT_dbpf_io_completion_callback); + if(ret < 0) + { + dbpf_finalize(); + return ret; + } + + ret = PINT_manager_init(&io_thread_mgr, io_ctx); + if(ret < 0) + { + PINT_close_context(io_ctx); + dbpf_finalize(); + return ret; + } + + io_worker_attrs.type = PINT_WORKER_TYPE_THREADED_QUEUES; + io_worker_attrs.u.threaded.thread_count = trove_directio_threads_num; + io_worker_attrs.u.threaded.ops_per_queue = trove_directio_ops_per_queue; + io_worker_attrs.u.threaded.timeout = trove_directio_timeout; + ret = PINT_manager_worker_add(io_thread_mgr, &io_worker_attrs, &io_worker_id); + if(ret < 0) + { + PINT_manager_destroy(io_thread_mgr); + 
PINT_close_context(io_ctx); + dbpf_finalize(); + return ret; + } + + ret = PINT_queue_create(&io_queue_id, NULL); + if(ret < 0) + { + PINT_manager_destroy(io_thread_mgr); + PINT_close_context(io_ctx); + dbpf_finalize(); + return ret; + } + + ret = PINT_manager_queue_add(io_thread_mgr, io_worker_id, io_queue_id); + if(ret < 0) + { + PINT_queue_destroy(io_queue_id); + PINT_manager_destroy(io_thread_mgr); + PINT_close_context(io_ctx); + dbpf_finalize(); + return ret; + } + + directio_threads_started = 1; + + return(0); +} + +static int stop_directio_threads(void) +{ + if(directio_threads_started != 1) + { + return 0; + } + + PINT_manager_queue_remove(io_thread_mgr, io_queue_id); + PINT_queue_destroy(io_queue_id); + PINT_manager_destroy(io_thread_mgr); + PINT_close_context(io_ctx); + return 0; +} + +static int dbpf_direct_initialize(char *data_path, + char *meta_path, + TROVE_ds_flags flags) +{ + int ret; + + /* some parts of initialization are shared with other methods */ + ret = dbpf_initialize(data_path, meta_path, flags); + if(ret < 0) + { + return(ret); + } + + /* fire up the IO threads for direct IO */ + ret = start_directio_threads(); + if(ret < 0) + { + dbpf_finalize(); + return(ret); + } + + return(0); +} + +static int dbpf_direct_finalize(void) +{ + stop_directio_threads(); + dbpf_finalize(); + return 0; +} + int dbpf_finalize(void) { int ret = -TROVE_EINVAL; @@ -558,39 +862,53 @@ int dbpf_finalize(void) dbpf_attr_cache_finalize(); gen_mutex_unlock(&dbpf_attr_cache_mutex); - dbpf_collection_clear_registered(); - if (my_storage_p) { - ret = my_storage_p->sto_attr_db->sync(my_storage_p->sto_attr_db, 0); - if (ret) + if( my_storage_p->sto_attr_db ) { - gossip_err("dbpf_finalize: %s\n", db_strerror(ret)); - return -dbpf_db_error_to_trove_error(ret); - } + ret = my_storage_p->sto_attr_db->sync(my_storage_p->sto_attr_db, 0); + if (ret) + { + gossip_err("dbpf_finalize attr sync: %s\n", db_strerror(ret)); + return -dbpf_db_error_to_trove_error(ret); + } - ret = 
db_close(my_storage_p->sto_attr_db); - if (ret) - { - gossip_err("dbpf_finalize: %s\n", db_strerror(ret)); - return -dbpf_db_error_to_trove_error(ret); + ret = db_close(my_storage_p->sto_attr_db); + if (ret) + { + gossip_err("dbpf_finalize attr close: %s\n", db_strerror(ret)); + return -dbpf_db_error_to_trove_error(ret); + } } - - ret = my_storage_p->coll_db->sync(my_storage_p->coll_db, 0); - if (ret) + else { - gossip_err("dbpf_finalize: %s\n", db_strerror(ret)); - return -dbpf_db_error_to_trove_error(ret); + gossip_err("dbpf_finalize: attribute database not defined\n"); } - ret = db_close(my_storage_p->coll_db); - if (ret) + if( my_storage_p->coll_db ) { - gossip_err("dbpf_finalize: %s\n", db_strerror(ret)); - return -dbpf_db_error_to_trove_error(ret); + ret = my_storage_p->coll_db->sync(my_storage_p->coll_db, 0); + if (ret) + { + gossip_err("dbpf_finalize collection sync: %s\n", + db_strerror(ret)); + return -dbpf_db_error_to_trove_error(ret); + } + + ret = db_close(my_storage_p->coll_db); + if (ret) + { + gossip_err("dbpf_finalize collection close: %s\n", + db_strerror(ret)); + return -dbpf_db_error_to_trove_error(ret); + } } - - free(my_storage_p->name); + else + { + gossip_err("dbpf_finalize: collections database not defined\n"); + } + free(my_storage_p->data_path); + free(my_storage_p->meta_path); free(my_storage_p); my_storage_p = NULL; } @@ -604,31 +922,40 @@ int dbpf_finalize(void) * - creating storage attribute database, propagating with create time * - creating collections database, filling in create time */ -int dbpf_storage_create(char *stoname, +int dbpf_storage_create(char *data_path, + char *meta_path, void *user_ptr, TROVE_op_id *out_op_id_p) { int ret = -TROVE_EINVAL; - char storage_dirname[PATH_MAX] = {0}; + char data_dirname[PATH_MAX] = {0}; + char meta_dirname[PATH_MAX] = {0}; char sto_attrib_dbname[PATH_MAX] = {0}; char collections_dbname[PATH_MAX] = {0}; - DBPF_GET_STORAGE_DIRNAME(storage_dirname, PATH_MAX, stoname); - ret = 
dbpf_mkpath(storage_dirname, 0755); + DBPF_GET_DATA_DIRNAME(data_dirname, PATH_MAX, data_path); + ret = dbpf_mkpath(data_dirname, 0755); if (ret != 0) { return ret; } - DBPF_GET_STO_ATTRIB_DBNAME(sto_attrib_dbname, PATH_MAX, stoname); - ret = dbpf_db_create(storage_dirname, sto_attrib_dbname, NULL, 0); + DBPF_GET_META_DIRNAME(meta_dirname, PATH_MAX, meta_path); + ret = dbpf_mkpath(meta_dirname, 0755); + if (ret != 0) + { + return ret; + } + + DBPF_GET_STO_ATTRIB_DBNAME(sto_attrib_dbname, PATH_MAX, meta_path); + ret = dbpf_db_create(sto_attrib_dbname, NULL, 0); if (ret != 0) { return ret; } - DBPF_GET_COLLECTIONS_DBNAME(collections_dbname, PATH_MAX, stoname); - ret = dbpf_db_create(storage_dirname, collections_dbname, NULL, DB_RECNUM); + DBPF_GET_COLLECTIONS_DBNAME(collections_dbname, PATH_MAX, meta_path); + ret = dbpf_db_create(collections_dbname, NULL, DB_RECNUM); if (ret != 0) { gossip_lerr("dbpf_storage_create: removing storage attribute database after failed create attempt"); @@ -639,9 +966,10 @@ int dbpf_storage_create(char *stoname, return 1; } -int dbpf_storage_remove(char *stoname, - void *user_ptr, - TROVE_op_id *out_op_id_p) +int dbpf_storage_remove(char *data_path, + char *meta_path, + void *user_ptr, + TROVE_op_id *out_op_id_p) { int ret = -TROVE_EINVAL; char path_name[PATH_MAX] = {0}; @@ -649,11 +977,13 @@ int dbpf_storage_remove(char *stoname, if (my_storage_p) { db_close(my_storage_p->sto_attr_db); db_close(my_storage_p->coll_db); - free(my_storage_p->name); + free(my_storage_p->meta_path); + free(my_storage_p->data_path); free(my_storage_p); my_storage_p = NULL; } - DBPF_GET_STO_ATTRIB_DBNAME(path_name, PATH_MAX, stoname); + + DBPF_GET_STO_ATTRIB_DBNAME(path_name, PATH_MAX, meta_path); gossip_debug(GOSSIP_TROVE_DEBUG, "Removing %s\n", path_name); if (unlink(path_name) != 0) @@ -662,7 +992,7 @@ int dbpf_storage_remove(char *stoname, goto storage_remove_failure; } - DBPF_GET_COLLECTIONS_DBNAME(path_name, PATH_MAX, stoname); + 
DBPF_GET_COLLECTIONS_DBNAME(path_name, PATH_MAX, meta_path); gossip_debug(GOSSIP_TROVE_DEBUG, "Removing %s\n", path_name); if (unlink(path_name) != 0) @@ -671,11 +1001,20 @@ int dbpf_storage_remove(char *stoname, goto storage_remove_failure; } - DBPF_GET_STORAGE_DIRNAME(path_name, PATH_MAX, stoname); + DBPF_GET_META_DIRNAME(path_name, PATH_MAX, meta_path); + gossip_debug(GOSSIP_TROVE_DEBUG, "Removing %s\n", path_name); + if (rmdir(path_name) != 0) + { + perror("failure removing metadata directory"); + ret = -trove_errno_to_trove_error(errno); + goto storage_remove_failure; + } + + DBPF_GET_DATA_DIRNAME(path_name, PATH_MAX, data_path); gossip_debug(GOSSIP_TROVE_DEBUG, "Removing %s\n", path_name); if (rmdir(path_name) != 0) { - perror("failure removing storage space"); + perror("failure removing data directory"); ret = -trove_errno_to_trove_error(errno); goto storage_remove_failure; } @@ -725,6 +1064,7 @@ int dbpf_collection_create(char *collname, key.data = collname; key.size = strlen(collname)+1; + key.flags = DB_DBT_USERMEM; data.data = &db_data; data.ulen = sizeof(db_data); data.flags = DB_DBT_USERMEM; @@ -761,11 +1101,11 @@ int dbpf_collection_create(char *collname, return -dbpf_db_error_to_trove_error(ret); } - DBPF_GET_STORAGE_DIRNAME(path_name, PATH_MAX, sto_p->name); + DBPF_GET_DATA_DIRNAME(path_name, PATH_MAX, sto_p->data_path); ret = stat(path_name, &dirstat); if (ret < 0 && errno != ENOENT) { - gossip_err("stat failed on storage directory %s\n", path_name); + gossip_err("stat failed on data directory %s\n", path_name); return -trove_errno_to_trove_error(errno); } else if (ret < 0) @@ -773,21 +1113,49 @@ int dbpf_collection_create(char *collname, ret = mkdir(path_name, 0755); if (ret != 0) { - gossip_err("mkdir failed on storage directory %s\n", path_name); + gossip_err("mkdir failed on data directory %s\n", path_name); return -trove_errno_to_trove_error(errno); } } - DBPF_GET_COLL_DIRNAME(path_name, PATH_MAX, sto_p->name, new_coll_id); + 
DBPF_GET_META_DIRNAME(path_name, PATH_MAX, sto_p->meta_path); + ret = stat(path_name, &dirstat); + if (ret < 0 && errno != ENOENT) + { + gossip_err("stat failed on metadata directory %s\n", path_name); + return -trove_errno_to_trove_error(errno); + } + else if (ret < 0) + { + ret = mkdir(path_name, 0755); + if (ret != 0) + { + gossip_err("mkdir failed on metadata directory %s\n", path_name); + return -trove_errno_to_trove_error(errno); + } + } + + + DBPF_GET_COLL_DIRNAME(path_name, PATH_MAX, sto_p->data_path, new_coll_id); ret = mkdir(path_name, 0755); - if (ret != 0) + if (ret != 0 && strcmp(sto_p->data_path, sto_p->meta_path)) { - gossip_err("mkdir failed on collection directory %s\n", path_name); + gossip_err("mkdir failed on data collection directory %s\n", + path_name); return -trove_errno_to_trove_error(errno); } + DBPF_GET_COLL_DIRNAME(path_name, PATH_MAX, sto_p->meta_path, new_coll_id); + ret = mkdir(path_name, 0755); + if (ret != 0 && strcmp(sto_p->data_path, sto_p->meta_path)) + { + gossip_err("mkdir failed on metadata collection directory %s\n", + path_name); + return -trove_errno_to_trove_error(errno); + } + DBPF_GET_COLL_ATTRIB_DBNAME(path_name, PATH_MAX, - sto_p->name, new_coll_id); + sto_p->meta_path, new_coll_id); ret = stat(path_name, &dbstat); if(ret < 0 && errno != ENOENT) @@ -797,7 +1165,7 @@ int dbpf_collection_create(char *collname, } else if(ret < 0) { - ret = dbpf_db_create(sto_p->name, path_name, NULL, 0); + ret = dbpf_db_create(path_name, NULL, 0); if (ret != 0) { gossip_err("dbpf_db_create failed on attrib db %s\n", path_name); @@ -805,7 +1173,7 @@ int dbpf_collection_create(char *collname, } } - db_p = dbpf_db_open(sto_p->name, path_name, NULL, &error, NULL, 0); + db_p = dbpf_db_open(path_name, NULL, &error, NULL, 0); if (db_p == NULL) { gossip_err("dbpf_db_open failed on attrib db %s\n", path_name); @@ -853,7 +1221,8 @@ int dbpf_collection_create(char *collname, db_p->sync(db_p, 0); db_close(db_p); - DBPF_GET_DS_ATTRIB_DBNAME(path_name, 
PATH_MAX, sto_p->name, new_coll_id); + DBPF_GET_DS_ATTRIB_DBNAME(path_name, PATH_MAX, sto_p->meta_path, + new_coll_id); ret = stat(path_name, &dbstat); if(ret < 0 && errno != ENOENT) { @@ -862,7 +1231,7 @@ int dbpf_collection_create(char *collname, } if(ret < 0) { - ret = dbpf_db_create(sto_p->name, path_name, NULL, 0); + ret = dbpf_db_create(path_name, NULL, 0); if (ret != 0) { gossip_err("dbpf_db_create failed on %s\n", path_name); @@ -870,7 +1239,7 @@ int dbpf_collection_create(char *collname, } } - DBPF_GET_KEYVAL_DBNAME(path_name, PATH_MAX, sto_p->name, new_coll_id); + DBPF_GET_KEYVAL_DBNAME(path_name, PATH_MAX, sto_p->meta_path, new_coll_id); ret = stat(path_name, &dbstat); if(ret < 0 && errno != ENOENT) { @@ -879,7 +1248,7 @@ int dbpf_collection_create(char *collname, } if(ret < 0) { - ret = dbpf_db_create(sto_p->name, path_name, NULL, 0); + ret = dbpf_db_create(path_name, NULL, 0); if (ret != 0) { gossip_err("dbpf_db_create failed on %s\n", path_name); @@ -887,7 +1256,8 @@ int dbpf_collection_create(char *collname, } } - DBPF_GET_BSTREAM_DIRNAME(path_name, PATH_MAX, sto_p->name, new_coll_id); + DBPF_GET_BSTREAM_DIRNAME(path_name, PATH_MAX, sto_p->data_path, + new_coll_id); ret = mkdir(path_name, 0755); if(ret != 0) { @@ -906,7 +1276,7 @@ int dbpf_collection_create(char *collname, } } - DBPF_GET_STRANDED_BSTREAM_DIRNAME(path_name, PATH_MAX, sto_p->name, + DBPF_GET_STRANDED_BSTREAM_DIRNAME(path_name, PATH_MAX, sto_p->data_path, new_coll_id); ret = mkdir(path_name, 0755); if(ret != 0) @@ -949,6 +1319,7 @@ int dbpf_collection_remove(char *collname, key.data = collname; key.size = strlen(collname) + 1; + key.flags = DB_DBT_USERMEM; data.data = &db_data; data.ulen = sizeof(db_data); data.flags = DB_DBT_USERMEM; @@ -956,7 +1327,7 @@ int dbpf_collection_remove(char *collname, ret = sto_p->coll_db->get(sto_p->coll_db, NULL, &key, &data, 0); if (ret != 0) { - sto_p->coll_db->err(sto_p->coll_db, ret, "DB->get"); + sto_p->coll_db->err(sto_p->coll_db, ret, "DB->get 
collection"); return -dbpf_db_error_to_trove_error(ret); } @@ -983,16 +1354,17 @@ int dbpf_collection_remove(char *collname, db_close(db_collection->ds_db); db_close(db_collection->keyval_db); /* so that environment can also be cleaned up */ - dbpf_putdb_env(db_collection->coll_env, db_collection->path_name); + dbpf_putdb_env(db_collection->coll_env, db_collection->meta_path); dbpf_collection_deregister(db_collection); free(db_collection->name); - free(db_collection->path_name); + free(db_collection->meta_path); + free(db_collection->data_path); PINT_dbpf_keyval_pcache_finalize(db_collection->pcache); free(db_collection); } DBPF_GET_DS_ATTRIB_DBNAME(path_name, PATH_MAX, - sto_p->name, db_data.coll_id); + sto_p->meta_path, db_data.coll_id); if (unlink(path_name) != 0) { gossip_err("failure removing dataspace attrib db\n"); @@ -1000,7 +1372,7 @@ int dbpf_collection_remove(char *collname, } DBPF_GET_KEYVAL_DBNAME(path_name, PATH_MAX, - sto_p->name, db_data.coll_id); + sto_p->meta_path, db_data.coll_id); if(unlink(path_name) != 0) { gossip_err("failure removing keyval db\n"); @@ -1008,7 +1380,7 @@ int dbpf_collection_remove(char *collname, } DBPF_GET_COLL_ATTRIB_DBNAME(path_name, PATH_MAX, - sto_p->name, db_data.coll_id); + sto_p->meta_path, db_data.coll_id); if (unlink(path_name) != 0) { gossip_err("failure removing collection attrib db\n"); @@ -1016,7 +1388,7 @@ int dbpf_collection_remove(char *collname, } DBPF_GET_BSTREAM_DIRNAME(path_name, PATH_MAX, - sto_p->name, db_data.coll_id); + sto_p->data_path, db_data.coll_id); for(i = 0; i < DBPF_BSTREAM_MAX_NUM_BUCKETS; i++) { snprintf(dir, PATH_MAX, "%s/%.8d", path_name, i); @@ -1062,7 +1434,7 @@ int dbpf_collection_remove(char *collname, } DBPF_GET_STRANDED_BSTREAM_DIRNAME(path_name, PATH_MAX, - sto_p->name, db_data.coll_id); + sto_p->data_path, db_data.coll_id); /* remove stranded bstreams directory */ current_dir = opendir(path_name); @@ -1105,10 +1477,19 @@ int dbpf_collection_remove(char *collname, } 
DBPF_GET_COLL_DIRNAME(path_name, PATH_MAX, - sto_p->name, db_data.coll_id); + sto_p->meta_path, db_data.coll_id); if (rmdir(path_name) != 0) { - gossip_err("failure removing collection directory\n"); + gossip_err("failure removing metadata collection directory\n"); + ret = -trove_errno_to_trove_error(errno); + goto collection_remove_failure; + } + + DBPF_GET_COLL_DIRNAME(path_name, PATH_MAX, + sto_p->data_path, db_data.coll_id); + if (rmdir(path_name) != 0) + { + gossip_err("failure removing data collection directory\n"); ret = -trove_errno_to_trove_error(errno); } collection_remove_failure: @@ -1125,7 +1506,7 @@ int dbpf_collection_iterate(TROVE_ds_position *inout_position_p, TROVE_op_id *out_op_id_p) { int ret = -TROVE_EINVAL, i = 0; - db_recno_t recno; + db_recno_t recno = {0}; DB *db_p = NULL; DBC *dbc_p = NULL; DBT key, data; @@ -1158,18 +1539,12 @@ int dbpf_collection_iterate(TROVE_ds_position *inout_position_p, * we get back. here we make sure that the key is big * enough to hold the position that we need to pass in. 
*/ + memset(&key, 0, sizeof(key)); - if (sizeof(recno) < name_array[0].buffer_sz) - { - key.data = name_array[0].buffer; - key.size = key.ulen = name_array[0].buffer_sz; - } - else - { - key.data = &recno; - key.size = key.ulen = sizeof(recno); - } - *(TROVE_ds_position *) key.data = *inout_position_p; + key.data = name_array[0].buffer; + key.ulen = name_array[0].buffer_sz; + *(db_recno_t *)key.data = (db_recno_t) *inout_position_p; + key.size = sizeof(db_recno_t); key.flags |= DB_DBT_USERMEM; memset(&data, 0, sizeof(data)); @@ -1286,6 +1661,98 @@ int dbpf_collection_iterate(TROVE_ds_position *inout_position_p, return ret; } +static int dbpf_direct_collection_clear(TROVE_coll_id coll_id) +{ + stop_directio_threads(); + return dbpf_collection_clear(coll_id); +} + +int dbpf_collection_clear(TROVE_coll_id coll_id) +{ + int ret; + struct dbpf_collection *coll_p = dbpf_collection_find_registered(coll_id); + + dbpf_collection_deregister(coll_p); + + if( coll_p == NULL ) + { + gossip_err("Trove collection not defined.\n"); + return 0; + } + + if ( (coll_p->coll_attr_db != NULL ) && + (ret = coll_p->coll_attr_db->sync(coll_p->coll_attr_db, 0)) != 0) + { + gossip_err("db_sync(coll_attr_db): %s\n", db_strerror(ret)); + } + + if ( (coll_p->coll_attr_db != NULL ) && + (ret = db_close(coll_p->coll_attr_db)) != 0) + { + gossip_lerr("db_close(coll_attr_db): %s\n", db_strerror(ret)); + } + + if ( (coll_p->ds_db != NULL ) && + (ret = coll_p->ds_db->sync(coll_p->ds_db, 0)) != 0) + { + gossip_err("db_sync(coll_ds_db): %s\n", db_strerror(ret)); + } + + if ( (coll_p->ds_db != NULL ) && + (ret = db_close(coll_p->ds_db)) != 0) + { + gossip_lerr("db_close(coll_ds_db): %s\n", db_strerror(ret)); + } + + if ( (coll_p->keyval_db != NULL ) && + (ret = coll_p->keyval_db->sync(coll_p->keyval_db, 0)) != 0) + { + gossip_err("db_sync(coll_keyval_db): %s\n", db_strerror(ret)); + } + + if ( (coll_p->keyval_db != NULL ) && + (ret = db_close(coll_p->keyval_db)) != 0) + { + 
gossip_lerr("db_close(coll_keyval_db): %s\n", db_strerror(ret)); + } + + if( coll_p->coll_env != NULL ) + { + dbpf_putdb_env(coll_p->coll_env, coll_p->meta_path); + } + free(coll_p->name); + free(coll_p->data_path); + free(coll_p->meta_path); + PINT_dbpf_keyval_pcache_finalize(coll_p->pcache); + + free(coll_p); + return 0; +} + +static int dbpf_direct_collection_lookup(char *collname, + TROVE_coll_id *out_coll_id_p, + void *user_ptr, + TROVE_op_id *out_op_id_p) +{ + int ret; + + /* most of this is shared with the other methods */ + ret = dbpf_collection_lookup(collname, out_coll_id_p, + user_ptr, out_op_id_p); + if(ret < 0) + { + return(ret); + } + + /* start directio threads if they aren't already running */ + ret = start_directio_threads(); + if(ret < 0) + { + return(ret); + } + + return(0); +} int dbpf_collection_lookup(char *collname, TROVE_coll_id *out_coll_id_p, @@ -1313,6 +1780,7 @@ int dbpf_collection_lookup(char *collname, memset(&data, 0, sizeof(data)); key.data = collname; key.size = strlen(collname)+1; + key.flags = DB_DBT_USERMEM; data.data = &db_data; data.ulen = sizeof(db_data); data.flags = DB_DBT_USERMEM; @@ -1324,7 +1792,7 @@ int dbpf_collection_lookup(char *collname, } else if (ret != 0) { - sto_p->coll_db->err(sto_p->coll_db, ret, "DB->get"); + sto_p->coll_db->err(sto_p->coll_db, ret, "DB->get collection"); gossip_debug(GOSSIP_TROVE_DEBUG, "lookup got error (%d)\n", ret); return -dbpf_db_error_to_trove_error(ret); } @@ -1362,32 +1830,47 @@ int dbpf_collection_lookup(char *collname, free(coll_p); return -TROVE_ENOMEM; } - /* Path to collection */ - snprintf(path_name, PATH_MAX, "/%s/%08x/", sto_p->name, coll_p->coll_id); - coll_p->path_name = strdup(path_name); - if (!coll_p->path_name) + /* Path to data collection dir */ + snprintf(path_name, PATH_MAX, "/%s/%08x/", sto_p->data_path, + coll_p->coll_id); + coll_p->data_path = strdup(path_name); + if (!coll_p->data_path) { free(coll_p->name); free(coll_p); return -TROVE_ENOMEM; } - /* 
per-collection environment */ - if ((coll_p->coll_env = dbpf_getdb_env(coll_p->path_name, COLL_ENV_FLAGS, &ret)) == NULL) + + snprintf(path_name, PATH_MAX, "/%s/%08x/", + sto_p->meta_path, coll_p->coll_id); + coll_p->meta_path = strdup(path_name); + if (!coll_p->meta_path) + { + free(coll_p->data_path); + free(coll_p->name); + free(coll_p); + return -TROVE_ENOMEM; + } + + if ((coll_p->coll_env = dbpf_getdb_env(coll_p->meta_path, COLL_ENV_FLAGS, &ret)) == NULL) { - free(coll_p->path_name); + free(coll_p->meta_path); + free(coll_p->data_path); free(coll_p->name); free(coll_p); return -dbpf_db_error_to_trove_error(ret); } DBPF_GET_COLL_ATTRIB_DBNAME(path_name, PATH_MAX, - sto_p->name, coll_p->coll_id); - coll_p->coll_attr_db = dbpf_db_open(sto_p->name, path_name, coll_p->coll_env, + sto_p->meta_path, coll_p->coll_id); + + coll_p->coll_attr_db = dbpf_db_open(path_name, coll_p->coll_env, &ret, NULL, 0); if (coll_p->coll_attr_db == NULL) { - dbpf_putdb_env(coll_p->coll_env, coll_p->path_name); - free(coll_p->path_name); + dbpf_putdb_env(coll_p->coll_env, coll_p->meta_path); + free(coll_p->meta_path); + free(coll_p->data_path); free(coll_p->name); free(coll_p); return ret; @@ -1398,6 +1881,7 @@ int dbpf_collection_lookup(char *collname, memset(&data, 0, sizeof(data)); key.data = TROVE_DBPF_VERSION_KEY; key.size = strlen(TROVE_DBPF_VERSION_KEY); + key.flags = DB_DBT_USERMEM; data.data = &trove_dbpf_version; data.ulen = 32; data.flags = DB_DBT_USERMEM; @@ -1410,8 +1894,9 @@ int dbpf_collection_lookup(char *collname, gossip_err("Failed to retrieve collection version: %s\n", db_strerror(ret)); db_close(coll_p->coll_attr_db); - dbpf_putdb_env(coll_p->coll_env, coll_p->path_name); - free(coll_p->path_name); + dbpf_putdb_env(coll_p->coll_env, coll_p->meta_path); + free(coll_p->meta_path); + free(coll_p->data_path); free(coll_p->name); free(coll_p); return -dbpf_db_error_to_trove_error(ret); @@ -1453,8 +1938,9 @@ int dbpf_collection_lookup(char *collname, 
!strcmp(trove_dbpf_version, "0.1.1")) { db_close(coll_p->coll_attr_db); - dbpf_putdb_env(coll_p->coll_env, coll_p->path_name); - free(coll_p->path_name); + dbpf_putdb_env(coll_p->coll_env, coll_p->meta_path); + free(coll_p->meta_path); + free(coll_p->data_path); free(coll_p->name); free(coll_p); gossip_err("Trove-dbpf metadata format version mismatch!\n"); @@ -1466,13 +1952,13 @@ int dbpf_collection_lookup(char *collname, } DBPF_GET_DS_ATTRIB_DBNAME(path_name, PATH_MAX, - sto_p->name, coll_p->coll_id); + sto_p->meta_path, coll_p->coll_id); if(sto_major == 0 && sto_minor == 1 && sto_inc < 3) { /* use old comparison function */ coll_p->ds_db = dbpf_db_open( - sto_p->name, path_name, coll_p->coll_env, &ret, + path_name, coll_p->coll_env, &ret, &PINT_trove_dbpf_ds_attr_compare_reversed, 0); } else @@ -1481,30 +1967,33 @@ int dbpf_collection_lookup(char *collname, * DB does page reads in the right order (for handle_iterate) */ coll_p->ds_db = dbpf_db_open( - sto_p->name, path_name, coll_p->coll_env, &ret, + path_name, coll_p->coll_env, &ret, &PINT_trove_dbpf_ds_attr_compare, 0); } if (coll_p->ds_db == NULL) { db_close(coll_p->coll_attr_db); - dbpf_putdb_env(coll_p->coll_env, coll_p->path_name); - free(coll_p->path_name); + dbpf_putdb_env(coll_p->coll_env, coll_p->meta_path); + free(coll_p->meta_path); + free(coll_p->data_path); free(coll_p->name); free(coll_p); return ret; } DBPF_GET_KEYVAL_DBNAME(path_name, PATH_MAX, - sto_p->name, coll_p->coll_id); - coll_p->keyval_db = dbpf_db_open(sto_p->name, path_name, coll_p->coll_env, + sto_p->meta_path, coll_p->coll_id); + + coll_p->keyval_db = dbpf_db_open(path_name, coll_p->coll_env, &ret, PINT_trove_dbpf_keyval_compare, 0); if(coll_p->keyval_db == NULL) { db_close(coll_p->coll_attr_db); db_close(coll_p->ds_db); - dbpf_putdb_env(coll_p->coll_env, coll_p->path_name); - free(coll_p->path_name); + dbpf_putdb_env(coll_p->coll_env, coll_p->meta_path); + free(coll_p->meta_path); + free(coll_p->data_path); free(coll_p->name); 
free(coll_p); return ret; @@ -1516,8 +2005,9 @@ int dbpf_collection_lookup(char *collname, db_close(coll_p->coll_attr_db); db_close(coll_p->keyval_db); db_close(coll_p->ds_db); - dbpf_putdb_env(coll_p->coll_env, coll_p->path_name); - free(coll_p->path_name); + dbpf_putdb_env(coll_p->coll_env, coll_p->meta_path); + free(coll_p->meta_path); + free(coll_p->data_path); free(coll_p->name); free(coll_p); return -TROVE_ENOMEM; @@ -1532,9 +2022,11 @@ int dbpf_collection_lookup(char *collname, coll_p->c_low_watermark = 1; coll_p->meta_sync_enabled = 1; /* MUST be 1 !*/ - dbpf_collection_register(coll_p); *out_coll_id_p = coll_p->coll_id; + + clear_stranded_bstreams(coll_p->coll_id); + return 1; } @@ -1555,7 +2047,7 @@ int dbpf_collection_lookup(char *collname, * structure associated with that collection. */ struct dbpf_storage *dbpf_storage_lookup( - char *stoname, int *error_p, TROVE_ds_flags flags) + char *data_path, char *meta_path, int *error_p, TROVE_ds_flags flags) { char path_name[PATH_MAX] = {0}; struct dbpf_storage *sto_p = NULL; @@ -1566,7 +2058,7 @@ struct dbpf_storage *dbpf_storage_lookup( return my_storage_p; } - if (stat(stoname, &sbuf) < 0) + if (stat(data_path, &sbuf) < 0) { *error_p = -TROVE_ENOENT; return NULL; @@ -1574,10 +2066,22 @@ struct dbpf_storage *dbpf_storage_lookup( if (!S_ISDIR(sbuf.st_mode)) { *error_p = -TROVE_EINVAL; - gossip_err("%s is not a directory\n", stoname); + gossip_err("%s is not a directory\n", data_path); return NULL; } + if (stat(meta_path, &sbuf) < 0) + { + *error_p = -TROVE_ENOENT; + return NULL; + } + if (!S_ISDIR(sbuf.st_mode)) + { + *error_p = -TROVE_EINVAL; + gossip_err("%s is not a directory\n", meta_path); + return NULL; + } + sto_p = (struct dbpf_storage *)malloc(sizeof(struct dbpf_storage)); if (sto_p == NULL) { @@ -1586,17 +2090,25 @@ struct dbpf_storage *dbpf_storage_lookup( } memset(sto_p, 0, sizeof(struct dbpf_storage)); - sto_p->name = strdup(stoname); - if (sto_p->name == NULL) + sto_p->data_path = 
strdup(data_path); + if (sto_p->data_path == NULL) { free(sto_p); *error_p = -TROVE_ENOMEM; return NULL; } + sto_p->meta_path = strdup(meta_path); + if (sto_p->meta_path == NULL) + { + free(sto_p->data_path); + free(sto_p); + *error_p = -TROVE_ENOMEM; + return NULL; + } sto_p->refct = 0; sto_p->flags = flags; - DBPF_GET_STO_ATTRIB_DBNAME(path_name, PATH_MAX, stoname); + DBPF_GET_STO_ATTRIB_DBNAME(path_name, PATH_MAX, meta_path); /* we want to stat the attrib db first in case it doesn't * exist but the storage directory does @@ -1607,25 +2119,31 @@ struct dbpf_storage *dbpf_storage_lookup( return NULL; } - sto_p->sto_attr_db = dbpf_db_open(sto_p->name, path_name, NULL, + sto_p->sto_attr_db = dbpf_db_open(path_name, NULL, error_p, NULL, 0); if (sto_p->sto_attr_db == NULL) { - free(sto_p->name); + free(sto_p->meta_path); + free(sto_p->data_path); free(sto_p); + gossip_err("Failure opening attribute database\n"); + my_storage_p = NULL; return NULL; } - DBPF_GET_COLLECTIONS_DBNAME(path_name, PATH_MAX, stoname); + DBPF_GET_COLLECTIONS_DBNAME(path_name, PATH_MAX, meta_path); - sto_p->coll_db = dbpf_db_open(sto_p->name, path_name, NULL, + sto_p->coll_db = dbpf_db_open(path_name, NULL, error_p, NULL, DB_RECNUM); if (sto_p->coll_db == NULL) { db_close(sto_p->sto_attr_db); - free(sto_p->name); + free(sto_p->meta_path); + free(sto_p->data_path); free(sto_p); + gossip_err("Failure opening collection database\n"); + my_storage_p = NULL; return NULL; } @@ -1761,8 +2279,7 @@ int db_close(DB *db_p) /* Internal function for creating first instances of the databases for * a db plus files storage region. */ -static int dbpf_db_create(const char *sto_path, - char *dbname, +static int dbpf_db_create(char *dbname, DB_ENV *envp, uint32_t flags) { @@ -1805,7 +2322,7 @@ static int dbpf_db_create(const char *sto_path, * integer pointed to by error_p. 
*/ static DB *dbpf_db_open( - const char *sto_path, char *dbname, DB_ENV *envp, int *error_p, + char *dbname, DB_ENV *envp, int *error_p, int (*compare_fn) (DB *db, const DBT *dbt1, const DBT *dbt2), uint32_t flags) { @@ -1855,6 +2372,29 @@ static void dbpf_db_error_callback( gossip_err("%s: %s\n", errpfx, msg); } +/* dbpf_mgmt_direct_ops + * + * Structure holding pointers to all the management operations + * functions for this storage interface implementation. + */ +struct TROVE_mgmt_ops dbpf_mgmt_direct_ops = +{ + dbpf_direct_initialize, + dbpf_direct_finalize, + dbpf_storage_create, + dbpf_storage_remove, + dbpf_collection_create, + dbpf_collection_remove, + dbpf_direct_collection_lookup, + dbpf_direct_collection_clear, + dbpf_collection_iterate, + dbpf_collection_setinfo, + dbpf_collection_getinfo, + dbpf_collection_seteattr, + dbpf_collection_geteattr, + dbpf_collection_deleattr +}; + /* dbpf_mgmt_ops * * Structure holding pointers to all the management operations @@ -1869,11 +2409,13 @@ struct TROVE_mgmt_ops dbpf_mgmt_ops = dbpf_collection_create, dbpf_collection_remove, dbpf_collection_lookup, + dbpf_collection_clear, dbpf_collection_iterate, dbpf_collection_setinfo, dbpf_collection_getinfo, dbpf_collection_seteattr, - dbpf_collection_geteattr + dbpf_collection_geteattr, + dbpf_collection_deleattr }; typedef struct @@ -1900,12 +2442,19 @@ static __dbpf_op_type_str_map_t s_dbpf_op_type_str_map[] = { KEYVAL_READ_LIST, "KEYVAL_READ_LIST" }, { KEYVAL_WRITE_LIST, "KEYVAL_WRITE_LIST" }, { KEYVAL_FLUSH, "KEYVAL_FLUSH" }, + { KEYVAL_GET_HANDLE_INFO, "KEYVAL_GET_HANDLE_INFO" }, { DSPACE_CREATE, "DSPACE_CREATE" }, { DSPACE_REMOVE, "DSPACE_REMOVE" }, { DSPACE_ITERATE_HANDLES, "DSPACE_ITERATE_HANDLES" }, { DSPACE_VERIFY, "DSPACE_VERIFY" }, { DSPACE_GETATTR, "DSPACE_GETATTR" }, - { DSPACE_SETATTR, "DSPACE_SETATTR" } + { DSPACE_SETATTR, "DSPACE_SETATTR" }, + { DSPACE_GETATTR_LIST, "DSPACE_GETATTR_LIST" }, + { DSPACE_CREATE_LIST, "DSPACE_CREATE_LIST" }, + { 
DSPACE_REMOVE_LIST, "DSPACE_REMOVE_LIST" } + /* NOTE: this list should be kept in sync with enum dbpf_op_type + * from dbpf.h + */ }; char *dbpf_op_type_to_str(enum dbpf_op_type op_type) @@ -1946,8 +2495,7 @@ static void unlink_db_cache_files(const char* path) { for(i=0; i +#include +#include +#include +#include +#include +#include +#ifdef HAVE_MALLOC_H +#include +#endif +#include +#include +#include + +#include "gossip.h" +#include "pvfs2-debug.h" +#include "trove.h" +#include "trove-internal.h" +#include "dbpf.h" +#include "quicklist.h" +#include "pthread.h" +#include "dbpf.h" + + +static int null_lio_listio(int mode, struct aiocb * const list[], + int nent, struct sigevent *sig); +static int null_aio_error(const struct aiocb *aiocbp); +static ssize_t null_aio_return(struct aiocb *aiocbp); +static int null_aio_cancel(int filedesc, struct aiocb * aiocbp); +static int null_aio_suspend(const struct aiocb * const list[], int nent, + const struct timespec * timeout); +static int null_aio_read(struct aiocb * aiocbp); +static int null_aio_write(struct aiocb * aiocbp); +static int null_aio_fsync(int operation, struct aiocb * aiocbp); + +static struct dbpf_aio_ops null_aio_ops; + +struct null_aio_item +{ + struct aiocb *cb_p; + struct sigevent *sig; + struct qlist_head list_link; + int master; + pthread_t *tids; + int nent; +}; +static void* null_lio_thread(void*); + +int null_lio_listio(int mode, struct aiocb * const list[], + int nent, struct sigevent *sig) +{ + struct null_aio_item* tmp_item; + int ret, i; + pthread_t *tids; + pthread_attr_t attr; + + tids = (pthread_t *)malloc(sizeof(pthread_t) * nent); + if(!tids) + { + return (-1); + } + + for(i = 0; i < nent; ++i) + { + int spawnmode= PTHREAD_CREATE_JOINABLE; + tmp_item = (struct null_aio_item*)malloc(sizeof(struct null_aio_item)*nent); + if(!tmp_item) + { + return (-1); + } + memset(tmp_item, 0, sizeof(struct null_aio_item)); + + if(mode == LIO_NOWAIT && i == (nent - 1)) + { + /* This is the master thread and 
needs to wait for the others. + * We make the master the last thread to get created, so that + * we don't end up in a race with the thread ids getting set + * properly + */ + tmp_item->master = 1; + tmp_item->tids = tids; + tmp_item->nent = nent; + spawnmode= PTHREAD_CREATE_DETACHED; + } + + tmp_item->cb_p = list[i]; + tmp_item->sig = sig; + + /* setup state */ +#ifdef HAVE_AIOCB_ERROR_CODE + tmp_item->cb_p->__error_code = EINPROGRESS; +#endif + + /* set detached state */ + ret = pthread_attr_init(&attr); + if(ret != 0) + { + free(tmp_item); + errno = ret; + + return(-1); + } + ret = pthread_attr_setdetachstate( + &attr, + spawnmode + ); + if(ret != 0) + { + free(tmp_item); + errno = ret; + return(-1); + } + + /* create thread to perform I/O and trigger callback */ + ret = pthread_create(&tids[i], &attr, null_lio_thread, tmp_item); + if(ret != 0) + { + int j = 0; + + if(mode == LIO_WAIT) + { + for(; j < i; ++j) + { + pthread_join(tids[j], NULL); + } + } + + free(tmp_item); + free(tids); + errno = ret; + return(-1); + } + gossip_debug(GOSSIP_BSTREAM_DEBUG, + "[null-aio]: pthread_create completed:" + " id: %d, thread_id: %p\n", + i, (void *)tids[i]); + } + + ret = 0; + if(mode == LIO_WAIT) + { + for(i = 0; i < nent; ++i) + { + pthread_join(tids[i], NULL); + if(ret != 0 && null_aio_error(list[i]) != 0) + { + /* for now we're just overwriting previous errors + * since we have no way to store and return them + * in the blocking case. 
+ * The caller should call aio_error to get the + * element specific errors + */ + ret = null_aio_error(list[i]); + } + } + + free(tids); + } + return(ret); +} + +static int null_aio_error(const struct aiocb *aiocbp) +{ +#ifdef HAVE_AIOCB_ERROR_CODE + return aiocbp->__error_code; +#else + return 0; +#endif +} + +static ssize_t null_aio_return(struct aiocb *aiocbp) +{ +#ifdef HAVE_AIOCB_RETURN_VALUE + return aiocbp->__return_value; +#else + return 0; +#endif +} + +static int null_aio_cancel(int filedesc, struct aiocb *aiocbp) +{ + errno = ENOSYS; + return -1; +} + +static int null_aio_suspend(const struct aiocb * const list[], int nent, + const struct timespec * timeout) +{ + errno = ENOSYS; + return -1; +} + +static int null_aio_read(struct aiocb * aiocbp) +{ + errno = ENOSYS; + return -1; +} + +static int null_aio_write(struct aiocb * aiocbp) +{ + errno = ENOSYS; + return -1; +} + +static int null_aio_fsync(int operation, struct aiocb * aiocbp) +{ + errno = ENOSYS; + return -1; +} + +static void* null_lio_thread(void* foo) +{ + struct null_aio_item* tmp_item = (struct null_aio_item*)foo; + int ret = 0; + struct stat statbuf; + + if(tmp_item->cb_p->aio_lio_opcode == LIO_READ) + { + ret = tmp_item->cb_p->aio_nbytes; + } + else if(tmp_item->cb_p->aio_lio_opcode == LIO_WRITE) + { + gossip_debug(GOSSIP_BSTREAM_DEBUG, + "[null-aio]: pwrite: cb_p: %p, " + "fd: %d, bufp: %p, size: %zd off:%llu\n", + tmp_item->cb_p, tmp_item->cb_p->aio_fildes, + tmp_item->cb_p->aio_buf, tmp_item->cb_p->aio_nbytes, + llu(tmp_item->cb_p->aio_offset)); + + /* check size of file */ + /* note, if either fstat or ftruncate fail, then we let the ret and + * errno drop through to the logic below. Otherwise we report the + * size that would have been written. 
+ */ + ret = fstat(tmp_item->cb_p->aio_fildes, &statbuf); + if(ret == 0) + { + if(statbuf.st_size < + (tmp_item->cb_p->aio_nbytes + tmp_item->cb_p->aio_offset)) + { + /* this write would extend the file */ + ret = ftruncate(tmp_item->cb_p->aio_fildes, + (tmp_item->cb_p->aio_nbytes + tmp_item->cb_p->aio_offset)); + if(ret == 0) + { + ret = tmp_item->cb_p->aio_nbytes; + } + } + else + { + ret = tmp_item->cb_p->aio_nbytes; + } + } + } + else + { + /* this should have been caught already */ + assert(0); + } + + /* store error and return codes */ + if(ret < 0) + { +#ifdef HAVE_AIOCB_ERROR_CODE + tmp_item->cb_p->__error_code = errno; +#endif + } + else + { +#ifdef HAVE_AIOCB_ERROR_CODE + tmp_item->cb_p->__error_code = 0; +#endif + +#ifdef HAVE_AIOCB_RETURN_VALUE + tmp_item->cb_p->__return_value = ret; +#endif + } + + if(tmp_item->master) + { + int i; + /* I'm the master, gotta wait for the others to call notify */ + + /* we skip the last one because that's us */ + for(i = 0; i < (tmp_item->nent - 1); ++i) + { + ret = pthread_join(tmp_item->tids[i], NULL); + if(ret != 0) + { + gossip_err("pthread_join failed: %d (%s), i: %d, tid: %p\n", + ret, strerror(ret), i, (void *)tmp_item->tids[i]); + } + } + + free(tmp_item->tids); + /* run callback fn */ + tmp_item->sig->sigev_notify_function(tmp_item->sig->sigev_value); + } + + free(tmp_item); + + pthread_exit(NULL); + return NULL; +} + +static int null_aio_bstream_read_list(TROVE_coll_id coll_id, + TROVE_handle handle, + char **mem_offset_array, + TROVE_size *mem_size_array, + int mem_count, + TROVE_offset *stream_offset_array, + TROVE_size *stream_size_array, + int stream_count, + TROVE_size *out_size_p, + TROVE_ds_flags flags, + TROVE_vtag_s *vtag, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints) +{ + return dbpf_bstream_rw_list(coll_id, + handle, + mem_offset_array, + mem_size_array, + mem_count, + stream_offset_array, + stream_size_array, + stream_count, + out_size_p, + flags, + 
vtag, + user_ptr, + context_id, + out_op_id_p, + LIO_READ, + &null_aio_ops, + hints); +} + +static int null_aio_bstream_write_list(TROVE_coll_id coll_id, + TROVE_handle handle, + char **mem_offset_array, + TROVE_size *mem_size_array, + int mem_count, + TROVE_offset *stream_offset_array, + TROVE_size *stream_size_array, + int stream_count, + TROVE_size *out_size_p, + TROVE_ds_flags flags, + TROVE_vtag_s *vtag, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints) +{ + return dbpf_bstream_rw_list(coll_id, + handle, + mem_offset_array, + mem_size_array, + mem_count, + stream_offset_array, + stream_size_array, + stream_count, + out_size_p, + flags, + vtag, + user_ptr, + context_id, + out_op_id_p, + LIO_WRITE, + &null_aio_ops, + hints); +} + +static struct dbpf_aio_ops null_aio_ops = +{ + null_aio_read, + null_aio_write, + null_lio_listio, + null_aio_error, + null_aio_return, + null_aio_cancel, + null_aio_suspend, + null_aio_fsync +}; + +struct TROVE_bstream_ops null_aio_bstream_ops = +{ + dbpf_bstream_read_at, + dbpf_bstream_write_at, + dbpf_bstream_resize, + dbpf_bstream_validate, + null_aio_bstream_read_list, + null_aio_bstream_write_list, + dbpf_bstream_flush, + NULL +}; + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/io/trove/trove-dbpf/dbpf-op-queue.c b/src/io/trove/trove-dbpf/dbpf-op-queue.c index abcec8c..cbbb878 100644 --- a/src/io/trove/trove-dbpf/dbpf-op-queue.c +++ b/src/io/trove/trove-dbpf/dbpf-op-queue.c @@ -316,7 +316,9 @@ int dbpf_queue_or_service( struct dbpf_op *op_p, dbpf_queued_op_t *q_op_p, struct dbpf_collection *coll_p, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PINT_event_type event_type, + PINT_event_id event_id) { int ret; @@ -347,13 +349,14 @@ int dbpf_queue_or_service( } /* only one that allocs anything, see dbpf_queued_op_free */ - if(op_p->type == DSPACE_CREATE) + if(op_p->type == 
DSPACE_CREATE || op_p->type == DSPACE_CREATE_LIST) { free(op_p->u.d_create.extent_array.extent_array); op_p->u.d_create.extent_array.extent_array = NULL; } ret = 1; + DBPF_EVENT_END(event_type, event_id); } else @@ -370,6 +373,20 @@ int dbpf_queue_or_service( int dbpf_queued_op_complete(dbpf_queued_op_t * qop_p, enum dbpf_op_state state) { + if(qop_p->event_type != trove_dbpf_read_event_id && + qop_p->event_type != trove_dbpf_write_event_id) + { + if(qop_p->event_type == trove_dbpf_dspace_create_event_id) + { + PINT_EVENT_END(qop_p->event_type, dbpf_pid, NULL, qop_p->event_id, + *qop_p->op.u.d_create.out_handle_p); + } + else + { + PINT_EVENT_END(qop_p->event_type, dbpf_pid, NULL, qop_p->event_id); + } + } + DBPF_COMPLETION_START(qop_p, state); DBPF_COMPLETION_SIGNAL(); DBPF_COMPLETION_FINISH(qop_p->op.context_id); diff --git a/src/io/trove/trove-dbpf/dbpf-op-queue.h b/src/io/trove/trove-dbpf/dbpf-op-queue.h index c3f264b..c78444b 100644 --- a/src/io/trove/trove-dbpf/dbpf-op-queue.h +++ b/src/io/trove/trove-dbpf/dbpf-op-queue.h @@ -81,7 +81,9 @@ int dbpf_queue_or_service( struct dbpf_op *op_p, dbpf_queued_op_t *q_op_p, struct dbpf_collection *coll_p, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PINT_event_type event_type, + PINT_event_id event_id); int dbpf_queued_op_complete(dbpf_queued_op_t * op, enum dbpf_op_state state); diff --git a/src/io/trove/trove-dbpf/dbpf-op.c b/src/io/trove/trove-dbpf/dbpf-op.c index 5eff77e..0dfd36a 100644 --- a/src/io/trove/trove-dbpf/dbpf-op.c +++ b/src/io/trove/trove-dbpf/dbpf-op.c @@ -52,7 +52,7 @@ void dbpf_queued_op_init( void dbpf_queued_op_free(dbpf_queued_op_t *q_op_p) { - if (q_op_p->op.type == DSPACE_CREATE) + if (q_op_p->op.type == DSPACE_CREATE || q_op_p->op.type == DSPACE_CREATE_LIST) { free(q_op_p->op.u.d_create.extent_array.extent_array); q_op_p->op.u.d_create.extent_array.extent_array = NULL; diff --git a/src/io/trove/trove-dbpf/dbpf-op.h b/src/io/trove/trove-dbpf/dbpf-op.h index 513480e..b5c8647 100644 
--- a/src/io/trove/trove-dbpf/dbpf-op.h +++ b/src/io/trove/trove-dbpf/dbpf-op.h @@ -14,6 +14,7 @@ extern "C" { #include "quicklist.h" #include "trove.h" #include "dbpf.h" +#include "pint-op.h" #include "id-generator.h" @@ -58,6 +59,10 @@ typedef struct /* the operation return code after being services */ TROVE_ds_state state; + PINT_event_type event_type; + PINT_event_id event_id; + + PINT_op_id mgr_op_id; struct qlist_head link; } dbpf_queued_op_t; diff --git a/src/io/trove/trove-dbpf/dbpf-open-cache.c b/src/io/trove/trove-dbpf/dbpf-open-cache.c index e11f2b1..66abc5a 100644 --- a/src/io/trove/trove-dbpf/dbpf-open-cache.c +++ b/src/io/trove/trove-dbpf/dbpf-open-cache.c @@ -10,6 +10,8 @@ * will all get new fds that are closed on put */ +#define XOPEN_SOURCE 500 + #include #include #include @@ -18,7 +20,9 @@ #include #include #include +#include #include +#include #include "trove.h" #include "trove-internal.h" @@ -38,10 +42,33 @@ struct open_cache_entry TROVE_coll_id coll_id; TROVE_handle handle; int fd; + int remove_flag; + enum open_cache_open_type type; struct qlist_head queue_link; }; +struct unlink_context +{ + pthread_t thread_id; + pthread_mutex_t mutex; + pthread_cond_t data_available; + struct qlist_head global_list; +}; + +struct file_struct +{ + struct qlist_head list_link; + char *pathname; +}; + +static struct unlink_context dbpf_unlink_context; +static void* unlink_bstream(void *context); +static int fast_unlink( + const char *pathname, + TROVE_coll_id coll_id, + TROVE_handle handle); + /* "used_list" is for active objects (ref_ct > 0) */ static QLIST_HEAD(used_list); /* "unused_list" is for inactive objects (ref_ct == 0) that we are still @@ -61,6 +88,10 @@ static int open_fd( TROVE_handle handle, enum open_cache_open_type type); +static void close_fd( + int fd, + enum open_cache_open_type type); + inline static struct open_cache_entry * dbpf_open_cache_find_entry( struct qlist_head * list, const char * list_name, @@ -69,7 +100,7 @@ inline static 
struct open_cache_entry * dbpf_open_cache_find_entry( void dbpf_open_cache_initialize(void) { - int i = 0; + int i = 0, ret = 0; gen_mutex_lock(&cache_mutex); @@ -81,6 +112,9 @@ void dbpf_open_cache_initialize(void) gossip_err("Warning: dbpf_open_cache disabled.\n"); } + /* initialize prealloc array */ + memset(&prealloc[0],0,sizeof(struct open_cache_entry)*OPEN_CACHE_SIZE); + for (i = 0; i < OPEN_CACHE_SIZE; i++) { prealloc[i].fd = -1; @@ -88,6 +122,17 @@ void dbpf_open_cache_initialize(void) } gen_mutex_unlock(&cache_mutex); + + /* Initialize and create the worker thread for threaded deletes */ + INIT_QLIST_HEAD(&dbpf_unlink_context.global_list); + pthread_mutex_init(&dbpf_unlink_context.mutex, NULL); + pthread_cond_init(&dbpf_unlink_context.data_available, NULL); + ret = pthread_create(&dbpf_unlink_context.thread_id, NULL, unlink_bstream, (void*)&dbpf_unlink_context); + if(ret) + { + gossip_err("dbpf_open_cache_initialize: failed [%d]\n", ret); + return; + } } static void dbpf_open_cache_entries_finalize( @@ -104,6 +149,8 @@ void dbpf_open_cache_finalize(void) dbpf_open_cache_entries_finalize(&free_list); gen_mutex_unlock(&cache_mutex); + + pthread_cancel(dbpf_unlink_context.thread_id); } /** @@ -141,10 +188,25 @@ int dbpf_open_cache_get( tmp_entry = dbpf_open_cache_find_entry( &used_list, "used list", coll_id, handle); + if (tmp_entry && tmp_entry->remove_flag) + { + gossip_err("DBPF_OPEN_CACHE_GET: pulled EXISTING entry from the used-list with the " + "remove flag set.\n"); + gossip_err("\t\thandle:%llu\n",llu(tmp_entry->handle)); + gossip_err("\t\tref-ct:%d \tfd:%d\n",tmp_entry->ref_ct,tmp_entry->fd); + } + if(!tmp_entry) { tmp_entry = dbpf_open_cache_find_entry( &unused_list, "unused list", coll_id, handle); + if (tmp_entry && tmp_entry->remove_flag) + { + gossip_err("DBPF_OPEN_CACHE_GET: pulled EXISTING entry from the UNused-list with the " + "remove flag set.\n"); + gossip_err("\t\thandle:%llu\n",llu(tmp_entry->handle)); + gossip_err("\t\tref-ct:%d 
\tfd:%d\n",tmp_entry->ref_ct,tmp_entry->fd); + } } out_ref->fd = -1; @@ -159,8 +221,10 @@ int dbpf_open_cache_get( gen_mutex_unlock(&cache_mutex); return ret; } + tmp_entry->type = type; } out_ref->fd = tmp_entry->fd; + out_ref->type = type; out_ref->internal = tmp_entry; tmp_entry->ref_ct++; @@ -192,6 +256,13 @@ int dbpf_open_cache_get( found = 1; gossip_debug(GOSSIP_DBPF_OPEN_CACHE_DEBUG, "dbpf_open_cache_get: resetting entry from free list.\n"); + if (tmp_entry->remove_flag) + { + gossip_err("DBPF_OPEN_CACHE_GET: pulled FIRST entry from the free-list with the " + "remove flag turned on.\n"); + gossip_err("\t\t\tSetting remove-flag to zero.\n"); + tmp_entry->remove_flag=0; + } } /* anything in unused list (still open, but ref_ct == 0)? */ @@ -205,9 +276,16 @@ int dbpf_open_cache_get( gossip_debug(GOSSIP_DBPF_OPEN_CACHE_DEBUG, "dbpf_open_cache_get: resetting entry from unused list.\n"); + if (tmp_entry->remove_flag) + { + gossip_err("DBPF_OPEN_CACHE_GET: pulled FIRST entry from the UNused-list with the " + "remove flag turned on.\n"); + gossip_err("\t\t\tSetting remove-flag to zero.\n"); + tmp_entry->remove_flag=0; + } if (tmp_entry->fd > -1) { - DBPF_CLOSE(tmp_entry->fd); + close_fd(tmp_entry->fd, tmp_entry->type); tmp_entry->fd = -1; } } @@ -230,6 +308,8 @@ int dbpf_open_cache_get( gen_mutex_unlock(&cache_mutex); return ret; } + tmp_entry->type = type; + out_ref->type = type; out_ref->fd = tmp_entry->fd; out_ref->internal = tmp_entry; @@ -256,6 +336,7 @@ int dbpf_open_cache_get( gen_mutex_unlock(&cache_mutex); return ret; } + out_ref->type = type; out_ref->internal = NULL; gen_mutex_unlock(&cache_mutex); @@ -304,7 +385,7 @@ void dbpf_open_cache_put( /* this wasn't cached; go ahead and close up */ if(in_ref->fd > -1) { - DBPF_CLOSE(in_ref->fd); + close_fd(in_ref->fd, in_ref->type); in_ref->fd = -1; } } @@ -321,8 +402,9 @@ int dbpf_open_cache_remove( int found = 0; char filename[PATH_MAX]; int ret = -1; - struct qlist_head* scratch; int tmp_error = 0; + struct 
qlist_head* scratch; + char open_type[32] = {0}; gossip_debug(GOSSIP_DBPF_OPEN_CACHE_DEBUG, "dbpf_open_cache_remove: called\n"); @@ -342,7 +424,46 @@ int dbpf_open_cache_remove( if ((tmp_entry->handle == handle) && (tmp_entry->coll_id == coll_id)) { - assert(0); + gossip_err("DBPF_OPEN_CACHE_REMOVE: BINGO! Entry found in the USED_list when trying to " + "remove from the UNused_list.\n"); + gossip_err("\t\tused_list entry:\n"); + gossip_err("\t\t\t handle:%llu\n",llu(tmp_entry->handle)); + gossip_err("\t\t\t ref-ct:%d \tfd:%d\n",tmp_entry->ref_ct,tmp_entry->fd); + gossip_err("\t\t\tremove-flag:%d\n",tmp_entry->remove_flag); + switch(tmp_entry->type) + { + case DBPF_FD_BUFFERED_READ: + { + strcpy(&open_type[0],"DBPF_FD_BUFFERED_READ"); + break; + } + case DBPF_FD_BUFFERED_WRITE: + { + strcpy(&open_type[0],"DBPF_FD_BUFFERED_WRITE"); + break; + } + case DBPF_FD_DIRECT_READ: + { + strcpy(&open_type[0],"DBPF_FD_DIRECT_READ"); + break; + } + case DBPF_FD_DIRECT_WRITE: + { + strcpy(&open_type[0],"DBPF_FD_DIRECT_WRITE"); + break; + } + default: + { + strcpy(&open_type[0],"UNKNOWN FD TYPE"); + break; + } + }/*end switch*/ + gossip_err("\t\t\t type:%s\n",open_type); + + tmp_entry->remove_flag=1; + + return (0); + //assert(0); } } @@ -364,9 +485,15 @@ int dbpf_open_cache_remove( { gossip_debug(GOSSIP_DBPF_OPEN_CACHE_DEBUG, "dbpf_open_cache_remove: unused entry.\n"); + if (tmp_entry->remove_flag) + { + gossip_err("DBPF_OPEN_CACHE_REMOVE: handle:%llu found in the UNused list with" + " remove-flag turned on\n",llu(tmp_entry->handle)); + } + tmp_entry->remove_flag = 0; if (tmp_entry->fd > -1) { - DBPF_CLOSE(tmp_entry->fd); + close_fd(tmp_entry->fd, tmp_entry->type); tmp_entry->fd = -1; } qlist_add(&tmp_entry->queue_link, &free_list); @@ -380,17 +507,15 @@ int dbpf_open_cache_remove( tmp_error = 0; DBPF_GET_BSTREAM_FILENAME(filename, PATH_MAX, - my_storage_p->name, coll_id, llu(handle)); + my_storage_p->data_path, coll_id, llu(handle)); + + ret = fast_unlink(filename, coll_id, 
handle); - ret = DBPF_UNLINK(filename); if ((ret != 0) && (errno != ENOENT)) { - tmp_error = -trove_errno_to_trove_error(errno); + tmp_error = -trove_errno_to_trove_error(errno); } - gossip_debug(GOSSIP_DBPF_OPEN_CACHE_DEBUG, "Unlinked filename: " - "(ret=%d, errno=%d)\n%s\n", ret, errno, filename); - gen_mutex_unlock(&cache_mutex); gossip_debug(GOSSIP_DBPF_OPEN_CACHE_DEBUG, @@ -414,20 +539,36 @@ static int open_fd( llu(handle)); DBPF_GET_BSTREAM_FILENAME(filename, PATH_MAX, - my_storage_p->name, coll_id, llu(handle)); + my_storage_p->data_path, coll_id, llu(handle)); gossip_debug(GOSSIP_DBPF_OPEN_CACHE_DEBUG, "dbpf_open_cache open_fd: filename: %s\n", filename); flags = O_RDWR; - if(type == DBPF_FD_BUFFERED_WRITE) + if(type == DBPF_FD_BUFFERED_WRITE || + type == DBPF_FD_DIRECT_WRITE) { flags |= O_CREAT; mode = TROVE_FD_MODE; } +#ifdef HAVE_OPEN_O_DIRECT + if(type == DBPF_FD_DIRECT_WRITE || type == DBPF_FD_DIRECT_READ) + { + flags |= O_DIRECT; + } +#endif + *fd = DBPF_OPEN(filename, flags, mode); + +#ifdef HAVE_FCNTL_F_NOCACHE + if(type == DBPF_FD_DIRECT_WRITE || type == DBPF_FD_DIRECT_READ) + { + fcntl(*fd, F_NOCACHE, 1); + } +#endif + return ((*fd < 0) ? 
-trove_errno_to_trove_error(errno) : 0); } @@ -441,11 +582,13 @@ static void dbpf_open_cache_entries_finalize(struct qlist_head *list) entry = qlist_entry(list_entry, struct open_cache_entry, queue_link); if(entry->fd > -1) { - DBPF_CLOSE(entry->fd); + close_fd(entry->fd, entry->type); entry->fd = -1; } qlist_del(&entry->queue_link); } + /* Cancel the deletion thread */ + pthread_cancel(dbpf_unlink_context.thread_id); } inline static struct open_cache_entry * dbpf_open_cache_find_entry( @@ -475,6 +618,172 @@ inline static struct open_cache_entry * dbpf_open_cache_find_entry( return NULL; } +int fast_unlink(const char *pathname, TROVE_coll_id coll_id, TROVE_handle handle) +{ + int ret; + struct file_struct *tmp_item; + + tmp_item = (struct file_struct *) malloc(sizeof(struct file_struct)); + if(!tmp_item) + { + gossip_err("Unable to allocate memory for file_struct [%d].\n", errno); + return -TROVE_ENOMEM; + } + tmp_item->pathname = malloc(PATH_MAX); + if(!tmp_item->pathname) + { + gossip_err("Unable to allocate memory for pathname[%d].\n", errno); + free(tmp_item); + return -TROVE_ENOMEM; + } + DBPF_GET_STRANDED_BSTREAM_FILENAME(tmp_item->pathname, PATH_MAX, + my_storage_p->data_path, + coll_id, + llu(handle)); + + gossip_debug(GOSSIP_DBPF_OPEN_CACHE_DEBUG, + "Renaming [%s] to [%s] for threaded delete.\n", pathname, tmp_item->pathname); + + ret = rename(pathname, tmp_item->pathname); + if(ret != 0) + { + gossip_debug(GOSSIP_DBPF_OPEN_CACHE_DEBUG, + "Warning: During unlink, the rename failed on file [%s] with errno [%d] strerr [%s].\n", + pathname, errno, strerror(errno)); + free(tmp_item->pathname); + free(tmp_item); + return ret; + } + + /* Add to the queue */ + pthread_mutex_lock(&dbpf_unlink_context.mutex); + qlist_add_tail(&tmp_item->list_link, &dbpf_unlink_context.global_list); + /* Moved gossip_debug BEFORE pthread_cond_signal; otherwise, tmp_item->pathname caused a seg fault + * if the unlink signal processed BEFORE the debug statement. 
+ */ + gossip_debug(GOSSIP_DBPF_OPEN_CACHE_DEBUG, + "Added [%s] to the queue.\n", tmp_item->pathname); + pthread_cond_signal(&dbpf_unlink_context.data_available); + pthread_mutex_unlock(&dbpf_unlink_context.mutex); + + return(0); +} + +static void* unlink_bstream(void *context) +{ + struct unlink_context *loc_context = (struct unlink_context *) context; + int ret; + time_t start_time; + struct qlist_head *tmp_item; + struct file_struct *tmp_st; + + while(1) + { + pthread_mutex_lock(&loc_context->mutex); + /* If there is no work to do, go into a condition wait */ + if(qlist_empty(&loc_context->global_list)) + { + pthread_cond_wait(&loc_context->data_available, &loc_context->mutex); + } + + if(!qlist_empty(&loc_context->global_list)) + { + tmp_item = loc_context->global_list.next; + qlist_del(tmp_item); + pthread_mutex_unlock(&loc_context->mutex); + } + else /* Condition triggered without items in qlist */ + { + pthread_mutex_unlock(&loc_context->mutex); + gossip_debug(GOSSIP_DBPF_OPEN_CACHE_DEBUG, + "Unlink condition triggered when qlist empty\n"); + continue; /* Enter while loop again */ + } + + tmp_st = qlist_entry(tmp_item, struct file_struct, list_link); + time(&start_time); + ret = DBPF_UNLINK(tmp_st->pathname); + gossip_debug(GOSSIP_DBPF_OPEN_CACHE_DEBUG, + "Unlinked filename: (ret=%d, errno=%d, elapsed-time=%ld(secs) )\n%s\n", + ret, errno, (time(NULL) - start_time), tmp_st->pathname); + free(tmp_st->pathname); + free(tmp_st); + } + + pthread_exit(&loc_context->thread_id); + return NULL; +} + +static void close_fd( + int fd, + enum open_cache_open_type type) +{ + gossip_debug(GOSSIP_DBPF_OPEN_CACHE_DEBUG, + "dbpf_open_cache closing fd %d of type %d\n", fd, type); + DBPF_CLOSE(fd); +} + +void clear_stranded_bstreams(TROVE_coll_id coll_id) +{ + char path_name[PATH_MAX]; + DIR *current_dir = NULL; + struct dirent *current_dirent = NULL; + struct stat file_info; + struct file_struct *tmp_item; + + DBPF_GET_STRANDED_BSTREAM_DIRNAME(path_name, PATH_MAX, + 
my_storage_p->data_path, coll_id); + + /* remove entries in the stranded bstreams directory */ + current_dir = opendir(path_name); + if(current_dir) + { + while((current_dirent = readdir(current_dir))) + { + if((strcmp(current_dirent->d_name, ".") == 0) || + (strcmp(current_dirent->d_name, "..") == 0)) + { + continue; + } + tmp_item = (struct file_struct *) malloc(sizeof(struct file_struct)); + if(!tmp_item) + { + gossip_err("Unable to allocate memory for file_struct [%d].\n", errno); + return; + } + tmp_item->pathname = malloc(PATH_MAX); + if(!tmp_item->pathname) + { + gossip_err("Unable to allocate memory for pathname[%d].\n", errno); + free(tmp_item); + return; + } + snprintf(tmp_item->pathname, PATH_MAX, "%s/%s", path_name, + current_dirent->d_name); + if(stat(tmp_item->pathname, &file_info) < 0) + { + gossip_err("error doing stat on bstream entry\n"); + continue; + } + assert(S_ISREG(file_info.st_mode)); + /* Add to the queue */ + + pthread_mutex_lock(&dbpf_unlink_context.mutex); + qlist_add_tail(&tmp_item->list_link, &dbpf_unlink_context.global_list); + pthread_cond_signal(&dbpf_unlink_context.data_available); + pthread_mutex_unlock(&dbpf_unlink_context.mutex); + gossip_debug(GOSSIP_DBPF_OPEN_CACHE_DEBUG, + "Added [%s] to the queue.\n", tmp_item->pathname); + } + closedir(current_dir); + } + else + { + gossip_err("Unable to open stranded bstream directory [%s] to " + "perform initialization of stranded bstream cleanup", path_name); + } +} + /* * Local variables: * c-indent-level: 4 diff --git a/src/io/trove/trove-dbpf/dbpf-open-cache.h b/src/io/trove/trove-dbpf/dbpf-open-cache.h index 3931b11..120844a 100644 --- a/src/io/trove/trove-dbpf/dbpf-open-cache.h +++ b/src/io/trove/trove-dbpf/dbpf-open-cache.h @@ -12,16 +12,19 @@ #include "trove.h" #include "trove-internal.h" -struct open_cache_ref +enum open_cache_open_type { - int fd; - void* internal; /* pointer to underlying data structure */ + DBPF_FD_BUFFERED_READ = 1, + DBPF_FD_BUFFERED_WRITE, + 
DBPF_FD_DIRECT_READ, + DBPF_FD_DIRECT_WRITE }; -enum open_cache_open_type +struct open_cache_ref { - DBPF_FD_BUFFERED_READ = 1, - DBPF_FD_BUFFERED_WRITE + int fd; + enum open_cache_open_type type; + void* internal; /* pointer to underlying data structure */ }; void dbpf_open_cache_initialize(void); @@ -41,6 +44,8 @@ int dbpf_open_cache_remove( TROVE_coll_id coll_id, TROVE_handle handle); +void clear_stranded_bstreams(TROVE_coll_id coll_id); + #endif /* __DBPF_OPEN_CACHE_H__ */ /* diff --git a/src/io/trove/trove-dbpf/dbpf-sync.c b/src/io/trove/trove-dbpf/dbpf-sync.c index 8dd9858..e28f196 100644 --- a/src/io/trove/trove-dbpf/dbpf-sync.c +++ b/src/io/trove/trove-dbpf/dbpf-sync.c @@ -90,8 +90,16 @@ void dbpf_sync_context_destroy(int context_index) context_index); for(c=0; c < COALESCE_CONTEXT_LAST; c++) { + /* grab lock...should be the last one, since we are shutting down */ gen_mutex_lock(&sync_array[c][context_index].mutex); + + /* we have to unlock the mutex before we can destroy it */ + gen_mutex_unlock(&sync_array[c][context_index].mutex); + + /* destroy the mutex */ gen_mutex_destroy(&sync_array[c][context_index].mutex); + + /* cleanup the op queue */ dbpf_op_queue_cleanup(sync_array[c][context_index].sync_queue); } } @@ -216,9 +224,19 @@ int dbpf_sync_coalesce(dbpf_queued_op_t *qop_p, int retcode, int * outcount) ret = dbpf_sync_db(dbp, sync_context_type, sync_context); gossip_debug(GOSSIP_DBPF_COALESCE_DEBUG, - "[SYNC_COALESCE]: moving op %p with handle: %llu " + "[SYNC_COALESCE]: moving op: %p, handle: %llu , type: %d " "to completion queue\n", - qop_p, llu(qop_p->op.handle)); + qop_p, llu(qop_p->op.handle), qop_p->op.type); + + if(qop_p->event_type == trove_dbpf_dspace_create_event_id) + { + PINT_EVENT_END(qop_p->event_type, dbpf_pid, NULL, qop_p->event_id, + qop_p->op.u.d_create.out_handle_p); + } + else + { + PINT_EVENT_END(qop_p->event_type, dbpf_pid, NULL, qop_p->event_id); + } DBPF_COMPLETION_START(qop_p, OP_COMPLETED); (*outcount)++; @@ -231,10 +249,20 
@@ int dbpf_sync_coalesce(dbpf_queued_op_t *qop_p, int retcode, int * outcount) { ready_op = dbpf_op_queue_shownext(sync_context->sync_queue); + if(ready_op->event_type == trove_dbpf_dspace_create_event_id) + { + PINT_EVENT_END(ready_op->event_type, dbpf_pid, NULL, ready_op->event_id, + ready_op->op.u.d_create.out_handle_p); + } + else + { + PINT_EVENT_END(ready_op->event_type, dbpf_pid, NULL, ready_op->event_id); + } + gossip_debug(GOSSIP_DBPF_COALESCE_DEBUG, - "[SYNC_COALESCE]: moving op: %p with handle: %llu " + "[SYNC_COALESCE]: moving op: %p, handle: %llu , type: %d " "to completion queue\n", - ready_op, llu(ready_op->op.handle)); + ready_op, llu(ready_op->op.handle), ready_op->op.type); dbpf_op_queue_remove(ready_op); DBPF_COMPLETION_ADD(ready_op, OP_COMPLETED); diff --git a/src/io/trove/trove-dbpf/dbpf-thread.c b/src/io/trove/trove-dbpf/dbpf-thread.c index c1b3636..fee6cc5 100644 --- a/src/io/trove/trove-dbpf/dbpf-thread.c +++ b/src/io/trove/trove-dbpf/dbpf-thread.c @@ -18,6 +18,8 @@ #include "dbpf-bstream.h" #include "dbpf-op-queue.h" #include "dbpf-sync.h" +#include "pint-context.h" +#include "pint-mgmt.h" extern struct qlist_head dbpf_op_queue; extern gen_mutex_t dbpf_op_queue_mutex; @@ -31,6 +33,8 @@ pthread_cond_t dbpf_op_incoming_cond = PTHREAD_COND_INITIALIZER; pthread_cond_t dbpf_op_completed_cond = PTHREAD_COND_INITIALIZER; #endif +extern int TROVE_max_concurrent_io; + int dbpf_thread_initialize(void) { int ret = 0; @@ -54,6 +58,7 @@ int dbpf_thread_initialize(void) gossip_debug( GOSSIP_TROVE_DEBUG, "dbpf_thread_initialize: failed (1)\n"); } + #endif return ret; } @@ -83,29 +88,40 @@ void *dbpf_thread_function(void *ptr) gossip_debug(GOSSIP_TROVE_DEBUG, "dbpf_thread_function started\n"); + PINT_event_thread_start("TROVE-DBPF"); while(dbpf_thread_running) { /* check if we any have ops to service in our work queue */ gen_mutex_lock(&dbpf_op_queue_mutex); op_queued_empty = qlist_empty(&dbpf_op_queue); - gen_mutex_unlock(&dbpf_op_queue_mutex); if 
(!op_queued_empty) { + gen_mutex_unlock(&dbpf_op_queue_mutex); dbpf_do_one_work_cycle(&out_count); +#ifndef __PVFS2_TROVE_AIO_THREADED__ + if(out_count == 0) + { + /* if we aren't using aio callbacks, and the outcount is + * zero, then that means that the only ops in the queue are + * I/O operations that we can do nothing with except call + * aio_error() repeatedly. + */ + /* There is no convenient way to handle this. Just sleep + * breifly to prevent busy spin (which means we may sleep + * through a metadata op being posted). If someone cares about + * optimizing this case then the solution is to explicitly + * track if any of the operations in the queue are metadata + * operations or not so that we know up front to do a + * cond_timedwait() here. + */ + wait_time.tv_sec = 0; + wait_time.tv_nsec = 1000; + nanosleep(&wait_time, NULL); + } +#endif } - - /* - if we have no work to do, wait nicely until an operation to - be serviced has entered the system. - - if the queue isn't empty, and the out_count is 0, that means - that we're driving i/o operations without using the aio - callback completion. we sleep between those calls to avoid - busy waiting (i.e. 
the timedwait call is okay in those - cases) - */ - if ((op_queued_empty) || (!op_queued_empty && (out_count == 0))) + else { /* compute how long to wait */ gettimeofday(&base, NULL); @@ -119,15 +135,21 @@ void *dbpf_thread_function(void *ptr) wait_time.tv_sec++; } - gen_mutex_lock(&dbpf_op_queue_mutex); ret = pthread_cond_timedwait(&dbpf_op_incoming_cond, &dbpf_op_queue_mutex, &wait_time); + if( ret == EINVAL || ret == EPERM ) + { + /* an error other than timeout occured */ + gossip_debug(GOSSIP_TROVE_DEBUG, "%s: pthread_cond_timedwait " + "returned an error\n", __func__); + } gen_mutex_unlock(&dbpf_op_queue_mutex); } } gossip_debug(GOSSIP_TROVE_DEBUG, "dbpf_thread_function ending\n"); + PINT_event_thread_stop(); #endif return ptr; } diff --git a/src/io/trove/trove-dbpf/dbpf.h b/src/io/trove/trove-dbpf/dbpf.h index 699b203..2e254bd 100644 --- a/src/io/trove/trove-dbpf/dbpf.h +++ b/src/io/trove/trove-dbpf/dbpf.h @@ -18,6 +18,7 @@ extern "C" { #include "pvfs2-internal.h" #include "dbpf-keyval-pcache.h" #include "dbpf-open-cache.h" +#include "pint-event.h" /* For unknown Berkeley DB errors, we return some large value */ @@ -29,7 +30,7 @@ extern "C" { * Major versions aren't either, but refer to architectural storage format changes. 
*/ #define TROVE_DBPF_VERSION_KEY "trove-dbpf-version" -#define TROVE_DBPF_VERSION_VALUE "0.1.3" +#define TROVE_DBPF_VERSION_VALUE "0.1.5" #define LAST_HANDLE_STRING "last_handle" @@ -67,93 +68,104 @@ extern "C" { #define DBPF_BSTREAM_GET_BUCKET(__handle) \ ((__handle) % DBPF_BSTREAM_MAX_NUM_BUCKETS) -#define DBPF_EVENT_START(__op, __id) \ - PINT_event_timestamp(PVFS_EVENT_API_TROVE, __op, 0, __id, \ - PVFS_EVENT_FLAG_START) +#define DBPF_GET_DATA_DIRNAME(__buf, __path_max, __base) \ +do { snprintf(__buf, __path_max, "/%s", __base); } while (0) -#define DBPF_EVENT_END(__op, __id) \ - PINT_event_timestamp(PVFS_EVENT_API_TROVE, __op, 0, __id, \ - PVFS_EVENT_FLAG_END) - -#define DBPF_GET_STORAGE_DIRNAME(__buf, __path_max, __stoname) \ -do { snprintf(__buf, __path_max, "/%s", __stoname); } while (0) +#define DBPF_GET_META_DIRNAME(__buf, __path_max, __base) \ +do { snprintf(__buf, __path_max, "/%s", __base); } while (0) #define STO_ATTRIB_DBNAME "storage_attributes.db" -#define DBPF_GET_STO_ATTRIB_DBNAME(__buf, __path_max, __stoname) \ +#define DBPF_GET_STO_ATTRIB_DBNAME(__buf, __path_max, __base) \ do { \ - snprintf(__buf, __path_max, "/%s/%s", __stoname, STO_ATTRIB_DBNAME); \ + snprintf(__buf, __path_max, "/%s/%s", __base, STO_ATTRIB_DBNAME); \ } while (0) #define COLLECTIONS_DBNAME "collections.db" -#define DBPF_GET_COLLECTIONS_DBNAME(__buf, __path_max, __stoname) \ +#define DBPF_GET_COLLECTIONS_DBNAME(__buf, __path_max, __base) \ do { \ - snprintf(__buf, __path_max, "/%s/%s", __stoname, COLLECTIONS_DBNAME); \ + snprintf(__buf, __path_max, "/%s/%s", __base, COLLECTIONS_DBNAME); \ } while (0) -#define DBPF_GET_COLL_DIRNAME(__buf, __path_max, __stoname, __collid) \ +#define DBPF_GET_COLL_DIRNAME(__buf, __path_max, __base, __collid) \ do { \ - snprintf(__buf, __path_max, "/%s/%08x", __stoname, __collid); \ + snprintf(__buf, __path_max, "/%s/%08x", __base, __collid); \ } while (0) #define COLL_ATTRIB_DBNAME "collection_attributes.db" -#define 
DBPF_GET_COLL_ATTRIB_DBNAME(__buf,__path_max,__stoname,__collid) \ +#define DBPF_GET_COLL_ATTRIB_DBNAME(__buf,__path_max,__base,__collid) \ do { \ - snprintf(__buf, __path_max, "/%s/%08x/%s", __stoname, __collid, \ + snprintf(__buf, __path_max, "/%s/%08x/%s", __base, __collid, \ COLL_ATTRIB_DBNAME); \ } while (0) #define DS_ATTRIB_DBNAME "dataspace_attributes.db" -#define DBPF_GET_DS_ATTRIB_DBNAME(__buf,__path_max,__stoname,__collid) \ +#define DBPF_GET_DS_ATTRIB_DBNAME(__buf,__path_max,__base,__collid) \ do { \ - snprintf(__buf, __path_max, "/%s/%08x/%s", __stoname, __collid, \ + snprintf(__buf, __path_max, "/%s/%08x/%s", __base, __collid, \ DS_ATTRIB_DBNAME); \ } while (0) #define BSTREAM_DIRNAME "bstreams" -#define DBPF_GET_BSTREAM_DIRNAME(__buf, __path_max, __stoname, __collid) \ +#define DBPF_GET_BSTREAM_DIRNAME(__buf, __path_max, __base, __collid) \ do { \ - snprintf(__buf, __path_max, "/%s/%08x/%s", __stoname, __collid, \ + snprintf(__buf, __path_max, "/%s/%08x/%s", __base, __collid, \ BSTREAM_DIRNAME); \ } while (0) #define STRANDED_BSTREAM_DIRNAME "stranded-bstreams" #define DBPF_GET_STRANDED_BSTREAM_DIRNAME( \ - __buf, __path_max, __stoname, __collid) \ + __buf, __path_max, __base, __collid) \ do { \ snprintf(__buf, __path_max, "/%s/%08x/%s", \ - __stoname, __collid, STRANDED_BSTREAM_DIRNAME); \ + __base, __collid, STRANDED_BSTREAM_DIRNAME); \ } while(0) -/* arguments are: buf, path_max, stoname, collid, handle */ -#define DBPF_GET_BSTREAM_FILENAME(__b, __pm, __stoname, __cid, __handle) \ +/* arguments are: buf, path_max, base, collid, handle */ +#define DBPF_GET_BSTREAM_FILENAME(__b, __pm, __base, __cid, __handle) \ do { \ snprintf(__b, __pm, "/%s/%08x/%s/%.8llu/%08llx.bstream", \ - __stoname, __cid, BSTREAM_DIRNAME, \ + __base, __cid, BSTREAM_DIRNAME, \ llu(DBPF_BSTREAM_GET_BUCKET(__handle)), llu(__handle)); \ } while (0) -/* arguments are: buf, path_max, stoname, collid, handle */ +/* arguments are: buf, path_max, base, collid, handle */ #define 
DBPF_GET_STRANDED_BSTREAM_FILENAME( \ - __b, __pm, __stoname, __cid, __handle) \ + __b, __pm, __base, __cid, __handle) \ do { \ snprintf(__b, __pm, "/%s/%08x/%s/%08llx.bstream", \ - __stoname, __cid, STRANDED_BSTREAM_DIRNAME, \ + __base, __cid, STRANDED_BSTREAM_DIRNAME, \ llu(__handle)); \ } while(0) -/* arguments are: buf, path_max, stoname, collid */ +/* arguments are: buf, path_max, base, collid */ #define KEYVAL_DBNAME "keyval.db" -#define DBPF_GET_KEYVAL_DBNAME(__buf,__path_max,__stoname,__collid) \ +#define DBPF_GET_KEYVAL_DBNAME(__buf,__path_max,__base,__collid) \ do { \ - snprintf(__buf, __path_max, "/%s/%08x/%s", __stoname, __collid, \ + snprintf(__buf, __path_max, "/%s/%08x/%s", __base, __collid, \ KEYVAL_DBNAME); \ } while (0) +inline int dbpf_pread(int fd, void *buf, size_t count, off_t offset); +inline int dbpf_pwrite(int fd, const void *buf, size_t count, off_t offset); + extern struct TROVE_bstream_ops dbpf_bstream_ops; extern struct TROVE_dspace_ops dbpf_dspace_ops; extern struct TROVE_keyval_ops dbpf_keyval_ops; extern struct TROVE_mgmt_ops dbpf_mgmt_ops; +extern PINT_event_group trove_dbpf_event_group; + +extern PINT_event_type trove_dbpf_read_event_id; +extern PINT_event_type trove_dbpf_write_event_id; +extern PINT_event_type trove_dbpf_keyval_write_event_id; +extern PINT_event_type trove_dbpf_keyval_read_event_id; +extern PINT_event_type trove_dbpf_dspace_create_event_id; +extern PINT_event_type trove_dbpf_dspace_create_list_event_id; +extern PINT_event_type trove_dbpf_dspace_getattr_event_id; +extern PINT_event_type trove_dbpf_dspace_setattr_event_id; + +extern int dbpf_pid; + struct dbpf_aio_ops { int (* aio_read) (struct aiocb * aiocbp); @@ -181,11 +193,15 @@ int PINT_dbpf_keyval_iterate( TROVE_ds_position pos, PINT_dbpf_keyval_iterate_callback callback); +int PINT_dbpf_dspace_remove_keyval( + void * args, TROVE_handle handle, TROVE_keyval_s *key, TROVE_keyval_s *val); + struct dbpf_storage { TROVE_ds_flags flags; int refct; - char *name; + 
char *data_path; /* path to data storage directory */ + char *meta_path; /* path to metadata storage directory */ DB *sto_attr_db; DB *coll_db; }; @@ -194,7 +210,8 @@ struct dbpf_collection { int refct; char *name; - char *path_name; + char *data_path; /* path to data collection directory */ + char *meta_path; /* path to metadata collection directory */ DB *coll_attr_db; DB *ds_db; DB *keyval_db; @@ -236,6 +253,14 @@ int PINT_trove_dbpf_ds_attr_compare( int PINT_trove_dbpf_ds_attr_compare_reversed( DB * dbp, const DBT * a, const DBT * b); +int dbpf_dspace_attr_get(struct dbpf_collection *coll_p, + TROVE_object_ref ref, + TROVE_ds_attributes *attr); + +int dbpf_dspace_attr_set(struct dbpf_collection *coll_p, + TROVE_object_ref ref, + TROVE_ds_attributes *attr); + struct dbpf_dspace_create_op { TROVE_handle_extent_array extent_array; @@ -244,6 +269,16 @@ struct dbpf_dspace_create_op /* hint? */ }; +struct dbpf_dspace_create_list_op +{ + TROVE_handle_extent_array extent_array; + TROVE_handle *out_handle_array_p; + TROVE_ds_type type; + int count; + /* hint? */ +}; + + /* struct dbpf_dspace_remove_op {}; -- nothing belongs in here */ struct dbpf_dspace_iterate_handles_op @@ -268,6 +303,13 @@ struct dbpf_dspace_getattr_op TROVE_ds_attributes_s *attr_p; }; +struct dbpf_dspace_remove_list_op +{ + int count; + TROVE_handle *handle_array; + TROVE_ds_state *error_p; +}; + struct dbpf_dspace_getattr_list_op { int count; @@ -312,6 +354,14 @@ struct dbpf_keyval_remove_op /* vtag? */ }; +struct dbpf_keyval_remove_list_op +{ + TROVE_keyval_s *key_array; + TROVE_keyval_s *val_array; + int *error_array; + int count; /* TODO: MAKE INOUT? */ +}; + struct dbpf_keyval_iterate_op { TROVE_keyval_s *key_array; @@ -329,19 +379,11 @@ struct dbpf_keyval_iterate_keys_op /* vtag? */ }; -/* used for both read and write at */ -struct dbpf_bstream_rw_at_op -{ - TROVE_offset offset; - TROVE_size size; - void *buffer; - /* vtag? */ -}; - struct dbpf_bstream_resize_op { TROVE_size size; /* vtag? 
*/ + void *queued_op_ptr; }; /* Used to maintain state of partial processing of a listio operation @@ -387,27 +429,26 @@ struct dbpf_bstream_rw_list_op struct sigevent sigev; struct dbpf_aio_ops *aio_ops; struct bstream_listio_state lio_state; -#ifndef __PVFS2_TROVE_AIO_THREADED__ void *queued_op_ptr; -#endif }; inline int dbpf_bstream_rw_list(TROVE_coll_id coll_id, TROVE_handle handle, - char **mem_offset_array, + char **mem_offset_array, TROVE_size *mem_size_array, int mem_count, TROVE_offset *stream_offset_array, TROVE_size *stream_size_array, int stream_count, TROVE_size *out_size_p, - TROVE_ds_flags flags, + TROVE_ds_flags flags, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, TROVE_op_id *out_op_id_p, int opcode, - struct dbpf_aio_ops * aio_ops); + struct dbpf_aio_ops * aio_ops, + PVFS_hint hints); struct dbpf_keyval_get_handle_info_op { @@ -455,6 +496,11 @@ enum dbpf_op_type DSPACE_GETATTR, DSPACE_SETATTR, DSPACE_GETATTR_LIST, + DSPACE_CREATE_LIST, + DSPACE_REMOVE_LIST, + /* NOTE: if you change or add items to this list, please update + * s_dbpf_op_type_str_map[] accordingly (dbpf-mgmt.c) + */ }; #define DBPF_OP_DOES_SYNC(__op) \ @@ -462,6 +508,7 @@ enum dbpf_op_type __op == KEYVAL_REMOVE_KEY || \ __op == KEYVAL_WRITE_LIST || \ __op == DSPACE_CREATE || \ + __op == DSPACE_CREATE_LIST || \ __op == DSPACE_REMOVE || \ __op == DSPACE_SETATTR) @@ -498,18 +545,18 @@ struct dbpf_op void *user_ptr; TROVE_ds_flags flags; TROVE_context_id context_id; + PVFS_hint hints; union { /* all the op types go in here; structs are all * defined just below the prototypes for the functions. 
*/ struct dbpf_dspace_create_op d_create; + struct dbpf_dspace_create_list_op d_create_list; struct dbpf_dspace_iterate_handles_op d_iterate_handles; struct dbpf_dspace_verify_op d_verify; struct dbpf_dspace_getattr_op d_getattr; struct dbpf_dspace_setattr_op d_setattr; - struct dbpf_bstream_rw_at_op b_read_at; - struct dbpf_bstream_rw_at_op b_write_at; struct dbpf_bstream_rw_list_op b_rw_list; struct dbpf_bstream_resize_op b_resize; struct dbpf_keyval_read_op k_read; @@ -519,7 +566,9 @@ struct dbpf_op struct dbpf_keyval_iterate_keys_op k_iterate_keys; struct dbpf_keyval_read_list_op k_read_list; struct dbpf_keyval_read_list_op k_write_list; + struct dbpf_keyval_remove_list_op k_remove_list; struct dbpf_dspace_getattr_list_op d_getattr_list; + struct dbpf_dspace_remove_list_op d_remove_list; struct dbpf_keyval_get_handle_info_op k_get_handle_info; } u; }; @@ -541,7 +590,7 @@ PVFS_error dbpf_db_error_to_trove_error(int db_error_value); #define DBPF_READ read #define DBPF_CLOSE close #define DBPF_UNLINK unlink -#define DBPF_SYNC fsync +#define DBPF_SYNC fdatasync #define DBPF_RESIZE ftruncate #define DBPF_FSTAT fstat #define DBPF_ACCESS access @@ -605,6 +654,22 @@ do { \ } \ } while(0) +#define DBPF_EVENT_START(__coll_p, __q_op_p, __event_type, __event_id, args...) 
\ + if(__coll_p->immediate_completion) \ + { \ + PINT_EVENT_START(__event_type, dbpf_pid, NULL, (__event_id), \ + ## args); \ + } \ + else \ + { \ + __q_op_p->event_type = __event_type; \ + PINT_EVENT_START(__event_type, dbpf_pid, NULL, (__event_id), \ + ## args); \ + *(__event_id) = __q_op_p->event_id; \ + } + +#define DBPF_EVENT_END(__event_type, __event_id) \ + PINT_EVENT_END(__event_type, dbpf_pid, NULL, __event_id) extern struct dbpf_storage *my_storage_p; @@ -626,14 +691,18 @@ extern int dbpf_putdb_env(DB_ENV *dbenv, const char *path); extern int db_open(DB *db_p, const char *dbname, int, int); extern int db_close(DB *db_p); +int dbpf_dspace_setattr_op_svc(struct dbpf_op *op_p); + struct dbpf_storage *dbpf_storage_lookup( - char *stoname, int *error_p, TROVE_ds_flags flags); + char *data_path, char *meta_path, int *error_p, TROVE_ds_flags flags); -int dbpf_storage_create(char *stoname, +int dbpf_storage_create(char *data_path, + char *meta_path, void *user_ptr, TROVE_op_id *out_op_id_p); -int dbpf_storage_remove(char *stoname, +int dbpf_storage_remove(char *data_path, + char *meta_path, void *user_ptr, TROVE_op_id *out_op_id_p); @@ -651,6 +720,8 @@ int dbpf_collection_lookup(char *collname, void *user_ptr, TROVE_op_id *out_op_id_p); +int dbpf_collection_clear(TROVE_coll_id coll_id); + int dbpf_collection_iterate(TROVE_ds_position *inout_position_p, TROVE_keyval_s *name_array, TROVE_coll_id *coll_id_array, @@ -687,6 +758,13 @@ int dbpf_collection_geteattr(TROVE_coll_id coll_id, TROVE_context_id context_id, TROVE_op_id *out_op_id_p); +int dbpf_collection_deleattr(TROVE_coll_id coll_id, + TROVE_keyval_s *key_p, + TROVE_ds_flags flags, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p); + int dbpf_finalize(void); int dbpf_bstream_read_at(TROVE_coll_id coll_id, @@ -695,10 +773,11 @@ int dbpf_bstream_read_at(TROVE_coll_id coll_id, TROVE_size *inout_size_p, TROVE_offset offset, TROVE_ds_flags flags, - TROVE_vtag_s *vtag, + TROVE_vtag_s 
*vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int dbpf_bstream_write_at(TROVE_coll_id coll_id, TROVE_handle handle, @@ -709,7 +788,16 @@ int dbpf_bstream_write_at(TROVE_coll_id coll_id, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); + +int dbpf_bstream_flush(TROVE_coll_id coll_id, + TROVE_handle handle, + TROVE_ds_flags flags, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int dbpf_bstream_resize(TROVE_coll_id coll_id, TROVE_handle handle, @@ -718,7 +806,8 @@ int dbpf_bstream_resize(TROVE_coll_id coll_id, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int dbpf_bstream_validate(TROVE_coll_id coll_id, TROVE_handle handle, @@ -726,14 +815,8 @@ int dbpf_bstream_validate(TROVE_coll_id coll_id, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); - -int dbpf_bstream_flush(TROVE_coll_id coll_id, - TROVE_handle handle, - TROVE_ds_flags flags, - void *user_ptr, - TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); #if defined(__cplusplus) } diff --git a/src/io/trove/trove-dbpf/module.mk.in b/src/io/trove/trove-dbpf/module.mk.in index e3d6e1a..031e090 100644 --- a/src/io/trove/trove-dbpf/module.mk.in +++ b/src/io/trove/trove-dbpf/module.mk.in @@ -15,7 +15,9 @@ SERVERSRC += \ $(DIR)/dbpf-mgmt.c \ $(DIR)/dbpf-keyval-pcache.c \ $(DIR)/dbpf-sync.c \ - $(DIR)/dbpf-alt-aio.c + $(DIR)/dbpf-alt-aio.c \ + $(DIR)/dbpf-null-aio.c \ + $(DIR)/dbpf-bstream-direct.c # Grab trove-ledger.h from handle-mgmt. Also make _GNU_SOURCE definition # required for access to pread/pwrite on Linux. 
_XOPEN_SOURCE seems to be diff --git a/src/io/trove/trove-handle-mgmt/trove-handle-mgmt.c b/src/io/trove/trove-handle-mgmt/trove-handle-mgmt.c index f33d7e5..2fe119e 100644 --- a/src/io/trove/trove-handle-mgmt/trove-handle-mgmt.c +++ b/src/io/trove/trove-handle-mgmt/trove-handle-mgmt.c @@ -126,10 +126,9 @@ static int trove_check_handle_ranges(TROVE_coll_id coll_id, ret = trove_handle_remove(ledger, handles[i]); if (ret != 0) { - gossip_debug( - GOSSIP_TROVE_DEBUG, "could not remove " - "handle %llu\n", llu(handles[i])); - break; + gossip_err( + "WARNING: could not remove " + "handle %llu from ledger; continuing.\n", llu(handles[i])); } } ret = ((i == count) ? 0 : -1); diff --git a/src/io/trove/trove-internal.h b/src/io/trove/trove-internal.h index b5bace9..b0a3938 100644 --- a/src/io/trove/trove-internal.h +++ b/src/io/trove/trove-internal.h @@ -11,6 +11,8 @@ PVFS_error trove_errno_to_trove_error(int errno_value); +int trove_get_version (TROVE_coll_id coll_id, int* major, int* minor, int* incremental); +int trove_put_version (TROVE_coll_id coll_id, int major, int minor, int incremental); /* These structures contains the function pointers that should be provided * by valid trove "method" implementations @@ -28,7 +30,8 @@ struct TROVE_bstream_ops TROVE_vtag_s *out_vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*bstream_write_at)( TROVE_coll_id coll_id, @@ -40,7 +43,8 @@ struct TROVE_bstream_ops TROVE_vtag_s *inout_vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*bstream_resize)( TROVE_coll_id coll_id, @@ -50,7 +54,8 @@ struct TROVE_bstream_ops TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*bstream_validate)( TROVE_coll_id coll_id, @@ -59,7 +64,8 @@ struct TROVE_bstream_ops TROVE_vtag_s *vtag, void 
*user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*bstream_read_list)( TROVE_coll_id coll_id, @@ -75,7 +81,8 @@ struct TROVE_bstream_ops TROVE_vtag_s *out_vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*bstream_write_list)( TROVE_coll_id coll_id, @@ -91,7 +98,8 @@ struct TROVE_bstream_ops TROVE_vtag_s *inout_vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*bstream_flush)( TROVE_coll_id coll_id, @@ -99,7 +107,13 @@ struct TROVE_bstream_ops TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); + + int (*bstream_cancel)( + TROVE_coll_id coll_id, + TROVE_op_id cancel_id, + TROVE_context_id context_id); }; struct TROVE_keyval_ops @@ -113,7 +127,8 @@ struct TROVE_keyval_ops TROVE_vtag_s *out_vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*keyval_write)( TROVE_coll_id coll_id, @@ -124,7 +139,8 @@ struct TROVE_keyval_ops TROVE_vtag_s *inout_vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*keyval_remove)( TROVE_coll_id coll_id, @@ -135,7 +151,22 @@ struct TROVE_keyval_ops TROVE_vtag_s *inout_vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); + + int (*keyval_remove_list)( + TROVE_coll_id coll_id, + TROVE_handle handle, + TROVE_keyval_s *key_array, + TROVE_keyval_s *val_array, + int *error_array, + int count, + TROVE_ds_flags flags, + TROVE_vtag_s *inout_vtag, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*keyval_validate)( TROVE_coll_id coll_id, @@ 
-144,7 +175,8 @@ struct TROVE_keyval_ops TROVE_vtag_s *inout_vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*keyval_iterate)( TROVE_coll_id coll_id, @@ -157,7 +189,8 @@ struct TROVE_keyval_ops TROVE_vtag_s *inout_vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*keyval_iterate_keys)( TROVE_coll_id coll_id, @@ -169,7 +202,8 @@ struct TROVE_keyval_ops TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*keyval_read_list)( TROVE_coll_id coll_id, @@ -182,7 +216,8 @@ struct TROVE_keyval_ops TROVE_vtag_s *out_vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*keyval_write_list)( TROVE_coll_id coll_id, @@ -194,14 +229,16 @@ struct TROVE_keyval_ops TROVE_vtag_s *inout_vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*keyval_flush)( TROVE_coll_id coll_id, TROVE_handle handle, TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*keyval_get_handle_info)( TROVE_coll_id coll_id, TROVE_handle handle, @@ -209,7 +246,8 @@ struct TROVE_keyval_ops TROVE_keyval_handle_info *info, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); }; struct TROVE_dspace_ops @@ -223,7 +261,21 @@ struct TROVE_dspace_ops TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); + + int (*dspace_create_list)( + TROVE_coll_id coll_id, + TROVE_handle_extent_array *extent_array, + TROVE_handle *handle_array, + int count, + 
TROVE_ds_type type, + TROVE_keyval_s *hint, /* TODO: figure out what this is! */ + TROVE_ds_flags flags, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*dspace_remove)( TROVE_coll_id coll_id, @@ -231,8 +283,20 @@ struct TROVE_dspace_ops TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints); + + int (*dspace_remove_list)( + TROVE_coll_id coll_id, + TROVE_handle* handle_array, + TROVE_ds_state *error_array, + int count, + TROVE_ds_flags flags, + void *user_ptr, + TROVE_context_id context_id, TROVE_op_id *out_op_id_p); + int (*dspace_iterate_handles)( TROVE_coll_id coll_id, TROVE_ds_position *position_p, @@ -251,7 +315,8 @@ struct TROVE_dspace_ops TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*dspace_getattr)( TROVE_coll_id coll_id, @@ -260,7 +325,8 @@ struct TROVE_dspace_ops TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*dspace_getattr_list)( TROVE_coll_id coll_id, @@ -271,7 +337,8 @@ struct TROVE_dspace_ops TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*dspace_setattr)( TROVE_coll_id coll_id, @@ -280,7 +347,8 @@ struct TROVE_dspace_ops TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int (*dspace_cancel)( TROVE_coll_id coll_id, @@ -321,41 +389,43 @@ struct TROVE_dspace_ops struct TROVE_mgmt_ops { int (*initialize)( - char *stoname, + char *data_path, + char *meta_path, TROVE_ds_flags flags); int (*finalize)(void); int (*storage_create)( - char *stoname, + char *data_path, + char *meta_path, void *user_ptr, TROVE_op_id *out_op_id_p); int (*storage_remove)( - 
char *stoname, + char *data_path, + char *meta_path, void *user_ptr, TROVE_op_id *out_op_id_p); int (*collection_create)( - /* char *stoname, */ char *collname, TROVE_coll_id new_coll_id, void *user_ptr, TROVE_op_id *out_op_id_p); int (*collection_remove)( - /* char *stoname, */ char *collname, void *user_ptr, TROVE_op_id *out_op_id_p); int (*collection_lookup)( - /* char *stoname, */ char *collname, TROVE_coll_id *coll_id_p, void *user_ptr, TROVE_op_id *out_op_id_p); + int (*collection_clear)(TROVE_coll_id coll_id); + int (*collection_iterate)(TROVE_ds_position *inout_position_p, TROVE_keyval_s *name_array, TROVE_coll_id *coll_id_array, @@ -396,6 +466,14 @@ struct TROVE_mgmt_ops void *user_ptr, TROVE_context_id context_id, TROVE_op_id *out_op_id_p); + + int (*collection_deleattr)( + TROVE_coll_id coll_id, + TROVE_keyval_s *key_p, + TROVE_ds_flags flags, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p); }; struct TROVE_context_ops diff --git a/src/io/trove/trove-mgmt.c b/src/io/trove/trove-mgmt.c index 10e8ad6..38b4629 100644 --- a/src/io/trove/trove-mgmt.c +++ b/src/io/trove/trove-mgmt.c @@ -17,42 +17,56 @@ TROVE_method_callback global_trove_method_callback; static TROVE_method_id TROVE_default_method(TROVE_coll_id id); extern struct TROVE_mgmt_ops dbpf_mgmt_ops; +extern struct TROVE_mgmt_ops dbpf_mgmt_direct_ops; extern struct TROVE_dspace_ops dbpf_dspace_ops; extern struct TROVE_keyval_ops dbpf_keyval_ops; extern struct TROVE_bstream_ops dbpf_bstream_ops; extern struct TROVE_context_ops dbpf_context_ops; extern struct TROVE_bstream_ops alt_aio_bstream_ops; +extern struct TROVE_bstream_ops null_aio_bstream_ops; +extern struct TROVE_bstream_ops dbpf_bstream_direct_ops; /* currently we only have one method for these tables to refer to */ struct TROVE_mgmt_ops *mgmt_method_table[] = { &dbpf_mgmt_ops, - &dbpf_mgmt_ops /* alt-aio */ + &dbpf_mgmt_ops, /* alt-aio */ + &dbpf_mgmt_ops, /* null-aio */ + &dbpf_mgmt_direct_ops /* direct-io */ + }; 
struct TROVE_dspace_ops *dspace_method_table[] = { &dbpf_dspace_ops, - &dbpf_dspace_ops /* alt-aio */ + &dbpf_dspace_ops, /* alt-aio */ + &dbpf_dspace_ops, /* null-aio */ + &dbpf_dspace_ops /* direct-io */ }; struct TROVE_keyval_ops *keyval_method_table[] = { &dbpf_keyval_ops, - &dbpf_keyval_ops /* alt-aio */ + &dbpf_keyval_ops, /* alt-aio */ + &dbpf_keyval_ops, /* null-aio */ + &dbpf_keyval_ops /* direct-io */ }; struct TROVE_bstream_ops *bstream_method_table[] = { &dbpf_bstream_ops, - &alt_aio_bstream_ops + &alt_aio_bstream_ops, + &null_aio_bstream_ops, + &dbpf_bstream_direct_ops }; struct TROVE_context_ops *context_method_table[] = { &dbpf_context_ops, - &dbpf_context_ops /* alt-aio */ + &dbpf_context_ops, /* alt-aio */ + &dbpf_context_ops, /* null-aio */ + &dbpf_context_ops /* direct-io */ }; /* trove_init_mutex, trove_init_status @@ -73,7 +87,8 @@ static int trove_init_status = 0; */ int trove_initialize(TROVE_method_id method_id, TROVE_method_callback method_callback, - char *stoname, + char *data_path, + char *meta_path, TROVE_ds_flags flags) { int ret = -TROVE_EALREADY; @@ -81,7 +96,6 @@ int trove_initialize(TROVE_method_id method_id, gen_mutex_lock(&trove_init_mutex); if (trove_init_status) { - gen_mutex_unlock(&trove_init_mutex); return ret; } @@ -106,7 +120,7 @@ int trove_initialize(TROVE_method_id method_id, those op pointers to be right either way. */ ret = mgmt_method_table[method_id]->initialize( - stoname, flags); + data_path, meta_path, flags); if (ret > -1) { ret = 1; @@ -141,24 +155,26 @@ int trove_finalize(TROVE_method_id method_id) } int trove_storage_create(TROVE_method_id method_id, - char *stoname, + char *data_path, + char *meta_path, void *user_ptr, TROVE_op_id *out_op_id_p) { int ret = mgmt_method_table[method_id]->storage_create( - stoname, user_ptr, out_op_id_p); + data_path, meta_path, user_ptr, out_op_id_p); return ((ret < 0) ? 
ret : 1); } int trove_storage_remove(TROVE_method_id method_id, - char *stoname, + char *data_path, + char *meta_path, void *user_ptr, TROVE_op_id *out_op_id_p) { int ret = mgmt_method_table[method_id]->storage_remove( - stoname, user_ptr, out_op_id_p); + data_path, meta_path, user_ptr, out_op_id_p); return ((ret < 0) ? ret : 1); } @@ -204,7 +220,7 @@ int trove_collection_lookup(TROVE_method_id method_id, int ret = mgmt_method_table[method_id]->collection_lookup( collname, coll_id_p, user_ptr, out_op_id_p); - return ((ret < 0) ? ret : 1); + return (ret < 0) ? ret : 1; } int trove_collection_iterate(TROVE_method_id method_id, @@ -266,6 +282,14 @@ int trove_close_context( return ret; } +int trove_collection_clear( + TROVE_method_id method_id, + TROVE_coll_id coll_id) +{ + return mgmt_method_table[method_id]->collection_clear(coll_id); +} + + static TROVE_method_id TROVE_default_method(TROVE_coll_id id) { return TROVE_METHOD_DBPF; diff --git a/src/io/trove/trove-migrate.c b/src/io/trove/trove-migrate.c new file mode 100644 index 0000000..f18f69e --- /dev/null +++ b/src/io/trove/trove-migrate.c @@ -0,0 +1,899 @@ +/* + * (C) 2009 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#include +#include +#include +#include +#include "pvfs2-internal.h" +#include "trove.h" +#include "gossip.h" +#include "trove-dbpf/dbpf.h" +#include "pint-cached-config.h" +#include "server-config-mgr.h" + +#undef DEBUG_MIGRATE_PERF + +/* + * Macros + */ +#define TROVE_DSPACE_WAIT(ret, coll_id, op_id, \ + context_id, op_count, state, label) \ + while (ret == 0) \ + { \ + ret = trove_dspace_test(coll_id, \ + op_id, \ + context_id, \ + &op_count, \ + NULL, \ + NULL, \ + &state, \ + TROVE_DEFAULT_TEST_TIMEOUT); \ + } \ + if (ret < 0) \ + { \ + gossip_err("trove_dspace_test failed: err=%d coll=%d \ +op=%lld context=%lld count=%d state=%d\n", \ + ret, coll_id, llu(op_id), \ + llu(context_id), op_count, state); \ + goto label; \ + } + +/* + * Prototypes + */ +static int migrate_collection_0_1_3 (TROVE_coll_id coll_id, + const char* data_path, + const char* meta_path); +static int migrate_collection_0_1_4 (TROVE_coll_id coll_id, + const char* data_path, + const char* meta_path); + +/* + * Migration Table + * + * Migration routines should be listed in ascending order. + */ +struct migration_s +{ + int major; + int minor; + int incremental; + int (*migrate)(TROVE_coll_id coll_id, + const char* data_path, + const char* meta_path); +}; + +/* format: major, minor, incremental, function to migrate. + * NOTE: this defines the version to migratem *FROM*. In other words, + * if currently running the version defined in the table, run the + * associated function. */ +struct migration_s migration_table[] = +{ + { 0, 1, 3, migrate_collection_0_1_3 }, + { 0, 1, 4, migrate_collection_0_1_4 }, + { 0, 0, 0, NULL } +}; + +/* + * trove_get_version + * coll_id - collection id + * major - return major version + * minor - return minor version + * incremental - return incremental version + * + * Return the major, minor and incremental digits of the dbpf storage version. 
+ * \return 0 on success, non-zero otherwise + */ +int trove_get_version (TROVE_coll_id coll_id, + int* major, + int* minor, + int* incremental) +{ + TROVE_context_id context_id = PVFS_CONTEXT_NULL; + TROVE_op_id op_id; + TROVE_ds_state state; + TROVE_keyval_s key; + TROVE_keyval_s data; + char version[32] = {0}; + int ret; + int count; + + memset (&key, 0, sizeof(key)); + memset (&data, 0, sizeof(data)); + + key.buffer = TROVE_DBPF_VERSION_KEY; + key.buffer_sz = strlen(TROVE_DBPF_VERSION_KEY); + data.buffer = version; + data.buffer_sz = sizeof(version); + + ret = trove_open_context(coll_id, &context_id); + if (ret < 0) + { + gossip_err("trove_open_context failed: ret=%d coll=%d\n", + ret, coll_id); + goto complete; + } + + ret = trove_collection_geteattr(coll_id, &key, &data, 0, NULL, + context_id, &op_id); + if (ret < 0) + { + gossip_err("trove_collection_geteattr failed: ret=%d coll=%d \ +context=%lld op=%lld\n", + ret, coll_id, llu(context_id), llu(op_id)); + goto complete; + } + + TROVE_DSPACE_WAIT(ret, coll_id, op_id, context_id, count, state, complete); + + ret = sscanf(version, "%d.%d.%d", major, minor, incremental); + if (ret != 3) + { + gossip_err("sscanf failed: ret=%d errno=%d version=%s\n", + ret, errno, version); + ret = -1; + goto complete; + } + + ret = 0; + +complete: + if (context_id != PVFS_CONTEXT_NULL) + { + int rc = trove_close_context(coll_id, context_id); + if (rc < 0) + { + ret = rc; + gossip_err("trove_context_close failed: ret=%d coll=%d \ +context=%lld\n", + ret, coll_id, llu(context_id)); + } + } + + return ret; +} + +/* + * trove_put_version + * coll_id - collection id + * major - major version + * minor - minor version + * incremental - incremental version + * + * Set the major, minor and incremental digits of the dbpf storage version. 
+ * \return 0 on success, non-zero otherwise + */ +int trove_put_version (TROVE_coll_id coll_id, + int major, int minor, int incremental) +{ + TROVE_context_id context_id = PVFS_CONTEXT_NULL; + TROVE_op_id op_id; + TROVE_ds_state state; + TROVE_keyval_s key; + TROVE_keyval_s data; + char version[32] = {0}; + int ret; + int count; + + memset (&key, 0, sizeof(key)); + memset (&data, 0, sizeof(data)); + + key.buffer = TROVE_DBPF_VERSION_KEY; + key.buffer_sz = strlen(TROVE_DBPF_VERSION_KEY); + data.buffer = version; + data.buffer_sz = sizeof(version); + + ret = trove_open_context(coll_id, &context_id); + if (ret < 0) + { + gossip_err("trove_open_context failed: ret=%d coll=%d\n", + ret, coll_id); + goto complete; + } + + ret = trove_collection_geteattr(coll_id, &key, &data, 0, NULL, + context_id, &op_id); + if (ret < 0) + { + gossip_err("trove_collection_geteattr failed: ret=%d coll=%d \ +context=%lld op=%lld\n", + ret, coll_id, llu(context_id), llu(op_id)); + goto complete; + } + + TROVE_DSPACE_WAIT(ret, coll_id, op_id, context_id, count, state, complete); + + ret = snprintf (version, sizeof(version), "%d.%d.%d", + major, minor, incremental); + if ((ret < 0) || (ret >= 32)) + { + gossip_err("snprintf failed: ret=%d errno=%d version=%s\n", + ret, errno, version); + ret = -1; + goto complete; + } + + /* set the size to a correct value, not 32 */ + data.buffer_sz = strlen(data.buffer); + ret = trove_collection_seteattr(coll_id, &key, &data, 0, NULL, + context_id, &op_id); + if (ret < 0) + { + gossip_err("trove_collection_seteattr failed: ret=%d coll=%d \ +context=%lld op=%lld\n", + ret, coll_id, llu(context_id), llu(op_id)); + goto complete; + } + + TROVE_DSPACE_WAIT(ret, coll_id, op_id, context_id, count, state, complete); + +complete: + if (context_id != PVFS_CONTEXT_NULL) + { + int rc = trove_close_context(coll_id, context_id); + if (rc < 0) + { + ret = rc; + gossip_err("trove_context_close failed: ret=%d coll=%d \ +context=%lld\n", + ret, coll_id, llu(context_id)); + 
} + } + + return ret; +} + +#ifdef DEBUG_MIGRATE_PERF +static double wtime(void) +{ + struct timeval t; + + gettimeofday(&t, NULL); + return((double)t.tv_sec + (double)t.tv_usec / 1000000); +} +#endif + +/* + * trove_migrate + * method_id - method used to for trove access + * data_path - path to data storage + * meta_path - path to metadata storage + * + * Iterate over all collections and migrate each one. + * \return 0 on success, non-zero on failure + */ +int trove_migrate (TROVE_method_id method_id, const char* data_path, + const char* meta_path) +{ + TROVE_ds_position pos; + TROVE_coll_id coll_id; + TROVE_op_id op_id; + TROVE_keyval_s name = {0}; + struct migration_s *migrate_p; + int count; + int ret = 0; + int major; + int minor; + int incremental; + int i; + int migrated; +#ifdef DEBUG_MIGRATE_PERF + double s,e; + s = wtime(); +#endif + + count = 1; + pos = TROVE_ITERATE_START; + name.buffer = malloc(PATH_MAX); + name.buffer_sz = PATH_MAX; + + if (!name.buffer) + { + ret = errno; + gossip_err("malloc failed: errno=%d\n", errno); + goto complete; + } + memset(name.buffer,0,PATH_MAX); + + while (count > 0) + { + ret = trove_collection_iterate(method_id, + &pos, + &name, + &coll_id, + &count, + 0, + NULL, + NULL, + &op_id); + if (ret < 0) + { + gossip_err("trove_collection_iterate failed: \ +ret=%d method=%d pos=%lld name=%p coll=%d count=%d op=%lld\n", + ret, method_id, llu(pos), &name, + coll_id, count, llu(op_id)); + goto complete; + } + + for (i=0; imigrate != NULL; + migrate_p++) + { + if ((major <= migrate_p->major) && + (minor <= migrate_p->minor) && + (incremental <= migrate_p->incremental)) + { + gossip_err("Trove Migration Started: Ver=%d.%d.%d\n", + migrate_p->major, + migrate_p->minor, + migrate_p->incremental); + ret = migrate_p->migrate(coll_id, data_path, meta_path); + if (ret < 0) + { + gossip_err("migrate failed: \ +ret=%d coll=%d metadir=%s datadir=%s major=%d minor=%d incremental=%d\n", + ret, coll_id, meta_path, data_path, + 
migrate_p->major, migrate_p->minor, + migrate_p->incremental); + goto complete; + } + gossip_err("Trove Migration Complete: Ver=%d.%d.%d\n", + migrate_p->major, + migrate_p->minor, + migrate_p->incremental); + migrated = 1; + } + } + + if (migrated) + { + ret = sscanf(TROVE_DBPF_VERSION_VALUE, "%d.%d.%d", + &major, &minor, &incremental); + if (ret !=3) + { + gossip_err("sscanf failed: ret=%d\n", ret); + goto complete; + } + + ret = trove_put_version (coll_id, major, minor, incremental); + if (ret < 0) + { + gossip_err("trove_put_version failed: ret=%d coll=%d \ +ver=%d.%d.%d\n", + ret, coll_id, major, minor, incremental); + goto complete; + } + + gossip_err("Trove Version Set: %d.%d.%d\n", + major, minor, incremental); + } + } + } + +complete: + if (name.buffer) + { + free(name.buffer); + } +#ifdef DEBUG_MIGRATE_PERF + e = wtime(); + gossip_err("migrate time: %lf seconds\n", (e-s)); +#endif + return ret; +} + +/* + * migrate_collection_0_1_3 + * coll_id - collection id + * data_path - path to data storage + * meta_path - path to metadata storage + * + * For each datafile handle, check the file length and update the + * b_size attribute. 
+ * \return 0 on success, non-zero on failure + */ +static int migrate_collection_0_1_3 (TROVE_coll_id coll_id, + const char* data_path, + const char* meta_path) +{ + TROVE_context_id context_id = PVFS_CONTEXT_NULL; + TROVE_ds_position pos; + TROVE_ds_state state; + TROVE_op_id iterate_op_id; + TROVE_op_id setattr_op_id; + TROVE_op_id getattr_op_id; + TROVE_handle* handles; + TROVE_ds_attributes_s *attrs; + TROVE_ds_state *states; + TROVE_ds_state *completed_states; + TROVE_op_id *completed_ids; + void **user; + int base_count; + int handle_count; + int completed_count; + int op_count; + int ret; + int i, j, k; + int outstanding_op_count; + int immediate_completion; + + base_count = 10000; + + handles = malloc(sizeof(TROVE_handle)*base_count); + if (!handles) + { + gossip_err("malloc failed: errno=%d size=%d\n", + errno, (int)(sizeof(TROVE_handle)*base_count)); + return -1; + } + + attrs = malloc(sizeof(TROVE_ds_attributes_s)*base_count); + if (!attrs) + { + gossip_err("malloc failed: errno=%d size=%d\n", + errno, (int)(sizeof(TROVE_ds_attributes)*base_count)); + return -1; + } + + states = malloc(sizeof(TROVE_ds_state)*base_count); + if (!states) + { + gossip_err("malloc failed: errno=%d size=%d\n", + errno, (int)(sizeof(TROVE_ds_state)*base_count)); + return -1; + } + + completed_states = malloc(sizeof(TROVE_ds_state)*base_count); + if (!completed_states) + { + gossip_err("malloc failed: errno=%d size=%d\n", + errno, (int)(sizeof(TROVE_ds_state)*base_count)); + return -1; + } + + completed_ids = malloc(sizeof(TROVE_op_id)*base_count); + if (!completed_ids) + { + gossip_err("malloc failed: errno=%d size=%d\n", + errno, (int)(sizeof(TROVE_op_id)*base_count)); + return -1; + } + + user = (void**) malloc(sizeof(void*)*base_count); + if (!user) + { + gossip_err("malloc failed: errno=%d size=%d\n", + errno, (int)(sizeof(void*)*base_count)); + return -1; + } + for (i = 0; i < base_count; i++) + { + user[i] = NULL; + } + + ret = trove_open_context(coll_id, &context_id); 
+ if (ret < 0) + { + gossip_err("trove_open_context failed: ret=%d coll=%d\n", + ret, coll_id); + goto complete; + } + + immediate_completion = 1; + ret = trove_collection_setinfo(coll_id, context_id, + TROVE_COLLECTION_IMMEDIATE_COMPLETION, + &immediate_completion); + if (ret < 0) + { + gossip_err("trove_collection_setinfo failed: ret=%d coll=%d context=%lld\n", + ret, coll_id, lld(context_id)); + goto complete; + } + + pos = TROVE_ITERATE_START; + + do + { + outstanding_op_count = 0; + handle_count = base_count; + + ret = trove_dspace_iterate_handles(coll_id, + &pos, + handles, + &handle_count, + 0, + NULL, + NULL, + context_id, + &iterate_op_id); + if (ret < 0) + { + gossip_err("trove_dspace_iterate_handles failed: \ +ret=%d coll=%d pos=%lld handles=%p count=%d context=%lld op=%lld\n", + ret, coll_id, llu(pos), handles, handle_count, + llu(context_id), llu(iterate_op_id)); + goto complete; + } + TROVE_DSPACE_WAIT(ret, coll_id, iterate_op_id, context_id, \ + op_count, state, complete); + + ret = trove_dspace_getattr_list(coll_id, + handle_count, + handles, + attrs, + states, + 0, + NULL, + context_id, + &getattr_op_id, + PVFS_HINT_NULL); + if (ret < 0) + { + gossip_err("trove_dspace_getattr_list failed: \ +ret=%d coll=%d handles=%p attrs=%p states=%p count=%d context=%lld op=%lld\n", + ret, coll_id, handles, attrs, states, handle_count, + llu(context_id), llu(getattr_op_id)); + goto complete; + } + + TROVE_DSPACE_WAIT(ret, coll_id, getattr_op_id, context_id, \ + op_count, state, complete); + for (i = 0; i < handle_count; i++) + { + if (states[i] != 0) + { + ret = -1; + gossip_err("trove_dspace_getattr_list failure: \ +coll=%d context=%lld handle=%llu state=%d\n", + coll_id, lld(context_id), + llu(handles[i]), states[i]); + goto complete; + } + + if (attrs[i].type == PVFS_TYPE_DATAFILE) + { + struct stat stat_data; + char filename[PATH_MAX]; + TROVE_size b_size; + + DBPF_GET_BSTREAM_FILENAME(filename, + PATH_MAX, + data_path, + coll_id, + llu(handles[i])); + ret = 
stat(filename, &stat_data); + if ((ret != 0) && (errno == ENOENT)) + { + /* The bstream does not exist, assume this is due + * to lazy creation. + */ + b_size = 0; + } + else if (ret != 0) + { + gossip_err("stat failed: ret=%d errno=%d fname=%s\n", + ret, errno, filename); + goto complete; + } + else + { + b_size = (TROVE_size) stat_data.st_size; + } + + /* + * Set bstream size + */ + attrs[i].u.datafile.b_size = b_size; + + ret = trove_dspace_setattr(coll_id, + handles[i], + &(attrs[i]), + 0, + NULL, + context_id, + &setattr_op_id, + PVFS_HINT_NULL); + if (ret < 0) + { + gossip_err("trove_dspace_setattr failed: \ +ret=%d handle=%lld context=%lld op=%lld\n", + ret, llu(handles[i]), + lld(context_id), lld(setattr_op_id)); + goto complete; + } + + if (ret == 0) + { + outstanding_op_count++; + } + } + } + + for (j = outstanding_op_count; j > 0; ) + { + completed_count = base_count; + + ret = trove_dspace_testcontext(coll_id, + completed_ids, + &completed_count, + completed_states, + user, + 10, + context_id); + if (ret < 0) + { + gossip_err("trove_dspace_testcontext failed: ret=%d \ +coll=%d ids=%p count=%d states=%p context=%lld\n", + ret, coll_id, completed_ids, + completed_count, completed_states, + lld(context_id)); + goto complete; + } + + j -= completed_count; + + for (k = 0; k < completed_count; k++) + { + if (completed_states[k] != 0) + { + gossip_err("trove_dspace_testcontext failure: \ +coll=%d id=%lld state=%d\n", + coll_id, lld(completed_ids[k]), + completed_states[k]); + goto complete; + } + } + } + } while (handle_count > 0); + +complete: + + if (context_id != PVFS_CONTEXT_NULL) + { + int rc = trove_close_context(coll_id, context_id); + if (rc < 0) + { + ret = rc; + gossip_err("trove_close_context failed: ret=%d coll=%d \ +context=%lld\n", + ret, coll_id, llu(context_id)); + } + } + + if (handles) + { + free(handles); + } + + if (attrs) + { + free(attrs); + } + + if (states) + { + free(states); + } + + if (completed_states) + { + free(completed_states); 
+ } + + if (completed_ids) + { + free(completed_ids); + } + + if (user) + { + free(user); + } + + return ret; +} + + +/* + * migrate_collection_0_1_4 + * coll_id - collection id + * data_path - path to data storage + * meta_path - path to metadata storage + * + * Migrate existing precreate pool keys held in the collection attributes + * to include the handle type (PVFS_TYPE_DATAFILE) in the key. Since prior + * to this version only PVFS_TYPE_DATAFILE handles existed in a pool this + * is an easy conversion to make + * + * \return 0 on success, non-zero on failure + */ +static int migrate_collection_0_1_4 (TROVE_coll_id coll_id, + const char* data_path, + const char* meta_path) +{ + int ret=0, i=0, server_count=0, server_type=0, count=0, pool_key_len=0; + const char *host; + /* hostname + pool key string + handle type size */ + char pool_key[PVFS_MAX_SERVER_ADDR_LEN + 28] = { 0 }; + char type_string[11] = { 0 }; + TROVE_context_id context_id = PVFS_CONTEXT_NULL; + TROVE_keyval_s key, data; + TROVE_op_id delattr_op_id, getattr_op_id, setattr_op_id; + TROVE_ds_state state; + PVFS_BMI_addr_t* addr_array = NULL; + PVFS_handle handle = PVFS_HANDLE_NULL; + + struct server_configuration_s *user_opts = get_server_config_struct(); + assert(user_opts); + + gossip_debug(GOSSIP_TROVE_DEBUG, "%s: %d, %s, %s\n", + __func__, coll_id, data_path, meta_path); + + ret = trove_open_context(coll_id, &context_id); + if (ret < 0) + { + gossip_err("%s: trove_open_context failed: ret=%d coll=%d\n", __func__, + ret, coll_id); + return ret; + } + + /* for completeness we will check even if this server claims it's not a + * metadata server to make sure we get all precreate pool handles updated. + * If it doesn't have any defined then our geteattr calls will just return + * with no record, Also check all peer servers for a precreate pool for + * the same reason (and it's easier anyway). 
*/ + ret = PINT_cached_config_count_servers( coll_id, PINT_SERVER_TYPE_ALL, + &server_count); + if(ret < 0) + { + gossip_err("%s: error: unable to count servers for fsid: %d\n", + __func__, (int)coll_id); + return ret; + } + + addr_array = calloc(server_count, sizeof(PVFS_BMI_addr_t)); + if(!addr_array) + { + gossip_err("%s: error: unable to allocate addr array for precreate " + "pools.\n", __func__); + ret = -PVFS_ENOMEM; + goto complete; + } + + /* resolve addrs for each I/O server */ + ret = PINT_cached_config_get_server_array(coll_id, PINT_SERVER_TYPE_ALL, + addr_array, &server_count); + if(ret < 0) + { + gossip_err("%s: error: unable retrieve servers addrs\n", __func__); + goto complete; + } + + /* check each server for a precreate pool and check for only one pool since + * that's all there was prior to this version */ + for(i=0; i " + "(%llu)\n", __func__, (char *)key.buffer, + llu(*(PVFS_handle *)data.buffer)); + ret = trove_collection_seteattr(coll_id, &key, &data, 0, NULL, + context_id, &setattr_op_id); + if (ret < 0) + { + gossip_err("%s: trove_collection_setattr failed: \ + ret=%d coll=%d context=%lld op=%lld\n", __func__, + ret, coll_id, llu(context_id), llu(setattr_op_id)); + goto complete; + } + TROVE_DSPACE_WAIT(ret, coll_id, setattr_op_id, context_id, count, state, + complete); + gossip_debug(GOSSIP_TROVE_DEBUG, "%s: successfully migrated pool %s\n", + __func__, (char *)key.buffer); + } // for each server + + /* if we just came out of the loop force ret to 0, we don't want a bad + * key lookup to spoil the whole migration (since it's expected) */ + ret = 0; + +complete: + if (context_id != PVFS_CONTEXT_NULL) + { + int rc = trove_close_context(coll_id, context_id); + if (rc < 0) + { + ret = rc; + gossip_err("%s: trove_close_context failed: ret=%d coll=%d " \ + "context=%lld\n", __func__, ret, coll_id, + llu(context_id)); + } + } + + if( addr_array ) + { + free(addr_array); + } + + return ret; +} diff --git a/src/io/trove/trove-types.h 
b/src/io/trove/trove-types.h index 0050529..5c983dd 100644 --- a/src/io/trove/trove-types.h +++ b/src/io/trove/trove-types.h @@ -41,7 +41,6 @@ typedef PVFS_ds_keyval_handle_info TROVE_keyval_handle_info; typedef PVFS_ds_position TROVE_ds_position; typedef PVFS_ds_attributes TROVE_ds_attributes_s; typedef PVFS_ds_attributes TROVE_ds_attributes; -typedef PVFS_ds_storedattr TROVE_ds_storedattr_s; typedef PVFS_error TROVE_ds_state; typedef PVFS_context_id TROVE_context_id; typedef PVFS_statfs TROVE_statfs; @@ -51,7 +50,9 @@ typedef PVFS_object_ref TROVE_object_ref; typedef enum { TROVE_METHOD_DBPF = 0, - TROVE_METHOD_DBPF_ALTAIO + TROVE_METHOD_DBPF_ALTAIO, + TROVE_METHOD_DBPF_NULLAIO, + TROVE_METHOD_DBPF_DIRECTIO } TROVE_method_id; typedef TROVE_method_id (*TROVE_method_callback)(TROVE_coll_id); diff --git a/src/io/trove/trove.c b/src/io/trove/trove.c index 34ac45c..5ea0abf 100644 --- a/src/io/trove/trove.c +++ b/src/io/trove/trove.c @@ -48,7 +48,8 @@ int trove_bstream_read_at( TROVE_vtag_s* vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -68,7 +69,8 @@ int trove_bstream_read_at( vtag, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } /** Initiate writing from a contiguous region in memory into a @@ -84,7 +86,8 @@ int trove_bstream_write_at( TROVE_vtag_s* vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -104,7 +107,8 @@ int trove_bstream_write_at( vtag, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } /** Initiate resizing of a bstream. 
This may be used to grow or @@ -119,7 +123,8 @@ int trove_bstream_resize( TROVE_vtag_s* vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -137,7 +142,8 @@ int trove_bstream_resize( vtag, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } int trove_bstream_validate( @@ -147,7 +153,8 @@ int trove_bstream_validate( TROVE_vtag_s* vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -164,7 +171,8 @@ int trove_bstream_validate( vtag, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } /** Initiate reading from a list of regions in a bstream into @@ -185,7 +193,8 @@ int trove_bstream_read_list( TROVE_vtag_s* vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -209,7 +218,8 @@ int trove_bstream_read_list( vtag, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } /** Initiate writing from a list of regions in memory into a @@ -230,7 +240,8 @@ int trove_bstream_write_list( TROVE_vtag_s* vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -254,7 +265,8 @@ int trove_bstream_write_list( vtag, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } /** Initiate movement of all data to storage devices for a specific @@ -266,7 +278,8 @@ int trove_bstream_flush( TROVE_ds_flags flags, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -282,7 +295,8 @@ int trove_bstream_flush( flags, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } /** Initiate read of a single keyword/value pair. 
@@ -296,7 +310,8 @@ int trove_keyval_read( TROVE_vtag_s* vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -309,8 +324,11 @@ int trove_keyval_read( /* Check arguments */ if (key_p->buffer_sz < 2) return -TROVE_EINVAL; - if (((char *)key_p->buffer)[key_p->buffer_sz-1] != 0) - return -TROVE_EINVAL; + if(!(flags & TROVE_BINARY_KEY)) + { + if (((char *)key_p->buffer)[key_p->buffer_sz-1] != 0) + return -TROVE_EINVAL; + } return keyval_method_table[method_id]->keyval_read( coll_id, @@ -321,7 +339,8 @@ int trove_keyval_read( vtag, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } /** Initiate write of a single keyword/value pair. @@ -340,7 +359,8 @@ int trove_keyval_write( TROVE_vtag_s* vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -353,8 +373,11 @@ int trove_keyval_write( /* Check arguments */ if (key_p->buffer_sz < 2) return -TROVE_EINVAL; - if (((char *)key_p->buffer)[key_p->buffer_sz-1] != 0) - return -TROVE_EINVAL; + if(!(flags & TROVE_BINARY_KEY)) + { + if (((char *)key_p->buffer)[key_p->buffer_sz-1] != 0) + return -TROVE_EINVAL; + } return keyval_method_table[method_id]->keyval_write( coll_id, @@ -365,7 +388,8 @@ int trove_keyval_write( vtag, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } /** Initiate removal of a keyword/value pair from a given data space. 
@@ -379,7 +403,8 @@ int trove_keyval_remove( TROVE_vtag_s* vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -398,7 +423,8 @@ int trove_keyval_remove( vtag, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } int trove_keyval_validate( @@ -408,7 +434,8 @@ int trove_keyval_validate( TROVE_vtag_s* vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -425,7 +452,8 @@ int trove_keyval_validate( vtag, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } int trove_keyval_iterate( @@ -439,7 +467,8 @@ int trove_keyval_iterate( TROVE_vtag_s* vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -460,7 +489,8 @@ int trove_keyval_iterate( vtag, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } int trove_keyval_iterate_keys( @@ -473,7 +503,8 @@ int trove_keyval_iterate_keys( TROVE_vtag_s* vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -493,7 +524,8 @@ int trove_keyval_iterate_keys( vtag, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } /** Initiate read of multiple keyword/value pairs from the same @@ -510,7 +542,8 @@ int trove_keyval_read_list( TROVE_vtag_s* vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; int i; @@ -526,8 +559,11 @@ int trove_keyval_read_list( { if (key_array[i].buffer_sz < 2) return -TROVE_EINVAL; - if (((char *)key_array[i].buffer)[key_array[i].buffer_sz-1] != 0) - return -TROVE_EINVAL; + if(!(flags & TROVE_BINARY_KEY)) + { + if (((char 
*)key_array[i].buffer)[key_array[i].buffer_sz-1] != 0) + return -TROVE_EINVAL; + } } return keyval_method_table[method_id]->keyval_read_list( @@ -541,7 +577,8 @@ int trove_keyval_read_list( vtag, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } /** Initiate storing of multiple keyword/value pairs to the same @@ -557,7 +594,8 @@ int trove_keyval_write_list( TROVE_vtag_s* vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { int i; TROVE_method_id method_id; @@ -573,8 +611,11 @@ int trove_keyval_write_list( { if (key_array[i].buffer_sz < 2) return -TROVE_EINVAL; - if (((char *)key_array[i].buffer)[key_array[i].buffer_sz-1] != 0) - return -TROVE_EINVAL; + if(!(flags & TROVE_BINARY_KEY)) + { + if (((char *)key_array[i].buffer)[key_array[i].buffer_sz-1] != 0) + return -TROVE_EINVAL; + } } return keyval_method_table[method_id]->keyval_write_list( @@ -587,7 +628,61 @@ int trove_keyval_write_list( vtag, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); +} + +/** Initiate removal of multiple keyword/value pairs from the same + * data space as a single operation. 
+ */ +int trove_keyval_remove_list( + TROVE_coll_id coll_id, + TROVE_handle handle, + TROVE_keyval_s* key_array, + TROVE_keyval_s* val_array, + int *error_array, + int count, + TROVE_ds_flags flags, + TROVE_vtag_s* vtag, + void* user_ptr, + TROVE_context_id context_id, + TROVE_op_id* out_op_id_p, + PVFS_hint hints) +{ + int i; + TROVE_method_id method_id; + + method_id = global_trove_method_callback(coll_id); + if(method_id < 0) + { + return -TROVE_EINVAL; + } + + /* Check arguments */ + for (i = 0; i < count; i++) + { + if (key_array[i].buffer_sz < 2) + return -TROVE_EINVAL; + if(!(flags & TROVE_BINARY_KEY)) + { + if (((char *)key_array[i].buffer)[key_array[i].buffer_sz-1] != 0) + return -TROVE_EINVAL; + } + } + + return keyval_method_table[method_id]->keyval_remove_list( + coll_id, + handle, + key_array, + val_array, + error_array, + count, + flags, + vtag, + user_ptr, + context_id, + out_op_id_p, + hints); } /** Initiate movement of all keyword/value pairs to storage for a given @@ -599,7 +694,8 @@ int trove_keyval_flush( TROVE_ds_flags flags, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -615,7 +711,8 @@ int trove_keyval_flush( flags, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } int trove_keyval_get_handle_info(TROVE_coll_id coll_id, @@ -624,7 +721,8 @@ int trove_keyval_get_handle_info(TROVE_coll_id coll_id, TROVE_keyval_handle_info *info, void * user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p) + TROVE_op_id *out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -641,7 +739,45 @@ int trove_keyval_get_handle_info(TROVE_coll_id coll_id, info, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); +} + +/** Initiate creation of multiple new data spaces. 
+ */ +int trove_dspace_create_list( + TROVE_coll_id coll_id, + TROVE_handle_extent_array* handle_extent_array, + TROVE_handle* out_handle_array, + int count, + TROVE_ds_type type, + TROVE_keyval_s* hint, + TROVE_ds_flags flags, + void* user_ptr, + TROVE_context_id context_id, + TROVE_op_id* out_op_id_p, + PVFS_hint hints) +{ + TROVE_method_id method_id; + + method_id = global_trove_method_callback(coll_id); + if(method_id < 0) + { + return -TROVE_EINVAL; + } + + return dspace_method_table[method_id]->dspace_create_list( + coll_id, + handle_extent_array, + out_handle_array, + count, + type, + hint, + flags, + user_ptr, + context_id, + out_op_id_p, + hints); } /** Initiate creation of a new data space. @@ -655,7 +791,8 @@ int trove_dspace_create( TROVE_ds_flags flags, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -674,6 +811,39 @@ int trove_dspace_create( flags, user_ptr, context_id, + out_op_id_p, + hints); +} + +/** Initiate removal of a list of data spaces. 
+ */ +int trove_dspace_remove_list( + TROVE_coll_id coll_id, + TROVE_handle* handle_array, + TROVE_ds_state* error_array, + int count, + TROVE_ds_flags flags, + void* user_ptr, + TROVE_context_id context_id, + TROVE_op_id* out_op_id_p, + PVFS_hint hints) +{ + TROVE_method_id method_id; + + method_id = global_trove_method_callback(coll_id); + if(method_id < 0) + { + return -TROVE_EINVAL; + } + + return dspace_method_table[method_id]->dspace_remove_list( + coll_id, + handle_array, + error_array, + count, + flags, + user_ptr, + context_id, out_op_id_p); } @@ -685,7 +855,8 @@ int trove_dspace_remove( TROVE_ds_flags flags, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -701,7 +872,8 @@ int trove_dspace_remove( flags, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } int trove_dspace_iterate_handles( @@ -742,7 +914,8 @@ int trove_dspace_verify( TROVE_ds_flags flags, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -759,7 +932,8 @@ int trove_dspace_verify( flags, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } /** Initiate retrieval of attributes for a given data space. @@ -771,7 +945,8 @@ int trove_dspace_getattr( TROVE_ds_flags flags, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -788,7 +963,8 @@ int trove_dspace_getattr( flags, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } /** Initiate retrieval of attributes for a list of handles. 
@@ -802,7 +978,8 @@ int trove_dspace_getattr_list( TROVE_ds_flags flags, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { int method_id; @@ -819,7 +996,8 @@ int trove_dspace_getattr_list( flags, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } int trove_dspace_setattr( @@ -829,7 +1007,8 @@ int trove_dspace_setattr( TROVE_ds_flags flags, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p) + TROVE_op_id* out_op_id_p, + PVFS_hint hints) { TROVE_method_id method_id; @@ -846,7 +1025,8 @@ int trove_dspace_setattr( flags, user_ptr, context_id, - out_op_id_p); + out_op_id_p, + hints); } int trove_dspace_cancel( @@ -1015,6 +1195,31 @@ int trove_collection_seteattr( out_op_id_p); } +int trove_collection_deleattr( + TROVE_coll_id coll_id, + TROVE_keyval_s* key_p, + TROVE_ds_flags flags, + void* user_ptr, + TROVE_context_id context_id, + TROVE_op_id* out_op_id_p) +{ + TROVE_method_id method_id; + + method_id = global_trove_method_callback(coll_id); + if(method_id < 0) + { + return -TROVE_EINVAL; + } + + return mgmt_method_table[method_id]->collection_deleattr( + coll_id, + key_p, + flags, + user_ptr, + context_id, + out_op_id_p); +} + int trove_collection_getinfo( TROVE_coll_id coll_id, TROVE_context_id context_id, diff --git a/src/io/trove/trove.h b/src/io/trove/trove.h index cbc67c3..9f62a9f 100644 --- a/src/io/trove/trove.h +++ b/src/io/trove/trove.h @@ -59,7 +59,9 @@ enum TROVE_DB_CACHE_MMAP = 1 << 5, TROVE_DB_CACHE_SYS = 1 << 6, - TROVE_KEYVAL_HANDLE_COUNT = 1 << 7 + TROVE_KEYVAL_HANDLE_COUNT = 1 << 7, + TROVE_BINARY_KEY = 1 << 8, /* tell trove this is a binary key */ + TROVE_KEYVAL_ITERATE_REMOVE = 1 << 9 /* tell trove to delete keyvals as it iterates */ }; enum @@ -85,7 +87,10 @@ enum TROVE_COLLECTION_COALESCING_LOW_WATERMARK, TROVE_COLLECTION_META_SYNC_MODE, TROVE_COLLECTION_IMMEDIATE_COMPLETION, - TROVE_SHM_KEY_HINT + TROVE_SHM_KEY_HINT, + 
TROVE_DIRECTIO_THREADS_NUM, + TROVE_DIRECTIO_OPS_PER_QUEUE, + TROVE_DIRECTIO_TIMEOUT }; /** Initializes the Trove layer. Must be called before any other Trove @@ -94,11 +99,15 @@ enum int trove_initialize( TROVE_method_id method_id, TROVE_method_callback method_callback, - char *stoname, + char *data_path, + char *meta_path, TROVE_ds_flags flags); int trove_finalize(TROVE_method_id method_id); +int trove_migrate(TROVE_method_id method_id, const char* data_path, + const char* meta_path); + int trove_open_context( TROVE_coll_id coll_id, TROVE_context_id *context_id); @@ -107,20 +116,25 @@ int trove_close_context( TROVE_coll_id coll_id, TROVE_context_id context_id); +int trove_collection_clear( + TROVE_method_id method_id, + TROVE_coll_id coll_id); + int trove_storage_create( TROVE_method_id method_id, - char *stoname, + char *data_path, + char *meta_path, void *user_ptr, TROVE_op_id *out_op_id_p); int trove_storage_remove( TROVE_method_id method_id, - char *stoname, + char *data_path, + char *meta_path, void *user_ptr, TROVE_op_id *out_op_id_p); int trove_collection_create( -/* char *stoname, */ char *collname, TROVE_coll_id new_coll_id, void *user_ptr, @@ -128,14 +142,12 @@ int trove_collection_create( int trove_collection_remove( TROVE_method_id method_id, -/* char *stoname, */ char *collname, void *user_ptr, TROVE_op_id *out_op_id_p); int trove_collection_lookup( TROVE_method_id method_id, -/* char *stoname, */ char *collname, TROVE_coll_id *out_coll_id_p, void *user_ptr, @@ -162,7 +174,8 @@ int trove_bstream_read_at( TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_bstream_write_at( TROVE_coll_id coll_id, @@ -174,7 +187,8 @@ int trove_bstream_write_at( TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_bstream_resize( TROVE_coll_id coll_id, @@ -184,7 +198,8 @@ 
int trove_bstream_resize( TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_bstream_validate( TROVE_coll_id coll_id, @@ -193,7 +208,8 @@ int trove_bstream_validate( TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_bstream_read_list( TROVE_coll_id coll_id, @@ -209,7 +225,8 @@ int trove_bstream_read_list( TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_bstream_write_list( TROVE_coll_id coll_id, @@ -225,14 +242,16 @@ int trove_bstream_write_list( TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_bstream_flush(TROVE_coll_id coll_id, TROVE_handle handle, TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_keyval_read( TROVE_coll_id coll_id, @@ -243,7 +262,8 @@ int trove_keyval_read( TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_keyval_write( TROVE_coll_id coll_id, @@ -254,7 +274,8 @@ int trove_keyval_write( TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_keyval_remove( TROVE_coll_id coll_id, @@ -265,7 +286,8 @@ int trove_keyval_remove( TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_keyval_validate( TROVE_coll_id coll_id, @@ -274,7 +296,8 @@ int trove_keyval_validate( TROVE_vtag_s *vtag, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id 
*out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_keyval_iterate( TROVE_coll_id coll_id, @@ -287,7 +310,8 @@ int trove_keyval_iterate( TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_keyval_iterate_keys( TROVE_coll_id coll_id, @@ -299,7 +323,8 @@ int trove_keyval_iterate_keys( TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_keyval_read_list(TROVE_coll_id coll_id, TROVE_handle handle, @@ -311,7 +336,8 @@ int trove_keyval_read_list(TROVE_coll_id coll_id, TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_keyval_write_list( TROVE_coll_id coll_id, @@ -323,14 +349,29 @@ int trove_keyval_write_list( TROVE_vtag_s *vtag, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); + +int trove_keyval_remove_list(TROVE_coll_id coll_id, + TROVE_handle handle, + TROVE_keyval_s *key_array, + TROVE_keyval_s *val_array, + int *error_array, + int count, + TROVE_ds_flags flags, + TROVE_vtag_s *vtag, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_keyval_flush(TROVE_coll_id coll_id, - TROVE_handle handle, - TROVE_ds_flags flags, - void *user_ptr, - TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_handle handle, + TROVE_ds_flags flags, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_keyval_get_handle_info(TROVE_coll_id coll_id, TROVE_handle handle, @@ -338,7 +379,8 @@ int trove_keyval_get_handle_info(TROVE_coll_id coll_id, TROVE_keyval_handle_info *info, void * user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + 
PVFS_hint hints); int trove_dspace_create(TROVE_coll_id coll_id, TROVE_handle_extent_array *handle_extent_array, @@ -348,14 +390,38 @@ int trove_dspace_create(TROVE_coll_id coll_id, TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); + +int trove_dspace_create_list(TROVE_coll_id coll_id, + TROVE_handle_extent_array *handle_extent_array, + TROVE_handle *out_handle_array, + int count, + TROVE_ds_type type, + TROVE_keyval_s *hint, + TROVE_ds_flags flags, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_dspace_remove(TROVE_coll_id coll_id, TROVE_handle handle, TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); + +int trove_dspace_remove_list(TROVE_coll_id coll_id, + TROVE_handle* handle_array, + TROVE_ds_state *error_array, + int count, + TROVE_ds_flags flags, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_dspace_iterate_handles(TROVE_coll_id coll_id, TROVE_ds_position *position_p, @@ -373,7 +439,8 @@ int trove_dspace_verify(TROVE_coll_id coll_id, TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_dspace_getattr(TROVE_coll_id coll_id, @@ -382,7 +449,8 @@ int trove_dspace_getattr(TROVE_coll_id coll_id, TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_dspace_getattr_list(TROVE_coll_id coll_id, int nhandles, @@ -392,7 +460,8 @@ int trove_dspace_getattr_list(TROVE_coll_id coll_id, TROVE_ds_flags flags, void* user_ptr, TROVE_context_id context_id, - TROVE_op_id* out_op_id_p); + TROVE_op_id* out_op_id_p, + PVFS_hint hints); int trove_dspace_setattr(TROVE_coll_id coll_id, 
TROVE_handle handle, @@ -400,7 +469,8 @@ int trove_dspace_setattr(TROVE_coll_id coll_id, TROVE_ds_flags flags, void *user_ptr, TROVE_context_id context_id, - TROVE_op_id *out_op_id_p); + TROVE_op_id *out_op_id_p, + PVFS_hint hints); int trove_dspace_cancel(TROVE_coll_id coll_id, TROVE_op_id id, @@ -451,6 +521,14 @@ int trove_collection_seteattr( TROVE_context_id context_id, TROVE_op_id *out_op_id_p); +int trove_collection_deleattr( + TROVE_coll_id coll_id, + TROVE_keyval_s *key_p, + TROVE_ds_flags flags, + void *user_ptr, + TROVE_context_id context_id, + TROVE_op_id *out_op_id_p); + int trove_collection_getinfo( TROVE_coll_id coll_id, TROVE_context_id context_id, diff --git a/src/kernel/linux-2.4/Makefile.in b/src/kernel/linux-2.4/Makefile.in index ed326e6..828ea77 100644 --- a/src/kernel/linux-2.4/Makefile.in +++ b/src/kernel/linux-2.4/Makefile.in @@ -68,8 +68,11 @@ EXTRA_CFLAGS = \ -I$(absolute_src_dir)/ \ -I$(absolute_build_dir)/ \ -I$(absolute_src_dir)/include \ + -I$(absolute_build_dir)/include \ -I$(absolute_src_dir)/src/io/dev \ + -I$(absolute_src_dir)/src/io/bmi \ -I$(absolute_src_dir)/src/common/quickhash \ + -I$(absolute_src_dir)/src/proto \ -I$(absolute_src_dir)/src/common/gossip \ -I$(absolute_src_dir)/src/common/misc diff --git a/src/kernel/linux-2.6/.pvfs-request.o.d b/src/kernel/linux-2.6/.pvfs-request.o.d new file mode 100644 index 0000000..e69de29 diff --git a/src/kernel/linux-2.6/Makefile.in b/src/kernel/linux-2.6/Makefile.in index 94bd5b9..0280a46 100644 --- a/src/kernel/linux-2.6/Makefile.in +++ b/src/kernel/linux-2.6/Makefile.in @@ -60,16 +60,7 @@ hsrc = \ pvfs2-proc.h \ osd.h -desc_here := src/io/description -desc_csrc := \ - pvfs-request.c \ - pint-request.c \ - pint-distribution.c \ - pint-dist-utils.c \ - dist-basic.c \ - dist-simple-stripe.c - -objs = $(csrc:.c=.o) $(desc_csrc:.c=.o) +objs = $(csrc:.c=.o) othergen = pvfs2.o pvfs2.ko pvfs2.mod.c pvfs2.mod.o othergendir = .tmp_versions # around 2.6.6 this is generated locally cmds = 
$(patsubst %,.%.cmd,$(objs) $(othergen)) @@ -80,15 +71,16 @@ EXTRA_CFLAGS = \ -I$(absolute_src_dir)/ \ -I$(absolute_build_dir)/ \ -I$(absolute_src_dir)/include \ + -I$(absolute_build_dir)/include \ -I$(absolute_src_dir)/src/io/dev \ -I$(absolute_src_dir)/src/io/bmi \ - -I$(absolute_src_dir)/src/io/description \ -I$(absolute_src_dir)/src/common/quickhash \ -I$(absolute_src_dir)/src/proto \ -I$(absolute_src_dir)/src/common/gossip \ -I$(absolute_src_dir)/src/common/misc EXTRA_CFLAGS += @MMAP_RA_CACHE@ +EXTRA_CFLAGS += @RESET_FILE_POS@ EXTRA_CFLAGS += -DPVFS2_VERSION="\"@PVFS2_VERSION@\"" # uncomment the following line for kernel specific @@ -106,24 +98,18 @@ KDIR := @LINUX_KERNEL_SRC@ PWD := $(shell pwd) default: links - $(E)$(MAKE) -C $(KDIR) SUBDIRS=$(PWD) modules + $(E)$(MAKE) -C $(KDIR) SUBDIRS=$(PWD) modules Q=@ -# link to real source directory if out-of-tree build, and in-tree for -# sources from src/io/description +# link to real source directory if out-of-tree build links: $(E)for i in $(csrc) $(hsrc); do \ if [ ! -f $$i -a ! -L $$i ] ; then \ ln -s $(relative_src_dir)/$(here)/$$i ;\ fi ;\ done - $(E)for i in $(desc_csrc); do \ - if [ ! -f $$i -a ! 
-L $$i ] ; then \ - ln -s $(relative_src_dir)/$(desc_here)/$$i ;\ - fi ;\ - done clean: - $(E)for i in $(csrc) $(hsrc) $(desc_csrc); do \ + $(E)for i in $(csrc) $(hsrc); do \ if [ -L $$i ] ; then \ rm -f $$i ;\ fi ;\ diff --git a/src/kernel/linux-2.6/Module.symvers b/src/kernel/linux-2.6/Module.symvers new file mode 100644 index 0000000..e69de29 diff --git a/src/kernel/linux-2.6/acl.c b/src/kernel/linux-2.6/acl.c index 252a445..aed8f2a 100644 --- a/src/kernel/linux-2.6/acl.c +++ b/src/kernel/linux-2.6/acl.c @@ -31,6 +31,7 @@ #include #endif #include "bmi-byteswap.h" +#include /* * Encoding and Decoding the extended attributes so that we can @@ -278,7 +279,11 @@ pvfs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) name = PVFS2_XATTR_NAME_ACL_ACCESS; if (acl) { +#ifdef HAVE_POSIX_ACL_EQUIV_MODE_UMODE_T + umode_t mode = inode->i_mode; +#else mode_t mode = inode->i_mode; +#endif /* HAVE_POSIX_ACL_EQUIV_MODE_UMODE_T */ /* can we represent this with the UNIXy permission bits? */ error = posix_acl_equiv_mode(acl, &mode); /* uh oh some error.. 
*/ @@ -379,8 +384,19 @@ pvfs2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) return error; } -static int pvfs2_xattr_get_acl_access(struct inode *inode, - const char *name, void *buffer, size_t size) +static int pvfs2_xattr_get_acl_access( +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + struct dentry *dentry, +#else + struct inode *inode, +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + const char *name, + void *buffer, + size_t size +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + , int handler_flag +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + ) { gossip_debug(GOSSIP_ACL_DEBUG, "pvfs2_xattr_get_acl_access %s\n", name); if (strcmp(name, "") != 0) @@ -388,11 +404,26 @@ static int pvfs2_xattr_get_acl_access(struct inode *inode, gossip_err("get_acl_access invalid name %s\n", name); return -EINVAL; } +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + return pvfs2_xattr_get_acl(dentry->d_inode, ACL_TYPE_ACCESS, buffer, size); +#else return pvfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ } -static int pvfs2_xattr_get_acl_default(struct inode *inode, - const char *name, void *buffer, size_t size) +static int pvfs2_xattr_get_acl_default( +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + struct dentry *dentry, +#else + struct inode *inode, +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + const char *name, + void *buffer, + size_t size +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + , int handler_flags +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + ) { gossip_debug(GOSSIP_ACL_DEBUG, "pvfs2_xattr_get_acl_default %s\n", name); if (strcmp(name, "") != 0) @@ -400,15 +431,24 @@ static int pvfs2_xattr_get_acl_default(struct inode *inode, gossip_err("get_acl_default invalid name %s\n", name); return -EINVAL; } +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + return pvfs2_xattr_get_acl(dentry->d_inode, ACL_TYPE_DEFAULT, buffer, size); +#else return pvfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); +#endif /* 
HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ } -static int -pvfs2_xattr_set_acl(struct inode *inode, int type, const void *value, +static int pvfs2_xattr_set_acl( +struct inode *inode, int type, const void *value, size_t size) { struct posix_acl *acl; int error; +#ifdef HAVE_CURRENT_FSUID + int fsuid = current_fsuid(); +#else + int fsuid = current->fsuid; +#endif gossip_debug(GOSSIP_ACL_DEBUG, "pvfs2_xattr_set_acl called with size %ld\n", (long)size); @@ -420,11 +460,11 @@ pvfs2_xattr_set_acl(struct inode *inode, int type, const void *value, return -EOPNOTSUPP; } /* Are we capable of setting acls on a file for which we should not be? */ - if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + if ((fsuid != inode->i_uid) && !capable(CAP_FOWNER)) { gossip_err("pvfs2_xattr_set_acl: operation not permitted " "(current->fsuid %d), (inode->owner %d)\n", - current->fsuid, inode->i_uid); + fsuid, inode->i_uid); return -EPERM; } if (value) @@ -459,8 +499,20 @@ pvfs2_xattr_set_acl(struct inode *inode, int type, const void *value, return error; } -static int pvfs2_xattr_set_acl_access(struct inode *inode, - const char *name, const void *buffer, size_t size, int flags) +static int pvfs2_xattr_set_acl_access( +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + struct dentry *dentry, +#else + struct inode *inode, +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + const char *name, + const void *buffer, + size_t size, + int flags +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + , int handler_flags +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + ) { gossip_debug(GOSSIP_ACL_DEBUG, "pvfs2_xattr_set_acl_access: %s\n", name); if (strcmp(name, "") != 0) @@ -468,11 +520,27 @@ static int pvfs2_xattr_set_acl_access(struct inode *inode, gossip_err("set_acl_access invalid name %s\n", name); return -EINVAL; } +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + return pvfs2_xattr_set_acl(dentry->d_inode, ACL_TYPE_ACCESS, buffer, size); +#else return pvfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, buffer, size); +#endif 
/* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ } -static int pvfs2_xattr_set_acl_default(struct inode *inode, - const char *name, const void *buffer, size_t size, int flags) +static int pvfs2_xattr_set_acl_default( +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + struct dentry *dentry, +#else + struct inode *inode, +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + const char *name, + const void *buffer, + size_t size, + int flags +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + , int handler_flags +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + ) { gossip_debug(GOSSIP_ACL_DEBUG, "pvfs2_xattr_set_acl_default: %s\n", name); if (strcmp(name, "") != 0) @@ -480,7 +548,11 @@ static int pvfs2_xattr_set_acl_default(struct inode *inode, gossip_err("set_acl_default invalid name %s\n", name); return -EINVAL; } +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + return pvfs2_xattr_set_acl(dentry->d_inode, ACL_TYPE_DEFAULT, buffer, size); +#else return pvfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, buffer, size); +#endif } struct xattr_handler pvfs2_xattr_acl_access_handler = { @@ -535,8 +607,14 @@ int pvfs2_init_acl(struct inode *inode, struct inode *dir) } if (get_acl_flag(inode) == 1 && acl) { - struct posix_acl *clone; +#ifdef HAVE_POSIX_ACL_CREATE + umode_t mode; +#elif defined(HAVE_POSIX_ACL_CLONE) + struct posix_acl *clone = NULL; mode_t mode; +#else + #error No posix_acl_create or posix_acl_clone defined +#endif /* HAVE_POSIX_ACL_CREATE */ if (S_ISDIR(inode->i_mode)) { @@ -548,6 +626,9 @@ int pvfs2_init_acl(struct inode *inode, struct inode *dir) goto cleanup; } } +#ifdef HAVE_POSIX_ACL_CREATE + error = posix_acl_create(&acl, GFP_KERNEL, &mode); +#elif defined(HAVE_POSIX_ACL_CLONE) clone = posix_acl_clone(acl, GFP_KERNEL); error = -ENOMEM; if (!clone) { @@ -557,10 +638,18 @@ int pvfs2_init_acl(struct inode *inode, struct inode *dir) } mode = inode->i_mode; error = posix_acl_create_masq(clone, &mode); +#else + #error No posix_acl_create or posix_acl_clone defined +#endif /* HAVE_POSIX_ACL_CREATE */ if 
(error >= 0) { +#ifdef HAVE_POSIX_ACL_CREATE + gossip_debug(GOSSIP_ACL_DEBUG, "posix_acl_create changed mode " + "from %o to %o\n", inode->i_mode, mode); +#else gossip_debug(GOSSIP_ACL_DEBUG, "posix_acl_create_masq changed mode " "from %o to %o\n", inode->i_mode, mode); +#endif /* HAVE_POSIX_ACL_CREATE */ /* * Dont do a needless ->setattr() if mode has not changed */ @@ -573,11 +662,19 @@ int pvfs2_init_acl(struct inode *inode, struct inode *dir) */ if (error > 0) { +#ifdef HAVE_POSIX_ACL_CREATE + error = pvfs2_set_acl(inode, ACL_TYPE_ACCESS, acl); +#elif defined(HAVE_POSIX_ACL_CLONE) error = pvfs2_set_acl(inode, ACL_TYPE_ACCESS, clone); +#else + #error No posix_acl_create or posix_acl_clone defined +#endif /* HAVE_POSIX_ACL_CREATE */ gossip_debug(GOSSIP_ACL_DEBUG, "pvfs2_set_acl (access) returned %d\n", error); } } +#ifdef HAVE_POSIX_ACL_CLONE posix_acl_release(clone); +#endif /* HAVE_POSIX_ACL_CREATE */ } /* If mode of the inode was changed, then do a forcible ->setattr */ if (ModeFlag(pvfs2_inode)) @@ -596,7 +693,10 @@ int pvfs2_init_acl(struct inode *inode, struct inode *dir) */ int pvfs2_acl_chmod(struct inode *inode) { - struct posix_acl *acl, *clone; + struct posix_acl *acl = NULL; +#ifdef HAVE_POSIX_ACL_CLONE + struct posix_acl *clone = NULL; +#endif /* HAVE_POSIX_ACL_CLONE */ int error; if (get_acl_flag(inode) == 0) @@ -623,14 +723,36 @@ int pvfs2_acl_chmod(struct inode *inode) error = 0; goto out; } +#ifdef HAVE_POSIX_ACL_CHMOD + error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); +#else + error = posix_acl_chmod_masq(acl, inode->i_mode); +#endif /* HAVE_POSIX_ACL_CHMOD */ + if (!error) + { + error = pvfs2_set_acl(inode, ACL_TYPE_ACCESS, acl); + gossip_debug(GOSSIP_ACL_DEBUG, "pvfs2_acl_chmod: pvfs2 set acl " + "(access) returned %d\n", error); + } +#ifdef HAVE_POSIX_CLONE clone = posix_acl_clone(acl, GFP_KERNEL); - posix_acl_release(acl); if (!clone) { gossip_err("pvfs2_acl_chmod failed with ENOMEM\n"); error = -ENOMEM; goto out; } +#endif /* 
HAVE_POSIX_CLONE */ + +#ifdef HAVE_POSIX_ACL_CHMOD + error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (!error) + { + error = pvfs2_set_acl(inode, ACL_TYPE_ACCESS, acl); + gossip_debug(GOSSIP_ACL_DEBUG, "pvfs2_acl_chmod: pvfs2 set acl " + "(access) returned %d\n", error); + } +#elif defined(HAVE_POSIX_ACL_CLONE) error = posix_acl_chmod_masq(clone, inode->i_mode); if (!error) { @@ -639,11 +761,22 @@ int pvfs2_acl_chmod(struct inode *inode) "(access) returned %d\n", error); } posix_acl_release(clone); +#else + #error No posix_acl_chmod or posix_acl_clone defined +#endif /* HAVE_POSIX_ACL_CHMOD */ + out: + posix_acl_release(acl); return error; } -static int pvfs2_check_acl(struct inode *inode, int mask) +#if defined(HAVE_THREE_PARAM_GENERIC_PERMISSION) || \ + defined(HAVE_FOUR_PARAM_GENERIC_PERMISSION) +static int pvfs2_check_acl(struct inode *inode, int mask +#ifdef HAVE_THREE_PARAM_ACL_CHECK + , unsigned int flags +#endif /* HAVE_THREE_PARAM_ACL_CHECK */ + ) { struct posix_acl *acl = NULL; @@ -670,13 +803,38 @@ static int pvfs2_check_acl(struct inode *inode, int mask) gossip_debug(GOSSIP_ACL_DEBUG, "pvfs2_check_acl returning EAGAIN\n"); return -EAGAIN; } +#endif + -int pvfs2_permission(struct inode *inode, int mask, struct nameidata *nd) +#ifdef HAVE_TWO_PARAM_PERMISSION +int pvfs2_permission(struct inode *inode, int mask) +#else +int pvfs2_permission(struct inode *inode, int mask, +#ifdef HAVE_THREE_PARAM_PERMISSION_WITH_FLAG +unsigned int flags) +#else +struct nameidata *nd) +#endif /* HAVE_THREE_PARAM_PERMISSION_WITH_FLAG */ +#endif /* HAVE_TWO_PARAM_PERMISSION */ { +#ifdef HAVE_CURRENT_FSUID + int fsuid = current_fsuid(); +#else + int fsuid = current->fsuid; +#endif + #ifdef HAVE_GENERIC_PERMISSION int ret; - ret = generic_permission(inode, mask, pvfs2_check_acl); +#if defined(HAVE_TWO_PARAM_GENERIC_PERMISSION) + ret = generic_permission(inode, mask); +#elif defined(HAVE_THREE_PARAM_GENERIC_PERMISSION) + ret = generic_permission(inode, mask, 
pvfs2_check_acl); +#elif defined(HAVE_FOUR_PARAM_GENERIC_PERMISSION) + ret = generic_permission(inode, mask, 0, pvfs2_check_acl); +#else + #error generic_permission has an unknown number of parameters +#endif if (ret != 0) { gossip_debug(GOSSIP_ACL_DEBUG, "pvfs2_permission failed: inode: %llu mask = %o" @@ -684,7 +842,7 @@ int pvfs2_permission(struct inode *inode, int mask, struct nameidata *nd) "inode->i_uid = %d, inode->i_gid = %d " "in_group_p = %d " "(ret = %d)\n", - llu(get_handle_from_ino(inode)), mask, inode->i_mode, current->fsuid, + llu(get_handle_from_ino(inode)), mask, inode->i_mode, fsuid, inode->i_uid, inode->i_gid, in_group_p(inode->i_gid), ret); @@ -700,7 +858,7 @@ int pvfs2_permission(struct inode *inode, int mask, struct nameidata *nd) llu(get_handle_from_ino(inode))); } return ret; -#else +#else /* We sort of duplicate the code below from generic_permission. */ int mode = inode->i_mode; int error; @@ -709,7 +867,7 @@ int pvfs2_permission(struct inode *inode, int mask, struct nameidata *nd) "mode = %o current->fsuid = %d " "inode->i_uid = %d, inode->i_gid = %d" "in_group_p = %d\n", - llu(get_handle_from_ino(inode)), mask, mode, current->fsuid, + llu(get_handle_from_ino(inode)), mask, mode, fsuid, inode->i_uid, inode->i_gid, in_group_p(inode->i_gid)); @@ -727,7 +885,7 @@ int pvfs2_permission(struct inode *inode, int mask, struct nameidata *nd) gossip_err("pvfs2_permission: cannot write to an immutable file!\n"); return -EACCES; } - if (current->fsuid == inode->i_uid) + if (fsuid == inode->i_uid) { mode >>= 6; } @@ -783,7 +941,7 @@ int pvfs2_permission(struct inode *inode, int mask, struct nameidata *nd) gossip_debug(GOSSIP_ACL_DEBUG, "pvfs2_permission: disallowing access\n"); return -EACCES; -#endif +#endif /* HAVE_GENERIC_PERMISSION */ } #endif diff --git a/src/kernel/linux-2.6/dcache.c b/src/kernel/linux-2.6/dcache.c index f27eb78..c1966a5 100644 --- a/src/kernel/linux-2.6/dcache.c +++ b/src/kernel/linux-2.6/dcache.c @@ -13,109 +13,166 @@ #include 
"pvfs2-kernel.h" #include "pvfs2-internal.h" +static void __attribute__ ((unused)) print_dentry(struct dentry *entry, int ret); + /* should return 1 if dentry can still be trusted, else 0 */ static int pvfs2_d_revalidate_common(struct dentry* dentry) { int ret = 0; - struct inode *inode = (dentry ? dentry->d_inode : NULL); + struct inode *inode; struct inode *parent_inode = NULL; pvfs2_kernel_op_t *new_op = NULL; pvfs2_inode_t *parent = NULL; - gossip_debug(GOSSIP_DCACHE_DEBUG, "pvfs2_d_revalidate_common: called on dentry %p.\n", dentry); + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: called on dentry %p.\n", + __func__, dentry); - /* find parent inode */ - if(dentry && dentry->d_parent) + /* find inode from dentry */ + if(!dentry || !dentry->d_inode) { - gossip_debug(GOSSIP_DCACHE_DEBUG, "pvfs2_d_revalidate_common: parent found.\n"); - parent_inode = dentry->d_parent->d_inode; + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: inode not valid.\n", __func__); + goto invalid_exit; } - else + + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: inode valid.\n", __func__); + inode = dentry->d_inode; + + /* find parent inode */ + if(!dentry || !dentry->d_parent) { - gossip_debug(GOSSIP_DCACHE_DEBUG, "pvfs2_d_revalidate_common: parent not found.\n"); + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: parent not found.\n", __func__); + goto invalid_exit; } - - if (inode && parent_inode) + + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: parent found.\n", __func__); + parent_inode = dentry->d_parent->d_inode; + + /* first perform a lookup to make sure that the object not only + * exists, but is still in the expected place in the name space + */ + if (!is_root_handle(inode)) { - /* first perform a lookup to make sure that the object not only - * exists, but is still in the expected place in the name space - */ - if (!is_root_handle(inode)) + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__); + new_op = op_alloc(PVFS2_VFS_OP_LOOKUP); + if (!new_op) + { + goto invalid_exit; + } + 
new_op->upcall.req.lookup.sym_follow = PVFS2_LOOKUP_LINK_NO_FOLLOW; + parent = PVFS2_I(parent_inode); + if (parent && parent->refn.handle != PVFS_HANDLE_NULL && + parent->refn.fs_id != PVFS_FS_ID_NULL) + { + new_op->upcall.req.lookup.parent_refn = parent->refn; + } + else { - gossip_debug(GOSSIP_DCACHE_DEBUG, "pvfs2_d_revalidate_common: attempting lookup.\n"); - new_op = op_alloc(PVFS2_VFS_OP_LOOKUP); - if (!new_op) - { - return 0; - } - new_op->upcall.req.lookup.sym_follow = PVFS2_LOOKUP_LINK_NO_FOLLOW; - parent = PVFS2_I(parent_inode); - if (parent && parent->refn.handle != PVFS_HANDLE_NULL && parent->refn.fs_id != PVFS_FS_ID_NULL) - { - new_op->upcall.req.lookup.parent_refn = parent->refn; - } - else - { #if defined(HAVE_IGET4_LOCKED) || defined(HAVE_IGET5_LOCKED) - gossip_lerr("Critical error: i_ino cannot be relied upon when using iget5/iget4\n"); - op_release(new_op); - return 0; -#endif - new_op->upcall.req.lookup.parent_refn.handle = - get_handle_from_ino(parent_inode); - new_op->upcall.req.lookup.parent_refn.fs_id = - PVFS2_SB(parent_inode->i_sb)->fs_id; - } - strncpy(new_op->upcall.req.lookup.d_name, - dentry->d_name.name, PVFS2_NAME_LEN); - - ret = service_operation( - new_op, "pvfs2_lookup", - get_interruptible_flag(parent_inode)); - - if((new_op->downcall.status != 0) || - !match_handle(new_op->downcall.resp.lookup.refn.handle, inode)) - { - gossip_debug(GOSSIP_DCACHE_DEBUG, "pvfs2_d_revalidate_common: lookup failure or no match.\n"); - op_release(new_op); - /* mark the inode as bad so that d_delete will be aggressive - * about dropping the dentry - */ - pvfs2_make_bad_inode(inode); - return(0); - } - + gossip_lerr("Critical error: i_ino cannot be relied " + "upon when using iget5/iget4\n"); op_release(new_op); + goto invalid_exit; +#endif + new_op->upcall.req.lookup.parent_refn.handle = + get_handle_from_ino(parent_inode); + new_op->upcall.req.lookup.parent_refn.fs_id = + PVFS2_SB(parent_inode->i_sb)->fs_id; } - else + 
strncpy(new_op->upcall.req.lookup.d_name, + dentry->d_name.name, PVFS2_NAME_LEN); + + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d interrupt flag [%d]\n", + __FILE__, __func__, __LINE__, get_interruptible_flag(parent_inode)); + + ret = service_operation( + new_op, "pvfs2_lookup", + get_interruptible_flag(parent_inode)); + + if((new_op->downcall.status != 0) || + !match_handle(new_op->downcall.resp.lookup.refn.handle, inode)) { - gossip_debug(GOSSIP_DCACHE_DEBUG, "pvfs2_d_revalidate_common: root handle, lookup skipped.\n"); + gossip_debug( + GOSSIP_DCACHE_DEBUG, + "%s:%s:%d lookup failure |%s| or no match |%s|.\n", + __FILE__, __func__, __LINE__, + (new_op->downcall.status != 0) ? "true" : "false", + (!match_handle(new_op->downcall.resp.lookup.refn.handle, inode)) ? "true" : "false"); + op_release(new_op); + + /* Avoid calling make_bad_inode() in this situation. On 2.4 + * (RHEL3) kernels, it can cause bogus permission denied errors + * on path elements after interrupt signals. On later 2.6 + * kernels this causes a kernel oops rather than a permission + * error. 
+ */ +#if 0 + /* mark the inode as bad so that d_delete will be aggressive + * about dropping the dentry + */ + pvfs2_make_bad_inode(inode); +#endif + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d setting revalidate_failed = 1\n", __FILE__, __func__, __LINE__); + /* set a flag that we can detect later in d_delete() */ + PVFS2_I(inode)->revalidate_failed = 1; + d_drop(dentry); + + goto invalid_exit; } - /* now perform revalidation */ - gossip_debug(GOSSIP_DCACHE_DEBUG, " (inode %llu)\n", - llu(get_handle_from_ino(inode))); - gossip_debug(GOSSIP_DCACHE_DEBUG, "pvfs2_d_revalidate_common: calling pvfs2_internal_revalidate().\n"); - ret = pvfs2_internal_revalidate(inode); + op_release(new_op); } else { - gossip_debug(GOSSIP_DCACHE_DEBUG, "\n"); + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: root handle, lookup skipped.\n", __func__); + } + + /* now perform getattr */ + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: doing getattr: inode: %p, handle: %llu)\n", + __func__, inode, llu(get_handle_from_ino(inode))); + ret = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT); + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: getattr %s (ret = %d), returning %s for dentry i_count=%d\n", + __func__, + (ret == 0 ? "succeeded" : "failed"), + ret, + (ret == 0 ? "valid" : "INVALID"), + atomic_read(&inode->i_count)); + if(ret != 0) + { + goto invalid_exit; } - return ret; + + /* dentry is valid! 
*/ + return 1; + +invalid_exit: + return 0; } -static int pvfs2_d_delete (struct dentry * dentry) +static int pvfs2_d_delete ( +#ifdef HAVE_D_DELETE_CONST +const +#endif /* HAVE_D_DELETE_CONST */ +struct dentry * dentry +) { - gossip_debug(GOSSIP_DCACHE_DEBUG, "pvfs2_d_delete: called on dentry %p.\n", dentry); + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: called on dentry %p.\n", __func__, dentry); +#if 0 if(dentry->d_inode && is_bad_inode(dentry->d_inode)) +#endif + if(dentry->d_inode && PVFS2_I(dentry->d_inode)->revalidate_failed == 1) { - gossip_debug(GOSSIP_DCACHE_DEBUG, "pvfs2_d_delete: returning 1 (bad inode).\n"); + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: returning 1 (bad inode).\n", __func__); return 1; } else { - gossip_debug(GOSSIP_DCACHE_DEBUG, "pvfs2_d_delete: returning 0 (inode looks ok).\n"); + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: returning 0 (inode looks ok).\n", __func__); return 0; } } @@ -139,10 +196,10 @@ static int pvfs2_d_revalidate( { if (nd && (nd->flags & LOOKUP_FOLLOW) && - (!nd->flags & LOOKUP_CREATE)) + ((!nd->flags) & (LOOKUP_CREATE)) ) { - gossip_debug(GOSSIP_DCACHE_DEBUG, "\npvfs2_d_revalidate: Trusting intent; " - "skipping getattr\n"); + gossip_debug(GOSSIP_DCACHE_DEBUG, + "\n%s: Trusting intent; skipping getattr\n", __func__); return 1; } return(pvfs2_d_revalidate_common(dentry)); @@ -155,8 +212,15 @@ static int pvfs2_d_revalidate( link_path_walk to pass our error up */ static int pvfs2_d_hash( +#ifdef HAVE_THREE_PARAM_D_HASH + const struct dentry *parent, + const struct inode *inode, + struct qstr *hash +#else struct dentry *parent, - struct qstr *hash) + struct qstr *hash +#endif /* HAVE_THREE_PARAM_D_HASH */ + ) { /* gossip_debug(GOSSIP_DCACHE_DEBUG, "pvfs2: pvfs2_d_hash called " */ /* "(name: %s | len: %d | hash: %d)\n", */ @@ -164,6 +228,32 @@ static int pvfs2_d_hash( return 0; } +#ifdef HAVE_SEVEN_PARAM_D_COMPARE +static int pvfs2_d_compare( + const struct dentry *parent, + const struct inode * pinode, + const struct dentry 
*dentry, + const struct inode *inode, + unsigned int len, + const char *str, + const struct qstr *name) +{ + int i = 0; + gossip_debug(GOSSIP_DCACHE_DEBUG, "pvfs2_d_compare: " + "called on parent %p\n (name1: %s| name2: %s)\n", + parent, str, name->name); + + if( len != name->len ) + return 1; + + for( i=0; i < len; i++ ) + { + if( str[i] != name->name[i] ) + return 1; + } + return 0; +} +#else static int pvfs2_d_compare( struct dentry *parent, struct qstr *d_name, @@ -177,6 +267,8 @@ static int pvfs2_d_compare( (d_name->hash == name->hash) && (memcmp(d_name->name, name->name, d_name->len) == 0)); } +#endif /* HAVE_SEVEN_PARAM_D_COMPARE */ + /** PVFS2 implementation of VFS dentry operations */ struct dentry_operations pvfs2_dentry_operations = @@ -187,6 +279,49 @@ struct dentry_operations pvfs2_dentry_operations = .d_delete = pvfs2_d_delete, }; +/* print_dentry() + * + * Available for debugging purposes. Please remove the unused attribute + * before invoking + */ +static void __attribute__ ((unused)) print_dentry(struct dentry *entry, int ret) +{ + unsigned int local_count = 0; + if(!entry) + { + printk("--- dentry %p: no entry, ret: %d\n", entry, ret); + return; + } + + if(!entry->d_inode) + { + printk("--- dentry %p: no d_inode, ret: %d\n", entry, ret); + return; + } + + if(!entry->d_parent) + { + printk("--- dentry %p: no d_parent, ret: %d\n", entry, ret); + return; + } + +#ifdef HAVE_DENTRY_D_COUNT_ATOMIC + local_count = atomic_read(&entry->d_count); +#else + spin_lock(&entry->d_lock); + local_count = entry->d_count; + spin_unlock(&entry->d_lock); +#endif /* HAVE_DENTRY_D_COUNT_ATOMIC */ + + printk("--- dentry %p: d_count: %d, name: %s, parent: %p, parent name: %s, ret: %d\n", + entry, + local_count, + entry->d_name.name, + entry->d_parent, + entry->d_parent->d_name.name, + ret); +} + /* * Local variables: * c-indent-level: 4 diff --git a/src/kernel/linux-2.6/devpvfs2-req.c b/src/kernel/linux-2.6/devpvfs2-req.c index dc45e49..fd01139 100644 --- 
a/src/kernel/linux-2.6/devpvfs2-req.c +++ b/src/kernel/linux-2.6/devpvfs2-req.c @@ -12,6 +12,18 @@ #include "pvfs2-dev-proto.h" #include "pvfs2-bufmap.h" #include "pvfs2-internal.h" +#include "pint-dev.h" + +/* these functions are defined in pvfs2-utils.c */ +int PVFS_proc_kmod_mask_to_eventlog(uint64_t mask, char *debug_string); +int PVFS_proc_mask_to_eventlog(uint64_t mask, char *debug_string); + +/*these variables are defined in pvfs2-proc.c*/ +extern char kernel_debug_string[PVFS2_MAX_DEBUG_STRING_LEN]; +extern char client_debug_string[PVFS2_MAX_DEBUG_STRING_LEN]; + +/*these variables are defined in pvfs2-mod.c*/ +extern unsigned int kernel_mask_set_mod_init; /* this file implements the /dev/pvfs2-req device node */ @@ -100,10 +112,10 @@ static ssize_t pvfs2_devreq_read( } else { - pvfs2_kernel_op_t *op = NULL; + pvfs2_kernel_op_t *op = NULL, *temp = NULL; /* get next op (if any) from top of list */ spin_lock(&pvfs2_request_list_lock); - list_for_each_entry (op, &pvfs2_request_list, list) + list_for_each_entry_safe (op, temp, &pvfs2_request_list, list) { PVFS_fs_id fsid = fsid_of_op(op); /* Check if this op's fsid is known and needs remounting */ @@ -117,6 +129,7 @@ static ssize_t pvfs2_devreq_read( */ else { cur_op = op; + spin_lock(&cur_op->lock); list_del(&cur_op->list); cur_op->op_linger_tmp--; /* if there is a trailer, re-add it to the request list */ @@ -130,6 +143,7 @@ static ssize_t pvfs2_devreq_read( /* readd it to the head of the list */ list_add(&cur_op->list, &pvfs2_request_list); } + spin_unlock(&cur_op->lock); break; } } @@ -468,7 +482,7 @@ static ssize_t pvfs2_devreq_writev( && op->upcall.req.io.async_vfs_io == PVFS_VFS_ASYNC_IO) { pvfs2_kiocb *x = (pvfs2_kiocb *) op->priv; - if (x == NULL || x->buffer == NULL + if (x == NULL || x->iov == NULL || x->op != op || x->bytes_to_be_copied <= 0) { @@ -476,7 +490,7 @@ static ssize_t pvfs2_devreq_writev( { gossip_debug(GOSSIP_DEV_DEBUG, "WARNING: pvfs2_iocb from op" "has invalid fields! 
%p, %p(%p), %d\n", - x->buffer, x->op, op, (int) x->bytes_to_be_copied); + x->iov, x->op, op, (int) x->bytes_to_be_copied); } else { @@ -503,9 +517,9 @@ static ssize_t pvfs2_devreq_writev( && bytes_copied > 0) { /* try and copy it out to user-space */ - bytes_copied = pvfs_bufmap_copy_to_user_task( + bytes_copied = pvfs_bufmap_copy_to_user_task_iovec( x->tsk, - x->buffer, + x->iov, x->nr_segs, x->buffer_index, bytes_copied); } @@ -618,7 +632,7 @@ static int pvfs2_devreq_release( { int unmounted = 0; - gossip_debug(GOSSIP_DEV_DEBUG, "pvfs2-client-core: exiting, closing device\n"); + gossip_debug(GOSSIP_DEV_DEBUG, "%s:pvfs2-client-core: exiting, closing device\n",__func__); down(&devreq_semaphore); pvfs_bufmap_finalize(); @@ -697,6 +711,10 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) static int32_t max_up_size = MAX_ALIGNED_DEV_REQ_UPSIZE; static int32_t max_down_size = MAX_ALIGNED_DEV_REQ_DOWNSIZE; struct PVFS_dev_map_desc user_desc; + int ret; + dev_mask_info_t mask_info = {0}; + + /* mtmoore: add locking here */ switch(command) { @@ -762,8 +780,41 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) return ret; } case PVFS_DEV_DEBUG: - return (get_user(gossip_debug_mask, (int32_t __user *)arg) == - -EFAULT) ? -EIO : 0; + ret = copy_from_user(&mask_info, (void __user *)arg + ,sizeof(mask_info)); + if (ret != 0) + return(-EIO); + + if (mask_info.mask_type == KERNEL_MASK) + { + if ( (mask_info.mask_value == 0) && (kernel_mask_set_mod_init) ) + { + /* the kernel debug mask was set when the kernel module was loaded; + * don't override it if the client-core was started without a value + * for PVFS2_KMODMASK. 
+ */ + return(0); + } + ret = PVFS_proc_kmod_mask_to_eventlog(mask_info.mask_value + ,kernel_debug_string); + gossip_debug_mask = mask_info.mask_value; + printk("PVFS: kernel debug mask has been modified to \"%s\" (0x%08llx)\n" + ,kernel_debug_string,llu(gossip_debug_mask)); + } + else if (mask_info.mask_type == CLIENT_MASK) + { + ret = PVFS_proc_mask_to_eventlog(mask_info.mask_value + ,client_debug_string); + printk("PVFS: client debug mask has been modified to \"%s\" (0x%08llx)\n" + ,client_debug_string,llu(mask_info.mask_value)); + } + else + { + gossip_lerr("Invalid mask type....\n"); + return(-EINVAL); + } + + return(ret); break; default: return -ENOIOCTLCMD; @@ -771,8 +822,12 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) return -ENOIOCTLCMD; } +#ifdef HAVE_UNLOCKED_IOCTL_HANDLER +static long pvfs2_devreq_ioctl( +#else static int pvfs2_devreq_ioctl( struct inode *inode, +#endif /* HAVE_UNLOCKED_IOCTL_HANDLER */ struct file *file, unsigned int command, unsigned long arg) @@ -1066,8 +1121,13 @@ static unsigned int pvfs2_devreq_poll( { int poll_revent_mask = 0; + gossip_debug(GOSSIP_WAIT_DEBUG,"%s:Is daemon in service(%d).\n" + ,__func__ + ,is_daemon_in_service()); + if (open_access_count == 1) { + gossip_debug(GOSSIP_WAIT_DEBUG,"%s:About to call poll_wait.\n",__func__); poll_wait(file, &pvfs2_request_list_waitq, poll_table); spin_lock(&pvfs2_request_list_lock); @@ -1099,7 +1159,12 @@ struct file_operations pvfs2_devreq_file_operations = #endif .open = pvfs2_devreq_open, .release = pvfs2_devreq_release, +#ifdef HAVE_UNLOCKED_IOCTL_HANDLER + .unlocked_ioctl = pvfs2_devreq_ioctl, +#else .ioctl = pvfs2_devreq_ioctl, +#endif /* HAVE_UNLOCKED_IOCTL_HANDLER */ + #ifdef CONFIG_COMPAT #ifdef HAVE_COMPAT_IOCTL_HANDLER .compat_ioctl = pvfs2_devreq_compat_ioctl, diff --git a/src/kernel/linux-2.6/dir.c b/src/kernel/linux-2.6/dir.c index 40d040f..6342513 100644 --- a/src/kernel/linux-2.6/dir.c +++ b/src/kernel/linux-2.6/dir.c @@ -123,195 
+123,247 @@ static int pvfs2_readdir( struct dentry *dentry = file->f_dentry; pvfs2_kernel_op_t *new_op = NULL; pvfs2_inode_t *pvfs2_inode = PVFS2_I(dentry->d_inode); + int buffer_full = 0; + readdir_handle_t rhandle; + int i = 0, len = 0; + ino_t current_ino = 0; + char *current_entry = NULL; + long bytes_decoded; + + gossip_ldebug(GOSSIP_DIR_DEBUG,"Entering %s.\n",__func__); + + gossip_ldebug(GOSSIP_DIR_DEBUG,"%s: file->f_pos:%lld\n",__func__,lld(file->f_pos)); pos = (PVFS_ds_position)file->f_pos; + /* are we done? */ if (pos == PVFS_READDIR_END) { gossip_debug(GOSSIP_DIR_DEBUG, - "Skipping to graceful termination " - "path since we are done\n"); - return 0; + "Skipping to graceful termination path since we are done\n"); + return (0); } - gossip_debug(GOSSIP_DIR_DEBUG, "pvfs2_readdir called on %s (pos=%d)\n", - dentry->d_name.name, (int)pos); + gossip_debug(GOSSIP_DIR_DEBUG, "pvfs2_readdir called on %s (pos=%llu)\n", + dentry->d_name.name, llu(pos)); + + rhandle.buffer_index = -1; + rhandle.dents_buf = NULL; + memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response)); - switch ((uint32_t)pos) + new_op = op_alloc(PVFS2_VFS_OP_READDIR); + if (!new_op) + { + return (-ENOMEM); + } + + new_op->uses_shared_memory = 1; + + if (pvfs2_inode && (pvfs2_inode->refn.handle != PVFS_HANDLE_NULL) + && ( pvfs2_inode->refn.fs_id != PVFS_FS_ID_NULL) ) + { + new_op->upcall.req.readdir.refn = pvfs2_inode->refn; + gossip_debug(GOSSIP_DIR_DEBUG,"%s: upcall.req.readdir.refn.handle:%llu\n" + ,__func__ + ,llu(new_op->upcall.req.readdir.refn.handle)); + } + else { - /* - if we're just starting, populate the "." and ".." entries - of the current directory; these always appear - */ - case 0: - token_set = 1; - ino = get_ino_from_handle(dentry->d_inode); - gossip_debug(GOSSIP_DIR_DEBUG, - "calling filldir of . 
with pos = %llu\n", llu(pos)); - if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) - { - break; - } - file->f_pos++; - pos++; - /* drop through */ - case 1: - token_set = 1; - ino = get_parent_ino_from_dentry(dentry); - gossip_debug(GOSSIP_DIR_DEBUG, - "calling filldir of .. with pos = %llu\n", llu(pos)); - if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) - { - break; - } - file->f_pos++; - pos++; - /* drop through */ - default: - { - readdir_handle_t rhandle; - - rhandle.buffer_index = -1; - rhandle.dents_buf = NULL; - memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response)); - - /* handle the normal cases here */ - new_op = op_alloc(PVFS2_VFS_OP_READDIR); - if (!new_op) - { - return -ENOMEM; - } - - if (pvfs2_inode && pvfs2_inode->refn.handle != PVFS_HANDLE_NULL && - pvfs2_inode->refn.fs_id != PVFS_FS_ID_NULL) - { - new_op->upcall.req.readdir.refn = pvfs2_inode->refn; - } - else - { #if defined(HAVE_IGET5_LOCKED) || defined(HAVE_IGET4_LOCKED) - gossip_lerr("Critical error: i_ino cannot be relied " - "on when using iget4/5\n"); - op_release(new_op); - return -EINVAL; + gossip_lerr("Critical error: i_ino cannot be relied on when using iget4/5\n"); + op_release(new_op); + return -EINVAL; #endif - new_op->upcall.req.readdir.refn.handle = - get_handle_from_ino(dentry->d_inode); - new_op->upcall.req.readdir.refn.fs_id = - PVFS2_SB(dentry->d_inode->i_sb)->fs_id; - } - new_op->upcall.req.readdir.max_dirent_count = MAX_DIRENT_COUNT; - - /* NOTE: - the position we send to the readdir upcall is out of - sync with file->f_pos since pvfs2 doesn't include the - "." and ".." entries that we added above. 
+ new_op->upcall.req.readdir.refn.handle = get_handle_from_ino(dentry->d_inode); + new_op->upcall.req.readdir.refn.fs_id = PVFS2_SB(dentry->d_inode->i_sb)->fs_id; + gossip_debug(GOSSIP_DIR_DEBUG,"%s: upcall.req.readdir.refn.handle:%llu\n" + ,__func__ + ,llu(new_op->upcall.req.readdir.refn.handle)); + } + + new_op->upcall.req.readdir.max_dirent_count = MAX_DIRENT_COUNT_READDIR; + + /* NOTE: + * the position we send to the readdir upcall is out of sync with file->f_pos + * since pvfs2 doesn't include the "." and ".." entries that are added below. + */ + new_op->upcall.req.readdir.token = (pos == 0 ? PVFS_READDIR_START : pos); + +get_new_buffer_index: + ret = readdir_index_get(&buffer_index); + if (ret < 0) + { + gossip_lerr("pvfs2_readdir: readdir_index_get() failure (%d)\n", ret); + op_release(new_op); + return(ret); + } + new_op->upcall.req.readdir.buf_index = buffer_index; + + ret = service_operation( new_op, + "pvfs2_readdir", + get_interruptible_flag(dentry->d_inode)); + + gossip_debug(GOSSIP_DIR_DEBUG, "Readdir downcall status is %d. ret:%d\n", + new_op->downcall.status,ret); + + if ( ret == -EAGAIN && op_state_purged(new_op) ) + { + /* readdir shared memory aread has been wiped due to pvfs2-client-core restarting, so + * we must get a new index into the shared memory. */ - new_op->upcall.req.readdir.token = - (pos == 2 ? PVFS_READDIR_START : pos); + gossip_debug(GOSSIP_DIR_DEBUG,"%s: Getting new buffer_index for retry of readdir..\n",__func__); + goto get_new_buffer_index; + } - ret = readdir_index_get(&buffer_index); - if (ret < 0) - { - gossip_err("pvfs2_readdir: readdir_index_get() " - "failure (%d)\n", ret); - goto err; - } - new_op->upcall.req.readdir.buf_index = buffer_index; + if ( ret == -EIO && op_state_purged(new_op) ) + { + /* pvfs2-client is down. Readdir shared memory area has been wiped clean. No need to "put" + * back the buffer_index. + */ + gossip_err("%s: Client is down. Aborting readdir call. 
\n",__func__); + op_release(new_op); + return (ret); + } - ret = service_operation( - new_op, "pvfs2_readdir", - get_interruptible_flag(dentry->d_inode)); + if ( ret < 0 || new_op->downcall.status != 0 ) + { + gossip_debug(GOSSIP_DIR_DEBUG, + "Readdir request failed. Status:%d\n", + new_op->downcall.status); + readdir_index_put(buffer_index); + op_release(new_op); + return ( (ret < 0 ? ret : new_op->downcall.status) ); + } - gossip_debug(GOSSIP_DIR_DEBUG, "Readdir downcall status is %d\n", - new_op->downcall.status); + if ( (bytes_decoded = readdir_handle_ctor(&rhandle, + new_op->downcall.trailer_buf, + buffer_index)) < 0 ) + { + gossip_err("pvfs2_readdir: Could not decode trailer buffer " + " into a readdir response %d\n", ret); + ret = bytes_decoded; + readdir_index_put(buffer_index); + op_release(new_op); + return(ret); + } - if (new_op->downcall.status == 0) - { - int i = 0, len = 0; - ino_t current_ino = 0; - char *current_entry = NULL; - long bytes_decoded; - - if ((bytes_decoded = readdir_handle_ctor(&rhandle, - new_op->downcall.trailer_buf, - buffer_index)) < 0) - { - ret = bytes_decoded; - gossip_err("pvfs2_readdir: Could not decode trailer buffer " - " into a readdir response %d\n", ret); - goto err; - } + if (bytes_decoded != new_op->downcall.trailer_size) + { + gossip_err("pvfs2_readdir: # bytes " + "decoded (%ld) != trailer size (%ld)\n", + bytes_decoded, (long) new_op->downcall.trailer_size); + ret = -EINVAL; + readdir_handle_dtor(&rhandle); + op_release(new_op); + return (ret); + } - if (bytes_decoded != new_op->downcall.trailer_size) - { - gossip_err("pvfs2_readdir: # bytes " - "decoded (%ld) != trailer size (%ld)\n", - bytes_decoded, (long) new_op->downcall.trailer_size); - ret = -EINVAL; - goto err; - } + if (pos == 0) + { + token_set = 1; + ino = get_ino_from_handle(dentry->d_inode); + gossip_debug(GOSSIP_DIR_DEBUG,"%s: calling filldir of \".\" with pos = %llu\n" + ,__func__ + ,llu(pos)); + if ( (ret=filldir(dirent,".",1,pos,ino,DT_DIR)) < 0) + { 
+ readdir_handle_dtor(&rhandle); + op_release(new_op); + return(ret); + } + file->f_pos++; + gossip_ldebug(GOSSIP_DIR_DEBUG,"%s: file->f_pos:%lld\n",__func__,lld(file->f_pos)); + pos++; + } - for (i = 0; i < rhandle.readdir_response.pvfs_dirent_outcount; i++) - { - len = rhandle.readdir_response.dirent_array[i].d_length; - current_entry = rhandle.readdir_response.dirent_array[i].d_name; - current_ino = pvfs2_handle_to_ino( - rhandle.readdir_response.dirent_array[i].handle); - - gossip_debug(GOSSIP_DIR_DEBUG, - "calling filldir for %s with len %d, pos %ld\n", - current_entry, len, (unsigned long) pos); - if (filldir(dirent, current_entry, len, pos, - current_ino, DT_UNKNOWN) < 0) - { - gossip_debug(GOSSIP_DIR_DEBUG, "filldir() failed.\n"); - if(token_set && (i < 2)) - { - gossip_err("Filldir failed on one of the first two true PVFS directory entries.\n"); - gossip_err("Duplicate entries may appear.\n"); - } - ret = 0; - break; - } - file->f_pos++; - pos++; - } - /* For the first time around, use the token - * returned by the readdir response */ - if (token_set == 1) - { - /* this means that all of the filldir calls succeeded */ - if (i == rhandle.readdir_response.pvfs_dirent_outcount) - { - file->f_pos = rhandle.readdir_response.token; - } - else - { - /* this means a filldir call failed */ - file->f_pos = i - 1; - gossip_debug(GOSSIP_DIR_DEBUG, "at least one filldir call failed. 
Setting f_pos to: %ld\n", (unsigned long) file->f_pos); - } - } + if (pos == 1) + { + token_set = 1; + ino = get_parent_ino_from_dentry(dentry); + gossip_debug(GOSSIP_DIR_DEBUG,"%s: calling filldir of \"..\" with pos = %llu\n" + ,__func__ + ,llu(pos)); + if ( (ret=filldir(dirent,"..",2,pos,ino,DT_DIR)) < 0) + { + readdir_handle_dtor(&rhandle); + op_release(new_op); + return(ret); + } + file->f_pos++; + gossip_ldebug(GOSSIP_DIR_DEBUG,"%s: file->pos:%lld\n",__func__,lld(file->f_pos)); + pos++; + } - gossip_debug(GOSSIP_DIR_DEBUG, - "pos = %llu, file->f_pos should have been %ld\n", - llu(pos), - (unsigned long) file->f_pos); - } - else + for (i = 0; i < rhandle.readdir_response.pvfs_dirent_outcount; i++) + { + len = rhandle.readdir_response.dirent_array[i].d_length; + current_entry = rhandle.readdir_response.dirent_array[i].d_name; + current_ino = pvfs2_handle_to_ino( rhandle.readdir_response.dirent_array[i].handle); + + gossip_debug(GOSSIP_DIR_DEBUG, + "calling filldir for %s with len %d, pos %ld\n", + current_entry, len, (unsigned long) pos); + if ( (ret=filldir(dirent, current_entry, len, pos, current_ino, DT_UNKNOWN)) < 0) { - readdir_index_put(buffer_index); - gossip_debug(GOSSIP_DIR_DEBUG, - "Failed to readdir (downcall status %d)\n", - new_op->downcall.status); + gossip_debug(GOSSIP_DIR_DEBUG, "filldir() failed. 
ret:%d\n",ret); + if (token_set && (i < 2)) + { + gossip_err("Filldir failed on one of the first two true PVFS directory entries.\n"); + gossip_err("Duplicate entries may appear.\n"); + } + buffer_full = 1; + break; } -err: - readdir_handle_dtor(&rhandle); - op_release(new_op); - break; - } /* end default: block */ - } /* end switch block */ + file->f_pos++; + gossip_ldebug(GOSSIP_DIR_DEBUG,"%s: file->pos:%lld\n",__func__,lld(file->f_pos)); + + pos++; + } + + /* For the first time around, use the token returned by the readdir response */ + if (token_set == 1) + { + /* this means that all of the filldir calls succeeded */ + if (i == rhandle.readdir_response.pvfs_dirent_outcount) + { + file->f_pos = rhandle.readdir_response.token; + } + else + { + /* this means a filldir call failed */ + if(rhandle.readdir_response.token == PVFS_READDIR_END) + { + /* If PVFS hit end of directory, then there is no + * way to do math on the token that it returned. + * Instead we go by the f_pos but back up to account for + * the artificial . and .. entries. The fact that + * "token_set" is non zero indicates that we are on + * the first iteration of getdents(). + */ + file->f_pos -= 3; + } + else + { + file->f_pos = rhandle.readdir_response.token - + (rhandle.readdir_response.pvfs_dirent_outcount - i + 1); + } + gossip_debug(GOSSIP_DIR_DEBUG, "at least one filldir call failed. " + "Setting f_pos to: %lld\n" + , lld(file->f_pos)); + } + }/*end if token_set to 1*/ + + /* did we hit the end of the directory? 
*/ + if(rhandle.readdir_response.token == PVFS_READDIR_END && !buffer_full) + { + gossip_debug(GOSSIP_DIR_DEBUG, + "End of dir detected; setting f_pos to PVFS_READDIR_END.\n"); + file->f_pos = PVFS_READDIR_END; + } + + gossip_debug(GOSSIP_DIR_DEBUG,"pos = %llu, file->f_pos is %lld\n", + llu(pos), + lld(file->f_pos)); if (ret == 0) { @@ -324,9 +376,13 @@ static int pvfs2_readdir( mark_inode_dirty_sync(dentry->d_inode); } + readdir_handle_dtor(&rhandle); + op_release(new_op); + gossip_debug(GOSSIP_DIR_DEBUG, "pvfs2_readdir returning %d\n",ret); - return ret; -} + + return (ret); +}/*end pvfs2_readdir*/ #ifdef HAVE_READDIRPLUS_FILE_OPERATIONS @@ -618,7 +674,8 @@ static int pvfs2_readdirplus_common( else { #if defined(HAVE_IGET5_LOCKED) || defined(HAVE_IGET4_LOCKED) - gossip_lerr("Critical error: i_ino cannot be relied on when using iget4/5\n"); + gossip_lerr("Critical error: i_ino cannot be relied on " + "when using iget4/5\n"); op_release(new_op); return -EINVAL; #endif @@ -628,7 +685,8 @@ static int pvfs2_readdirplus_common( PVFS2_SB(dentry->d_inode->i_sb)->fs_id; } new_op->upcall.req.readdirplus.mask = pvfs2_mask; - new_op->upcall.req.readdirplus.max_dirent_count = MAX_DIRENT_COUNT; + new_op->upcall.req.readdirplus.max_dirent_count + = MAX_DIRENT_COUNT_READDIRPLUS; /* NOTE: the position we send to the readdirplus upcall is out of @@ -641,7 +699,8 @@ static int pvfs2_readdirplus_common( ret = readdir_index_get(&buffer_index); if (ret < 0) { - gossip_err("pvfs2_readdirplus: readdir_index_get() failure (%d)\n", ret); + gossip_err("pvfs2_readdirplus: readdir_index_get() " + "failure (%d)\n", ret); goto err; } new_op->upcall.req.readdirplus.buf_index = buffer_index; @@ -899,7 +958,6 @@ struct file_operations pvfs2_dir_operations = readdir : pvfs2_readdir, open : pvfs2_file_open, release : pvfs2_file_release, - llseek : pvfs2_dir_llseek #else .read = generic_read_dir, .readdir = pvfs2_readdir, diff --git a/src/kernel/linux-2.6/file.c b/src/kernel/linux-2.6/file.c index 
6e32648..767d1ff 100644 --- a/src/kernel/linux-2.6/file.c +++ b/src/kernel/linux-2.6/file.c @@ -34,6 +34,11 @@ static int pvfs2_precheck_file_write(struct file *file, struct inode *inode, size_t *count, loff_t *ppos); #endif +static ssize_t wait_for_cached_io(struct rw_options *old_rw, + struct iovec *vec, + int nr_segs, + size_t total_size) __attribute__((unused)); + static ssize_t wait_for_direct_io(struct rw_options *rw, struct iovec *vec, unsigned long nr_segs, @@ -45,6 +50,9 @@ static ssize_t wait_for_iox(struct rw_options *rw, struct xtvec *xtvec, unsigned long xtnr_segs, size_t total_size); +#ifdef RESET_FILE_POS +static ssize_t do_readv_writev_wrapper( struct rw_options *rw); +#endif #define wake_up_daemon_for_return(op) \ do { \ @@ -98,11 +106,12 @@ int pvfs2_file_open( ret = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_SIZE); if (ret == 0) { - file->f_pos = i_size_read(inode); + file->f_pos = pvfs2_i_size_read(inode); gossip_debug(GOSSIP_FILE_DEBUG, "f_pos = %ld\n", (unsigned long)file->f_pos); } else { + gossip_debug(GOSSIP_FILE_DEBUG, "%s:%s:%d calling make bad inode\n", __FILE__, __func__, __LINE__); pvfs2_make_bad_inode(inode); gossip_debug(GOSSIP_FILE_DEBUG, "pvfs2_file_open returning error: %d\n", ret); return(ret); @@ -170,6 +179,7 @@ struct rw_options { /* Contiguous file I/O operations use a single offset */ struct { loff_t *offset; + loff_t offset_before_request; } io; /* Non-contiguous file I/O operations use a vector of offsets */ struct { @@ -296,6 +306,8 @@ static int postcopy_buffers(int buffer_index, struct rw_options *rw, return ret; } +#ifndef PVFS2_LINUX_KERNEL_2_4 + /* Copy from page-cache to application address space * @rw - operation context, contains information about the I/O operation * and holds the pointers to the page-cache page array from which @@ -329,7 +341,7 @@ static int copy_from_pagecache(struct rw_options *rw, gossip_err("copy_from_pagecache: failed allocating memory\n"); return -ENOMEM; } - memcpy(copied_iovec, vec, nr_segs 
* sizeof(struct iovec)); + memcpy(copied_iovec, vec, nr_segs * sizeof(*copied_iovec)); /* * Go through each segment in the iovec and make sure that * the summation of iov_len is greater than the given size. @@ -421,6 +433,8 @@ static int copy_from_pagecache(struct rw_options *rw, return 0; } +#endif //#ifndef PVFS2_LINUX_KERNEL_2_4 + /* * Post and wait for the I/O upcall to finish * @rw - contains state information to initiate the I/O operation @@ -441,7 +455,7 @@ static ssize_t wait_for_direct_io(struct rw_options *rw, || !rw->pvfs2_inode || !rw->inode || !rw->fnstr) { gossip_lerr("invalid parameters (rw: %p, vec: %p, nr_segs: %lu, " - "total_size: %zd)\n", rw, vec, nr_segs, total_size); + "total_size: %zd)\n", rw, vec, nr_segs, total_size); ret = -EINVAL; goto out; } @@ -453,6 +467,7 @@ static ssize_t wait_for_direct_io(struct rw_options *rw, rw->pvfs2_inode, rw->copy_dest_type == COPY_DEST_PAGES ? 1 : 0, rw->copy_to_user_addresses); + if (ret >= 0) goto out; /* success */ if (ret != -EOPNOTSUPP) @@ -471,6 +486,8 @@ static ssize_t wait_for_direct_io(struct rw_options *rw, new_op->upcall.req.io.io_type = (rw->type == IO_READV) ? 
PVFS_IO_READ : PVFS_IO_WRITE; new_op->upcall.req.io.refn = rw->pvfs2_inode->refn; + +populate_shared_memory: /* get a shared buffer index */ ret = pvfs_bufmap_get(&buffer_index); if (ret < 0) @@ -479,24 +496,59 @@ static ssize_t wait_for_direct_io(struct rw_options *rw, rw->fnstr, (long) ret); goto out; } - gossip_debug(GOSSIP_FILE_DEBUG, "GET op %p -> buffer_index %d\n", new_op, buffer_index); + gossip_debug(GOSSIP_FILE_DEBUG, "%s/%s(%llu): GET op %p -> buffer_index %d\n" + , __func__ + ,rw->fnstr + , llu(rw->pvfs2_inode->refn.handle) + , new_op, buffer_index); + new_op->uses_shared_memory = 1; new_op->upcall.req.io.buf_index = buffer_index; new_op->upcall.req.io.count = total_size; new_op->upcall.req.io.offset = *(rw->off.io.offset); - gossip_debug(GOSSIP_FILE_DEBUG, "%s: copy_to_user %d nr_segs %lu, " - "offset: %llu total_size: %zd\n", rw->fnstr, rw->copy_to_user_addresses, - nr_segs, llu(*(rw->off.io.offset)), total_size); + gossip_debug(GOSSIP_FILE_DEBUG, "%s/%s(%llu): copy_to_user %d nr_segs %lu, " + "offset: %llu total_size: %zd\n" + ,__func__ + ,rw->fnstr + ,llu(rw->pvfs2_inode->refn.handle) + ,rw->copy_to_user_addresses + ,nr_segs + ,llu(*(rw->off.io.offset)) + ,total_size); + + /* Stage 1: copy the buffers into client-core's address space */ + /* precopy_buffers only pertains to writes. */ if ((ret = precopy_buffers(buffer_index, rw, vec, nr_segs, total_size)) < 0) { goto out; } + + gossip_debug(GOSSIP_FILE_DEBUG,"%s/%s(%llu): Calling post_io_request with tag(%d)\n" + ,__func__ + ,rw->fnstr + ,llu(rw->pvfs2_inode->refn.handle) + ,(int)new_op->tag); + /* Stage 2: Service the I/O operation */ ret = service_operation(new_op, rw->fnstr, get_interruptible_flag(rw->inode)); + /* If service_operation() returns -EAGAIN #and# the operation was purged from + * pvfs2_request_list or htable_ops_in_progress, then we know that the + * client was restarted, causing the shared memory area to be wiped clean. 
To restart a + * write operation in this case, we must re-copy the data from the user's iovec + * to a NEW shared memory location. To restart a read operation, we must get a new + * shared memory location. + */ + if ( ret == -EAGAIN && op_state_purged(new_op) ) + { + gossip_debug(GOSSIP_WAIT_DEBUG,"%s:going to repopulate_shared_memory.\n",__func__); + goto populate_shared_memory; + } + + if (ret < 0) { /* this macro is defined in pvfs2-kernel.h */ @@ -516,7 +568,7 @@ static ssize_t wait_for_direct_io(struct rw_options *rw, { gossip_err( "%s: error in %s handle %llu, " - "FILE: %s\n -- returning %ld\n", + "FILE: %s, returning %ld\n", rw->fnstr, rw->type == IO_READV ? "vectored read from" : "vectored write to", llu(get_handle_from_ino(rw->inode)), @@ -526,7 +578,9 @@ static ssize_t wait_for_direct_io(struct rw_options *rw, } goto out; } + /* Stage 3: Post copy buffers from client-core's address space */ + /* postcopy_buffers only pertains to reads. */ if ((ret = postcopy_buffers(buffer_index, rw, vec, nr_segs, new_op->downcall.resp.io.amt_complete)) < 0) { /* put error codes in downcall so that handle_io_error() @@ -536,8 +590,15 @@ static ssize_t wait_for_direct_io(struct rw_options *rw, handle_io_error(); goto out; } + + gossip_debug(GOSSIP_FILE_DEBUG,"%s/%s(%llu): Amount written as returned by the sys-io call:%d\n" + ,__func__ + ,rw->fnstr + ,llu(rw->pvfs2_inode->refn.handle) + ,(int)new_op->downcall.resp.io.amt_complete); + ret = new_op->downcall.resp.io.amt_complete; - gossip_debug(GOSSIP_FILE_DEBUG, "wait_for_io returning %ld\n", (long) ret); + /* tell the device file owner waiting on I/O that this read has completed and it can return now. 
in this exact case, on @@ -550,7 +611,10 @@ static ssize_t wait_for_direct_io(struct rw_options *rw, if (buffer_index >= 0) { pvfs_bufmap_put(buffer_index); - gossip_debug(GOSSIP_FILE_DEBUG, "PUT buffer_index %d\n", buffer_index); + gossip_debug(GOSSIP_FILE_DEBUG, "%s(%llu): PUT buffer_index %d\n" + , rw->fnstr + , llu(rw->pvfs2_inode->refn.handle) + , buffer_index); buffer_index = -1; } if (new_op) @@ -650,7 +714,7 @@ static int split_iovecs( count += orig_iovec[seg].iov_len; memcpy(&new_iovec[tmpnew_nr_segs], &orig_iovec[seg], - sizeof(struct iovec)); + sizeof(*new_iovec)); tmpnew_nr_segs++; sizes[sizes_count]++; } @@ -686,7 +750,7 @@ static int split_iovecs( return 0; } -static long estimate_max_iovecs(const struct iovec *curr, unsigned long nr_segs, ssize_t *total_count) +static long bound_max_iovecs(const struct iovec *curr, unsigned long nr_segs, ssize_t *total_count) { unsigned long i; long max_nr_iovecs; @@ -716,11 +780,14 @@ static long estimate_max_iovecs(const struct iovec *curr, unsigned long nr_segs, return max_nr_iovecs; } +#ifndef PVFS2_LINUX_KERNEL_2_4 + #ifdef HAVE_OBSOLETE_STRUCT_PAGE_COUNT_NO_UNDERSCORE #define pg_ref_count(pg) atomic_read(&(pg)->count) #else #define pg_ref_count(pg) atomic_read(&(pg)->_count) #endif + /* * Cleaning up pages in the cache involves dropping the reference count * while cleaning up pages that were newly allocated involves unlocking @@ -787,12 +854,16 @@ static int pvfs2_readpages_fill_cb(void *_data, struct page *page) return 0; } + #if defined(HAVE_SPIN_LOCK_PAGE_ADDR_SPACE_STRUCT) #define lock_mapping_tree(mapping) spin_lock(&mapping->page_lock) #define unlock_mapping_tree(mapping) spin_unlock(&mapping->page_lock) -#elif defined(HAVE_SPIN_LOCK_TREE_ADDR_SPACE_STRUCT) +#elif defined(HAVE_RW_LOCK_TREE_ADDR_SPACE_STRUCT) #define lock_mapping_tree(mapping) read_lock(&mapping->tree_lock) #define unlock_mapping_tree(mapping) read_unlock(&mapping->tree_lock) +#elif defined(HAVE_SPIN_LOCK_TREE_ADDR_SPACE_STRUCT) +#define 
lock_mapping_tree(mapping) spin_lock(&mapping->tree_lock) +#define unlock_mapping_tree(mapping) spin_unlock(&mapping->tree_lock) #elif defined(HAVE_RT_PRIV_LOCK_ADDR_SPACE_STRUCT) #define lock_mapping_tree(mapping) spin_lock(&mapping->priv_lock) #define unlock_mapping_tree(mapping) spin_unlock(&mapping->priv_lock) @@ -858,7 +929,7 @@ static int locate_file_pages(struct rw_options *rw, size_t total_size) gossip_lerr("invalid options\n"); return -EINVAL; } - isize = i_size_read(rw->inode); + isize = pvfs2_i_size_read(rw->inode); rw->copy_dest_type = COPY_DEST_PAGES; /* start with an empty page list */ INIT_LIST_HEAD(&rw->dest.pages.page_list); @@ -1044,8 +1115,10 @@ static ssize_t wait_for_missing_io(struct rw_options *rw) if (rw->dest.pages.nr_issue_pages) { int contig_on_file = 0; - gossip_debug(GOSSIP_FILE_DEBUG, "Number of pages for I/O issue %ld, total_size: %ld\n", - rw->dest.pages.nr_issue_pages, (rw->dest.pages.nr_issue_pages << PAGE_CACHE_SHIFT)); + gossip_debug(GOSSIP_FILE_DEBUG, "Number of pages for I/O issue %ld," + " total_size: %ld\n", + rw->dest.pages.nr_issue_pages + , (rw->dest.pages.nr_issue_pages << PAGE_CACHE_SHIFT)); /* scan through the issue pages array and see if we can submit a direct * contiguous request first. */ @@ -1143,19 +1216,20 @@ static ssize_t wait_for_missing_io(struct rw_options *rw) * Returns the actual size of completed I/O. 
*/ static ssize_t wait_for_cached_io(struct rw_options *old_rw, struct iovec *vec, - int nr_segs, size_t total_size) + int nr_segs, size_t total_size) { - ssize_t err = 0, total_actual_io; + ssize_t err = 0, total_actual_io = 0; + ssize_t ret = 0; struct rw_options rw; loff_t isize, offset; - memcpy(&rw, old_rw, sizeof(struct rw_options)); + memcpy(&rw, old_rw, sizeof(rw)); if (rw.type != IO_READV) { gossip_err("writes are not handled yet!\n"); return -EOPNOTSUPP; } offset = *(rw.off.io.offset); - isize = i_size_read(rw.inode); + isize = pvfs2_i_size_read(rw.inode); /* If our file offset was greater than file size, we should return 0 */ if (offset >= isize) { return 0; @@ -1170,13 +1244,23 @@ static ssize_t wait_for_cached_io(struct rw_options *old_rw, struct iovec *vec, /* Issue and wait for I/O only for pages that are not uptodate * or are not found in the cache */ - if ((err = wait_for_missing_io(&rw)) < 0) { - gossip_err("wait_for_missing_io: error in waiting for missing I/O %ld\n", (long) err); + if ((ret = wait_for_missing_io(&rw)) < 0) { + gossip_err("wait_for_missing_io: error in waiting for missing I/O %ld\n" + ,(long)err); goto cleanup; } /* return value is basically file size minus current file offset */ - total_actual_io = isize - offset; - gossip_debug(GOSSIP_FILE_DEBUG, "total_actual_io to be staged from page-cache %zd\n", total_actual_io); + //total_actual_io = isize - offset; + + /* number of bytes to retrieve from the pagecache should be based on + * the number of bytes returned from wait_for_missing_io, which executes + * the io call with the number of bytes requested and returns the number + * of bytes actually transferred. 
+ */ + total_actual_io = ret; + + gossip_debug(GOSSIP_FILE_DEBUG, "total_actual_io to be staged from " + "page-cache %zd\n", total_actual_io); /* Copy the data from the page-cache to the application's address space */ err = copy_from_pagecache(&rw, vec, nr_segs, total_actual_io); err = 0; @@ -1184,6 +1268,7 @@ static ssize_t wait_for_cached_io(struct rw_options *old_rw, struct iovec *vec, cleanup_cache_pages(rw.dest.pages.nr_pages, &rw, err); return err == 0 ? total_actual_io : err; } +#endif //#ifndef PVFS2_LINUX_KERNEL_2_4 /* * Common entry point for read/write/readv/writev @@ -1247,11 +1332,19 @@ static ssize_t do_readv_writev(struct rw_options *rw) goto out; } /* Compute total and max number of segments after split */ - if ((max_new_nr_segs = estimate_max_iovecs(iov, nr_segs, &count)) < 0) + if ((max_new_nr_segs = bound_max_iovecs(iov, nr_segs, &count)) < 0) { - gossip_lerr("%s: could not estimate iovec %lu\n", rw->fnstr, max_new_nr_segs); + gossip_lerr("%s: could not bound iovec %lu\n", rw->fnstr + , max_new_nr_segs); goto out; } + + gossip_debug(GOSSIP_FILE_DEBUG,"%s-BEGIN/%s(%llu): count(%d) after estimate_max_iovecs.\n" + ,__func__ + ,rw->fnstr + ,llu(pvfs2_inode->refn.handle) + ,(int)count); + if (rw->type == IO_WRITEV) { if (!file) @@ -1259,9 +1352,9 @@ static ssize_t do_readv_writev(struct rw_options *rw) gossip_err("%s: Invalid file pointer\n", rw->fnstr); goto out; } - if (file->f_pos > i_size_read(inode)) + if (file->f_pos > pvfs2_i_size_read(inode)) { - i_size_write(inode, file->f_pos); + pvfs2_i_size_write(inode, file->f_pos); } /* perform generic linux kernel tests for sanity of write * arguments @@ -1276,20 +1369,31 @@ static ssize_t do_readv_writev(struct rw_options *rw) gossip_err("%s: failed generic argument checks.\n", rw->fnstr); goto out; } - gossip_debug(GOSSIP_FILE_DEBUG, "%s: proceeding with offset : %llu, size %zd\n", - rw->fnstr, llu(*offset), count); - } + + gossip_debug(GOSSIP_FILE_DEBUG, "%s/%s(%llu): proceeding with offset : %llu, 
size %d\n" + ,__func__ + ,rw->fnstr + ,llu(pvfs2_inode->refn.handle) + ,llu(*offset), (int)count); + } /*endif IO_WRITEV*/ + if (count == 0) { ret = 0; goto out; } + rw->count = count; /* * if the total size of data transfer requested is greater than * the kernel-set blocksize of PVFS2, then we split the iovecs * such that no iovec description straddles a block size limit */ + + gossip_debug(GOSSIP_FILE_DEBUG,"%s: pvfs_bufmap_size:%d\n" + ,rw->fnstr + ,pvfs_bufmap_size_query()); + if (count > pvfs_bufmap_size_query()) { /* @@ -1310,10 +1414,12 @@ static ssize_t do_readv_writev(struct rw_options *rw) if(ret < 0) { gossip_err("%s: Failed to split iovecs to satisfy larger " - " than blocksize readv/writev request %zd\n", rw->fnstr, ret); + " than blocksize readv/writev request %zd\n", rw->fnstr + , ret); goto out; } - gossip_debug(GOSSIP_FILE_DEBUG, "%s: Splitting iovecs from %lu to %lu [max_new %lu]\n", + gossip_debug(GOSSIP_FILE_DEBUG, "%s: Splitting iovecs from %lu to %lu" + " [max_new %lu]\n", rw->fnstr, nr_segs, new_nr_segs, max_new_nr_segs); /* We must free seg_array and iovecptr */ to_free = 1; @@ -1332,26 +1438,33 @@ static ssize_t do_readv_writev(struct rw_options *rw) } ptr = iovecptr; - gossip_debug(GOSSIP_FILE_DEBUG, "%s %zd@%llu\n", - rw->fnstr, count, llu(*offset)); - gossip_debug(GOSSIP_FILE_DEBUG, "%s: new_nr_segs: %lu, seg_count: %lu\n", - rw->fnstr, new_nr_segs, seg_count); + gossip_debug(GOSSIP_FILE_DEBUG, "%s/%s(%llu) %d@%llu\n" + , __func__ + , rw->fnstr + , llu(pvfs2_inode->refn.handle) + , (int)count, llu(*offset)); + gossip_debug(GOSSIP_FILE_DEBUG, "%s/%s(%llu): new_nr_segs: %lu, seg_count: %lu\n" + , __func__ + , rw->fnstr + , llu(pvfs2_inode->refn.handle) + , new_nr_segs, seg_count); + #ifdef PVFS2_KERNEL_DEBUG for (seg = 0; seg < new_nr_segs; seg++) { gossip_debug(GOSSIP_FILE_DEBUG, "%s: %d) %p to %p [%d bytes]\n", rw->fnstr, - seg + 1, iovecptr[seg].iov_base, + (int)seg + 1, iovecptr[seg].iov_base, iovecptr[seg].iov_base + 
iovecptr[seg].iov_len, (int) iovecptr[seg].iov_len); } for (seg = 0; seg < seg_count; seg++) { - gossip_debug(GOSSIP_FILE_DEBUG, "%s: %d) %lu\n", + gossip_debug(GOSSIP_FILE_DEBUG, "%s: %zd) %lu\n", rw->fnstr, seg + 1, seg_array[seg]); - } + } #endif - seg = 0; + seg = 0; while (total_count < count) { size_t each_count, amt_complete; @@ -1359,20 +1472,46 @@ static ssize_t do_readv_writev(struct rw_options *rw) /* how much to transfer in this loop iteration */ each_count = (((count - total_count) > pvfs_bufmap_size_query()) ? pvfs_bufmap_size_query() : (count - total_count)); +#ifndef PVFS2_LINUX_KERNEL_2_4 + /* caching is not working properly. removing functionality for now. Becky Ligon. */ + /* caching REQUIRES the user's buffer to be a multiple of 4096; the code breaks if */ + /* it is not! */ + /* if a file is immutable, stage its I/O * through the cache */ - if (IS_IMMUTABLE(rw->inode)) { + //if (IS_IMMUTABLE(rw->inode)) { /* Stage the I/O through the kernel's pagecache */ - ret = wait_for_cached_io(rw, ptr, seg_array[seg], each_count); - } - else { + // ret = wait_for_cached_io(rw, ptr, seg_array[seg], each_count); + //} + //else +#endif /* PVFS2_LINUX_KERNEL_2_4 */ + //{ /* push the I/O directly through to storage */ - ret = wait_for_direct_io(rw, ptr, seg_array[seg], each_count); - } + + gossip_debug(GOSSIP_FILE_DEBUG,"%s/%s(%llu): size of each_count(%d)\n" + ,__func__ + ,rw->fnstr + ,llu(pvfs2_inode->refn.handle) + ,(int)each_count); + gossip_debug(GOSSIP_FILE_DEBUG,"%s/%s(%llu): BEFORE wait_for_io: offset is %d\n" + ,__func__ + ,rw->fnstr + ,llu(pvfs2_inode->refn.handle) + ,(int)*offset); + + ret = wait_for_direct_io(rw, ptr, seg_array[seg], each_count); + + gossip_debug(GOSSIP_FILE_DEBUG,"%s%s(%llu): return from wait_for_io:%d\n" + ,__func__ + ,rw->fnstr + ,llu(pvfs2_inode->refn.handle) + ,(int)ret); + if (ret < 0) { goto out; } + /* advance the iovec pointer */ ptr += seg_array[seg]; seg++; @@ -1380,6 +1519,12 @@ static ssize_t do_readv_writev(struct 
rw_options *rw) total_count += ret; amt_complete = ret; + gossip_debug(GOSSIP_FILE_DEBUG,"%s/%s(%llu): AFTER wait_for_io: offset is %d\n" + ,__func__ + ,rw->fnstr + ,llu(pvfs2_inode->refn.handle) + ,(int)*offset); + /* if we got a short I/O operations, * fall out and return what we got so far */ @@ -1387,7 +1532,8 @@ static ssize_t do_readv_writev(struct rw_options *rw) { break; } - } + }/*end while*/ + if (total_count > 0) { ret = total_count; @@ -1412,6 +1558,13 @@ static ssize_t do_readv_writev(struct rw_options *rw) } mark_inode_dirty_sync(inode); } + + gossip_debug(GOSSIP_FILE_DEBUG,"%s/%s(%llu): Value(%d) returned.\n" + ,__func__ + ,rw->fnstr + ,llu(pvfs2_inode->refn.handle) + ,(int)ret); + return ret; } @@ -1459,6 +1612,11 @@ ssize_t pvfs2_file_read( struct rw_options rw; struct iovec vec; + gossip_debug(GOSSIP_IO_DEBUG,"pvfs2_file_read: count=%zd \toffset=%lld\n" + ,count + ,(long long)*offset); + + memset(&rw, 0, sizeof(rw)); rw.async = 0; rw.type = IO_READ; @@ -1466,7 +1624,7 @@ ssize_t pvfs2_file_read( rw.copy_to_user_addresses = 1; rw.fnstr = __FUNCTION__; vec.iov_base = buf; - vec.iov_len = count; + vec.iov_len = count; rw.inode = file->f_dentry->d_inode; rw.pvfs2_inode = PVFS2_I(rw.inode); rw.file = file; @@ -1474,16 +1632,14 @@ ssize_t pvfs2_file_read( rw.dest.address.nr_segs = 1; rw.off.io.offset = offset; - if (IS_IMMUTABLE(rw.inode)) - { - rw.readahead_size = (rw.inode)->i_size; - } - else - { - rw.readahead_size = 0; - } + rw.readahead_size = 0; g_pvfs2_stats.reads++; + +#ifdef RESET_FILE_POS + return do_readv_writev_wrapper(&rw); +#else return do_readv_writev(&rw); +#endif } /** Write data from a contiguous user buffer into a file at a specified @@ -1510,11 +1666,17 @@ static ssize_t pvfs2_file_write( rw.file = file; rw.inode = file->f_dentry->d_inode; rw.pvfs2_inode = PVFS2_I(rw.inode); + rw.pvfs2_inode->refn.handle = 1000001; rw.dest.address.iov = &vec; rw.dest.address.nr_segs = 1; rw.off.io.offset = offset; g_pvfs2_stats.writes++; + +#ifdef 
RESET_FILE_POS + return do_readv_writev_wrapper(&rw); +#else return do_readv_writev(&rw); +#endif } /* compat code, < 2.6.19 */ @@ -1589,7 +1751,7 @@ static int construct_file_offset_trailer(char **trailer, int i; struct read_write_x *rwx; - *trailer_size = seg_count * sizeof(struct read_write_x); + *trailer_size = seg_count * sizeof(*rwx); *trailer = (char *) vmalloc(*trailer_size); if (*trailer == NULL) { @@ -1694,7 +1856,7 @@ static int split_xtvecs( count += orig_xtvec[seg].xtv_len; memcpy(&new_xtvec[tmpnew_nr_segs], &orig_xtvec[seg], - sizeof(struct xtvec)); + sizeof(*new_xtvec)); tmpnew_nr_segs++; sizes[sizes_count]++; } @@ -1731,7 +1893,7 @@ static int split_xtvecs( } static long -estimate_max_xtvecs(const struct xtvec *curr, unsigned long nr_segs, size_t *total_count) +bound_max_xtvecs(const struct xtvec *curr, unsigned long nr_segs, size_t *total_count) { unsigned long i; long max_nr_xtvecs; @@ -1958,9 +2120,9 @@ static ssize_t do_readx_writex(struct rw_options *rw) goto out; } /* Compute total and max number of segments after split of the memory vector */ - if ((max_new_nr_segs_mem = estimate_max_iovecs(iov, nr_segs, &count_mem)) < 0) + if ((max_new_nr_segs_mem = bound_max_iovecs(iov, nr_segs, &count_mem)) < 0) { - gossip_lerr("%s: could not estimate iovec %lu\n", rw->fnstr, max_new_nr_segs_mem); + gossip_lerr("%s: could not bound iovec %lu\n", rw->fnstr, max_new_nr_segs_mem); goto out; } xtvec = rw->off.iox.xtvec; @@ -1972,9 +2134,9 @@ static ssize_t do_readx_writex(struct rw_options *rw) goto out; } /* Calculate the total stream length amd max segments after split of the stream vector */ - if ((max_new_nr_segs_stream = estimate_max_xtvecs(xtvec, xtnr_segs, &count_stream)) < 0) + if ((max_new_nr_segs_stream = bound_max_xtvecs(xtvec, xtnr_segs, &count_stream)) < 0) { - gossip_lerr("%s: could not estimate xtvec %lu\n", rw->fnstr, max_new_nr_segs_stream); + gossip_lerr("%s: could not bound xtvec %lu\n", rw->fnstr, max_new_nr_segs_stream); goto out; } if 
(count_mem == 0) @@ -2295,9 +2457,9 @@ static ssize_t pvfs2_aio_retry(struct kiocb *iocb) error = x->bytes_copied; op->priv = NULL; spin_unlock(&op->lock); - gossip_debug(GOSSIP_FILE_DEBUG, "pvfs2_aio_retry: buffer %p," + gossip_debug(GOSSIP_FILE_DEBUG, "pvfs2_aio_retry: iov %p," " size %d return %d bytes\n", - x->buffer, (int) x->bytes_to_be_copied, (int) error); + x->iov, (int) x->bytes_to_be_copied, (int) error); if (error > 0) { struct inode *inode = iocb->ki_filp->f_mapping->host; @@ -2387,7 +2549,9 @@ pvfs2_aio_cancel(struct kiocb *iocb, struct io_event *event) * htable_in_progress or from the req list * as the case may be. */ - clean_up_interrupted_operation(op); + gossip_debug(GOSSIP_WAIT_DEBUG, "*** %s: operation aio_cancel " + "(tag %lld, op %p)\n", __func__, lld(op->tag), op); + pvfs2_clean_up_interrupted_operation(op); /* * However, we need to make sure that * the client daemon is not transferring data @@ -2529,6 +2693,11 @@ static void pvfs2_aio_dtor(struct kiocb *iocb) x->op->priv = NULL; put_op(x->op); } + if (x->iov) + { + kfree(x->iov); + x->iov = NULL; + } x->needs_cleanup = 0; } gossip_debug(GOSSIP_FILE_DEBUG, "pvfs2_aio_dtor: kiocb_release %p\n", x); @@ -2537,12 +2706,12 @@ static void pvfs2_aio_dtor(struct kiocb *iocb) return; } -static inline void +static inline int fill_default_kiocb(pvfs2_kiocb *x, struct task_struct *tsk, struct kiocb *iocb, int rw, int buffer_index, pvfs2_kernel_op_t *op, - void __user *buffer, + const struct iovec *iovec, unsigned long nr_segs, loff_t offset, size_t count, int (*aio_cancel)(struct kiocb *, struct io_event *)) { @@ -2551,13 +2720,23 @@ fill_default_kiocb(pvfs2_kiocb *x, x->buffer_index = buffer_index; x->op = op; x->rw = rw; - x->buffer = buffer; x->bytes_to_be_copied = count; x->offset = offset; x->bytes_copied = 0; x->needs_cleanup = 1; iocb->ki_cancel = aio_cancel; - return; + /* Allocate a private pointer to store the + * iovector since the caller could pass in a + * local variable for the iovector. 
+ */ + x->iov = kmalloc(nr_segs * sizeof(*x->iov), PVFS2_BUFMAP_GFP_FLAGS); + if (x->iov == NULL) + { + return -ENOMEM; + } + memcpy(x->iov, iovec, nr_segs * sizeof(*x->iov)); + x->nr_segs = nr_segs; + return 0; } /* @@ -2575,7 +2754,10 @@ fill_default_kiocb(pvfs2_kiocb *x, * that get completion notification from interrupt * context, we get completion notification from a process * context (i.e. the client daemon). - * TODO: We do not handle vectored aio requests yet + * TODO: We handle vectored aio requests now but we do + * not handle the case where the total size of IO is + * larger than our FS transfer block size (4 MB + * default). */ static ssize_t do_aio_read_write(struct rw_options *rw) { @@ -2620,17 +2802,11 @@ static ssize_t do_aio_read_write(struct rw_options *rw) iov, nr_segs); goto out_error; } - if (nr_segs > 1) - { - gossip_lerr("%s: not implemented yet (aio with %ld segments)\n", - rw->fnstr, nr_segs); - goto out_error; - } count = 0; /* Compute total and max number of segments after split */ - if ((max_new_nr_segs = estimate_max_iovecs(iov, nr_segs, &count)) < 0) + if ((max_new_nr_segs = bound_max_iovecs(iov, nr_segs, &count)) < 0) { - gossip_lerr("%s: could not estimate iovecs %ld\n", rw->fnstr, max_new_nr_segs); + gossip_lerr("%s: could not bound iovecs %ld\n", rw->fnstr, max_new_nr_segs); goto out_error; } if (unlikely(((ssize_t)count)) < 0) @@ -2641,7 +2817,18 @@ static ssize_t do_aio_read_write(struct rw_options *rw) /* synchronous I/O */ if (!rw->async) { + +#ifdef RESET_FILE_POS + error = do_readv_writev_wrapper(rw); +#else error = do_readv_writev(rw); +#endif + + /* not sure this is the correct place or way to update ki_pos but it + * definitely needs to occur somehow. otherwise, a write following + * a synchronous writev will not write at the correct file position. 
+ * store the offset from the read/write into the kiocb struct */ + iocb->ki_pos = *offset; goto out_error; } /* Asynchronous I/O */ @@ -2682,7 +2869,6 @@ static ssize_t do_aio_read_write(struct rw_options *rw) { int buffer_index = -1; pvfs2_kernel_op_t *new_op = NULL; - char __user *current_buf = (char *) rw->dest.address.iov[0].iov_base; pvfs2_inode_t *pvfs2_inode = PVFS2_I(inode); new_op = op_alloc(PVFS2_VFS_OP_FILE_IO); @@ -2715,14 +2901,30 @@ static ssize_t do_aio_read_write(struct rw_options *rw) if (rw->type == IO_WRITE) { /* - * copy the data from the application for writes - * Should this be done here even for async I/O? + * copy the data from the application for writes. * We could return -EIOCBRETRY here and have * the data copied in the pvfs2_aio_retry routine, - * I think. But I dont see the point in doing that... + * I dont see too much point in doing that + * since the app would have touched the + * memory pages prior to the write and + * hence accesses to the page won't block. */ - error = pvfs_bufmap_copy_from_user( - buffer_index, current_buf, count); + if (rw->copy_to_user_addresses) + { + error = pvfs_bufmap_copy_iovec_from_user( + buffer_index, + iov, + nr_segs, + count); + } + else + { + error = pvfs_bufmap_copy_iovec_from_kernel( + buffer_index, + iov, + nr_segs, + count); + } if (error < 0) { gossip_err("%s: Failed to copy user buffer %ld. Make sure that pvfs2-client-core" @@ -2749,22 +2951,34 @@ static ssize_t do_aio_read_write(struct rw_options *rw) goto out_error; } gossip_debug(GOSSIP_FILE_DEBUG, "kiocb_alloc: %p\n", x); - /* - * destructor function to make sure that we free - * up this allocated piece of memory - */ - iocb->ki_dtor = pvfs2_aio_dtor; /* * We need to set the cancellation callbacks + * other state information * here if the asynchronous request is going to * be successfully submitted */ - fill_default_kiocb(x, current, iocb, - (rw->type == IO_READ) ? 
PVFS_IO_READ : PVFS_IO_WRITE, - buffer_index, new_op, current_buf, - *offset, count, - &pvfs2_aio_cancel); + error = fill_default_kiocb(x, current, iocb, + (rw->type == IO_READ) ? PVFS_IO_READ : PVFS_IO_WRITE, + buffer_index, + new_op, iov, nr_segs, + *offset, count, + &pvfs2_aio_cancel); + if (error != 0) + { + kiocb_release(x); + /* drop the buffer index */ + pvfs_bufmap_put(buffer_index); + gossip_debug(GOSSIP_FILE_DEBUG, "%s: pvfs_bufmap_put %d\n", + rw->fnstr, buffer_index); + /* drop the reference count and deallocate */ + put_op(new_op); + goto out_error; + } + /* + * destructor function to make sure that we free + * up this allocated piece of memory + */ + iocb->ki_dtor = pvfs2_aio_dtor; /* * We need to be able to retrieve this structure from * the op structure as well, since the client-daemon @@ -2802,6 +3016,7 @@ static ssize_t pvfs2_file_aio_read_iovec(struct kiocb *iocb, unsigned long nr_segs, loff_t offset) { struct rw_options rw; + memset(&rw, 0, sizeof(rw)); rw.async = !is_sync_kiocb(iocb); rw.type = IO_READ; @@ -2883,8 +3098,12 @@ pvfs2_file_aio_write(struct kiocb *iocb, const char __user *buffer, */ #ifdef HAVE_NO_FS_IOC_FLAGS +#ifdef HAVE_UNLOCKED_IOCTL_HANDLER +long pvfs2_ioctl( +#else int pvfs2_ioctl( struct inode *inode, +#endif /* HAVE_UNLOCKED_IOCTL_HANDLER */ struct file *file, unsigned int cmd, unsigned long arg) @@ -2893,8 +3112,12 @@ int pvfs2_ioctl( } #else +#ifdef HAVE_UNLOCKED_IOCTL_HANDLER +long pvfs2_ioctl( +#else int pvfs2_ioctl( struct inode *inode, +#endif /* HAVE_UNLOCKED_IOCTL_HANDLER */ struct file *file, unsigned int cmd, unsigned long arg) @@ -2911,9 +3134,19 @@ int pvfs2_ioctl( if(cmd == FS_IOC_GETFLAGS) { val = 0; - ret = pvfs2_xattr_get_default(inode, - "user.pvfs2.meta_hint", - &val, sizeof(val)); + ret = pvfs2_xattr_get_default( +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + file->f_dentry, +#else + file->f_dentry->d_inode, +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + "user.pvfs2.meta_hint", + &val, + sizeof(val) 
+#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + , 0 +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + ); if(ret < 0 && ret != -ENODATA) { return ret; @@ -2934,7 +3167,14 @@ int pvfs2_ioctl( { return -EFAULT; } - if(uval & (~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL))) + /* PVFS_MIRROR_FL is set internally when the mirroring mode is turned + * on for a file. The user is not allowed to turn on this bit, but the + * bit is present if the user first gets the flags and then updates the + * flags with some new settings. So, we ignore it in the following + * edit. bligon. + */ + if((uval & ~PVFS_MIRROR_FL) & + (~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL))) { gossip_err("pvfs2_ioctl: the FS_IOC_SETFLAGS only supports setting " "one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n"); @@ -2943,9 +3183,20 @@ int pvfs2_ioctl( val = uval; gossip_debug(GOSSIP_FILE_DEBUG, "pvfs2_ioctl: FS_IOC_SETFLAGS: %llu\n", (unsigned long long)val); - ret = pvfs2_xattr_set_default(inode, - "user.pvfs2.meta_hint", - &val, sizeof(val), 0); + ret = pvfs2_xattr_set_default( +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + file->f_dentry, +#else + file->f_dentry->d_inode, +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + "user.pvfs2.meta_hint", + &val, + sizeof(val), + 0 +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + , 0 +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + ); } return ret; @@ -3031,7 +3282,13 @@ int pvfs2_file_release( */ int pvfs2_fsync( struct file *file, +#ifdef HAVE_FSYNC_LOFF_T_PARAMS + loff_t start, + loff_t end, +#endif +#ifdef HAVE_FSYNC_DENTRY_PARAM struct dentry *dentry, +#endif int datasync) { int ret = -EINVAL; @@ -3080,6 +3337,7 @@ loff_t pvfs2_file_llseek(struct file *file, loff_t offset, int origin) ret = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_SIZE); if (ret) { + gossip_debug(GOSSIP_FILE_DEBUG, "%s:%s:%d calling make bad inode\n", __FILE__, __func__, __LINE__); pvfs2_make_bad_inode(inode); return ret; } @@ -3124,7 +3382,7 @@ static void do_bypass_page_cache_read(struct file 
*filp, loff_t *ppos, begin_index = *ppos >> PAGE_CACHE_SHIFT; offset = *ppos & ~PAGE_CACHE_MASK; - isize = i_size_read(inode); + isize = pvfs2_i_size_read(inode); if (!isize) { return; @@ -3274,8 +3532,10 @@ struct file_operations pvfs2_file_operations = .write = pvfs2_file_write, #ifdef HAVE_COMBINED_AIO_AND_VECTOR /* for >= 2.6.19 */ +#ifdef HAVE_AIO_VFS_SUPPORT .aio_read = pvfs2_file_aio_read_iovec, .aio_write = pvfs2_file_aio_write_iovec, +#endif .lock = pvfs2_lock, #else .readv = pvfs2_file_readv, @@ -3285,7 +3545,11 @@ struct file_operations pvfs2_file_operations = .aio_write = pvfs2_file_aio_write, # endif #endif +#ifdef HAVE_UNLOCKED_IOCTL_HANDLER + .unlocked_ioctl = pvfs2_ioctl, +#else .ioctl = pvfs2_ioctl, +#endif /* HAVE_UNLOCKED_IOCTL_HANDLER */ .mmap = pvfs2_file_mmap, .open = pvfs2_file_open, .release = pvfs2_file_release, @@ -3411,6 +3675,41 @@ static int pvfs2_precheck_file_write(struct file *file, struct inode *inode, } #endif + +#ifdef RESET_FILE_POS +/* This function wrapper imposes the rule that the user's + * request was either entirely fulfilled or it wasn't. If it wasn't, + * then errno will be set appropriately, -1 will be returned as the + * request's return value, and the file offset will be repositioned to + * the beginning of the request. If it was successfully completed, then + * the amount written/read will be returned and the file offset will be + * incremented by the appropriate amount. + */ +static ssize_t do_readv_writev_wrapper( struct rw_options *rw) +{ + ssize_t ret; + + gossip_err("Wrapper called.\n"); + + /* Save the file's current offset before issuing this read/write + * request. + */ + rw->off.io.offset_before_request = *(rw->off.io.offset); + + /* If the return code from the request is negative, + * restore the offset to its original value.
+ */ + ret = do_readv_writev(rw); + if (ret < 0) + { + *(rw->off.io.offset) = rw->off.io.offset_before_request; + } + return (ret); +} +#endif + + + /* * Local variables: * c-indent-level: 4 diff --git a/src/kernel/linux-2.6/inode.c b/src/kernel/linux-2.6/inode.c index e690807..a094eca 100644 --- a/src/kernel/linux-2.6/inode.c +++ b/src/kernel/linux-2.6/inode.c @@ -93,6 +93,8 @@ static int pvfs2_readpages( list_del(&page->lru); if (!add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) { ret = read_one_page(page); + gossip_debug(GOSSIP_INODE_DEBUG, "added page to cache, " + "read_one_page returned: %d\n", ret); } else { page_cache_release(page); @@ -133,7 +135,10 @@ static int pvfs2_releasepage(struct page *page, gfp_t foo) struct backing_dev_info pvfs2_backing_dev_info = { - .ra_pages = 1024, +#ifdef HAVE_BACKING_DEV_INFO_NAME + .name = "pvfs2", +#endif + .ra_pages = 0, #ifdef HAVE_BDI_MEMORY_BACKED /* old interface, up through 2.6.11 */ .memory_backed = 1 /* does not contribute to dirty memory */ @@ -160,7 +165,7 @@ struct address_space_operations pvfs2_address_operations = */ void pvfs2_truncate(struct inode *inode) { - loff_t orig_size = i_size_read(inode); + loff_t orig_size = pvfs2_i_size_read(inode); if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; @@ -171,7 +176,7 @@ void pvfs2_truncate(struct inode *inode) * although the mtime updates are propagated lazily!
*/ if (pvfs2_truncate_inode(inode, inode->i_size) == 0 - && (orig_size != i_size_read(inode))) + && (orig_size != pvfs2_i_size_read(inode))) { pvfs2_inode_t *pvfs2_inode = PVFS2_I(inode); SetMtimeFlag(pvfs2_inode); @@ -187,12 +192,29 @@ int pvfs2_setattr(struct dentry *dentry, struct iattr *iattr) int ret = -EINVAL; struct inode *inode = dentry->d_inode; - gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_setattr: called on %s\n", dentry->d_name.name); + gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_setattr: called on %s\n", + dentry->d_name.name); ret = inode_change_ok(inode, iattr); if (ret == 0) { + +#ifdef HAVE_INODE_SETATTR ret = inode_setattr(inode, iattr); +#else + if ((iattr->ia_valid & ATTR_SIZE) && + iattr->ia_size != i_size_read(inode)) + { + ret = vmtruncate(inode, iattr->ia_size); + if (ret) + return ret; + } + + setattr_copy(inode, iattr); + mark_inode_dirty(inode); + ret = 0; +#endif /* HAVE_INODE_SETATTR */ + gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_setattr: inode_setattr returned %d\n", ret); if (ret == 0) @@ -229,6 +251,7 @@ int pvfs2_revalidate(struct dentry *dentry) if (ret) { /* assume an I/O error and flag inode as bad */ + gossip_debug(GOSSIP_INODE_DEBUG, "%s:%s:%d calling make bad inode\n", __FILE__, __func__, __LINE__); pvfs2_make_bad_inode(inode); } return ret; @@ -243,8 +266,27 @@ int pvfs2_getattr( { int ret = -ENOENT; struct inode *inode = dentry->d_inode; + pvfs2_inode_t *pvfs2_inode = NULL; - gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_getattr: called on %s\n", dentry->d_name.name); + gossip_debug(GOSSIP_INODE_DEBUG, + "pvfs2_getattr: called on %s\n", dentry->d_name.name); + + /* This seems to be the only place to reliably detect mount options + * parsed by the VFS layer. Propagate them to our internal sb structure so + * that we can handle lazy time updates properly.
+ */ +#ifdef HAVE_MNT_NOATIME + if(mnt->mnt_flags & MNT_NOATIME) + { + inode->i_sb->s_flags |= MS_NOATIME; + } +#endif +#ifdef HAVE_MNT_NODIRATIME + if(mnt->mnt_flags & MNT_NODIRATIME) + { + inode->i_sb->s_flags |= MS_NODIRATIME; + } +#endif /* * Similar to the above comment, a getattr also expects that all fields/attributes @@ -255,10 +297,14 @@ int pvfs2_getattr( if (ret == 0) { generic_fillattr(inode, kstat); + /* override block size reported to stat */ + pvfs2_inode = PVFS2_I(inode); + kstat->blksize = pvfs2_inode->blksize; } else { /* assume an I/O error and flag inode as bad */ + gossip_debug(GOSSIP_INODE_DEBUG, "%s:%s:%d calling make bad inode\n", __FILE__, __func__, __LINE__); pvfs2_make_bad_inode(inode); } return ret; @@ -306,6 +352,7 @@ int pvfs2_getattr_lite( else { /* assume an I/O error and flag inode as bad */ + gossip_debug(GOSSIP_INODE_DEBUG, "%s:%s:%d calling make bad inode\n", __FILE__, __func__, __LINE__); pvfs2_make_bad_inode(inode); } return ret; @@ -469,6 +516,8 @@ struct inode *pvfs2_iget_common(struct super_block *sb, PVFS_object_ref *ref, in } #endif } + gossip_debug(GOSSIP_INODE_DEBUG, "iget handle %llu, fsid %d hash %ld i_ino %lu\n", + ref->handle, ref->fs_id, hash, inode->i_ino); return inode; } @@ -532,8 +581,13 @@ struct inode *pvfs2_get_custom_inode_common( "pvfs2_get_custom_inode_common: inode: %p, inode->i_mode %o\n", inode, inode->i_mode); inode->i_mapping->host = inode; +#ifdef HAVE_CURRENT_FSUID + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); +#else inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; +#endif inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_size = PAGE_CACHE_SIZE; #ifdef HAVE_I_BLKSIZE_IN_STRUCT_INODE @@ -574,7 +628,7 @@ struct inode *pvfs2_get_custom_inode_common( inode->i_fop = &pvfs2_dir_operations; /* dir inodes start with i_nlink == 2 (for "."
entry) */ - inode->i_nlink++; + pvfs2_i_inc_nlink(inode); } else { diff --git a/src/kernel/linux-2.6/modules.order b/src/kernel/linux-2.6/modules.order new file mode 100644 index 0000000..8ba289f --- /dev/null +++ b/src/kernel/linux-2.6/modules.order @@ -0,0 +1 @@ +kernel//usr/src/orangefs-osd/src/kernel/linux-2.6/pvfs2.ko diff --git a/src/kernel/linux-2.6/namei.c b/src/kernel/linux-2.6/namei.c index 607ae30..2ec4c65 100644 --- a/src/kernel/linux-2.6/namei.c +++ b/src/kernel/linux-2.6/namei.c @@ -81,7 +81,7 @@ static struct dentry *pvfs2_lookup( */ gossip_debug(GOSSIP_NAME_DEBUG, "pvfs2_lookup called on %s\n", dentry->d_name.name); - if (dentry->d_name.len > PVFS2_NAME_LEN) + if (dentry->d_name.len > (PVFS2_NAME_LEN-1)) { return ERR_PTR(-ENAMETOOLONG); } @@ -112,6 +112,8 @@ static struct dentry *pvfs2_lookup( if (parent && parent->refn.handle != PVFS_HANDLE_NULL && parent->refn.fs_id != PVFS_FS_ID_NULL) { + gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d using parent %llu\n", + __FILE__, __func__, __LINE__, llu(parent->refn.handle)); new_op->upcall.req.lookup.parent_refn = parent->refn; } else @@ -200,6 +202,9 @@ static struct dentry *pvfs2_lookup( { struct dentry *res; + gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d Found good inode [%lu] with count [%d]\n", + __FILE__, __func__, __LINE__, inode->i_ino, (int)atomic_read(&inode->i_count)); + /* update dentry/inode pair into dcache */ dentry->d_op = &pvfs2_dentry_operations; @@ -219,6 +224,8 @@ static struct dentry *pvfs2_lookup( } else if (inode && is_bad_inode(inode)) { + gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d Found bad inode [%lu] with count [%d]. 
Returning error [%d]", + __FILE__, __func__, __LINE__, inode->i_ino, (int)atomic_read(&inode->i_count), ret); ret = -EACCES; found_pvfs2_inode = PVFS2_I(inode); /* look for an error code, possibly set by pvfs2_read_inode(), @@ -237,7 +244,7 @@ static struct dentry *pvfs2_lookup( * from pvfs2_iget was null...just return EACCESS */ op_release(new_op); - gossip_debug(GOSSIP_NAME_DEBUG, "Returning -EACCES\n"); + gossip_debug(GOSSIP_NAME_DEBUG, "Returning -EACCES for NULL inode\n"); return ERR_PTR(-EACCES); } @@ -256,7 +263,7 @@ static int pvfs2_unlink( if (ret == 0) { pvfs2_inode_t *dir_pinode = PVFS2_I(dir); - inode->i_nlink--; + pvfs2_i_drop_nlink(inode); SetMtimeFlag(dir_pinode); pvfs2_update_inode_time(dir); @@ -365,7 +372,7 @@ static int pvfs2_rmdir( if (ret == 0) { pvfs2_inode_t *dir_pinode = PVFS2_I(dir); - inode->i_nlink--; + pvfs2_i_drop_nlink(inode); #if 0 /* NOTE: we have no good way to keep nlink consistent for directories * across clients; keep constant at 1 -Phil @@ -387,15 +394,24 @@ static int pvfs2_rename( struct dentry *new_dentry) { int ret = -EINVAL, are_directories = 0; + unsigned int local_count = 0; pvfs2_inode_t *pvfs2_old_parent_inode = PVFS2_I(old_dir); pvfs2_inode_t *pvfs2_new_parent_inode = PVFS2_I(new_dir); pvfs2_kernel_op_t *new_op = NULL; struct super_block *sb = NULL; + +#ifdef HAVE_DENTRY_D_COUNT_ATOMIC + local_count = atomic_read(&new_dentry->d_count); +#else + spin_lock( &new_dentry->d_lock ); + local_count = new_dentry->d_count; + spin_unlock( &new_dentry->d_lock ); +#endif /* HAVE_DENTRY_D_COUNT_ATOMIC */ gossip_debug(GOSSIP_NAME_DEBUG, "pvfs2_rename: called (%s/%s => %s/%s) ct=%d\n", old_dentry->d_parent->d_name.name, old_dentry->d_name.name, new_dentry->d_parent->d_name.name, new_dentry->d_name.name, - atomic_read(&new_dentry->d_count)); + local_count); are_directories = S_ISDIR(old_dentry->d_inode->i_mode); #if 0 diff --git a/src/kernel/linux-2.6/osd.c b/src/kernel/linux-2.6/osd.c index b3a1248..7a0393c 100644 --- 
a/src/kernel/linux-2.6/osd.c +++ b/src/kernel/linux-2.6/osd.c @@ -120,7 +120,8 @@ ssize_t osd_do_io(struct iovec *iov, int iov_count, uint64_t offset, __func__, pid, oid, len, offset); memset(cdb, 0, sizeof(cdb)); cdb[0] = 0x7f; - cdb[7] = 200 - 8; + //cdb[7] = 200 - 8; + cdb[7] = 236 - 8; cdb[8] = (action & 0xff00U) >> 8; cdb[9] = (action & 0x00ffU); cdb[11] = 3 << 4; @@ -148,8 +149,9 @@ ssize_t osd_do_io(struct iovec *iov, int iov_count, uint64_t offset, if (!iovs_are_pages) if (to_user) ret = blk_rq_map_user_iov(q, rq, - (struct sg_iovec *) iov, - iov_count, len); + NULL, (struct sg_iovec *) iov, + iov_count, len, GFP_KERNEL); + if (ret) goto out; diff --git a/src/kernel/linux-2.6/pvfs2-bufmap.c b/src/kernel/linux-2.6/pvfs2-bufmap.c index 5fccd30..60b7466 100644 --- a/src/kernel/linux-2.6/pvfs2-bufmap.c +++ b/src/kernel/linux-2.6/pvfs2-bufmap.c @@ -8,6 +8,9 @@ #include "pint-dev-shared.h" +DECLARE_WAIT_QUEUE_HEAD(pvfs2_bufmap_init_waitq); + + static int bufmap_page_count, pages_per_desc; static int32_t pvfs2_bufmap_desc_size, pvfs2_bufmap_desc_shift, @@ -29,17 +32,51 @@ static struct page **bufmap_page_array = NULL; /* array to track usage of buffer descriptors */ static int *buffer_index_array = NULL; +#ifdef HAVE_SPIN_LOCK_UNLOCKED static spinlock_t buffer_index_lock = SPIN_LOCK_UNLOCKED; +#else +static DEFINE_SPINLOCK(buffer_index_lock); +#endif /* HAVE_SPIN_LOCK_UNLOCKED */ /* array to track usage of buffer descriptors for readdir/readdirplus */ static int readdir_index_array[PVFS2_READDIR_DEFAULT_DESC_COUNT] = {0}; +#ifdef HAVE_SPIN_LOCK_UNLOCKED static spinlock_t readdir_index_lock = SPIN_LOCK_UNLOCKED; +#else +static DEFINE_SPINLOCK(readdir_index_lock); +#endif /* HAVE_SPIN_LOCK_UNLOCKED */ static struct pvfs_bufmap_desc *desc_array = NULL; static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq); static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq); +/* get_bufmap_init + * + * If bufmap_init is 1, then the shared memory system, including the + * buffer_index_array, is 
available. Otherwise, it is not. + * + * returns the value of bufmap_init + */ +int get_bufmap_init(void) +{ + int ret = -EINVAL; + int lock = 0; + + lock=down_read_trylock(&bufmap_init_sem); + + if (lock == 1) + { /* semaphore obtained */ + ret = bufmap_init; + up_read(&bufmap_init_sem); + return (ret); + } + + /* semaphore locked */ + /* value of lock will be zero */ + return(lock); +} + static int initialize_bufmap_descriptors(int ndescs) { int err; @@ -65,7 +102,7 @@ static int initialize_bufmap_descriptors(int ndescs) if (desc_array == NULL) { gossip_err("pvfs2: could not allocate %d bytes\n", - (int) (ndescs * sizeof(struct pvfs_bufmap_desc))); + (int) (ndescs * sizeof(*desc_array))); goto out1; } err = 0; @@ -105,7 +142,7 @@ int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc) int i = 0; int offset = 0; - gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_initialize: called " + gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_initialize: called " "(ptr (%p) sz (%d) cnt(%d).\n", user_desc->ptr, user_desc->size, user_desc->count); @@ -215,6 +252,9 @@ int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc) setting PageReserved in 2.6.x seems to cause more trouble than it's worth. in 2.4.x, marking the pages does what's expected and doesn't try to swap out our pages + + since setting the page as reserved has problems in 2.6 these pages + need to be mlock() in the user space side */ for(i = 0; i < bufmap_page_count; i++) { @@ -249,7 +289,18 @@ int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc) bufmap_init = 1; up_write(&bufmap_init_sem); - gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_initialize: exiting normally\n"); + /* If there are operations in pvfs2_bufmap_init_waitq, wake them up. + * This scenario occurs when the client-core is restarted and I/O + * requests in the in-progress or waiting tables are restarted. 
I/O + * requests cannot be restarted until the shared memory system is completely + * re-initialized, so we put the I/O requests in this waitq until + * initialization has completed. NOTE: the I/O requests are also on a + * timer, so they don't wait forever (just in case the client-core doesn't + * come back up). + */ + wake_up_interruptible(&pvfs2_bufmap_init_waitq); + + gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_initialize: exiting normally\n"); return 0; init_failure: @@ -312,7 +363,7 @@ static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index) { set_current_state(TASK_INTERRUPTIBLE); - /* check for available desc */ + /* check for available desc, slot_lock is the appropriate index_lock */ spin_lock(slargs->slot_lock); for(i = 0; i < slargs->slot_count; i++) { @@ -334,17 +385,24 @@ static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index) if (!signal_pending(current)) { - int timeout = MSECS_TO_JIFFIES(1000 * op_timeout_secs); + int timeout = MSECS_TO_JIFFIES(1000 * slot_timeout_secs); + gossip_debug(GOSSIP_BUFMAP_DEBUG, + "[BUFMAP]: waiting %d seconds for a slot\n", + slot_timeout_secs); if (!schedule_timeout(timeout)) { - gossip_debug(GOSSIP_BUFMAP_DEBUG, "*** wait_for_a_slot timed out\n"); + gossip_debug(GOSSIP_BUFMAP_DEBUG, + "*** wait_for_a_slot timed out\n"); ret = -ETIMEDOUT; break; } + gossip_debug(GOSSIP_BUFMAP_DEBUG, + "[BUFMAP]: acquired slot\n"); continue; } - gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2: wait_for_a_slot() interrupted.\n"); + gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2: %s interrupted.\n", + __func__); ret = -EINTR; break; } @@ -357,12 +415,15 @@ static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index) static void put_back_slot(struct slot_args *slargs, int buffer_index) { + + /* slot_lock is the appropriate index_lock */ + spin_lock(slargs->slot_lock); if (buffer_index < 0 || buffer_index >= slargs->slot_count) { + spin_unlock(slargs->slot_lock); return; } /* put the desc back on the queue */ -
spin_lock(slargs->slot_lock); slargs->slot_array[buffer_index] = 0; spin_unlock(slargs->slot_lock); @@ -478,7 +539,6 @@ void readdir_index_put(int buffer_index) return; } - slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT; slargs.slot_array = readdir_index_array; slargs.slot_lock = &readdir_index_lock; @@ -497,9 +557,9 @@ void readdir_index_put(int buffer_index) int pvfs_bufmap_copy_to_user(void __user *to, int buffer_index, size_t size) { size_t ret = 0, amt_copied = 0, amt_remaining = 0, cur_copy_size = 0; - int index = 0; - void __user *offset = to; + int from_page_index = 0; void *from_kaddr = NULL; + void __user *to_kaddr = to; struct pvfs_bufmap_desc *from = &desc_array[buffer_index]; gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_to_user: to %p, from %p, index %d, " @@ -521,9 +581,10 @@ int pvfs_bufmap_copy_to_user(void __user *to, int buffer_index, size_t size) cur_copy_size = ((amt_remaining > PAGE_SIZE) ? PAGE_SIZE : amt_remaining); - from_kaddr = pvfs2_kmap(from->page_array[index]); - ret = copy_to_user(offset, from_kaddr, cur_copy_size); - pvfs2_kunmap(from->page_array[index]); + from_kaddr = pvfs2_kmap(from->page_array[from_page_index]); + ret = copy_to_user(to_kaddr, from_kaddr, cur_copy_size); + /* not marking dirty, mapped page isn't changed */ + pvfs2_kunmap(from->page_array[from_page_index]); if (ret) { @@ -532,20 +593,25 @@ int pvfs_bufmap_copy_to_user(void __user *to, int buffer_index, size_t size) return -EFAULT; } - offset += cur_copy_size; + to_kaddr += cur_copy_size; amt_copied += cur_copy_size; - index++; + from_page_index++; } up_read(&bufmap_init_sem); return 0; } -int pvfs_bufmap_copy_to_kernel( - void *to, int buffer_index, size_t size) +/* pvfs_bufmap_copy_to_kernel() + * + * copies data out of a mapped buffer to a kernel space address + * + * returns 0 on success, -errno on failure + */ +int pvfs_bufmap_copy_to_kernel(void *to, int buffer_index, size_t size) { size_t amt_copied = 0, amt_remaining = 0, cur_copy_size = 0; - int 
index = 0; - void *offset = to, *from_kaddr = NULL; + int from_page_index = 0; + void *to_kaddr = to, *from_kaddr = NULL; struct pvfs_bufmap_desc *from = &desc_array[buffer_index]; gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_to_kernel: to %p, index %d, size %zd\n", @@ -567,13 +633,13 @@ int pvfs_bufmap_copy_to_kernel( cur_copy_size = ((amt_remaining > PAGE_SIZE) ? PAGE_SIZE : amt_remaining); - from_kaddr = pvfs2_kmap(from->page_array[index]); - memcpy(offset, from_kaddr, cur_copy_size); - pvfs2_kunmap(from->page_array[index]); + from_kaddr = pvfs2_kmap(from->page_array[from_page_index]); + memcpy(to_kaddr, from_kaddr, cur_copy_size); + pvfs2_kunmap(from->page_array[from_page_index]); - offset += cur_copy_size; + to_kaddr += cur_copy_size; amt_copied += cur_copy_size; - index++; + from_page_index++; } up_read(&bufmap_init_sem); return 0; @@ -585,13 +651,12 @@ int pvfs_bufmap_copy_to_kernel( * * returns 0 on success, -errno on failure */ -int pvfs_bufmap_copy_from_user( - int buffer_index, void __user *from, size_t size) +int pvfs_bufmap_copy_from_user(int buffer_index, void __user *from, size_t size) { size_t ret = 0, amt_copied = 0, amt_remaining = 0, cur_copy_size = 0; - int index = 0; - void __user *offset = from; + void __user *from_kaddr = from; void *to_kaddr = NULL; + int to_page_index = 0; struct pvfs_bufmap_desc *to = &desc_array[buffer_index]; char* tmp_printer = NULL; int tmp_int = 0; @@ -615,16 +680,19 @@ int pvfs_bufmap_copy_from_user( cur_copy_size = ((amt_remaining > PAGE_SIZE) ? 
PAGE_SIZE : amt_remaining); - to_kaddr = pvfs2_kmap(to->page_array[index]); - ret = copy_from_user(to_kaddr, offset, cur_copy_size); - if(!tmp_printer) + to_kaddr = pvfs2_kmap(to->page_array[to_page_index]); + ret = copy_from_user(to_kaddr, from_kaddr, cur_copy_size); + if (!tmp_printer) { tmp_printer = (char*)(to_kaddr); tmp_int += tmp_printer[0]; gossip_debug(GOSSIP_BUFMAP_DEBUG, "First character (integer value) in pvfs_bufmap_copy_from_user: %d\n", tmp_int); } - pvfs2_kunmap(to->page_array[index]); + if( !PageReserved(to->page_array[to_page_index]) ) + SetPageDirty(to->page_array[to_page_index]); + + pvfs2_kunmap(to->page_array[to_page_index]); if (ret) { @@ -633,9 +701,9 @@ int pvfs_bufmap_copy_from_user( return -EFAULT; } - offset += cur_copy_size; + from_kaddr += cur_copy_size; amt_copied += cur_copy_size; - index++; + to_page_index++; } up_read(&bufmap_init_sem); return 0; @@ -644,10 +712,12 @@ int pvfs_bufmap_copy_from_user( /* * pvfs_bufmap_copy_to_pages() * - * Copies data from client-core's address space to the specified - * pages (typically page-cache pages) for a specified size and + * Copies data from the mapped buffer to the specified set of + * kernel pages (typically page-cache pages) for a specified size and * number of pages. * NOTE: iovec is expected to store pointers to struct page + * + * Returns 0 on success, -errno on failure. 
*/ int pvfs_bufmap_copy_to_pages(int buffer_index, const struct iovec *vec, @@ -655,7 +725,7 @@ int pvfs_bufmap_copy_to_pages(int buffer_index, size_t size) { size_t amt_copied = 0, amt_remaining = 0, cur_copy_size = 0; - int index = 0; + int from_page_index = 0; void *from_kaddr = NULL, *to_kaddr = NULL; struct pvfs_bufmap_desc *from = &desc_array[buffer_index]; struct page *page; @@ -675,17 +745,17 @@ int pvfs_bufmap_copy_to_pages(int buffer_index, while (amt_copied < size) { - if (index >= nr_segs) + if (from_page_index >= nr_segs) { gossip_err("pvfs_bufmap_copy_to_pages: count cannot exceed " "number of pages(%lu)\n", nr_segs); up_read(&bufmap_init_sem); return -EIO; } - page = (struct page *) vec[index].iov_base; + page = (struct page *) vec[from_page_index].iov_base; if (page == NULL) { gossip_err("pvfs_bufmap_copy_to_pages: invalid page pointer %d\n", - index); + from_page_index); up_read(&bufmap_init_sem); return -EIO; } @@ -694,9 +764,9 @@ int pvfs_bufmap_copy_to_pages(int buffer_index, PAGE_SIZE : amt_remaining); gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_to_pages:" "from_page: %p, to_page: %p\n", - from->page_array[index], page); + from->page_array[from_page_index], page); - from_kaddr = pvfs2_kmap(from->page_array[index]); + from_kaddr = pvfs2_kmap(from->page_array[from_page_index]); to_kaddr = pvfs2_kmap(page); #if 0 gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_to_pages -> " @@ -708,11 +778,16 @@ int pvfs_bufmap_copy_to_pages(int buffer_index, if (cur_copy_size < PAGE_SIZE) { memset(to_kaddr + cur_copy_size, 0, PAGE_SIZE - cur_copy_size); } + if( !PageReserved(page) ) + SetPageDirty(page); + if( !PageReserved(from->page_array[from_page_index]) ) + SetPageDirty(from->page_array[from_page_index]); + pvfs2_kunmap(page); - pvfs2_kunmap(from->page_array[index]); + pvfs2_kunmap(from->page_array[from_page_index]); amt_copied += cur_copy_size; - index++; + from_page_index++; } up_read(&bufmap_init_sem); return 0; @@ -721,10 +796,12 @@ int 
pvfs_bufmap_copy_to_pages(int buffer_index, /* * pvfs_bufmap_copy_from_pages() * - * Copies data to client-core's address space from the specified target + * Copies data to the mapped buffer from the specified set of target * pages (typically the kernel's page-cache) * for a given size and number of pages. - * NOTE: iovec is expected to store pointers to struct page + * NOTE: iovec is expected to store pointers to struct page. + * + * Returns 0 on success and -errno on failure. */ int pvfs_bufmap_copy_from_pages(int buffer_index, const struct iovec *vec, @@ -732,7 +809,7 @@ int pvfs_bufmap_copy_from_pages(int buffer_index, size_t size) { size_t amt_copied = 0, amt_remaining = 0, cur_copy_size = 0; - int index = 0; + int to_page_index = 0; void *from_kaddr = NULL, *to_kaddr = NULL; struct pvfs_bufmap_desc *to = &desc_array[buffer_index]; struct page *page; @@ -752,13 +829,13 @@ int pvfs_bufmap_copy_from_pages(int buffer_index, while (amt_copied < size) { - if (index >= nr_segs) { + if (to_page_index >= nr_segs) { gossip_err("pvfs_bufmap_copy_from_pages: count cannot exceed number of" "pages(%lu)\n", nr_segs); up_read(&bufmap_init_sem); return -EIO; } - page = (struct page *) vec[index].iov_base; + page = (struct page *) vec[to_page_index].iov_base; if (page == NULL) { gossip_err("pvfs_bufmap_copy_from_pages: invalid page pointer\n"); up_read(&bufmap_init_sem); @@ -768,8 +845,8 @@ int pvfs_bufmap_copy_from_pages(int buffer_index, cur_copy_size = ((amt_remaining > PAGE_SIZE) ? 
PAGE_SIZE : amt_remaining); gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_from_pages:" - "from_page: %p, to_page: %p\n", page, to->page_array[index]); - to_kaddr = pvfs2_kmap(to->page_array[index]); + "from_page: %p, to_page: %p\n", page, to->page_array[to_page_index]); + to_kaddr = pvfs2_kmap(to->page_array[to_page_index]); from_kaddr = pvfs2_kmap(page); #if 0 gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_from_pages -> " @@ -777,10 +854,16 @@ int pvfs_bufmap_copy_from_pages(int buffer_index, to_kaddr, from_kaddr, cur_copy_size); #endif memcpy(to_kaddr, from_kaddr, cur_copy_size); + + if( !PageReserved(to->page_array[to_page_index]) ) + SetPageDirty(to->page_array[to_page_index]); + if( !PageReserved(page) ) + SetPageDirty(page); + pvfs2_kunmap(page); - pvfs2_kunmap(to->page_array[index]); + pvfs2_kunmap(to->page_array[to_page_index]); amt_copied += cur_copy_size; - index++; + to_page_index++; } up_read(&bufmap_init_sem); return 0; @@ -806,12 +889,12 @@ int pvfs_bufmap_copy_iovec_from_user( size_t size) { size_t ret = 0, amt_copied = 0, cur_copy_size = 0; - int index = 0; + unsigned int to_page_offset = 0, to_page_index = 0; void *to_kaddr = NULL; void __user *from_addr = NULL; struct iovec *copied_iovec = NULL; struct pvfs_bufmap_desc *to = &desc_array[buffer_index]; - unsigned int seg, page_offset = 0; + unsigned int seg; char* tmp_printer = NULL; int tmp_int = 0; @@ -837,7 +920,7 @@ int pvfs_bufmap_copy_iovec_from_user( up_read(&bufmap_init_sem); return -ENOMEM; } - memcpy(copied_iovec, iov, nr_segs * sizeof(struct iovec)); + memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec)); /* * Go through each segment in the iovec and make sure that * the summation of iov_len matches the given size. 
@@ -855,54 +938,57 @@ int pvfs_bufmap_copy_iovec_from_user( return -EINVAL; } - index = 0; + to_page_index = 0; + to_page_offset = 0; amt_copied = 0; seg = 0; - page_offset = 0; /* Go through each segment in the iovec and copy its * buffer into the mapped buffer one page at a time though */ while (amt_copied < size) { struct iovec *iv = &copied_iovec[seg]; - int inc_index = 0; + int inc_to_page_index; - if (iv->iov_len < (PAGE_SIZE - page_offset)) + if (iv->iov_len < (PAGE_SIZE - to_page_offset)) { cur_copy_size = PVFS_util_min(iv->iov_len, size - amt_copied); seg++; from_addr = iv->iov_base; - inc_index = 0; + inc_to_page_index = 0; } - else if (iv->iov_len == (PAGE_SIZE - page_offset)) + else if (iv->iov_len == (PAGE_SIZE - to_page_offset)) { cur_copy_size = PVFS_util_min(iv->iov_len, size - amt_copied); seg++; from_addr = iv->iov_base; - inc_index = 1; + inc_to_page_index = 1; } else { - cur_copy_size = PVFS_util_min(PAGE_SIZE - page_offset, size - amt_copied); + cur_copy_size = PVFS_util_min(PAGE_SIZE - to_page_offset, size - amt_copied); from_addr = iv->iov_base; iv->iov_base += cur_copy_size; iv->iov_len -= cur_copy_size; - inc_index = 1; + inc_to_page_index = 1; } - to_kaddr = pvfs2_kmap(to->page_array[index]); - ret = copy_from_user(to_kaddr + page_offset, from_addr, cur_copy_size); - if(!tmp_printer) + to_kaddr = pvfs2_kmap(to->page_array[to_page_index]); + ret = copy_from_user(to_kaddr + to_page_offset, from_addr, cur_copy_size); + if( !PageReserved(to->page_array[to_page_index]) ) + SetPageDirty(to->page_array[to_page_index]); + + if (!tmp_printer) { - tmp_printer = (char*)(to_kaddr+page_offset); + tmp_printer = (char*)(to_kaddr + to_page_offset); tmp_int += tmp_printer[0]; gossip_debug(GOSSIP_BUFMAP_DEBUG, "First character (integer value) in pvfs_bufmap_copy_from_user: %d\n", tmp_int); } - pvfs2_kunmap(to->page_array[index]); + pvfs2_kunmap(to->page_array[to_page_index]); #if 0 gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_copy_iovec_from_user: 
copying from user %p to kernel %p %zd bytes (to_kddr: %p,page_offset: %d)\n", - from_addr, to_kaddr + page_offset, cur_copy_size, to_kaddr, page_offset); + from_addr, to_kaddr + to_page_offset, cur_copy_size, to_kaddr, to_page_offset); #endif if (ret) { @@ -913,12 +999,14 @@ int pvfs_bufmap_copy_iovec_from_user( } amt_copied += cur_copy_size; - if (inc_index) { - page_offset = 0; - index++; + if (inc_to_page_index) + { + to_page_offset = 0; + to_page_index++; } - else { - page_offset += cur_copy_size; + else + { + to_page_offset += cur_copy_size; } } kfree(copied_iovec); @@ -945,12 +1033,12 @@ int pvfs_bufmap_copy_iovec_from_kernel( size_t size) { size_t amt_copied = 0, cur_copy_size = 0; - int index = 0; + int to_page_index = 0; void *to_kaddr = NULL; void *from_kaddr = NULL; struct iovec *copied_iovec = NULL; struct pvfs_bufmap_desc *to = &desc_array[buffer_index]; - unsigned int seg, page_offset = 0; + unsigned int seg, to_page_offset = 0; gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_iovec_from_kernel: index %d, " "size %zd\n", buffer_index, size); @@ -974,7 +1062,7 @@ int pvfs_bufmap_copy_iovec_from_kernel( up_read(&bufmap_init_sem); return -ENOMEM; } - memcpy(copied_iovec, iov, nr_segs * sizeof(struct iovec)); + memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec)); /* * Go through each segment in the iovec and make sure that * the summation of iov_len matches the given size. 
@@ -992,54 +1080,58 @@ int pvfs_bufmap_copy_iovec_from_kernel( return -EINVAL; } - index = 0; + to_page_index = 0; amt_copied = 0; seg = 0; - page_offset = 0; + to_page_offset = 0; /* Go through each segment in the iovec and copy its * buffer into the mapped buffer one page at a time though */ while (amt_copied < size) { struct iovec *iv = &copied_iovec[seg]; - int inc_index = 0; + int inc_to_page_index; - if (iv->iov_len < (PAGE_SIZE - page_offset)) + if (iv->iov_len < (PAGE_SIZE - to_page_offset)) { cur_copy_size = PVFS_util_min(iv->iov_len, size - amt_copied); seg++; from_kaddr = iv->iov_base; - inc_index = 0; + inc_to_page_index = 0; } - else if (iv->iov_len == (PAGE_SIZE - page_offset)) + else if (iv->iov_len == (PAGE_SIZE - to_page_offset)) { cur_copy_size = PVFS_util_min(iv->iov_len, size - amt_copied); seg++; from_kaddr = iv->iov_base; - inc_index = 1; + inc_to_page_index = 1; } else { - cur_copy_size = PVFS_util_min(PAGE_SIZE - page_offset, size - amt_copied); + cur_copy_size = PVFS_util_min(PAGE_SIZE - to_page_offset, size - amt_copied); from_kaddr = iv->iov_base; iv->iov_base += cur_copy_size; iv->iov_len -= cur_copy_size; - inc_index = 1; + inc_to_page_index = 1; } - to_kaddr = pvfs2_kmap(to->page_array[index]); - memcpy(to_kaddr + page_offset, from_kaddr, cur_copy_size); - pvfs2_kunmap(to->page_array[index]); + to_kaddr = pvfs2_kmap(to->page_array[to_page_index]); + memcpy(to_kaddr + to_page_offset, from_kaddr, cur_copy_size); + if( !PageReserved(to->page_array[to_page_index]) ) + SetPageDirty(to->page_array[to_page_index]); + pvfs2_kunmap(to->page_array[to_page_index]); #if 0 gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_copy_iovec_from_kernel: copying from kernel %p to kernel %p %zd bytes (to_kddr: %p,page_offset: %d)\n", from_kaddr, to_kaddr + page_offset, cur_copy_size, to_kaddr, page_offset); #endif amt_copied += cur_copy_size; - if (inc_index) { - page_offset = 0; - index++; + if (inc_to_page_index) + { + to_page_offset = 0; + to_page_index++; 
} - else { - page_offset += cur_copy_size; + else + { + to_page_offset += cur_copy_size; } } kfree(copied_iovec); @@ -1062,12 +1154,12 @@ int pvfs_bufmap_copy_to_user_iovec( { size_t ret = 0, amt_copied = 0; size_t cur_copy_size = 0; - int index = 0; + int from_page_index = 0; void *from_kaddr = NULL; void __user *to_addr = NULL; struct iovec *copied_iovec = NULL; struct pvfs_bufmap_desc *from = &desc_array[buffer_index]; - unsigned int seg, page_offset = 0; + unsigned int seg, from_page_offset = 0; char* tmp_printer = NULL; int tmp_int = 0; @@ -1093,7 +1185,7 @@ int pvfs_bufmap_copy_to_user_iovec( up_read(&bufmap_init_sem); return -ENOMEM; } - memcpy(copied_iovec, iov, nr_segs * sizeof(struct iovec)); + memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec)); /* * Go through each segment in the iovec and make sure that * the summation of iov_len is greater than the given size. @@ -1111,10 +1203,10 @@ int pvfs_bufmap_copy_to_user_iovec( return -EINVAL; } - index = 0; + from_page_index = 0; amt_copied = 0; seg = 0; - page_offset = 0; + from_page_offset = 0; /* * Go through each segment in the iovec and copy from the mapper buffer, * but make sure that we do so one page at a time. 
@@ -1122,42 +1214,42 @@ int pvfs_bufmap_copy_to_user_iovec( while (amt_copied < size) { struct iovec *iv = &copied_iovec[seg]; - int inc_index = 0; + int inc_from_page_index; - if (iv->iov_len < (PAGE_SIZE - page_offset)) + if (iv->iov_len < (PAGE_SIZE - from_page_offset)) { cur_copy_size = PVFS_util_min(iv->iov_len, size - amt_copied); seg++; to_addr = iv->iov_base; - inc_index = 0; + inc_from_page_index = 0; } - else if (iv->iov_len == (PAGE_SIZE - page_offset)) + else if (iv->iov_len == (PAGE_SIZE - from_page_offset)) { cur_copy_size = PVFS_util_min(iv->iov_len, size - amt_copied); seg++; to_addr = iv->iov_base; - inc_index = 1; + inc_from_page_index = 1; } else { - cur_copy_size = PVFS_util_min(PAGE_SIZE - page_offset, size - amt_copied); + cur_copy_size = PVFS_util_min(PAGE_SIZE - from_page_offset, size - amt_copied); to_addr = iv->iov_base; iv->iov_base += cur_copy_size; iv->iov_len -= cur_copy_size; - inc_index = 1; + inc_from_page_index = 1; } - from_kaddr = pvfs2_kmap(from->page_array[index]); - if(!tmp_printer) + from_kaddr = pvfs2_kmap(from->page_array[from_page_index]); + if (!tmp_printer) { - tmp_printer = (char*)(from_kaddr + page_offset); + tmp_printer = (char*)(from_kaddr + from_page_offset); tmp_int += tmp_printer[0]; gossip_debug(GOSSIP_BUFMAP_DEBUG, "First character (integer value) in pvfs_bufmap_copy_to_user_iovec: %d\n", tmp_int); } - ret = copy_to_user(to_addr, from_kaddr + page_offset, cur_copy_size); - pvfs2_kunmap(from->page_array[index]); + ret = copy_to_user(to_addr, from_kaddr + from_page_offset, cur_copy_size); + pvfs2_kunmap(from->page_array[from_page_index]); #if 0 gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_copy_to_user_iovec: copying to user %p from kernel %p %d bytes (from_kaddr:%p, page_offset:%d)\n", - to_addr, from_kaddr + page_offset, cur_copy_size, from_kaddr, page_offset); + to_addr, from_kaddr + from_page_offset, cur_copy_size, from_kaddr, from_page_offset); #endif if (ret) { @@ -1168,12 +1260,14 @@ int 
pvfs_bufmap_copy_to_user_iovec( } amt_copied += cur_copy_size; - if (inc_index) { - page_offset = 0; - index++; + if (inc_from_page_index) + { + from_page_offset = 0; + from_page_index++; } - else { - page_offset += cur_copy_size; + else + { + from_page_offset += cur_copy_size; } } kfree(copied_iovec); @@ -1196,12 +1290,12 @@ int pvfs_bufmap_copy_to_kernel_iovec( { size_t amt_copied = 0; size_t cur_copy_size = 0; - int index = 0; + int from_page_index = 0; void *from_kaddr = NULL; void *to_kaddr = NULL; struct iovec *copied_iovec = NULL; struct pvfs_bufmap_desc *from = &desc_array[buffer_index]; - unsigned int seg, page_offset = 0; + unsigned int seg, from_page_offset = 0; gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_to_kernel_iovec: index %d, " "size %zd\n", buffer_index, size); @@ -1225,7 +1319,7 @@ int pvfs_bufmap_copy_to_kernel_iovec( up_read(&bufmap_init_sem); return -ENOMEM; } - memcpy(copied_iovec, iov, nr_segs * sizeof(struct iovec)); + memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec)); /* * Go through each segment in the iovec and make sure that * the summation of iov_len is greater than the given size. @@ -1243,10 +1337,10 @@ int pvfs_bufmap_copy_to_kernel_iovec( return -EINVAL; } - index = 0; + from_page_index = 0; amt_copied = 0; seg = 0; - page_offset = 0; + from_page_offset = 0; /* * Go through each segment in the iovec and copy from the mapper buffer, * but make sure that we do so one page at a time. 
@@ -1254,44 +1348,46 @@ int pvfs_bufmap_copy_to_kernel_iovec( while (amt_copied < size) { struct iovec *iv = &copied_iovec[seg]; - int inc_index = 0; + int inc_from_page_index; - if (iv->iov_len < (PAGE_SIZE - page_offset)) + if (iv->iov_len < (PAGE_SIZE - from_page_offset)) { cur_copy_size = PVFS_util_min(iv->iov_len, size - amt_copied); seg++; to_kaddr = iv->iov_base; - inc_index = 0; + inc_from_page_index = 0; } - else if (iv->iov_len == (PAGE_SIZE - page_offset)) + else if (iv->iov_len == (PAGE_SIZE - from_page_offset)) { cur_copy_size = PVFS_util_min(iv->iov_len, size - amt_copied); seg++; to_kaddr = iv->iov_base; - inc_index = 1; + inc_from_page_index = 1; } else { - cur_copy_size = PVFS_util_min(PAGE_SIZE - page_offset, size - amt_copied); + cur_copy_size = PVFS_util_min(PAGE_SIZE - from_page_offset, size - amt_copied); to_kaddr = iv->iov_base; iv->iov_base += cur_copy_size; iv->iov_len -= cur_copy_size; - inc_index = 1; + inc_from_page_index = 1; } - from_kaddr = pvfs2_kmap(from->page_array[index]); - memcpy(to_kaddr, from_kaddr + page_offset, cur_copy_size); - pvfs2_kunmap(from->page_array[index]); + from_kaddr = pvfs2_kmap(from->page_array[from_page_index]); + memcpy(to_kaddr, from_kaddr + from_page_offset, cur_copy_size); + pvfs2_kunmap(from->page_array[from_page_index]); #if 0 - gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_copy_to_kernel_iovec: copying to kernel %p from kernel %p %d bytes (from_kaddr:%p, page_offset:%d)\n", + gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_to_kernel_iovec: copying to kernel %p from kernel %p %d bytes (from_kaddr:%p, page_offset:%d)\n", to_kaddr, from_kaddr + page_offset, cur_copy_size, from_kaddr, page_offset); #endif amt_copied += cur_copy_size; - if (inc_index) { - page_offset = 0; - index++; + if (inc_from_page_index) + { + from_page_offset = 0; + from_page_index++; } - else { - page_offset += cur_copy_size; + else + { + from_page_offset += cur_copy_size; } } kfree(copied_iovec); @@ -1301,40 +1397,45 @@ int 
pvfs_bufmap_copy_to_kernel_iovec( #ifdef HAVE_AIO_VFS_SUPPORT -/* pvfs_bufmap_copy_to_user_task() +/* pvfs_bufmap_copy_to_user_task_iovec() * - * copies data out of a mapped buffer to a user space address + * copies data out of a mapped buffer to a vector of user space address * of a given task specified by the task structure argument (tsk) * This is used by the client-daemon for completing an aio * operation that was issued by an arbitrary user program. * Unfortunately, we cannot use a copy_to_user * in that case and need to map in the user pages before * attempting the copy! - * returns number of bytes copied on success, - * -errno on failure + * + * NOTE: There is no need for an analogous copy from user task since + * the data buffers get copied in the context of the process initiating + * the write system call! + * + * Returns number of bytes copied on success, -errno on failure. */ -size_t pvfs_bufmap_copy_to_user_task( +size_t pvfs_bufmap_copy_to_user_task_iovec( struct task_struct *tsk, - void __user *to, + struct iovec *iovec, unsigned long nr_segs, int buffer_index, - size_t size) + size_t size_to_be_copied) { - size_t ret = 0, amt_copied = 0, amt_remaining = 0, cur_copy_size = 0; - int index = 0; + size_t ret = 0, amt_copied = 0, cur_copy_size = 0; + int from_page_index = 0; void *from_kaddr = NULL; + struct iovec *copied_iovec = NULL; struct pvfs_bufmap_desc *from = &desc_array[buffer_index]; struct mm_struct *mm = NULL; struct vm_area_struct *vma = NULL; struct page *page = NULL; - unsigned long to_addr = (unsigned long) to; + unsigned long to_addr = 0, copy_amt = 0; void *maddr = NULL; - int to_offset = 0, from_offset = 0; - int inc_index = 0; + unsigned int to_offset = 0; + unsigned int seg, from_page_offset = 0; - gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_to_user_task: " - " PID: %d, to %p, from %p, index %d, " - " size %zd\n", tsk->pid, to, from, buffer_index, size); + gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_to_user_task_iovec: 
" + " PID: %d, iovec %p, from %p, index %d, " + " size %zd\n", tsk->pid, iovec, from, buffer_index, size_to_be_copied); down_read(&bufmap_init_sem); if (bufmap_init == 0) @@ -1346,76 +1447,116 @@ size_t pvfs_bufmap_copy_to_user_task( up_read(&bufmap_init_sem); return -EIO; } + /* + * copy the passed in iovec so that we can change some of its fields + */ + copied_iovec = kmalloc(nr_segs * sizeof(*copied_iovec), + PVFS2_BUFMAP_GFP_FLAGS); + if (copied_iovec == NULL) + { + gossip_err("pvfs_bufmap_copy_to_user_iovec: failed allocating memory\n"); + up_read(&bufmap_init_sem); + return -ENOMEM; + } + memcpy(copied_iovec, iovec, nr_segs * sizeof(*copied_iovec)); + /* + * Go through each segment in the iovec and make sure that + * the summation of iov_len is greater than the given size. + */ + for (seg = 0, amt_copied = 0; seg < nr_segs; seg++) + { + amt_copied += copied_iovec[seg].iov_len; + } + if (amt_copied < size_to_be_copied) + { + gossip_err("pvfs_bufmap_copy_to_user_task_iovec: computed total (%zd) " + "is less than (%zd)\n", amt_copied, size_to_be_copied); + kfree(copied_iovec); + up_read(&bufmap_init_sem); + return -EINVAL; + } mm = get_task_mm(tsk); if (!mm) { + kfree(copied_iovec); up_read(&bufmap_init_sem); return -EIO; } + from_page_index = 0; + amt_copied = 0; + seg = 0; + from_page_offset = 0; /* * Go through each of the page in the specified process * address space and copy from the mapped * buffer, and make sure to do this one page at a time! 
*/ down_read(&mm->mmap_sem); - while(amt_copied < size) + while (amt_copied < size_to_be_copied) { - int bytes = 0; + int inc_from_page_index = 0; + struct iovec *iv = &copied_iovec[seg]; + if (iv->iov_len < (PAGE_SIZE - from_page_offset)) + { + cur_copy_size = PVFS_util_min(iv->iov_len, size_to_be_copied - amt_copied); + seg++; + to_addr = (unsigned long) iv->iov_base; + inc_from_page_index = 0; + } + else if (iv->iov_len == (PAGE_SIZE - from_page_offset)) + { + cur_copy_size = PVFS_util_min(iv->iov_len, size_to_be_copied - amt_copied); + seg++; + to_addr = (unsigned long) iv->iov_base; + inc_from_page_index = 1; + } + else + { + cur_copy_size = PVFS_util_min(PAGE_SIZE - from_page_offset, size_to_be_copied - amt_copied); + to_addr = (unsigned long) iv->iov_base; + iv->iov_base += cur_copy_size; + iv->iov_len -= cur_copy_size; + inc_from_page_index = 1; + } ret = get_user_pages(tsk, mm, to_addr, 1,/* count */ 1,/* write */ - 1,/* force */ + 0,/* force */ &page, &vma); if (ret <= 0) break; to_offset = to_addr & (PAGE_SIZE - 1); - amt_remaining = (size - amt_copied); - if ((PAGE_SIZE - to_offset) < (PAGE_SIZE - from_offset)) - { - bytes = PAGE_SIZE - to_offset; - inc_index = 0; - } - else if ((PAGE_SIZE - to_offset) == (PAGE_SIZE - from_offset)) - { - bytes = (PAGE_SIZE - to_offset); - inc_index = 1; - } - else + maddr = pvfs2_kmap(page); + from_kaddr = pvfs2_kmap(from->page_array[from_page_index]); + /* FIX */ + copy_amt = copy_to_user(maddr + to_offset, from_kaddr, cur_copy_size ); + if( copy_amt != 0 ) { - bytes = (PAGE_SIZE - from_offset); - inc_index = 1; + gossip_err("%s: failure in copy_to_user, %lu could not be copied\n", + __func__, copy_amt); } - cur_copy_size = - amt_remaining > bytes - ? 
bytes : amt_remaining; - maddr = pvfs2_kmap(page); - from_kaddr = pvfs2_kmap(from->page_array[index]); - copy_to_user_page(vma, page, to_addr, - maddr + to_offset /* dst */, - from_kaddr + from_offset, /* src */ - cur_copy_size /* len */); set_page_dirty_lock(page); - pvfs2_kunmap(from->page_array[index]); + pvfs2_kunmap(from->page_array[from_page_index]); pvfs2_kunmap(page); page_cache_release(page); amt_copied += cur_copy_size; - to_addr += cur_copy_size; - if (inc_index) + if (inc_from_page_index) { - from_offset = 0; - index++; + from_page_offset = 0; + from_page_index++; } else { - from_offset += cur_copy_size; + from_page_offset += cur_copy_size; } } up_read(&mm->mmap_sem); mmput(mm); up_read(&bufmap_init_sem); - return (amt_copied < size) ? -EFAULT: amt_copied; + kfree(copied_iovec); + return (amt_copied < size_to_be_copied) ? -EFAULT: amt_copied; } #endif diff --git a/src/kernel/linux-2.6/pvfs2-bufmap.h b/src/kernel/linux-2.6/pvfs2-bufmap.h index 5ed24d4..27a7b9a 100644 --- a/src/kernel/linux-2.6/pvfs2-bufmap.h +++ b/src/kernel/linux-2.6/pvfs2-bufmap.h @@ -27,6 +27,8 @@ int pvfs_bufmap_shift_query(void); int pvfs_bufmap_initialize( struct PVFS_dev_map_desc *user_desc); +int get_bufmap_init(void); + void pvfs_bufmap_finalize(void); int pvfs_bufmap_get( @@ -96,8 +98,15 @@ int pvfs_bufmap_copy_from_pages( size_t pvfs_bufmap_copy_to_user_task( struct task_struct *tsk, void __user *to, - int buffer_index, - size_t size); + size_t size, + int buffer_index, + int *buffer_index_offset); +size_t pvfs_bufmap_copy_to_user_task_iovec( + struct task_struct *tsk, + struct iovec *iovec, + unsigned long nr_segs, + int buffer_index, + size_t bytes_to_be_copied); #endif #endif /* __PVFS2_BUFMAP_H */ diff --git a/src/kernel/linux-2.6/pvfs2-cache.c b/src/kernel/linux-2.6/pvfs2-cache.c index 29ada78..3192d88 100644 --- a/src/kernel/linux-2.6/pvfs2-cache.c +++ b/src/kernel/linux-2.6/pvfs2-cache.c @@ -7,13 +7,21 @@ #include "pvfs2-kernel.h" /* A list of all allocated pvfs2 inode 
objects */ +#ifdef HAVE_SPIN_LOCK_UNLOCKED static spinlock_t pvfs2_inode_list_lock = SPIN_LOCK_UNLOCKED; +#else +static DEFINE_SPINLOCK(pvfs2_inode_list_lock); +#endif /* HAVE_SPIN_LOCK_UNLOCKED */ + static LIST_HEAD(pvfs2_inode_list); /* tags assigned to kernel upcall operations */ static uint64_t next_tag_value; +#ifdef HAVE_SPIN_LOCK_UNLOCKED static spinlock_t next_tag_value_lock = SPIN_LOCK_UNLOCKED; - +#else +static DEFINE_SPINLOCK(next_tag_value_lock); +#endif /* HAVE_SPIN_LOCK_UNLOCKED */ /* the pvfs2 memory caches */ #ifdef HAVE_STRUCT_KMEM_CACHE @@ -202,26 +210,11 @@ void op_release(pvfs2_kernel_op_t *pvfs2_op) } } - -static void dev_req_cache_ctor( -#ifdef HAVE_KMEM_CACHE_CREATE_CTOR_TWO_PARAM - struct kmem_cache *cachep, - void *req -#else - void *req, - pvfs2_kmem_cache_t * cachep, - unsigned long flags -#endif -) -{ - memset(req, 0, sizeof(MAX_ALIGNED_DEV_REQ_DOWNSIZE)); -} - int dev_req_cache_initialize(void) { dev_req_cache = kmem_cache_create( "pvfs2_devreqcache", MAX_ALIGNED_DEV_REQ_DOWNSIZE, 0, - PVFS2_CACHE_CREATE_FLAGS, dev_req_cache_ctor + PVFS2_CACHE_CREATE_FLAGS, NULL #ifdef HAVE_KMEM_CACHE_CREATE_DESTRUCTOR_PARAM , NULL #endif @@ -254,6 +247,10 @@ void *dev_req_alloc(void) { gossip_err("Failed to allocate from dev_req_cache\n"); } + else + { + memset(buffer, 0, sizeof(MAX_ALIGNED_DEV_REQ_DOWNSIZE)); + } return buffer; } @@ -271,7 +268,9 @@ void dev_req_release(void *buffer) } static void pvfs2_inode_cache_ctor( -#ifdef HAVE_KMEM_CACHE_CREATE_CTOR_TWO_PARAM +#if defined(HAVE_KMEM_CACHE_CREATE_CTOR_ONE_PARAM) + void *req +#elif defined(HAVE_KMEM_CACHE_CREATE_CTOR_TWO_PARAM) struct kmem_cache *cachep, void *req #else @@ -283,9 +282,7 @@ static void pvfs2_inode_cache_ctor( { pvfs2_inode_t *pvfs2_inode = req; - memset(pvfs2_inode, 0, sizeof(pvfs2_inode_t)); ClearInitFlag(pvfs2_inode); - pvfs2_inode_initialize(pvfs2_inode); #ifndef PVFS2_LINUX_KERNEL_2_4 @@ -373,7 +370,10 @@ pvfs2_inode_t* pvfs2_inode_alloc(void) { gossip_err("Failed to allocate 
pvfs2_inode\n"); } - else { + else + { + ClearInitFlag(pvfs2_inode); + pvfs2_inode_initialize(pvfs2_inode); add_to_pinode_list(pvfs2_inode); } return pvfs2_inode; @@ -394,26 +394,11 @@ void pvfs2_inode_release(pvfs2_inode_t *pinode) #ifdef HAVE_AIO_VFS_SUPPORT -static void kiocb_ctor( -#ifdef HAVE_KMEM_CACHE_CREATE_CTOR_TWO_PARAM - struct kmem_cache *cachep, - void *req -#else - void *req, - pvfs2_kmem_cache_t * cachep, - unsigned long flags -#endif -) -{ - memset(req, 0, sizeof(pvfs2_kiocb)); -} - - int kiocb_cache_initialize(void) { pvfs2_kiocb_cache = kmem_cache_create( "pvfs2_kiocbcache", sizeof(pvfs2_kiocb), 0, - PVFS2_CACHE_CREATE_FLAGS, kiocb_ctor + PVFS2_CACHE_CREATE_FLAGS, NULL #ifdef HAVE_KMEM_CACHE_CREATE_DESTRUCTOR_PARAM , NULL #endif @@ -446,6 +431,10 @@ pvfs2_kiocb* kiocb_alloc(void) { gossip_err("kiocb_alloc: kmem_cache_alloc failed!\n"); } + else + { + memset(x, 0, sizeof(pvfs2_kiocb)); + } return x; } diff --git a/src/kernel/linux-2.6/pvfs2-dev-proto.h b/src/kernel/linux-2.6/pvfs2-dev-proto.h index de13e0a..1bc83cb 100644 --- a/src/kernel/linux-2.6/pvfs2-dev-proto.h +++ b/src/kernel/linux-2.6/pvfs2-dev-proto.h @@ -47,8 +47,17 @@ * Otherwise 32-64 bit interactions will be messed up :) */ #define PVFS2_NAME_LEN 0x00000100 -/* MAX_DIRENT_COUNT cannot be larger than PVFS_REQ_LIMIT_LISTATTR */ -#define MAX_DIRENT_COUNT 0x00000020 +#define PVFS2_MAX_DEBUG_STRING_LEN 0x00000400 + +/* MAX_DIRENT_COUNT cannot be larger than PVFS_REQ_LIMIT_LISTATTR. + * The value of PVFS_REQ_LIMIT_LISTATTR has been changed from 113 to 60 + * to accomodate an attribute object with mirrored handles. + * MAX_DIRENT_COUNT is replaced by MAX_DIRENT_COUNT_READDIR and + * MAX_DIRENT_COUNT_READDIRPLUS, since readdir doesn't trigger a listattr + * but readdirplus might. 
+*/ +#define MAX_DIRENT_COUNT_READDIR 0x00000060 +#define MAX_DIRENT_COUNT_READDIRPLUS 0x0000003C #include "pvfs2.h" diff --git a/src/kernel/linux-2.6/pvfs2-kernel.h b/src/kernel/linux-2.6/pvfs2-kernel.h index 8ce2be4..9cb0958 100644 --- a/src/kernel/linux-2.6/pvfs2-kernel.h +++ b/src/kernel/linux-2.6/pvfs2-kernel.h @@ -23,6 +23,7 @@ #ifdef HAVE_NOWARNINGS_WHEN_INCLUDING_LINUX_CONFIG_H #include #endif +#include #ifdef PVFS2_LINUX_KERNEL_2_4 @@ -41,7 +42,6 @@ typedef unsigned long sector_t; #endif #else /* !(PVFS2_LINUX_KERNEL_2_4) */ - #include #include #include @@ -54,7 +54,6 @@ typedef unsigned long sector_t; #endif /* PVFS2_LINUX_KERNEL_2_4 */ -#include #include #include #include @@ -65,7 +64,6 @@ typedef unsigned long sector_t; #include "pvfs2-config.h" #include "pvfs2-debug.h" #include "gossip.h" -#include "pint-dist-utils.h" #ifdef HAVE_AIO #include @@ -94,7 +92,9 @@ typedef unsigned long sector_t; #include #include #include +#ifdef HAVE_SMP_LOCK_H #include +#endif #include #include #include @@ -116,15 +116,6 @@ typedef unsigned long sector_t; #include #endif -/* taken from include/linux/fs.h from 2.4.19 or later kernels */ -#ifndef MAX_LFS_FILESIZE -#if BITS_PER_LONG == 32 -#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG))-1) -#elif BITS_PER_LONG == 64 -#define MAX_LFS_FILESIZE 0x7fffffffffffffff -#endif -#endif /* MAX_LFS_FILESIZE */ - #include "pint-dev-shared.h" #include "pvfs2-dev-proto.h" #include "pvfs2-types.h" @@ -144,6 +135,10 @@ typedef unsigned long sector_t; #define PVFS2_DEFAULT_OP_TIMEOUT_SECS 20 #endif +#define PVFS2_BUFMAP_WAIT_TIMEOUT_SECS 30 + +#define PVFS2_DEFAULT_SLOT_TIMEOUT_SECS 1800 /* 30 minutes */ + #define PVFS2_REQDEVICE_NAME "pvfs2-req" #define PVFS2_DEVREQ_MAGIC 0x20030529 @@ -251,13 +246,8 @@ enum PVFS_async_io_type #define PVFS2_GFP_FLAGS (GFP_KERNEL) #define PVFS2_BUFMAP_GFP_FLAGS (GFP_KERNEL) -#ifdef CONFIG_HIGHMEM #define pvfs2_kmap(page) kmap(page) #define pvfs2_kunmap(page) kunmap(page) -#else -#define 
pvfs2_kmap(page) page_address(page) -#define pvfs2_kunmap(page) do {} while(0) -#endif /* CONFIG_HIGHMEM */ /* pvfs2 xattr and acl related defines */ #ifdef HAVE_XATTR @@ -283,7 +273,12 @@ enum PVFS_async_io_type extern int pvfs2_acl_chmod(struct inode *inode); extern int pvfs2_init_acl(struct inode *inode, struct inode *dir); +#ifdef HAVE_CONST_S_XATTR_IN_SUPERBLOCK +extern const struct xattr_handler *pvfs2_xattr_handlers[]; +#else extern struct xattr_handler *pvfs2_xattr_handlers[]; +#endif /* HAVE_CONST_S_XATTR_IN_SUPERBLOCK */ + extern struct xattr_handler pvfs2_xattr_acl_default_handler, pvfs2_xattr_acl_access_handler; extern struct xattr_handler pvfs2_xattr_trusted_handler; extern struct xattr_handler pvfs2_xattr_default_handler; @@ -307,14 +302,63 @@ static inline int convert_to_internal_xattr_flags(int setxattr_flags) return internal_flag; } -int pvfs2_xattr_set_trusted(struct inode *inode, - const char *name, const void *buffer, size_t size, int flags); -int pvfs2_xattr_get_trusted(struct inode *inode, - const char *name, void *buffer, size_t size); -int pvfs2_xattr_set_default(struct inode *inode, - const char *name, const void *buffer, size_t size, int flags); -int pvfs2_xattr_get_default(struct inode *inode, - const char *name, void *buffer, size_t size); +int pvfs2_xattr_set_trusted( +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + struct dentry *dentry, +#else + struct inode *inode, +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + const char *name, + const void *buffer, + size_t size, + int flags +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + , int handler_flags +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + ); + +int pvfs2_xattr_get_trusted( +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + struct dentry *dentry, +#else + struct inode *inode, +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + const char *name, + void *buffer, + size_t size +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + , int handler_flags +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + ); + +int 
pvfs2_xattr_set_default( +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + struct dentry *dentry, +#else + struct inode *inode, +#endif /*HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + const char *name, + const void *buffer, + size_t size, + int flags +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + , int handler_flags +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + ); + +int pvfs2_xattr_get_default( +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + struct dentry *dentry, +#else + struct inode *inode, +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + const char *name, + void *buffer, + size_t size +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + , int handler_flags +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + ); #endif @@ -336,6 +380,11 @@ typedef struct enum pvfs2_vfs_op_states op_state; uint64_t tag; + /* Set uses_shared_memory to 1 if this operation uses shared memory. */ + /* If true, then a retry on the op must also get a new shared memory*/ + /* buffer and re-populate it. */ + int uses_shared_memory; + pvfs2_upcall_t upcall; pvfs2_downcall_t downcall; @@ -370,6 +419,7 @@ typedef struct { PVFS_object_ref refn; char link_target[PVFS_NAME_MAX]; + PVFS_size blksize; /* * Reading/Writing Extended attributes need to acquire the appropriate * reader/writer semaphore on the pvfs2_inode_t structure. 
@@ -383,6 +433,7 @@ typedef struct #endif sector_t last_failed_block_index_read; int error_code; + int revalidate_failed; /* State of in-memory attributes not yet flushed to disk associated with this object */ unsigned long pinode_flags; @@ -467,6 +518,8 @@ typedef struct struct super_block *sb; int mount_pending; struct list_head list; + atomic_t pvfs2_inode_alloc_count; + atomic_t pvfs2_inode_dealloc_count; } pvfs2_sb_info_t; /** a temporary structure used only for sb mount time that groups the @@ -577,7 +630,8 @@ typedef struct struct kiocb *kiocb; /* pointer to the kiocb that kicked this operation */ int buffer_index; /* buffer index that was used for the I/O */ pvfs2_kernel_op_t *op; /* pvfs2 kernel operation type */ - char __user *buffer; /* The user space buffer to which I/O is being staged */ + struct iovec *iov; /* The user space buffers from/to which I/O is being staged */ + unsigned long nr_segs; /* number of elements in the iovector */ int rw; /* set to indicate the type of the operation */ loff_t offset; /* file offset */ size_t bytes_to_be_copied; /* and the count in bytes */ @@ -722,7 +776,7 @@ int wait_for_matching_downcall( pvfs2_kernel_op_t * op); int wait_for_cancellation_downcall( pvfs2_kernel_op_t * op); -void clean_up_interrupted_operation( +void pvfs2_clean_up_interrupted_operation( pvfs2_kernel_op_t * op); void purge_waiting_ops(void); @@ -739,6 +793,11 @@ struct super_block* pvfs2_get_sb( void *data, int silent); #else +#ifdef HAVE_FSTYPE_MOUNT_ONLY +struct dentry *pvfs2_mount( + struct file_system_type *fst, int flags, + const char *devname, void *data); +#else #ifdef HAVE_VFSMOUNT_GETSB int pvfs2_get_sb( struct file_system_type *fst, int flags, @@ -748,7 +807,8 @@ int pvfs2_get_sb( struct super_block *pvfs2_get_sb( struct file_system_type *fst, int flags, const char *devname, void *data); -#endif +#endif /* HAVE_VFSMOUNT_GETSB */ +#endif /* HAVE_FSTYPE_MOUNT_ONLY */ #endif void pvfs2_read_inode( @@ -821,11 +881,15 @@ struct inode 
*pvfs2_iget_common( #define pvfs2_iget(sb, ref) pvfs2_iget_common(sb, ref, 0) #define pvfs2_iget_locked(sb, ref) pvfs2_iget_common(sb, ref, 1) -#ifdef PVFS2_LINUX_KERNEL_2_4 +#if defined(PVFS2_LINUX_KERNEL_2_4) || defined(HAVE_TWO_PARAM_PERMISSION) int pvfs2_permission(struct inode *, int); #else -int pvfs2_permission(struct inode *inode, - int mask, struct nameidata *nd); +int pvfs2_permission(struct inode *, int mask, +#ifdef HAVE_THREE_PARAM_PERMISSION_WITH_FLAG + unsigned int flags); +#else + struct nameidata *nd); +#endif /* HAVE_THREE_PARAM_PERMISSION_WITH_FLAG */ #endif /***************************** @@ -941,6 +1005,7 @@ extern struct semaphore devreq_semaphore; extern struct semaphore request_semaphore; extern int debug; extern int op_timeout_secs; +extern int slot_timeout_secs; extern struct list_head pvfs2_superblocks; extern spinlock_t pvfs2_superblocks_lock; extern struct list_head pvfs2_request_list; @@ -959,6 +1024,8 @@ extern struct file_operations pvfs2_dir_operations; extern struct dentry_operations pvfs2_dentry_operations; extern struct file_operations pvfs2_devreq_file_operations; +extern wait_queue_head_t pvfs2_bufmap_init_waitq; + /************************************ * misc convenience macros ************************************/ @@ -989,10 +1056,11 @@ do { \ #define remove_op_from_request_list(op) \ do { \ struct list_head *tmp = NULL; \ + struct list_head *tmp_safe = NULL; \ pvfs2_kernel_op_t *tmp_op = NULL; \ \ spin_lock(&pvfs2_request_list_lock); \ - list_for_each(tmp, &pvfs2_request_list) { \ + list_for_each_safe(tmp, tmp_safe, &pvfs2_request_list) { \ tmp_op = list_entry(tmp, pvfs2_kernel_op_t, list); \ if (tmp_op && (tmp_op == op)) { \ list_del(&tmp_op->list); \ @@ -1092,10 +1160,11 @@ do { \ #define remove_pvfs2_sb(sb) \ do { \ struct list_head *tmp = NULL; \ + struct list_head *tmp_safe = NULL; \ pvfs2_sb_info_t *pvfs2_sb = NULL; \ \ spin_lock(&pvfs2_superblocks_lock); \ - list_for_each(tmp, &pvfs2_superblocks) { \ + 
list_for_each_safe(tmp, tmp_safe, &pvfs2_superblocks) { \ pvfs2_sb = list_entry(tmp, pvfs2_sb_info_t, list); \ if (pvfs2_sb && (pvfs2_sb->sb == sb)) { \ gossip_debug(GOSSIP_SUPER_DEBUG, "Removing SB %p from pvfs2 superblocks\n", \ @@ -1170,9 +1239,21 @@ static inline struct dentry* pvfs2_d_splice_alias(struct dentry *dentry, struct return d_splice_alias(inode, dentry); } +#ifdef HAVE_CURRENT_FSUID #define fill_default_sys_attrs(sys_attr,type,mode)\ do \ { \ + sys_attr.owner = current_fsuid(); \ + sys_attr.group = current_fsgid(); \ + sys_attr.size = 0; \ + sys_attr.perms = PVFS_util_translate_mode(mode,0); \ + sys_attr.objtype = type; \ + sys_attr.mask = PVFS_ATTR_SYS_ALL_SETABLE; \ +} while(0) +#else +#define fill_default_sys_attrs(sys_attr,type,mode)\ +do \ +{ \ sys_attr.owner = current->fsuid; \ sys_attr.group = current->fsgid; \ sys_attr.size = 0; \ @@ -1180,40 +1261,10 @@ do \ sys_attr.objtype = type; \ sys_attr.mask = PVFS_ATTR_SYS_ALL_SETABLE; \ } while(0) +#endif /* HAVE_CURRENT_FSUID */ #endif /* PVFS2_LINUX_KERNEL_2_4 */ - -/************************************ - * misc convenience functions - ************************************/ -static inline int pvfs2_internal_revalidate( - struct inode *inode) -{ - int ret = -EINVAL; - if (inode) - { - /* - * The dentry revalidating function expects that all fields of the inode - * would be refreshed, so we dont have much of a choice here too. - */ - ret = ((pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT) == 0) ? 1 : 0); -#if 0 -/* Calling make_bad_inode() here results in a bad reference count on the - * inode. It therefore gets cached until the module is unloaded, when we get - * a "VFS: Busy inodes after unmount. Self-destruct in 5 seconds." error - * message. It is better to just let it be cleaned up naturally after - * validation failure. 
-Phil - */ - if (ret == 0) - { - pvfs2_make_bad_inode(inode); - } -#endif - } - return ret; -} - #ifdef PVFS2_LINUX_KERNEL_2_4 /* based on code from 2.6.x's fs/libfs.c with required macro support @@ -1262,23 +1313,80 @@ static inline int dcache_dir_close(struct inode *inode, struct file *file) } #endif /* PVFS2_LINUX_KERNEL_2_4_MINOR_VER */ -/* some 2.4 kernels backport a lot of stuff from 2.6, so we have to - * feature-test instead of relying on kernel versions */ -#ifndef HAVE_I_SIZE_READ -static inline loff_t i_size_read(struct inode *inode) +#endif /* PVFS2_LINUX_KERNEL_2_4 */ + +#ifdef HAVE_I_SEM_IN_STRUCT_INODE +#define pvfs2_inode_lock(__i) do \ +{ down(&(__i)->i_sem); } while (0) +#define pvfs2_inode_unlock(__i) do \ +{ up(&(__i)->i_sem); } while (0) +#else +#define pvfs2_inode_lock(__i) do \ +{ mutex_lock(&(__i)->i_mutex); } while (0) +#define pvfs2_inode_unlock(__i) do \ +{ mutex_unlock(&(__i)->i_mutex); } while (0) +#endif /* HAVE_I_SEM_IN_STRUCT_INODE */ + +static inline void pvfs2_i_size_write(struct inode *inode, loff_t i_size) { +#ifndef HAVE_I_SIZE_WRITE + inode->i_size = i_size; +#else + #if BITS_PER_LONG==32 && defined(CONFIG_SMP) + pvfs2_inode_lock(inode); + #endif + i_size_write(inode, i_size); + #if BITS_PER_LONG==32 && defined(CONFIG_SMP) + pvfs2_inode_unlock(inode); + #endif +#endif + return; +} + +static inline loff_t pvfs2_i_size_read(struct inode *inode) +{ +#ifndef HAVE_I_SIZE_READ return inode->i_size; +#else + return i_size_read(inode); +#endif } + +static inline void pvfs2_i_set_nlink(struct inode *inode, unsigned int nlink) +{ +#ifdef HAVE_I_SET_NLINK + set_nlink(inode, nlink); +#else + inode->i_nlink = nlink; #endif +} -#ifndef HAVE_I_SIZE_WRITE -static inline void i_size_write(struct inode *inode, loff_t i_size) +static inline void pvfs2_i_inc_nlink(struct inode *inode) { - inode->i_size = i_size; +#ifdef HAVE_I_INC_NLINK + inc_nlink(inode); +#else + inode->i_nlink++; +#endif } + +static inline void pvfs2_i_drop_nlink(struct inode 
*inode) +{ +#ifdef HAVE_I_DROP_NLINK + drop_nlink(inode); +#else + inode->i_nlink--; #endif +} -#endif /* PVFS2_LINUX_KERNEL_2_4 */ +static inline void pvfs2_i_clear_nlink(struct inode *inode) +{ +#ifdef HAVE_I_CLEAR_NLINK + clear_nlink(inode); +#else + inode->i_nlink = 0; +#endif +} static inline unsigned int diff(struct timeval *end, struct timeval *begin) { @@ -1304,6 +1412,15 @@ static inline void *kzalloc(size_t size, int flags) } #endif +/* add in true/false enum for 2.6 kernels that don't have it (<2.6.9), + * taken include/linux/stddef.h */ +#ifndef HAVE_TRUE_FALSE_ENUM +enum { + false = 0, + true = 1 +}; +#endif + #endif /* __PVFS2KERNEL_H */ /* @} */ diff --git a/src/kernel/linux-2.6/pvfs2-mod.c b/src/kernel/linux-2.6/pvfs2-mod.c index 4e7c0ac..abd6692 100644 --- a/src/kernel/linux-2.6/pvfs2-mod.c +++ b/src/kernel/linux-2.6/pvfs2-mod.c @@ -15,6 +15,19 @@ #define PVFS2_VERSION "Unknown" #endif +#define DEBUG_HELP_STRING_SIZE 4096 + + +/* these functions are defined in pvfs2-utils.c */ +uint64_t PVFS_proc_debug_eventlog_to_mask(const char *); +uint64_t PVFS_proc_kmod_eventlog_to_mask(const char *event_logging); +int PVFS_proc_kmod_mask_to_eventlog(uint64_t mask, char *debug_string); +int PVFS_proc_mask_to_eventlog(uint64_t mask, char *debug_string); + +/* external references */ +extern char kernel_debug_string[]; + +/* prototypes */ static int hash_func(void *key, int table_size); static int hash_compare(void *key, struct qhash_head *link); @@ -24,14 +37,33 @@ static int hash_compare(void *key, struct qhash_head *link); /* the size of the hash tables for ops in progress */ static int hash_table_size = 509; -int gossip_debug_mask = 0; + +/* the insmod command only understands "unsigned long" and NOT "unsigned long long" as + * an input parameter. 
So, to accomodate both 32- and 64- bit machines, we will read + * the debug mask parameter as an unsigned long (4-bytes on a 32-bit machine and 8-bytes + * on a 64-bit machine) and then cast the "unsigned long" to an "unsigned long long" + * once we have the value in the kernel. In this way, the gossip_debug_mask can remain + * as a "uint64_t" and the kernel and client may continue to use the same gossip functions. + * NOTE: the kernel debug mask currently does not have more than 32 valid keywords, so + * only reading a 32-bit integer from the insmod command line is not a problem. However, + * the /proc/sys/pvfs2/kernel-debug functionality can accomodate up to 64 keywords, in + * the event that the kernel debug mask supports more than 32 keywords. +*/ +uint32_t module_parm_debug_mask = 0; +uint64_t gossip_debug_mask = 0; +unsigned int kernel_mask_set_mod_init = false; int op_timeout_secs = PVFS2_DEFAULT_OP_TIMEOUT_SECS; +int slot_timeout_secs = PVFS2_DEFAULT_SLOT_TIMEOUT_SECS; +uint32_t DEBUG_LINE = 50; +char debug_help_string[DEBUG_HELP_STRING_SIZE] = {0}; + MODULE_LICENSE("GPL"); MODULE_AUTHOR("PVFS2 Development Team"); MODULE_DESCRIPTION("The Linux Kernel VFS interface to PVFS2"); -MODULE_PARM_DESC(debug, "debugging level (0 for none, 1 for verbose)"); +MODULE_PARM_DESC(debug, "debugging level (see pvfs2-debug.h for values)"); MODULE_PARM_DESC(op_timeout_secs, "Operation timeout in seconds"); +MODULE_PARM_DESC(slot_timeout_secs, "Slot timeout in seconds"); MODULE_PARM_DESC(hash_table_size, "size of hash table for operations in progress"); #ifdef PVFS2_LINUX_KERNEL_2_4 @@ -42,15 +74,21 @@ MODULE_PARM_DESC(hash_table_size, "size of hash table for operations in progress DECLARE_FSTYPE(pvfs2_fs_type, "pvfs2", pvfs2_get_sb, 0); MODULE_PARM(hash_table_size, "i"); -MODULE_PARM(gossip_debug_mask, "i"); +MODULE_PARM(module_parm_debug_mask, "i"); MODULE_PARM(op_timeout_secs, "i"); +MODULE_PARM(slot_timeout_secs, "i"); #else /* !PVFS2_LINUX_KERNEL_2_4 */ struct 
file_system_type pvfs2_fs_type = { .name = "pvfs2", +/* only define mount if the kernel no longer supports get_sb */ +#ifdef HAVE_FSTYPE_MOUNT_ONLY + .mount = pvfs2_mount, +#else .get_sb = pvfs2_get_sb, +#endif /* HAVE_FSTYPE_MOUNT_ONLY */ .kill_sb = pvfs2_kill_sb, .owner = THIS_MODULE, /* @@ -63,8 +101,9 @@ struct file_system_type pvfs2_fs_type = }; module_param(hash_table_size, int, 0); -module_param(gossip_debug_mask, int, 0); +module_param(module_parm_debug_mask, uint, 0); module_param(op_timeout_secs, int, 0); +module_param(slot_timeout_secs, int, 0); #endif /* PVFS2_LINUX_KERNEL_2_4 */ @@ -86,7 +125,11 @@ struct qhash_table *htable_ops_in_progress = NULL; LIST_HEAD(pvfs2_request_list); /* used to protect the above pvfs2_request_list */ +#ifdef HAVE_SPIN_LOCK_UNLOCKED spinlock_t pvfs2_request_list_lock = SPIN_LOCK_UNLOCKED; +#else +DEFINE_SPINLOCK(pvfs2_request_list_lock); +#endif /* HAVE_SPIN_LOCK_UNLOCKED */ /* used for incoming request notification */ DECLARE_WAIT_QUEUE_HEAD(pvfs2_request_list_waitq); @@ -94,13 +137,94 @@ DECLARE_WAIT_QUEUE_HEAD(pvfs2_request_list_waitq); static int __init pvfs2_init(void) { int ret = -1; - gossip_debug(GOSSIP_INIT_DEBUG, "pvfs2: pvfs2_init called with debug mask 0x%x\n", gossip_debug_mask); + uint32_t index = 0; + char client_title[] = "Client Debug Keywords:\n"; + char kernel_title[] = "Kernel Debug Keywords:\n"; + uint32_t i = 0; + + /* convert input debug mask to a 64-bit unsigned integer */ + gossip_debug_mask = (uint64_t)module_parm_debug_mask; + + /*set the kernel's gossip debug string; invalid mask values will be ignored.*/ + PVFS_proc_kmod_mask_to_eventlog(gossip_debug_mask,kernel_debug_string); + + /* remove any invalid values from the mask */ + gossip_debug_mask = PVFS_proc_kmod_eventlog_to_mask(kernel_debug_string); + + /* if the mask has a non-zero value, then indicate that the mask was set when the kernel module + * was loaded. 
The pvfs2 dev ioctl command will look at this boolean to determine if the kernel's + * debug mask should be overwritten when the client-core is started. + */ + if (gossip_debug_mask != 0) + { + kernel_mask_set_mod_init = true; + } + + /*print information message to the system log*/ + printk(KERN_INFO "pvfs2: pvfs2_init called with debug mask: \"%s\" (0x%08llx)\n" + ,kernel_debug_string,gossip_debug_mask); + + + /* load debug_help_string...this string is used during the /proc/sys/pvfs2/debug-help operation */ + if (strlen(client_title) < DEBUG_LINE) + { + memcpy(&debug_help_string[index],client_title,sizeof(client_title)); + index += strlen(client_title); + } + + for(i=0;i #include @@ -19,6 +20,26 @@ #ifdef CONFIG_SYSCTL #include + +#define KERNEL_DEBUG "kernel-debug" +#define CLIENT_DEBUG "client-debug" +#define DEBUG_HELP "debug-help" + +/* these functions are defined in pvfs2-utils.c */ +uint64_t PVFS_proc_debug_eventlog_to_mask(const char *); +uint64_t PVFS_proc_kmod_eventlog_to_mask(const char *event_logging); +int PVFS_proc_kmod_mask_to_eventlog(uint64_t mask, char *debug_string); +int PVFS_proc_mask_to_eventlog(uint64_t mask, char *debug_string); + +/* these strings will be initialized by invoking the PVFS_DEV_DEBUG ioctl + * command when the client-core is started. otherwise, these variables are + * only set via the proc sys calls. +*/ +char client_debug_string[PVFS2_MAX_DEBUG_STRING_LEN] = "none"; +char kernel_debug_string[PVFS2_MAX_DEBUG_STRING_LEN] = "none"; +extern char debug_help_string[]; + + /* extra parameters provided to pvfs2 param proc handlers */ struct pvfs2_param_extra { @@ -27,12 +48,122 @@ struct pvfs2_param_extra int max; /* maximum value */ }; +/* pvfs2_proc_debug_mask_handler() + * proc file handler that will take a debug string and convert it + * into the proper debug value and then send a request to update the + * debug mask if client or update the local debug mask if kernel. 
+*/ +#if defined(HAVE_PROC_HANDLER_FILE_ARG) +static int pvfs2_proc_debug_mask_handler( + ctl_table *ctl, + int write, + struct file *filp, + void *buffer, + size_t *lenp, + loff_t *ppos) +#elif defined(HAVE_PROC_HANDLER_PPOS_ARG) +static int pvfs2_proc_debug_mask_handler( + ctl_table *ctl, + int write, + void *buffer, + size_t *lenp, + loff_t *ppos) +#else +static int pvfs2_proc_debug_mask_handler( + ctl_table *ctl, + int write, + struct file *filp, + void *buffer, + size_t *lenp) +#endif +{ + int ret=0; + pvfs2_kernel_op_t *new_op = NULL; + + gossip_debug(GOSSIP_PROC_DEBUG,"Executing pvfs2_proc_debug_mask_handler...\n"); + + /* use generic proc string handling function to retrieve/set string. */ +#if defined(HAVE_PROC_HANDLER_FILE_ARG) + ret = proc_dostring(ctl, write, filp, buffer, lenp, ppos); +#elif defined(HAVE_PROC_HANDLER_PPOS_ARG) + ret = proc_dostring(ctl, write, buffer, lenp, ppos); +#else + ret = proc_dostring(ctl, write, filp, buffer, lenp); +#endif + + if (ret != 0) + { + return(ret); + } + + gossip_debug(GOSSIP_PROC_DEBUG,"%s: debug string: %s\n" + ,"pvfs2_proc_debug_mask_handler" + ,(char *)ctl->data); + + /*For a user write, ctl->data will now contain the new debug string as given + *by the user. For a user read, the user's "buffer" will now contain the string + *stored in ctl->data. + */ + + /*For a write, we must convert the debug string into the proper debug mask. + *The conversion will ignore any invalid keywords sent in by the user, so we + *re-convert the debug mask back into the correct debug string. 
+ */ + if (write && !strcmp(ctl->procname,KERNEL_DEBUG)) + { + gossip_debug_mask=PVFS_proc_kmod_eventlog_to_mask((const char *)ctl->data); + ret=PVFS_proc_kmod_mask_to_eventlog(gossip_debug_mask,(char *)ctl->data); + + gossip_debug(GOSSIP_PROC_DEBUG,"%s: kernel debug mask: %lu\n" + ,"pvfs2_proc_debug_mask_handler" + ,(unsigned long)gossip_debug_mask); + gossip_debug(GOSSIP_PROC_DEBUG,"New kernel debug string is %s.\n" + ,kernel_debug_string); + printk("PVFS: kernel debug mask has been modified to \"%s\" (0x%08llx).\n" + ,kernel_debug_string, llu(gossip_debug_mask)); + } + else if (write && !strcmp(ctl->procname,CLIENT_DEBUG)) + { + new_op = op_alloc(PVFS2_VFS_OP_PARAM); + if (!new_op) + return (-ENOMEM); + strcpy(new_op->upcall.req.param.s_value,ctl->data); + new_op->upcall.req.param.type = PVFS2_PARAM_REQUEST_SET; + new_op->upcall.req.param.op = PVFS2_PARAM_REQUEST_OP_CLIENT_DEBUG; + + ret=service_operation(new_op,"pvfs2_param",PVFS2_OP_INTERRUPTIBLE); + + if (ret==0) + { + gossip_debug(GOSSIP_PROC_DEBUG,"Downcall:\treturn status:%d\treturn " + "value:%x\n" + ,(int)new_op->downcall.status + ,(int)new_op->downcall.resp.param.value); + + ret=PVFS_proc_mask_to_eventlog(new_op->downcall.resp.param.value + ,client_debug_string); + gossip_debug(GOSSIP_PROC_DEBUG,"New client debug string is %s\n" + ,client_debug_string); + } + op_release(new_op); + printk("PVFS: client debug mask has been modified to \"%s\" (0x%08llx).\n" + ,client_debug_string, llu(new_op->downcall.resp.param.value)); + } + else if (write && !strcmp(ctl->procname,DEBUG_HELP)) + { + /*do nothing...the user can only READ the debug help*/ + return (0); + } + + return (0); +}/*end pvfs2_proc_debug_mask_handler*/ + /* pvfs2_param_proc_handler() * * generic proc file handler for getting and setting various tunable * pvfs2-client parameters */ -#ifdef HAVE_PROC_HANDLER_SIX_ARG +#if defined(HAVE_PROC_HANDLER_FILE_ARG) static int pvfs2_param_proc_handler( ctl_table *ctl, int write, @@ -40,6 +171,13 @@ static int 
pvfs2_param_proc_handler( void *buffer, size_t *lenp, loff_t *ppos) +#elif defined(HAVE_PROC_HANDLER_PPOS_ARG) +static int pvfs2_param_proc_handler( + ctl_table *ctl, + int write, + void *buffer, + size_t *lenp, + loff_t *ppos) #else static int pvfs2_param_proc_handler( ctl_table *ctl, @@ -70,8 +208,10 @@ static int pvfs2_param_proc_handler( if(write) { /* use generic proc handling function to retrive value to set */ -#ifdef HAVE_PROC_HANDLER_SIX_ARG +#if defined(HAVE_PROC_HANDLER_FILE_ARG) ret = proc_dointvec_minmax(&tmp_ctl, write, filp, buffer, lenp, ppos); +#elif defined(HAVE_PROC_HANDLER_PPOS_ARG) + ret = proc_dointvec_minmax(&tmp_ctl, write, buffer, lenp, ppos); #else ret = proc_dointvec_minmax(&tmp_ctl, write, filp, buffer, lenp); #endif @@ -101,8 +241,10 @@ static int pvfs2_param_proc_handler( /* use generic proc handling function to output value */ val = (int)new_op->downcall.resp.param.value; gossip_debug(GOSSIP_PROC_DEBUG, "pvfs2: proc read %d\n", val); -#ifdef HAVE_PROC_HANDLER_SIX_ARG +#if defined(HAVE_PROC_HANDLER_FILE_ARG) ret = proc_dointvec_minmax(&tmp_ctl, write, filp, buffer, lenp, ppos); +#elif defined(HAVE_PROC_HANDLER_PPOS_ARG) + ret = proc_dointvec_minmax(&tmp_ctl, write, buffer, lenp, ppos); #else ret = proc_dointvec_minmax(&tmp_ctl, write, filp, buffer, lenp); #endif @@ -112,7 +254,7 @@ static int pvfs2_param_proc_handler( return(ret); } -#ifdef HAVE_PROC_HANDLER_SIX_ARG +#if defined(HAVE_PROC_HANDLER_FILE_ARG) static int pvfs2_pc_proc_handler( ctl_table *ctl, int write, @@ -120,6 +262,13 @@ static int pvfs2_pc_proc_handler( void *buffer, size_t *lenp, loff_t *ppos) +#elif defined(HAVE_PROC_HANDLER_PPOS_ARG) +static int pvfs2_pc_proc_handler( + ctl_table *ctl, + int write, + void *buffer, + size_t *lenp, + loff_t *ppos) #else static int pvfs2_pc_proc_handler( ctl_table *ctl, @@ -134,7 +283,7 @@ static int pvfs2_pc_proc_handler( int pos = 0; int to_copy = 0; int* pc_type = ctl->extra1; -#ifdef HAVE_PROC_HANDLER_SIX_ARG +#if 
defined(HAVE_PROC_HANDLER_PPOS_ARG) || defined(HAVE_PROC_HANDLER_FILE_ARG) loff_t *offset = ppos; #else loff_t *offset = &filp->f_pos; @@ -223,6 +372,26 @@ static struct pvfs2_param_extra acache_rec_extra = { .min = 0, .max = 100, }; +static struct pvfs2_param_extra static_acache_timeout_extra = { + .op = PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_TIMEOUT_MSECS, + .min = 0, + .max = INT_MAX, +}; +static struct pvfs2_param_extra static_acache_hard_extra = { + .op = PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_HARD_LIMIT, + .min = 0, + .max = INT_MAX, +}; +static struct pvfs2_param_extra static_acache_soft_extra = { + .op = PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_SOFT_LIMIT, + .min = 0, + .max = INT_MAX, +}; +static struct pvfs2_param_extra static_acache_rec_extra = { + .op = PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_RECLAIM_PERCENTAGE, + .min = 0, + .max = 100, +}; static struct pvfs2_param_extra ncache_timeout_extra = { .op = PVFS2_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS, .min = 0, @@ -258,22 +427,38 @@ static struct pvfs2_param_extra perf_reset_extra = { .min = 0, .max = 1, }; -static int min_debug[] = {0}, max_debug[] = {GOSSIP_MAX_DEBUG}; + static int min_op_timeout_secs[] = {0}, max_op_timeout_secs[] = {INT_MAX}; +static int min_slot_timeout_secs[] = {0}, max_slot_timeout_secs[] = {INT_MAX}; /* - * Modern kernels prefer to number the controls themselves. + * Modern kernels (up to 2.6.33) prefer to number the controls themselves. */ #ifdef CTL_UNNUMBERED -#define UNNUMBERED_OR_VAL(x) CTL_UNNUMBERED +#define UNNUMBERED_OR_VAL(x) ((x==CTL_NONE) ? 
CTL_NONE : CTL_UNNUMBERED) #else #define UNNUMBERED_OR_VAL(x) x #endif +/* + * API change in 2.6.33 removes .ctl_name and .strategy from ctl_table + */ +#ifdef HAVE_CTL_NAME +#define CTL_NAME(c_name) .ctl_name = UNNUMBERED_OR_VAL(c_name), +#else +#define CTL_NAME(c_name) +#endif /* HAVE_CTL_NAME */ + +#ifdef HAVE_CTL_STRATEGY +#define CTL_STATEGY(c_strategy) .strategy = (c_strategy), +#else +#define CTL_STRATEGY(strat) +#endif /* HAVE_CTL_STRATEGY */ + static ctl_table pvfs2_acache_table[] = { /* controls acache timeout */ { - .ctl_name = UNNUMBERED_OR_VAL(1), + CTL_NAME(1) .procname = "timeout-msecs", .maxlen = sizeof(int), .mode = 0644, @@ -282,7 +467,7 @@ static ctl_table pvfs2_acache_table[] = { }, /* controls acache hard limit */ { - .ctl_name = UNNUMBERED_OR_VAL(2), + CTL_NAME(2) .procname = "hard-limit", .maxlen = sizeof(int), .mode = 0644, @@ -291,7 +476,7 @@ static ctl_table pvfs2_acache_table[] = { }, /* controls acache soft limit */ { - .ctl_name = UNNUMBERED_OR_VAL(3), + CTL_NAME(3) .procname = "soft-limit", .maxlen = sizeof(int), .mode = 0644, @@ -300,19 +485,59 @@ static ctl_table pvfs2_acache_table[] = { }, /* controls acache reclaim percentage */ { - .ctl_name = UNNUMBERED_OR_VAL(4), + CTL_NAME(4) .procname = "reclaim-percentage", .maxlen = sizeof(int), .mode = 0644, .proc_handler = &pvfs2_param_proc_handler, .extra1 = &acache_rec_extra, }, - {0} + { CTL_NAME(CTL_NONE) } }; +static ctl_table pvfs2_static_acache_table[] = { + /* controls static acache timeout */ + { + CTL_NAME(1) + .procname = "timeout-msecs", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &pvfs2_param_proc_handler, + .extra1 = &static_acache_timeout_extra + }, + /* controls static acache hard limit */ + { + CTL_NAME(2) + .procname = "hard-limit", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &pvfs2_param_proc_handler, + .extra1 = &static_acache_hard_extra + }, + /* controls static acache soft limit */ + { + CTL_NAME(3) + .procname = "soft-limit", + .maxlen = 
sizeof(int), + .mode = 0644, + .proc_handler = &pvfs2_param_proc_handler, + .extra1 = &static_acache_soft_extra + }, + /* controls static acache reclaim percentage */ + { + CTL_NAME(4) + .procname = "reclaim-percentage", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &pvfs2_param_proc_handler, + .extra1 = &static_acache_rec_extra, + }, + { CTL_NAME(CTL_NONE) } +}; + static ctl_table pvfs2_ncache_table[] = { /* controls ncache timeout */ { - .ctl_name = UNNUMBERED_OR_VAL(1), + CTL_NAME(1) .procname = "timeout-msecs", .maxlen = sizeof(int), .mode = 0644, @@ -321,7 +546,7 @@ static ctl_table pvfs2_ncache_table[] = { }, /* controls ncache hard limit */ { - .ctl_name = UNNUMBERED_OR_VAL(2), + CTL_NAME(2) .procname = "hard-limit", .maxlen = sizeof(int), .mode = 0644, @@ -330,7 +555,7 @@ static ctl_table pvfs2_ncache_table[] = { }, /* controls ncache soft limit */ { - .ctl_name = UNNUMBERED_OR_VAL(3), + CTL_NAME(3) .procname = "soft-limit", .maxlen = sizeof(int), .mode = 0644, @@ -339,20 +564,21 @@ static ctl_table pvfs2_ncache_table[] = { }, /* controls ncache reclaim percentage */ { - .ctl_name = UNNUMBERED_OR_VAL(4), + CTL_NAME(4) .procname = "reclaim-percentage", .maxlen = sizeof(int), .mode = 0644, .proc_handler = &pvfs2_param_proc_handler, .extra1 = &ncache_rec_extra }, - {0} + { CTL_NAME(CTL_NONE) } }; static int acache_perf_count = PVFS2_PERF_COUNT_REQUEST_ACACHE; +static int static_acache_perf_count = PVFS2_PERF_COUNT_REQUEST_STATIC_ACACHE; static int ncache_perf_count = PVFS2_PERF_COUNT_REQUEST_NCACHE; static ctl_table pvfs2_pc_table[] = { { - .ctl_name = UNNUMBERED_OR_VAL(1), + CTL_NAME(1) .procname = "acache", .maxlen = 4096, .mode = 0444, @@ -360,14 +586,22 @@ static ctl_table pvfs2_pc_table[] = { .extra1 = &acache_perf_count, }, { - .ctl_name = UNNUMBERED_OR_VAL(2), + CTL_NAME(1) + .procname = "static-acache", + .maxlen = 4096, + .mode = 0444, + .proc_handler = pvfs2_pc_proc_handler, + .extra1 = &static_acache_perf_count, + }, + { + CTL_NAME(2) 
.procname = "ncache", .maxlen = 4096, .mode = 0444, .proc_handler = pvfs2_pc_proc_handler, .extra1 = &ncache_perf_count }, - {0} + { CTL_NAME(CTL_NONE) } }; pvfs2_stats g_pvfs2_stats; @@ -375,7 +609,7 @@ pvfs2_stats g_pvfs2_stats; static ctl_table pvfs2_stats_table[] = { /* shows number of hits in cache */ { - .ctl_name = UNNUMBERED_OR_VAL(1), + CTL_NAME(1) .procname = "hits", .data = &g_pvfs2_stats.cache_hits, .maxlen = sizeof(unsigned long), @@ -383,7 +617,7 @@ static ctl_table pvfs2_stats_table[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = UNNUMBERED_OR_VAL(2), + CTL_NAME(2) .procname = "misses", .data = &g_pvfs2_stats.cache_misses, .maxlen = sizeof(unsigned long), @@ -391,7 +625,6 @@ static ctl_table pvfs2_stats_table[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = UNNUMBERED_OR_VAL(3), .procname = "reads", .data = &g_pvfs2_stats.reads, .maxlen = sizeof(unsigned long), @@ -399,44 +632,73 @@ static ctl_table pvfs2_stats_table[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = UNNUMBERED_OR_VAL(4), + CTL_NAME(4) .procname = "writes", .data = &g_pvfs2_stats.writes, .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = &proc_dointvec, }, - {0} + { CTL_NAME(CTL_NONE) } }; + + static ctl_table pvfs2_table[] = { - /* controls debugging level */ + /* outputs the available debugging keywords */ { - .ctl_name = UNNUMBERED_OR_VAL(1), - .procname = "debug", - .data = &gossip_debug_mask, - .maxlen = sizeof(int), + CTL_NAME(14) + .procname = DEBUG_HELP, + .data = &debug_help_string, + .maxlen = PVFS2_MAX_DEBUG_STRING_LEN, + .mode = 0444, + .proc_handler = &pvfs2_proc_debug_mask_handler + }, + /* controls client-core debugging level */ + { + CTL_NAME(1) + .procname = CLIENT_DEBUG, + .data = &client_debug_string, + .maxlen = PVFS2_MAX_DEBUG_STRING_LEN, .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &min_debug, - .extra2 = &max_debug + .proc_handler = &pvfs2_proc_debug_mask_handler + }, + /* 
controls kernel debugging level using string input */ + { + CTL_NAME(2) + .procname = KERNEL_DEBUG, + .data = &kernel_debug_string, + .maxlen = PVFS2_MAX_DEBUG_STRING_LEN, + .mode = 0644, + .proc_handler = &pvfs2_proc_debug_mask_handler }, /* operation timeout */ { - .ctl_name = UNNUMBERED_OR_VAL(2), + CTL_NAME(3) .procname = "op-timeout-secs", .data = &op_timeout_secs, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + CTL_STRATEGY(&sysctl_intvec) .extra1 = &min_op_timeout_secs, .extra2 = &max_op_timeout_secs }, + /* slot timeout */ + { + CTL_NAME(4) + .procname = "slot-timeout-secs", + .data = &slot_timeout_secs, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + CTL_STRATEGY(&sysctl_intvec) + .extra1 = &min_slot_timeout_secs, + .extra2 = &max_slot_timeout_secs + }, /* time interval for client side performance counters */ { - .ctl_name = UNNUMBERED_OR_VAL(3), + CTL_NAME(5) .procname = "perf-time-interval-secs", .maxlen = sizeof(int), .mode = 0644, @@ -445,7 +707,7 @@ static ctl_table pvfs2_table[] = { }, /* time interval for client side performance counters */ { - .ctl_name = UNNUMBERED_OR_VAL(4), + CTL_NAME(6) .procname = "perf-history-size", .maxlen = sizeof(int), .mode = 0644, @@ -454,7 +716,7 @@ static ctl_table pvfs2_table[] = { }, /* reset performance counters */ { - .ctl_name = UNNUMBERED_OR_VAL(5), + CTL_NAME(7) .procname = "perf-counter-reset", .maxlen = sizeof(int), .mode = 0644, @@ -463,14 +725,22 @@ static ctl_table pvfs2_table[] = { }, /* subdir for acache control */ { - .ctl_name = UNNUMBERED_OR_VAL(6), + CTL_NAME(8) .procname = "acache", .maxlen = 0, .mode = 0555, .child = pvfs2_acache_table }, + /* subdir for static acache control */ + { + CTL_NAME(9) + .procname = "static-acache", + .maxlen = 0, + .mode = 0555, + .child = pvfs2_static_acache_table + }, { - .ctl_name = UNNUMBERED_OR_VAL(7), + CTL_NAME(10) .procname = "perf-counters", .maxlen = 0, .mode = 
0555, @@ -478,7 +748,7 @@ static ctl_table pvfs2_table[] = { }, /* subdir for ncache control */ { - .ctl_name = UNNUMBERED_OR_VAL(8), + CTL_NAME(11) .procname = "ncache", .maxlen = 0, .mode = 0555, @@ -486,22 +756,22 @@ static ctl_table pvfs2_table[] = { }, /* statistics maintained by the kernel module (output only below this) */ { - .ctl_name = UNNUMBERED_OR_VAL(9), + CTL_NAME(12) .procname = "stats", .maxlen = 0, .mode = 0555, .child = pvfs2_stats_table }, - {0} + { CTL_NAME(CTL_NONE) } }; static ctl_table fs_table[] = { { - .ctl_name = UNNUMBERED_OR_VAL(1), + CTL_NAME(13) .procname = "pvfs2", .mode = 0555, .child = pvfs2_table }, - {0} + { CTL_NAME(CTL_NONE) } }; #endif diff --git a/src/kernel/linux-2.6/pvfs2-utils.c b/src/kernel/linux-2.6/pvfs2-utils.c index 749c2a0..cf034ff 100644 --- a/src/kernel/linux-2.6/pvfs2-utils.c +++ b/src/kernel/linux-2.6/pvfs2-utils.c @@ -20,8 +20,13 @@ int pvfs2_gen_credentials( if (credentials) { memset(credentials, 0, sizeof(PVFS_credentials)); +#ifdef HAVE_CURRENT_FSUID + credentials->uid = current_fsuid(); + credentials->gid = current_fsgid(); +#else credentials->uid = current->fsuid; credentials->gid = current->fsgid; +#endif ret = 0; } @@ -130,7 +135,7 @@ int copy_attributes_to_inode( char *symname) { int ret = -1; - int perm_mode = 0, old_mode = 0; + int perm_mode = 0; pvfs2_inode_t *pvfs2_inode = NULL; loff_t inode_size = 0, rounded_up_size = 0; @@ -228,9 +233,6 @@ int copy_attributes_to_inode( inode->i_mtime.tv_nsec = 0; inode->i_ctime.tv_nsec = 0; #endif - old_mode = inode->i_mode; - inode->i_mode = 0; - if (attrs->perms & PVFS_O_EXECUTE) perm_mode |= S_IXOTH; if (attrs->perms & PVFS_O_WRITE) @@ -258,13 +260,13 @@ int copy_attributes_to_inode( if (get_suid_flag(inode) == 1 && (attrs->perms & PVFS_U_SUID)) perm_mode |= S_ISUID; - inode->i_mode |= perm_mode; + inode->i_mode = perm_mode; if (is_root_handle(inode)) { /* special case: mark the root inode as sticky */ inode->i_mode |= S_ISVTX; - gossip_debug(GOSSIP_ACL_DEBUG, 
"Marking inode %llu as sticky\n", + gossip_debug(GOSSIP_UTILS_DEBUG, "Marking inode %llu as sticky\n", llu(get_handle_from_ino(inode))); } @@ -284,7 +286,7 @@ int copy_attributes_to_inode( * directories across clients; keep constant at 1. Why 1? If * we go with 2, then find(1) gets confused and won't work * properly withouth the -noleaf option */ - inode->i_nlink = 1; + pvfs2_i_set_nlink(inode, 1); ret = 0; break; case PVFS_TYPE_SYMLINK: @@ -307,7 +309,7 @@ int copy_attributes_to_inode( "attribute type %x\n", attrs->objtype); } gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2: copy_attributes_to_inode: setting i_mode to %o, i_size to %lu\n", - inode->i_mode, (unsigned long)i_size_read(inode)); + inode->i_mode, (unsigned long)pvfs2_i_size_read(inode)); } return ret; } @@ -442,6 +444,7 @@ int pvfs2_inode_getattr(struct inode *inode, uint32_t getattr_mask) pvfs2_inode = PVFS2_I(inode); if (!pvfs2_inode) { + gossip_debug(GOSSIP_UTILS_DEBUG, "%s:%s:%d failed to resolve to pvfs2_inode\n", __FILE__, __func__, __LINE__); return ret; } @@ -507,6 +510,22 @@ int pvfs2_inode_getattr(struct inode *inode, uint32_t getattr_mask) ret = -ENOENT; goto copy_attr_failure; } + + /* store blksize in pvfs2 specific part of inode structure; we + * are only going to use this to report to stat to make sure it + * doesn't perturb any inode related code paths + */ + if(new_op->downcall.resp.getattr.attributes.objtype + == PVFS_TYPE_METAFILE) + { + pvfs2_inode->blksize = + new_op->downcall.resp.getattr.attributes.blksize; + } + else + { + /* mimic behavior of generic_fillattr() for other types */ + pvfs2_inode->blksize = (1 << inode->i_blkbits); + } } copy_attr_failure: @@ -571,7 +590,7 @@ int pvfs2_inode_setattr( new_op, "pvfs2_inode_setattr", get_interruptible_flag(inode)); - gossip_debug(GOSSIP_ACL_DEBUG, "pvfs2_inode_setattr: returning %d\n", ret); + gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_inode_setattr: returning %d\n", ret); /* when request is serviced properly, free req op struct */ 
op_release(new_op); @@ -597,9 +616,23 @@ int pvfs2_flush_inode(struct inode *inode) */ struct iattr wbattr; int ret; + int mtime_flag, ctime_flag, atime_flag, mode_flag; pvfs2_inode_t *pvfs2_inode = PVFS2_I(inode); memset(&wbattr, 0, sizeof(wbattr)); + /* check inode flags up front, and clear them if they are set. This + * will prevent multiple processes from all trying to flush the same + * inode if they call close() simultaneously + */ + mtime_flag = MtimeFlag(pvfs2_inode); + ClearMtimeFlag(pvfs2_inode); + ctime_flag = CtimeFlag(pvfs2_inode); + ClearCtimeFlag(pvfs2_inode); + atime_flag = AtimeFlag(pvfs2_inode); + ClearAtimeFlag(pvfs2_inode); + mode_flag = ModeFlag(pvfs2_inode); + ClearModeFlag(pvfs2_inode); + /* -- Lazy atime,mtime and ctime update -- * Note: all times are dictated by server in the new scheme * and not by the clients @@ -607,9 +640,9 @@ int pvfs2_flush_inode(struct inode *inode) * Also mode updates are being handled now.. */ - if (MtimeFlag(pvfs2_inode)) + if (mtime_flag) wbattr.ia_valid |= ATTR_MTIME; - if (CtimeFlag(pvfs2_inode)) + if (ctime_flag) wbattr.ia_valid |= ATTR_CTIME; /* * We do not need to honor atime flushes if @@ -617,26 +650,29 @@ int pvfs2_flush_inode(struct inode *inode) * b) object is a directory and has a nodiratime marker on the fs * c) entire file system is mounted with noatime option */ + if (!((inode->i_flags & S_NOATIME) || (inode->i_sb->s_flags & MS_NOATIME) - || ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))) && AtimeFlag(pvfs2_inode)) + || ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))) && atime_flag) { wbattr.ia_valid |= ATTR_ATIME; } - if (ModeFlag(pvfs2_inode)) + if (mode_flag) { wbattr.ia_mode = inode->i_mode; wbattr.ia_valid |= ATTR_MODE; - gossip_debug(GOSSIP_ACL_DEBUG, "pvfs2_flush_inode (%llu) writing mode %o\n", - llu(get_handle_from_ino(inode)), inode->i_mode); } gossip_debug(GOSSIP_UTILS_DEBUG, "*********** pvfs2_flush_inode: %llu " "(ia_valid %d)\n", 
llu(get_handle_from_ino(inode)), wbattr.ia_valid); if (wbattr.ia_valid == 0) { + gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_flush_inode skipping setattr()\n"); return 0; } + + gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_flush_inode (%llu) writing mode %o\n", + llu(get_handle_from_ino(inode)), inode->i_mode); ret = pvfs2_inode_setattr(inode, &wbattr); return ret; @@ -720,6 +756,7 @@ ssize_t pvfs2_inode_getxattr(struct inode *inode, const char* prefix, pvfs2_kernel_op_t *new_op = NULL; pvfs2_inode_t *pvfs2_inode = NULL; ssize_t length = 0; + int fsuid, fsgid; if (name == NULL || (size > 0 && buffer == NULL)) { @@ -734,8 +771,16 @@ ssize_t pvfs2_inode_getxattr(struct inode *inode, const char* prefix, } if (inode) { +#ifdef HAVE_CURRENT_FSUID + fsuid = current_fsuid(); + fsgid = current_fsgid(); +#else + fsuid = current->fsuid; + fsgid = current->fsgid; +#endif + gossip_debug(GOSSIP_XATTR_DEBUG, "getxattr on inode %llu, name %s (uid %o, gid %o)\n", - llu(get_handle_from_ino(inode)), name, current->fsuid, current->fsgid); + llu(get_handle_from_ino(inode)), name, fsuid, fsgid); pvfs2_inode = PVFS2_I(inode); /* obtain the xattr semaphore */ down_read(&pvfs2_inode->xattr_sem); @@ -1184,7 +1229,7 @@ static inline struct inode *pvfs2_create_file( new_op, "pvfs2_create_file", get_interruptible_flag(dir)); - gossip_debug(GOSSIP_ACL_DEBUG, "Create Got PVFS2 handle %llu on fsid %d (ret=%d)\n", + gossip_debug(GOSSIP_UTILS_DEBUG, "Create Got PVFS2 handle %llu on fsid %d (ret=%d)\n", llu(new_op->downcall.resp.create.refn.handle), new_op->downcall.resp.create.refn.fs_id, ret); @@ -1209,14 +1254,14 @@ static inline struct inode *pvfs2_create_file( dentry->d_op = &pvfs2_dentry_operations; d_instantiate(dentry, inode); - gossip_debug(GOSSIP_ACL_DEBUG, "Inode (Regular File) %llu -> %s\n", + gossip_debug(GOSSIP_UTILS_DEBUG, "Inode (Regular File) %llu -> %s\n", llu(get_handle_from_ino(inode)), dentry->d_name.name); } else { *error_code = ret; - gossip_debug(GOSSIP_ACL_DEBUG, "pvfs2_create_file: 
failed with error code %d\n", + gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_create_file: failed with error code %d\n", *error_code); } @@ -1296,7 +1341,7 @@ static inline struct inode *pvfs2_create_dir( dentry->d_op = &pvfs2_dentry_operations; d_instantiate(dentry, inode); - gossip_debug(GOSSIP_ACL_DEBUG, "Inode (Directory) %llu -> %s\n", + gossip_debug(GOSSIP_UTILS_DEBUG, "Inode (Directory) %llu -> %s\n", llu(get_handle_from_ino(inode)), dentry->d_name.name); } else @@ -1392,7 +1437,7 @@ static inline struct inode *pvfs2_create_symlink( dentry->d_op = &pvfs2_dentry_operations; d_instantiate(dentry, inode); - gossip_debug(GOSSIP_ACL_DEBUG, "Inode (Symlink) %llu -> %s\n", + gossip_debug(GOSSIP_UTILS_DEBUG, "Inode (Symlink) %llu -> %s\n", llu(get_handle_from_ino(inode)), dentry->d_name.name); } else @@ -1430,6 +1475,12 @@ struct inode *pvfs2_create_entry( { if (dir && dentry && error_code) { + if(strlen(dentry->d_name.name) > (PVFS2_NAME_LEN - 1)) + { + *error_code = -ENAMETOOLONG; + return(NULL); + } + switch (op_type) { case PVFS2_VFS_OP_CREATE: @@ -1774,7 +1825,7 @@ static int do_encode_opaque_handle(char *dst, struct inode *inode) h.atime = pvfs2_convert_time_field(&inode->i_atime); h.mtime = pvfs2_convert_time_field(&inode->i_mtime); h.ctime = pvfs2_convert_time_field(&inode->i_ctime); - h.size = i_size_read(inode); + h.size = pvfs2_i_size_read(inode); h.mask |= PVFS_ATTR_SYS_SIZE; h.objtype = PVFS_TYPE_METAFILE; /* Serialize into the buffer */ @@ -1952,6 +2003,9 @@ int pvfs2_cancel_op_in_progress(unsigned long tag) return (ret); } +/* + We want to clear everything except for rw_semaphore and the vfs_inode +*/ void pvfs2_inode_initialize(pvfs2_inode_t *pvfs2_inode) { if (!InitFlag(pvfs2_inode)) @@ -1961,6 +2015,8 @@ void pvfs2_inode_initialize(pvfs2_inode_t *pvfs2_inode) pvfs2_inode->last_failed_block_index_read = 0; memset(pvfs2_inode->link_target, 0, sizeof(pvfs2_inode->link_target)); pvfs2_inode->error_code = 0; + pvfs2_inode->revalidate_failed = 0; + 
pvfs2_inode->pinode_flags = 0; SetInitFlag(pvfs2_inode); } } @@ -1978,14 +2034,19 @@ void pvfs2_inode_finalize(pvfs2_inode_t *pvfs2_inode) void pvfs2_op_initialize(pvfs2_kernel_op_t *op) { - op->io_completed = 0; + if( op ) + { + spin_lock( &op->lock ); + op->io_completed = 0; - op->upcall.type = PVFS2_VFS_OP_INVALID; - op->downcall.type = PVFS2_VFS_OP_INVALID; - op->downcall.status = -1; + op->upcall.type = PVFS2_VFS_OP_INVALID; + op->downcall.type = PVFS2_VFS_OP_INVALID; + op->downcall.status = -1; - op->op_state = OP_VFS_STATE_UNKNOWN; - op->tag = 0; + op->op_state = OP_VFS_STATE_UNKNOWN; + op->tag = 0; + spin_unlock( &op->lock ); + } } void pvfs2_make_bad_inode(struct inode *inode) @@ -2063,10 +2124,20 @@ int pvfs2_normalize_to_errno(PVFS_error error_code) /* convert any error codes that are in pvfs2 format */ if(IS_PVFS_NON_ERRNO_ERROR(-error_code)) { - /* assume a default error code */ - gossip_err("pvfs2: warning: " - "got error code without errno equivalent: %d.\n", error_code); - error_code = -EINVAL; + if(PVFS_NON_ERRNO_ERROR_CODE(-error_code) == PVFS_ECANCEL) + { + /* cancellation error codes generally correspond to a timeout + * from the client's perspective + */ + error_code = -ETIMEDOUT; + } + else + { + /* assume a default error code */ + gossip_err("pvfs2: warning: " + "got error code without errno equivalent: %d.\n", error_code); + error_code = -EINVAL; + } } else if(IS_PVFS_ERROR(-error_code)) { @@ -2109,6 +2180,206 @@ int32_t PVFS_util_translate_mode(int mode, int suid) #undef NUM_MODES } + +static char * pvfs2_strtok(char *s, const char *toks) +{ + static char *in_string_p; /* original string */ + char *this_string_p; /* starting value of in_string_p */ + /* during this iteration */ + uint32_t toks_len = strlen(toks); /* # of tokens */ + uint32_t i; /* index */ + + if (s) + { + /* when s has a value, we are using a new input string */ + in_string_p=s; + } + + /* set new starting position */ + this_string_p = in_string_p; + + /* loop through the 
string until a token or end-of-string(null) + * is found. + */ + for (;*in_string_p;in_string_p++) + { + /* Is character a token? */ + for (i=0; i end-of-word*/ + *in_string_p = 0; + in_string_p++; + return(this_string_p); + } + }/*end looping of tokens*/ + }/*end looping of the string*/ + + if (*this_string_p==0) + return(NULL); + + return (this_string_p); +}/*end function pvfs2_strtok*/ + +/*convert 64-bit debug mask into a readable string of keywords*/ +static int proc_mask_to_debug(__keyword_mask_t *mask_map + ,int num_mask_map + ,uint64_t mask + ,char *debug_string) +{ + unsigned int index = 0; + unsigned int i; + + memset(debug_string,0,PVFS2_MAX_DEBUG_STRING_LEN); + + for (i=0; i= PVFS2_MAX_DEBUG_STRING_LEN ) + { + return(0); + } + + switch( mask_map[i].mask_val ) + { + case GOSSIP_NO_DEBUG : + { + if ( mask == GOSSIP_NO_DEBUG ) + { + /* "none" */ + strcpy(debug_string,mask_map[i].keyword); + return(0); + } + break; + } + case GOSSIP_MAX_DEBUG : + { + if ( mask == GOSSIP_MAX_DEBUG ) + { + /* "all" */ + strcpy(debug_string,mask_map[i].keyword); + return(0); + } + break; + } + default : + { + if ((mask & mask_map[i].mask_val) != mask_map[i].mask_val) + { /*mask does NOT contain the mask value*/ + break; + } + if (index != 0) + { /*add comma for second and subsequent mask keywords*/ + (debug_string[index]) = ','; + index++; + } + + /*add keyword and slide index*/ + memcpy(&debug_string[index], mask_map[i].keyword + ,strlen(mask_map[i].keyword)); + index += strlen(mask_map[i].keyword); + } + }/*end switch*/ + }/*end for*/ + + return(0); +}/*end function proc_mask_to_debug*/ + + +static uint64_t proc_debug_to_mask(__keyword_mask_t *mask_map, + int num_mask_map, const char *event_logging) +{ + uint64_t mask = 0; + char *s = NULL, *t = NULL; + const char *toks = ", "; + int i = 0, negate = 0, slen = 0; + + if (event_logging) + { + /* s = strdup(event_logging); */ + slen=strlen(event_logging); + s = kmalloc(slen+1,GFP_KERNEL); + if (!s) + { + return (-ENOMEM); + } + 
memset(s,0,slen+1); + memcpy(s,event_logging,slen); + + /* t = strtok(s, toks); */ + t = pvfs2_strtok(s, toks); + + while(t) + { + if (*t == '-') + { + negate = 1; + ++t; + } + + for(i = 0; i < num_mask_map; i++) + { + if (!strcmp(t, mask_map[i].keyword)) + { + if (negate) + { + mask &= ~mask_map[i].mask_val; + } + else + { + mask |= mask_map[i].mask_val; + } + break; + } + } + /* t = strtok(NULL, toks); */ + t = pvfs2_strtok(NULL, toks); + } + kfree(s); + } + return mask; +} + +/* + * Based on human readable keywords, translate them into + * a mask value appropriate for the debugging level desired. + * The 'computed' mask is returned; 0 if no keywords are + * present or recognized. Unrecognized keywords are ignored when + * mixed with recognized keywords. + * + * Prefix a keyword with "-" to turn it off. All keywords + * processed in specified order. + */ +uint64_t PVFS_proc_debug_eventlog_to_mask(const char *event_logging) +{ + return proc_debug_to_mask(s_keyword_mask_map, + num_keyword_mask_map, event_logging); +} + +uint64_t PVFS_proc_kmod_eventlog_to_mask(const char *event_logging) +{ + return proc_debug_to_mask(s_kmod_keyword_mask_map, + num_kmod_keyword_mask_map, event_logging); +} + +int PVFS_proc_kmod_mask_to_eventlog(uint64_t mask, char *debug_string) +{ + return( proc_mask_to_debug(s_kmod_keyword_mask_map + , num_kmod_keyword_mask_map + ,mask + ,debug_string) ); +}/*end function PVFS_proc_kmod_mask_to_eventlog*/ + +int PVFS_proc_mask_to_eventlog(uint64_t mask, char *debug_string) +{ + + return( proc_mask_to_debug(s_keyword_mask_map + ,num_keyword_mask_map + ,mask + ,debug_string) ); +} + + /* * Local variables: * c-indent-level: 4 diff --git a/src/kernel/linux-2.6/super.c b/src/kernel/linux-2.6/super.c index e36e93b..df4f145 100644 --- a/src/kernel/linux-2.6/super.c +++ b/src/kernel/linux-2.6/super.c @@ -13,12 +13,15 @@ LIST_HEAD(pvfs2_superblocks); /* used to protect the above superblock list */ +#ifdef HAVE_SPIN_LOCK_UNLOCKED spinlock_t 
pvfs2_superblocks_lock = SPIN_LOCK_UNLOCKED; +#else +DEFINE_SPINLOCK(pvfs2_superblocks_lock); +#endif /* HAVE_SPIN_LOCK_UNLOCKED */ #ifdef HAVE_GET_FS_KEY_SUPER_OPERATIONS static void pvfs2_sb_get_fs_key(struct super_block *sb, char **ppkey, int *keylen); #endif -static atomic_t pvfs2_inode_alloc_count, pvfs2_inode_dealloc_count; static char *keywords[] = {"intr", "acl", "suid", "noatime", "nodiratime"}; static int num_possible_keywords = sizeof(keywords)/sizeof(char *); @@ -150,6 +153,7 @@ static int parse_mount_options( /* option string did not match any of the known keywords */ if (j == num_possible_keywords) { +#ifdef PVFS2_LINUX_KERNEL_2_4 /* assume we have a device name */ if (got_device == 0) { @@ -168,6 +172,19 @@ static int parse_mount_options( gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2: multiple device names specified: " "ignoring %s\n", options[i]); } +#else + /* filter out NULL option strings (older 2.6 kernels may leave + * these after parsing out standard options like noatime) + */ + if(options[i][0] != '\0') + { + /* in the 2.6 kernel, we don't pass device name through this + * path; we must have gotten an unsupported option. 
+ */ + gossip_err("Error: mount option [%s] is not supported.\n", options[i]); + return(-EINVAL); + } +#endif } } } @@ -194,7 +211,7 @@ static struct inode *pvfs2_alloc_inode(struct super_block *sb) { new_inode = &pvfs2_inode->vfs_inode; gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_alloc_inode: allocated %p\n", pvfs2_inode); - atomic_inc(&pvfs2_inode_alloc_count); + atomic_inc(&(PVFS2_SB(sb)->pvfs2_inode_alloc_count)); new_inode->i_flags &= ~(S_APPEND|S_IMMUTABLE|S_NOATIME); } return new_inode; @@ -209,7 +226,7 @@ static void pvfs2_destroy_inode(struct inode *inode) gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_destroy_inode: deallocated %p destroying inode %llu\n", pvfs2_inode, llu(get_handle_from_ino(inode))); - atomic_inc(&pvfs2_inode_dealloc_count); + atomic_inc(&(PVFS2_SB(inode->i_sb)->pvfs2_inode_dealloc_count)); pvfs2_inode_finalize(pvfs2_inode); pvfs2_inode_release(pvfs2_inode); } @@ -242,6 +259,8 @@ void pvfs2_read_inode( if (pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT) != 0) { /* assume an I/O error and mark the inode as bad */ + gossip_debug(GOSSIP_SUPER_DEBUG, "%s:%s:%d calling make bad inode - [%p] (inode = %llu | ct = %d)\n", + __FILE__, __func__, __LINE__, pvfs2_inode, llu(get_handle_from_ino(inode)), (int)atomic_read(&inode->i_count)); pvfs2_make_bad_inode(inode); } } @@ -284,6 +303,8 @@ void pvfs2_read_inode( #endif if (pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT) != 0) { + gossip_debug(GOSSIP_SUPER_DEBUG, "%s:%s:%d calling make bad inode - [%p] (inode = %llu | ct = %d)\n", + __FILE__, __func__, __LINE__, pvfs2_inode, llu(get_handle_from_ino(inode)), (int)atomic_read(&inode->i_count)); pvfs2_make_bad_inode(inode); } else { @@ -294,8 +315,9 @@ void pvfs2_read_inode( } else { - gossip_err("Could not allocate pvfs2_inode from " - "pvfs2_inode_cache\n"); + gossip_err("%s:%s:%d Could not allocate pvfs2_inode from pvfs2_inode_cache." 
+ "calling make bad inode - [%p] (inode = %llu | ct = %d)\n", + __FILE__, __func__, __LINE__, pvfs2_inode, llu(get_handle_from_ino(inode)), (int)atomic_read(&inode->i_count)); pvfs2_make_bad_inode(inode); } } @@ -314,6 +336,7 @@ static void pvfs2_clear_inode(struct inode *inode) #endif /* PVFS2_LINUX_KERNEL_2_4 */ +#ifdef HAVE_PUT_INODE /* called when the VFS removes this inode from the inode cache */ static void pvfs2_put_inode( struct inode *inode) @@ -343,6 +366,7 @@ static void pvfs2_put_inode( #endif } } +#endif /* HAVE_PUT_INODE */ #ifdef HAVE_STATFS_LITE_SUPER_OPERATIONS static int pvfs2_statfs_lite( @@ -513,7 +537,53 @@ static int pvfs2_statfs( return ret; } +/* pvfs2_remount_fs() + * + * remount as initiated by VFS layer. We just need to reparse the mount + * options, no need to signal pvfs2-client-core about it. + */ +static int pvfs2_remount_fs( + struct super_block *sb, + int *flags, + char *data) +{ + int ret = -EINVAL; + + gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_remount_fs: called\n"); + + if (sb && PVFS2_SB(sb)) + { + if (data && data[0] != '\0') + { + ret = parse_mount_options(data, sb, 1); + if (ret) + { + return ret; + } +#if !defined(PVFS2_LINUX_KERNEL_2_4) && defined(HAVE_GENERIC_GETXATTR) && defined(CONFIG_FS_POSIX_ACL) + /* mark the superblock as whether it supports acl's or not */ + sb->s_flags = ((sb->s_flags & ~MS_POSIXACL) | + ((PVFS2_SB(sb)->mnt_options.acl == 1) ? MS_POSIXACL : 0)); + sb->s_xattr = pvfs2_xattr_handlers; +#endif + sb->s_flags = ((sb->s_flags & ~MS_NOATIME) | + ((PVFS2_SB(sb)->mnt_options.noatime == 1) ? MS_NOATIME : 0)); + sb->s_flags = ((sb->s_flags & ~MS_NODIRATIME) | + ((PVFS2_SB(sb)->mnt_options.nodiratime == 1) ? MS_NODIRATIME : 0)); + } + + if (data) + { + strncpy(PVFS2_SB(sb)->data, data, PVFS2_MAX_MOUNT_OPT_LEN); + } + } + return 0; +} + /* + Remount as initiated by pvfs2-client-core on restart. This is used to + repopulate mount information left from previous pvfs2-client-core. 
+ the idea here is that given a valid superblock, we're re-initializing the user space client with the initial mount information specified when the super block was first initialized. @@ -537,7 +607,7 @@ int pvfs2_remount( if (sb && PVFS2_SB(sb)) { - if (data) + if (data && data[0] != '\0') { ret = parse_mount_options(data, sb, 1); if (ret) @@ -824,7 +894,11 @@ void fsid_key_table_finalize(void) #endif /* Called whenever the VFS dirties the inode in response to atime updates */ -static void pvfs2_dirty_inode(struct inode *inode) +static void pvfs2_dirty_inode(struct inode *inode +#ifdef HAVE_DIRTY_INODE_FLAGS + ,int flags +#endif + ) { if (inode) { @@ -840,22 +914,26 @@ struct super_operations pvfs2_s_ops = #ifdef PVFS2_LINUX_KERNEL_2_4 read_inode : pvfs2_read_inode, statfs : pvfs2_statfs, - remount_fs : pvfs2_remount, + remount_fs : pvfs2_remount_fs, put_super : pvfs2_kill_sb, dirty_inode : pvfs2_dirty_inode, clear_inode: pvfs2_clear_inode, put_inode: pvfs2_put_inode, #else +#ifdef HAVE_DROP_INODE .drop_inode = generic_delete_inode, +#endif .alloc_inode = pvfs2_alloc_inode, .destroy_inode = pvfs2_destroy_inode, #ifdef HAVE_READ_INODE .read_inode = pvfs2_read_inode, #endif .dirty_inode = pvfs2_dirty_inode, +#ifdef HAVE_PUT_INODE .put_inode = pvfs2_put_inode, +#endif .statfs = pvfs2_statfs, - .remount_fs = pvfs2_remount, + .remount_fs = pvfs2_remount_fs, #ifdef HAVE_FIND_INODE_HANDLE_SUPER_OPERATIONS .find_inode_handle = pvfs2_sb_find_inode_handle, #endif @@ -956,11 +1034,13 @@ struct super_block* pvfs2_get_sb( sb->s_blocksize = pvfs_bufmap_size_query(); sb->s_blocksize_bits = pvfs_bufmap_shift_query(); - sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_maxbytes = (unsigned long long) 1 << 63; root_object.handle = PVFS2_SB(sb)->root_handle; root_object.fs_id = PVFS2_SB(sb)->fs_id; + gossip_debug(GOSSIP_SUPER_DEBUG, "get inode %llu, fsid %d\n", + root_object.handle, root_object.fs_id); /* alloc and initialize our root directory inode by explicitly requesting * the sticky 
bit to be set */ root = pvfs2_get_custom_core_inode( @@ -1021,7 +1101,118 @@ struct super_block* pvfs2_get_sb( #else /* !PVFS2_LINUX_KERNEL_2_4 */ -static struct export_operations pvfs2_export_ops = {}; +#ifdef HAVE_FHTODENTRY_EXPORT_OPERATIONS +struct dentry * +pvfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + PVFS_object_ref refn; + struct inode *inode; + struct dentry *dentry; + + if (fh_len < 3 || fh_type > 2) + { + return NULL; + } + + refn.handle = (u64) (fid->raw[0]) << 32; + refn.handle |= (u32) fid->raw[1]; + refn.fs_id = (u32) fid->raw[2]; + gossip_debug(GOSSIP_SUPER_DEBUG, "fh_to_dentry: handle %llu, fs_id %d\n", + refn.handle, refn.fs_id); + + inode = pvfs2_iget(sb, &refn); + +#ifdef HAVE_D_ALLOC_ANON + if (inode == NULL) + { + return ERR_PTR(-ESTALE); + } + if (IS_ERR(inode)) + { + return (void *) inode; + } + dentry = d_alloc_anon(inode); + if (dentry == NULL) + { + iput(inode); + return ERR_PTR(-ENOMEM); + } +#else + dentry = d_obtain_alias(inode); + if(dentry == NULL) + { + return ERR_PTR(-ENOMEM); + } +#endif + + dentry->d_op = &pvfs2_dentry_operations; + return dentry; +} +#endif /* HAVE_FHTODENTRY_EXPORT_OPERATIONS */ + +#ifdef HAVE_ENCODEFH_EXPORT_OPERATIONS +int pvfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, int connectable) +{ + struct inode *inode = dentry->d_inode; + int len = *max_len; + int type = 1; + PVFS_object_ref handle; + u32 generation; + + /* + * if connectable is specified, parent handle identity has to be stashed + * as well. 
+ */ + if (len < 3 || (connectable && len < 6)) { + gossip_lerr("fh buffer is too small for encoding\n"); + type = 255; + goto out; + } + + handle = PVFS2_I(inode)->refn; + generation = inode->i_generation; + gossip_debug(GOSSIP_SUPER_DEBUG, "Encoding fh: handle %llu, gen %u, fsid %u\n", + handle.handle, generation, handle.fs_id); + + len = 3; + fh[0] = handle.handle >> 32; + fh[1] = handle.handle & 0xffffffff; + fh[2] = handle.fs_id; + + if (connectable && !S_ISDIR(inode->i_mode)) { + struct inode *parent; + + spin_lock(&dentry->d_lock); + + parent = dentry->d_parent->d_inode; + handle = PVFS2_I(parent)->refn; + generation = parent->i_generation; + fh[3] = handle.handle >> 32; + fh[4] = handle.handle & 0xffffffff; + fh[5] = handle.fs_id; + + spin_unlock(&dentry->d_lock); + len = 6; + type = 2; + gossip_debug(GOSSIP_SUPER_DEBUG, "Encoding parent: handle %llu, gen %u, fsid %u\n", + handle.handle, generation, handle.fs_id); + } + *max_len = len; + +out: + return type; +} +#endif /* HAVE_ENCODEFH_EXPORT_OPERATIONS */ + +static struct export_operations pvfs2_export_ops = { +#ifdef HAVE_ENCODEFH_EXPORT_OPERATIONS + .encode_fh = pvfs2_encode_fh, +#endif +#ifdef HAVE_FHTODENTRY_EXPORT_OPERATIONS + .fh_to_dentry = pvfs2_fh_to_dentry, +#endif +}; int pvfs2_fill_sb( struct super_block *sb, @@ -1046,6 +1237,7 @@ int pvfs2_fill_sb( PVFS2_SB(sb)->root_handle = mount_sb_info->root_handle; PVFS2_SB(sb)->fs_id = mount_sb_info->fs_id; PVFS2_SB(sb)->id = mount_sb_info->id; + memcpy(&PVFS2_SB(sb)->osd_info, &mount_sb_info->osd_info, sizeof(PVFS2_SB(sb)->osd_info)); @@ -1088,6 +1280,8 @@ int pvfs2_fill_sb( root_object.handle = PVFS2_SB(sb)->root_handle; root_object.fs_id = PVFS2_SB(sb)->fs_id; + gossip_debug(GOSSIP_SUPER_DEBUG, "get inode %llu, fsid %d\n", + root_object.handle, root_object.fs_id); /* alloc and initialize our root directory inode. 
be explicit about sticky * bit */ root = pvfs2_get_custom_core_inode(sb, NULL, (S_IFDIR | 0755 | S_ISVTX), @@ -1111,7 +1305,13 @@ int pvfs2_fill_sb( sb->s_root = root_dentry; return 0; } - +#ifdef HAVE_FSTYPE_MOUNT_ONLY +struct dentry *pvfs2_mount( + struct file_system_type *fst, + int flags, + const char *devname, + void *data) +#else #ifdef HAVE_VFSMOUNT_GETSB int pvfs2_get_sb( struct file_system_type *fst, @@ -1125,12 +1325,16 @@ struct super_block *pvfs2_get_sb( int flags, const char *devname, void *data) -#endif +#endif /* HAVE_VFSMOUNT_GETSB */ +#endif /* HAVE_FSTYPE_MOUNT_ONLY */ { int ret = -EINVAL; struct super_block *sb = ERR_PTR(-EINVAL); pvfs2_kernel_op_t *new_op; pvfs2_mount_sb_info_t mount_sb_info; +#ifdef HAVE_FSTYPE_MOUNT_ONLY + struct dentry *mnt_sb_d = ERR_PTR(-EINVAL); +#endif gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_get_sb: called with devname %s\n", devname); @@ -1140,7 +1344,7 @@ struct super_block *pvfs2_get_sb( if (!new_op) { ret = -ENOMEM; -#ifdef HAVE_VFSMOUNT_GETSB +#if defined(HAVE_VFSMOUNT_GETSB) && !defined(HAVE_FSTYPE_MOUNT_ONLY) return ret; #else return ERR_PTR(ret); @@ -1188,6 +1392,11 @@ struct super_block *pvfs2_get_sb( here. so we store it temporarily and pass all of the info to fill_sb where it's properly copied out */ + /* kernels beyond 2.6.38 no longer have get_sb_nodev in favor of + * mount_nodev. if the kernel still has get_sb_nodev use that in + * favor of mount_nodev to minimize changes for currently working + * kernels. 
*/ +#ifdef HAVE_GETSB_NODEV #ifdef HAVE_VFSMOUNT_GETSB ret = get_sb_nodev( fst, flags, (void *)&mount_sb_info, pvfs2_fill_sb, mnt); @@ -1197,10 +1406,30 @@ struct super_block *pvfs2_get_sb( #else sb = get_sb_nodev( fst, flags, (void *)&mount_sb_info, pvfs2_fill_sb); -#endif +#endif /* HAVE_VFSMOUNT_GETSB */ +#else /* !HAVE_GETSB_NODEV */ + mnt_sb_d = mount_nodev( + fst, flags, (void *)&mount_sb_info, pvfs2_fill_sb); + if( !IS_ERR(mnt_sb_d) ) + { + sb = mnt_sb_d->d_sb; + } + else + { + sb = ERR_CAST(mnt_sb_d); + goto free_op; + } +#endif /* HAVE_GETSB_NODEV */ if (sb && !IS_ERR(sb) && (PVFS2_SB(sb))) { + /* Older 2.6 kernels pass in NOATIME flag here. Capture it + * if present. + */ + if(flags & MS_NOATIME) + { + sb->s_flags |= MS_NOATIME; + } /* on successful mount, store the devname and data used */ strncpy(PVFS2_SB(sb)->devname, devname, PVFS_MAX_SERVER_ADDR_LEN); @@ -1229,17 +1458,22 @@ struct super_block *pvfs2_get_sb( { gossip_err("ERROR: device name not specified.\n"); } - +#ifdef HAVE_FSTYPE_MOUNT_ONLY + return mnt_sb_d; +#else #ifdef HAVE_VFSMOUNT_GETSB return ret; #else return sb; -#endif +#endif /* HAVE_VFSMOUNT_GETSB */ +#endif /* HAVE_FSTYPE_MOUNT_ONLY */ error_exit: if (ret || IS_ERR(sb)) { +#if !defined(HAVE_FSTYPE_MOUNT_ONLY) sb = ERR_PTR(ret); +#endif /* HAVE_FSTYPE_MOUNT_ONLY */ } #ifdef HAVE_VFSMOUNT_GETSB free_op: @@ -1255,9 +1489,13 @@ struct super_block *pvfs2_get_sb( { op_release(new_op); } -#ifdef HAVE_VFSMOUNT_GETSB +#if defined(HAVE_VFSMOUNT_GETSB) && !defined(HAVE_FSTYPE_MOUNT_ONLY) gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_get_sb: returning %d\n", ret); return ret; +#elif defined(HAVE_FSTYPE_MOUNT_ONLY) + gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_get_sb: returning dentry %p\n", + mnt_sb_d); + return mnt_sb_d; #else gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_get_sb: returning sb %p\n", sb); return sb; @@ -1269,6 +1507,7 @@ struct super_block *pvfs2_get_sb( static void pvfs2_flush_sb( struct super_block *sb) { +#ifdef HAVE_SB_DIRTY_LIST if 
(!list_empty(&sb->s_dirty)) { struct inode *inode = NULL; @@ -1277,6 +1516,7 @@ static void pvfs2_flush_sb( pvfs2_flush_inode(inode); } } +#endif return; } @@ -1313,15 +1553,10 @@ void pvfs2_kill_sb( dput(sb->s_root); } - /* free the pvfs2 superblock private data */ - kfree(PVFS2_SB(sb)); -#else - sb->u.generic_sbp = NULL; -#endif { int count1, count2; - count1 = atomic_read(&pvfs2_inode_alloc_count); - count2 = atomic_read(&pvfs2_inode_dealloc_count); + count1 = atomic_read(&(PVFS2_SB(sb)->pvfs2_inode_alloc_count)); + count2 = atomic_read(&(PVFS2_SB(sb)->pvfs2_inode_dealloc_count)); if (count1 != count2) { gossip_err("pvfs2_kill_sb: (WARNING) number of inode allocs (%d) != number of inode deallocs (%d)\n", @@ -1333,6 +1568,11 @@ void pvfs2_kill_sb( count1, count2); } } + /* free the pvfs2 superblock private data */ + kfree(PVFS2_SB(sb)); +#else + sb->u.generic_sbp = NULL; +#endif } else { diff --git a/src/kernel/linux-2.6/upcall.h b/src/kernel/linux-2.6/upcall.h index 67243b8..5a406cb 100644 --- a/src/kernel/linux-2.6/upcall.h +++ b/src/kernel/linux-2.6/upcall.h @@ -198,7 +198,12 @@ enum pvfs2_param_request_op PVFS2_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS = 8, PVFS2_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT = 9, PVFS2_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT = 10, - PVFS2_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE = 11 + PVFS2_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE = 11, + PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_TIMEOUT_MSECS = 12, + PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_HARD_LIMIT = 13, + PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_SOFT_LIMIT = 14, + PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_RECLAIM_PERCENTAGE = 15, + PVFS2_PARAM_REQUEST_OP_CLIENT_DEBUG = 16, }; typedef struct @@ -206,12 +211,14 @@ typedef struct enum pvfs2_param_request_type type; enum pvfs2_param_request_op op; int64_t value; + char s_value[PVFS2_MAX_DEBUG_STRING_LEN]; } pvfs2_param_request_t; enum pvfs2_perf_count_request_type { PVFS2_PERF_COUNT_REQUEST_ACACHE = 1, PVFS2_PERF_COUNT_REQUEST_NCACHE = 2, + 
PVFS2_PERF_COUNT_REQUEST_STATIC_ACACHE = 3, }; typedef struct { @@ -229,7 +236,10 @@ typedef struct { int32_t type; int32_t __pad1; + PVFS_credentials credentials; + int pid; + int tgid; /* currently trailer is used only by readx/writex (iox) */ PVFS_size trailer_size; PVFS2_ALIGN_VAR(char *, trailer_buf); diff --git a/src/kernel/linux-2.6/waitqueue.c b/src/kernel/linux-2.6/waitqueue.c index ccc56f2..f001123 100644 --- a/src/kernel/linux-2.6/waitqueue.c +++ b/src/kernel/linux-2.6/waitqueue.c @@ -1,6 +1,7 @@ /* * (C) 2001 Clemson University and The University of Chicago - * + * (C) 2011 Omnibond Systems + * * Changes by Acxiom Corporation to implement generic service_operation() * function, Copyright © Acxiom Corporation, 2005. * @@ -15,6 +16,7 @@ #include "pvfs2-kernel.h" #include "pvfs2-internal.h" +#include "pvfs2-bufmap.h" /* What we do in this function is to walk the list of operations that are present @@ -28,8 +30,8 @@ void purge_waiting_ops(void) spin_lock(&pvfs2_request_list_lock); list_for_each_entry(op, &pvfs2_request_list, list) { - spin_lock(&op->lock); gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2-client-core: purging op tag %lld %s\n", lld(op->tag), get_opname_string(op)); + spin_lock(&op->lock); set_op_state_purged(op); spin_unlock(&op->lock); wake_up_interruptible(&op->waitq); @@ -56,6 +58,19 @@ int service_operation( sigset_t orig_sigset; int ret = 0; + /* irqflags and wait_entry are only used IF the client-core aborts */ + unsigned long irqflags; + DECLARE_WAITQUEUE(wait_entry, current); + + +#ifdef PVFS2_LINUX_KERNEL_2_4 + op->upcall.tgid = -1; +#else + op->upcall.tgid = current->tgid; +#endif + op->upcall.pid = current->pid; + + retry_servicing: op->downcall.status = 0; gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2: service_operation: %s %p\n", op_name, op); @@ -82,12 +97,16 @@ int service_operation( return(ret); } } + + gossip_debug(GOSSIP_WAIT_DEBUG,"%s:About to call is_daemon_in_service().\n",__func__); if (is_daemon_in_service() < 0) { /* By incrementing 
the per-operation attempt counter, we directly go into the timeout logic * while waiting for the matching downcall to be read */ + gossip_debug(GOSSIP_WAIT_DEBUG,"%s:client core is NOT in service(%d).\n",__func__ + ,is_daemon_in_service()); op->attempts++; } @@ -98,6 +117,7 @@ int service_operation( } else { + gossip_debug(GOSSIP_WAIT_DEBUG,"%s:About to call add_op_to_request_list().\n",__func__); add_op_to_request_list(op); } @@ -114,6 +134,7 @@ int service_operation( if(flags & PVFS2_OP_CANCELLATION) { + gossip_debug(GOSSIP_WAIT_DEBUG,"%s:About to call wait_for_cancellation_downcall().\n",__func__); ret = wait_for_cancellation_downcall(op); } else @@ -149,13 +170,61 @@ int service_operation( { gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2: tag %lld (%s) -- operation to be retried (%d attempt)\n", lld(op->tag), op_name, op->attempts + 1); - goto retry_servicing; - } + + if (!op->uses_shared_memory) + { + /* this operation doesn't use the shared memory system */ + goto retry_servicing; + } + + /* op uses shared memory */ + if (get_bufmap_init() == 0) + { + /* This operation uses the shared memory system AND the system is not yet ready. */ + /* This situation occurs when the client-core is restarted AND there were operations */ + /* waiting to be processed or were already in process. */ + gossip_debug(GOSSIP_WAIT_DEBUG,"uses_shared_memory is true.\n"); + gossip_debug(GOSSIP_WAIT_DEBUG,"Client core in-service status(%d).\n",is_daemon_in_service()); + gossip_debug(GOSSIP_WAIT_DEBUG,"bufmap_init:%d.\n",get_bufmap_init()); + gossip_debug(GOSSIP_WAIT_DEBUG,"operation's status is 0x%0x.\n",op->op_state); + + /* let process sleep for a few seconds so shared memory system can be initialized. */ + spin_lock_irqsave(&op->lock,irqflags); + add_wait_queue(&pvfs2_bufmap_init_waitq, &wait_entry); + spin_unlock_irqrestore(&op->lock,irqflags); + + set_current_state(TASK_INTERRUPTIBLE); + + /* Wait for pvfs_bufmap_initialize() to wake me up within the allotted time. 
*/ + ret=schedule_timeout(MSECS_TO_JIFFIES(1000 * PVFS2_BUFMAP_WAIT_TIMEOUT_SECS)); + + gossip_debug(GOSSIP_WAIT_DEBUG,"Value returned from schedule_timeout:%d.\n",ret); + gossip_debug(GOSSIP_WAIT_DEBUG,"Is shared memory available? (%d).\n",get_bufmap_init()); + + spin_lock_irqsave(&op->lock,irqflags); + remove_wait_queue(&pvfs2_bufmap_init_waitq, &wait_entry); + spin_unlock_irqrestore(&op->lock,irqflags); + + if (get_bufmap_init() == 0) + { + gossip_err("%s:The shared memory system has not started in %d seconds after the " + "client core restarted. Aborting user's request(%s).\n" + ,__func__ + ,PVFS2_BUFMAP_WAIT_TIMEOUT_SECS + ,get_opname_string(op)); + return(-EIO); + } + + /* Return to the calling function and re-populate a shared memory buffer. */ + return(-EAGAIN); + }/*endif*/ + }/*endif*/ + gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2: service_operation %s returning: %d for %p.\n", op_name, ret, op); return(ret); } -void clean_up_interrupted_operation( +void pvfs2_clean_up_interrupted_operation( pvfs2_kernel_op_t * op) { /* @@ -167,6 +236,23 @@ void clean_up_interrupted_operation( while holding the request_list lock. Here, we first lock the op and then lock the appropriate list. */ + if( !op ) + { + gossip_debug(GOSSIP_WAIT_DEBUG, "%s: op is null, ignoring\n", + __func__); + return; + } + + /* one more sanity check, make sure it's in one of the possible states + * or don't try to cancel it */ + if( ! (op_state_waiting(op) || op_state_in_progress(op) || + op_state_serviced(op) || op_state_purged(op)) ) + { + gossip_debug(GOSSIP_WAIT_DEBUG, "%s: op %p not in a valid state (%0x), " + "ignoring\n", __func__, op, op->op_state); + return; + } + spin_lock(&op->lock); if (op_state_waiting(op)) @@ -175,22 +261,24 @@ void clean_up_interrupted_operation( upcall hasn't been read; remove op from upcall request list. 
*/ + spin_unlock(&op->lock); remove_op_from_request_list(op); gossip_debug(GOSSIP_WAIT_DEBUG, "Interrupted: Removed op %p from request_list\n", op); } else if (op_state_in_progress(op)) { /* op must be removed from the in progress htable */ + spin_unlock(&op->lock); remove_op_from_htable_ops_in_progress(op); gossip_debug(GOSSIP_WAIT_DEBUG, "Interrupted: Removed op %p from " "htable_ops_in_progress\n", op); } else if (!op_state_serviced(op)) { - gossip_err("interrupted operation is in a weird state 0x%x\n", + spin_unlock(&op->lock); + gossip_err("interrupted operation is in a weird state 0x%x\n", op->op_state); } - spin_unlock(&op->lock); } /** sleeps on waitqueue waiting for matching downcall. @@ -227,12 +315,13 @@ int wait_for_matching_downcall(pvfs2_kernel_op_t * op) ret = 0; break; } + spin_unlock(&op->lock); if (!signal_pending(current)) { - /* if this was our first attempt and client-core has not purged our operation, - * we are happy to simply wait - */ + /* if this was our first attempt and client-core has not purged our + * operation, we are happy to simply wait */ + spin_lock(&op->lock); if (op->attempts == 0 && !op_state_purged(op)) { spin_unlock(&op->lock); @@ -243,39 +332,41 @@ int wait_for_matching_downcall(pvfs2_kernel_op_t * op) /* subsequent attempts, we retry exactly once with timeouts */ if (!schedule_timeout(MSECS_TO_JIFFIES(1000 * op_timeout_secs))) { - gossip_debug(GOSSIP_WAIT_DEBUG, "*** operation timed out (tag %lld, %p, att %d)\n", - lld(op->tag), op, op->attempts); + gossip_debug(GOSSIP_WAIT_DEBUG, "*** %s: operation timed " + "out (tag %lld, %p, att %d)\n", __func__, + lld(op->tag), op, op->attempts); ret = -ETIMEDOUT; - clean_up_interrupted_operation(op); + pvfs2_clean_up_interrupted_operation(op); break; } } spin_lock(&op->lock); op->attempts++; - /* if the operation was purged in the meantime, it is better to requeue it afresh - * but ensure that we have not been purged repeatedly. 
This could happen if client-core - * crashes when an op is being serviced, so we requeue the op, client core crashes again - * so we requeue the op, client core starts, and so on...*/ + /* if the operation was purged in the meantime, it is better to + * requeue it afresh but ensure that we have not been purged + * repeatedly. This could happen if client-core crashes when an op + * is being serviced, so we requeue the op, client core crashes + * again so we requeue the op, client core starts, and so on...*/ if (op_state_purged(op)) { ret = (op->attempts < PVFS2_PURGE_RETRY_COUNT) ? -EAGAIN : -EIO; spin_unlock(&op->lock); - clean_up_interrupted_operation(op); + gossip_debug(GOSSIP_WAIT_DEBUG, "*** %s: operation purged " + "(tag %lld, %p, att %d)\n", __func__, lld(op->tag), + op, op->attempts); + pvfs2_clean_up_interrupted_operation(op); break; } spin_unlock(&op->lock); continue; } - else { - spin_unlock(&op->lock); - } - gossip_debug(GOSSIP_WAIT_DEBUG, "*** operation interrupted by a signal (tag %lld, op %p)\n", - lld(op->tag), op); - clean_up_interrupted_operation(op); + gossip_debug(GOSSIP_WAIT_DEBUG, "*** %s: operation interrupted by a " + "signal (tag %lld, op %p)\n", __func__, lld(op->tag), op); + pvfs2_clean_up_interrupted_operation(op); ret = -EINTR; break; - } + }/*end while*/ set_current_state(TASK_RUNNING); @@ -310,21 +401,42 @@ int wait_for_cancellation_downcall(pvfs2_kernel_op_t * op) spin_lock(&op->lock); if (op_state_serviced(op)) { + gossip_debug(GOSSIP_WAIT_DEBUG,"%s:op-state is SERVICED.\n",__func__); spin_unlock(&op->lock); ret = 0; break; } spin_unlock(&op->lock); - if (!schedule_timeout - (MSECS_TO_JIFFIES(1000 * op_timeout_secs))) + if (signal_pending(current)) + { + gossip_debug(GOSSIP_WAIT_DEBUG,"%s:operation interrupted by a signal (tag %lld, op %p)\n" + ,__func__ + ,lld(op->tag) + ,op); + pvfs2_clean_up_interrupted_operation(op); + ret = -EINTR; + break; + } + + gossip_debug(GOSSIP_WAIT_DEBUG,"%s:About to call schedule_timeout.\n",__func__); 
+ ret=schedule_timeout(MSECS_TO_JIFFIES(1000 * op_timeout_secs)); + + gossip_debug(GOSSIP_WAIT_DEBUG,"%s:Value returned from schedule_timeout(%d).\n" + ,__func__,ret); + if (!ret) { - gossip_debug(GOSSIP_WAIT_DEBUG, "*** operation timed out: %p\n", op); - clean_up_interrupted_operation(op); + gossip_debug(GOSSIP_WAIT_DEBUG, "%s:*** operation timed out: %p\n", __func__,op); + pvfs2_clean_up_interrupted_operation(op); ret = -ETIMEDOUT; break; } - } + + gossip_debug(GOSSIP_WAIT_DEBUG,"%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n",__func__); + ret = -ETIMEDOUT; + break; + }/*end while*/ + set_current_state(TASK_RUNNING); @@ -332,6 +444,8 @@ int wait_for_cancellation_downcall(pvfs2_kernel_op_t * op) remove_wait_queue(&op->waitq, &wait_entry); spin_unlock(&op->lock); + gossip_debug(GOSSIP_WAIT_DEBUG,"%s:returning ret(%d)\n",__func__,ret); + return ret; } diff --git a/src/kernel/linux-2.6/xattr-default.c b/src/kernel/linux-2.6/xattr-default.c index f346f6f..0797600 100644 --- a/src/kernel/linux-2.6/xattr-default.c +++ b/src/kernel/linux-2.6/xattr-default.c @@ -19,32 +19,84 @@ #include -int pvfs2_xattr_set_default(struct inode *inode, - const char *name, const void *buffer, size_t size, int flags) +int pvfs2_xattr_set_default( +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + struct dentry *dentry, +#else + struct inode *inode, +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + const char *name, + const void *buffer, + size_t size, + int flags +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + , int handler_flags +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + ) { int internal_flag = 0; if (strcmp(name, "") == 0) return -EINVAL; + +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + if (!S_ISREG(dentry->d_inode->i_mode) && + (!S_ISDIR(dentry->d_inode->i_mode) || + dentry->d_inode->i_mode & S_ISVTX)) + { + gossip_err("pvfs2_xattr_set_default: Returning EPERM for inode %p.\n", + dentry->d_inode); + return -EPERM; + } +#else if (!S_ISREG(inode->i_mode) && 
(!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) { - return -EPERM; + gossip_err("pvfs2_xattr_set_default: Returning EPERM for inode %p.\n", + inode); + return -EPERM; } +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + gossip_debug(GOSSIP_XATTR_DEBUG, "pvfs2_setxattr_default %s\n", name); internal_flag = convert_to_internal_xattr_flags(flags); + +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + return pvfs2_inode_setxattr(dentry->d_inode, + PVFS2_XATTR_NAME_DEFAULT_PREFIX, name, buffer, size, internal_flag); +#else return pvfs2_inode_setxattr(inode, PVFS2_XATTR_NAME_DEFAULT_PREFIX, name, buffer, size, internal_flag); +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ } -int pvfs2_xattr_get_default(struct inode *inode, - const char *name, void *buffer, size_t size) +int pvfs2_xattr_get_default( +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + struct dentry *dentry, +#else + struct inode *inode, +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + const char *name, + void *buffer, + size_t size +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + , int handler_flags +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + + ) { if (strcmp(name, "") == 0) return -EINVAL; gossip_debug(GOSSIP_XATTR_DEBUG, "pvfs2_getxattr_default %s\n", name); + +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + return pvfs2_inode_getxattr(dentry->d_inode, + PVFS2_XATTR_NAME_DEFAULT_PREFIX, name, buffer, size); +#else return pvfs2_inode_getxattr(inode, PVFS2_XATTR_NAME_DEFAULT_PREFIX, name, buffer, size); +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + } #endif diff --git a/src/kernel/linux-2.6/xattr-trusted.c b/src/kernel/linux-2.6/xattr-trusted.c index ef6015d..ac0e236 100644 --- a/src/kernel/linux-2.6/xattr-trusted.c +++ b/src/kernel/linux-2.6/xattr-trusted.c @@ -17,8 +17,20 @@ #include -int pvfs2_xattr_set_trusted(struct inode *inode, - const char *name, const void *buffer, size_t size, int flags) +int pvfs2_xattr_set_trusted( +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + struct dentry *dentry, +#else + struct inode 
*inode, +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + const char *name, + const void *buffer, + size_t size, + int flags +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + , int handler_flags +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ + ) { int internal_flag = 0; @@ -32,12 +44,29 @@ int pvfs2_xattr_set_trusted(struct inode *inode, return -EPERM; } internal_flag = convert_to_internal_xattr_flags(flags); + +#ifdef HAVE_XATTR_HANDLER_SET_SIX_PARAM + return pvfs2_inode_setxattr(dentry->d_inode, + PVFS2_XATTR_NAME_TRUSTED_PREFIX, name, buffer, size, internal_flag); +#else return pvfs2_inode_setxattr(inode, PVFS2_XATTR_NAME_TRUSTED_PREFIX, name, buffer, size, internal_flag); +#endif /* HAVE_XATTR_HANDLER_SET_SIX_PARAM */ } -int pvfs2_xattr_get_trusted(struct inode *inode, - const char *name, void *buffer, size_t size) +int pvfs2_xattr_get_trusted( +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + struct dentry *dentry, +#else + struct inode *inode, +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + const char *name, + void *buffer, + size_t size +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + , int handler_flags +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ + ) { gossip_debug(GOSSIP_XATTR_DEBUG, "pvfs2_xattr_get_trusted: name %s, buffer_size %zd\n", name, size); @@ -48,8 +77,13 @@ int pvfs2_xattr_get_trusted(struct inode *inode, gossip_err("pvfs2_xattr_get_trusted: operation not permitted\n"); return -EPERM; } +#ifdef HAVE_XATTR_HANDLER_GET_FIVE_PARAM + return pvfs2_inode_getxattr(dentry->d_inode, + PVFS2_XATTR_NAME_TRUSTED_PREFIX, name, buffer, size); +#else return pvfs2_inode_getxattr(inode, PVFS2_XATTR_NAME_TRUSTED_PREFIX, name, buffer, size); +#endif /* HAVE_XATTR_HANDLER_GET_FIVE_PARAM */ } #endif diff --git a/src/kernel/linux-2.6/xattr.c b/src/kernel/linux-2.6/xattr.c index 69a0d72..207c045 100644 --- a/src/kernel/linux-2.6/xattr.c +++ b/src/kernel/linux-2.6/xattr.c @@ -29,7 +29,12 @@ * null-terminated array of struct xattr_handler (one for each prefix) and * hang a pointer 
to it off of the s_xattr field of the superblock. */ -struct xattr_handler *pvfs2_xattr_handlers[] = { +#ifdef HAVE_CONST_S_XATTR_IN_SUPERBLOCK +const struct xattr_handler *pvfs2_xattr_handlers[] = +#else +struct xattr_handler *pvfs2_xattr_handlers[] = +#endif +{ /* * ACL xattrs have special prefixes that I am handling separately * so that we get control when the acl's are set or listed or queried! diff --git a/src/proto/PINT-le-bytefield.c b/src/proto/PINT-le-bytefield.c index c75e71a..96576e8 100644 --- a/src/proto/PINT-le-bytefield.c +++ b/src/proto/PINT-le-bytefield.c @@ -10,27 +10,29 @@ #include #include -#define __PINT_REQPROTO_ENCODE_FUNCS_C /* trigger actual definitions */ +#define __PINT_REQPROTO_ENCODE_FUNCS_C /** trigger actual definitions */ #include "endecode-funcs.h" #include "bmi.h" #include "bmi-byteswap.h" #include "gossip.h" +#include "pvfs2-debug.h" #include "pvfs2-dist-basic.h" #include "pvfs2-types.h" #include "pvfs2-req-proto.h" #include "PINT-reqproto-encode.h" #include "PINT-reqproto-module.h" -#include "src/io/description/pint-request.h" /* for PINT_Request */ -#include "src/io/description/pint-distribution.h" /* for PINT_dist_lookup */ +#include "src/io/description/pint-request.h" /** for PINT_Request */ +#include "src/io/description/pint-distribution.h" /** for PINT_dist_lookup */ #include "pvfs2-internal.h" +#include "pint-hint.h" -/* defined later */ +/** defined later */ static int check_req_size(struct PVFS_server_req *req); static int check_resp_size(struct PVFS_server_resp *resp); static int initializing_sizes = 0; -/* an array of structs for storing precalculated maximum encoding sizes +/** an array of structs for storing precalculated maximum encoding sizes * for each type of server operation */ static struct { @@ -38,7 +40,7 @@ static struct { int resp; } *max_size_array = NULL; -/* lebf_initialize() +/** lebf_initialize() * * initializes the encoder module, calculates max sizes of each request type * in advance @@ -49,13 +51,14 
@@ static void lebf_initialize(void) { struct PVFS_server_req req = {0}; struct PVFS_server_resp resp = {0}; - int i; + enum PVFS_server_op op_type; int reqsize, respsize; int noreq; PINT_dist tmp_dist; PINT_Request tmp_req; char *tmp_name = strdup("foo"); const int init_big_size = 1024 * 1024; + int i; gossip_debug(GOSSIP_ENDECODE_DEBUG,"lebf_initialize\n"); @@ -79,16 +82,26 @@ static void lebf_initialize(void) initializing_sizes = 1; - for (i=0; i init_big_size) gossip_err("%s: op %d reqsize %d exceeded prealloced %d\n", - __func__, i, reqsize, init_big_size); + __func__, op_type, reqsize, init_big_size); if (respsize > init_big_size) gossip_err("%s: op %d respsize %d exceeded prealloced %d\n", - __func__, i, respsize, init_big_size); - max_size_array[i].req = reqsize; - max_size_array[i].resp = respsize; + __func__, op_type, respsize, init_big_size); + max_size_array[op_type].req = reqsize; + max_size_array[op_type].resp = respsize; } - /* clean up stuff just used for initialization */ + /** clean up stuff just used for initialization */ + PVFS_hint_free(req.hints); free(tmp_dist.dist_name); free(tmp_name); initializing_sizes = 0; @@ -256,7 +321,7 @@ static void lebf_finalize(void) free(max_size_array); } -/* lebf_encode_calc_max_size() +/** lebf_encode_calc_max_size() * * reports the maximum allowed encoded size for the given request type * @@ -291,14 +356,19 @@ encode_common(struct PINT_encoded_msg *target_msg, int maxsize) void *buf = NULL; gossip_debug(GOSSIP_ENDECODE_DEBUG,"encode_common\n"); - /* this encoder always uses just one buffer */ + /** this encoder always uses just one buffer */ BF_ENCODE_TARGET_MSG_INIT(target_msg); - /* allocate the max size buffer to avoid the work of calculating it */ + gossip_debug(GOSSIP_ENDECODE_DEBUG,"\tmaxsize:%d\tinitializing_sizes:%d\n" + ,maxsize,initializing_sizes); + + /** allocate the max size buffer to avoid the work of calculating it */ buf = (initializing_sizes ? 
malloc(maxsize) : BMI_memalloc(target_msg->dest, maxsize, BMI_SEND)); if (!buf) { + gossip_err("Error: failed to BMI_malloc memory for response.\n"); + gossip_err("Error: is BMI address %llu still valid?\n", llu(target_msg->dest)); ret = -PVFS_ENOMEM; goto out; } @@ -307,7 +377,7 @@ encode_common(struct PINT_encoded_msg *target_msg, int maxsize) target_msg->alloc_size_list[0] = maxsize; target_msg->ptr_current = buf; - /* generic header */ + /** generic header */ memcpy(target_msg->ptr_current, le_bytefield_table.generic_header, PINT_ENC_GENERIC_HEADER_SIZE); target_msg->ptr_current += PINT_ENC_GENERIC_HEADER_SIZE; @@ -316,7 +386,7 @@ encode_common(struct PINT_encoded_msg *target_msg, int maxsize) return ret; } -/* lebf_encode_req() +/** lebf_encode_req() * * encodes a request structure * @@ -329,13 +399,16 @@ static int lebf_encode_req( int ret = 0; char **p; + gossip_debug(GOSSIP_ENDECODE_DEBUG,"Executing lebf_encode_req...\n"); + gossip_debug(GOSSIP_ENDECODE_DEBUG,"\treq->op:%d\n",req->op); + ret = encode_common(target_msg, max_size_array[req->op].req); if (ret) goto out; gossip_debug(GOSSIP_ENDECODE_DEBUG,"lebf_encode_req\n"); - /* every request has these fields */ + /** every request has these fields */ p = &target_msg->ptr_current; encode_PVFS_server_req(p, req); @@ -344,12 +417,18 @@ static int lebf_encode_req( switch (req->op) { - /* call standard function defined in headers */ + /** call standard function defined in headers */ CASE(PVFS_SERV_LOOKUP_PATH, lookup_path); CASE(PVFS_SERV_CREATE, create); + CASE(PVFS_SERV_MIRROR, mirror); + CASE(PVFS_SERV_UNSTUFF, unstuff); + CASE(PVFS_SERV_BATCH_CREATE, batch_create); + CASE(PVFS_SERV_BATCH_REMOVE, batch_remove); CASE(PVFS_SERV_REMOVE, remove); CASE(PVFS_SERV_MGMT_REMOVE_OBJECT, mgmt_remove_object); CASE(PVFS_SERV_MGMT_REMOVE_DIRENT, mgmt_remove_dirent); + CASE(PVFS_SERV_TREE_REMOVE, tree_remove); + CASE(PVFS_SERV_TREE_GET_FILE_SIZE, tree_get_file_size); CASE(PVFS_SERV_MGMT_GET_DIRDATA_HANDLE, 
mgmt_get_dirdata_handle); CASE(PVFS_SERV_IO, io); CASE(PVFS_SERV_SMALL_IO, small_io); @@ -373,18 +452,21 @@ static int lebf_encode_req( CASE(PVFS_SERV_DELEATTR, deleattr); CASE(PVFS_SERV_LISTEATTR, listeattr); CASE(PVFS_SERV_LISTATTR, listattr); + CASE(PVFS_SERV_MGMT_GET_UID, mgmt_get_uid); case PVFS_SERV_GETCONFIG: case PVFS_SERV_MGMT_NOOP: case PVFS_SERV_PROTO_ERROR: - /* nothing else */ + case PVFS_SERV_IMM_COPIES: + /** nothing else */ break; case PVFS_SERV_INVALID: case PVFS_SERV_WRITE_COMPLETION: case PVFS_SERV_PERF_UPDATE: + case PVFS_SERV_PRECREATE_POOL_REFILLER: case PVFS_SERV_JOB_TIMER: - case PVFS_SERV_NUM_OPS: /* sentinel */ + case PVFS_SERV_NUM_OPS: /** sentinel */ gossip_err("%s: invalid operation %d\n", __func__, req->op); ret = -PVFS_ENOSYS; break; @@ -392,7 +474,7 @@ static int lebf_encode_req( #undef CASE - /* although much more may have been allocated */ + /** although much more may have been allocated */ target_msg->total_size = target_msg->ptr_current - (char *) target_msg->buffer_list[0]; target_msg->size_list[0] = target_msg->total_size; @@ -410,7 +492,7 @@ static int lebf_encode_req( } -/* lebf_encode_resp() +/** lebf_encode_resp() * * encodes a response structure * @@ -428,7 +510,7 @@ static int lebf_encode_resp( goto out; gossip_debug(GOSSIP_ENDECODE_DEBUG,"lebf_encode_resp\n"); - /* every response has these fields */ + /** every response has these fields */ p = &target_msg->ptr_current; encode_PVFS_server_resp(p, resp); @@ -436,18 +518,21 @@ static int lebf_encode_resp( case tag: encode_PVFS_servresp_##var(p,&resp->u.var); break - /* we stand a good chance of segfaulting if we try to encode the response + /** we stand a good chance of segfaulting if we try to encode the response * after something bad happened reading data from disk. 
*/ if (resp->status == 0) { - /* extra encoding rules for particular responses */ + /** extra encoding rules for particular responses */ switch (resp->op) { - /* call standard function defined in headers */ + /** call standard function defined in headers */ CASE(PVFS_SERV_GETCONFIG, getconfig); CASE(PVFS_SERV_LOOKUP_PATH, lookup_path); CASE(PVFS_SERV_CREATE, create); + CASE(PVFS_SERV_MIRROR, mirror); + CASE(PVFS_SERV_UNSTUFF, unstuff); + CASE(PVFS_SERV_BATCH_CREATE, batch_create); CASE(PVFS_SERV_IO, io); CASE(PVFS_SERV_SMALL_IO, small_io); CASE(PVFS_SERV_GETATTR, getattr); @@ -456,7 +541,6 @@ static int lebf_encode_resp( CASE(PVFS_SERV_MKDIR, mkdir); CASE(PVFS_SERV_READDIR, readdir); CASE(PVFS_SERV_STATFS, statfs); - CASE(PVFS_SERV_MGMT_SETPARAM, mgmt_setparam); CASE(PVFS_SERV_MGMT_PERF_MON, mgmt_perf_mon); CASE(PVFS_SERV_MGMT_ITERATE_HANDLES, mgmt_iterate_handles); CASE(PVFS_SERV_MGMT_DSPACE_INFO_LIST, mgmt_dspace_info_list); @@ -466,10 +550,13 @@ static int lebf_encode_resp( CASE(PVFS_SERV_GETEATTR, geteattr); CASE(PVFS_SERV_LISTEATTR, listeattr); CASE(PVFS_SERV_LISTATTR, listattr); + CASE(PVFS_SERV_TREE_GET_FILE_SIZE, tree_get_file_size); + CASE(PVFS_SERV_MGMT_GET_UID, mgmt_get_uid); case PVFS_SERV_REMOVE: case PVFS_SERV_MGMT_REMOVE_OBJECT: case PVFS_SERV_MGMT_REMOVE_DIRENT: + case PVFS_SERV_TREE_REMOVE: case PVFS_SERV_SETATTR: case PVFS_SERV_SETEATTR: case PVFS_SERV_DELEATTR: @@ -477,14 +564,18 @@ static int lebf_encode_resp( case PVFS_SERV_TRUNCATE: case PVFS_SERV_FLUSH: case PVFS_SERV_MGMT_NOOP: + case PVFS_SERV_BATCH_REMOVE: case PVFS_SERV_PROTO_ERROR: - /* nothing else */ + case PVFS_SERV_IMM_COPIES: + case PVFS_SERV_MGMT_SETPARAM: + /** nothing else */ break; case PVFS_SERV_INVALID: case PVFS_SERV_PERF_UPDATE: + case PVFS_SERV_PRECREATE_POOL_REFILLER: case PVFS_SERV_JOB_TIMER: - case PVFS_SERV_NUM_OPS: /* sentinel */ + case PVFS_SERV_NUM_OPS: /** sentinel */ gossip_err("%s: invalid operation %d\n", __func__, resp->op); ret = -PVFS_ENOSYS; break; @@ -493,7 
+584,7 @@ static int lebf_encode_resp( #undef CASE - /* although much more may have been allocated */ + /** although much more may have been allocated */ target_msg->total_size = target_msg->ptr_current - (char *) target_msg->buffer_list[0]; target_msg->size_list[0] = target_msg->total_size; @@ -509,7 +600,7 @@ static int lebf_encode_resp( return ret; } -/* lebf_decode_req() +/** lebf_decode_req() * * decodes a request message * @@ -531,7 +622,7 @@ static int lebf_decode_req( target_msg->buffer = req; - /* decode generic part of request (enough to get op number) */ + /** decode generic part of request (enough to get op number) */ decode_PVFS_server_req(p, req); gossip_debug(GOSSIP_ENDECODE_DEBUG,"lebf_decode_req\n"); @@ -540,12 +631,18 @@ static int lebf_decode_req( switch (req->op) { - /* call standard function defined in headers */ + /** call standard function defined in headers */ CASE(PVFS_SERV_LOOKUP_PATH, lookup_path); CASE(PVFS_SERV_CREATE, create); + CASE(PVFS_SERV_MIRROR, mirror); + CASE(PVFS_SERV_UNSTUFF, unstuff); + CASE(PVFS_SERV_BATCH_CREATE, batch_create); + CASE(PVFS_SERV_BATCH_REMOVE, batch_remove); CASE(PVFS_SERV_REMOVE, remove); CASE(PVFS_SERV_MGMT_REMOVE_OBJECT, mgmt_remove_object); CASE(PVFS_SERV_MGMT_REMOVE_DIRENT, mgmt_remove_dirent); + CASE(PVFS_SERV_TREE_REMOVE, tree_remove); + CASE(PVFS_SERV_TREE_GET_FILE_SIZE, tree_get_file_size); CASE(PVFS_SERV_MGMT_GET_DIRDATA_HANDLE, mgmt_get_dirdata_handle); CASE(PVFS_SERV_IO, io); CASE(PVFS_SERV_SMALL_IO, small_io); @@ -569,18 +666,21 @@ static int lebf_decode_req( CASE(PVFS_SERV_DELEATTR, deleattr); CASE(PVFS_SERV_LISTEATTR, listeattr); CASE(PVFS_SERV_LISTATTR, listattr); + CASE(PVFS_SERV_MGMT_GET_UID, mgmt_get_uid); case PVFS_SERV_GETCONFIG: case PVFS_SERV_MGMT_NOOP: - /* nothing else */ + case PVFS_SERV_IMM_COPIES: + /** nothing else */ break; case PVFS_SERV_INVALID: case PVFS_SERV_WRITE_COMPLETION: case PVFS_SERV_PERF_UPDATE: + case PVFS_SERV_PRECREATE_POOL_REFILLER: case PVFS_SERV_JOB_TIMER: case 
PVFS_SERV_PROTO_ERROR: - case PVFS_SERV_NUM_OPS: /* sentinel */ + case PVFS_SERV_NUM_OPS: /** sentinel */ gossip_lerr("%s: invalid operation %d.\n", __func__, req->op); ret = -PVFS_EPROTO; goto out; @@ -599,7 +699,7 @@ static int lebf_decode_req( return(ret); } -/* lebf_decode_resp() +/** lebf_decode_resp() * * decodes a response structure * @@ -618,7 +718,7 @@ static int lebf_decode_resp( target_msg->buffer = resp; - /* decode generic part of response (including op number) */ + /** decode generic part of response (including op number) */ decode_PVFS_server_resp(p, resp); gossip_debug(GOSSIP_ENDECODE_DEBUG,"lebf_decode_resp\n"); @@ -630,10 +730,13 @@ static int lebf_decode_resp( switch (resp->op) { - /* call standard function defined in headers */ + /** call standard function defined in headers */ CASE(PVFS_SERV_GETCONFIG, getconfig); CASE(PVFS_SERV_LOOKUP_PATH, lookup_path); CASE(PVFS_SERV_CREATE, create); + CASE(PVFS_SERV_MIRROR, mirror); + CASE(PVFS_SERV_UNSTUFF, unstuff); + CASE(PVFS_SERV_BATCH_CREATE, batch_create); CASE(PVFS_SERV_IO, io); CASE(PVFS_SERV_SMALL_IO, small_io); CASE(PVFS_SERV_GETATTR, getattr); @@ -642,7 +745,6 @@ static int lebf_decode_resp( CASE(PVFS_SERV_MKDIR, mkdir); CASE(PVFS_SERV_READDIR, readdir); CASE(PVFS_SERV_STATFS, statfs); - CASE(PVFS_SERV_MGMT_SETPARAM, mgmt_setparam); CASE(PVFS_SERV_MGMT_PERF_MON, mgmt_perf_mon); CASE(PVFS_SERV_MGMT_ITERATE_HANDLES, mgmt_iterate_handles); CASE(PVFS_SERV_MGMT_DSPACE_INFO_LIST, mgmt_dspace_info_list); @@ -652,10 +754,14 @@ static int lebf_decode_resp( CASE(PVFS_SERV_GETEATTR, geteattr); CASE(PVFS_SERV_LISTEATTR, listeattr); CASE(PVFS_SERV_LISTATTR, listattr); + CASE(PVFS_SERV_TREE_GET_FILE_SIZE, tree_get_file_size); + CASE(PVFS_SERV_MGMT_GET_UID, mgmt_get_uid); case PVFS_SERV_REMOVE: + case PVFS_SERV_BATCH_REMOVE: case PVFS_SERV_MGMT_REMOVE_OBJECT: case PVFS_SERV_MGMT_REMOVE_DIRENT: + case PVFS_SERV_TREE_REMOVE: case PVFS_SERV_SETATTR: case PVFS_SERV_SETEATTR: case PVFS_SERV_DELEATTR: @@ -664,13 
+770,16 @@ static int lebf_decode_resp( case PVFS_SERV_FLUSH: case PVFS_SERV_MGMT_NOOP: case PVFS_SERV_PROTO_ERROR: - /* nothing else */ + case PVFS_SERV_IMM_COPIES: + case PVFS_SERV_MGMT_SETPARAM: + /** nothing else */ break; case PVFS_SERV_INVALID: case PVFS_SERV_PERF_UPDATE: + case PVFS_SERV_PRECREATE_POOL_REFILLER: case PVFS_SERV_JOB_TIMER: - case PVFS_SERV_NUM_OPS: /* sentinel */ + case PVFS_SERV_NUM_OPS: /** sentinel */ gossip_lerr("%s: invalid operation %d.\n", __func__, resp->op); ret = -PVFS_EPROTO; goto out; @@ -689,7 +798,7 @@ static int lebf_decode_resp( return(ret); } -/* lebf_encode_rel() +/** lebf_encode_rel() * * releases resources consumed while encoding * @@ -700,7 +809,7 @@ static void lebf_encode_rel( enum PINT_encode_msg_type input_type) { gossip_debug(GOSSIP_ENDECODE_DEBUG,"lebf_encode_rel\n"); - /* just a single buffer to free */ + /** just a single buffer to free */ if (initializing_sizes) { free(msg->buffer_list[0]); @@ -712,7 +821,7 @@ static void lebf_encode_rel( } } -/* lebf_decode_rel() +/** lebf_decode_rel() * * releases resources consumed while decoding * @@ -725,9 +834,15 @@ static void lebf_decode_rel(struct PINT_decoded_msg *msg, if (input_type == PINT_DECODE_REQ) { struct PVFS_server_req *req = &msg->stub_dec.req; switch (req->op) { - case PVFS_SERV_CREATE: - decode_free(req->u.create.handle_extent_array.extent_array); + if (req->u.create.attr.mask & PVFS_ATTR_META_DIST) + decode_free(req->u.create.attr.u.meta.dist); + if (req->u.create.layout.server_list.servers) + decode_free(req->u.create.layout.server_list.servers); + break; + case PVFS_SERV_BATCH_CREATE: + decode_free( + req->u.batch_create.handle_extent_array.extent_array); break; case PVFS_SERV_IO: @@ -740,6 +855,12 @@ static void lebf_decode_rel(struct PINT_decoded_msg *msg, decode_free(req->u.small_io.file_req); break; + case PVFS_SERV_MIRROR: + decode_free(req->u.mirror.dist); + decode_free(req->u.mirror.dst_handle); + decode_free(req->u.mirror.wcIndex); + break; + case 
PVFS_SERV_MKDIR: decode_free(req->u.mkdir.handle_extent_array.extent_array); if (req->u.mkdir.attr.mask & PVFS_ATTR_META_DIST) @@ -752,15 +873,35 @@ static void lebf_decode_rel(struct PINT_decoded_msg *msg, decode_free(req->u.mgmt_dspace_info_list.handle_array); break; - case PVFS_SERV_SETATTR: + case PVFS_SERV_SETATTR: if (req->u.setattr.attr.mask & PVFS_ATTR_META_DIST) decode_free(req->u.setattr.attr.u.meta.dist); if (req->u.setattr.attr.mask & PVFS_ATTR_META_DFILES) decode_free(req->u.setattr.attr.u.meta.dfile_array); break; + + case PVFS_SERV_TREE_REMOVE: + decode_free(req->u.tree_remove.handle_array); + break; + + case PVFS_SERV_TREE_GET_FILE_SIZE: + decode_free(req->u.tree_get_file_size.handle_array); + break; + case PVFS_SERV_LISTATTR: if (req->u.listattr.handles) decode_free(req->u.listattr.handles); + break; + + case PVFS_SERV_SETEATTR: + decode_free(req->u.seteattr.key); + decode_free(req->u.seteattr.val); + break; + + case PVFS_SERV_GETEATTR: + decode_free(req->u.geteattr.key); + decode_free(req->u.geteattr.valsz); + break; case PVFS_SERV_GETCONFIG: case PVFS_SERV_LOOKUP_PATH: @@ -781,18 +922,22 @@ static void lebf_decode_rel(struct PINT_decoded_msg *msg, case PVFS_SERV_MGMT_ITERATE_HANDLES: case PVFS_SERV_MGMT_PERF_MON: case PVFS_SERV_MGMT_EVENT_MON: - case PVFS_SERV_GETEATTR: - case PVFS_SERV_SETEATTR: + case PVFS_SERV_MGMT_GET_UID: + case PVFS_SERV_DELEATTR: case PVFS_SERV_LISTEATTR: - /* nothing to free */ - break; + case PVFS_SERV_BATCH_REMOVE: + case PVFS_SERV_UNSTUFF: + case PVFS_SERV_IMM_COPIES: + /*nothing to free*/ + break; case PVFS_SERV_INVALID: case PVFS_SERV_WRITE_COMPLETION: case PVFS_SERV_PERF_UPDATE: + case PVFS_SERV_PRECREATE_POOL_REFILLER: case PVFS_SERV_JOB_TIMER: case PVFS_SERV_PROTO_ERROR: - case PVFS_SERV_NUM_OPS: /* sentinel */ + case PVFS_SERV_NUM_OPS: /** sentinel */ gossip_lerr("%s: invalid request operation %d.\n", __func__, req->op); break; @@ -824,6 +969,14 @@ static void lebf_decode_rel(struct PINT_decoded_msg *msg, case 
PVFS_SERV_MGMT_ITERATE_HANDLES: decode_free(resp->u.mgmt_iterate_handles.handle_array); break; + + case PVFS_SERV_BATCH_CREATE: + decode_free(resp->u.batch_create.handle_array); + break; + + case PVFS_SERV_CREATE: + decode_free(resp->u.create.datafile_handles); + break; case PVFS_SERV_MGMT_DSPACE_INFO_LIST: decode_free(resp->u.mgmt_dspace_info_list.dspace_info_array); @@ -833,7 +986,24 @@ static void lebf_decode_rel(struct PINT_decoded_msg *msg, if (resp->u.getattr.attr.mask & PVFS_ATTR_META_DIST) decode_free(resp->u.getattr.attr.u.meta.dist); if (resp->u.getattr.attr.mask & PVFS_ATTR_META_DFILES) - decode_free(resp->u.getattr.attr.u.meta.dfile_array); + decode_free(resp->u.getattr.attr.u.meta.dfile_array); + if ( resp->u.getattr.attr.mask + & PVFS_ATTR_META_MIRROR_DFILES ) + decode_free + (resp->u.getattr.attr.u.meta.mirror_dfile_array); + break; + + case PVFS_SERV_UNSTUFF: + if (resp->u.unstuff.attr.mask & PVFS_ATTR_META_DIST) + decode_free(resp->u.unstuff.attr.u.meta.dist); + if (resp->u.unstuff.attr.mask & PVFS_ATTR_META_DFILES) + { + decode_free(resp->u.unstuff.attr.u.meta.dfile_array); + } + if ( resp->u.unstuff.attr.mask + & PVFS_ATTR_META_MIRROR_DFILES ) + decode_free + (resp->u.unstuff.attr.u.meta.mirror_dfile_array); break; case PVFS_SERV_MGMT_EVENT_MON: @@ -841,7 +1011,7 @@ static void lebf_decode_rel(struct PINT_decoded_msg *msg, break; case PVFS_SERV_GETEATTR: - /* need a loop here? WBL */ + /** need a loop here? 
WBL */ if (resp->u.geteattr.val) decode_free(resp->u.geteattr.val); break; @@ -851,22 +1021,53 @@ static void lebf_decode_rel(struct PINT_decoded_msg *msg, break; case PVFS_SERV_LISTATTR: { - int i; - if (resp->u.listattr.error) - decode_free(resp->u.listattr.error); - if (resp->u.listattr.attr) { - for (i = 0; i < resp->u.listattr.nhandles; i++) { - if (resp->u.listattr.attr[i].mask & PVFS_ATTR_META_DIST) - decode_free(resp->u.listattr.attr[i].u.meta.dist); - if (resp->u.listattr.attr[i].mask & PVFS_ATTR_META_DFILES) - decode_free(resp->u.listattr.attr[i].u.meta.dfile_array); - } - decode_free(resp->u.listattr.attr); - } + int i; + if (resp->u.listattr.error) + decode_free(resp->u.listattr.error); + if (resp->u.listattr.attr) { + for (i = 0; i < resp->u.listattr.nhandles; i++) { + if (resp->u.listattr.attr[i].mask & + PVFS_ATTR_META_DIST) + decode_free(resp->u.listattr.attr[i].u.meta.dist); + if (resp->u.listattr.attr[i].mask & + PVFS_ATTR_META_DFILES) + { + decode_free( + resp->u.listattr.attr[i].u.meta.dfile_array); + } + if( + resp->u.listattr.attr[i].mask & + PVFS_ATTR_META_MIRROR_DFILES + ) + decode_free( + resp->u.listattr.attr[i].u.meta.mirror_dfile_array); + }/*end for*/ + decode_free(resp->u.listattr.attr); + }/*end if attr*/ break; - } + }/*end case*/ + + case PVFS_SERV_MIRROR: + { + decode_free(resp->u.mirror.bytes_written); + decode_free(resp->u.mirror.write_status_code); + break; + } + + case PVFS_SERV_TREE_GET_FILE_SIZE: + { + decode_free(resp->u.tree_get_file_size.size); + decode_free(resp->u.tree_get_file_size.error); + break; + } + + case PVFS_SERV_MGMT_GET_UID: + { + decode_free(resp->u.mgmt_get_uid.uid_info_array); + break; + } + case PVFS_SERV_GETCONFIG: - case PVFS_SERV_CREATE: case PVFS_SERV_REMOVE: case PVFS_SERV_MGMT_REMOVE_OBJECT: case PVFS_SERV_MGMT_REMOVE_DIRENT: @@ -887,13 +1088,16 @@ static void lebf_decode_rel(struct PINT_decoded_msg *msg, case PVFS_SERV_STATFS: case PVFS_SERV_WRITE_COMPLETION: case PVFS_SERV_PROTO_ERROR: - /* nothing 
to free */ - break; - + case PVFS_SERV_BATCH_REMOVE: + case PVFS_SERV_IMM_COPIES: + case PVFS_SERV_TREE_REMOVE: + /*nothing to free */ + break; case PVFS_SERV_INVALID: case PVFS_SERV_PERF_UPDATE: + case PVFS_SERV_PRECREATE_POOL_REFILLER: case PVFS_SERV_JOB_TIMER: - case PVFS_SERV_NUM_OPS: /* sentinel */ + case PVFS_SERV_NUM_OPS: /** sentinel */ gossip_lerr("%s: invalid response operation %d.\n", __func__, resp->op); break; diff --git a/src/proto/PINT-reqproto-encode.c b/src/proto/PINT-reqproto-encode.c index 132e553..3e9d259 100644 --- a/src/proto/PINT-reqproto-encode.c +++ b/src/proto/PINT-reqproto-encode.c @@ -10,6 +10,7 @@ #include "bmi.h" #include "gossip.h" +#include "pvfs2-debug.h" #include "pvfs2-req-proto.h" #include "PINT-reqproto-encode.h" #include "PINT-reqproto-module.h" @@ -50,6 +51,7 @@ static PINT_encoding_table_values *PINT_encoding_table[ int PINT_encode_initialize(void) { int ret = -PVFS_EINVAL; + void *header = NULL; gossip_debug(GOSSIP_ENDECODE_DEBUG,"PINT_encode_initialize\n"); if (ENCODING_IS_SUPPORTED(ENCODING_LE_BFIELD)) @@ -59,10 +61,16 @@ int PINT_encode_initialize(void) le_bytefield_table.init_fun(); /* header prepended to all messages of this type */ - *((int32_t*)&(le_bytefield_table.generic_header[0])) = - htobmi32(PVFS2_PROTO_VERSION); - *((int32_t*)&(le_bytefield_table.generic_header[4])) = - htobmi32(ENCODING_LE_BFIELD); + header = &(le_bytefield_table.generic_header[0]); + *((int32_t *)header) = htobmi32(PVFS2_PROTO_VERSION); + + header = &(le_bytefield_table.generic_header[4]); + *((int32_t *)header) = htobmi32(ENCODING_LE_BFIELD); + + //*((int32_t*)&(le_bytefield_table.generic_header[0])) = + // htobmi32(PVFS2_PROTO_VERSION); + //*((int32_t*)&(le_bytefield_table.generic_header[4])) = + // htobmi32(ENCODING_LE_BFIELD); le_bytefield_table.enc_type = ENCODING_LE_BFIELD; ret = 0; @@ -107,23 +115,13 @@ int PINT_encode(void* input_buffer, case ENCODING_LE_BFIELD: if (input_type == PINT_ENCODE_REQ) { - struct PVFS_server_req* tmp_req = 
input_buffer; - ENCODE_EVENT_START(PVFS_EVENT_API_ENCODE_REQ, - tmp_req->op, tmp_req); ret = PINT_encoding_table[enc_type]->op->encode_req( input_buffer, target_msg); - ENCODE_EVENT_STOP(PVFS_EVENT_API_ENCODE_REQ, - tmp_req->op, tmp_req, target_msg->total_size); } else if (input_type == PINT_ENCODE_RESP) { - struct PVFS_server_resp* tmp_resp = input_buffer; - ENCODE_EVENT_START(PVFS_EVENT_API_ENCODE_RESP, - tmp_resp->op, tmp_resp); ret = PINT_encoding_table[enc_type]->op->encode_resp( input_buffer, target_msg); - ENCODE_EVENT_STOP(PVFS_EVENT_API_ENCODE_RESP, - tmp_resp->op, tmp_resp, target_msg->total_size); } break; default: @@ -243,28 +241,20 @@ int PINT_decode(void* input_buffer, target_msg->enc_type = enc_type_recved; if(input_type == PINT_DECODE_REQ) { - ENCODE_EVENT_START(PVFS_EVENT_API_DECODE_REQ, - 0, input_buffer); ret = PINT_encoding_table[i]->op->decode_req(buffer_index, size_index, target_msg, target_addr); tmp_req = target_msg->buffer; - ENCODE_EVENT_STOP(PVFS_EVENT_API_DECODE_REQ, - tmp_req->op, input_buffer, size); return(ret); } else if(input_type == PINT_DECODE_RESP) { - ENCODE_EVENT_START(PVFS_EVENT_API_DECODE_RESP, - 0, input_buffer); ret = PINT_encoding_table[i]->op->decode_resp(buffer_index, size_index, target_msg, target_addr); tmp_resp = target_msg->buffer; - ENCODE_EVENT_STOP(PVFS_EVENT_API_DECODE_RESP, - tmp_resp->op, input_buffer, size); return(ret); } else diff --git a/src/proto/endecode-funcs.c b/src/proto/endecode-funcs.c new file mode 100644 index 0000000..be2aff8 --- /dev/null +++ b/src/proto/endecode-funcs.c @@ -0,0 +1,74 @@ +/* + * (C) 2007 The University of Chicago. + * + * See COPYING in top-level directory. 
+ */ +#include +#include +#include +#include + +#define __PINT_REQPROTO_ENCODE_FUNCS_C /* trigger actual definitions */ +#include "endecode-funcs.h" +#include +#include "pvfs2-encode-stubs.h" + +void encode_func_uint64_t(char **pptr, void *x) +{ + encode_uint64_t(pptr, (uint64_t *)x); +} + +void decode_func_uint64_t(char **pptr, void *x) +{ + decode_uint64_t(pptr, (uint64_t *)x); +} + +void encode_func_int64_t(char **pptr, void *x) +{ + encode_int64_t(pptr, (int64_t *)x); +} + +void decode_func_int64_t(char **pptr, void *x) +{ + decode_int64_t(pptr, (int64_t *)x); +} + +void encode_func_uint32_t(char **pptr, void *x) +{ + encode_uint32_t(pptr, (uint32_t *)x); +} + +void decode_func_uint32_t(char **pptr, void *x) +{ + decode_uint32_t(pptr, (uint32_t *)x); +} + +void encode_func_int32_t(char **pptr, void *x) +{ + encode_int32_t(pptr, (int32_t *)x); +} + +void decode_func_int32_t(char **pptr, void *x) +{ + decode_int32_t(pptr, (int32_t *)x); +} + +void encode_func_string(char **pptr, void *x) +{ + encode_string(pptr, (char **)x); +} + +void decode_func_string(char **pptr, void *x) +{ + decode_string(pptr, (char **)x); +} + +/* + * Local variables: + * mode: c + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/proto/endecode-funcs.h b/src/proto/endecode-funcs.h index 292b063..ca833db 100644 --- a/src/proto/endecode-funcs.h +++ b/src/proto/endecode-funcs.h @@ -13,6 +13,15 @@ #define __SRC_PROTO_ENDECODE_FUNCS_H #include "src/io/bmi/bmi-byteswap.h" +#include +#ifdef WIN32 +typedef uint32_t u_int32_t; +typedef uint64_t u_int64_t; + +/* typeof not available on Windows */ +#define typeof(t) t +#endif +#include /* * NOTE - Every macro defined here needs to have a stub defined in @@ -85,34 +94,37 @@ #define encode_string(pptr,pbuf) do { \ u_int32_t len = 0; \ if (*pbuf) \ - len = strlen(*pbuf); \ + len = strlen(*pbuf); \ *(u_int32_t *) *(pptr) = htobmi32(len); \ if (len) { \ - memcpy(*(pptr)+4, *pbuf, len+1); 
\ - int pad = roundup8(4 + len + 1) - (4 + len + 1); \ - *(pptr) += roundup8(4 + len + 1); \ - memset(*(pptr)-pad, 0, pad); \ + memcpy(*(pptr)+4, *pbuf, len+1); \ + int pad = roundup8(4 + len + 1) - (4 + len + 1); \ + *(pptr) += roundup8(4 + len + 1); \ + memset(*(pptr)-pad, 0, pad); \ } else { \ - *(u_int32_t *) *(pptr) = 0; \ - *(pptr) += 8; \ + *(u_int32_t *) (*(pptr)+4) = 0; \ + *(pptr) += 8; \ } \ } while (0) #else #define encode_string(pptr,pbuf) do { \ u_int32_t len = 0; \ if (*pbuf) \ - len = strlen(*pbuf); \ + len = strlen(*pbuf); \ *(u_int32_t *) *(pptr) = htobmi32(len); \ if (len) { \ - memcpy(*(pptr)+4, *pbuf, len+1); \ - *(pptr) += roundup8(4 + len + 1); \ + memcpy(*(pptr)+4, *pbuf, len+1); \ + *(pptr) += roundup8(4 + len + 1); \ } else { \ - *(u_int32_t *) *(pptr) = 0; \ - *(pptr) += 8; \ + *(u_int32_t *) *(pptr) = 0; \ + *(pptr) += 8; \ } \ } while (0) #endif +/* determines how much protocol space a string encoding will consume */ +#define encode_string_size_check(pbuf) (strlen(*pbuf) + 5) + #define decode_string(pptr,pbuf) do { \ u_int32_t len = bmitoh32(*(u_int32_t *) *(pptr)); \ *pbuf = *(pptr) + 4; \ @@ -170,14 +182,9 @@ static inline void decode_free (void *p) { free(p); } #else -#ifdef __KERNEL__ -#define decode_malloc(n) ((n) ? kmalloc(n, GFP_KERNEL) : 0) -#define decode_free(n) kfree(n) -#else #define decode_malloc(n) ((n) ? 
malloc(n) : 0) #define decode_free(n) free(n) #endif -#endif /* * These wrappers define functions to do the encoding of the types or @@ -466,6 +473,124 @@ static inline void decode_##name(char **pptr, name *x) { \ decode_##t12(pptr, &x->x12); \ } +#define endecode_fields_15_struct(name,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7, \ + t8,x8,t9,x9,t10,x10,t11,x11,t12,x12,t13,x13,t14,x14,t15,x15) \ +static inline void encode_##name(char **pptr, const struct name *x) { \ + encode_##t1(pptr, &x->x1); \ + encode_##t2(pptr, &x->x2); \ + encode_##t3(pptr, &x->x3); \ + encode_##t4(pptr, &x->x4); \ + encode_##t5(pptr, &x->x5); \ + encode_##t6(pptr, &x->x6); \ + encode_##t7(pptr, &x->x7); \ + encode_##t8(pptr, &x->x8); \ + encode_##t9(pptr, &x->x9); \ + encode_##t10(pptr, &x->x10); \ + encode_##t11(pptr, &x->x11); \ + encode_##t12(pptr, &x->x12); \ + encode_##t13(pptr, &x->x13); \ + encode_##t14(pptr, &x->x14); \ + encode_##t15(pptr, &x->x15); \ +} \ +static inline void decode_##name(char **pptr, struct name *x) { \ + decode_##t1(pptr, &x->x1); \ + decode_##t2(pptr, &x->x2); \ + decode_##t3(pptr, &x->x3); \ + decode_##t4(pptr, &x->x4); \ + decode_##t5(pptr, &x->x5); \ + decode_##t6(pptr, &x->x6); \ + decode_##t7(pptr, &x->x7); \ + decode_##t8(pptr, &x->x8); \ + decode_##t9(pptr, &x->x9); \ + decode_##t10(pptr, &x->x10); \ + decode_##t11(pptr, &x->x11); \ + decode_##t12(pptr, &x->x12); \ + decode_##t13(pptr, &x->x13); \ + decode_##t14(pptr, &x->x14); \ + decode_##t15(pptr, &x->x15); \ +} + +#define endecode_fields_16_struct(name,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7, \ + t8,x8,t9,x9,t10,x10,t11,x11,t12,x12,t13,x13,t14,x14,t15,x15,t16,x16) \ +static inline void encode_##name(char **pptr, const struct name *x) { \ + encode_##t1(pptr, &x->x1); \ + encode_##t2(pptr, &x->x2); \ + encode_##t3(pptr, &x->x3); \ + encode_##t4(pptr, &x->x4); \ + encode_##t5(pptr, &x->x5); \ + encode_##t6(pptr, &x->x6); \ + encode_##t7(pptr, &x->x7); \ + encode_##t8(pptr, &x->x8); \ + encode_##t9(pptr, 
&x->x9); \ + encode_##t10(pptr, &x->x10); \ + encode_##t11(pptr, &x->x11); \ + encode_##t12(pptr, &x->x12); \ + encode_##t13(pptr, &x->x13); \ + encode_##t14(pptr, &x->x14); \ + encode_##t15(pptr, &x->x15); \ + encode_##t16(pptr, &x->x16); \ +} \ +static inline void decode_##name(char **pptr, struct name *x) { \ + decode_##t1(pptr, &x->x1); \ + decode_##t2(pptr, &x->x2); \ + decode_##t3(pptr, &x->x3); \ + decode_##t4(pptr, &x->x4); \ + decode_##t5(pptr, &x->x5); \ + decode_##t6(pptr, &x->x6); \ + decode_##t7(pptr, &x->x7); \ + decode_##t8(pptr, &x->x8); \ + decode_##t9(pptr, &x->x9); \ + decode_##t10(pptr, &x->x10); \ + decode_##t11(pptr, &x->x11); \ + decode_##t12(pptr, &x->x12); \ + decode_##t13(pptr, &x->x13); \ + decode_##t14(pptr, &x->x14); \ + decode_##t15(pptr, &x->x15); \ + decode_##t16(pptr, &x->x16); \ +} + + +#define endecode_fields_17_struct(name,t1,x1,t2,x2,t3,x3,t4,x4,t5,x5,t6,x6,t7,x7, \ + t8,x8,t9,x9,t10,x10,t11,x11,t12,x12,t13,x13,t14,x14,t15,x15,t16,x16,t17,x17) \ +static inline void encode_##name(char **pptr, const struct name *x) { \ + encode_##t1(pptr, &x->x1); \ + encode_##t2(pptr, &x->x2); \ + encode_##t3(pptr, &x->x3); \ + encode_##t4(pptr, &x->x4); \ + encode_##t5(pptr, &x->x5); \ + encode_##t6(pptr, &x->x6); \ + encode_##t7(pptr, &x->x7); \ + encode_##t8(pptr, &x->x8); \ + encode_##t9(pptr, &x->x9); \ + encode_##t10(pptr, &x->x10); \ + encode_##t11(pptr, &x->x11); \ + encode_##t12(pptr, &x->x12); \ + encode_##t13(pptr, &x->x13); \ + encode_##t14(pptr, &x->x14); \ + encode_##t15(pptr, &x->x15); \ + encode_##t16(pptr, &x->x16); \ + encode_##t17(pptr, &x->x17); \ +} \ +static inline void decode_##name(char **pptr, struct name *x) { \ + decode_##t1(pptr, &x->x1); \ + decode_##t2(pptr, &x->x2); \ + decode_##t3(pptr, &x->x3); \ + decode_##t4(pptr, &x->x4); \ + decode_##t5(pptr, &x->x5); \ + decode_##t6(pptr, &x->x6); \ + decode_##t7(pptr, &x->x7); \ + decode_##t8(pptr, &x->x8); \ + decode_##t9(pptr, &x->x9); \ + decode_##t10(pptr, &x->x10); \ + 
decode_##t11(pptr, &x->x11); \ + decode_##t12(pptr, &x->x12); \ + decode_##t13(pptr, &x->x13); \ + decode_##t14(pptr, &x->x14); \ + decode_##t15(pptr, &x->x15); \ + decode_##t16(pptr, &x->x16); \ + decode_##t17(pptr, &x->x17); \ +} + /* ones with arrays that are allocated in the decode */ /* one field then one array */ @@ -681,5 +806,54 @@ static inline void decode_##name(char **pptr, struct name *x) { int i; \ decode_##ta1(pptr, &(x)->a1[i]); \ } +#ifdef WIN32 +#define DEFINE_STATIC_ENDECODE_FUNCS(__name__, __type__) \ +static void encode_func_##__name__(char **pptr, void *x) \ +{ \ + encode_##__name__(pptr, (__type__ *)x); \ +}; \ +static void decode_func_##__name__(char **pptr, void *x) \ +{ \ + decode_##__name__(pptr, (__type__ *)x); \ +} +#else +#define DEFINE_STATIC_ENDECODE_FUNCS(__name__, __type__) \ +__attribute__((unused)) \ +static void encode_func_##__name__(char **pptr, void *x) \ +{ \ + encode_##__name__(pptr, (__type__ *)x); \ +}; \ +__attribute__((unused)) \ +static void decode_func_##__name__(char **pptr, void *x) \ +{ \ + decode_##__name__(pptr, (__type__ *)x); \ +} +#endif + +#define encode_enum_union_2_struct(name, ename, uname, ut1, un1, en1, ut2, un2, en2) \ +static inline void encode_##name(char **pptr, const struct name *x) \ +{ \ + encode_enum(pptr, &x->ename); \ + switch(x->ename) \ + { \ + case en1: encode_##ut1(pptr, &x->uname.un1); break; \ + case en2: encode_##ut2(pptr, &x->uname.un2); break; \ + default: assert(0); \ + } \ +}; \ +static inline void decode_##name(char **pptr, struct name *x) \ +{ \ + decode_enum(pptr, &x->ename); \ + switch(x->ename) \ + { \ + case en1: decode_##ut1(pptr, &x->uname.un1); break; \ + case en2: decode_##ut2(pptr, &x->uname.un2); break; \ + default: assert(0); \ + } \ +}; + +#ifdef WIN32 +#endif + #endif /* __SRC_PROTO_ENDECODE_FUNCS_H */ diff --git a/src/proto/module.mk.in b/src/proto/module.mk.in index 0fc2d9a..803d105 100644 --- a/src/proto/module.mk.in +++ b/src/proto/module.mk.in @@ -1,7 +1,10 @@ DIR 
:= src/proto LIBSRC += \ $(DIR)/PINT-reqproto-encode.c \ - $(DIR)/PINT-le-bytefield.c + $(DIR)/PINT-le-bytefield.c SERVERSRC += \ $(DIR)/PINT-reqproto-encode.c \ - $(DIR)/PINT-le-bytefield.c + $(DIR)/PINT-le-bytefield.c + +LIBBMISRC += $(DIR)/endecode-funcs.h \ + $(DIR)/endecode-funcs.c diff --git a/src/proto/pvfs2-attr.h b/src/proto/pvfs2-attr.h index c782d2c..9f249b3 100644 --- a/src/proto/pvfs2-attr.h +++ b/src/proto/pvfs2-attr.h @@ -14,6 +14,10 @@ #include "pvfs2-storage.h" #include "pint-distribution.h" +#ifndef max +#define max(a,b) ((a) < (b) ? (b) : (a)) +#endif + /* internal attribute masks, common to all obj types */ #define PVFS_ATTR_COMMON_UID (1 << 0) #define PVFS_ATTR_COMMON_GID (1 << 1) @@ -24,17 +28,22 @@ #define PVFS_ATTR_COMMON_TYPE (1 << 6) #define PVFS_ATTR_COMMON_ATIME_SET (1 << 7) #define PVFS_ATTR_COMMON_MTIME_SET (1 << 8) +#define PVFS_ATTR_COMMON_CID (1 << 9) #define PVFS_ATTR_COMMON_ALL \ (PVFS_ATTR_COMMON_UID | PVFS_ATTR_COMMON_GID | \ PVFS_ATTR_COMMON_PERM | PVFS_ATTR_COMMON_ATIME | \ PVFS_ATTR_COMMON_CTIME | PVFS_ATTR_COMMON_MTIME | \ - PVFS_ATTR_COMMON_TYPE) + PVFS_ATTR_COMMON_TYPE | PVFS_ATTR_COMMON_CID) /* internal attribute masks for metadata objects */ -#define PVFS_ATTR_META_DIST (1 << 10) -#define PVFS_ATTR_META_DFILES (1 << 11) +#define PVFS_ATTR_META_DIST (1 << 10) +#define PVFS_ATTR_META_DFILES (1 << 11) +#define PVFS_ATTR_META_MIRROR_DFILES (1 << 13) #define PVFS_ATTR_META_ALL \ -(PVFS_ATTR_META_DIST | PVFS_ATTR_META_DFILES) +(PVFS_ATTR_META_DIST | PVFS_ATTR_META_DFILES | PVFS_ATTR_META_MIRROR_DFILES) + +#define PVFS_ATTR_META_UNSTUFFED (1 << 12) + /* internal attribute masks for datafile objects */ #define PVFS_ATTR_DATA_SIZE (1 << 15) @@ -50,6 +59,10 @@ #define PVFS_ATTR_DIR_ALL \ (PVFS_ATTR_DIR_DIRENT_COUNT | PVFS_ATTR_DIR_HINT) +/* attributes that do not change once set */ +#define PVFS_STATIC_ATTR_MASK \ 
+(PVFS_ATTR_COMMON_TYPE|PVFS_ATTR_META_DIST|PVFS_ATTR_META_DFILES|PVFS_ATTR_META_MIRROR_DFILES|PVFS_ATTR_META_UNSTUFFED) + /* extended hint attributes for a metafile object */ struct PVFS_metafile_hint_s { @@ -58,7 +71,7 @@ struct PVFS_metafile_hint_s typedef struct PVFS_metafile_hint_s PVFS_metafile_hint; #ifdef __PINT_REQPROTO_ENCODE_FUNCS_C endecode_fields_1(PVFS_metafile_hint, - PVFS_flags, flags) + PVFS_flags, flags); #endif /* attributes specific to metadata objects */ @@ -71,6 +84,13 @@ struct PVFS_metafile_attr_s /* list of datafiles */ PVFS_handle *dfile_array; uint32_t dfile_count; + + /* list of mirrored datafiles */ + PVFS_handle *mirror_dfile_array; + uint32_t mirror_copies_count; + + int32_t stuffed_size; + PVFS_metafile_hint hint; }; typedef struct PVFS_metafile_attr_s PVFS_metafile_attr; @@ -82,21 +102,48 @@ typedef struct PVFS_metafile_attr_s PVFS_metafile_attr; decode_PINT_dist(pptr, &(x)->dist); \ (x)->dist_size = PINT_DIST_PACK_SIZE((x)->dist); \ } while (0) -#define encode_PVFS_metafile_attr_dfiles(pptr,x) do { int dfiles_i; \ - encode_uint32_t(pptr, &(x)->dfile_count); \ - encode_skip4(pptr,); \ - for (dfiles_i=0; dfiles_i<(x)->dfile_count; dfiles_i++) \ - encode_PVFS_handle(pptr, &(x)->dfile_array[dfiles_i]); \ - encode_PVFS_metafile_hint(pptr, &(x)->hint); \ +#define encode_PVFS_metafile_attr_mirror_dfiles(pptr,x) do { \ + int dfiles_i, copy_i, handle_i; \ + encode_uint32_t(pptr, &(x)->mirror_copies_count); \ + encode_skip4(pptr,); \ + for (copy_i=0; copy_i<(x)->mirror_copies_count; copy_i++) \ + for (dfiles_i=0; dfiles_i<(x)->dfile_count; dfiles_i++) \ + { \ + handle_i = (copy_i * (x)->dfile_count) + dfiles_i; \ + encode_PVFS_handle(pptr, &(x)->mirror_dfile_array[handle_i]); \ + } \ } while (0) -#define decode_PVFS_metafile_attr_dfiles(pptr,x) do { int dfiles_i; \ - decode_uint32_t(pptr, &(x)->dfile_count); \ - decode_skip4(pptr,); \ - (x)->dfile_array = decode_malloc((x)->dfile_count \ - * sizeof(*(x)->dfile_array)); \ - for (dfiles_i=0; 
dfiles_i<(x)->dfile_count; dfiles_i++) \ - decode_PVFS_handle(pptr, &(x)->dfile_array[dfiles_i]); \ - decode_PVFS_metafile_hint(pptr, &(x)->hint); \ +#define decode_PVFS_metafile_attr_mirror_dfiles(pptr,x) do { \ + int dfiles_i, copy_i, handle_i; \ + decode_uint32_t(pptr, &(x)->mirror_copies_count); \ + decode_skip4(pptr,); \ + (x)->mirror_dfile_array = decode_malloc((x)->dfile_count * \ + (x)->mirror_copies_count * \ + sizeof(PVFS_handle)); \ + for (copy_i=0; copy_i<(x)->mirror_copies_count; copy_i++) \ + for (dfiles_i=0; dfiles_i<(x)->dfile_count; dfiles_i++) \ + { \ + handle_i = (copy_i * (x)->dfile_count) + dfiles_i; \ + decode_PVFS_handle(pptr, &(x)->mirror_dfile_array[handle_i]); \ + } \ +} while (0) +#define encode_PVFS_metafile_attr_dfiles(pptr,x) do { \ + int dfiles_i; \ + encode_uint32_t(pptr, &(x)->dfile_count); \ + encode_skip4(pptr,); \ + for (dfiles_i=0; dfiles_i<(x)->dfile_count; dfiles_i++) \ + encode_PVFS_handle(pptr, &(x)->dfile_array[dfiles_i]); \ + encode_PVFS_metafile_hint(pptr, &(x)->hint); \ +} while (0) +#define decode_PVFS_metafile_attr_dfiles(pptr,x) do { \ + int dfiles_i; \ + decode_uint32_t(pptr, &(x)->dfile_count); \ + decode_skip4(pptr,); \ + (x)->dfile_array = decode_malloc((x)->dfile_count \ + * sizeof(*(x)->dfile_array)); \ + for (dfiles_i=0; dfiles_i<(x)->dfile_count; dfiles_i++) \ + decode_PVFS_handle(pptr, &(x)->dfile_array[dfiles_i]); \ + decode_PVFS_metafile_hint(pptr, &(x)->hint); \ } while (0) #endif @@ -106,7 +153,7 @@ struct PVFS_datafile_attr_s PVFS_size size; }; typedef struct PVFS_datafile_attr_s PVFS_datafile_attr; -endecode_fields_1(PVFS_datafile_attr, PVFS_size, size) +endecode_fields_1(PVFS_datafile_attr, PVFS_size, size); /* extended hint attributes for a directory object */ struct PVFS_directory_hint_s @@ -130,7 +177,7 @@ endecode_fields_7(PVFS_directory_hint, uint32_t, dist_params_len, skip4,, string, dist_params, - uint32_t, dfile_count) + uint32_t, dfile_count); #endif /* attributes specific to directory objects 
*/ @@ -163,7 +210,7 @@ endecode_fields_3( PVFS_symlink_attr, uint32_t, target_path_len, skip4,, - string, target_path) + string, target_path); /* generic attributes; applies to all objects */ struct PVFS_object_attr @@ -174,6 +221,7 @@ struct PVFS_object_attr PVFS_time atime; PVFS_time mtime; PVFS_time ctime; + PVFS_handle cid; uint32_t mask; /* indicates which fields are currently valid */ PVFS_ds_type objtype; /* defined in pvfs2-types.h */ union @@ -190,6 +238,7 @@ typedef struct PVFS_object_attr PVFS_object_attr; #define encode_PVFS_object_attr(pptr,x) do { \ encode_PVFS_uid(pptr, &(x)->owner); \ encode_PVFS_gid(pptr, &(x)->group); \ + encode_PVFS_cid(pptr, &(x)->cid); \ encode_PVFS_permissions(pptr, &(x)->perms); \ encode_skip4(pptr,); \ encode_PVFS_time(pptr, &(x)->atime); \ @@ -197,20 +246,30 @@ typedef struct PVFS_object_attr PVFS_object_attr; encode_PVFS_time(pptr, &(x)->ctime); \ encode_uint32_t(pptr, &(x)->mask); \ encode_PVFS_ds_type(pptr, &(x)->objtype); \ + if ((x)->objtype == PVFS_TYPE_METAFILE && \ + (!((x)->mask & PVFS_ATTR_META_UNSTUFFED))) \ + { \ + encode_int32_t(pptr, &(x)->u.meta.stuffed_size); \ + encode_skip4(pptr,); \ + } \ if ((x)->mask & PVFS_ATTR_META_DIST) \ encode_PVFS_metafile_attr_dist(pptr, &(x)->u.meta); \ if ((x)->mask & PVFS_ATTR_META_DFILES) \ encode_PVFS_metafile_attr_dfiles(pptr, &(x)->u.meta); \ + if ((x)->mask & PVFS_ATTR_META_MIRROR_DFILES) \ + encode_PVFS_metafile_attr_mirror_dfiles(pptr, &(x)->u.meta); \ if ((x)->mask & PVFS_ATTR_DATA_SIZE) \ encode_PVFS_datafile_attr(pptr, &(x)->u.data); \ if ((x)->mask & PVFS_ATTR_SYMLNK_TARGET) \ encode_PVFS_symlink_attr(pptr, &(x)->u.sym); \ - if (((x)->mask & PVFS_ATTR_DIR_DIRENT_COUNT) || ((x)->mask & PVFS_ATTR_DIR_HINT)) \ + if (((x)->mask & PVFS_ATTR_DIR_DIRENT_COUNT) || \ + ((x)->mask & PVFS_ATTR_DIR_HINT)) \ encode_PVFS_directory_attr(pptr, &(x)->u.dir); \ } while (0) #define decode_PVFS_object_attr(pptr,x) do { \ decode_PVFS_uid(pptr, &(x)->owner); \ decode_PVFS_gid(pptr, 
&(x)->group); \ + decode_PVFS_cid(pptr, &(x)->cid); \ decode_PVFS_permissions(pptr, &(x)->perms); \ decode_skip4(pptr,); \ decode_PVFS_time(pptr, &(x)->atime); \ @@ -218,34 +277,44 @@ typedef struct PVFS_object_attr PVFS_object_attr; decode_PVFS_time(pptr, &(x)->ctime); \ decode_uint32_t(pptr, &(x)->mask); \ decode_PVFS_ds_type(pptr, &(x)->objtype); \ + if ((x)->objtype == PVFS_TYPE_METAFILE && \ + (!((x)->mask & PVFS_ATTR_META_UNSTUFFED))) \ + { \ + decode_int32_t(pptr, &(x)->u.meta.stuffed_size); \ + decode_skip4(pptr,); \ + } \ if ((x)->mask & PVFS_ATTR_META_DIST) \ decode_PVFS_metafile_attr_dist(pptr, &(x)->u.meta); \ if ((x)->mask & PVFS_ATTR_META_DFILES) \ decode_PVFS_metafile_attr_dfiles(pptr, &(x)->u.meta); \ + if ((x)->mask & PVFS_ATTR_META_MIRROR_DFILES) \ + decode_PVFS_metafile_attr_mirror_dfiles(pptr, &(x)->u.meta); \ if ((x)->mask & PVFS_ATTR_DATA_SIZE) \ decode_PVFS_datafile_attr(pptr, &(x)->u.data); \ if ((x)->mask & PVFS_ATTR_SYMLNK_TARGET) \ decode_PVFS_symlink_attr(pptr, &(x)->u.sym); \ - if (((x)->mask & PVFS_ATTR_DIR_DIRENT_COUNT) || ((x)->mask & PVFS_ATTR_DIR_HINT)) \ + if (((x)->mask & PVFS_ATTR_DIR_DIRENT_COUNT) || \ + ((x)->mask & PVFS_ATTR_DIR_HINT)) \ decode_PVFS_directory_attr(pptr, &(x)->u.dir); \ } while (0) #endif -/* attr buffer needs room for larger of symlink path, meta fields or dir hints: an attrib - * structure can never hold information for not more than a symlink or a metafile or a dir object */ +/* attr buffer needs room for the largest of symlink path, meta fields or + * dir hints: an attrib structure can never hold information for more than + * one of a symlink, a metafile or a dir object +*/ #define extra_size_PVFS_object_attr_dir (PVFS_REQ_LIMIT_DIST_BYTES + \ PVFS_REQ_LIMIT_DIST_NAME + roundup8(sizeof(PVFS_directory_attr))) +/* room for distribution, stuffed_size, dfile array, and mirror_dfile_array */ #define extra_size_PVFS_object_attr_meta (PVFS_REQ_LIMIT_DIST_BYTES + \ - PVFS_REQ_LIMIT_DFILE_COUNT * sizeof(PVFS_handle)) +
sizeof(int32_t) + \ + (PVFS_REQ_LIMIT_DFILE_COUNT * sizeof(PVFS_handle)) + \ + (PVFS_REQ_LIMIT_MIRROR_DFILE_COUNT * sizeof(PVFS_handle))) #define extra_size_PVFS_object_attr_symlink (PVFS_REQ_LIMIT_PATH_NAME_BYTES) -#ifndef max3 -#define max3(a, b, c) (a) < (b) ? (b) < (c) ? (c) : (b) : (a) < (c) ? (c) : (a) -#endif - #define extra_size_PVFS_object_attr \ - max3(extra_size_PVFS_object_attr_meta, extra_size_PVFS_object_attr_symlink, extra_size_PVFS_object_attr_dir) + max(max(extra_size_PVFS_object_attr_meta, extra_size_PVFS_object_attr_symlink), extra_size_PVFS_object_attr_dir) #endif /* __PVFS2_ATTR_H */ diff --git a/src/proto/pvfs2-req-proto.h b/src/proto/pvfs2-req-proto.h index c141cda..8f25e43 100644 --- a/src/proto/pvfs2-req-proto.h +++ b/src/proto/pvfs2-req-proto.h @@ -17,16 +17,21 @@ #include "pvfs2-request.h" #include "pint-request.h" #include "pvfs2-mgmt.h" +#include "pint-hint.h" +#include "pint-uid-mgmt.h" /* update PVFS2_PROTO_MAJOR on wire protocol changes that break backwards * compatibility (such as changing the semantics or protocol fields for an * existing request type) */ -#define PVFS2_PROTO_MAJOR 5 +#define PVFS2_PROTO_MAJOR 6 /* update PVFS2_PROTO_MINOR on wire protocol changes that preserve backwards * compatibility (such as adding a new request type) + * NOTE: Incrementing this will make clients unable to talk to older servers. + * Do not change until we have a new version policy. */ #define PVFS2_PROTO_MINOR 0 + #define PVFS2_PROTO_VERSION ((PVFS2_PROTO_MAJOR*1000)+(PVFS2_PROTO_MINOR)) /* we set the maximum possible size of a small I/O packed message as 64K. 
This @@ -74,6 +79,15 @@ enum PVFS_server_op PVFS_SERV_LISTEATTR = 32, PVFS_SERV_SMALL_IO = 33, PVFS_SERV_LISTATTR = 34, + PVFS_SERV_BATCH_CREATE = 35, + PVFS_SERV_BATCH_REMOVE = 36, + PVFS_SERV_PRECREATE_POOL_REFILLER = 37, /* not a real protocol request */ + PVFS_SERV_UNSTUFF = 38, + PVFS_SERV_MIRROR = 39, + PVFS_SERV_IMM_COPIES = 40, + PVFS_SERV_TREE_REMOVE = 41, + PVFS_SERV_TREE_GET_FILE_SIZE = 42, + PVFS_SERV_MGMT_GET_UID = 43, /* leave this entry last */ PVFS_SERV_NUM_OPS }; @@ -82,7 +96,8 @@ enum PVFS_server_op * These ops must always work, even if the server is in admin mode. */ #define PVFS_SERV_IS_MGMT_OP(x) \ - ((x) == PVFS_SERV_MGMT_REMOVE_OBJECT \ + ((x) == PVFS_SERV_MGMT_SETPARAM \ + || (x) == PVFS_SERV_MGMT_REMOVE_OBJECT \ || (x) == PVFS_SERV_MGMT_REMOVE_DIRENT) /******************************************************************/ @@ -90,6 +105,8 @@ enum PVFS_server_op * parameters used within the request protocol */ +/* max size of layout information (may include explicit server list) */ +#define PVFS_REQ_LIMIT_LAYOUT 4096 /* max size of opaque distribution parameters */ #define PVFS_REQ_LIMIT_DIST_BYTES 1024 /* max size of each configuration file transmitted to clients.
@@ -112,6 +129,7 @@ enum PVFS_server_op #define PVFS_REQ_LIMIT_DFILE_COUNT 1024 #define PVFS_REQ_LIMIT_DFILE_COUNT_IS_VALID(dfile_count) \ ((dfile_count > 0) && (dfile_count < PVFS_REQ_LIMIT_DFILE_COUNT)) +#define PVFS_REQ_LIMIT_MIRROR_DFILE_COUNT 1024 /* max count of directory entries per readdir request */ #define PVFS_REQ_LIMIT_DIRENT_COUNT 512 /* max number of perf metrics returned by mgmt perf mon op */ @@ -120,6 +138,8 @@ enum PVFS_server_op #define PVFS_REQ_LIMIT_MGMT_EVENT_MON_COUNT 2048 /* max number of handles returned by any operation using an array of handles */ #define PVFS_REQ_LIMIT_HANDLES_COUNT 1024 +/* max number of handles that can be created at once using batch create */ +#define PVFS_REQ_LIMIT_BATCH_CREATE 8192 /* max number of handles returned by mgmt iterate handles op */ #define PVFS_REQ_LIMIT_MGMT_ITERATE_HANDLES_COUNT \ PVFS_REQ_LIMIT_HANDLES_COUNT @@ -137,15 +157,91 @@ enum PVFS_server_op /* max number of key/value pairs to set or get in a list operation */ #define PVFS_REQ_LIMIT_KEYVAL_LIST 32 /* max number of handles for which we return attributes */ -#define PVFS_REQ_LIMIT_LISTATTR 64 +#define PVFS_REQ_LIMIT_LISTATTR 60 +/* max number of bytes in an extended attribute key including null term */ +#define PVFS_REQ_LIMIT_EATTR_KEY_LEN PVFS_MAX_XATTR_NAMELEN +/* max number of bytes in an extended attribute value including null term */ +#define PVFS_REQ_LIMIT_EATTR_VAL_LEN PVFS_MAX_XATTR_VALUELEN +/* max number of keys or key/value pairs to set or get in an operation */ +#define PVFS_REQ_LIMIT_EATTR_LIST PVFS_MAX_XATTR_LISTLEN + /* create *********************************************************/ -/* - used to create new metafile and datafile objects */ +/* - used to create an object. This creates a metadata handle, + * a datafile handle, and links the datafile handle to the metadata handle. + * It also sets the attributes on the metadata. 
*/ struct PVFS_servreq_create +{ + PVFS_fs_id fs_id; + PVFS_object_attr attr; + int32_t num_energy_nodes; + int32_t num_dfiles_req; + /* NOTE: leave layout as final field so that we can deal with encoding + * errors */ + PVFS_sys_layout layout; +}; +endecode_fields_5_struct( + PVFS_servreq_create, + PVFS_fs_id, fs_id, + int32_t, num_energy_nodes, + PVFS_object_attr, attr, + int32_t, num_dfiles_req, + PVFS_sys_layout, layout); + +#define extra_size_PVFS_servreq_create \ + (extra_size_PVFS_object_attr + extra_size_PVFS_sys_layout) + +#define PINT_SERVREQ_CREATE_FILL(__req, \ + __creds, \ + __fsid, \ + __attr, \ + __num_dfiles_req, \ + __num_energy_nodes, \ + __layout, \ + __hints) \ +do { \ + int mask; \ + memset(&(__req), 0, sizeof(__req)); \ + (__req).op = PVFS_SERV_CREATE; \ + (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ + (__req).u.create.fs_id = (__fsid); \ + (__req).u.create.num_dfiles_req = (__num_dfiles_req); \ + (__req).u.create.num_energy_nodes = (__num_energy_nodes); \ + (__attr).objtype = PVFS_TYPE_METAFILE; \ + mask = (__attr).mask; \ + (__attr).mask = PVFS_ATTR_COMMON_ALL; \ + (__attr).mask |= PVFS_ATTR_SYS_TYPE; \ + PINT_copy_object_attr(&(__req).u.create.attr, &(__attr)); \ + (__req).u.create.attr.mask |= mask; \ + (__req).u.create.layout = __layout; \ +} while (0) + +struct PVFS_servresp_create +{ + PVFS_handle metafile_handle; + int32_t stuffed; + int32_t datafile_count; + PVFS_handle *datafile_handles; +}; +endecode_fields_2a_struct( + PVFS_servresp_create, + PVFS_handle, metafile_handle, + int32_t, stuffed, + int32_t, datafile_count, + PVFS_handle, datafile_handles); +#define extra_size_PVFS_servresp_create \ + (PVFS_REQ_LIMIT_HANDLES_COUNT * sizeof(PVFS_handle)) + +/* batch_create *********************************************************/ +/* - used to create multiple new metafile and datafile objects */ + +struct PVFS_servreq_batch_create { PVFS_fs_id fs_id; PVFS_ds_type object_type; + uint32_t object_count; /* an array of
handle extents that we use to suggest to @@ -155,38 +251,50 @@ struct PVFS_servreq_create */ PVFS_handle_extent_array handle_extent_array; }; -endecode_fields_3_struct( - PVFS_servreq_create, +endecode_fields_5_struct( + PVFS_servreq_batch_create, PVFS_fs_id, fs_id, PVFS_ds_type, object_type, - PVFS_handle_extent_array, handle_extent_array) -#define extra_size_PVFS_servreq_create \ + uint32_t, object_count, + skip4,, + PVFS_handle_extent_array, handle_extent_array); + +#define extra_size_PVFS_servreq_batch_create \ (PVFS_REQ_LIMIT_HANDLES_COUNT * sizeof(PVFS_handle_extent)) -#define PINT_SERVREQ_CREATE_FILL(__req, \ +#define PINT_SERVREQ_BATCH_CREATE_FILL(__req, \ __creds, \ __fsid, \ __objtype, \ - __ext_array) \ + __objcount, \ + __ext_array, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ - (__req).op = PVFS_SERV_CREATE; \ + (__req).op = PVFS_SERV_BATCH_CREATE; \ (__req).credentials = (__creds); \ - (__req).u.create.fs_id = (__fsid); \ - (__req).u.create.object_type = (__objtype); \ - (__req).u.create.handle_extent_array.extent_count =\ + (__req).hints = (__hints); \ + (__req).u.batch_create.fs_id = (__fsid); \ + (__req).u.batch_create.object_type = (__objtype); \ + (__req).u.batch_create.object_count = (__objcount); \ + (__req).u.batch_create.handle_extent_array.extent_count =\ (__ext_array).extent_count; \ - (__req).u.create.handle_extent_array.extent_array =\ + (__req).u.batch_create.handle_extent_array.extent_array =\ (__ext_array).extent_array; \ } while (0) -struct PVFS_servresp_create +struct PVFS_servresp_batch_create { - PVFS_handle handle; + PVFS_handle *handle_array; + uint32_t handle_count; }; -endecode_fields_1_struct( - PVFS_servresp_create, - PVFS_handle, handle) +endecode_fields_1a_struct( + PVFS_servresp_batch_create, + skip4,, + uint32_t, handle_count, + PVFS_handle, handle_array); +#define extra_size_PVFS_servresp_batch_create \ + (PVFS_REQ_LIMIT_BATCH_CREATE * sizeof(PVFS_handle)) /* remove 
*****************************************************/ /* - used to remove an existing metafile or datafile object */ @@ -199,20 +307,50 @@ struct PVFS_servreq_remove endecode_fields_2_struct( PVFS_servreq_remove, PVFS_handle, handle, - PVFS_fs_id, fs_id) + PVFS_fs_id, fs_id); #define PINT_SERVREQ_REMOVE_FILL(__req, \ __creds, \ __fsid, \ - __handle)\ + __handle,\ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_REMOVE; \ + (__req).hints = (__hints); \ (__req).credentials = (__creds); \ (__req).u.remove.fs_id = (__fsid); \ (__req).u.remove.handle = (__handle); \ } while (0) +struct PVFS_servreq_batch_remove +{ + PVFS_fs_id fs_id; + int32_t handle_count; + PVFS_handle *handles; +}; +endecode_fields_1a_struct( + PVFS_servreq_batch_remove, + PVFS_fs_id, fs_id, + int32_t, handle_count, + PVFS_handle, handles); +#define extra_size_PVFS_servreq_batch_remove \ + (PVFS_REQ_LIMIT_HANDLES_COUNT * sizeof(PVFS_handle)) + +#define PINT_SERVREQ_BATCH_REMOVE_FILL(__req, \ + __creds, \ + __fsid, \ + __count, \ + __handles) \ +do { \ + memset(&(__req), 0, sizeof(__req)); \ + (__req).op = PVFS_SERV_BATCH_REMOVE; \ + (__req).credentials = (__creds); \ + (__req).u.batch_remove.fs_id = (__fsid); \ + (__req).u.batch_remove.handle_count = (__count); \ + (__req).u.batch_remove.handles = (__handles); \ +} while (0) + /* mgmt_remove_object */ /* - used to remove an existing object reference */ @@ -224,15 +362,17 @@ struct PVFS_servreq_mgmt_remove_object endecode_fields_2_struct( PVFS_servreq_mgmt_remove_object, PVFS_handle, handle, - PVFS_fs_id, fs_id) + PVFS_fs_id, fs_id); #define PINT_SERVREQ_MGMT_REMOVE_OBJECT_FILL(__req, \ __creds, \ __fsid, \ - __handle)\ + __handle,\ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_MGMT_REMOVE_OBJECT; \ + (__req).hints = (__hints); \ (__req).credentials = (__creds); \ (__req).u.mgmt_remove_object.fs_id = (__fsid); \ (__req).u.mgmt_remove_object.handle = (__handle); \ @@ -252,7 +392,7 @@ 
endecode_fields_4_struct( PVFS_handle, handle, PVFS_fs_id, fs_id, skip4,, - string, entry) + string, entry); #define extra_size_PVFS_servreq_mgmt_remove_dirent \ roundup8(PVFS_REQ_LIMIT_SEGMENT_BYTES+1) @@ -260,16 +400,105 @@ endecode_fields_4_struct( __creds, \ __fsid, \ __handle,\ - __entry) \ + __entry, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_MGMT_REMOVE_DIRENT; \ + (__req).hints = (__hints); \ (__req).credentials = (__creds); \ (__req).u.mgmt_remove_dirent.fs_id = (__fsid); \ (__req).u.mgmt_remove_dirent.handle = (__handle); \ (__req).u.mgmt_remove_dirent.entry = (__entry); \ } while (0) +struct PVFS_servreq_tree_remove +{ + PVFS_fs_id fs_id; + uint32_t num_data_files; + PVFS_handle *handle_array; +}; +endecode_fields_1a_struct( + PVFS_servreq_tree_remove, + PVFS_fs_id, fs_id, + uint32_t, num_data_files, + PVFS_handle, handle_array); +#define extra_size_PVFS_servreq_tree_remove \ + (PVFS_REQ_LIMIT_HANDLES_COUNT * sizeof(PVFS_handle)) + +#define PINT_SERVREQ_TREE_REMOVE_FILL(__req, \ + __creds, \ + __fsid, \ + __num_data_files, \ + __handle_array, \ + __hints) \ +do { \ + memset(&(__req), 0, sizeof(__req)); \ + (__req).op = PVFS_SERV_TREE_REMOVE; \ + (__req).hints = (__hints); \ + (__req).credentials = (__creds); \ + (__req).u.tree_remove.fs_id = (__fsid); \ + (__req).u.tree_remove.num_data_files = (__num_data_files); \ + (__req).u.tree_remove.handle_array = (__handle_array); \ +} while (0) + +struct PVFS_servreq_tree_get_file_size +{ + PVFS_fs_id fs_id; + uint32_t caller_handle_index; + uint32_t retry_msgpair_at_leaf; + uint32_t num_data_files; + PVFS_handle *handle_array; +}; +endecode_fields_3a_struct( + PVFS_servreq_tree_get_file_size, + PVFS_fs_id, fs_id, + uint32_t, caller_handle_index, + uint32_t, retry_msgpair_at_leaf, + uint32_t, num_data_files, + PVFS_handle, handle_array); +#define extra_size_PVFS_servreq_tree_get_file_size \ + (PVFS_REQ_LIMIT_HANDLES_COUNT * sizeof(PVFS_handle)) + +#define 
PINT_SERVREQ_TREE_GET_FILE_SIZE_FILL(__req, \ + __creds, \ + __fsid, \ + __caller_handle_index, \ + __num_data_files, \ + __handle_array, \ + __retry_msgpair_at_leaf, \ + __hints) \ +do { \ + memset(&(__req), 0, sizeof(__req)); \ + (__req).op = PVFS_SERV_TREE_GET_FILE_SIZE; \ + (__req).hints = (__hints); \ + (__req).credentials = (__creds); \ + (__req).u.tree_get_file_size.fs_id = (__fsid); \ + (__req).u.tree_get_file_size.caller_handle_index = \ + (__caller_handle_index); \ + (__req).u.tree_get_file_size.num_data_files = (__num_data_files); \ + (__req).u.tree_get_file_size.handle_array = (__handle_array); \ + (__req).u.tree_get_file_size.retry_msgpair_at_leaf = \ + (__retry_msgpair_at_leaf);\ +} while (0) + +struct PVFS_servresp_tree_get_file_size +{ + uint32_t caller_handle_index; + uint32_t handle_count; + PVFS_size *size; + PVFS_error *error; +}; +endecode_fields_1aa_struct( + PVFS_servresp_tree_get_file_size, + uint32_t, caller_handle_index, + uint32_t, handle_count, + PVFS_size, size, + PVFS_error, error); +#define extra_size_PVFS_servresp_tree_get_file_size \ + ( (PVFS_REQ_LIMIT_HANDLES_COUNT * sizeof(PVFS_error)) + \ + (PVFS_REQ_LIMIT_HANDLES_COUNT * sizeof(PVFS_size)) ) + /* mgmt_get_dirdata_handle */ /* - used to retrieve the dirdata handle of the specified parent ref */ struct PVFS_servreq_mgmt_get_dirdata_handle @@ -280,16 +509,18 @@ struct PVFS_servreq_mgmt_get_dirdata_handle endecode_fields_2_struct( PVFS_servreq_mgmt_get_dirdata_handle, PVFS_handle, handle, - PVFS_fs_id, fs_id) + PVFS_fs_id, fs_id); #define PINT_SERVREQ_MGMT_GET_DIRDATA_HANDLE_FILL(__req, \ __creds, \ __fsid, \ - __handle)\ + __handle,\ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_MGMT_GET_DIRDATA_HANDLE; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.mgmt_get_dirdata_handle.fs_id = (__fsid); \ (__req).u.mgmt_get_dirdata_handle.handle = (__handle); \ } while (0) @@ -300,7 +531,7 @@ struct 
PVFS_servresp_mgmt_get_dirdata_handle }; endecode_fields_1_struct( PVFS_servresp_mgmt_get_dirdata_handle, - PVFS_handle, handle) + PVFS_handle, handle); /* flush * - used to flush an object to disk */ @@ -314,16 +545,18 @@ endecode_fields_3_struct( PVFS_servreq_flush, PVFS_handle, handle, PVFS_fs_id, fs_id, - int32_t, flags) + int32_t, flags); #define PINT_SERVREQ_FLUSH_FILL(__req, \ __creds, \ __fsid, \ - __handle)\ + __handle,\ + __hints )\ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_FLUSH; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.flush.fs_id = (__fsid); \ (__req).u.flush.handle = (__handle); \ } while (0) @@ -341,17 +574,19 @@ endecode_fields_3_struct( PVFS_servreq_getattr, PVFS_handle, handle, PVFS_fs_id, fs_id, - uint32_t, attrmask) + uint32_t, attrmask); #define PINT_SERVREQ_GETATTR_FILL(__req, \ __creds, \ __fsid, \ __handle,\ - __amask) \ + __amask, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_GETATTR; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.getattr.fs_id = (__fsid); \ (__req).u.getattr.handle = (__handle); \ (__req).u.getattr.attrmask = (__amask);\ @@ -363,10 +598,53 @@ struct PVFS_servresp_getattr }; endecode_fields_1_struct( PVFS_servresp_getattr, - PVFS_object_attr, attr) + PVFS_object_attr, attr); #define extra_size_PVFS_servresp_getattr \ extra_size_PVFS_object_attr +/* unstuff ****************************************************/ +/* - creates the datafile handles for the file. This allows a stuffed + * file to migrate to a large one. 
*/ + +struct PVFS_servreq_unstuff +{ + PVFS_handle handle; /* handle of target object */ + PVFS_fs_id fs_id; /* file system */ + uint32_t attrmask; /* mask of desired attributes */ +}; +endecode_fields_3_struct( + PVFS_servreq_unstuff, + PVFS_handle, handle, + PVFS_fs_id, fs_id, + uint32_t, attrmask); + +#define PINT_SERVREQ_UNSTUFF_FILL(__req, \ + __creds, \ + __fsid, \ + __handle, \ + __amask) \ +do { \ + memset(&(__req), 0, sizeof(__req)); \ + (__req).op = PVFS_SERV_UNSTUFF; \ + (__req).credentials = (__creds); \ + (__req).u.unstuff.fs_id = (__fsid); \ + (__req).u.unstuff.handle = (__handle); \ + (__req).u.unstuff.attrmask = (__amask); \ +} while (0) + +struct PVFS_servresp_unstuff +{ + /* return the entire object's attributes, which includes the + * new datafile handles for the migrated file. + */ + PVFS_object_attr attr; +}; +endecode_fields_1_struct( + PVFS_servresp_unstuff, + PVFS_object_attr, attr); +#define extra_size_PVFS_servresp_unstuff \ + extra_size_PVFS_object_attr + /* setattr ****************************************************/ /* - sets attributes specified by mask of PVFS_ATTR_XXX values */ @@ -375,13 +653,14 @@ struct PVFS_servreq_setattr PVFS_handle handle; /* handle of target object */ PVFS_fs_id fs_id; /* file system */ PVFS_object_attr attr; /* new attributes */ + PVFS_handle cid; }; endecode_fields_4_struct( PVFS_servreq_setattr, PVFS_handle, handle, PVFS_fs_id, fs_id, - skip4,, - PVFS_object_attr, attr) + PVFS_handle, cid, + PVFS_object_attr, attr); #define extra_size_PVFS_servreq_setattr \ extra_size_PVFS_object_attr @@ -391,13 +670,16 @@ endecode_fields_4_struct( __handle, \ __objtype, \ __attr, \ - __extra_amask) \ + __extra_amask, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_SETATTR; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.setattr.fs_id = (__fsid); \ (__req).u.setattr.handle = (__handle); \ + (__req).u.setattr.cid = (__attr).cid; \ (__attr).objtype = 
(__objtype); \ (__attr).mask |= PVFS_ATTR_SYS_TYPE; \ PINT_CONVERT_ATTR(&(__req).u.setattr.attr, &(__attr), __extra_amask);\ @@ -420,7 +702,7 @@ endecode_fields_5_struct( PVFS_fs_id, fs_id, skip4,, PVFS_handle, handle, - uint32_t, attrmask) + uint32_t, attrmask); #define extra_size_PVFS_servreq_lookup_path \ roundup8(PVFS_REQ_LIMIT_PATH_NAME_BYTES + 1) @@ -429,11 +711,13 @@ endecode_fields_5_struct( __path, \ __fsid, \ __handle, \ - __amask) \ + __amask, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_LOOKUP_PATH; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.lookup_path.path = (__path); \ (__req).u.lookup_path.fs_id = (__fsid); \ (__req).u.lookup_path.handle = (__handle);\ @@ -456,7 +740,7 @@ endecode_fields_1a_1a_struct( PVFS_handle, handle_array, skip4,, uint32_t, attr_count, - PVFS_object_attr, attr_array) + PVFS_object_attr, attr_array); /* this is a big thing that could be either a full path, * or lots of handles, just use the max io req limit */ #define extra_size_PVFS_servresp_lookup_path \ @@ -483,7 +767,7 @@ endecode_fields_4_struct( PVFS_fs_id, fs_id, skip4,, PVFS_object_attr, attr, - PVFS_handle_extent_array, handle_extent_array) + PVFS_handle_extent_array, handle_extent_array); #define extra_size_PVFS_servreq_mkdir \ (PVFS_REQ_LIMIT_HANDLES_COUNT * sizeof(PVFS_handle_extent)) @@ -491,11 +775,13 @@ endecode_fields_4_struct( __creds, \ __fs_id, \ __ext_array, \ - __attr) \ + __attr, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_MKDIR; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.mkdir.fs_id = __fs_id; \ (__req).u.mkdir.handle_extent_array.extent_count = \ (__ext_array).extent_count; \ @@ -509,10 +795,12 @@ do { \ struct PVFS_servresp_mkdir { PVFS_handle handle; /* handle of new directory */ + PVFS_handle cid; }; -endecode_fields_1_struct( +endecode_fields_2_struct( PVFS_servresp_mkdir, - PVFS_handle, handle) + 
PVFS_handle, handle, + PVFS_handle, cid); /* create dirent ***********************************************/ /* - creates a new entry within an existing directory */ @@ -529,7 +817,7 @@ endecode_fields_4_struct( string, name, PVFS_handle, new_handle, PVFS_handle, handle, - PVFS_fs_id, fs_id) + PVFS_fs_id, fs_id); #define extra_size_PVFS_servreq_crdirent \ roundup8(PVFS_REQ_LIMIT_SEGMENT_BYTES+1) @@ -538,15 +826,17 @@ endecode_fields_4_struct( __name, \ __new_handle, \ __handle, \ - __fs_id) \ + __fs_id, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_CRDIRENT; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.crdirent.name = (__name); \ (__req).u.crdirent.new_handle = (__new_handle); \ (__req).u.crdirent.handle = \ - (__handle); \ + (__handle); \ (__req).u.crdirent.fs_id = (__fs_id); \ } while (0) @@ -563,7 +853,7 @@ endecode_fields_3_struct( PVFS_servreq_rmdirent, string, entry, PVFS_handle, handle, - PVFS_fs_id, fs_id) + PVFS_fs_id, fs_id); #define extra_size_PVFS_servreq_rmdirent \ roundup8(PVFS_REQ_LIMIT_SEGMENT_BYTES+1) @@ -571,11 +861,13 @@ endecode_fields_3_struct( __creds, \ __fsid, \ __handle, \ - __entry) \ + __entry, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_RMDIRENT; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.rmdirent.fs_id = (__fsid); \ (__req).u.rmdirent.handle = (__handle); \ (__req).u.rmdirent.entry = (__entry); \ @@ -587,7 +879,7 @@ struct PVFS_servresp_rmdirent }; endecode_fields_1_struct( PVFS_servresp_rmdirent, - PVFS_handle, entry_handle) + PVFS_handle, entry_handle); /* chdirent ****************************************************/ /* - modifies an existing directory entry on a particular file system */ @@ -604,7 +896,7 @@ endecode_fields_4_struct( string, entry, PVFS_handle, new_dirent_handle, PVFS_handle, handle, - PVFS_fs_id, fs_id) + PVFS_fs_id, fs_id); #define extra_size_PVFS_servreq_chdirent \ 
roundup8(PVFS_REQ_LIMIT_SEGMENT_BYTES+1) @@ -613,11 +905,13 @@ endecode_fields_4_struct( __fsid, \ __handle, \ __new_dirent, \ - __entry) \ + __entry, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_CHDIRENT; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.chdirent.fs_id = (__fsid); \ (__req).u.chdirent.handle = \ (__handle); \ @@ -632,7 +926,7 @@ struct PVFS_servresp_chdirent }; endecode_fields_1_struct( PVFS_servresp_chdirent, - PVFS_handle, old_dirent_handle) + PVFS_handle, old_dirent_handle); /* readdir *****************************************************/ /* - reads entries from a directory */ @@ -649,18 +943,20 @@ endecode_fields_4_struct( PVFS_handle, handle, PVFS_fs_id, fs_id, uint32_t, dirent_count, - PVFS_ds_position, token) + PVFS_ds_position, token); #define PINT_SERVREQ_READDIR_FILL(__req, \ __creds, \ __fsid, \ __handle, \ __token, \ - __dirent_count) \ + __dirent_count, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_READDIR; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.readdir.fs_id = (__fsid); \ (__req).u.readdir.handle = (__handle); \ (__req).u.readdir.token = (__token); \ @@ -681,17 +977,18 @@ endecode_fields_3a_struct( uint64_t, directory_version, skip4,, uint32_t, dirent_count, - PVFS_dirent, dirent_array) + PVFS_dirent, dirent_array); #define extra_size_PVFS_servresp_readdir \ roundup8(PVFS_REQ_LIMIT_DIRENT_COUNT * (PVFS_NAME_MAX + 1 + 8)) /* getconfig ***************************************************/ /* - retrieves initial configuration information from server */ -#define PINT_SERVREQ_GETCONFIG_FILL(__req, __creds)\ +#define PINT_SERVREQ_GETCONFIG_FILL(__req, __creds, __hints)\ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_GETCONFIG; \ + (__req).hints = (__hints); \ (__req).credentials = (__creds); \ } while (0); @@ -704,10 +1001,120 @@ endecode_fields_3_struct( PVFS_servresp_getconfig, 
uint32_t, fs_config_buf_size, skip4,, - string, fs_config_buf) + string, fs_config_buf); #define extra_size_PVFS_servresp_getconfig \ (PVFS_REQ_LIMIT_CONFIG_FILE_BYTES) +/* mirror ******************************************************/ +/* - copies a datahandle owned by the local server to a data- */ +/* handle on a remote server. There could be multiple desti- */ +/* nation data handles. dst_count tells us how many there */ +/* are. */ +struct PVFS_servreq_mirror +{ + PVFS_handle src_handle; + PVFS_handle *dst_handle; + PVFS_fs_id fs_id; + PINT_dist *dist; + uint32_t bsize; + uint32_t src_server_nr; + uint32_t *wcIndex; + uint32_t dst_count; + enum PVFS_flowproto_type flow_type; + enum PVFS_encoding_type encoding; +}; + +#ifdef __PINT_REQPROTO_ENCODE_FUNCS_C +#define encode_PVFS_servreq_mirror(pptr,x) do { \ + int i; \ + encode_PVFS_handle(pptr,&(x)->src_handle); \ + encode_PVFS_fs_id(pptr,&(x)->fs_id); \ + encode_PINT_dist(pptr,&(x)->dist); \ + encode_uint32_t(pptr,&(x)->bsize); \ + encode_uint32_t(pptr,&(x)->src_server_nr); \ + encode_uint32_t(pptr,&(x)->dst_count); \ + encode_enum(pptr,&(x)->flow_type); \ + encode_enum(pptr,&(x)->encoding); \ + for (i=0; i<(x)->dst_count; i++) \ + { \ + encode_PVFS_handle(pptr,&(x)->dst_handle[i]); \ + encode_uint32_t(pptr,&(x)->wcIndex[i]); \ + } \ +} while (0) + +#define decode_PVFS_servreq_mirror(pptr,x) do { \ + int i; \ + decode_PVFS_handle(pptr,&(x)->src_handle); \ + decode_PVFS_fs_id(pptr,&(x)->fs_id); \ + decode_PINT_dist(pptr,&(x)->dist); \ + decode_uint32_t(pptr,&(x)->bsize); \ + decode_uint32_t(pptr,&(x)->src_server_nr); \ + decode_uint32_t(pptr,&(x)->dst_count); \ + decode_enum(pptr,&(x)->flow_type); \ + decode_enum(pptr,&(x)->encoding); \ + (x)->dst_handle = decode_malloc((x)->dst_count * \ + sizeof(PVFS_handle)); \ + (x)->wcIndex = decode_malloc((x)->dst_count * \ + sizeof(uint32_t)); \ + for (i=0; i<(x)->dst_count; i++) \ + { \ + decode_PVFS_handle(pptr,&(x)->dst_handle[i]); \ + 
decode_uint32_t(pptr,&(x)->wcIndex[i]); \ + } \ +} while (0) +#endif + +#define extra_size_PVFS_servreq_mirror \ + ( (sizeof(PVFS_handle) * PVFS_REQ_LIMIT_HANDLES_COUNT) + \ + (sizeof(uint32_t) * PVFS_REQ_LIMIT_HANDLES_COUNT) ) + +/*Response to mirror request. Identifies the number of bytes written and the */ +/*status of that write for each source-destination handle pair. (Source is */ +/*always the same for each pair.) */ +struct PVFS_servresp_mirror +{ + PVFS_handle src_handle; + uint32_t src_server_nr; + uint32_t *bytes_written; + uint32_t *write_status_code; + uint32_t dst_count; +}; + +#ifdef __PINT_REQPROTO_ENCODE_FUNCS_C +#define encode_PVFS_servresp_mirror(pptr,x) do { \ + int i; \ + encode_PVFS_handle(pptr,&(x)->src_handle); \ + encode_uint32_t(pptr,&(x)->src_server_nr); \ + encode_uint32_t(pptr,&(x)->dst_count); \ + for (i=0; i<(x)->dst_count; i++) \ + { \ + encode_uint32_t(pptr,&(x)->bytes_written[i]); \ + encode_uint32_t(pptr,&(x)->write_status_code[i]); \ + } \ +} while (0) + +#define decode_PVFS_servresp_mirror(pptr,x) do { \ + int i; \ + decode_PVFS_handle(pptr,&(x)->src_handle); \ + decode_uint32_t(pptr,&(x)->src_server_nr); \ + decode_uint32_t(pptr,&(x)->dst_count); \ + (x)->bytes_written = decode_malloc((x)->dst_count * \ + sizeof(uint32_t)); \ + (x)->write_status_code = decode_malloc((x)->dst_count * \ + sizeof(uint32_t)); \ + for (i=0; i<(x)->dst_count; i++ ) \ + { \ + decode_uint32_t(pptr,&(x)->bytes_written[i]); \ + decode_uint32_t(pptr,&(x)->write_status_code[i]); \ + } \ +} while (0) +#endif + +#define extra_size_PVFS_servresp_mirror \ + ( (sizeof(uint32_t) * PVFS_REQ_LIMIT_HANDLES_COUNT) + \ + (sizeof(uint32_t) * PVFS_REQ_LIMIT_HANDLES_COUNT) ) + + /* truncate ****************************************************/ /* - resizes an existing datafile */ @@ -725,16 +1132,18 @@ endecode_fields_5_struct( PVFS_fs_id, fs_id, skip4,, PVFS_size, size, - int32_t, flags) + int32_t, flags); #define PINT_SERVREQ_TRUNCATE_FILL(__req, \ __creds, \ __fsid, 
\ __size, \ - __handle) \ + __handle, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_TRUNCATE; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.truncate.fs_id = (__fsid); \ (__req).u.truncate.size = (__size); \ (__req).u.truncate.handle = (__handle);\ @@ -749,13 +1158,14 @@ struct PVFS_servreq_statfs }; endecode_fields_1_struct( PVFS_servreq_statfs, - PVFS_fs_id, fs_id) + PVFS_fs_id, fs_id); -#define PINT_SERVREQ_STATFS_FILL(__req, __creds, __fsid)\ +#define PINT_SERVREQ_STATFS_FILL(__req, __creds, __fsid,__hints)\ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_STATFS; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.statfs.fs_id = (__fsid); \ } while (0) @@ -765,7 +1175,7 @@ struct PVFS_servresp_statfs }; endecode_fields_1_struct( PVFS_servresp_statfs, - PVFS_statfs, stat) + PVFS_statfs, stat); /* io **********************************************************/ /* - performs a read or write operation */ @@ -838,11 +1248,13 @@ struct PVFS_servreq_io __io_dist, \ __file_req, \ __file_req_off, \ - __aggregate_size) \ + __aggregate_size, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_IO; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.io.fs_id = (__fsid); \ (__req).u.io.handle = (__handle); \ (__req).u.io.io_type = (__io_type); \ @@ -853,7 +1265,7 @@ do { \ (__req).u.io.file_req = (__file_req); \ (__req).u.io.file_req_offset = (__file_req_off); \ (__req).u.io.aggregate_size = (__aggregate_size);\ -} while (0) +} while (0) struct PVFS_servresp_io { @@ -861,7 +1273,7 @@ struct PVFS_servresp_io }; endecode_fields_1_struct( PVFS_servresp_io, - PVFS_size, bstream_size) + PVFS_size, bstream_size); /* write operations require a second response to announce completion */ struct PVFS_servresp_write_completion @@ -870,7 +1282,7 @@ struct PVFS_servresp_write_completion }; endecode_fields_1_struct( 
PVFS_servresp_write_completion, - PVFS_size, total_completed) + PVFS_size, total_completed); #define SMALL_IO_MAX_SEGMENTS 64 @@ -964,11 +1376,13 @@ struct PVFS_servreq_small_io __filereq, \ __filereq_offset, \ __segments, \ - __memreq_size) \ + __memreq_size, \ + __hints ) \ do { \ int _sio_i; \ (__req).op = PVFS_SERV_SMALL_IO; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.small_io.fs_id = (__fsid); \ (__req).u.small_io.handle = (__handle); \ (__req).u.small_io.io_type = (__io_type); \ @@ -1048,7 +1462,7 @@ endecode_fields_3a_struct( uint32_t, attrmask, skip4,, uint32_t, nhandles, - PVFS_handle, handles) + PVFS_handle, handles); #define extra_size_PVFS_servreq_listattr \ (PVFS_REQ_LIMIT_LISTATTR * sizeof(PVFS_handle)) @@ -1057,11 +1471,13 @@ endecode_fields_3a_struct( __fsid, \ __amask, \ __nhandles, \ - __handle_array) \ + __handle_array, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_LISTATTR; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.listattr.fs_id = (__fsid); \ (__req).u.listattr.attrmask = (__amask);\ (__req).u.listattr.nhandles = (__nhandles); \ @@ -1079,7 +1495,7 @@ endecode_fields_1aa_struct( skip4,, uint32_t, nhandles, PVFS_error, error, - PVFS_object_attr, attr) + PVFS_object_attr, attr); #define extra_size_PVFS_servresp_listattr \ ((PVFS_REQ_LIMIT_LISTATTR * sizeof(PVFS_error)) + (PVFS_REQ_LIMIT_LISTATTR * extra_size_PVFS_object_attr)) @@ -1091,46 +1507,44 @@ struct PVFS_servreq_mgmt_setparam { PVFS_fs_id fs_id; /* file system */ enum PVFS_server_param param; /* parameter to set */ - uint64_t value; /* parameter value */ + struct PVFS_mgmt_setparam_value value; }; endecode_fields_3_struct( PVFS_servreq_mgmt_setparam, PVFS_fs_id, fs_id, enum, param, - uint64_t, value) - -#define PINT_SERVREQ_MGMT_SETPARAM_FILL(__req, \ - __creds,\ - __fsid, \ - __param,\ - __value)\ -do { \ - memset(&(__req), 0, sizeof(__req)); \ - (__req).op = PVFS_SERV_MGMT_SETPARAM; 
\ - (__req).credentials = (__creds); \ - (__req).u.mgmt_setparam.fs_id = (__fsid); \ - (__req).u.mgmt_setparam.param = (__param); \ - (__req).u.mgmt_setparam.value = (__value); \ + PVFS_mgmt_setparam_value, value); + +#define PINT_SERVREQ_MGMT_SETPARAM_FILL(__req, \ + __creds, \ + __fsid, \ + __param, \ + __value, \ + __hints) \ +do { \ + memset(&(__req), 0, sizeof(__req)); \ + (__req).op = PVFS_SERV_MGMT_SETPARAM; \ + (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ + (__req).u.mgmt_setparam.fs_id = (__fsid); \ + (__req).u.mgmt_setparam.param = (__param); \ + if(__value){ \ + (__req).u.mgmt_setparam.value.type = (__value)->type; \ + (__req).u.mgmt_setparam.value.u.value = (__value)->u.value; \ + } \ } while (0) -struct PVFS_servresp_mgmt_setparam -{ - uint64_t old_value; -}; -endecode_fields_1_struct( - PVFS_servresp_mgmt_setparam, - uint64_t, old_value) - /* mgmt_noop ********************************************************/ /* - does nothing except contact a server to see if it is responding * to requests */ -#define PINT_SERVREQ_MGMT_NOOP_FILL(__req, __creds)\ +#define PINT_SERVREQ_MGMT_NOOP_FILL(__req, __creds, __hints)\ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_MGMT_NOOP; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ } while (0) @@ -1139,44 +1553,50 @@ do { \ struct PVFS_servreq_mgmt_perf_mon { - uint32_t next_id; /* next time stamp id we want to retrieve */ - uint32_t count; /* how many measurements we want */ + uint32_t next_id; /* next time stamp id we want to retrieve */ + uint32_t key_count; /* how many counters per measurements we want */ + uint32_t count; /* how many measurements we want */ }; -endecode_fields_2_struct( +endecode_fields_3_struct( PVFS_servreq_mgmt_perf_mon, uint32_t, next_id, - uint32_t, count) - -#define PINT_SERVREQ_MGMT_PERF_MON_FILL(__req, \ - __creds, \ - __next_id,\ - __count) \ -do { \ - memset(&(__req), 0, sizeof(__req)); \ - (__req).op = PVFS_SERV_MGMT_PERF_MON; 
\ - (__req).credentials = (__creds); \ - (__req).u.mgmt_perf_mon.next_id = (__next_id);\ - (__req).u.mgmt_perf_mon.count = (__count); \ + uint32_t, key_count, + uint32_t, count); + +#define PINT_SERVREQ_MGMT_PERF_MON_FILL(__req, \ + __creds, \ + __next_id, \ + __key_count, \ + __count, \ + __hints) \ +do { \ + memset(&(__req), 0, sizeof(__req)); \ + (__req).op = PVFS_SERV_MGMT_PERF_MON; \ + (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ + (__req).u.mgmt_perf_mon.next_id = (__next_id); \ + (__req).u.mgmt_perf_mon.key_count = (__key_count);\ + (__req).u.mgmt_perf_mon.count = (__count); \ } while (0) struct PVFS_servresp_mgmt_perf_mon { - struct PVFS_mgmt_perf_stat* perf_array; /* array of statistics */ - uint32_t perf_array_count; /* size of above array */ - /* next id to pick up from this point */ - uint32_t suggested_next_id; - uint64_t end_time_ms; /* end time for final array entry */ - uint64_t cur_time_ms; /* current time according to svr */ + int64_t *perf_array; /* array of statistics */ + uint32_t perf_array_count; /* size of above array */ + uint32_t key_count; /* number of keys in each sample */ + uint32_t suggested_next_id; /* next id to pick up from this point */ + uint64_t end_time_ms; /* end time for final array entry */ + uint64_t cur_time_ms; /* current time according to svr */ }; endecode_fields_5a_struct( PVFS_servresp_mgmt_perf_mon, + uint32_t, key_count, uint32_t, suggested_next_id, - skip4,, uint64_t, end_time_ms, uint64_t, cur_time_ms, skip4,, uint32_t, perf_array_count, - PVFS_mgmt_perf_stat, perf_array) + int64_t, perf_array); #define extra_size_PVFS_servresp_mgmt_perf_mon \ (PVFS_REQ_LIMIT_IOREQ_BYTES) @@ -1187,26 +1607,32 @@ struct PVFS_servreq_mgmt_iterate_handles { PVFS_fs_id fs_id; int32_t handle_count; + int32_t flags; PVFS_ds_position position; }; -endecode_fields_3_struct( +endecode_fields_4_struct( PVFS_servreq_mgmt_iterate_handles, PVFS_fs_id, fs_id, int32_t, handle_count, - PVFS_ds_position, position) + int32_t, 
flags, + PVFS_ds_position, position); #define PINT_SERVREQ_MGMT_ITERATE_HANDLES_FILL(__req, \ __creds, \ __fs_id, \ __handle_count, \ - __position) \ + __position, \ + __flags, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_MGMT_ITERATE_HANDLES; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.mgmt_iterate_handles.fs_id = (__fs_id); \ (__req).u.mgmt_iterate_handles.handle_count = (__handle_count);\ - (__req).u.mgmt_iterate_handles.position = (__position); \ + (__req).u.mgmt_iterate_handles.position = (__position), \ + (__req).u.mgmt_iterate_handles.flags = (__flags); \ } while (0) struct PVFS_servresp_mgmt_iterate_handles @@ -1220,7 +1646,7 @@ endecode_fields_2a_struct( PVFS_ds_position, position, skip4,, int32_t, handle_count, - PVFS_handle, handle_array) + PVFS_handle, handle_array); #define extra_size_PVFS_servresp_mgmt_iterate_handles \ (PVFS_REQ_LIMIT_HANDLES_COUNT * sizeof(PVFS_handle)) @@ -1237,7 +1663,7 @@ endecode_fields_1a_struct( PVFS_servreq_mgmt_dspace_info_list, PVFS_fs_id, fs_id, int32_t, handle_count, - PVFS_handle, handle_array) + PVFS_handle, handle_array); #define extra_size_PVFS_servreq_mgmt_dspace_info_list \ (PVFS_REQ_LIMIT_HANDLES_COUNT * sizeof(PVFS_handle)) @@ -1245,11 +1671,13 @@ endecode_fields_1a_struct( __creds, \ __fs_id, \ __handle_array, \ - __handle_count) \ + __handle_count, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_MGMT_DSPACE_INFO_LIST; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.mgmt_dspace_info_list.fs_id = (__fs_id); \ (__req).u.mgmt_dspace_info_list.handle_array = (__handle_array);\ (__req).u.mgmt_dspace_info_list.handle_count = (__handle_count);\ @@ -1264,7 +1692,7 @@ endecode_fields_1a_struct( PVFS_servresp_mgmt_dspace_info_list, skip4,, int32_t, dspace_info_count, - PVFS_mgmt_dspace_info, dspace_info_array) + PVFS_mgmt_dspace_info, dspace_info_array); #define 
extra_size_PVFS_servresp_mgmt_dspace_info_list \ (PVFS_REQ_LIMIT_MGMT_DSPACE_INFO_LIST_COUNT * \ sizeof(struct PVFS_mgmt_dspace_info)) @@ -1278,13 +1706,14 @@ struct PVFS_servreq_mgmt_event_mon }; endecode_fields_1_struct( PVFS_servreq_mgmt_event_mon, - uint32_t, event_count) + uint32_t, event_count); -#define PINT_SERVREQ_MGMT_EVENT_MON_FILL(__req, __creds, __event_count)\ +#define PINT_SERVREQ_MGMT_EVENT_MON_FILL(__req, __creds, __event_count, __hints)\ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_MGMT_EVENT_MON; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.mgmt_event_mon.event_count = (__event_count); \ } while (0) @@ -1297,7 +1726,7 @@ endecode_fields_1a_struct( PVFS_servresp_mgmt_event_mon, skip4,, uint32_t, event_count, - PVFS_mgmt_event, event_array) + PVFS_mgmt_event, event_array); #define extra_size_PVFS_servresp_mgmt_event_mon \ (PVFS_REQ_LIMIT_MGMT_EVENT_MON_COUNT * \ roundup8(sizeof(struct PVFS_mgmt_event))) @@ -1321,7 +1750,8 @@ endecode_fields_2aa_struct( PVFS_ds_keyval, key, PVFS_size, valsz); #define extra_size_PVFS_servreq_geteattr \ - (PVFS_REQ_LIMIT_KEY_LEN * PVFS_REQ_LIMIT_KEYVAL_LIST) + ((PVFS_REQ_LIMIT_EATTR_KEY_LEN + sizeof(PVFS_size)) \ + * PVFS_REQ_LIMIT_EATTR_LIST) #define PINT_SERVREQ_GETEATTR_FILL(__req, \ __creds, \ @@ -1329,14 +1759,16 @@ endecode_fields_2aa_struct( __handle,\ __nkey,\ __key_array, \ - __size_array) \ + __size_array,\ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ - (__req).op = PVFS_SERV_GETEATTR; \ + (__req).op = PVFS_SERV_GETEATTR; \ (__req).credentials = (__creds); \ - (__req).u.geteattr.fs_id = (__fsid); \ - (__req).u.geteattr.handle = (__handle); \ - (__req).u.geteattr.nkey = (__nkey); \ + (__req).hints = (__hints); \ + (__req).u.geteattr.fs_id = (__fsid); \ + (__req).u.geteattr.handle = (__handle);\ + (__req).u.geteattr.nkey = (__nkey); \ (__req).u.geteattr.key = (__key_array);\ (__req).u.geteattr.valsz = (__size_array);\ } while (0) @@ -1354,8
+1786,8 @@ endecode_fields_1aa_struct( PVFS_ds_keyval, val, PVFS_error, err); #define extra_size_PVFS_servresp_geteattr \ - (PVFS_REQ_LIMIT_VAL_LEN * PVFS_REQ_LIMIT_KEYVAL_LIST + \ - PVFS_REQ_LIMIT_KEYVAL_LIST * sizeof(PVFS_error)) + ((PVFS_REQ_LIMIT_EATTR_VAL_LEN + sizeof(PVFS_error)) \ + * PVFS_REQ_LIMIT_EATTR_LIST) /* seteattr ****************************************************/ /* - sets list of extended attributes */ @@ -1379,8 +1811,8 @@ endecode_fields_4aa_struct( PVFS_ds_keyval, key, PVFS_ds_keyval, val); #define extra_size_PVFS_servreq_seteattr \ - ((PVFS_REQ_LIMIT_KEY_LEN + PVFS_REQ_LIMIT_VAL_LEN) \ - * PVFS_REQ_LIMIT_KEYVAL_LIST) + ((PVFS_REQ_LIMIT_EATTR_KEY_LEN + PVFS_REQ_LIMIT_EATTR_VAL_LEN) \ + * PVFS_REQ_LIMIT_EATTR_LIST) #define PINT_SERVREQ_SETEATTR_FILL(__req, \ __creds, \ @@ -1389,11 +1821,13 @@ endecode_fields_4aa_struct( __flags, \ __nkey, \ __key_array, \ - __val_array) \ + __val_array, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_SETEATTR; \ - (__req).credentials = (__creds); \ + (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.seteattr.fs_id = (__fsid); \ (__req).u.seteattr.handle = (__handle); \ (__req).u.seteattr.flags = (__flags); \ @@ -1417,17 +1851,19 @@ endecode_fields_3_struct( PVFS_fs_id, fs_id, PVFS_ds_keyval, key); #define extra_size_PVFS_servreq_deleattr \ - PVFS_REQ_LIMIT_KEY_LEN + PVFS_REQ_LIMIT_EATTR_KEY_LEN #define PINT_SERVREQ_DELEATTR_FILL(__req, \ __creds, \ __fsid, \ __handle,\ - __key) \ + __key, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_DELEATTR; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.deleattr.fs_id = (__fsid); \ (__req).u.deleattr.handle = (__handle); \ (__req).u.deleattr.key.buffer_sz = (__key).buffer_sz;\ @@ -1454,7 +1890,7 @@ endecode_fields_4a_struct( uint32_t, nkey, PVFS_size, keysz); #define extra_size_PVFS_servreq_listeattr \ - (PVFS_REQ_LIMIT_KEYVAL_LIST * 
sizeof(PVFS_size)) + (PVFS_REQ_LIMIT_EATTR_LIST * sizeof(PVFS_size)) #define PINT_SERVREQ_LISTEATTR_FILL(__req, \ __creds, \ @@ -1462,11 +1898,13 @@ endecode_fields_4a_struct( __handle, \ __token, \ __nkey, \ - __size_array) \ + __size_array, \ + __hints) \ do { \ memset(&(__req), 0, sizeof(__req)); \ (__req).op = PVFS_SERV_LISTEATTR; \ (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ (__req).u.listeattr.fs_id = (__fsid); \ (__req).u.listeattr.handle = (__handle); \ (__req).u.listeattr.token = (__token); \ @@ -1485,10 +1923,47 @@ endecode_fields_2a_struct( PVFS_ds_position, token, skip4,, uint32_t, nkey, - PVFS_ds_keyval, key) + PVFS_ds_keyval, key); #define extra_size_PVFS_servresp_listeattr \ - (PVFS_REQ_LIMIT_KEY_LEN * PVFS_REQ_LIMIT_KEYVAL_LIST) + (PVFS_REQ_LIMIT_EATTR_KEY_LEN * PVFS_REQ_LIMIT_EATTR_LIST) + +/* mgmt_get_uid ****************************************************/ +/* retrieves uid management history from server */ + +struct PVFS_servreq_mgmt_get_uid +{ + uint32_t history; /* number of seconds we want to go back + when retrieving the uid history */ +}; +endecode_fields_1_struct( + PVFS_servreq_mgmt_get_uid, + uint32_t, history); + +#define PINT_SERVREQ_MGMT_GET_UID_FILL(__req, \ + __creds, \ + __history, \ + __hints) \ +do { \ + memset(&(__req), 0, sizeof(__req)); \ + (__req).op = PVFS_SERV_MGMT_GET_UID; \ + (__req).credentials = (__creds); \ + (__req).hints = (__hints); \ + (__req).u.mgmt_get_uid.history = (__history); \ +} while (0) + +struct PVFS_servresp_mgmt_get_uid +{ + PVFS_uid_info_s *uid_info_array; /* array of uid info */ + uint32_t uid_info_array_count; /* size of above array */ +}; +endecode_fields_1a_struct( + PVFS_servresp_mgmt_get_uid, + skip4,, + uint32_t, uid_info_array_count, + PVFS_uid_info_s, uid_info_array); +#define extra_size_PVFS_servresp_mgmt_get_uid \ + (UID_MGMT_MAX_HISTORY * sizeof(PVFS_uid_info_s)) /* server request *********************************************/ /* - generic request with union of all op
specific structs */ @@ -1497,10 +1972,16 @@ struct PVFS_server_req { enum PVFS_server_op op; PVFS_credentials credentials; + PVFS_hint hints; + union { + struct PVFS_servreq_mirror mirror; struct PVFS_servreq_create create; + struct PVFS_servreq_unstuff unstuff; + struct PVFS_servreq_batch_create batch_create; struct PVFS_servreq_remove remove; + struct PVFS_servreq_batch_remove batch_remove; struct PVFS_servreq_io io; struct PVFS_servreq_getattr getattr; struct PVFS_servreq_setattr setattr; @@ -1527,6 +2008,9 @@ struct PVFS_server_req struct PVFS_servreq_listeattr listeattr; struct PVFS_servreq_small_io small_io; struct PVFS_servreq_listattr listattr; + struct PVFS_servreq_tree_remove tree_remove; + struct PVFS_servreq_tree_get_file_size tree_get_file_size; + struct PVFS_servreq_mgmt_get_uid mgmt_get_uid; } u; }; #ifdef __PINT_REQPROTO_ENCODE_FUNCS_C @@ -1539,12 +2023,14 @@ encode_PVFS_server_req(char **pptr, const struct PVFS_server_req *x) { #endif *pptr += 4; encode_PVFS_credentials(pptr, &x->credentials); + encode_PINT_hint(pptr, x->hints); } static inline void decode_PVFS_server_req(char **pptr, struct PVFS_server_req *x) { decode_enum(pptr, &x->op); *pptr += 4; decode_PVFS_credentials(pptr, &x->credentials); + decode_PINT_hint(pptr, &x->hints); } #endif @@ -1556,7 +2042,10 @@ struct PVFS_server_resp PVFS_error status; union { + struct PVFS_servresp_mirror mirror; struct PVFS_servresp_create create; + struct PVFS_servresp_unstuff unstuff; + struct PVFS_servresp_batch_create batch_create; struct PVFS_servresp_getattr getattr; struct PVFS_servresp_mkdir mkdir; struct PVFS_servresp_readdir readdir; @@ -1567,7 +2056,6 @@ struct PVFS_server_resp struct PVFS_servresp_io io; struct PVFS_servresp_write_completion write_completion; struct PVFS_servresp_statfs statfs; - struct PVFS_servresp_mgmt_setparam mgmt_setparam; struct PVFS_servresp_mgmt_perf_mon mgmt_perf_mon; struct PVFS_servresp_mgmt_iterate_handles mgmt_iterate_handles; struct 
PVFS_servresp_mgmt_dspace_info_list mgmt_dspace_info_list; @@ -1577,12 +2065,14 @@ struct PVFS_server_resp struct PVFS_servresp_listeattr listeattr; struct PVFS_servresp_small_io small_io; struct PVFS_servresp_listattr listattr; + struct PVFS_servresp_tree_get_file_size tree_get_file_size; + struct PVFS_servresp_mgmt_get_uid mgmt_get_uid; } u; }; endecode_fields_2_struct( PVFS_server_resp, enum, op, - PVFS_error, status) + PVFS_error, status); #endif /* __PVFS2_REQ_PROTO_H */ diff --git a/src/server/batch-create.sm b/src/server/batch-create.sm new file mode 100644 index 0000000..bae4499 --- /dev/null +++ b/src/server/batch-create.sm @@ -0,0 +1,161 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include +#include + +#include "server-config.h" +#include "pvfs2-server.h" +#include "pvfs2-attr.h" +#include "gossip.h" +#include "pvfs2-internal.h" + +%% + +machine pvfs2_batch_create_sm +{ + state prelude + { + jump pvfs2_prelude_sm; + success => create; + default => final_response; + } + + state create + { + run batch_create_create; + default => final_response; + } + + state final_response + { + jump pvfs2_final_response_sm; + default => cleanup; + } + + state cleanup + { + run batch_create_cleanup; + default => terminate; + } +} + +%% + + +/* + * Function: batch_create_create + * + * Params: server_op *s_op, + * job_status_s* js_p + * + * Pre: None + * + * Post: None + * + * Returns: int + * + * Synopsis: Create a dataspace. 
+ */ +static int batch_create_create( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int ret = -1; + job_id_t i; + + if(s_op->req->u.batch_create.object_count < 1) + { + js_p->error_code = -PVFS_EINVAL; + return(SM_ACTION_COMPLETE); + } + + s_op->resp.u.batch_create.handle_count + = s_op->req->u.batch_create.object_count; + + /* allocate some space to hold the handles we create */ + s_op->resp.u.batch_create.handle_array = + malloc(s_op->req->u.batch_create.object_count * sizeof(PVFS_handle)); + if(!s_op->resp.u.batch_create.handle_array) + { + js_p->error_code = -PVFS_ENOMEM; + return(SM_ACTION_COMPLETE); + } + + ret = job_trove_dspace_create_list( + s_op->req->u.batch_create.fs_id, + &s_op->req->u.batch_create.handle_extent_array, + s_op->resp.u.batch_create.handle_array, + s_op->req->u.batch_create.object_count, + s_op->req->u.batch_create.object_type, + NULL, + TROVE_SYNC, + smcb, + 0, + js_p, + &i, + server_job_context, + s_op->req->hints); + + return(ret); +} + +/* + * Function: batch_create_cleanup + * + * Params: server_op *b, + * job_status_s* js_p + * + * Pre: None + * + * Post: None + * + * Returns: int + * + * Synopsis: free memory and return + * + */ +static int batch_create_cleanup( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int i; + + if(s_op->resp.status == 0) + { + for(i=0; i<s_op->resp.u.batch_create.handle_count; i++) + { + gossip_debug( + GOSSIP_SERVER_DEBUG, "Batch created: %llu\n", + llu(s_op->resp.u.batch_create.handle_array[i])); + } + } + + if(s_op->resp.u.batch_create.handle_array) + { + free(s_op->resp.u.batch_create.handle_array); + } + + return(server_state_machine_complete(smcb)); +} + +struct PINT_server_req_params pvfs2_batch_create_params = +{ + .string_name = "batch_create", + .perm = PINT_SERVER_CHECK_NONE, + .access_type = PINT_server_req_modify, + .state_machine =
&pvfs2_batch_create_sm +}; + +/* + * Local variables: + * mode: c + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/server/batch-remove.sm b/src/server/batch-remove.sm new file mode 100644 index 0000000..d721e23 --- /dev/null +++ b/src/server/batch-remove.sm @@ -0,0 +1,226 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include +#include + +#include "server-config.h" +#include "pvfs2-server.h" +#include "pvfs2-attr.h" +#include "gossip.h" +#include "pvfs2-internal.h" + +enum +{ + REMOVE_NEXT = 1 +}; + +%% + +machine pvfs2_batch_remove_sm +{ + state setup_prelude + { + run setup_prelude; + default => prelude; + } + + state prelude + { + jump pvfs2_prelude_work_sm; + success => setup_remove; + default => release; + } + + state setup_remove + { + run setup_remove; + success => remove; + default => remove_complete; + } + + state remove + { + jump pvfs2_remove_work_sm; + default => remove_complete; + } + + state remove_complete + { + run remove_complete; + default => release; + } + + state release + { + run release; + default => remove_next; + } + + state remove_next + { + run remove_next; + REMOVE_NEXT => setup_prelude; + default => response; + } + + state response + { + jump pvfs2_final_response_sm; + default => cleanup; + } + + state cleanup + { + run cleanup; + default => terminate; + } +} + +%% + +static PINT_sm_action setup_prelude( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + assert(s_op); + + /* get the object to remove, the access and scheduling policies */ + s_op->target_fs_id = s_op->req->u.batch_remove.fs_id; + s_op->target_handle = + s_op->req->u.batch_remove.handles[s_op->u.batch_remove.handle_index]; + + s_op->access_type = PINT_server_req_get_access_type(s_op->req); + s_op->sched_policy = PINT_server_req_get_sched_policy(s_op->req); + + 
js_p->error_code = 0; + return SM_ACTION_COMPLETE; +} + +static PINT_sm_action setup_remove( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PINT_server_op *remove_op; + int ret; + + remove_op = malloc(sizeof(*remove_op)); + if(!remove_op) + { + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(remove_op, 0, sizeof(*remove_op)); + + remove_op->u.remove.fs_id = s_op->target_fs_id; + remove_op->u.remove.handle = s_op->target_handle; + + ret = PINT_sm_push_frame(smcb, 0, remove_op); + if(ret < 0) + { + js_p->error_code = ret; + } + return SM_ACTION_COMPLETE; +} + +static PINT_sm_action remove_complete( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *remove_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PINT_server_op *s_op; + int error_code; + int task_id; + int remaining; + + s_op = PINT_sm_pop_frame(smcb, &task_id, &error_code, &remaining); + + free(remove_op); + + if(error_code != 0) + { + s_op->u.batch_remove.error_code = error_code; + return SM_ACTION_COMPLETE; + } + + return SM_ACTION_COMPLETE; +} + +static PINT_sm_action release( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + job_id_t tmp_id; + int ret; + + /* we need to release the scheduled remove request on the target + * handle. 
The schedule call occurred in the prelude_work sm */ + + if(!s_op->scheduled_id) + { + return SM_ACTION_COMPLETE; + } + + if(js_p->error_code) + { + s_op->u.batch_remove.error_code = js_p->error_code; + } + + ret = job_req_sched_release(s_op->scheduled_id, smcb, 0, js_p, &tmp_id, + server_job_context); + s_op->scheduled_id = 0; + return ret; +} + +static PINT_sm_action remove_next( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + if(s_op->u.batch_remove.error_code != 0) + { + js_p->error_code = s_op->u.batch_remove.error_code; + return SM_ACTION_COMPLETE; + } + + if(js_p->error_code != 0) + { + return SM_ACTION_COMPLETE; + } + + s_op->u.batch_remove.handle_index++; + if(s_op->u.batch_remove.handle_index < s_op->req->u.batch_remove.handle_count) + { + js_p->error_code = REMOVE_NEXT; + } + + return SM_ACTION_COMPLETE; +} + + +static int cleanup( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + return(server_state_machine_complete(smcb)); +} + +struct PINT_server_req_params pvfs2_batch_remove_params = +{ + .string_name = "batch_remove", + .perm = PINT_SERVER_CHECK_NONE, + .access_type = PINT_server_req_modify, + .state_machine = &pvfs2_batch_remove_sm +}; + + +/* + * Local variables: + * mode: c + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/server/chdirent.sm b/src/server/chdirent.sm index a6e1c8c..44ec713 100644 --- a/src/server/chdirent.sm +++ b/src/server/chdirent.sm @@ -13,6 +13,7 @@ #include "gossip.h" #include "pvfs2-util.h" #include "pvfs2-internal.h" +#include "pint-util.h" enum { @@ -132,7 +133,7 @@ static PINT_sm_action chdirent_verify_parent_metadata_and_read_directory_entry_h 0, js_p, &i, - server_job_context); + server_job_context, s_op->req->hints); return ret; } @@ -165,7 +166,7 @@ static PINT_sm_action chdirent_read_directory_entry( 0, js_p, &j_id, - server_job_context); + server_job_context, 
s_op->req->hints); return ret; } @@ -222,7 +223,7 @@ static PINT_sm_action chdirent_change_directory_entry( &s_op->key, &s_op->val, TROVE_SYNC | 0, - NULL, smcb, 0, js_p, &j_id, server_job_context); + NULL, smcb, 0, js_p, &j_id, server_job_context, s_op->req->hints); s_op->u.chdirent.dir_attr_update_required = 1; return ret; @@ -270,7 +271,7 @@ static PINT_sm_action chdirent_update_directory_attr( ds_attr, TROVE_SYNC | 0, - smcb, 0, js_p, &j_id, server_job_context); + smcb, 0, js_p, &j_id, server_job_context, s_op->req->hints); return ret; } diff --git a/src/server/check.c b/src/server/check.c index 3b37aa7..dc47351 100644 --- a/src/server/check.c +++ b/src/server/check.c @@ -24,10 +24,10 @@ #include "check.h" static gen_mutex_t check_group_mutex = GEN_MUTEX_INITIALIZER; +static int pw_buf_size = 1024; // 1 KB +static int gr_buf_size = 1024*1024; // 1 MB static char* check_group_pw_buffer = NULL; -static long check_group_pw_buffer_size = 0; static char* check_group_gr_buffer = NULL; -static long check_group_gr_buffer_size = 0; static int PINT_check_group(uid_t uid, gid_t gid); /* PINT_check_mode() @@ -221,18 +221,8 @@ static int PINT_check_group(uid_t uid, gid_t gid) if(!check_group_pw_buffer) { - /* need to create a buffer for pw and grp entries */ -#if defined(_SC_GETGR_R_SIZE_MAX) && defined(_SC_GETPW_R_SIZE_MAX) - /* newish posix systems can tell us what the max buffer size is */ - check_group_gr_buffer_size = sysconf(_SC_GETGR_R_SIZE_MAX); - check_group_pw_buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX); -#else - /* fall back for older systems */ - check_group_pw_buffer_size = 1024; - check_group_gr_buffer_size = 1024; -#endif - check_group_pw_buffer = (char*)malloc(check_group_pw_buffer_size); - check_group_gr_buffer = (char*)malloc(check_group_gr_buffer_size); + check_group_pw_buffer = (char*)malloc(pw_buf_size); + check_group_gr_buffer = (char*)malloc(gr_buf_size); if(!check_group_pw_buffer || !check_group_gr_buffer) { if(check_group_pw_buffer) @@ -251,12 
+241,13 @@ static int PINT_check_group(uid_t uid, gid_t gid) } /* get user information */ - ret = getpwuid_r(uid, &pwd, check_group_pw_buffer, - check_group_pw_buffer_size, - &pwd_p); + ret = getpwuid_r(uid, &pwd, check_group_pw_buffer, pw_buf_size, &pwd_p); if(ret != 0 || pwd_p == NULL) { gen_mutex_unlock(&check_group_mutex); + gossip_err("Get user info for (uid=%d) failed." + "errno [%d] error_msg [%s]\n", + uid, ret, strerror(ret)); return(-PVFS_EINVAL); } @@ -267,27 +258,20 @@ static int PINT_check_group(uid_t uid, gid_t gid) return 0; } - /* get other group information */ - ret = getgrgid_r(gid, &grp, check_group_gr_buffer, - check_group_gr_buffer_size, - &grp_p); - if(ret != 0) + /* get the members of the group */ + ret = getgrgid_r(gid, &grp, check_group_gr_buffer, gr_buf_size, &grp_p); + if(ret != 0 || grp_p == NULL) { - gen_mutex_unlock(&check_group_mutex); - return(-PVFS_EINVAL); - } - - if(grp_p == NULL) - { - gen_mutex_unlock(&check_group_mutex); - gossip_err("User (uid=%d) isn't in group %d on storage node.\n", - uid, gid); - return(-PVFS_EACCES); + gen_mutex_unlock(&check_group_mutex); + gossip_err("Get members for group (gid=%d) failed." 
+ "errno [%d] error_msg [%s]\n", + gid, ret, strerror(ret)); + return(-PVFS_EINVAL); } for(i = 0; grp.gr_mem[i] != NULL; i++) { - if(0 == strcmp(pwd.pw_name, grp.gr_mem[i]) ) + if(0 == strcmp(pwd.pw_name, grp.gr_mem[i])) { gen_mutex_unlock(&check_group_mutex); return 0; diff --git a/src/server/crdirent.sm b/src/server/crdirent.sm index ba8b813..2214101 100644 --- a/src/server/crdirent.sm +++ b/src/server/crdirent.sm @@ -13,6 +13,7 @@ #include "pvfs2-attr.h" #include "pvfs2-util.h" #include "pvfs2-internal.h" +#include "pint-util.h" enum { @@ -202,7 +203,7 @@ static PINT_sm_action crdirent_read_directory_entry_handle( 0, js_p, &i, - server_job_context); + server_job_context, s_op->req->hints); return ret; } @@ -279,7 +280,7 @@ static PINT_sm_action crdirent_write_directory_entry( s_op->u.crdirent.fs_id, s_op->u.crdirent.dirent_handle, &s_op->key, &s_op->val, keyval_flags, - NULL, smcb, 0, js_p, &i, server_job_context); + NULL, smcb, 0, js_p, &i, server_job_context, s_op->req->hints); /* * creating an entry will cause directory times to be updated. @@ -329,7 +330,7 @@ static PINT_sm_action crdirent_update_directory_attr( s_op->req->u.crdirent.fs_id, s_op->req->u.crdirent.handle, ds_attr, TROVE_SYNC, - smcb, 0, js_p, &j_id, server_job_context); + smcb, 0, js_p, &j_id, server_job_context, s_op->req->hints); return ret; } diff --git a/src/server/create-immutable-copies.sm b/src/server/create-immutable-copies.sm new file mode 100644 index 0000000..1248aff --- /dev/null +++ b/src/server/create-immutable-copies.sm @@ -0,0 +1,2120 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +/* adding a comment */ + +#include +#include + +#include "server-config.h" +#include "pvfs2-server.h" +#include "pvfs2-attr.h" +#include "pvfs2-internal.h" +#include "pvfs2-util.h" +#include "pint-util.h" +#include "pint-eattr.h" +#include "pint-cached-config.h" +#include "pvfs2-dist-basic.h" +#include "pvfs2-mirror.h" + +/*Global Variables*/ + +/*all bits turned on*/ +static uint64_t UINT64_HIGH = 0xffffffffffffffffULL; + + +/*attribute keys used for the mirroring process*/ +static char handle_key[] = USER_PVFS2_MIRROR_HANDLES; +static char copy_count_key[] = USER_PVFS2_MIRROR_COPIES; +static char status_key[] = USER_PVFS2_MIRROR_STATUS; +static char mode_key[] = USER_PVFS2_MIRROR_MODE; + +enum { + LOCAL_HANDLES = 100, + REMOTE_HANDLES, + REPLACE_DONE, + LOCAL_SRC, + REMOTE_SRC, + RETRY, + NOTHING_TO_DO +}; + + +#define SERVER_NAME_MAX 1024 +#define WRITE_RETRY_LIMIT 2 +#define DEFAULT_COPIES 1 + + +/*helper macros*/ + +/*Sets up a two dimensional array from a one dimensional array*/ +#define ONE_DIM_TO_TWO_DIMS(in,out,rows,cols,type) \ + do { \ + int i; \ + type *p; \ + for (i=0,p=in; i obtain_source_info; + default => cleanup; + } + + state obtain_source_info + { + run obtain_source_info; + success => inspect_source_info; + default => cleanup; + } + + state inspect_source_info + { + run inspect_source_info; + success => create_local_datahandles; + default => cleanup; + } + + state create_local_datahandles + { + run create_local_datahandles; + success => obtain_local_handle_sizes; + default => cleanup; + } + + state obtain_local_handle_sizes + { + run obtain_local_handle_sizes; + success => inspect_local_handle_sizes; + default => cleanup; + } + + state inspect_local_handle_sizes + { + run inspect_local_handle_sizes; + success => create_remote_datahandles; + default => cleanup; + } + + state create_remote_datahandles + { + run create_remote_datahandles; + success => setup_datahandle_copies; + default => remove_local_datahandle_objects; + } + + state 
setup_datahandle_copies + { + run setup_datahandle_copies; + success => copy_data; + default => remove_local_datahandle_objects; + } + + state copy_data + { + pjmp copy_data + { + LOCAL_SRC => pvfs2_pjmp_mirror_work_sm; + REMOTE_SRC => pvfs2_pjmp_call_msgpairarray_sm; + } + success => check_copy_results; + default => cleanup; + } + + state check_copy_results + { + run check_copy_results; + success => store_mirror_info; + default => check_for_retries; + } + + state check_for_retries + { + run check_for_retries; + RETRY => copy_data; + default => store_mirror_info; + } + + state store_mirror_info + { + run store_mirror_info; + /* default => replace_remote_datahandle_objects; + * If the write of the datahandle information fails, even though the + * the copies actually exist, the metadata for the logical file will + * NOT have knowledge of it. */ + default => check_store_job; + } + + state check_store_job + { + run check_store_job; + default => cleanup; + } + + state replace_remote_datahandle_objects + { + run replace_remote_datahandle_objects; + REPLACE_DONE => remove_local_datahandle_objects; + default => replace_remote_datahandle_objects; + } + + state remove_local_datahandle_objects + { + run remove_local_datahandle_objects; + default => cleanup; + } + + state cleanup + { + run cleanup; + default => return; + } + +} /*end nested state machine pvfs2_create_immutable_copies_sm*/ +%% + + +/* Actions for pvfs2_create_immutable_copies_sm */ +static PINT_sm_action initialize_structures ( struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, "Executing initialize_structures....\n"); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tframe count is %d.\n", + smcb->frame_count); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\t base frame is %d.\n", + smcb->base_frame); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = &(s_op->u.create_copies); + + int ret; + + js_p->error_code = 0; + + + + /* These 
values are generated by default when the prelude executes. + * When seteattr executes, the prelude retrieves the common metadata info. */ + imm_p->dfile_count = s_op->target_object_attr->u.meta.dfile_count; + imm_p->metadata_handle = s_op->target_handle; + imm_p->fs_id = s_op->target_fs_id; + + /* Get the number of IO servers currently running for the given filesystem */ + ret = PINT_cached_config_get_num_io(imm_p->fs_id, + &(imm_p->io_servers_count)); + if (ret) + { + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tdfile_count: %d\tmetadata_handle: %llu" + "\tfs_id: %u" + "\tio_servers_count: %d\n", + imm_p->dfile_count, + llu(imm_p->metadata_handle), + imm_p->fs_id, + imm_p->io_servers_count ); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tds_attr.b_size:%d\n", + (int)s_op->ds_attr.u.datafile.b_size); + + + return SM_ACTION_COMPLETE; +} /* end action initialize_structures */ + + + +static PINT_sm_action obtain_source_info (struct PINT_smcb *smcb, + job_status_s *js_p) +{ + /* In this state, we are retrieving the data handles, the number of + * desired copies, and the mirroring mode for the given meta data handle. + * If the mirroring mode == NO_MIRRORING, then we will not perform the + * mirror operation. */ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing obtain_source_info....\n"); + + struct PINT_server_op *sm_p = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = &(sm_p->u.create_copies); + int keyval_count = 3; + job_id_t job_id; + int ret = 0,i; + + js_p->error_code = 0; + + /* allocate space to retrieve attributes from trove. 
*/ + sm_p->keyval_count = keyval_count; + + sm_p->key_a = malloc(sizeof(*sm_p->key_a) * sm_p->keyval_count); + sm_p->val_a = malloc(sizeof(*sm_p->val_a) * sm_p->keyval_count); + sm_p->error_a = malloc(sizeof(*sm_p->error_a) * sm_p->keyval_count); + if (!sm_p->key_a || !sm_p->val_a || !sm_p->error_a) + goto error_exit; + + memset(sm_p->key_a, 0, sizeof(*sm_p->key_a) * sm_p->keyval_count); + memset(sm_p->val_a, 0, sizeof(*sm_p->val_a) * sm_p->keyval_count); + memset(sm_p->error_a, 0, sizeof(*sm_p->error_a) * sm_p->keyval_count); + + /* setup key/val to retreive the mirroring mode */ + i=0; + assert(ikey_a[i].buffer = mode_key; + sm_p->key_a[i].buffer_sz = sizeof(mode_key); + + sm_p->val_a[i].buffer = &(imm_p->mirror_mode); + sm_p->val_a[i].buffer_sz = sizeof(imm_p->mirror_mode); + + /* setup key/val to retreive the datahandles */ + i++; assert(ikey_a[i].buffer = Trove_Common_Keys[METAFILE_HANDLES_KEY].key; + sm_p->key_a[i].buffer_sz = Trove_Common_Keys[METAFILE_HANDLES_KEY].size; + + imm_p->handle_array_base = malloc(imm_p->dfile_count * sizeof(PVFS_handle)); + if (!imm_p->handle_array_base) + goto error_exit; + sm_p->val_a[i].buffer = imm_p->handle_array_base; + sm_p->val_a[i].buffer_sz = (imm_p->dfile_count * sizeof(PVFS_handle)); + + /* setup key/val to retreive the number of copies */ + i++; assert(ikey_a[i].buffer = copy_count_key; + sm_p->key_a[i].buffer_sz = sizeof(copy_count_key); + + sm_p->val_a[i].buffer = &(imm_p->copies); + sm_p->val_a[i].buffer_sz = sizeof(imm_p->copies); + + + /* don't need to get file's distriubtion information, because we are + * copying each datahandle, as is, directly into a new datahandle. + * Distribution is only needed when you are modifying the logical + * file, not the individual data handles. However, we need to provide + * a distribution value for the IO request, even though it won't + * be used. 
So, instead of issuing a trove call to get the file's + * distribution information, we will be using just the "basic" + * distribution. */ + + imm_p->dist = malloc(sizeof(PINT_dist)); + if (!imm_p->dist) + goto error_exit; + memset(imm_p->dist,0,sizeof(PINT_dist)); + + imm_p->dist->dist_name = malloc(PVFS_DIST_BASIC_NAME_SIZE); + if (!imm_p->dist->dist_name) + goto error_exit; + strcpy(imm_p->dist->dist_name,PVFS_DIST_BASIC_NAME); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tdistribution name:%s\n", + imm_p->dist->dist_name); + + ret = PINT_dist_lookup(imm_p->dist); + if (ret) + { + gossip_lerr("Error looking up basic distribution:%d", ret); + js_p->error_code = ret; + goto error_exit; + } + + /* retrieve key/val pairs */ + ret = job_trove_keyval_read_list( imm_p->fs_id, + imm_p->metadata_handle, + sm_p->key_a, + sm_p->val_a, + sm_p->error_a, + sm_p->keyval_count, + 0, + NULL, + smcb, + 0, + js_p, + &job_id, + server_job_context, + NULL ); + return (ret); + +error_exit: + if (sm_p->key_a) + free (sm_p->key_a); + if (sm_p->val_a) + free (sm_p->val_a); + if (sm_p->error_a) + free(sm_p->error_a); + sm_p->key_a = sm_p->val_a = NULL; + sm_p->error_a = NULL; + + if (imm_p->dist && imm_p->dist->dist_name) + free(imm_p->dist->dist_name); + if (imm_p->dist) + free(imm_p->dist); + imm_p->dist = NULL; + + if (js_p->error_code == 0) + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; +} /* end action obtain_source_info */ + + +static PINT_sm_action inspect_source_info (struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing inspect_source_info....\n"); + + struct PINT_server_op *sm_p = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = &(sm_p->u.create_copies); + char server_name[SERVER_NAME_MAX] = {0}; + server_configuration_s *config = get_server_config_struct(); + int ret = 0; + int i; + + /* check error codes from previous trove read-list call. 
*/ + for (i=0; ikeyval_count; i++) + { + /* if the mirroring mode has no entry, the mode=NO_MIRRORING, or the + * mode is not the expected mode, then there is nothing to do. */ + if (sm_p->key_a[i].buffer == mode_key) + { + if (PVFS_get_errno_mapping(sm_p->error_a[i]) == ENOENT) + { + js_p->error_code = NOTHING_TO_DO; + goto error_exit; + } + imm_p->mirror_mode = *(MIRROR_MODE *)sm_p->val_a[i].buffer; + gossip_debug(GOSSIP_MIRROR_DEBUG, + "\tRetrieved mirroring mode is %d.\n", + imm_p->mirror_mode); + if (imm_p->mirror_mode == NO_MIRRORING || + imm_p->mirror_mode != imm_p->expected_mirror_mode) + { + js_p->error_code = NOTHING_TO_DO; + goto error_exit; + } + } + + /* if the user hasn't set the number of copies, this code will + * set a default (currently = 1). */ + if (sm_p->key_a[i].buffer == copy_count_key) + { + if (PVFS_get_errno_mapping(sm_p->error_a[i]) == ENOENT) + { + gossip_lerr("User-defined number of copies not found. " + "Defaulting number of copies to %d.\n", + DEFAULT_COPIES); + imm_p->copies = DEFAULT_COPIES; + continue; + } + } + + /* check for other types of errors. */ + if (sm_p->error_a[i]) + { + gossip_lerr("Error retrieving value for '%s' : %s\n", + (char *)sm_p->key_a[i].buffer, + strerror(PVFS_get_errno_mapping(-sm_p->error_a[i]))); + js_p->error_code = sm_p->error_a[i]; + goto error_exit; + } + } /* end for */ + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tRetrieved # of copies:%d\n", + imm_p->copies); + + /* If there is only one server running, then it makes no sense to create + * copies on the same server. */ + if (imm_p->io_servers_count == 1) + { + gossip_lerr("Mirroring operation is not permitted when only one " + "I/O server is running.\n"); + js_p->error_code = -PVFS_EPERM; + return SM_ACTION_COMPLETE; + } + + /* We need at least (# of copies) + 1 I/O servers running in the system to + * prevent duplicate data on any one server, while not exceeding the number + * of I/O servers in the system. 
If the number of copies requested by the + * user is >= the number of I/O servers in the system, then we lower the + * number of requested copies. We then set the number of I/O servers + * required to meet this request with the (new value of copies) + 1. + * At this point, if the number of I/O servers required is less than the + * number of servers in this file's distribution, then set the number of + * required I/O servers to the same number of servers in this file's + * distribution. */ + if (imm_p->copies >= imm_p->io_servers_count) + imm_p->copies = imm_p->io_servers_count - 1; + + imm_p->io_servers_required = imm_p->copies + 1; + + if (imm_p->io_servers_required < imm_p->dfile_count) + imm_p->io_servers_required = imm_p->dfile_count; + + /* allocate space for io_servers array. this array will contain the + * server names which will be used as the valid destination remotes for the + * copies. */ + imm_p->io_servers = malloc( imm_p->io_servers_required * sizeof(char *) ); + if ( !imm_p->io_servers ) + { + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + memset(imm_p->io_servers,0,imm_p->io_servers_required * sizeof(char *)); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tAllocated char *:\n"); + for (i=0; iio_servers_required; i++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\t io_servers[%d] : %p " + "\t &io_servers[%d] : %p\n", + i, imm_p->io_servers[i], + i, &(imm_p->io_servers[i])); + } + imm_p->num_io_servers = imm_p->io_servers_required; + + for (i=0;iio_servers_required;i++) + { + imm_p->io_servers[i] = malloc( sizeof(server_name) ); + if ( !imm_p->io_servers[i] ) + { + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + memset(imm_p->io_servers[i],0,sizeof(server_name)); + } + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tAllocated server_name..\n"); + for (i=0; iio_servers_required; i++) + gossip_debug(GOSSIP_MIRROR_DEBUG, "\t io_servers[%d] : %p " + "\t *io_servers[%d] : %s " + "\t &io_servers[%d] : %p\n", + i, imm_p->io_servers[i], + i, 
imm_p->io_servers[i], + i, &(imm_p->io_servers[i])); + + /* allocate space for the local source handles */ + imm_p->handle_array_base_local = + malloc(imm_p->dfile_count * sizeof(PVFS_handle)); + if (!imm_p->handle_array_base_local) + { + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + memset(imm_p->handle_array_base_local, 0 , + imm_p->dfile_count * sizeof(PVFS_handle)); + imm_p->handle_array_base_local_count = 0; + + /* allocate space for the data handle copies array, which will hold a + * combination of local and remote data handles used as destination handles + * for the copies. The order of this array will mimmick the order of the + * original data file array. Thus, handle_array_base[i] + * and handle_array_copies[i] will hold handles for the same server + * number. */ + imm_p->handle_array_copies = malloc(imm_p->io_servers_required * + imm_p->copies * + sizeof(PVFS_handle)); + if ( !imm_p->handle_array_copies ) + { + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + memset(imm_p->handle_array_copies,0,imm_p->io_servers_required * + imm_p->copies * sizeof(PVFS_handle)); + + /* allocate space for the local_io_servers array. this array contains the + * server names for local data handles. */ + imm_p->local_io_servers = malloc( imm_p->io_servers_required * + sizeof(char *)); + if ( !imm_p->local_io_servers ) + { + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + + memset(imm_p->local_io_servers, 0, + imm_p->io_servers_required * sizeof(char *)); + imm_p->local_io_servers_count = 0; + + /* allocate space for the remote_io_servers array. this array contains the + * server names for remote data handles. 
*/ + imm_p->remote_io_servers = malloc( imm_p->io_servers_required * + sizeof(char*)); + if ( !imm_p->remote_io_servers ) + { + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + memset(imm_p->remote_io_servers, 0, + imm_p->io_servers_required * sizeof(char*)); + imm_p->remote_io_servers_count = 0; + + /* populate the io_servers array with server_names: + * Step 1: Always start by using the server names associated with the + * original datahandles, keeping the order in tact. + * Step 2: If additional servers are needed, then tap into the list of + * servers running in the file system and grab those that are not + * currently in the io_servers list. */ + + /* Step 1 */ + for (i=0; idfile_count; i++) + { + ret = PINT_cached_config_get_server_name( imm_p->io_servers[i], + sizeof(server_name)-1, + imm_p->handle_array_base[i], + imm_p->fs_id ); + if (ret) + { + js_p->error_code = ret; + goto error_exit; + } + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tValue of server_name is %s " + "for handle %llu\n", + imm_p->io_servers[i], + llu(imm_p->handle_array_base[i])); + } /* end for */ + + /* Step 2 */ + if (imm_p->io_servers_required > imm_p->dfile_count) + { + ret = get_server_names(imm_p); + if (ret) + { + gossip_lerr("Unable to populate io_servers list.\n"); + js_p->error_code = ret; + goto error_exit; + } + } + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tconfig->host_id is %s.\n", + config->host_id); + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\timm_p->io_servers_required : %d\n", + imm_p->io_servers_required); + for (i=0; iio_servers_required; i++) + { + char *server_name = imm_p->io_servers[i]; + if (strncmp(server_name, config->host_id, SERVER_NAME_MAX-1) == 0) + { /* local */ + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tprocessing local....\n"); + imm_p->local_io_servers[imm_p->handle_array_copies_local_count] = + malloc( SERVER_NAME_MAX ); + if ( !imm_p->local_io_servers[ + imm_p->handle_array_copies_local_count] ) + { + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + 
memset( imm_p->local_io_servers[ + imm_p->handle_array_copies_local_count], + 0, + SERVER_NAME_MAX); + memcpy(imm_p->local_io_servers[ + imm_p->handle_array_copies_local_count], + server_name, SERVER_NAME_MAX-1); + if ( i < imm_p->dfile_count ) + { + imm_p->handle_array_base_local[ + imm_p->handle_array_base_local_count] + = imm_p->handle_array_base[i]; + imm_p->handle_array_base_local_count++; + gossip_debug(GOSSIP_MIRROR_DEBUG, + "\tlocal source handle(%d):%llu\n", + imm_p->handle_array_base_local_count, + llu(imm_p->handle_array_base_local[ + imm_p->handle_array_base_local_count])); + } + + imm_p->handle_array_copies_local_count++; + } + else + { /*remote*/ + imm_p->remote_io_servers[imm_p->handle_array_copies_remote_count] = + malloc( SERVER_NAME_MAX ); + if ( ! imm_p->remote_io_servers[ + imm_p->handle_array_copies_remote_count]) + { + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + memset(imm_p->remote_io_servers[ + imm_p->handle_array_copies_remote_count], 0, SERVER_NAME_MAX); + memcpy(imm_p->remote_io_servers[ + imm_p->handle_array_copies_remote_count], + server_name, SERVER_NAME_MAX-1); + imm_p->handle_array_copies_remote_count++; + } /* end if */ + } /* end for */ + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tLocal: %d\tRemote: %d\n", + imm_p->handle_array_copies_local_count, + imm_p->handle_array_copies_remote_count); + + imm_p->local_io_servers_count = imm_p->handle_array_copies_local_count; + imm_p->remote_io_servers_count = imm_p->handle_array_copies_remote_count; + + for (i=0; ilocal_io_servers_count; i++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tlocal_io_servers[%d]: %s\n", i, + imm_p->local_io_servers[i]); + } + + for (i=0; iremote_io_servers_count; i++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tremote_io_servers[%d]: %s\n", i, + imm_p->remote_io_servers[i]); + } + + /* allocate and initialize space for local and remote handle arrays */ + if (imm_p->handle_array_copies_local_count) + { + imm_p->handle_array_copies_local = + malloc( 
imm_p->handle_array_copies_local_count * + imm_p->copies * sizeof(PVFS_handle)); + if ( !imm_p->handle_array_copies_local ) + { + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + } + if (imm_p->handle_array_copies_remote_count) + { + imm_p->handle_array_copies_remote = + malloc( imm_p->handle_array_copies_remote_count * + imm_p->copies * sizeof(PVFS_handle)); + if ( !imm_p->handle_array_copies_remote ) + { + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + } /* end if */ + + memset(imm_p->handle_array_copies_local, 0, + imm_p->handle_array_copies_local_count * + imm_p->copies * sizeof(PVFS_handle)); + memset(imm_p->handle_array_copies_remote, 0, + imm_p->handle_array_copies_remote_count * + imm_p->copies * sizeof(PVFS_handle)); + +error_exit: + /* all other memory will be freed in the "cleanup" action. */ + free(sm_p->key_a); + free(sm_p->val_a); + free(sm_p->error_a); + sm_p->key_a = sm_p->val_a = NULL; + sm_p->error_a = NULL; + + return SM_ACTION_COMPLETE; +} /* end action inspect_source_info */ + + + +/* We must get the bstream size for any datahandles that reside on this server. + * This scenario occurs when a metadata and i/o server are one of the same or + * a metadata server and i/o server are running on the same machine. I think + * this is outdated now, but I check for it anyway. */ +static PINT_sm_action obtain_local_handle_sizes(struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, + "Executing obtain_local_handle_sizes....\n"); + + struct PINT_server_op *sm_p = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = &(sm_p->u.create_copies); + job_id_t job_id; + int ret = 0; + + js_p->error_code = 0; + + /* Do we have any local handles? 
*/ + if (imm_p->handle_array_copies_local_count == 0) + return SM_ACTION_COMPLETE; + + sm_p->error_a = malloc(imm_p->handle_array_base_local_count * + sizeof(PVFS_error) ); + if (!sm_p->error_a) + { + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + + imm_p->ds_attr_a = malloc(imm_p->handle_array_base_local_count * + sizeof(PVFS_ds_attributes)); + if (!imm_p->ds_attr_a) + { + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + + ret = job_trove_dspace_getattr_list(imm_p->fs_id, + imm_p->handle_array_base_local_count, + imm_p->handle_array_base_local, + smcb, + sm_p->error_a, + imm_p->ds_attr_a, + 0, + js_p, + &job_id, + server_job_context, + NULL); + return ret; + + +error_exit: + if (sm_p->error_a) + free(sm_p->error_a); + sm_p->error_a = NULL; + + return SM_ACTION_COMPLETE; +}/* end action obtain_local_handle_sizes */ + + + +static PINT_sm_action inspect_local_handle_sizes(struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, + "Executing inspect_local_handle_sizes..\n"); + + struct PINT_server_op *sm_p = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = &(sm_p->u.create_copies); + int i,j; + + /* Do we have any local handles? 
*/ + if (imm_p->handle_array_copies_local_count == 0) + return SM_ACTION_COMPLETE; + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tchecking for errors....\n"); + + /* check for errors */ + for (i=0; ihandle_array_base_local_count; i++) + { + if (sm_p->error_a[i]) + { + js_p->error_code = sm_p->error_a[i]; + free(sm_p->error_a); + sm_p->error_a = NULL; + return SM_ACTION_COMPLETE; + } + } /* end for */ + js_p->error_code = 0; + + imm_p->bstream_array_base_local = malloc(imm_p->dfile_count * + sizeof(PVFS_size)); + if (!imm_p->bstream_array_base_local) + { + js_p->error_code = -PVFS_ENOMEM; + free(sm_p->error_a); + sm_p->error_a = NULL; + return SM_ACTION_COMPLETE; + } + memset(imm_p->bstream_array_base_local, 0, + imm_p->dfile_count * sizeof(PVFS_size)); + + gossip_debug(GOSSIP_MIRROR_DEBUG, + "\tpopulating bstream_array_base_local...\n"); + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\thandle_array_base_local_count:%d\n", + imm_p->handle_array_base_local_count); + /* populate bstream_array_base_local */ + for (i=0; ihandle_array_base_local_count; i++) + { + for (j=0; jdfile_count; j++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tlocal handle(%d):%llu" + "\tbase handle(%d):%llu\n", + i, llu(imm_p->handle_array_base_local[i]), + j, llu(imm_p->handle_array_base[j]) ); + if (imm_p->handle_array_base_local[i] == + imm_p->handle_array_base[j]) + { + imm_p->bstream_array_base_local[j] = + imm_p->ds_attr_a[i].u.datafile.b_size; + gossip_debug(GOSSIP_MIRROR_DEBUG, "\thandle:%llu\tsize:%d\n", + llu(imm_p->handle_array_base_local[i]), + (int)imm_p->bstream_array_base_local[j] ); + } + } /* end for */ + } /* end for */ + + free(sm_p->error_a); + sm_p->error_a = NULL; + + return SM_ACTION_COMPLETE; +} /* end action inspect_local_handle_sizes */ + + + +static PINT_sm_action create_local_datahandles (struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, + "Executing create_local_datahandles....\n"); + + struct PINT_server_op *sm_p = 
PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = &(sm_p->u.create_copies); + job_id_t job_id; + int ret = 0; + int i; + + PVFS_handle_extent_array data_handle_ext_array; + server_configuration_s *config = get_server_config_struct(); + + js_p->error_code = 0; + + /* Do we have any local handles? */ + if (imm_p->handle_array_copies_local_count == 0) + return SM_ACTION_COMPLETE; + + gossip_debug(GOSSIP_MIRROR_DEBUG, "Target handle: %llu\tTarget FS ID: %d\n", + llu(imm_p->metadata_handle),imm_p->fs_id); + gossip_debug(GOSSIP_MIRROR_DEBUG, "dfile count: %d\n",imm_p->dfile_count); + gossip_debug(GOSSIP_MIRROR_DEBUG, "stuffed size: %d\n", + sm_p->target_object_attr->u.meta.stuffed_size); + gossip_debug(GOSSIP_MIRROR_DEBUG,"hint.flags: %llu\n", + llu(sm_p->target_object_attr->u.meta.hint.flags)); + gossip_debug(GOSSIP_MIRROR_DEBUG,"dfile array P: %p\n", + sm_p->target_object_attr->u.meta.dfile_array); + + /* find local IO extent array for this file system for metadata host */ + ret = PINT_cached_config_get_server( imm_p->fs_id, + config->host_id, + PINT_SERVER_TYPE_IO, + &data_handle_ext_array ); + if (ret) + { + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + for (i=0;icopies,&imm_p->copies); + + + /*i create local datahandles - will be used as destination handles for + * copies*/ + ret = job_trove_dspace_create_list( imm_p->fs_id, + &data_handle_ext_array, + imm_p->handle_array_copies_local, + imm_p->handle_array_copies_local_count * + imm_p->copies, + PVFS_TYPE_DATAFILE, + NULL, + TROVE_SYNC, + smcb, + 0, + js_p, + &job_id, + server_job_context, + NULL ); + return ret; +} /* end action create_local_datahandles */ + + +static PINT_sm_action create_remote_datahandles (struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, + "Executing create_remote_datahandles...\n"); + struct PINT_server_op *sm_p = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = 
&(sm_p->u.create_copies); + int ret = 0,i; + job_id_t job_id; + + js_p->error_code = 0; + + if (imm_p->handle_array_copies_remote_count == 0) + return SM_ACTION_COMPLETE; + + int rows = imm_p->copies; + int cols = imm_p->handle_array_copies_remote_count; + + imm_p->my_remote_servers = malloc(sizeof(char *) * rows * cols); + if (!imm_p->my_remote_servers) + { + gossip_lerr("Error allocating imm_p->my_remote_servers.\n"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(imm_p->my_remote_servers,0,sizeof(char *) * rows * cols); + + /* setup my_remote_servers[copy,remote#] = remote server name. This will + * allow job_precreate_pool to return handle_array_copies_remote where + * [copy,remote#] = remote handle. We end up with a list of remotes for + * each copy in original distribution order. */ + for (i=0; i<(rows*cols); i++) + { + imm_p->my_remote_servers[i] = imm_p->remote_io_servers[i%cols]; + gossip_debug(GOSSIP_MIRROR_DEBUG, "\t\tremote_io_servers[%d]:%s " + "my_remote_servers[%d]:%s\n", + (i%cols), + imm_p->remote_io_servers[i%cols], + i, + imm_p->my_remote_servers[i]); + } + + ret = job_precreate_pool_get_handles(imm_p->fs_id, + rows*cols, + PVFS_TYPE_DATAFILE, + (const char **) + imm_p->my_remote_servers, + imm_p->handle_array_copies_remote, + 0, + smcb, + 0, + js_p, + &job_id, + server_job_context, + NULL); + return ret; +} /* end action create_remote_datahandles */ + + +static PINT_sm_action setup_datahandle_copies (struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, "Executing setup_datahandle_copies...\n"); + struct PINT_server_op *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = &(sm_p->u.create_copies); + server_configuration_s *config = get_server_config_struct(); + int i, j, k; + + js_p->error_code = 0; + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tRemote destination handles:\n"); + int rows = imm_p->copies; + int cols = 
imm_p->handle_array_copies_remote_count; + for (i=0; i<(rows*cols); i++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\t\tRemote handle(%d):%llu\n", + i, + llu(imm_p-> + handle_array_copies_remote[i])); + } + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tLocal destination handles:\n"); + cols = imm_p->handle_array_copies_local_count; + for (i=0; i<(rows*cols); i++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\t\tLocal handle(%d):%llu\n", + i, + llu(imm_p-> + handle_array_copies_local[i])); + } + + for (i=0, j=0, k=0; i < (imm_p->io_servers_required * imm_p->copies); i++) + { + if ( strncmp(imm_p->io_servers[i%imm_p->io_servers_required], + config->host_id,SERVER_NAME_MAX-1) == 0 ) + { /* local */ + memcpy(&imm_p->handle_array_copies[i], + &imm_p->handle_array_copies_local[j],sizeof(PVFS_handle)); + j++; + } + else + { /* remote */ + memcpy(&imm_p->handle_array_copies[i], + &imm_p->handle_array_copies_remote[k],sizeof(PVFS_handle)); + k++; + } + } /* end for */ + + for (i=0; i<(imm_p->io_servers_required * imm_p->copies); i++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\thandle_array_copies[%d]: %llu.\n", + i, + llu(imm_p->handle_array_copies[i])); + } + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tnumber of io servers required: %d\n", + imm_p->io_servers_required); + + /* create and initialize the writes_completed array */ + imm_p->writes_completed = malloc(sizeof(PVFS_handle) * imm_p->dfile_count + * imm_p->copies); + if (!imm_p->writes_completed) + { + gossip_lerr("Unable to allocate imm_p->writes_completed.\n"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(imm_p->writes_completed, UINT64_HIGH, sizeof(PVFS_handle) * + imm_p->dfile_count * imm_p->copies); + + /* the retry count is used to monitor how many times we retry a write. + * this value is incremented in the check_for_retries state. 
*/ + imm_p->retry_count = 0; + + return SM_ACTION_COMPLETE; +} /* end action setup_datahandle_copies */ + + +static PINT_sm_action copy_data (struct PINT_smcb *smcb, job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, "Executing copy_data....\n"); + struct PINT_server_op *sm_p = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = &(sm_p->u.create_copies); + server_configuration_s *config = get_server_config_struct(); + filesystem_configuration_s *fs = PINT_config_find_fs_id( config, + imm_p->fs_id); + int ret = 0; + int src, row, cols, i, index, wc; + + /* this variable helps to understand the logic better. it is a + * redeclaration of the one dimensional imm_p->handle_array_copies and can + * be accessed as handle_array_copies[copies,dfile_count]. */ + PVFS_handle *handle_array_copies[imm_p->copies]; + memset(handle_array_copies,0,sizeof(PVFS_handle) * imm_p->copies); + ONE_DIM_TO_TWO_DIMS(imm_p->handle_array_copies, + handle_array_copies, + imm_p->copies,imm_p->io_servers_required, + PVFS_handle); + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tone dim to two dims:\n"); + for (row=0; rowcopies; row++) + { + for (cols=0; colsio_servers_required; cols++) + gossip_debug(GOSSIP_MIRROR_DEBUG, "\t\thandle_array_copies[%d][%d] : " + "%llu\n", + row,cols, + llu(handle_array_copies[row][cols])); + } + + js_p->error_code = 0; + + /* for each source handle[src], create a MIRROR request containing a set + * of destination handles. */ + for (src=0; srcdfile_count; src++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tWorking on src #%d\n",src); + + /* writes_completed indicates the status of each copy for each source: + * 0 ==> completed, + ==> incomplete, UINT64_HIGH ==> initial state. + * If incomplete, the value stored in the array is the destination + * handle. 
*/ + for (row=src,cols=0; colscopies; cols++) + { + index = (imm_p->copies * row) + cols; + /* this will capture UINT64_HIGH or handle */ + if ( imm_p->writes_completed[index] > 0 + || imm_p->writes_completed[index] == UINT64_HIGH) + break; + } + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tValue of cols is %d\n",cols); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tValue of imm_p->copies is %d.\n", + imm_p->copies); + + /* if all copies for this source are zero ==> process next source. */ + if (cols==imm_p->copies) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tThis source[%d] has all " + "complete writes.\n", src); + continue; + } + + struct PVFS_server_req *req = malloc(sizeof(struct PVFS_server_req)); + if (!req) + { + gossip_lerr("Unable to allocate PVFS_server_req.\n"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(req,0,sizeof(struct PVFS_server_req)); + + req->u.mirror.dst_handle = malloc(sizeof(PVFS_handle) * imm_p->copies); + if ( !req->u.mirror.dst_handle) + { + gossip_lerr("Unable to allocate mirror.dst_handle.\n"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(req->u.mirror.dst_handle, 0, + sizeof(PVFS_handle) * imm_p->copies); + + /* index into the writes_completed array for each destination handle */ + req->u.mirror.wcIndex = malloc(sizeof(uint32_t) * imm_p->copies); + if ( !req->u.mirror.wcIndex ) + { + gossip_lerr("Unable to allocate mirror.wcIndex.\n"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(req->u.mirror.wcIndex,0,sizeof(uint32_t) * imm_p->copies); + + req->op = PVFS_SERV_MIRROR; + req->credentials = sm_p->req->credentials; + + req->u.mirror.src_handle = imm_p->handle_array_base[src]; + + + /* In the initial state or when all writes have failed, get destination + * handles from handle_array_copies array. Otherwise, use the handles + * stored in the writes_completed array. 
*/ + index = imm_p->copies*src; /* first copy for this source */ + if (imm_p->writes_completed[index] == UINT64_HIGH) + { + /* handle_array_copies[copy,server#] is accessed as a + * two-dimensional array where a row represents a copy and columns + * represent the destination handles,in order of the original file + * distribution. We map the source handle[i], which is also in + * distribution order, to + * handle_arrray_copies[0,i+1],[1,i+2],..,[n-1,(i+y)-1], where n + * is the number of copies and y is the number of handles in one + * copy */ + for (wc=0,row=0,cols=(src+1)%imm_p->io_servers_required; + row < imm_p->copies; + wc++,row++,cols=(cols+1)%imm_p->io_servers_required) + { + req->u.mirror.dst_handle[row] = handle_array_copies[row][cols]; + req->u.mirror.wcIndex[row] = index + wc; + req->u.mirror.dst_count++; + } + } + else + { + for (row=src,cols=0,i=0; colscopies; cols++,i++) + { + index = (imm_p->copies*row) + cols; + if (imm_p->writes_completed[index] > 0) + { + req->u.mirror.dst_handle[i] = + imm_p->writes_completed[index]; + req->u.mirror.wcIndex[i] = index; + req->u.mirror.dst_count++; + } + } + } + req->u.mirror.fs_id = imm_p->fs_id; + req->u.mirror.dist = imm_p->dist; + req->u.mirror.src_server_nr = src; + req->u.mirror.flow_type = fs->flowproto; + req->u.mirror.encoding = fs->encoding; + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\treq->: src:%llu\tfs_id:%d" + "\tdist name:%s\tsrc server_nr:%d\n", + llu(req->u.mirror.src_handle), + req->u.mirror.fs_id, + req->u.mirror.dist->dist_name, + req->u.mirror.src_server_nr ); + for (i=0; iu.mirror.dst_count; i++) + gossip_debug(GOSSIP_MIRROR_DEBUG, "\treq->dst_handle[%d] : %llu\n", + i, + llu(req->u.mirror.dst_handle[i])); + + struct PINT_server_op *mirror_op = + malloc(sizeof(struct PINT_server_op)); + if (!mirror_op) + { + gossip_lerr("Error allocating mirror_op"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(mirror_op, 0, sizeof(struct PINT_server_op)); + + 
gossip_debug(GOSSIP_MIRROR_DEBUG,"\tabout to allocate mirror_op...\n"); + + if (imm_p->bstream_array_base_local) + { + req->u.mirror.bsize = imm_p->bstream_array_base_local[src]; + } + + mirror_op->req = req; + mirror_op->op = req->op; + mirror_op->addr = sm_p->addr; /* get addr for this server */ + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tmirror_op->req(%p)\n", + mirror_op->req); + + if ( strncmp(imm_p->io_servers[src], config->host_id, + SERVER_NAME_MAX-1) == 0 ) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"Above SRC is local.\n"); + PINT_sm_push_frame(smcb, LOCAL_SRC, mirror_op); + } + else + { + /* setup msgpairarray call. This msgpair represents a connection + * between the meta server and a remote IO server. The request + * for the remote IO server is PVFS_SERV_MIRROR, which will read + * data residing on that server and write it to a destination + * handle specified in the request. The response returned from + * this msgpair will indicate if the copy was successful. */ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Above SRC is remote.\n"); + + PINT_sm_msgarray_op *msgarray_op = &(mirror_op->msgarray_op); + + memset(msgarray_op,0,sizeof(PINT_sm_msgarray_op)); + msgarray_op->msgarray = &msgarray_op->msgpair; + msgarray_op->count = 1; + PINT_sm_msgpair_state *msg_p = &msgarray_op->msgpair; + + msg_p->req = *req; + msg_p->fs_id = req->u.mirror.fs_id; + msg_p->handle = req->u.mirror.src_handle; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = mirror_comp_fn; + + /* setup msgarray parameters */ + PINT_serv_init_msgarray_params(mirror_op,req->u.mirror.fs_id); + + /* determine the BMI svr address for the source handle */ + ret = PINT_cached_config_map_to_server(&msg_p->svr_addr, + msg_p->handle, + msg_p->fs_id ); + if (ret) + { + gossip_err("Failed to map address\n"); + js_p->error_code = -1; + return SM_ACTION_COMPLETE; + } + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tmsg_p->req.op:%d" + "\tmsg_p->fs_id:%d" + "\tmsg_p->handle:%llu\n", + msg_p->req.op, + msg_p->fs_id, + 
llu(msg_p->handle) ); + + PINT_sm_push_frame(smcb, REMOTE_SRC, mirror_op); + } + } /* end for (src) */ + + + return SM_ACTION_COMPLETE; +} /* end action copy_data */ + + +static PINT_sm_action check_copy_results (struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing check_copy_results....\n"); + struct PINT_server_op *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = &(sm_p->u.create_copies); + int task_id, error_code, remaining, i, j, index; + struct PINT_server_op *mirror_op = NULL; + struct PVFS_servresp_mirror *respmir = NULL; + struct PVFS_servreq_mirror *reqmir = NULL; + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tjs_p->error_code:%d\n", + js_p->error_code); + js_p->error_code = 0; + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tsm_p->op:%d\n",sm_p->op); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tsmcb->base_frame:%d" + "\tsmcb->frame_count:%d\n", + smcb->base_frame,smcb->frame_count); + + /* the pjmp should have pushed at least one frame */ + assert(smcb->frame_count > (smcb->base_frame+1)); + + do + { + mirror_op = PINT_sm_pop_frame(smcb, &task_id, &error_code, &remaining); + respmir = &(mirror_op->resp.u.mirror); + reqmir = &(mirror_op->req->u.mirror); + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tmirror_op->op:%d" + "\ttask_id:%d" + "\terror_code:%d(%0x)" + "\tremaining:%d\n", + mirror_op->op, + task_id, + error_code, error_code, + remaining); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tresp.src_handle:%llu " + "\tresp.src_server_nr:%d\n", + llu(respmir->src_handle), + respmir->src_server_nr ); + for (i=0; idst_count; i++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\t\tbytes_written[%d]:%d\n" + "\t\twrite_status_codde[%d]:%d\n", + i, + respmir->bytes_written[i], + i, + respmir->write_status_code[i]); + } + + /* if error_code != 0, then NONE of the writes requested in this + * msgpair + * array executed, so we do not need to check the individual status + * codes. 
*/ + if (error_code) + { + js_p->error_code = error_code; /* respmir has no valid data in it.*/ + } + else + { + /* check the write status for each destination associated with + * this particular source handle. */ + for (i=0; idst_count; i++) + { + if (js_p->error_code) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tStatus already " + "established:%d(0x%0x)\n", + js_p->error_code, js_p->error_code); + } + else if (respmir->write_status_code[i]) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tStatus came from " + "error_code %d(0x%0x)\n", + error_code, + error_code); + js_p->error_code = respmir->write_status_code[i]; + } + else + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tStatus is still " + "zero(%d)\n", js_p->error_code); + } + } /* end for */ + } /* end if */ + + + for (i=0; idst_count; i++) + { + index = reqmir->wcIndex[i]; + if ( error_code == 0 /* this will short circuit if false */ + && respmir->write_status_code[i] == 0 ) + { + imm_p->writes_completed[index] = 0; + } + else if (error_code == 0) + { + imm_p->writes_completed[index] = reqmir->dst_handle[i]; + } + else + { + imm_p->writes_completed[index] = UINT64_HIGH; + } + } + + switch(task_id) + { + case LOCAL_SRC: + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tReturning from LOCAL " + "call...\n"); + break; + } + case REMOTE_SRC: + { + /* the destory can be moved into cleanup_msgpairarray, if + * none of its values are needed in this function. 
*/ + PINT_msgpairarray_destroy(&(mirror_op->msgarray_op)); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tReturning from REMOTE " + "call .\n"); + break; + } + } /* end switch */ + + /* cleanup request/response allocations for mirror request */ + if (reqmir->dst_handle) + free(reqmir->dst_handle); + if (reqmir->wcIndex) + free(reqmir->wcIndex); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tmirror_op->req(%p)\n", + mirror_op->req); + free(mirror_op->req); + + if (respmir->bytes_written) + free(respmir->bytes_written); + if (respmir->write_status_code) + free(respmir->write_status_code); + + free(mirror_op); + } while (remaining > (smcb->base_frame+1)); + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tfinal value of js_p->error_code: " + "%d(%0x)\n", js_p->error_code, + js_p->error_code); + + /* if one of the writes failed, js_p->error_code will contain an error. */ + gossip_debug(GOSSIP_MIRROR_DEBUG, "\twrites_completed array[src,server#]" + ":\n"); + for (i=0; idfile_count; i++) + { + for (j=0; jcopies; j++) + { + index = (imm_p->dfile_count * i) + j; + gossip_debug(GOSSIP_MIRROR_DEBUG,"\t\t[%d][%d]:%llu\n", i, j, + llu(imm_p->writes_completed[index])); + } + } + + return SM_ACTION_COMPLETE; +} /* end action check_copy_results */ + + +static PINT_sm_action check_for_retries (struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing check_for_retries....\n"); + struct PINT_server_op *sm_p = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = &(sm_p->u.create_copies); + int i; + + imm_p->retry_count++; + + /* Have we hit the retry limit? */ + if (imm_p->retry_count >= WRITE_RETRY_LIMIT) + { + js_p->error_code = 0; + return SM_ACTION_COMPLETE; + } + + /* Are there any writes to retry? 
*/ + for (i=0; i<(imm_p->dfile_count * imm_p->copies); i++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\twrites_complete[%d]:%llu\n", + i,llu(imm_p->writes_completed[i])); + if (imm_p->writes_completed[i] != 0) + { + js_p->error_code = RETRY; + return SM_ACTION_COMPLETE; + } + } + + js_p->error_code = 0; + return SM_ACTION_COMPLETE; +} /* end state check_for_retries */ + + +/* Build the three key/value pairs that record a mirror operation (the copy + * handles, the copy count and the per-copy write status) and post a job that + * writes them to imm_p->metadata_handle. Returns the result of the job call + * once it has been posted; on an allocation failure js_p->error_code is set to + * -PVFS_ENOMEM and SM_ACTION_COMPLETE is returned. */ +static PINT_sm_action store_mirror_info (struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, "Executing store_mirror_info....\n"); + struct PINT_server_op *sm_p = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = &(sm_p->u.create_copies); + PVFS_handle *reorg_handles = NULL; + int key_count = 3; + int ret = 0,i,j; + job_id_t job_id; + + js_p->error_code = 0; + + /* put copy handles in proper distribution order */ + reorg_handles = reorganize_copy_handles(imm_p); + if (!reorg_handles) + { + gossip_lerr("Unable to create reorg_handles array.\n"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + + /* setup key/val pairs */ + sm_p->keyval_count = key_count; + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tvalue of copies:%d \tlocation:%p\n", + imm_p->copies,&imm_p->copies); + + /* calloc, not malloc+memset: error_exit walks every .buffer pointer, so the + * arrays must already be zeroed when only one of the two allocations + * succeeds. */ + sm_p->key_a = calloc(sm_p->keyval_count, sizeof(PVFS_ds_keyval)); + sm_p->val_a = calloc(sm_p->keyval_count, sizeof(PVFS_ds_keyval)); + + if (!sm_p->key_a || !sm_p->val_a) + { + /* val_a does not own reorg_handles yet, so release it here */ + free(reorg_handles); + goto error_exit; + } + + /* hand reorg_handles to val_a[0] immediately so that every later + * goto error_exit releases it */ + sm_p->val_a[0].buffer = reorg_handles; + + /* setup user.pvfs2.mirror.handles */ + i=0; + /* NOTE(review): assert bound reconstructed as key_count - confirm */ + assert(i<key_count); + sm_p->key_a[i].buffer = malloc(sizeof(handle_key)); + if (!sm_p->key_a[i].buffer) + goto error_exit; + strcpy(sm_p->key_a[i].buffer, handle_key); + sm_p->key_a[i].buffer_sz = sizeof(handle_key); + + sm_p->val_a[i].buffer_sz = + sizeof(PVFS_handle) * imm_p->dfile_count * imm_p->copies; + + /* setup user.pvfs2.mirror.copies */ + i++; 
+ assert(ikey_a[i].buffer = malloc(sizeof(copy_count_key)); + if (!sm_p->key_a[i].buffer) + goto error_exit; + + strcpy(sm_p->key_a[i].buffer,copy_count_key); + sm_p->key_a[i].buffer_sz = sizeof(copy_count_key); + + sm_p->val_a[i].buffer = malloc(sizeof(imm_p->copies)); + if (!sm_p->val_a[i].buffer) + goto error_exit; + + sm_p->val_a[i].buffer_sz = sizeof(imm_p->copies); + memcpy(sm_p->val_a[i].buffer ,&(imm_p->copies), sm_p->val_a[i].buffer_sz); + + /* setup user.pvfs2.mirror.status */ + i++; + assert(ikey_a[i].buffer = malloc(sizeof(status_key)); + if (!sm_p->key_a[i].buffer) + goto error_exit; + + strcpy(sm_p->key_a[i].buffer,status_key); + sm_p->key_a[i].buffer_sz = sizeof(status_key); + + sm_p->val_a[i].buffer = + malloc(sizeof(PVFS_handle) * imm_p->dfile_count * imm_p->copies); + if (!sm_p->val_a[i].buffer) + goto error_exit; + sm_p->val_a[i].buffer_sz = + sizeof(PVFS_handle) * imm_p->dfile_count * imm_p->copies; + memcpy(sm_p->val_a[i].buffer,imm_p->writes_completed, + sm_p->val_a[i].buffer_sz); + + /* verify inputs */ + i=0; + assert(ikey_a[i].buffer); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tsize of buffer : %d\n", + sm_p->key_a[i].buffer_sz); + PVFS_handle *myHandle = (PVFS_handle *)sm_p->val_a[i].buffer; + for (j=0; j<(imm_p->dfile_count*imm_p->copies); j++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\t\thandle(%d):%llu\n", j, + llu(myHandle[j])); + } + + i++; + assert(ival_a[i].buffer; + sm_p->val_a[i].buffer_sz = sizeof(int); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\t%s:%d \tpointer:%p \tbuffer size:%d\n", + (char *)sm_p->key_a[i].buffer, + *myCount,myCount, + sm_p->val_a[i].buffer_sz); + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tkey count:%d\n", sm_p->keyval_count); + + i++; + assert(ival_a[i].buffer; + for (j=0; j<(imm_p->dfile_count * imm_p->copies); j++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\thandle(%d):status(%llu)\n", + j, llu(myStatus[j])); + } + + /* store keys */ + ret = job_trove_keyval_write_list( imm_p->fs_id, + imm_p->metadata_handle, + sm_p->key_a, 
+ sm_p->val_a, + sm_p->keyval_count, + TROVE_SYNC, /*trove flags*/ + NULL, + smcb, + 0, + js_p, + &job_id, + server_job_context, + NULL); + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tvalue of ret from call to trove : " + "%d\n", ret); + + i=0; + assert(ikey_a[i].buffer); + for (j=0; j<(imm_p->dfile_count * imm_p->copies); j++) + { + PVFS_handle *myHandle = (PVFS_handle *)sm_p->val_a[i].buffer; + gossip_debug(GOSSIP_MIRROR_DEBUG,"\t\thandle(%d):%llu\n", j, + llu(myHandle[j])); + } + + i++; + assert(ival_a[i].buffer; + gossip_debug(GOSSIP_MIRROR_DEBUG, "\t%s:%d \tpointer:%p\n", + (char *)sm_p->key_a[i].buffer, + *myCount, + sm_p->val_a[i].buffer); + + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tvalue of ret from trove call : %d\n", + ret); + return (ret); + +error_exit: + for (i=0; ikeyval_count; i++) + { + if (sm_p->key_a && sm_p->key_a[i].buffer) + free(sm_p->key_a[i].buffer); + if (sm_p->val_a && sm_p->val_a[i].buffer) + free(sm_p->val_a[i].buffer); + } + + if (sm_p->key_a) + free(sm_p->key_a); + if (sm_p->val_a) + free(sm_p->val_a); + + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; +} /* end action store_mirror_info */ + + +static PINT_sm_action check_store_job (struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, "Executing check_store_job....\n"); + struct PINT_server_op *sm_p = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = &(sm_p->u.create_copies); + int i; + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tjs_p->error_code:%d\n", + js_p->error_code); + + if (js_p->error_code) + { + gossip_err("Unable to store datahandles and number of copies for this " + "mirror operation.\n"); + gossip_err("\tMeta data handle is %llu\n",llu(imm_p->metadata_handle)); + return SM_ACTION_COMPLETE; + } + + /* release memory used in previous job call */ + for (i=0; ikeyval_count; i++) + { + free(sm_p->key_a[i].buffer); + free(sm_p->val_a[i].buffer); + } + free(sm_p->key_a); + free(sm_p->val_a); + sm_p->key_a = 
sm_p->val_a = NULL; + sm_p->keyval_count = 0; + + js_p->error_code = 0; + + return SM_ACTION_COMPLETE; +} /* end state check_store_job */ + + +static PINT_sm_action replace_remote_datahandle_objects(struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, "Executing replace_remote_datahandle_" + "objects....\n"); + + struct PINT_server_op *sm_p = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = &(sm_p->u.create_copies); + job_id_t job_id; + int ret; + int tmpindex; + PVFS_handle pool_handle; + + js_p->error_code = 0; + + if (!imm_p->handle_array_copies_remote) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "handle_array_copies_remote is " + "null: %p\n", + imm_p->handle_array_copies_remote); + js_p->error_code = REPLACE_DONE; + return SM_ACTION_COMPLETE; + } + + imm_p->handle_array_copies_remote_count--; + if (imm_p->handle_array_copies_remote_count < 0) + { + js_p->error_code = REPLACE_DONE; + return SM_ACTION_COMPLETE; + } + + tmpindex = imm_p->handle_array_copies_remote_count; + + /* find the pool that this handle belongs to */ + ret = job_precreate_pool_lookup_server( imm_p->remote_io_servers[tmpindex], + PVFS_TYPE_DATAFILE, + imm_p->fs_id, + &pool_handle ); + if (ret < 0) + { + imm_p->handle_array_copies_remote_count++; + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + ret = job_precreate_pool_fill( pool_handle, + imm_p->fs_id, + &(imm_p->handle_array_copies_remote + [tmpindex]), + 1, + smcb, + 0, + js_p, + &job_id, + server_job_context, + NULL ); + + return ret; +} /* end action replace_remote_datahandle_objects */ + + +static PINT_sm_action remove_local_datahandle_objects(struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, "Executing remove_local_datahandle_" + "objects....\n"); + + struct PINT_server_op *sm_p = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_server_create_copies_op *imm_p = &(sm_p->u.create_copies); + job_id_t job_id; + int ret; + + if 
(js_p->error_code) + imm_p->saved_error_code = js_p->error_code; + + js_p->error_code = 0; + + if (!imm_p->handle_array_copies_local) + return SM_ACTION_COMPLETE; + + /* handle_array_copies_local holds handle_array_copies_local_count * copies + * handles (that many are allocated and then created by + * create_local_datahandles), so all of them must be removed, not just the + * first handle_array_copies_local_count. */ + ret = job_trove_dspace_remove_list( imm_p->fs_id, + imm_p->handle_array_copies_local, + NULL, + imm_p->handle_array_copies_local_count * + imm_p->copies, + TROVE_SYNC, + smcb, + 0, + js_p, + &job_id, + server_job_context, + NULL ); + + return ret; +} /* end action remove_local_datahandle_objects */ + + +static PINT_sm_action cleanup (struct PINT_smcb *smcb, job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing cleanup....\n"); + + struct PINT_server_op *sm_p = NULL; + PINT_server_create_copies_op *imm_p = NULL; + int i; + + sm_p = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + imm_p = &(sm_p->u.create_copies); + + if (js_p->error_code == NOTHING_TO_DO) + js_p->error_code = 0; + + if (imm_p->my_remote_servers) + free(imm_p->my_remote_servers); + + if (imm_p->writes_completed) + free(imm_p->writes_completed); + + if (imm_p->handle_array_copies_local) + free(imm_p->handle_array_copies_local); + + if (imm_p->handle_array_copies_remote) + free(imm_p->handle_array_copies_remote); + + if (imm_p->remote_io_servers) + { + for (i=0;i<imm_p->remote_io_servers_count;i++) + free(imm_p->remote_io_servers[i]); + free(imm_p->remote_io_servers); + } + + if (imm_p->local_io_servers) + { + for (i=0; i<imm_p->local_io_servers_count; i++) + free(imm_p->local_io_servers[i]); + free(imm_p->local_io_servers); + } + + if (imm_p->handle_array_base) + free(imm_p->handle_array_base); + + if (imm_p->handle_array_base_local) + free(imm_p->handle_array_base_local); + + if (imm_p->handle_array_copies) + free(imm_p->handle_array_copies); + + if (imm_p->io_servers) + { + for (i=0;i<imm_p->io_servers_required;i++) + { + free(imm_p->io_servers[i]); + } + free(imm_p->io_servers); + } + + if (imm_p->ds_attr_a) + free(imm_p->ds_attr_a); + + if (imm_p->bstream_array_base_local) + free(imm_p->bstream_array_base_local); + + if (!js_p->error_code && imm_p->saved_error_code) 
+ js_p->error_code = imm_p->saved_error_code; + + if (imm_p->dist && imm_p->dist->dist_name) + free(imm_p->dist->dist_name); + if (imm_p->dist) + free(imm_p->dist); + + gossip_debug(GOSSIP_MIRROR_DEBUG, "Leaving cleanup: error_code:%d.....\n", + js_p->error_code); + + return SM_ACTION_COMPLETE; +} /* end action cleanup */ + + +int mirror_comp_fn(void *v_p, struct PVFS_server_resp *resp_p, int i) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, "Executing mirror_comp_fn.....\n"); + + PINT_smcb *smcb = v_p; + struct PINT_server_op *mirror_op = PINT_sm_frame(smcb, + PINT_MSGPAIR_PARENT_SM); + struct PVFS_servresp_mirror *respmir = &(mirror_op->resp.u.mirror); + int k; + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tmirror_op:%p\n",mirror_op); + + /* only posted one msgpair per source handle*/ + assert(i==0); + + /* If the response status is non-zero, then the rest of the response is + * NOT encoded in final-response.sm. So, there are no values to access. + * NOTE: An error code will be returned in the status field IFF NONE of + * the writes were successful. Otherwise, the status of each write will + * be contained in the write_status_code field. */ + if (resp_p->status != 0) + return(resp_p->status); + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tresp->src_handle:%llu " + "\tresp->src_server_nr:%d " + "\tresp->status:%d\n", + llu(resp_p->u.mirror.src_handle), + resp_p->u.mirror.src_server_nr, + resp_p->status ); + for (k=0; ku.mirror.dst_count; k++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tresp->bytes_written[%d]:%d" + "\tresp->write_status_code[%d]:%d\n", + k, + resp_p->u.mirror.bytes_written[k], + k, + resp_p->u.mirror. + write_status_code[k]); + } + + assert(mirror_op->op == PVFS_SERV_MIRROR); + + memset(&(mirror_op->resp),0,sizeof(mirror_op->resp)); + + /* capture information from the mirror operation. 
*/ + respmir->src_handle = resp_p->u.mirror.src_handle; + respmir->src_server_nr = resp_p->u.mirror.src_server_nr; + respmir->dst_count = resp_p->u.mirror.dst_count; + + respmir->bytes_written = malloc(sizeof(uint32_t) * respmir->dst_count); + if (!respmir->bytes_written) + { + gossip_lerr("Unable to allocate respmir->bytes_written\n"); + return (-PVFS_ENOMEM); + } + memset(respmir->bytes_written,0,sizeof(uint32_t) * respmir->dst_count); + + respmir->write_status_code = malloc(sizeof(uint32_t) * respmir->dst_count); + if (!respmir->write_status_code) + { + gossip_lerr("Unable to allocate respmir->write_status_code.\n"); + return (-PVFS_ENOMEM); + } + memset(respmir->write_status_code,0,sizeof(uint32_t) * respmir->dst_count); + + memcpy(respmir->bytes_written,resp_p->u.mirror.bytes_written, + sizeof(uint32_t) * respmir->dst_count); + memcpy(respmir->write_status_code,resp_p->u.mirror.write_status_code, + sizeof(uint32_t) * respmir->dst_count); + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tsmcb->base_frame:%d\tframe_count:%d\n", + smcb->base_frame,smcb->frame_count); + + return(0); +} /* end msgpair completion function mirror_comp_fn */ + + +static PVFS_handle *reorganize_copy_handles( + struct PINT_server_create_copies_op *imm_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing reorganize_copy_handles..\n"); + + uint64_t i, j, k, rows, in_cols, out_cols; + PVFS_handle *copies_out = NULL; + PVFS_handle *copies_in = imm_p->handle_array_copies; + + rows = imm_p->copies; + in_cols = imm_p->io_servers_required; + out_cols = imm_p->dfile_count; + + /* allocate copies_out array */ + copies_out = malloc(sizeof(PVFS_handle) * rows * out_cols); + if (!copies_out) + { + gossip_lerr("Unable to allocate memeory.\n"); + return (NULL); + } + memset(copies_out, 0, sizeof(PVFS_handle) * rows * out_cols); + + for (i=0; i<(in_cols*rows); i++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\thandle_array_copies(%d):%llu\n", + (int)i, + llu(copies_in[i])); + } + + /* this code copies 
copies_in[n+1] to copies_out[n] within the same row + * each row represents one copy of the logical file, i.e., each of its + * datahandles. */ + for (i=0,k=1; iio_servers_required; i++) + gossip_debug(GOSSIP_MIRROR_DEBUG, "\t [%d]:%s" + "\tlength:%d\n", + i, + imm_p->io_servers[i], + (int)strlen(imm_p->io_servers[i])); + + /* Get access to the io server names residing in the cache */ + ret = PINT_cached_config_io_server_names(&list, &size, imm_p->fs_id); + if (ret) + { + if (list) + free(list); + gossip_lerr("Unable to retrieve IO server names from the cache.\n"); + return(ret); + } + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tReturned from PINT_cached_config...\n"); + for (i=0; idfile_count; j++) + { + if (strncmp(list[i],imm_p->io_servers[j],strlen(list[i])) == 0) + { + list[i] = NULL; + break; + } + } /* end for */ + } /* end for */ + + /* Add server names to io_servers list */ + for (i=0,j=imm_p->dfile_count; iio_servers_required; i++) + { + if (list[i]) + { + strncpy(imm_p->io_servers[j],list[i],SERVER_NAME_MAX-1); + j++; + } + } /*end for*/ + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tio_servers(after):\n"); + for (i=0; iio_servers_required; i++) + gossip_debug(GOSSIP_MIRROR_DEBUG,"\t [%d]:%s\n", + i, imm_p->io_servers[i]); + + /* deallocate memory used for "list" */ + free(list); + + return (0); +} /* end function get_server_names */ + +/* Right now, this state machine is not called as a standalone request. It is + * only called as a nested machine from seteattr; however, when time comes to + * create a standalone server request, the values used for the request + * parameters are listed below. 
*/ +static inline int PINT_get_object_ref_copies( struct PVFS_server_req *req, + PVFS_fs_id *fs_id, + PVFS_handle *handle ) +{ + *fs_id = req->u.seteattr.fs_id; + *handle = req->u.seteattr.handle; + + return 0; +}; + +/* request parameters */ +struct PINT_server_req_params pvfs2_create_immutable_copies_params = +{ + .string_name = "create_immutable_copies", + .perm = PINT_SERVER_CHECK_NONE, + .access_type = PINT_server_req_modify, + .sched_policy = PINT_SERVER_REQ_SCHEDULE, + .get_object_ref = PINT_get_object_ref_copies, + .state_machine = &pvfs2_create_immutable_copies_sm +}; diff --git a/src/server/create.sm b/src/server/create.sm index 37fd338..3689867 100644 --- a/src/server/create.sm +++ b/src/server/create.sm @@ -1,5 +1,5 @@ /* - * (C) 2001 Clemson University and The University of Chicago + * (C) 2001 The University of Chicago * * See COPYING in top-level directory. */ @@ -12,6 +12,16 @@ #include "pvfs2-attr.h" #include "gossip.h" #include "pvfs2-internal.h" +#include "pint-util.h" +#include "pint-cached-config.h" +#include "trove-handle-mgmt.h" + +#define REPLACE_DONE 100 + +enum { + OSD_MSGPAIR = 2001, + OSD_MDFILE_MSGPAIR = 2002 +}; %% @@ -20,19 +30,95 @@ machine pvfs2_create_sm state prelude { jump pvfs2_prelude_sm; - success => create; - default => final_response; + success => create_metafile; + default => setup_final_response; + } + + state create_metafile + { + run create_metafile; + success => check_stuffed; + OSD_MDFILE_MSGPAIR => setup_local_datafile_handles; + default => setup_final_response; + } + + state check_stuffed + { + run check_stuffed; + success => create_local_datafiles; + default => setup_final_response; } - state create + state create_local_datafiles { - run create_create; - default => setup_resp; + run create_local_datafiles; + success => setup_local_datafile_handles; + default => remove_metafile_object; + } + + state setup_local_datafile_handles + { + run setup_local_datafile_handles; + success => request_datafiles; + default => 
remove_local_datafile_handles; + } + + state request_datafiles + { + run request_datafiles; + success => write_keyvals; + default => remove_local_datafile_handles; + } + + state write_keyvals + { + run write_keyvals; + success => setobj_attribs; + default => replace_remote_datafile_handles; + } + + state setobj_attribs + { + run setattr_setobj_attribs; + success => setup_resp; + default => remove_keyvals; } state setup_resp { - run create_setup_resp; + run setup_resp; + default => setup_final_response; + } + + state remove_local_datafile_handles + { + run remove_local_datafile_handles; + default => remove_metafile_object; + } + + state replace_remote_datafile_handles + { + run replace_remote_datafile_handles; + REPLACE_DONE => remove_local_datafile_handles; + default => replace_remote_datafile_handles; + } + + state remove_metafile_object + { + run remove_metafile_object; + default => setup_final_response; + } + + state remove_keyvals + { + run remove_keyvals; + success => replace_remote_datafile_handles; + default => setup_final_response; + } + + state setup_final_response + { + run setup_final_response; default => final_response; } @@ -44,95 +130,721 @@ machine pvfs2_create_sm state cleanup { - run create_cleanup; + run cleanup; default => terminate; } } %% -/* - * Function: create_create - * - * Params: server_op *s_op, - * job_status_s* js_p - * - * Pre: None - * - * Post: None - * - * Returns: int - * - * Synopsis: Create the new dataspace with the values provided in the response. 
- * - */ -static int create_create( +static int setup_final_response( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + /* retrieve original error code if present */ + if(s_op->u.create.saved_error_code) + { + js_p->error_code = s_op->u.create.saved_error_code; + } + + /* otherwise propigate the js_p->error code */ + return(SM_ACTION_COMPLETE); +} + +static int create_metafile( struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret = -1; job_id_t i; + PVFS_handle_extent_array meta_handle_ext_array; + server_configuration_s *config = get_server_config_struct(); + + if (config->osd_type == OSD_MDFILE) + { + /* Nothing to do here, go to the setup_local_datafile_handles to get a data handle_array_local */ + js_p->error_code = OSD_MDFILE_MSGPAIR; + return SM_ACTION_COMPLETE; + } + + /* first state to check in, make sure the attr mask contains the dist bit. + * it's required later (not sure if we have to require it) but if we don't + * have it here, return an EINVAL */ + if( ! 
(s_op->req->u.create.attr.mask & PVFS_ATTR_META_DIST) ) + { + gossip_debug(GOSSIP_SERVER_DEBUG, "%s: invalid create request, " + "attribute mask did not include the distribution\n", + __func__); + js_p->error_code = -PVFS_EINVAL; + return SM_ACTION_COMPLETE; + } + + ret = PINT_cached_config_get_server( + s_op->req->u.create.fs_id, + config->host_id, + PINT_SERVER_TYPE_META, + &meta_handle_ext_array); ret = job_trove_dspace_create( s_op->req->u.create.fs_id, - &s_op->req->u.create.handle_extent_array, - s_op->req->u.create.object_type, + &meta_handle_ext_array, + PVFS_TYPE_METAFILE, NULL, - TROVE_SYNC , + 0, smcb, 0, js_p, &i, - server_job_context); + server_job_context, s_op->req->hints); return(ret); } -/* create_setup_resp() - * - * fills in the response structure based on results of previous operation - */ -static int create_setup_resp( +static int check_stuffed( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int server_type; + server_configuration_s *config = get_server_config_struct(); + struct filesystem_configuration_s *fs_conf; + PVFS_BMI_addr_t myaddr; + PVFS_sys_layout *layout; + int ret; + const char* svr_name; + int i; + + s_op->resp.u.create.metafile_handle = js_p->handle; + gossip_debug( + GOSSIP_SERVER_DEBUG, "Metafile handle created: %llu\n", + llu(js_p->handle)); + + assert(config); + + if(config->osd_type == OSD_DATAFILE) + { + js_p->error_code = 0; + return SM_ACTION_COMPLETE; + } + + layout = &s_op->req->u.create.layout; + + if(layout->algorithm == PVFS_SYS_LAYOUT_LIST) + { + for(i=0; iserver_list.count; i++) + { + gossip_debug(GOSSIP_SERVER_DEBUG, "layout list server %d: %lld\n", + i, lld(layout->server_list.servers[i])); + } + } + + fs_conf = PINT_config_find_fs_id(config, + s_op->req->u.create.fs_id); + if(!fs_conf) + { + js_p->error_code = -PVFS_EINVAL; + return(SM_ACTION_COMPLETE); + } + + ret = BMI_addr_lookup(&myaddr, config->host_id); + if(ret != 0) + { + /* we 
can't get our own address? */ + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + /* is this metadata server also IO? */ + svr_name = PINT_cached_config_map_addr(s_op->req->u.create.fs_id, + myaddr, &server_type); + if(!svr_name) + { + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + /* For now only support stuffing of ROUND_ROBIN layouts. + * As a performance enhancement, don't create a stuffed + * file when the current environment only has one server. + * This prevents unstuffing from being called by the client sys-io machine. + */ + if((server_type & PINT_SERVER_TYPE_IO) && + fs_conf->file_stuffing && + layout->algorithm == PVFS_SYS_LAYOUT_ROUND_ROBIN && + s_op->req->u.create.num_dfiles_req > 1 ) + { + /* we can do a stuffed create here, only one datafile */ + s_op->req->u.create.attr.u.meta.dfile_count = 1; + s_op->resp.u.create.datafile_count = 1; + s_op->resp.u.create.datafile_handles = malloc(sizeof(PVFS_handle)); + s_op->u.create.handle_array_local = malloc(sizeof(PVFS_handle)); + if(!s_op->resp.u.create.datafile_handles || !s_op->u.create.handle_array_local) + { + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + + s_op->resp.u.create.stuffed = 1; + js_p->error_code = 0; + return SM_ACTION_COMPLETE; + } + + /* file will not be stuffed; need to allocate all datafiles */ + s_op->u.create.num_io_servers = s_op->req->u.create.num_dfiles_req; + s_op->resp.u.create.datafile_handles = malloc( + sizeof(*s_op->resp.u.create.datafile_handles) * + s_op->u.create.num_io_servers); + s_op->u.create.handle_array_local = malloc( + sizeof(*s_op->u.create.handle_array_local) * + s_op->u.create.num_io_servers); + s_op->u.create.handle_array_remote = malloc( + sizeof(*s_op->u.create.handle_array_remote) * + s_op->u.create.num_io_servers); + s_op->u.create.remote_io_servers = malloc( + sizeof(char*) * + s_op->u.create.num_io_servers); + if(!s_op->resp.u.create.datafile_handles || + !s_op->u.create.handle_array_local || + 
!s_op->u.create.handle_array_remote || + !s_op->u.create.remote_io_servers) + { + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + + /* gather list of servers to use, may include local server */ + ret = PINT_cached_config_get_server_list( + s_op->req->u.create.fs_id, + s_op->req->u.create.attr.u.meta.dist, + s_op->req->u.create.num_dfiles_req, + s_op->req->credentials.uid, + s_op->req->u.create.num_energy_nodes, + &s_op->req->u.create.layout, + &s_op->u.create.io_servers, + &s_op->u.create.num_io_servers); + if(ret < 0) + { + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + /* layout may have adjusted number of datafiles */ + s_op->req->u.create.attr.u.meta.dfile_count + = s_op->u.create.num_io_servers; + s_op->resp.u.create.datafile_count + = s_op->u.create.num_io_servers; + for(i=0; iu.create.num_io_servers; i++) + { + gossip_debug(GOSSIP_SERVER_DEBUG, "io_server %d: %s\n", + i, s_op->u.create.io_servers[i]); + } + + s_op->resp.u.create.stuffed = 0; + js_p->error_code = 0; + return SM_ACTION_COMPLETE; +} + +static int create_local_datafiles( struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - if (js_p->error_code == 0) + int ret = -1; + job_id_t tmp_id; + PVFS_handle_extent_array data_handle_ext_array; + server_configuration_s *config = get_server_config_struct(); + int i; + int tmp_index = 0; + + if(s_op->resp.u.create.stuffed) + { + /* only one datafile, and it is local */ + s_op->u.create.handle_array_local_count = 1; + s_op->u.create.handle_array_remote_count = 0; + } + else { - gossip_debug(GOSSIP_SERVER_DEBUG, "Handle created: %llu\n", - llu(js_p->handle)); - s_op->resp.u.create.handle = js_p->handle; - switch(s_op->req->u.create.object_type) + /* figure out how many datafiles need to be local vs. 
remote */ + s_op->u.create.handle_array_local_count = 0; + s_op->u.create.handle_array_remote_count = 0; + for(i=0; iu.create.num_io_servers; i++) { - case PVFS_TYPE_NONE: - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, - "new handle: %llu, type unknown.\n", llu(js_p->handle)); - break; - case PVFS_TYPE_METAFILE: - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, - "new handle: %llu, type metafile.\n", llu(js_p->handle)); - break; - case PVFS_TYPE_DATAFILE: - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, - "new handle: %llu, type datafile.\n", llu(js_p->handle)); - break; - case PVFS_TYPE_DIRECTORY: - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, - "new handle: %llu, type directory.\n", llu(js_p->handle)); - break; - case PVFS_TYPE_SYMLINK: - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, - "new handle: %llu, type symlink.\n", llu(js_p->handle)); - break; - case PVFS_TYPE_DIRDATA: - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, - "new handle: %llu, type dirdata.\n", llu(js_p->handle)); - break; + if(!strcmp(s_op->u.create.io_servers[i], config->host_id)) + { + s_op->u.create.handle_array_local_count++; + } + else + { + s_op->u.create.handle_array_remote_count++; + s_op->u.create.remote_io_servers[tmp_index] = + s_op->u.create.io_servers[i]; + tmp_index++; + } } } - /* NOTE: we _deliberately_ leave the error_code unchanged so that it - * can be used by the next state. 
+ gossip_debug(GOSSIP_SERVER_DEBUG, "creating %d local data files\n", + s_op->u.create.handle_array_local_count); + gossip_debug(GOSSIP_SERVER_DEBUG, "creating %d remote data files\n", + s_op->u.create.handle_array_remote_count); + + if(s_op->u.create.handle_array_local_count == 0) + { + /* no local work to do */ + js_p->error_code = 0; + return(SM_ACTION_COMPLETE); + } + + /* find local extent array */ + ret = PINT_cached_config_get_server( + s_op->req->u.create.fs_id, + config->host_id, + PINT_SERVER_TYPE_IO, + &data_handle_ext_array); + if(ret < 0) + { + js_p->error_code = ret; + return(SM_ACTION_COMPLETE); + } + + /* deliberately not setting SYNC flag, because both the attrs and + * keyvals will be synced in later states */ + ret = job_trove_dspace_create_list( + s_op->req->u.create.fs_id, + &data_handle_ext_array, + s_op->u.create.handle_array_local, + s_op->u.create.handle_array_local_count, + PVFS_TYPE_DATAFILE, + NULL, + 0, + smcb, + 0, + js_p, + &tmp_id, + server_job_context, + s_op->req->hints); + + return(ret); +} + +static PINT_sm_action request_datafiles( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int ret = -1; + job_id_t j_id; + + if(s_op->u.create.handle_array_remote_count == 0) + { + js_p->error_code = 0; + return(SM_ACTION_COMPLETE); + } + + ret = job_precreate_pool_get_handles( + s_op->req->u.create.fs_id, + s_op->u.create.handle_array_remote_count, + PVFS_TYPE_DATAFILE, + s_op->u.create.remote_io_servers, + s_op->u.create.handle_array_remote, + 0, + smcb, + 0, + js_p, + &j_id, + server_job_context, + s_op->req->hints); + return ret; +} + +static PINT_sm_action remove_metafile_object( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int ret = -1; + job_id_t j_id; + + /* save the error code before we begin cleanup */ + if(!s_op->u.create.saved_error_code) + { + s_op->u.create.saved_error_code = 
js_p->error_code; + } + + ret = job_trove_dspace_remove( + s_op->req->u.create.fs_id, + s_op->resp.u.create.metafile_handle, + 0, + smcb, + 0, + js_p, + &j_id, + server_job_context, + s_op->req->hints); + return ret; +} + +static PINT_sm_action remove_local_datafile_handles( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int ret = -1; + job_id_t j_id; + + /* save the error code before we begin cleanup */ + if(!s_op->u.create.saved_error_code) + { + s_op->u.create.saved_error_code = js_p->error_code; + } + + if(s_op->u.create.handle_array_local_count == 0) + { + /* nothing to do */ + js_p->error_code = 0; + return(SM_ACTION_COMPLETE); + } + + ret = job_trove_dspace_remove_list(s_op->req->u.create.fs_id, + s_op->u.create.handle_array_local, + NULL, + s_op->u.create.handle_array_local_count, + 0, + smcb, + 0, + js_p, + &j_id, + server_job_context, + s_op->req->hints); + + return ret; +} + +static PINT_sm_action replace_remote_datafile_handles( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int ret = -1; + job_id_t j_id; + PVFS_handle pool_handle; + + /* save the error code before we begin cleanup */ + if(!s_op->u.create.saved_error_code) + { + s_op->u.create.saved_error_code = js_p->error_code; + } + + if(s_op->u.create.handle_index < s_op->u.create.handle_array_remote_count) + { + /* find pool that this handle belongs to */ + ret = job_precreate_pool_lookup_server( + s_op->u.create.remote_io_servers[s_op->u.create.handle_index], + PVFS_TYPE_DATAFILE, + s_op->req->u.create.fs_id, + &pool_handle); + if(ret < 0) + { + s_op->u.create.handle_index++; + js_p->error_code = ret; + return(SM_ACTION_COMPLETE); + } + + /* return handle to pool */ + ret = job_precreate_pool_fill( + pool_handle, + s_op->req->u.create.fs_id, + &s_op->u.create.handle_array_remote[s_op->u.create.handle_index], + 1, + smcb, + 0, + js_p, + &j_id, + 
server_job_context, + s_op->req->hints); + + s_op->u.create.handle_index++; + return(ret); + } + else + { + /* all handles have been replaced */ + js_p->error_code = REPLACE_DONE; + return(SM_ACTION_COMPLETE); + } +} + +static PINT_sm_action remove_keyvals( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int ret = -1; + job_id_t j_id; + + /* save the error code before we begin cleanup */ + if(!s_op->u.create.saved_error_code) + { + s_op->u.create.saved_error_code = js_p->error_code; + } + + /* the keyval keys and vals should still be valid here */ + ret = job_trove_keyval_remove_list( + s_op->req->u.create.fs_id, + s_op->resp.u.create.metafile_handle, + s_op->key_a, s_op->val_a, s_op->error_a, + 2, TROVE_SYNC, NULL, smcb, 0, js_p, &j_id, server_job_context, + s_op->req->hints); + + return ret; +} + +static PINT_sm_action setup_local_datafile_handles( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + gossip_err("setup_local_datafile_handles\n"); + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int i; + int tmp_index = 0; + PVFS_handle_extent_array data_handle_ext_array; + PINT_llist *cur = NULL; + struct host_alias_s *cur_alias; + server_configuration_s *config = get_server_config_struct(); + + if(config->osd_type == OSD_DATAFILE || config->osd_type == OSD_MDFILE) + { + cur = config->host_aliases; + while(cur) + { + cur_alias = PINT_llist_head(cur); + if (!cur_alias) + { + break; + } + if(!strncmp(cur_alias->bmi_address, "osd", 3)) { + PINT_cached_config_get_server( + s_op->req->u.create.fs_id, + cur_alias->bmi_address, + PINT_SERVER_TYPE_IO, + &data_handle_ext_array); + } + cur = PINT_llist_next(cur); + } + + s_op->u.create.num_io_servers = s_op->req->u.create.num_dfiles_req; + s_op->req->u.create.attr.u.meta.dfile_count = s_op->u.create.num_io_servers; + s_op->resp.u.create.datafile_count = s_op->u.create.num_io_servers; + + s_op->resp.u.create.datafile_handles = 
malloc(sizeof(PVFS_handle)); + s_op->resp.u.create.datafile_handles[0] = trove_handle_alloc_from_range(s_op->req->u.create.fs_id, &data_handle_ext_array); + + if (config->osd_type == OSD_MDFILE) + js_p->error_code = OSD_MDFILE_MSGPAIR; + else + js_p->error_code = 0; + } else { + if(s_op->resp.u.create.stuffed) + { + s_op->resp.u.create.datafile_handles[0] = + s_op->u.create.handle_array_local[0]; + js_p->error_code = 0; + return(SM_ACTION_COMPLETE); + } + else + { + for(i=0; iu.create.num_io_servers; i++) + { + /* find local server positions and set handles */ + if(!strcmp(s_op->u.create.io_servers[i], config->host_id)) + { + s_op->resp.u.create.datafile_handles[i] = + s_op->u.create.handle_array_local[tmp_index]; + tmp_index++; + } + } + } + } + + return SM_ACTION_COMPLETE; +} + +static PINT_sm_action write_keyvals( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int ret = -1; + job_id_t j_id; + int i; + int tmp_index = 0; + server_configuration_s *config = get_server_config_struct(); + char* tmpbuf; + + if(s_op->u.create.handle_array_remote_count) + { + for(i=0; iu.create.num_io_servers; i++) + { + /* find remote server positions and set handles */ + if(strcmp(s_op->u.create.io_servers[i], config->host_id)) + { + s_op->resp.u.create.datafile_handles[i] = + s_op->u.create.handle_array_remote[tmp_index]; + tmp_index++; + } + } + } + + /* start with 2 keyvals: the distribution and the datafile handles */ + int keyval_count = 2; + + if(s_op->resp.u.create.stuffed) + { + /* also need to set the layout as a keyval */ + keyval_count+= 2; + } + + s_op->key_a = malloc(sizeof(PVFS_ds_keyval) * keyval_count); + if(!s_op->key_a) + { + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + + s_op->val_a = malloc(sizeof(PVFS_ds_keyval) * keyval_count); + if(!s_op->val_a) + { + free(s_op->key_a); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(s_op->val_a, 0, 
sizeof(PVFS_ds_keyval) * keyval_count); + + s_op->key_a[0].buffer = Trove_Common_Keys[METAFILE_HANDLES_KEY].key; + s_op->key_a[0].buffer_sz = Trove_Common_Keys[METAFILE_HANDLES_KEY].size; + + s_op->val_a[0].buffer = s_op->resp.u.create.datafile_handles; + s_op->val_a[0].buffer_sz = + s_op->resp.u.create.datafile_count * sizeof(PVFS_handle); + + s_op->key_a[1].buffer = Trove_Common_Keys[METAFILE_DIST_KEY].key; + s_op->key_a[1].buffer_sz = Trove_Common_Keys[METAFILE_DIST_KEY].size; + + s_op->val_a[1].buffer_sz = + s_op->req->u.create.attr.u.meta.dist_size; + s_op->val_a[1].buffer = malloc(s_op->val_a[1].buffer_sz); + if(!s_op->val_a[1].buffer) + { + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + PINT_dist_encode(s_op->val_a[1].buffer, + s_op->req->u.create.attr.u.meta.dist); + + if(s_op->resp.u.create.stuffed) + { + s_op->key_a[2].buffer = Trove_Common_Keys[METAFILE_LAYOUT_KEY].key; + s_op->key_a[2].buffer_sz = Trove_Common_Keys[METAFILE_LAYOUT_KEY].size; + + s_op->val_a[2].buffer = malloc(PVFS_REQ_LIMIT_LAYOUT); + if(!s_op->val_a[2].buffer) + { + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + tmpbuf = s_op->val_a[2].buffer; + encode_PVFS_sys_layout(&tmpbuf, &s_op->req->u.create.layout); + + s_op->val_a[2].buffer_sz = (tmpbuf - (char*)s_op->val_a[2].buffer); + + gossip_debug(GOSSIP_SERVER_DEBUG, + "create storing layout of size: %d\n", + s_op->val_a[2].buffer_sz); + + s_op->key_a[3].buffer = Trove_Common_Keys[NUM_DFILES_REQ_KEY].key; + s_op->key_a[3].buffer_sz = Trove_Common_Keys[NUM_DFILES_REQ_KEY].size; + + gossip_debug( + GOSSIP_SERVER_DEBUG, "create storing NUM_DFILES_REQ_KEY value of %d\n", + s_op->req->u.create.num_dfiles_req); + s_op->val_a[3].buffer = &s_op->req->u.create.num_dfiles_req; + s_op->val_a[3].buffer_sz = sizeof(s_op->req->u.create.num_dfiles_req); + } + + ret = job_trove_keyval_write_list( + s_op->req->u.create.fs_id, + s_op->resp.u.create.metafile_handle, + s_op->key_a, s_op->val_a, + keyval_count, 
TROVE_SYNC, NULL, smcb, + 0, js_p, &j_id, server_job_context, + s_op->req->hints); + return ret; +} + +static PINT_sm_action setattr_setobj_attribs( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int ret = -1; + job_id_t j_id; + PVFS_object_attr *a_p = NULL; + PVFS_object_attr *dspace_a_p = NULL; + PVFS_ds_attributes *ds_attr = NULL; + + dspace_a_p = &s_op->attr; + a_p = &s_op->req->u.create.attr; + + /* + * Remember that mtime is versioned on disk! so convert it here.. + * It is better to do it here than change the PVFS_object_attr_overwrite_setable + * macro, since there are many more users of it, I think. + */ + if (a_p->mask & PVFS_ATTR_COMMON_MTIME_SET) + { + PVFS_time orig_mtime = a_p->mtime; + a_p->mtime = PINT_util_mktime_version(orig_mtime); + gossip_debug(GOSSIP_SETATTR_DEBUG, "setting version " + "to %llu\n\tmtime is %llu\n", + llu(a_p->mtime), llu(orig_mtime)); + } + + /* + we have the attribs stored in the dspace, as well as the + requested attribs to store. 
overwrite the ones that are setable + and specified by the mask value in the request; macro defined in + pvfs2-storage.h + */ + PVFS_object_attr_overwrite_setable(dspace_a_p, a_p); + + gossip_debug( + GOSSIP_SERVER_DEBUG, + "[STUFFED CREATE]: WRITING attrs: [owner = %d, group = %d\n\t" + "perms = %o, type = %d, atime = %llu, mtime = %llu\n\t" + "ctime = %llu | dfile_count = %d | dist_size = %d]\n", + dspace_a_p->owner, dspace_a_p->group, dspace_a_p->perms, + dspace_a_p->objtype, llu(dspace_a_p->atime), + llu(PINT_util_mkversion_time(dspace_a_p->mtime)), llu(dspace_a_p->ctime), + (int)dspace_a_p->u.meta.dfile_count, + (int)dspace_a_p->u.meta.dist_size); + + /* translate attrs to storage attr format */ + ds_attr = &(s_op->ds_attr); + PVFS_object_attr_to_ds_attr(dspace_a_p, ds_attr); + + ret = job_trove_dspace_setattr( + s_op->req->u.create.fs_id, s_op->resp.u.create.metafile_handle, + ds_attr, + TROVE_SYNC, + smcb, 0, js_p, &j_id, server_job_context, + s_op->req->hints); + + return ret; +} + +static int setup_resp( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + if (js_p->error_code == 0) + { + PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, + "create: new metadata handle: %llu.\n", + llu(s_op->resp.u.create.metafile_handle)); + } + return SM_ACTION_COMPLETE; } @@ -152,9 +864,54 @@ static int create_setup_resp( * Synopsis: free memory and return * */ -static int create_cleanup( +static int cleanup( struct PINT_smcb *smcb, job_status_s *js_p) { + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + if(s_op->key_a) + { + free(s_op->key_a); + } + + if(s_op->val_a) + { + if(s_op->val_a[1].buffer) + { + free(s_op->val_a[1].buffer); + } + if(s_op->resp.u.create.stuffed && s_op->val_a[2].buffer) + { + free(s_op->val_a[2].buffer); + } + free(s_op->val_a); + } + + if(s_op->resp.u.create.datafile_handles) + { + free(s_op->resp.u.create.datafile_handles); + } + + 
if(s_op->u.create.handle_array_remote) + { + free(s_op->u.create.handle_array_remote); + } + + if(s_op->u.create.handle_array_local) + { + free(s_op->u.create.handle_array_local); + } + + if(s_op->u.create.io_servers) + { + free(s_op->u.create.io_servers); + } + + if(s_op->u.create.remote_io_servers) + { + free(s_op->u.create.remote_io_servers); + } + return(server_state_machine_complete(smcb)); } diff --git a/src/server/del-eattr.sm b/src/server/del-eattr.sm index 4ca65b3..3a1783e 100644 --- a/src/server/del-eattr.sm +++ b/src/server/del-eattr.sm @@ -77,6 +77,13 @@ static PINT_sm_action deleattr_verify_eattribs( PINT_util_get_object_type(a_p->objtype), a_p->owner, a_p->group, a_p->perms, a_p->objtype); + + if( s_op->req->u.deleattr.key.buffer_sz > PVFS_MAX_XATTR_NAMELEN ) + { + js_p->error_code = -PVFS_EINVAL; + return SM_ACTION_COMPLETE; + } + switch (a_p->objtype) { case PVFS_TYPE_METAFILE : @@ -166,7 +173,7 @@ static PINT_sm_action deleattr_delobj_eattribs( 0, js_p, &j_id, - server_job_context); + server_job_context, s_op->req->hints); return ret; } diff --git a/src/server/event-mon.sm b/src/server/event-mon.sm index d8deee0..f950625 100644 --- a/src/server/event-mon.sm +++ b/src/server/event-mon.sm @@ -81,10 +81,6 @@ static PINT_sm_action event_mon_do_work( s_op->resp.u.mgmt_event_mon.event_count = s_op->req->u.mgmt_event_mon.event_count; - /* get events */ - PINT_event_retrieve(s_op->resp.u.mgmt_event_mon.event_array, - s_op->req->u.mgmt_event_mon.event_count); - js_p->error_code = 0; return SM_ACTION_COMPLETE; } diff --git a/src/server/final-response.sm b/src/server/final-response.sm index 7f7a1af..ea48bf2 100644 --- a/src/server/final-response.sm +++ b/src/server/final-response.sm @@ -128,7 +128,7 @@ static PINT_sm_action final_response_send_resp( PINT_gossip_err_server_resp(&s_op->resp); js_p->error_code = ret; - return SM_ACTION_COMPLETE; + return 1; } /* send the response */ @@ -136,7 +136,8 @@ static PINT_sm_action final_response_send_resp( s_op->addr, 
s_op->encoded.buffer_list, s_op->encoded.size_list, s_op->encoded.list_count, s_op->encoded.total_size, s_op->tag, s_op->encoded.buffer_type, 0, smcb, 0, js_p, &tmp_id, - server_job_context, user_opts->server_job_bmi_timeout); + server_job_context, user_opts->server_job_bmi_timeout, + s_op->req->hints); return ret; } @@ -225,6 +226,9 @@ static void PINT_gossip_err_server_resp( case PVFS_TYPE_DIRDATA: gossip_err("DIRDATA [ n/a ]\n"); break; + case PVFS_TYPE_INTERNAL: + gossip_err("INTERNAL [ n/a ]\n"); + break; case PVFS_TYPE_NONE: gossip_err("NONE [ n/a ]\n"); break; diff --git a/src/server/flush.sm b/src/server/flush.sm index ceb4ab1..f7163a1 100644 --- a/src/server/flush.sm +++ b/src/server/flush.sm @@ -133,7 +133,7 @@ static PINT_sm_action flush_keyval_flush( 0, js_p, &tmp_id, - server_job_context); + server_job_context, s_op->req->hints); return ret; } @@ -184,7 +184,7 @@ static PINT_sm_action flush_bstream_flush( 0, js_p, &i, - server_job_context); + server_job_context, s_op->req->hints); return ret; } diff --git a/src/server/get-attr.sm b/src/server/get-attr.sm index 7cd7f93..df150e0 100644 --- a/src/server/get-attr.sm +++ b/src/server/get-attr.sm @@ -28,24 +28,33 @@ #include "pvfs2-util.h" #include "pint-util.h" #include "pvfs2-internal.h" +#include "pint-cached-config.h" + +static uint64_t UINT64_HIGH = 0xffffffffffffffffLL; PINT_server_trove_keys_s Trove_Special_Keys[] = { - {"user.pvfs2.dist_name" , SPECIAL_DIST_NAME_KEYLEN}, - {"user.pvfs2.dist_params", SPECIAL_DIST_PARAMS_KEYLEN}, - {"user.pvfs2.num_dfiles" , SPECIAL_NUM_DFILES_KEYLEN}, - {"user.pvfs2.meta_hint" , SPECIAL_METAFILE_HINT_KEYLEN}, + {"user.pvfs2.dist_name" , SPECIAL_DIST_NAME_KEYLEN}, + {"user.pvfs2.dist_params" , SPECIAL_DIST_PARAMS_KEYLEN}, + {"user.pvfs2.num_dfiles" , SPECIAL_NUM_DFILES_KEYLEN}, + {"user.pvfs2.meta_hint" , SPECIAL_METAFILE_HINT_KEYLEN}, + {"user.pvfs2.mirror.copies", SPECIAL_MIRROR_COPIES_KEYLEN}, + {"user.pvfs2.mirror.handles", SPECIAL_MIRROR_HANDLES_KEYLEN}, + 
{"user.pvfs2.mirror.status" , SPECIAL_MIRROR_STATUS_KEYLEN}, }; enum { - STATE_METAFILE = 7, - STATE_SYMLINK = 9, - STATE_DIR = 10, - STATE_DIR_HINT = 11, - STATE_DONE = 12 + STATE_METAFILE = 7, + STATE_SYMLINK = 9, + STATE_DIR = 10, + STATE_DIR_HINT = 11, + STATE_DONE = 12, + SKIP_NEXT_STATE = 13, }; +static void free_nested_getattr_data(struct PINT_server_op *s_op); + %% nested machine pvfs2_get_attr_work_sm @@ -81,6 +90,34 @@ nested machine pvfs2_get_attr_work_sm state read_metafile_datafile_handles_if_required { run getattr_read_metafile_datafile_handles_if_required; + success => datafile_handles_safety_check; + default => setup_resp; + } + + state datafile_handles_safety_check + { + run getattr_datafile_handles_safety_check; + success => read_mirrored_copies_count_if_required; + default => setup_resp; + } + + state read_mirrored_copies_count_if_required + { + run getattr_read_mirrored_copies_count_if_required; + SKIP_NEXT_STATE => read_metafile_distribution_if_required; + default => read_mirrored_handles_if_required; + } + + state read_mirrored_handles_if_required + { + run getattr_read_mirrored_handles_if_required; + SKIP_NEXT_STATE => read_metafile_distribution_if_required; + default => mirrored_handles_safety_check; + } + + state mirrored_handles_safety_check + { + run getattr_mirrored_handles_safety_check; success => read_metafile_distribution_if_required; default => setup_resp; } @@ -88,6 +125,32 @@ nested machine pvfs2_get_attr_work_sm state read_metafile_distribution_if_required { run getattr_read_metafile_distribution_if_required; + default => interpret_metafile_distribution; + } + + state interpret_metafile_distribution + { + run interpret_metafile_distribution; + success => detect_stuffed; + default => setup_resp; + } + + state detect_stuffed + { + run getattr_detect_stuffed; + default => read_stuffed_size; + } + + state read_stuffed_size + { + run getattr_read_stuffed_size; + success => interpret_stuffed_size; + default => setup_resp; + } + + state 
interpret_stuffed_size + { + run getattr_interpret_stuffed_size; default => setup_resp; } @@ -131,13 +194,19 @@ nested machine pvfs2_get_attr_work_sm } } -machine pvfs2_get_attr_sm +nested machine pvfs2_get_attr_with_prelude_sm { + state init + { + run getattr_with_prelude_init; + default => prelude; + } + state prelude { jump pvfs2_prelude_sm; success => setup_op; - default => final_response; + default => return; } state setup_op @@ -149,6 +218,15 @@ machine pvfs2_get_attr_sm state do_work { jump pvfs2_get_attr_work_sm; + default => return; + } +} + +machine pvfs2_get_attr_sm +{ + state work + { + jump pvfs2_get_attr_with_prelude_sm; default => final_response; } @@ -194,6 +272,7 @@ static PINT_sm_action getattr_verify_attribs( resp_attr->group = s_op->attr.group; resp_attr->perms = s_op->attr.perms; resp_attr->atime = s_op->attr.atime; + resp_attr->cid = s_op->attr.cid; resp_attr->mtime = PINT_util_mkversion_time(s_op->attr.mtime); if (resp_attr->mtime == 0) @@ -246,128 +325,146 @@ static PINT_sm_action getattr_verify_attribs( have the original client request attr mask (s_op->u.getattr.attrmask). 
*/ - if (resp_attr->objtype == PVFS_TYPE_METAFILE) + switch(resp_attr->objtype) { - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: metafile\n"); - gossip_debug(GOSSIP_GETATTR_DEBUG, - " Req handle %llu refers to a metafile\n", - llu(s_op->u.getattr.handle)); - - if (s_op->u.getattr.attrmask & PVFS_ATTR_META_DFILES) - { + case PVFS_TYPE_METAFILE: + PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: metafile\n"); gossip_debug(GOSSIP_GETATTR_DEBUG, - " dspace has dfile_count of %d\n", - resp_attr->u.meta.dfile_count); - resp_attr->mask |= PVFS_ATTR_META_DFILES; - } - else - { - gossip_debug(GOSSIP_GETATTR_DEBUG, " client doesn't want " - "dfile info, clearing response attr mask\n"); - resp_attr->mask &= ~PVFS_ATTR_META_DFILES; - } + " Req handle %llu refers to a metafile\n", + llu(s_op->u.getattr.handle)); - if (s_op->u.getattr.attrmask & PVFS_ATTR_META_DIST) - { - gossip_debug(GOSSIP_GETATTR_DEBUG, - " dspace has dist size of %d\n", - resp_attr->u.meta.dist_size); + if (s_op->u.getattr.attrmask & PVFS_ATTR_META_DFILES) + { + gossip_debug(GOSSIP_GETATTR_DEBUG, + " dspace has dfile_count of %d\n", + resp_attr->u.meta.dfile_count); + resp_attr->mask |= PVFS_ATTR_META_DFILES; + } + else + { + gossip_debug(GOSSIP_GETATTR_DEBUG, " client doesn't want " + "dfile info, clearing response attr mask\n"); + resp_attr->mask &= ~PVFS_ATTR_META_DFILES; + } - resp_attr->mask |= PVFS_ATTR_META_DIST; - } - else - { - gossip_debug(GOSSIP_GETATTR_DEBUG, " client doesn't want " - "dist info, clearing response attr mask\n"); + if (s_op->u.getattr.attrmask & PVFS_ATTR_META_DIST) + { + gossip_debug(GOSSIP_GETATTR_DEBUG, + " dspace has dist size of %d\n", + resp_attr->u.meta.dist_size); - resp_attr->mask &= ~PVFS_ATTR_META_DIST; - } - js_p->error_code = STATE_METAFILE; - } - else if (resp_attr->objtype == PVFS_TYPE_DATAFILE) - { - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: datafile\n"); - /* - note: the prelude already retrieved the size for us, so - there's no special action 
that needs to be taken if we have - a datafile here (other than adjusting our mask to include - the data information and copying the retrieved size from the - ds_attribute the prelude used) - */ - resp_attr->u.data.size = s_op->ds_attr.b_size; - resp_attr->mask |= PVFS_ATTR_DATA_ALL; + resp_attr->mask |= PVFS_ATTR_META_DIST; + } + else + { + gossip_debug(GOSSIP_GETATTR_DEBUG, " client doesn't want " + "dist info, clearing response attr mask\n"); - gossip_debug(GOSSIP_GETATTR_DEBUG, " handle %llu refers to " - "a datafile (size = %lld).\n", - llu(s_op->u.getattr.handle), - lld(resp_attr->u.data.size)); - } - else if (resp_attr->objtype == PVFS_TYPE_DIRECTORY) - { - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: directory\n"); - if (s_op->u.getattr.attrmask & PVFS_ATTR_DIR_DIRENT_COUNT) - { - gossip_debug(GOSSIP_GETATTR_DEBUG, - " getattr: dirent_count needed.\n"); - assert(resp_attr->mask & PVFS_ATTR_COMMON_ALL); - resp_attr->mask |= PVFS_ATTR_DIR_DIRENT_COUNT; - js_p->error_code = STATE_DIR; - } - else - { - gossip_debug(GOSSIP_GETATTR_DEBUG, - " getattr: dirent_count not needed.\n"); - js_p->error_code = 0; - assert(resp_attr->mask & PVFS_ATTR_COMMON_ALL); - } - if (s_op->u.getattr.attrmask & PVFS_ATTR_DIR_HINT) - { - gossip_debug(GOSSIP_GETATTR_DEBUG, - " getattr: dfile_count needed.\n"); - assert(resp_attr->mask & PVFS_ATTR_COMMON_ALL); - resp_attr->mask |= PVFS_ATTR_DIR_HINT; - js_p->error_code = STATE_DIR; - } - else - { - gossip_debug(GOSSIP_GETATTR_DEBUG, - " getattr: dfile_count not needed\n"); + resp_attr->mask &= ~PVFS_ATTR_META_DIST; + } + + if (s_op->u.getattr.attrmask & PVFS_ATTR_META_MIRROR_DFILES) + { + gossip_debug(GOSSIP_GETATTR_DEBUG,"client wants mirrored " + "handles.\n"); + resp_attr->mask |= PVFS_ATTR_META_MIRROR_DFILES; + resp_attr->u.meta.mirror_copies_count = 0; + resp_attr->u.meta.mirror_dfile_array = NULL; + } + else + { + gossip_debug(GOSSIP_GETATTR_DEBUG,"client doesn't want " + "mirrored handles.\n"); + resp_attr->mask &= 
~(PVFS_ATTR_META_MIRROR_DFILES); + } + js_p->error_code = STATE_METAFILE; + break; + case PVFS_TYPE_DATAFILE: + PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: datafile\n"); + /* + note: the prelude already retrieved the size for us, so + there's no special action that needs to be taken if we have + a datafile here (other than adjusting our mask to include + the data information and copying the retrieved size from the + ds_attribute the prelude used) + */ + resp_attr->u.data.size = s_op->ds_attr.u.datafile.b_size; + resp_attr->mask |= PVFS_ATTR_DATA_ALL; + + gossip_debug(GOSSIP_GETATTR_DEBUG, " handle %llu refers to " + "a datafile (size = %lld).\n", + llu(s_op->u.getattr.handle), + lld(resp_attr->u.data.size)); + break; + case PVFS_TYPE_DIRECTORY: + PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: directory\n"); + if (s_op->u.getattr.attrmask & PVFS_ATTR_DIR_DIRENT_COUNT) + { + gossip_debug(GOSSIP_GETATTR_DEBUG, + " getattr: dirent_count needed.\n"); + assert(resp_attr->mask & PVFS_ATTR_COMMON_ALL); + resp_attr->mask |= PVFS_ATTR_DIR_DIRENT_COUNT; + js_p->error_code = STATE_DIR; + } + else + { + gossip_debug(GOSSIP_GETATTR_DEBUG, + " getattr: dirent_count not needed.\n"); + js_p->error_code = 0; + assert(resp_attr->mask & PVFS_ATTR_COMMON_ALL); + } + if (s_op->u.getattr.attrmask & PVFS_ATTR_DIR_HINT) + { + gossip_debug(GOSSIP_GETATTR_DEBUG, + " getattr: dfile_count needed.\n"); + assert(resp_attr->mask & PVFS_ATTR_COMMON_ALL); + resp_attr->mask |= PVFS_ATTR_DIR_HINT; + js_p->error_code = STATE_DIR; + } + else + { + gossip_debug(GOSSIP_GETATTR_DEBUG, + " getattr: dfile_count not needed\n"); + assert(resp_attr->mask & PVFS_ATTR_COMMON_ALL); + } + break; + case PVFS_TYPE_DIRDATA: + PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: dirdata\n"); + gossip_debug( + GOSSIP_GETATTR_DEBUG, " handle %llu refers to " + "a dirdata object. 
doing nothing special\n", + llu(s_op->u.getattr.handle)); assert(resp_attr->mask & PVFS_ATTR_COMMON_ALL); - } - } - else if (resp_attr->objtype == PVFS_TYPE_DIRDATA) - { - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: dirdata\n"); - gossip_debug( - GOSSIP_GETATTR_DEBUG, " handle %llu refers to " - "a dirdata object. doing nothing special\n", - llu(s_op->u.getattr.handle)); - assert(resp_attr->mask & PVFS_ATTR_COMMON_ALL); - } - else if (resp_attr->objtype == PVFS_TYPE_SYMLINK) - { - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: symlink\n"); - gossip_debug( - GOSSIP_GETATTR_DEBUG, " handle %llu refers to a symlink.\n", - llu(s_op->u.getattr.handle)); + break; + case PVFS_TYPE_SYMLINK: + PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: symlink\n"); + gossip_debug( + GOSSIP_GETATTR_DEBUG, " handle %llu refers to a symlink.\n", + llu(s_op->u.getattr.handle)); - /* - we'll definitely have to fetch the symlink target in this - case, as the prelude will never retrieve it for us - */ - js_p->error_code = STATE_SYMLINK; - } - else - { - /* if we don't understand the object type, then it probably indicates - * a bug or some data corruption. All trove objects should have a - * type set. - */ - gossip_err( - "Error: got unknown type when verifying attributes for handle %llu.\n", - llu(s_op->u.getattr.handle)); - js_p->error_code = -PVFS_ENXIO; + /* + we'll definitely have to fetch the symlink target in this + case, as the prelude will never retrieve it for us + */ + js_p->error_code = STATE_SYMLINK; + break; + case PVFS_TYPE_INTERNAL: + PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: symlink\n"); + /* nothing interesting to add; this is meaningless to a client */ + break; + default: + /* if we don't understand the object type, then it probably indicates + * a bug or some data corruption. All trove objects should have a + * type set. 
+ */ + gossip_err( + "Error: got unknown type when verifying attributes for " + "handle %llu.\n", + llu(s_op->u.getattr.handle)); + js_p->error_code = -PVFS_ENXIO; + break; } + return SM_ACTION_COMPLETE; } @@ -404,15 +501,22 @@ static PINT_sm_action getattr_read_symlink_target( return SM_ACTION_COMPLETE; } + if(s_op->free_val) + { + free(s_op->val.buffer); + } s_op->val.buffer = s_op->resp.u.getattr.attr.u.sym.target_path; s_op->val.buffer_sz = s_op->resp.u.getattr.attr.u.sym.target_path_len; + /* this will get cleaned up with attr structure */ + s_op->free_val = 0; ret = job_trove_keyval_read( s_op->u.getattr.fs_id, s_op->u.getattr.handle, - &(s_op->key), &(s_op->val), - 0, + &s_op->key, &s_op->val, + 0, NULL, smcb, 0, js_p, - &i, server_job_context); + &i, server_job_context, s_op->req->hints); + return ret; } @@ -422,22 +526,27 @@ static PINT_sm_action getattr_interpret_metafile_hint( { struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); PVFS_object_attr *resp_attr = NULL; - + PVFS_metafile_attr *meta = &(s_op->resp.u.getattr.attr.u.meta); resp_attr = &s_op->resp.u.getattr.attr; + assert(resp_attr->objtype == PVFS_TYPE_METAFILE); + if (js_p->error_code == 0 || js_p->error_code == -TROVE_ENOENT) { if (js_p->error_code == 0) { - memcpy(&s_op->resp.u.getattr.attr.u.meta.hint, s_op->val.buffer, - sizeof(s_op->resp.u.getattr.attr.u.meta.hint)); + memcpy(&(meta->hint), s_op->val.buffer, sizeof(meta->hint)); } if ((resp_attr->mask & PVFS_ATTR_META_DFILES) || - (resp_attr->mask & PVFS_ATTR_META_DIST)) + (resp_attr->mask & PVFS_ATTR_META_DIST) || + (resp_attr->mask & PVFS_ATTR_META_MIRROR_DFILES)) { gossip_debug(GOSSIP_GETATTR_DEBUG, " * client wants extra " "meta info, about to retrieve it now\n"); js_p->error_code = STATE_METAFILE; + if ( (resp_attr->mask & PVFS_ATTR_META_MIRROR_DFILES) && + !(meta->hint.flags & PVFS_MIRROR_FL) ) + resp_attr->mask &= ~(PVFS_ATTR_META_MIRROR_DFILES); } else { @@ -445,18 +554,13 @@ static PINT_sm_action 
getattr_interpret_metafile_hint( "extra meta info, preparing response now\n"); js_p->error_code = 0; } - } - else { + } else { /*If we hit an error the DIST & DFILES are no longer valid*/ - s_op->resp.u.getattr.attr.mask &= ~PVFS_ATTR_META_DIST; - s_op->resp.u.getattr.attr.mask &= ~PVFS_ATTR_META_DFILES; - } - if (s_op->val.buffer) - { - free(s_op->val.buffer); - s_op->val.buffer = NULL; + resp_attr->mask &= ~PVFS_ATTR_META_DIST; + resp_attr->mask &= ~PVFS_ATTR_META_DFILES; + resp_attr->mask &= ~PVFS_ATTR_META_MIRROR_DFILES; } - return 1; + return SM_ACTION_COMPLETE; } static PINT_sm_action getattr_read_metafile_hint( @@ -475,6 +579,7 @@ static PINT_sm_action getattr_read_metafile_hint( /*If we hit an error the DIST & DFILES are no longer valid*/ s_op->resp.u.getattr.attr.mask &= ~PVFS_ATTR_META_DIST; s_op->resp.u.getattr.attr.mask &= ~PVFS_ATTR_META_DFILES; + s_op->resp.u.getattr.attr.mask &= ~PVFS_ATTR_META_MIRROR_DFILES; return 1; } @@ -483,8 +588,13 @@ static PINT_sm_action getattr_read_metafile_hint( s_op->key.buffer = Trove_Special_Keys[METAFILE_HINT_KEY].key; s_op->key.buffer_sz = Trove_Special_Keys[METAFILE_HINT_KEY].size; + if(s_op->free_val) + { + free(s_op->val.buffer); + } s_op->val.buffer = buf; s_op->val.buffer_sz = sizeof(s_op->resp.u.getattr.attr.u.meta.hint) + 1; + s_op->free_val = 1; gossip_debug(GOSSIP_GETATTR_DEBUG, " reading metafile hint (coll_id = %d, " @@ -499,7 +609,7 @@ static PINT_sm_action getattr_read_metafile_hint( &s_op->key, &s_op->val, 0, NULL, smcb, 0, js_p, - &i, server_job_context); + &i, server_job_context, s_op->req->hints); return ret; } @@ -556,6 +666,7 @@ static PINT_sm_action getattr_read_metafile_datafile_handles_if_required( /*If we hit an error the DIST & DFILES are no longer valid*/ s_op->resp.u.getattr.attr.mask &= ~PVFS_ATTR_META_DIST; s_op->resp.u.getattr.attr.mask &= ~PVFS_ATTR_META_DFILES; + s_op->resp.u.getattr.attr.mask &= ~PVFS_ATTR_META_MIRROR_DFILES; js_p->error_code = -PVFS_EOVERFLOW; return 
SM_ACTION_COMPLETE; @@ -577,8 +688,14 @@ static PINT_sm_action getattr_read_metafile_datafile_handles_if_required( return SM_ACTION_COMPLETE; } + if(s_op->free_val) + { + free(s_op->val.buffer); + } s_op->val.buffer = s_op->resp.u.getattr.attr.u.meta.dfile_array; s_op->val.buffer_sz = (dfile_count * sizeof(PVFS_handle)); + /* this will get cleaned up with attr structure */ + s_op->free_val = 0; gossip_debug(GOSSIP_GETATTR_DEBUG, " reading %d datafile handles (coll_id = %d, " @@ -589,15 +706,405 @@ static PINT_sm_action getattr_read_metafile_datafile_handles_if_required( s_op->val.buffer_sz); ret = job_trove_keyval_read( - s_op->u.getattr.fs_id, s_op->u.getattr.handle, - &s_op->key, &s_op->val, - 0, - NULL, smcb, 0, js_p, - &i, server_job_context); + s_op->u.getattr.fs_id + ,s_op->u.getattr.handle + ,&s_op->key + ,&s_op->val + ,0 + ,NULL + ,smcb + ,0 + ,js_p + ,&i + ,server_job_context + ,s_op->req->hints); return ret; } + +static PINT_sm_action getattr_read_mirrored_copies_count_if_required( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing %s...\n",__func__); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PVFS_server_resp *resp = &(s_op->resp); + PVFS_metafile_attr *meta = &(resp->u.getattr.attr.u.meta); + int ret = -PVFS_EINVAL; + job_id_t job_id; + + /* Are we mirroring? 
*/ + if (!(resp->u.getattr.attr.mask & PVFS_ATTR_META_MIRROR_DFILES)) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tMirroring is NOT turned on " + "for this handle(%llu)..\n" + ,llu(s_op->u.getattr.handle)); + js_p->error_code = SKIP_NEXT_STATE; + + return SM_ACTION_COMPLETE; + } + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tMirroring IS turned on for this " + "handle(%llu)...\n" + ,llu(s_op->u.getattr.handle)); + + js_p->error_code = 0; + + /* setup job to read user.pvfs2.mirror.copies */ + + /* initialize */ + if (s_op->free_val) + free(s_op->val.buffer); + memset(&(s_op->val),0,sizeof(s_op->val)); + memset(&(s_op->key),0,sizeof(s_op->key)); + + /* set key = user.pvfs2.mirror.copies */ + s_op->key.buffer = Trove_Special_Keys[MIRROR_COPIES_KEY].key; + s_op->key.buffer_sz = Trove_Special_Keys[MIRROR_COPIES_KEY].size; + + /* setup space for retrieved value */ + meta->mirror_copies_count = 0; + meta->mirror_dfile_array = NULL; + s_op->val.buffer = &(meta->mirror_copies_count); + s_op->val.buffer_sz = sizeof(meta->mirror_copies_count); + s_op->free_val = 0; + + /* submit job to read the value */ + ret = job_trove_keyval_read( + s_op->u.getattr.fs_id + ,s_op->u.getattr.handle + ,&s_op->key + ,&s_op->val + ,0 + ,NULL + ,smcb + ,0 + ,js_p + ,&job_id + ,server_job_context + ,s_op->req->hints); + + return ret; +}/*end getattr_read_mirrored_copies_if_required*/ + + + +static PINT_sm_action getattr_read_mirrored_handles_if_required( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing %s ...\n",__func__); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PVFS_server_resp *resp = &(s_op->resp); + PVFS_metafile_attr *meta = &(resp->u.getattr.attr.u.meta); + int ret = -PVFS_EINVAL; + job_id_t job_id; + int i; + + /* Did we find mirror.copies? 
*/ + if (js_p->error_code < 0) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tNumber of mirrored copies cannot " + "be retrieved.\n"); + if (resp->u.getattr.attr.mask & (PVFS_ATTR_META_DFILES | + PVFS_ATTR_META_DIST) ) + { + resp->u.getattr.attr.mask &= ~PVFS_ATTR_META_MIRROR_DFILES; + js_p->error_code = SKIP_NEXT_STATE; + } else { + resp->u.getattr.attr.mask &= ~PVFS_ATTR_META_MIRROR_DFILES; + resp->u.getattr.attr.mask &= ~PVFS_ATTR_META_DFILES; + resp->u.getattr.attr.mask &= ~PVFS_ATTR_META_DIST; + } + memset(&(s_op->key),0,sizeof(s_op->key)); + memset(&(s_op->val),0,sizeof(s_op->val)); + s_op->free_val = 0; + + return SM_ACTION_COMPLETE; + } + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tNumber of mirrored copies count " + "successfully retrieved.\n"); + + /* check number of mirrored copies */ + if (meta->mirror_copies_count == 0) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tNumber of mirrored copies " + "is ZERO.\n"); + gossip_lerr("Mirror handles requested, but number of mirrored copies " + "is zero.\n"); + resp->u.getattr.attr.mask &= ~PVFS_ATTR_META_MIRROR_DFILES; + js_p->error_code = SKIP_NEXT_STATE; + return SM_ACTION_COMPLETE; + } + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tNumber of mirrored copies " + "retrieved : %d\n" + ,meta->mirror_copies_count); + + + /* check to see if total number of mirrored handles is sane */ + if ( (meta->dfile_count * meta->mirror_copies_count) > + PVFS_REQ_LIMIT_MIRROR_DFILE_COUNT ) + { + gossip_lerr("Number of mirrored handles(%d) exceeds the system " + "limit(%d)\n" + ,meta->dfile_count * meta->mirror_copies_count + ,PVFS_REQ_LIMIT_MIRROR_DFILE_COUNT); + resp->u.getattr.attr.mask &= ~PVFS_ATTR_META_MIRROR_DFILES; + js_p->error_code = SKIP_NEXT_STATE; + return SM_ACTION_COMPLETE; + } + + js_p->error_code = 0; + /* get mirrored handles and status of each handle */ + + /* initialize */ + if (s_op->free_val) + free(s_op->val.buffer); + memset(&(s_op->key),0,sizeof(s_op->key)); + memset(&(s_op->val),0,sizeof(s_op->val)); + + for (i=0; 
ikeyval_count; i++) + if (s_op->val_a && s_op->val_a[i].buffer && s_op->free_val) + free(s_op->val_a[i].buffer); + if (s_op->val_a) + free(s_op->val_a); + if (s_op->key_a) + free(s_op->key_a); + if (s_op->error_a) + free(s_op->error_a); + s_op->free_val = 0; + + /* allocate space for keys and values */ + s_op->keyval_count = 2; + s_op->free_val = 1; + s_op->key_a = s_op->val_a = NULL; + s_op->error_a = NULL; + + s_op->key_a = malloc(sizeof(*s_op->key_a) * s_op->keyval_count); + s_op->val_a = malloc(sizeof(*s_op->val_a) * s_op->keyval_count); + s_op->error_a = malloc(sizeof(*s_op->error_a) * s_op->keyval_count); + if (!s_op->key_a || !s_op->val_a || !s_op->error_a) + { + gossip_lerr("Cannot allocate memory for key/val/error.\n"); + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + memset(s_op->key_a,0,sizeof(*s_op->key_a)); + memset(s_op->val_a,0,sizeof(*s_op->val_a)); + + /* set key = user.pvfs2.mirror.handles */ + s_op->key_a[0].buffer = Trove_Special_Keys[MIRROR_HANDLES_KEY].key; + s_op->key_a[0].buffer_sz = Trove_Special_Keys[MIRROR_HANDLES_KEY].size; + + /* setup buffer space for handles */ + s_op->val_a[0].buffer = malloc(sizeof(PVFS_handle) * + meta->dfile_count * + meta->mirror_copies_count); + if (!s_op->val_a[0].buffer) + { + gossip_lerr("Cannot allocate memory for mirrored handles.\n"); + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + memset(s_op->val_a[0].buffer,0,sizeof(PVFS_handle) * + meta->dfile_count * + meta->mirror_copies_count); + s_op->val_a[0].buffer_sz = sizeof(PVFS_handle) * + meta->dfile_count * + meta->mirror_copies_count; + + /* set key = user.pvfs2.mirror.status */ + s_op->key_a[1].buffer = Trove_Special_Keys[MIRROR_STATUS_KEY].key; + s_op->key_a[1].buffer_sz = Trove_Special_Keys[MIRROR_STATUS_KEY].size; + + /* setup buffer space for handle statuses */ + s_op->val_a[1].buffer = malloc(sizeof(PVFS_handle) * + meta->dfile_count * + meta->mirror_copies_count); + if (!s_op->val_a[1].buffer) + { + gossip_lerr("Cannot 
allocate memory for mirrored handle statuses.\n"); + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + memset(s_op->val_a[1].buffer,0,sizeof(PVFS_handle) * + meta->dfile_count * + meta->mirror_copies_count); + s_op->val_a[1].buffer_sz = sizeof(PVFS_handle) * + meta->dfile_count * + meta->mirror_copies_count; + + + /* call job to retrieve the key/val pairs */ + ret = job_trove_keyval_read_list( + s_op->u.getattr.fs_id + ,s_op->u.getattr.handle + ,s_op->key_a + ,s_op->val_a + ,s_op->error_a + ,s_op->keyval_count + ,0 + ,NULL + ,smcb + ,0 + ,js_p + ,&job_id + ,server_job_context + ,s_op->req->hints ); + + return ret; + +error_exit: + for (i=0; ikeyval_count; i++) + { + if (s_op->val_a && s_op->val_a[i].buffer) + free(s_op->val_a[i].buffer); + } + if (s_op->val_a) + free(s_op->val_a); + if (s_op->key_a) + free(s_op->key_a); + if (s_op->error_a) + free(s_op->error_a); + s_op->val_a = s_op->key_a = NULL; + s_op->error_a = NULL; + s_op->keyval_count = 0; + s_op->free_val = 0; + + resp->u.getattr.attr.mask &= ~PVFS_ATTR_META_MIRROR_DFILES; + + return SM_ACTION_COMPLETE; +}/*end getattr_read_mirrored_handles_if_required*/ + + + + +static PINT_sm_action getattr_mirrored_handles_safety_check( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing %s ...\n",__func__); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PVFS_server_resp *resp = &(s_op->resp); + PVFS_metafile_attr *meta = &(resp->u.getattr.attr.u.meta); + int row,col,index; + int i; + + + js_p->error_code = 0; + + /* Check the error code for each key/val pair. 
*/ + for (i=0; ikeyval_count; i++) + { + if (s_op->error_a[i] != 0) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tRetrieval of key(%s) failed.\n" + ,(char *)s_op->key_a[i].buffer); + js_p->error_code = s_op->error_a[i]; + } + } + + if (js_p->error_code) + { + goto error_exit; + } + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tWe successfully retrieved handles and " + "statuses.\n"); + + for (i=0; ikeyval_count; i++) + { /* Did we get the data that we were expecting from + * user.pvfs2.mirror.handles(i=0) or + * user.pvfs2.mirror.statuses(i=1)? + */ + + if (s_op->val_a[i].read_sz != s_op->val_a[i].buffer_sz) + { + gossip_lerr("Error: %s key found val size: %d when " + "expecting val size: %d\n" + ,(char *)s_op->key_a[i].buffer + ,s_op->val_a[i].read_sz + ,s_op->val_a[i].buffer_sz); + js_p->error_code = s_op->val_a[i].buffer_sz; + } + }/*end for*/ + + if (js_p->error_code) + { + goto error_exit; + } + + /*initialize permanent data structures*/ + meta->mirror_dfile_array = s_op->val_a[0].buffer; + s_op->u.getattr.mirror_dfile_status_array = s_op->val_a[1].buffer; + s_op->val_a[0].buffer = s_op->val_a[1].buffer = NULL; + + /* Check the mirroring status for each handle. If the status is non-zero, + * the handle is not valid, so put a null in the mirrory array for that + * handle. Otherwise, do nothing. 
+ */ + for (row=0; rowmirror_copies_count; row++) + { + for (col=0; coldfile_count; col++) + { + index = (row*meta->dfile_count) + col; + if ( s_op->u.getattr.mirror_dfile_status_array[index] == UINT64_HIGH ) + meta->mirror_dfile_array[index] = 0; + gossip_debug(GOSSIP_MIRROR_DEBUG, + "\tmirror handle[%d]:%llu \t" + "status:%llu\n" + ,index + ,llu(meta->mirror_dfile_array[index]) + ,llu(s_op->u.getattr.mirror_dfile_status_array[index])); + } + } + + /*Cleanup*/ + free(s_op->key_a); + free(s_op->val_a); + free(s_op->error_a); + s_op->key_a = s_op->val_a = NULL; + s_op->error_a = NULL; + s_op->keyval_count = 0; + s_op->free_val = 0; + + js_p->error_code = 0; + return SM_ACTION_COMPLETE; + +error_exit: + /* if we have an error, cleanup, and pretend that we never attempted + * mirrors in the first place. + */ + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tCleaning up mirror operation...\n"); + js_p->error_code = 0; + for (i=0; ikeyval_count; i++) + { + if (s_op->val_a[i].buffer) + free(s_op->val_a[i].buffer); + } + free(s_op->key_a); + free(s_op->val_a); + free(s_op->error_a); + s_op->key_a = s_op->val_a = NULL; + s_op->error_a = NULL; + s_op->keyval_count = 0; + + /*We MUST set the number of copies to zero to prevent encoding errors + *later. 
+ */ + meta->mirror_copies_count = 0; + meta->mirror_dfile_array = NULL; + s_op->u.getattr.mirror_dfile_status_array = NULL; + resp->u.getattr.attr.mask &= ~PVFS_ATTR_META_MIRROR_DFILES; + return SM_ACTION_COMPLETE; +}/*end getattr_mirrored_handles_safety_check*/ + + + + static PINT_sm_action getattr_read_metafile_distribution_if_required( struct PINT_smcb *smcb, job_status_s *js_p) { @@ -638,6 +1145,10 @@ static PINT_sm_action getattr_read_metafile_distribution_if_required( /* add mask value to indicate the distribution is filled */ s_op->resp.u.getattr.attr.mask |= PVFS_ATTR_META_DIST; + if(s_op->free_val) + { + free(s_op->val.buffer); + } s_op->val.buffer_sz = s_op->resp.u.getattr.attr.u.meta.dist_size; s_op->val.buffer = malloc(s_op->val.buffer_sz); if (!s_op->val.buffer) @@ -651,14 +1162,127 @@ static PINT_sm_action getattr_read_metafile_distribution_if_required( ret = job_trove_keyval_read( s_op->u.getattr.fs_id, s_op->u.getattr.handle, - &(s_op->key), &(s_op->val), - 0, + &(s_op->key), &(s_op->val), + 0, NULL, - smcb, 0, js_p, &i, server_job_context); + smcb, 0, js_p, &i, server_job_context, s_op->req->hints); return ret; } +static PINT_sm_action getattr_read_stuffed_size( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + job_id_t job_id; + + if(js_p->error_code == -TROVE_ENOENT) + { + gossip_debug( + GOSSIP_GETATTR_DEBUG, "Getattr detected non-stuffed file.\n"); + /* this means that the keyval fields used to indicate a file is + * stuffed are not present. Set mask accordingly and continue. 
+ */ + s_op->resp.u.getattr.attr.mask |= PVFS_ATTR_META_UNSTUFFED; + js_p->error_code = 0; + return SM_ACTION_COMPLETE; + } + if(js_p->error_code) + { + /* any other error code here is just a normal error case */ + /* preserve error code and catch next error transition */ + return SM_ACTION_COMPLETE; + } + + gossip_debug( + GOSSIP_GETATTR_DEBUG, "Getattr detected stuffed file.\n"); + /* otherwise, we found keyval fields indicating that the file is + * stuffed. It does not matter if the client asked for the size or not; + * we must retrieve a valid stuffed_size value for the attrs. + */ + s_op->resp.u.getattr.attr.mask &= (~(PVFS_ATTR_META_UNSTUFFED)); + + return(job_trove_dspace_getattr( + s_op->u.getattr.fs_id, + s_op->resp.u.getattr.attr.u.meta.dfile_array[0], + smcb, + &s_op->ds_attr, + 0, + js_p, + &job_id, + server_job_context, + s_op->req->hints)); +} + +static PINT_sm_action getattr_interpret_stuffed_size( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PVFS_metafile_attr *meta = &(s_op->resp.u.getattr.attr.u.meta); + + if(js_p->error_code == 0) + { + meta->stuffed_size = s_op->ds_attr.u.datafile.b_size; + } + + /* deliberately leave error_code unchanged so that any errors get + * handled in the next state + */ + return SM_ACTION_COMPLETE; +} + + +/* interpret_metafile_distribution() + * + * capture and encode results of reading distribution + */ +static PINT_sm_action interpret_metafile_distribution( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PVFS_object_attr *resp_attr = &s_op->resp.u.getattr.attr; + + if(js_p->error_code < 0) + { + return SM_ACTION_COMPLETE; + } + + if(s_op->u.getattr.attrmask & PVFS_ATTR_META_DIST) + { + /* successfully read dist key; make sure we got something valid */ + if(s_op->val.read_sz != s_op->val.buffer_sz) + { + gossip_err("Error: %s key found val size: %d when " + 
"expecting val size: %d\n", + Trove_Common_Keys[METAFILE_DIST_KEY].key, + s_op->val.read_sz, + s_op->val.buffer_sz); + + /* clear bitmask to prevent double free between setup_resp and + * PINT_free_object_attr() + */ + s_op->resp.u.getattr.attr.mask &= ~PVFS_ATTR_META_DIST; + + js_p->error_code = -PVFS_EIO; + return SM_ACTION_COMPLETE; + } + + assert(s_op->val.buffer); + PINT_dist_decode(&resp_attr->u.meta.dist, s_op->val.buffer); + + if(resp_attr->u.meta.dist == 0) { + gossip_err("Found dist of 0 for handle %llu,%d\n", + llu(s_op->u.getattr.handle), s_op->u.getattr.fs_id); + PVFS_perror("Metafile getattr_setup_resp",js_p->error_code); + js_p->error_code = -PVFS_EIO; + return SM_ACTION_COMPLETE; + } + } + + js_p->error_code = 0; + return SM_ACTION_COMPLETE; +} + static PINT_sm_action getattr_setup_resp( struct PINT_smcb *smcb, job_status_s *js_p) { @@ -675,16 +1299,13 @@ static PINT_sm_action getattr_setup_resp( } if(js_p->error_code < 0) { - if(s_op->val.buffer) - { - free(s_op->val.buffer); - } + free_nested_getattr_data(s_op); return SM_ACTION_COMPLETE; } gossip_debug( GOSSIP_GETATTR_DEBUG, - "- RETURNING retrieved attrs: [owner = %d, group = %d\n\t" + "- retrieved attrs: [owner = %d, group = %d\n\t" "perms = %o, type = %d, atime = %llu, mtime = %llu\n\t" "ctime = %llu, dist_size = %d]\n", resp_attr->owner, resp_attr->group, resp_attr->perms, @@ -704,21 +1325,17 @@ static PINT_sm_action getattr_setup_resp( " also returning %d datafile handles\n", resp_attr->u.meta.dfile_count); } - + if (resp_attr->mask & PVFS_ATTR_META_MIRROR_DFILES) + { + if (resp_attr->u.meta.mirror_copies_count) + assert(resp_attr->u.meta.mirror_dfile_array); + gossip_debug(GOSSIP_GETATTR_DEBUG, + " also returning %d mirrored copies\n" + ,resp_attr->u.meta.mirror_copies_count); + } if (resp_attr->mask & PVFS_ATTR_META_DIST) { - assert(s_op->val.buffer); - PINT_dist_decode(&resp_attr->u.meta.dist, s_op->val.buffer); - free(s_op->val.buffer); - - if(resp_attr->u.meta.dist == 0) { - 
gossip_err("Found dist of 0 for handle %llu,%d\n", - llu(s_op->u.getattr.handle), s_op->u.getattr.fs_id); - PVFS_perror("Metafile getattr_setup_resp",js_p->error_code); - js_p->error_code = -PVFS_EIO; - return SM_ACTION_COMPLETE; - } - + /* we have already gathered the dist field in an earlier state */ gossip_debug(GOSSIP_GETATTR_DEBUG, " also returning dist size of %d\n", resp_attr->u.meta.dist_size); @@ -757,6 +1374,7 @@ static PINT_sm_action getattr_setup_resp( s_op->u.getattr.fs_id); PVFS_perror("Symlink retrieval failure",js_p->error_code); + free_nested_getattr_data(s_op); js_p->error_code = -PVFS_EINVAL; return SM_ACTION_COMPLETE; } @@ -786,19 +1404,24 @@ static PINT_sm_action getattr_setup_resp( s_op->resp.u.getattr.attr.mask); #endif + free_nested_getattr_data(s_op); return SM_ACTION_COMPLETE; } -static PINT_sm_action getattr_cleanup( - struct PINT_smcb *smcb, job_status_s *js_p) +static void free_nested_getattr_data(struct PINT_server_op *s_op) { - struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int i; + /* free up anything that was set up specifically by this nested machine */ + if (s_op->free_val) + { + for (i=0; ikeyval_count; i++) + { + if (s_op->val_a[i].buffer) + free(s_op->val_a[i].buffer); + } + } if(s_op->val_a) { - if(s_op->val_a[NUM_DFILES_KEY].buffer) - { - free(s_op->val_a[NUM_DFILES_KEY].buffer); - } free(s_op->val_a); s_op->val_a = NULL; } @@ -812,11 +1435,41 @@ static PINT_sm_action getattr_cleanup( free(s_op->u.getattr.err_array); s_op->u.getattr.err_array = NULL; } + if (s_op->u.getattr.mirror_dfile_status_array) + { + free(s_op->u.getattr.mirror_dfile_status_array); + s_op->u.getattr.mirror_dfile_status_array = NULL; + } + if(s_op->free_val) + { + free(s_op->val.buffer); + s_op->val.buffer = NULL; + } + if(s_op->error_a) + { + free(s_op->error_a); + s_op->error_a = NULL; + } + + return; +} + +static PINT_sm_action getattr_cleanup( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = 
PINT_sm_frame(smcb, PINT_FRAME_CURRENT); PINT_free_object_attr(&s_op->resp.u.getattr.attr); return(server_state_machine_complete(smcb)); } +static PINT_sm_action getattr_with_prelude_init( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + js_p->error_code = 0; + return SM_ACTION_COMPLETE; +} + static PINT_sm_action getattr_setup_op( struct PINT_smcb *smcb, job_status_s *js_p) { @@ -824,11 +1477,46 @@ static PINT_sm_action getattr_setup_op( s_op->u.getattr.handle = s_op->req->u.getattr.handle; s_op->u.getattr.fs_id = s_op->req->u.getattr.fs_id; s_op->u.getattr.attrmask = s_op->req->u.getattr.attrmask; + s_op->u.getattr.err_array = NULL; + s_op->u.getattr.mirror_dfile_status_array = NULL; js_p->error_code = 0; return SM_ACTION_COMPLETE; } +static PINT_sm_action getattr_datafile_handles_safety_check( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + if((js_p->error_code == 0) && + (s_op->u.getattr.attrmask & PVFS_ATTR_META_DFILES)) + { + /* successfully read datafile key; make sure we got something valid */ + if(s_op->val.read_sz != s_op->val.buffer_sz) + { + gossip_err("Error: %s key found val size: %d when " + "expecting val size: %d\n", + Trove_Common_Keys[METAFILE_HANDLES_KEY].key, + s_op->val.read_sz, + s_op->val.buffer_sz); + + /* clear bitmask to prevent double free between setup_resp and + * PINT_free_object_attr() + */ + s_op->resp.u.getattr.attr.mask &= ~PVFS_ATTR_META_DFILES; + s_op->resp.u.getattr.attr.mask &= ~PVFS_ATTR_META_DIST; + s_op->resp.u.getattr.attr.mask &= ~PVFS_ATTR_META_MIRROR_DFILES; + + js_p->error_code = -PVFS_EIO; + return SM_ACTION_COMPLETE; + } + } + + /* otherwise deliberately preserve existing error code */ + return SM_ACTION_COMPLETE; +} + static PINT_sm_action getattr_get_dirdata_handle( struct PINT_smcb *smcb, job_status_s *js_p) { @@ -838,8 +1526,13 @@ static PINT_sm_action getattr_get_dirdata_handle( s_op->key.buffer = Trove_Common_Keys[DIR_ENT_KEY].key; 
s_op->key.buffer_sz = Trove_Common_Keys[DIR_ENT_KEY].size; + if(s_op->free_val) + { + free(s_op->val.buffer); + } s_op->val.buffer = &s_op->u.getattr.dirent_handle; s_op->val.buffer_sz = sizeof(PVFS_handle); + s_op->free_val = 0; ret = job_trove_keyval_read( s_op->u.getattr.fs_id, s_op->u.getattr.handle, @@ -850,7 +1543,7 @@ static PINT_sm_action getattr_get_dirdata_handle( 0, js_p, &tmp_id, - server_job_context); + server_job_context, s_op->req->hints); return ret; } @@ -880,7 +1573,7 @@ static PINT_sm_action getattr_get_dirent_count( 0, js_p, &tmp_id, - server_job_context); + server_job_context, s_op->req->hints); return ret; } @@ -963,7 +1656,8 @@ static PINT_sm_action getattr_get_dir_hint( js_p->error_code = -PVFS_ENOMEM; return SM_ACTION_COMPLETE; } - s_op->val_a = (PVFS_ds_keyval *) calloc(NUM_SPECIAL_KEYS, sizeof(PVFS_ds_keyval)); + s_op->val_a = (PVFS_ds_keyval *) calloc(NUM_SPECIAL_KEYS + ,sizeof(PVFS_ds_keyval)); if (s_op->val_a == NULL) { js_p->error_code = -PVFS_ENOMEM; @@ -977,6 +1671,9 @@ static PINT_sm_action getattr_get_dir_hint( return SM_ACTION_COMPLETE; } + + s_op->free_val = 0; + s_op->keyval_count = NUM_SPECIAL_KEYS; for (i = 0; i < NUM_SPECIAL_KEYS; i++) { s_op->key_a[i].buffer = Trove_Special_Keys[i].key; @@ -992,22 +1689,26 @@ static PINT_sm_action getattr_get_dir_hint( s_op->val_a[i].buffer_sz = 16; } else if (i == DIST_PARAMS_KEY) { - s_op->val_a[i].buffer = s_op->resp.u.getattr.attr.u.dir.hint.dist_params; - s_op->val_a[i].buffer_sz = s_op->resp.u.getattr.attr.u.dir.hint.dist_params_len; + s_op->val_a[i].buffer + = s_op->resp.u.getattr.attr.u.dir.hint.dist_params; + s_op->val_a[i].buffer_sz + = s_op->resp.u.getattr.attr.u.dir.hint.dist_params_len; } else if (i == DIST_NAME_KEY) { - s_op->val_a[i].buffer = s_op->resp.u.getattr.attr.u.dir.hint.dist_name; - s_op->val_a[i].buffer_sz = s_op->resp.u.getattr.attr.u.dir.hint.dist_name_len; + s_op->val_a[i].buffer + = s_op->resp.u.getattr.attr.u.dir.hint.dist_name; + s_op->val_a[i].buffer_sz + = 
s_op->resp.u.getattr.attr.u.dir.hint.dist_name_len; } } js_p->error_code = 0; ret = job_trove_keyval_read_list( - s_op->req->u.getattr.fs_id, - s_op->req->u.getattr.handle, + s_op->u.getattr.fs_id, + s_op->u.getattr.handle, s_op->key_a, s_op->val_a, s_op->u.getattr.err_array, NUM_SPECIAL_KEYS, 0, NULL, smcb, 0, js_p, &tmp_id, - server_job_context); + server_job_context, s_op->req->hints); return ret; } @@ -1044,7 +1745,8 @@ static PINT_sm_action getattr_interpret_dir_hint( else { s_op->resp.u.getattr.attr.u.dir.hint.dist_name_len = 0; - } + } + s_op->val_a[DIST_NAME_KEY].buffer = NULL; if (s_op->u.getattr.err_array[DIST_PARAMS_KEY] == 0) { @@ -1060,22 +1762,36 @@ static PINT_sm_action getattr_interpret_dir_hint( { s_op->resp.u.getattr.attr.u.dir.hint.dist_params_len = 0; } + if (s_op->val_a[DIST_PARAMS_KEY].buffer) + { + s_op->val_a[DIST_PARAMS_KEY].buffer = NULL; + } + + if (s_op->u.getattr.err_array[NUM_DFILES_KEY] == 0) { char *endptr = NULL; - gossip_debug(GOSSIP_SERVER_DEBUG, "val_a[NUM_DFILES_KEY] %p read_sz = %d\n", + gossip_debug(GOSSIP_SERVER_DEBUG, "val_a[NUM_DFILES_KEY] %p " + "read_sz = %d\n", s_op->val_a[NUM_DFILES_KEY].buffer, s_op->val_a[NUM_DFILES_KEY].read_sz); - dfile_count = strtol(s_op->val_a[NUM_DFILES_KEY].buffer, &endptr, 10); + dfile_count = strtol(s_op->val_a[NUM_DFILES_KEY].buffer + , &endptr, 10); if (*endptr != '\0' || dfile_count < 0) { dfile_count = 0; } + } + if(s_op->val_a[NUM_DFILES_KEY].buffer) + { free(s_op->val_a[NUM_DFILES_KEY].buffer); s_op->val_a[NUM_DFILES_KEY].buffer = NULL; s_op->val_a[NUM_DFILES_KEY].buffer_sz = 0; } + s_op->keyval_count = 0; + s_op->free_val = 0; + s_op->resp.u.getattr.attr.u.dir.hint.dfile_count = dfile_count; @@ -1083,10 +1799,43 @@ static PINT_sm_action getattr_interpret_dir_hint( s_op->resp.u.getattr.attr.u.dir.hint.dfile_count); js_p->error_code = 0; - } + }/* end if val_a and key_a */ return SM_ACTION_COMPLETE; } +/* getattr_detect_stuffed() + * + * determine if a file is stuffed or not + */ +static 
PINT_sm_action getattr_detect_stuffed( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + job_id_t tmp_id; + + /* we can determine stuffedness by the presence of the dfiles req key */ + + s_op->key.buffer = Trove_Common_Keys[NUM_DFILES_REQ_KEY].key; + s_op->key.buffer_sz = Trove_Common_Keys[NUM_DFILES_REQ_KEY].size; + if(s_op->free_val) + { + free(s_op->val.buffer); + } + s_op->val.buffer = &s_op->u.getattr.num_dfiles_req; + s_op->val.buffer_sz = sizeof(s_op->u.getattr.num_dfiles_req); + s_op->free_val = 0; + + return(job_trove_keyval_read( + s_op->u.getattr.fs_id, + s_op->u.getattr.handle, + &(s_op->key), + &(s_op->val), + 0, + NULL, smcb, 0, js_p, + &tmp_id, server_job_context, + s_op->req->hints)); +} + PINT_GET_OBJECT_REF_DEFINE(getattr); struct PINT_server_req_params pvfs2_get_attr_params = diff --git a/src/server/get-eattr.sm b/src/server/get-eattr.sm index f263346..8d5ee22 100644 --- a/src/server/get-eattr.sm +++ b/src/server/get-eattr.sm @@ -78,19 +78,33 @@ static PINT_sm_action geteattr_setup_resp( struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - int i, tsz; + int i; gossip_debug(GOSSIP_GETEATTR_DEBUG,"geteattr requesting %d keys\n", s_op->req->u.geteattr.nkey); js_p->error_code = 0; + /* ensure not too many keys were requested */ + if( s_op->req->u.geteattr.nkey > PVFS_MAX_XATTR_LISTLEN ) + { + js_p->error_code = -PVFS_EINVAL; + return SM_ACTION_COMPLETE; + } + /* iterate through the keys and see if they fall into valid name spaces */ for(i=0; i<s_op->req->u.geteattr.nkey; i++) { gossip_debug(GOSSIP_GETEATTR_DEBUG, "geteattr key %d : %s\n", i, (char *) s_op->req->u.geteattr.key[i].buffer); + /* ensure no buffer_sz is too large */ + if( s_op->req->u.geteattr.key[i].buffer_sz > PVFS_MAX_XATTR_NAMELEN ) + { + js_p->error_code = -PVFS_EINVAL; + return SM_ACTION_COMPLETE; + } + js_p->error_code = PINT_eattr_check_access( 
&s_op->req->u.geteattr.key[i], NULL); @@ -118,7 +132,7 @@ static PINT_sm_action geteattr_setup_resp( return SM_ACTION_COMPLETE; } s_op->resp.u.geteattr.nkey = s_op->req->u.geteattr.nkey; - for (i = 0, tsz = 0; i < s_op->req->u.geteattr.nkey; i++) + for (i = 0; i < s_op->req->u.geteattr.nkey; i++) { s_op->resp.u.geteattr.val[i].buffer = malloc( s_op->req->u.geteattr.valsz[i]); @@ -166,7 +180,7 @@ static PINT_sm_action geteattr_read_eattrib( 0, js_p, &i, - server_job_context); + server_job_context, s_op->req->hints); return ret; } diff --git a/src/server/io.sm b/src/server/io.sm index c82d19f..0457a0e 100644 --- a/src/server/io.sm +++ b/src/server/io.sm @@ -92,6 +92,7 @@ machine pvfs2_io_sm static int io_send_ack( struct PINT_smcb *smcb, job_status_s *js_p) { + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing io_send_ack (io.sm)....\n"); struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int err = -PVFS_EIO; job_id_t tmp_id; @@ -102,7 +103,10 @@ static int io_send_ack( * failed to get the size, or failed for permission reasons */ s_op->resp.status = js_p->error_code; - s_op->resp.u.io.bstream_size = s_op->ds_attr.b_size; + s_op->resp.u.io.bstream_size = s_op->ds_attr.u.datafile.b_size; + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tbstream_size:%d\n" + ,(int)s_op->resp.u.io.bstream_size); err = PINT_encode(&s_op->resp, PINT_ENCODE_RESP, &(s_op->encoded), s_op->addr, s_op->decoded.enc_type); @@ -117,7 +121,8 @@ static int io_send_ack( s_op->addr, s_op->encoded.buffer_list, s_op->encoded.size_list, s_op->encoded.list_count, s_op->encoded.total_size, s_op->tag, s_op->encoded.buffer_type, 0, smcb, 0, js_p, - &tmp_id, server_job_context, user_opts->server_job_bmi_timeout); + &tmp_id, server_job_context, user_opts->server_job_bmi_timeout, + s_op->req->hints); return err; } @@ -156,6 +161,8 @@ static PINT_sm_action io_start_flow( return SM_ACTION_COMPLETE; } + s_op->u.io.flow_d->hints = s_op->req->hints; + /* we still have the file size stored in the response 
structure * that we sent in the previous state, other details come from * request @@ -232,8 +239,14 @@ static PINT_sm_action io_start_flow( return SM_ACTION_COMPLETE; } + gossip_debug(GOSSIP_IO_DEBUG,"\tabout to issue job_flow...\n"); err = job_flow(s_op->u.io.flow_d, smcb, 0, js_p, &tmp_id, - server_job_context, user_opts->server_job_flow_timeout); + server_job_context, user_opts->server_job_flow_timeout + , s_op->req->hints); + + gossip_debug(GOSSIP_IO_DEBUG,"\treturn code from job_flow " + "submission:%d\n" + ,err); return err; } @@ -334,7 +347,9 @@ static PINT_sm_action io_send_completion_ack( int err = -PVFS_EIO; job_id_t tmp_id; struct server_configuration_s *user_opts = get_server_config_struct(); - + + gossip_debug(GOSSIP_IO_DEBUG,"Executing io_send_completion_ack.\n"); + /* we only send this trailing ack if we are working on a write * operation; otherwise just cut out early */ @@ -370,23 +385,29 @@ static PINT_sm_action io_send_completion_ack( return SM_ACTION_COMPLETE; } + gossip_debug(GOSSIP_IO_DEBUG,"\ts_op->tag:%d\n",s_op->tag); + err = job_bmi_send_list( s_op->addr, s_op->encoded.buffer_list, s_op->encoded.size_list, s_op->encoded.list_count, s_op->encoded.total_size, s_op->tag, s_op->encoded.buffer_type, 0, smcb, 0, js_p, &tmp_id, - server_job_context, user_opts->server_job_bmi_timeout); + server_job_context, user_opts->server_job_bmi_timeout, + s_op->req->hints); + + gossip_debug(GOSSIP_IO_DEBUG,"return code from sending ack:%d\n" + ,err); return err; } -static inline int PINT_server_req_access_io( +static enum PINT_server_req_access_type PINT_server_req_access_io( struct PVFS_server_req *req) { if(req->u.io.io_type == PVFS_IO_READ) { - return 1; + return PINT_SERVER_REQ_READONLY; } - return 0; + return PINT_SERVER_REQ_MODIFY; } PINT_GET_OBJECT_REF_DEFINE(io); diff --git a/src/server/iterate-handles.sm b/src/server/iterate-handles.sm index 257abcb..de51f62 100644 --- a/src/server/iterate-handles.sm +++ b/src/server/iterate-handles.sm @@ -90,18 
+90,48 @@ static PINT_sm_action iterate_handles_do_work( s_op->resp.u.mgmt_iterate_handles.position = s_op->req->u.mgmt_iterate_handles.position; - ret = job_trove_dspace_iterate_handles( - s_op->req->u.mgmt_iterate_handles.fs_id, - s_op->resp.u.mgmt_iterate_handles.position, - s_op->resp.u.mgmt_iterate_handles.handle_array, - s_op->req->u.mgmt_iterate_handles.handle_count, - 0, - NULL, - smcb, - 0, - js_p, - &tmp_id, - server_job_context); + if(s_op->req->u.mgmt_iterate_handles.flags == PVFS_MGMT_RESERVED) + { + /* for now the only special case reserved handles are those that are + * allocated by precreate + */ + ret = job_precreate_pool_iterate_handles( + s_op->req->u.mgmt_iterate_handles.fs_id, + s_op->resp.u.mgmt_iterate_handles.position, + s_op->resp.u.mgmt_iterate_handles.handle_array, + s_op->req->u.mgmt_iterate_handles.handle_count, + 0, + NULL, + smcb, + 0, + js_p, + &tmp_id, + server_job_context, + s_op->req->hints); + } + else if(s_op->req->u.mgmt_iterate_handles.flags == 0) + { + ret = job_trove_dspace_iterate_handles( + s_op->req->u.mgmt_iterate_handles.fs_id, + s_op->resp.u.mgmt_iterate_handles.position, + s_op->resp.u.mgmt_iterate_handles.handle_array, + s_op->req->u.mgmt_iterate_handles.handle_count, + 0, + NULL, + smcb, + 0, + js_p, + &tmp_id, + server_job_context); + } + else + { + gossip_err("Error: unsupported mgmt_iterate_handles flags: %d\n", + s_op->req->u.mgmt_iterate_handles.flags); + js_p->error_code = -PVFS_EINVAL; + return SM_ACTION_COMPLETE; + } + if (ret < 0) return ret; /* error */ if (ret == 1) diff --git a/src/server/list-attr.sm b/src/server/list-attr.sm index d7c56a5..684f895 100644 --- a/src/server/list-attr.sm +++ b/src/server/list-attr.sm @@ -24,90 +24,41 @@ #include "pint-util.h" #include "pvfs2-internal.h" -enum -{ - STATE_METAFILE = 7, - STATE_SYMLINK = 9, - STATE_DIR = 10 -}; +#define GETATTR 100 -enum -{ - MORE_WORK = 1, -}; %% -nested machine pvfs2_list_attr_work_sm -{ - state verify_attribs - { - run 
listattr_verify_attribs; - STATE_SYMLINK => read_symlink_target; - STATE_METAFILE => read_metafile_datafile_handles_if_required; - STATE_DIR => get_dirent_count; - default => setup_resp; - } - - state read_symlink_target - { - run listattr_read_symlink_target; - default => setup_resp; - } - - state read_metafile_datafile_handles_if_required - { - run listattr_read_metafile_datafile_handles_if_required; - success => read_metafile_distribution_if_required; - default => setup_resp; - } - - state read_metafile_distribution_if_required - { - run listattr_read_metafile_distribution_if_required; - default => setup_resp; - } - - state get_dirent_count - { - run listattr_get_dirent_count; - success => interpret_dirent_count; - default => setup_resp; - } - - state interpret_dirent_count - { - run listattr_interpret_dirent_count; - default => setup_resp; - } - - state setup_resp - { - run listattr_setup_resp; - default => return; - } -} machine pvfs2_list_attr_sm { state prelude { jump pvfs2_prelude_sm; - success => setup_op; + success => read_basic_attrs; default => final_response; } - state setup_op + state read_basic_attrs { - run listattr_setup_op; - success => do_work; + run listattr_read_basic_attrs; + success => setup_getattr; + default => final_response; + } + + state setup_getattr + { + pjmp listattr_setup_getattr + { + GETATTR => pvfs2_pjmp_get_attr_work_sm; + } + success => interpret_getattrs; default => final_response; } - state do_work + state interpret_getattrs { - jump pvfs2_list_attr_work_sm; - MORE_WORK => do_work; + run listattr_interpret_getattrs; default => final_response; } @@ -126,556 +77,184 @@ machine pvfs2_list_attr_sm %% -/* listattr_verify_attribs() - * - * We initialize the attribute mask that will be returned in this - * function. This mask can be augmented in some of the other states. 
- */ -static PINT_sm_action listattr_verify_attribs( +static PINT_sm_action listattr_read_basic_attrs( struct PINT_smcb *smcb, job_status_s *js_p) -{ +{ + int ret; struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PVFS_object_attr *resp_attr = NULL, *obj_attr = NULL; - PVFS_ds_attributes *ds_attr = NULL; - int *resp_error = NULL, *ds_error = NULL; - PVFS_handle *handle = NULL; - - js_p->error_code = 0; - - /* - explicitly copy basic attributes structure (read in from the - trove_getattr_list for the matching dspace) into response to be sent - back to the client. - */ - resp_attr = &s_op->resp.u.listattr.attr[s_op->u.listattr.handle_index]; - resp_error = &s_op->resp.u.listattr.error[s_op->u.listattr.handle_index]; - obj_attr = &s_op->u.listattr.attr_a[s_op->u.listattr.handle_index]; - ds_attr = &s_op->u.listattr.ds_attr_a[s_op->u.listattr.handle_index]; - handle = &s_op->u.listattr.handles[s_op->u.listattr.handle_index]; - ds_error = &s_op->u.listattr.errors[s_op->u.listattr.handle_index]; - PVFS_ds_attr_to_object_attr(ds_attr, obj_attr); - obj_attr->mask = PVFS_ATTR_COMMON_ALL; - - *resp_error = *ds_error; - resp_attr->owner = obj_attr->owner; - resp_attr->group = obj_attr->group; - resp_attr->perms = obj_attr->perms; - resp_attr->atime = obj_attr->atime; - - resp_attr->mtime = PINT_util_mkversion_time(obj_attr->mtime); - if (resp_attr->mtime == 0) - { - /* - this is a compatibility hack to allow existing storage - spaces to be automagically converted to this versioned time - on-disk format slowly over time and doing the right thing in - the meantime - */ - resp_attr->mtime = obj_attr->mtime; - - gossip_debug(GOSSIP_LISTATTR_DEBUG, " No version found! 
Using " - "mtime %llu\n", llu(resp_attr->mtime)); - } - else - { - gossip_debug( - GOSSIP_LISTATTR_DEBUG, " VERSION is %llu, mtime is %llu\n", - llu(obj_attr->mtime), llu(resp_attr->mtime)); - } - - resp_attr->ctime = obj_attr->ctime; - resp_attr->mask = obj_attr->mask; - resp_attr->objtype = obj_attr->objtype; - resp_attr->u.meta.dfile_count = obj_attr->u.meta.dfile_count; - resp_attr->u.meta.dist_size = obj_attr->u.meta.dist_size; - -#if 0 - gossip_debug( - GOSSIP_LISTATTR_DEBUG, - "+ _DSPACE_ retrieved attrs: [owner = %d, group = %d\n\t" - "perms = %o, type = %d, atime = %llu, mtime = %llu\n\t" - "ctime = %llu, dfile_count = %d, dist_size = %d]\n", - resp_attr->owner, resp_attr->group, resp_attr->perms, - resp_attr->objtype, llu(resp_attr->atime), - llu(resp_attr->mtime), llu(resp_attr->ctime), - (int)resp_attr->u.meta.dfile_count, - (int)resp_attr->u.meta.dist_size); -#endif - - /* - weed out the attr mask of the response based on what the client - request asked for. also, check if we need to retrieve more - information before returning the response to the client (by - guiding the state machine to get it). - - we can safely do this now that we have the type of the object - (read in from the dspace, not stored in the resp_attr), and we - have the original client request attr mask - (s_op->u.listattr.attrmask). 
- */ - if (resp_attr->objtype == PVFS_TYPE_METAFILE) - { - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: metafile\n"); - gossip_debug(GOSSIP_LISTATTR_DEBUG, - " Req handle %llu refers to a metafile\n", - llu(*handle)); - - if (s_op->u.listattr.attrmask & PVFS_ATTR_META_DFILES) - { - gossip_debug(GOSSIP_LISTATTR_DEBUG, - " dspace has dfile_count of %d\n", - resp_attr->u.meta.dfile_count); - resp_attr->mask |= PVFS_ATTR_META_DFILES; - } - else - { - gossip_debug(GOSSIP_LISTATTR_DEBUG, " client doesn't want " - "dfile info, clearing response attr mask\n"); - resp_attr->mask &= ~PVFS_ATTR_META_DFILES; - } - - if (s_op->u.listattr.attrmask & PVFS_ATTR_META_DIST) - { - gossip_debug(GOSSIP_LISTATTR_DEBUG, - " dspace has dist size of %d\n", - resp_attr->u.meta.dist_size); - - resp_attr->mask |= PVFS_ATTR_META_DIST; - } - else - { - gossip_debug(GOSSIP_LISTATTR_DEBUG, " client doesn't want " - "dist info, clearing response attr mask\n"); - - resp_attr->mask &= ~PVFS_ATTR_META_DIST; - } + job_id_t tmp_id; - if ((resp_attr->mask & PVFS_ATTR_META_DFILES) || - (resp_attr->mask & PVFS_ATTR_META_DIST)) - { - gossip_debug(GOSSIP_LISTATTR_DEBUG, " * client wants extra " - "meta info, about to retrieve it now\n"); - js_p->error_code = STATE_METAFILE; - } - else - { - gossip_debug(GOSSIP_LISTATTR_DEBUG, " * client doesn't want " - "extra meta info, preparing response now\n"); - } - } - else if (resp_attr->objtype == PVFS_TYPE_DATAFILE) - { - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: datafile\n"); - /* - note: the prelude already retrieved the size for us, so - there's no special action that needs to be taken if we have - a datafile here (other than adjusting our mask to include - the data information and copying the retrieved size from the - ds_attribute the prelude used) - */ - resp_attr->u.data.size = ds_attr->b_size; - resp_attr->mask |= PVFS_ATTR_DATA_ALL; - - gossip_debug(GOSSIP_LISTATTR_DEBUG, " handle %llu refers to " - "a datafile (size = %lld).\n", - 
llu(*handle), - lld(resp_attr->u.data.size)); - } - else if (resp_attr->objtype == PVFS_TYPE_DIRECTORY) - { - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: directory\n"); - if (s_op->u.listattr.attrmask & PVFS_ATTR_DIR_DIRENT_COUNT) - { - gossip_debug(GOSSIP_LISTATTR_DEBUG, - " listattr: dirent_count needed.\n"); - assert(resp_attr->mask & PVFS_ATTR_COMMON_ALL); - resp_attr->mask |= PVFS_ATTR_DIR_DIRENT_COUNT; - js_p->error_code = STATE_DIR; - } - else - { - gossip_debug(GOSSIP_LISTATTR_DEBUG, - " listattr: dirent_count not needed.\n"); - js_p->error_code = 0; - assert(resp_attr->mask & PVFS_ATTR_COMMON_ALL); - } - } - else if (resp_attr->objtype == PVFS_TYPE_DIRDATA) - { - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: dirdata\n"); - gossip_debug( - GOSSIP_LISTATTR_DEBUG, " handle %llu refers to " - "a dirdata object. doing nothing special\n", - llu(*handle)); - assert(resp_attr->mask & PVFS_ATTR_COMMON_ALL); - } - else if (resp_attr->objtype == PVFS_TYPE_SYMLINK) - { - PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DEBUG, "type: symlink\n"); - gossip_debug( - GOSSIP_LISTATTR_DEBUG, " handle %llu refers to a symlink.\n", - llu(*handle)); - - /* - we'll definitely have to fetch the symlink target in this - case, as the prelude will never retrieve it for us - */ - js_p->error_code = STATE_SYMLINK; - } - else - { - gossip_debug( - GOSSIP_LISTATTR_DEBUG, " handle %llu refers to something " - "unknown -- assuming datafile!\n", - llu(*handle)); - - resp_attr->objtype = PVFS_TYPE_DATAFILE; - resp_attr->u.data.size = ds_attr->b_size; - resp_attr->mask |= (PVFS_ATTR_COMMON_TYPE | PVFS_ATTR_DATA_ALL); - - gossip_debug(GOSSIP_LISTATTR_DEBUG, " handle %llu now refers to " - "a datafile (size = %lld).\n", - llu(*handle), - lld(resp_attr->u.data.size)); + s_op->u.listattr.ds_attr_a = (PVFS_ds_attributes *) + calloc(s_op->req->u.listattr.nhandles * sizeof(PVFS_ds_attributes), 1); + if (s_op->u.listattr.ds_attr_a == NULL) { + js_p->error_code = -PVFS_ENOMEM; + return 
SM_ACTION_COMPLETE; } - return SM_ACTION_COMPLETE; -} - -static PINT_sm_action listattr_read_symlink_target( - struct PINT_smcb *smcb, job_status_s *js_p) -{ - struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - int ret; - job_id_t i; - PVFS_object_attr *resp_attr = NULL; - PVFS_handle *handle = NULL; - - resp_attr = &s_op->resp.u.listattr.attr[s_op->u.listattr.handle_index]; - handle = &s_op->u.listattr.handles[s_op->u.listattr.handle_index]; - - js_p->error_code = 0; - /* if we don't need to fill in the symlink target, skip it */ - if (!(s_op->u.listattr.attrmask & PVFS_ATTR_SYMLNK_TARGET)) - { - gossip_debug(GOSSIP_LISTATTR_DEBUG, "skipping symlink target read\n"); + s_op->u.listattr.errors = (PVFS_error *) + calloc(s_op->req->u.listattr.nhandles * sizeof(PVFS_error), 1); + if (s_op->u.listattr.errors == NULL) { + js_p->error_code = -PVFS_ENOMEM; return SM_ACTION_COMPLETE; } - - s_op->key.buffer = Trove_Common_Keys[SYMLINK_TARGET_KEY].key; - s_op->key.buffer_sz = Trove_Common_Keys[SYMLINK_TARGET_KEY].size; - - /* - optimistically add mask value to indicate the symlink target is - filled (error_code is checked in next state) - */ - resp_attr->mask |= PVFS_ATTR_SYMLNK_TARGET; - - resp_attr->u.sym.target_path_len = PVFS_NAME_MAX; - resp_attr->u.sym.target_path = - malloc(resp_attr->u.sym.target_path_len); - if (!resp_attr->u.sym.target_path) - { + s_op->u.listattr.attr_a = (PVFS_object_attr *) + calloc(s_op->req->u.listattr.nhandles * sizeof(PVFS_object_attr), 1); + if (s_op->u.listattr.attr_a == NULL) { js_p->error_code = -PVFS_ENOMEM; - return SM_ACTION_COMPLETE; + return SM_ACTION_COMPLETE; } - s_op->val.buffer = resp_attr->u.sym.target_path; - s_op->val.buffer_sz = resp_attr->u.sym.target_path_len; - - ret = job_trove_keyval_read( - s_op->u.listattr.fs_id, *handle, - &(s_op->key), &(s_op->val), - 0, NULL, smcb, 0, js_p, - &i, server_job_context); + js_p->error_code = 0; + /* initiate retrieval of the attributes from the dspace */ + ret = 
job_trove_dspace_getattr_list( + s_op->req->u.listattr.fs_id, + s_op->req->u.listattr.nhandles, + s_op->req->u.listattr.handles, + smcb, + s_op->u.listattr.errors, + s_op->u.listattr.ds_attr_a, + 0, + js_p, + &tmp_id, + server_job_context, + s_op->req->hints); return ret; } -static PINT_sm_action listattr_read_metafile_datafile_handles_if_required( +static PINT_sm_action listattr_setup_getattr( struct PINT_smcb *smcb, job_status_s *js_p) -{ +{ struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - int ret = -PVFS_EINVAL; - int dfile_count = 0; - job_id_t i; - PVFS_object_attr *resp_attr = NULL; - PVFS_handle *handle = NULL; - - - resp_attr = &s_op->resp.u.listattr.attr[s_op->u.listattr.handle_index]; - handle = &s_op->u.listattr.handles[s_op->u.listattr.handle_index]; - assert(resp_attr->objtype == PVFS_TYPE_METAFILE); + struct PINT_server_op *getattr_op; + int ret; + int i; + s_op->u.listattr.parallel_sms = 0; js_p->error_code = 0; - /* if we don't need to fill in the dfiles, skip them */ - if (!(s_op->u.listattr.attrmask & PVFS_ATTR_META_DFILES)) - { - gossip_debug(GOSSIP_LISTATTR_DEBUG, "skipping data handle read\n"); - return SM_ACTION_COMPLETE; - } - - dfile_count = resp_attr->u.meta.dfile_count; - - gossip_debug(GOSSIP_LISTATTR_DEBUG, - " request has dfile_count of %d | dspace has %d\n", - resp_attr->u.meta.dfile_count, - resp_attr->u.meta.dfile_count); - - /* verify that the retrieved dfile count is sane */ - if (!PVFS_REQ_LIMIT_DFILE_COUNT_IS_VALID(dfile_count)) + for(i=0; i<s_op->req->u.listattr.nhandles; i++) { - gossip_err("The requested dfile count of %d is invalid; " - "aborting operation.\n", dfile_count); - gossip_err( - "+ attrs read from dspace: (owner = %d, group = %d, " - "perms = %o, type = %d\n atime = %lld, mtime = %lld, " - "ctime = %lld |\n dfile_count = %d | dist_size = %d)\n", - resp_attr->owner, - resp_attr->group, - resp_attr->perms, - resp_attr->objtype, - lld(resp_attr->atime), - lld(resp_attr->mtime), - 
lld(resp_attr->ctime), - (int)resp_attr->u.meta.dfile_count, - (int)resp_attr->u.meta.dist_size); - - gossip_err("handle: %llu (%llx), fsid: %d.\n", - llu(*handle), llu(*handle), - (int)s_op->u.listattr.fs_id); - - js_p->error_code = -PVFS_EOVERFLOW; - return SM_ACTION_COMPLETE; - } - - s_op->key.buffer = Trove_Common_Keys[METAFILE_HANDLES_KEY].key; - s_op->key.buffer_sz = Trove_Common_Keys[METAFILE_HANDLES_KEY].size; + if(s_op->u.listattr.errors[i]) + { + gossip_debug(GOSSIP_SERVER_DEBUG, + "listattr: dspace_getattr_list failed to get attrs for handle: %llu\n", + llu(s_op->req->u.listattr.handles[i])); + /* skip nested machine for this handle */ + continue; + } - /* add mask value to indicate the data file array is filled */ - resp_attr->mask |= PVFS_ATTR_META_DFILES; + getattr_op = malloc(sizeof(*getattr_op)); + if(!getattr_op) + { + s_op->u.listattr.errors[i] = -PVFS_ENOMEM; + gossip_debug(GOSSIP_SERVER_DEBUG, + "listattr: failed to setup nested sm for handle: %llu\n", + llu(s_op->req->u.listattr.handles[i])); + continue; + } + memset(getattr_op, 0, sizeof(*getattr_op)); + + /* TODO: need a way to explicitly set the right inputs to the + * getattr nested sm. This code block is very fragile. 
+ */ + + /* need attrs that the prelude would have read normally */ + PVFS_ds_attr_to_object_attr(&s_op->u.listattr.ds_attr_a[i], + &getattr_op->attr); + getattr_op->attr.mask = PVFS_ATTR_COMMON_ALL; + getattr_op->ds_attr = s_op->u.listattr.ds_attr_a[i]; + /* need a valid request structure for some generic features like access + * logging + */ + getattr_op->req = s_op->req; + /* need to fill in the input parameters to the getattr nested machine */ + getattr_op->u.getattr.fs_id = s_op->req->u.listattr.fs_id; + getattr_op->u.getattr.handle = s_op->req->u.listattr.handles[i]; + getattr_op->u.getattr.attrmask = s_op->req->u.listattr.attrmask; + + ret = PINT_sm_push_frame(smcb, GETATTR, getattr_op); + if(ret < 0) + { + s_op->u.listattr.errors[i] = -PVFS_ENOMEM; + gossip_debug(GOSSIP_SERVER_DEBUG, + "listattr: failed to setup nested sm for handle: %llu\n", + llu(s_op->req->u.listattr.handles[i])); + continue; + } - resp_attr->u.meta.dfile_array = - malloc(dfile_count * sizeof(PVFS_handle)); - if (!resp_attr->u.meta.dfile_array) - { - gossip_err("Cannot allocate dfile array of count %d\n", - dfile_count); - js_p->error_code = -PVFS_ENOMEM; - return SM_ACTION_COMPLETE; + s_op->u.listattr.parallel_sms++; } - s_op->val.buffer = resp_attr->u.meta.dfile_array; - s_op->val.buffer_sz = (dfile_count * sizeof(PVFS_handle)); - - gossip_debug(GOSSIP_LISTATTR_DEBUG, - " reading %d datafile handles (coll_id = %d, " - "handle = %llu, key = %s (%d), val_buf = %p (%d))\n", - dfile_count, s_op->u.listattr.fs_id, - llu(*handle), (char *)s_op->key.buffer, - s_op->key.buffer_sz, s_op->val.buffer, - s_op->val.buffer_sz); - - ret = job_trove_keyval_read( - s_op->u.listattr.fs_id, *handle, - &s_op->key, &s_op->val, - 0, NULL, smcb, 0, js_p, - &i, server_job_context); + gossip_debug(GOSSIP_SERVER_DEBUG, + "listattr: set up %d parallel nested getattr machines.\n", + s_op->u.listattr.parallel_sms); - return ret; -} - -static PINT_sm_action listattr_read_metafile_distribution_if_required( - struct 
PINT_smcb *smcb, job_status_s *js_p) -{ - struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - int ret = -PVFS_EINVAL; - job_id_t i; - PVFS_object_attr *resp_attr = NULL; - PVFS_handle *handle = NULL; - - resp_attr = &s_op->resp.u.listattr.attr[s_op->u.listattr.handle_index]; - handle = &s_op->u.listattr.handles[s_op->u.listattr.handle_index]; - - assert(resp_attr->objtype == PVFS_TYPE_METAFILE); - - js_p->error_code = 0; - - /* if we don't need to fill in the distribution, skip it */ - if (!(s_op->u.listattr.attrmask & PVFS_ATTR_META_DIST)) + if(s_op->u.listattr.parallel_sms > 0) { - gossip_debug(GOSSIP_LISTATTR_DEBUG, "skipping data handle " - "distribution read\n"); + js_p->error_code = 0; return SM_ACTION_COMPLETE; } - - s_op->key.buffer = Trove_Common_Keys[METAFILE_DIST_KEY].key; - s_op->key.buffer_sz = Trove_Common_Keys[METAFILE_DIST_KEY].size; - - /* - there *should* be some distribution information. if not, dump - which handle is busted and assertion die for now while we're not - handling this kind of error - */ - if (resp_attr->u.meta.dist_size < 1) + else { - gossip_err("Cannot Read Dist! Got an invalid dist size for " - "handle %llu,%d\n",llu(*handle), - s_op->u.listattr.fs_id); - js_p->error_code = -PVFS_EINVAL; + /* we didn't kick off any parallel machines. Pick an error code and + * move along... 
+ */ + js_p->error_code = s_op->u.listattr.errors[0]; return SM_ACTION_COMPLETE; } - assert(resp_attr->u.meta.dist_size > 0); - - /* add mask value to indicate the distribution is filled */ - resp_attr->mask |= PVFS_ATTR_META_DIST; - - s_op->val.buffer_sz = resp_attr->u.meta.dist_size; - s_op->val.buffer = malloc(s_op->val.buffer_sz); - if (!s_op->val.buffer) - { - gossip_err("Cannot allocate dist of size %d\n", - s_op->val.buffer_sz); - js_p->error_code = -PVFS_ENOMEM; - return SM_ACTION_COMPLETE; - } - - ret = job_trove_keyval_read( - s_op->u.listattr.fs_id, *handle, - &(s_op->key), &(s_op->val), - 0, NULL, - smcb, 0, js_p, &i, server_job_context); - - return ret; } -static PINT_sm_action listattr_setup_resp(struct PINT_smcb *smcb, job_status_s *js_p) -{ +static PINT_sm_action listattr_interpret_getattrs(struct PINT_smcb *smcb, + job_status_s *js_p) +{ + struct PINT_server_op *getattr_op = NULL; + /* note: this gives us a pointer to the base frame (list_attr), + * _not_ the getattr frames that were previously pushed. 
+ */ struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PVFS_object_attr *resp_attr; - PVFS_handle *handle; - int *resp_error; - - handle = &s_op->u.listattr.handles[s_op->u.listattr.handle_index]; - resp_error = &s_op->resp.u.listattr.error[s_op->u.listattr.handle_index]; - resp_attr = &s_op->resp.u.listattr.attr[s_op->u.listattr.handle_index++]; - s_op->u.listattr.nhandles--; - - - gossip_debug( - GOSSIP_LISTATTR_DEBUG, - "- RETURNING retrieved attrs: [owner = %d, group = %d\n\t" - "perms = %o, type = %d, atime = %llu, mtime = %llu\n\t" - "ctime = %llu, dfile_count = %d, dist_size = %d]\n", - resp_attr->owner, resp_attr->group, resp_attr->perms, - resp_attr->objtype, llu(resp_attr->atime), - llu(resp_attr->mtime), llu(resp_attr->ctime), - (int)resp_attr->u.meta.dfile_count, - (int)resp_attr->u.meta.dist_size); - - *resp_error = 0; - if (resp_attr->objtype == PVFS_TYPE_METAFILE) - { - if (js_p->error_code == 0) + int task_id; + int remaining; + PVFS_error tmp_err; + int i, j; + + assert(s_op); + assert(s_op->op == PVFS_SERV_LISTATTR); + + gossip_debug(GOSSIP_SERVER_DEBUG, + "listattr: trying to interpret results from %d nested parallel getattr machines.\n", + s_op->u.listattr.parallel_sms); + + /* gather results: pop one frame per nested getattr machine */ + for(i=0; i<s_op->u.listattr.parallel_sms; i++) + { + getattr_op = PINT_sm_pop_frame(smcb, &task_id, &tmp_err, + &remaining); + gossip_debug(GOSSIP_SERVER_DEBUG, "getattr_op is %s.\n",(getattr_op ? 
"okay" : "NULL")); + gossip_debug(GOSSIP_SERVER_DEBUG, "listattr: nested sm returned error code: %d\n", tmp_err); + gossip_debug(GOSSIP_SERVER_DEBUG, "s_op is %p \t getattr_op is %p",s_op,getattr_op ); + /* match it up with the correct array entry (a NULL frame matches none) */ + for(j=0; j<s_op->req->u.listattr.nhandles; j++) { - if (resp_attr->mask & PVFS_ATTR_META_DFILES) + if(getattr_op && s_op->req->u.listattr.handles[j] == getattr_op->u.getattr.handle) { - if (resp_attr->u.meta.dfile_count) - { - assert(resp_attr->u.meta.dfile_array); - } - gossip_debug(GOSSIP_LISTATTR_DEBUG, - " also returning %d datafile handles\n", - resp_attr->u.meta.dfile_count); + s_op->u.listattr.attr_a[j] = getattr_op->resp.u.getattr.attr; + s_op->u.listattr.errors[j] = tmp_err; + free(getattr_op); + break; } - - if (resp_attr->mask & PVFS_ATTR_META_DIST) - { - PINT_dist_decode(&resp_attr->u.meta.dist, s_op->val.buffer); - free(s_op->val.buffer); - - if(resp_attr->u.meta.dist == 0) { - gossip_err("Found dist of 0 for handle %llu,%d\n", - llu(*handle), s_op->u.listattr.fs_id); - PVFS_perror("Metafile listattr_setup_resp",js_p->error_code); - *resp_error = -PVFS_EIO; - js_p->error_code = s_op->u.listattr.nhandles == 0 ? 0 : MORE_WORK; - return SM_ACTION_COMPLETE; - } - - gossip_debug(GOSSIP_LISTATTR_DEBUG, - " also returning dist size of %d\n", - resp_attr->u.meta.dist_size); - } - } - else { - PVFS_perror("Metafile listattr_setup_resp", js_p->error_code); - *resp_error = js_p->error_code; - js_p->error_code = s_op->u.listattr.nhandles == 0 ? 0 : MORE_WORK; - return SM_ACTION_COMPLETE; - } - } - else if ((resp_attr->objtype == PVFS_TYPE_DATAFILE) && - (resp_attr->mask & PVFS_ATTR_DATA_SIZE)) - { - if (js_p->error_code == 0) - { - gossip_debug(GOSSIP_LISTATTR_DEBUG, - " also returning data size of %lld\n", - lld(resp_attr->u.data.size)); - } - else { - PVFS_perror("Datafile listattr_setup_resp", js_p->error_code); - *resp_error = js_p->error_code; - js_p->error_code = s_op->u.listattr.nhandles == 0 ? 
0 : MORE_WORK; - return SM_ACTION_COMPLETE; - } - } - else if ((resp_attr->objtype == PVFS_TYPE_SYMLINK) && - (resp_attr->mask & PVFS_ATTR_SYMLNK_TARGET)) - { - if (js_p->error_code == 0) - { - assert(resp_attr->u.sym.target_path); - assert(resp_attr->u.sym.target_path_len); - /* - adjust target path len down to actual size ; always - include the null termination char in the target_path_len - */ - resp_attr->u.sym.target_path_len = - (strlen(resp_attr->u.sym.target_path) + 1); - - gossip_debug(GOSSIP_LISTATTR_DEBUG, - " also returning link target of %s (len %d)\n", - resp_attr->u.sym.target_path, - resp_attr->u.sym.target_path_len); - } - else - { - gossip_err("Failed to retrieve symlink target path for " - "handle %llu,%d\n",llu(*handle), - s_op->u.listattr.fs_id); - PVFS_perror("Symlink retrieval failure",js_p->error_code); - - *resp_error = js_p->error_code; - js_p->error_code = s_op->u.listattr.nhandles == 0 ? 0 : MORE_WORK; - return SM_ACTION_COMPLETE; } } - gossip_debug(GOSSIP_LISTATTR_DEBUG,"@ End %s attributes: sending " - "status %d (error = %d)\n", - PINT_util_get_object_type(resp_attr->objtype), - s_op->resp.status, js_p->error_code); - -#if 0 - gossip_debug(GOSSIP_LISTATTR_DEBUG, "returning attrmask "); - PINT_attrmask_print(GOSSIP_LISTATTR_DEBUG, - resp_attr->mask); -#endif - js_p->error_code = s_op->u.listattr.nhandles == 0 ? 
0 : MORE_WORK; + /* if we reached this point, then we have a successful ack to send back; + * set remaining response fields + */ + s_op->resp.u.listattr.attr = s_op->u.listattr.attr_a; + s_op->resp.u.listattr.error = s_op->u.listattr.errors; + s_op->resp.u.listattr.nhandles = s_op->req->u.listattr.nhandles; + + js_p->error_code = 0; return SM_ACTION_COMPLETE; } @@ -686,120 +265,18 @@ static PINT_sm_action listattr_cleanup(struct PINT_smcb *smcb, job_status_s *js_ if (s_op->u.listattr.attr_a) { - for (i = 0; i < s_op->req->u.listattr.nhandles; i++) { + for (i = 0; i < s_op->req->u.listattr.nhandles; i++) + { PINT_free_object_attr(&s_op->u.listattr.attr_a[i]); } free(s_op->u.listattr.attr_a); } if (s_op->u.listattr.ds_attr_a) free(s_op->u.listattr.ds_attr_a); - if (s_op->u.listattr.errors) + if (s_op->u.listattr.errors) free(s_op->u.listattr.errors); - return(server_state_machine_complete(smcb)); -} - -static PINT_sm_action listattr_setup_op( - struct PINT_smcb *smcb, job_status_s *js_p) -{ - int ret; - struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - job_id_t tmp_id; - s_op->u.listattr.handles = s_op->req->u.listattr.handles; - s_op->u.listattr.fs_id = s_op->req->u.listattr.fs_id; - s_op->u.listattr.attrmask = s_op->req->u.listattr.attrmask; - s_op->resp.u.listattr.nhandles = s_op->u.listattr.nhandles = s_op->req->u.listattr.nhandles; - s_op->u.listattr.handle_index = 0; - s_op->u.listattr.attr_a = (PVFS_object_attr *) - calloc(s_op->u.listattr.nhandles * sizeof(PVFS_object_attr), 1); - if (s_op->u.listattr.attr_a == NULL) { - js_p->error_code = -PVFS_ENOMEM; - return SM_ACTION_COMPLETE; - } - s_op->u.listattr.ds_attr_a = (PVFS_ds_attributes *) - calloc(s_op->u.listattr.nhandles * sizeof(PVFS_ds_attributes), 1); - if (s_op->u.listattr.ds_attr_a == NULL) { - js_p->error_code = -PVFS_ENOMEM; - return SM_ACTION_COMPLETE; - } - s_op->u.listattr.errors = (PVFS_error *) - calloc(s_op->u.listattr.nhandles * sizeof(PVFS_error), 1); - if 
(s_op->u.listattr.errors == NULL) { - js_p->error_code = -PVFS_ENOMEM; - return SM_ACTION_COMPLETE; - } - /* Listattr response has to be allocated extra memory */ - s_op->resp.u.listattr.error = (PVFS_error *) - calloc(s_op->u.listattr.nhandles * sizeof(PVFS_error), 1); - if (s_op->resp.u.listattr.error == NULL) { - js_p->error_code = -PVFS_ENOMEM; - return SM_ACTION_COMPLETE; - } - s_op->resp.u.listattr.attr = (PVFS_object_attr *) - calloc(s_op->u.listattr.nhandles * sizeof(PVFS_object_attr), 1); - if (s_op->resp.u.listattr.attr == NULL) { - js_p->error_code = -PVFS_ENOMEM; - return SM_ACTION_COMPLETE; - } - js_p->error_code = 0; - /* initiate retrieval of the attributes from the dspace */ - ret = job_trove_dspace_getattr_list( - s_op->u.listattr.fs_id, - s_op->u.listattr.nhandles, - s_op->u.listattr.handles, - smcb, - s_op->u.listattr.errors, - s_op->u.listattr.ds_attr_a, - 0, - js_p, - &tmp_id, - server_job_context); - - return ret; -} - -static PINT_sm_action listattr_get_dirent_count( - struct PINT_smcb *smcb, job_status_s *js_p) -{ - struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - int ret; - job_id_t tmp_id; - PVFS_handle *handle = NULL; - - handle = &s_op->u.listattr.handles[s_op->u.listattr.handle_index]; - - js_p->error_code = 0; - - ret = job_trove_keyval_get_handle_info( - s_op->u.listattr.fs_id, - *handle, - TROVE_KEYVAL_HANDLE_COUNT, - &s_op->u.listattr.keyval_handle_info, - smcb, - 0, - js_p, - &tmp_id, - server_job_context); - - return ret; -} - -static PINT_sm_action listattr_interpret_dirent_count( - struct PINT_smcb *smcb, job_status_s *js_p) -{ - struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - PVFS_object_attr *resp_attr = NULL; - - /* from the keyval_handle_info, retrieve the dirent_count */ - s_op->u.listattr.dirent_count = s_op->u.listattr.keyval_handle_info.count; - resp_attr = &s_op->resp.u.listattr.attr[s_op->u.listattr.handle_index]; - resp_attr->u.dir.dirent_count = 
s_op->u.listattr.dirent_count; - - gossip_debug(GOSSIP_LISTATTR_DEBUG, "listattr: dirent_count: %lld\n", - lld(resp_attr->u.dir.dirent_count)); - - js_p->error_code = 0; - return SM_ACTION_COMPLETE; + return(server_state_machine_complete(smcb)); } static inline int PINT_get_object_ref_listattr( diff --git a/src/server/list-eattr.sm b/src/server/list-eattr.sm index 0e58031..ec6a559 100644 --- a/src/server/list-eattr.sm +++ b/src/server/list-eattr.sm @@ -84,6 +84,31 @@ static PINT_sm_action listeattr_setup_resp( js_p->error_code = 0; + /* ensure not too many keys were requested */ + if( s_op->req->u.listeattr.nkey > PVFS_MAX_XATTR_LISTLEN ) + { + js_p->error_code = -PVFS_EINVAL; + return SM_ACTION_COMPLETE; + } + + /* enforce that no key size be larger than PVFS_MAX_XATTR_NAMELEN. + * Otherwise, when a blind memcpy happens inside dbpf based on the key + * size we won't over run our fixed length buffer. fixed buffer size is: + * PVFS_NAME_MAX == DBPF_MAX_KEY_LENGTH == PVFS_MAX_XATTR_NAMELEN */ + for( i = 0; i < s_op->req->u.listeattr.nkey; i++) + { + if( s_op->req->u.listeattr.keysz[i] > PVFS_MAX_XATTR_NAMELEN ) + { + gossip_debug(GOSSIP_LISTEATTR_DEBUG, "%s: requested key %d " + "size of %ld is greater than maximum of %d\n", + __func__, i, + ((long int)s_op->req->u.listeattr.keysz[i]), + PVFS_MAX_XATTR_NAMELEN ); + js_p->error_code = -PVFS_EINVAL; + return SM_ACTION_COMPLETE; + } + } + s_op->resp.u.listeattr.key = malloc(s_op->req->u.listeattr.nkey * sizeof(PVFS_ds_keyval)); if (!s_op->resp.u.listeattr.key) @@ -93,8 +118,11 @@ static PINT_sm_action listeattr_setup_resp( } s_op->resp.u.listeattr.nkey = s_op->req->u.listeattr.nkey; + for (i = 0, tsz = 0; i < s_op->req->u.listeattr.nkey; i++) + { tsz += s_op->req->u.listeattr.keysz[i]; + } s_op->u.eattr.buffer = malloc(tsz); if (!s_op->u.eattr.buffer) { @@ -141,7 +169,7 @@ static PINT_sm_action listeattr_list_eattrib( 0, js_p, &i, - server_job_context); + server_job_context, s_op->req->hints); return ret; } diff --git 
a/src/server/lookup.sm b/src/server/lookup.sm index 68a24e6..d67fa11 100644 --- a/src/server/lookup.sm +++ b/src/server/lookup.sm @@ -19,7 +19,6 @@ #include "str-utils.h" #include "pint-util.h" #include "pvfs2-internal.h" -#include "check.h" enum { @@ -238,7 +237,7 @@ static PINT_sm_action lookup_read_object_metadata( /* get the dspace attributes/metadata */ ret = job_trove_dspace_getattr( s_op->req->u.lookup_path.fs_id, handle, smcb, ds_attr, - 0, js_p, &j_id, server_job_context); + 0, js_p, &j_id, server_job_context, s_op->req->hints ); return ret; } @@ -411,8 +410,8 @@ static PINT_sm_action lookup_check_acls_if_needed( 0, js_p, &i, - server_job_context); - return SM_ACTION_COMPLETE; + server_job_context, s_op->req->hints); + return ret; } /* @@ -520,7 +519,7 @@ static PINT_sm_action lookup_read_directory_entry_handle( ret = job_trove_keyval_read( s_op->req->u.lookup_path.fs_id, handle, &s_op->key, &s_op->val, 0, - NULL, smcb, 0, js_p, &j_id, server_job_context); + NULL, smcb, 0, js_p, &j_id, server_job_context, s_op->req->hints); return ret; } @@ -560,7 +559,7 @@ static PINT_sm_action lookup_read_directory_entry( &s_op->key, &s_op->val, 0, NULL, smcb, 0, js_p, &j_id, - server_job_context); + server_job_context, s_op->req->hints); return ret; } diff --git a/src/server/mgmt-get-dirdata-handle.sm b/src/server/mgmt-get-dirdata-handle.sm index 37bea8d..793674a 100644 --- a/src/server/mgmt-get-dirdata-handle.sm +++ b/src/server/mgmt-get-dirdata-handle.sm @@ -81,7 +81,7 @@ static int mgmt_get_dirdata_handle_from_parent( &s_op->key, &s_op->val, 0, NULL, smcb, 0, js_p, &i, - server_job_context); + server_job_context, s_op->req->hints); return ret; } diff --git a/src/server/mgmt-remove-dirent.sm b/src/server/mgmt-remove-dirent.sm index 227e1e1..7860d01 100644 --- a/src/server/mgmt-remove-dirent.sm +++ b/src/server/mgmt-remove-dirent.sm @@ -82,7 +82,7 @@ static int mgmt_remove_dirent_get_dirdata_handle_from_parent( &s_op->key, &s_op->val, 0, NULL, smcb, 0, js_p, &i, - 
server_job_context); + server_job_context, s_op->req->hints); return ret; } @@ -114,7 +114,7 @@ static PINT_sm_action mgmt_remove_dirent( 0, js_p, &j_id, - server_job_context); + server_job_context, s_op->req->hints); return ret; } diff --git a/src/server/mgmt-remove-object.sm b/src/server/mgmt-remove-object.sm index 319cfb6..803a5fa 100644 --- a/src/server/mgmt-remove-object.sm +++ b/src/server/mgmt-remove-object.sm @@ -66,7 +66,7 @@ static PINT_sm_action mgmt_remove_dspace( 0, js_p, &j_id, - server_job_context); + server_job_context, s_op->req->hints); return ret; } diff --git a/src/server/mirror.sm b/src/server/mirror.sm new file mode 100644 index 0000000..13e325d --- /dev/null +++ b/src/server/mirror.sm @@ -0,0 +1,1036 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include +#include + +#include "server-config.h" +#include "pvfs2-server.h" +#include "pvfs2-attr.h" +#include "pvfs2-internal.h" +#include "pvfs2-util.h" +#include "pint-util.h" +#include "pint-eattr.h" +#include "pint-cached-config.h" +#include "client-state-machine.h" + +extern job_context_id pint_client_sm_context; + +#define WRITE_ACK_RCV 0 +#define SRC_FLOW_POST 1 +#define NUM_OF_PHASES 2 + +#define PHASE(__tag) (__tag%NUM_OF_PHASES) +#define DST(__tag) (__tag/NUM_OF_PHASES) +#define TAG(__phase,__dst) ( (NUM_OF_PHASES * __dst) + __phase ) + + + +enum +{ + NO_DATA_TO_COPY = 100, + COMM_DONE = 400, +}; + +int write_comp_fn(void *v_p, struct PVFS_server_resp *resp_p, int i); + + +%% +machine pvfs2_mirror_sm +{ + state prelude + { + jump pvfs2_prelude_sm; + success => inspect_inputs; + default => final_response; + } + + state inspect_inputs + { + run inspect_inputs; + success => mirror_do_work; + default => final_response; + } + + state mirror_do_work + { + jump pvfs2_mirror_work_sm; + default => final_response; + } + + state final_response + { + jump pvfs2_final_response_sm; + default => mirror_cleanup; + } + + state 
mirror_cleanup + { + run mirror_cleanup; + default => terminate; + } + +} /*end machine pvfs2_mirror_sm*/ + +%% + +%% +nested machine pvfs2_mirror_work_sm +{ + state initialize_structures + { + run initialize_structures; + NO_DATA_TO_COPY => cleanup_mirror_work; + success => setup_write_request; + default => cleanup_mirror_work; + } + + state setup_write_request + { + run setup_write_request; + success => call_msgpairarray; + default => cleanup_mirror_work; + } + + state call_msgpairarray + { + jump pvfs2_msgpairarray_sm; + + /*success => cleanup_msgpairarray;*/ + + /*default case implies that at least one of the io requests failed*/ + /*default => cleanup_mirror_work;*/ + default => cleanup_msgpairarray; + } + + state cleanup_msgpairarray + { + run cleanup_msgpairarray; + success => post_ack_and_flow; + default => cleanup_mirror_work; + } + + state post_ack_and_flow + { + run post_ack_and_flow; + success => check_comm; + default => cleanup_mirror_work; + } + + state check_comm + { + run check_comm; + COMM_DONE => check_results; + default => check_comm; + } + + state check_results + { + run check_results; + default => cleanup_mirror_work; + } + + state cleanup_mirror_work + { + run cleanup_mirror_work; + default => return; + } + +} /*end nested machine pvfs2_mirror_work_sm*/ +%% + +/*START OF pvfs2_mirror_sm*/ +static PINT_sm_action inspect_inputs(struct PINT_smcb *smcb, job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, + "Executing mirror:inspect_inputs....\n"); + gossip_debug(GOSSIP_MIRROR_DEBUG, + "\tframe count is %d.\n",smcb->frame_count); + gossip_debug(GOSSIP_MIRROR_DEBUG, + "\t base frame is %d.\n",smcb->base_frame); + struct PINT_server_op *s_op = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + struct PVFS_servreq_mirror *reqmir_p = &(s_op->req->u.mirror); + int i; + + js_p->error_code = 0; + + reqmir_p->bsize = s_op->ds_attr.u.datafile.b_size; + + if (s_op->req) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tREQUEST STRUCTURE EXISTS.\n"); + 
gossip_debug(GOSSIP_MIRROR_DEBUG,"\trequest->op:%d" + "\tmirror.src_handle:%llu" + "\tmirror.dst_count: %d" + "\tmirror.fs_id:%d" + "\tmirror.dist.name:%s" + "\tmirror.bsize:%d" + "\tmirror.src_server_nr:%d" + "\n", + s_op->req->op, + llu(reqmir_p->src_handle) , + reqmir_p->dst_count, + reqmir_p->fs_id, + reqmir_p->dist->dist_name, + reqmir_p->bsize, + reqmir_p->src_server_nr); + + for (i=0; i<reqmir_p->dst_count; i++) + gossip_debug(GOSSIP_MIRROR_DEBUG, "\treqmir->dst_handle[%d] : %llu\n", + i, llu(reqmir_p->dst_handle[i])); + } + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tds_attr.b_size:%d\n", + (int)s_op->ds_attr.u.datafile.b_size); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tobject type:%0x\n", + (int)s_op->attr.objtype); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tdatafile size:%d\n", + (int)s_op->attr.u.data.size); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tmask:%0x\n", s_op->attr.mask); + return SM_ACTION_COMPLETE; +}/*end action inspect_inputs*/ + + +static PINT_sm_action mirror_cleanup( struct PINT_smcb *smcb, + job_status_s *js_p) +{ + + return(server_state_machine_complete(smcb)); + +}/*end action mirror_cleanup*/ + + + +/******************************************************************************/ +/* START OF pvfs2_mirror_work_sm */ +/******************************************************************************/ +static PINT_sm_action initialize_structures(struct PINT_smcb *smcb + ,job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, "Executing mirror:" + "initialize_structures...\n"); + gossip_debug(GOSSIP_MIRROR_DEBUG, + "\tframe count is %d.\n", smcb->frame_count); + gossip_debug(GOSSIP_MIRROR_DEBUG, + "\t base frame is %d.\n", smcb->base_frame); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + struct PVFS_servreq_mirror *reqmir_p = &(s_op->req->u.mirror); + struct PVFS_servresp_mirror *respmir_p = &(s_op->resp.u.mirror); + struct PINT_server_mirror_op *mir_p = &(s_op->u.mirror); + int i,ret; + + 
gossip_debug(GOSSIP_MIRROR_DEBUG,"\ts_op->op:%d\n",s_op->op); + + js_p->error_code = 0; + + memset(respmir_p,0,sizeof(*respmir_p)); + + respmir_p->src_handle = reqmir_p->src_handle; + respmir_p->src_server_nr = reqmir_p->src_server_nr; + respmir_p->dst_count = reqmir_p->dst_count; + + respmir_p->bytes_written = malloc(sizeof(uint32_t) * respmir_p->dst_count); + if (!respmir_p->bytes_written) + { + gossip_lerr("Unable to allocate respmir_p->bytes_written\n"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(respmir_p->bytes_written, 0, + sizeof(uint32_t) * respmir_p->dst_count); + + respmir_p->write_status_code = malloc(sizeof(uint32_t) * + respmir_p->dst_count); + if (!respmir_p->write_status_code) + { + gossip_lerr("Unable to allocate respmir->write_status_code.\n"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(respmir_p->write_status_code, 0, sizeof(uint32_t) * + respmir_p->dst_count); + + if (reqmir_p->bsize == 0) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tNo data to copy...\n"); + js_p->error_code = NO_DATA_TO_COPY; + return SM_ACTION_COMPLETE; + } + + if (s_op->req) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\trequest->op:%d" + "\tmirror.src_handle:%llu" + "\tmirror.fs_id:%d" + "\tmirror.dist.name:%s" + "\tmirror.bsize:%d" + "\tmirror.src_server_nr:%d" + "\tmirror.dst_count:%d" + "\n", + s_op->req->op, + llu(reqmir_p->src_handle), + reqmir_p->fs_id, + reqmir_p->dist->dist_name, + reqmir_p->bsize, + reqmir_p->src_server_nr, + reqmir_p->dst_count); + for (i=0; i<reqmir_p->dst_count; i++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tmirror.dst_handle[%d] : %llu\n", + i, + llu(reqmir_p->dst_handle[i])); + } + } + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tCreating jobs array....\n"); + + mir_p->jobs = malloc(sizeof(write_job_t) * reqmir_p->dst_count); + if (!mir_p->jobs) + { + gossip_lerr("Unable to allocate jobs array.\n"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + 
memset(mir_p->jobs,0,sizeof(write_job_t) * reqmir_p->dst_count); + + /*Scheduling occurred in pvfs2_mirror_sm/prelude if the scheduled_id is */ + /*already provided. Otherwise, this mirror request was called as a nested*/ + /*function, and therefore needs to be scheduled. */ + if (s_op->scheduled_id != 0) + return SM_ACTION_COMPLETE; + + ret = job_req_sched_post(s_op->op, + reqmir_p->fs_id, + reqmir_p->src_handle, + PINT_server_req_get_access_type(s_op->req), + PINT_server_req_get_sched_policy(s_op->req), + smcb, + 0, + js_p, + &(s_op->scheduled_id), + server_job_context); + return (ret); +}/*end action initialize_structures*/ + + + +static PINT_sm_action setup_write_request(struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, "Executing mirror:" + "setup_write_request.....\n"); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + struct PVFS_servreq_mirror *reqmir_p = &(s_op->req->u.mirror); + struct PINT_server_mirror_op *mir_p = &(s_op->u.mirror); + write_job_t *jobs = mir_p->jobs; + int ret,i; + PVFS_Request myFileReq = PVFS_BYTE; + PVFS_offset myFileReqOffset = 0; + + js_p->error_code = 0; + + /*initialize msgarray_op structure*/ + PINT_sm_msgarray_op *msgarray_op = &(s_op->msgarray_op); + memset(msgarray_op, 0, sizeof(PINT_sm_msgarray_op)); + + /*parameters are setup like a client except for job_context*/ + PINT_serv_init_msgarray_params(s_op,reqmir_p->fs_id); + + /*allocate a msgpair_state structure for each destination handle.*/ + ret=PINT_msgpairarray_init(msgarray_op,reqmir_p->dst_count); + if (ret) + { + gossip_lerr("Failed to allocate msgarray.\n"); + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + /*setup msgpairarray to initiate PVFS_SERV_IO write request for the */ + /*destination handles. 
*/ + for (i=0; i < reqmir_p->dst_count; i++) + { + PINT_sm_msgpair_state *msg_p = &(msgarray_op->msgarray[i]); + + msg_p->fs_id = reqmir_p->fs_id; + msg_p->handle = reqmir_p->dst_handle[i]; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = write_comp_fn; + + /*determine the BMI svr address for the destination handle*/ + ret = PINT_cached_config_map_to_server(&msg_p->svr_addr, + msg_p->handle, + msg_p->fs_id ); + if (ret) + { + gossip_lerr("Failed to map address\n"); + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + /*save the svr_addr for later use*/ + jobs[i].svr_addr = msg_p->svr_addr; + + + /*setup the server PVFS_SERV_IO write request itself*/ + gossip_debug(GOSSIP_MIRROR_DEBUG, "\treqmir_p->bsize:%d.\n", + reqmir_p->bsize); + PINT_SERVREQ_IO_FILL( msg_p->req, + s_op->req->credentials, + reqmir_p->fs_id, + reqmir_p->dst_handle[i], + PVFS_IO_WRITE, + reqmir_p->flow_type, + 0, + 1, + reqmir_p->dist, + myFileReq, + myFileReqOffset, + reqmir_p->bsize, + NULL ); + }/*end for*/ + + PINT_sm_push_frame(smcb,0,msgarray_op); + + return SM_ACTION_COMPLETE; + +}/*end action setup_write_request*/ + + +static PINT_sm_action cleanup_msgpairarray( struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG, + "Executing MIRROR:cleanup_msgpairarray\n"); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + struct PVFS_servreq_mirror *reqmir_p = &(s_op->req->u.mirror); + + PINT_sm_msgarray_op *msgarray_op = &(s_op->msgarray_op); + PINT_server_mirror_op *mir_op = &(s_op->u.mirror); + write_job_t *jobs = mir_op->jobs; + int i; + + js_p->error_code = 0; + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tCURRENT:\tsmcb->base_frame:%d" + "\tframe_count:%d\n", + smcb->base_frame,smcb->frame_count); + + /*if ALL msgpairs have errors, then set an error code and skip the rest */ + /*of this request. 
*/ + for (i=0; i<reqmir_p->dst_count; i++) + { + if (jobs[i].io_status == 0) + break; + } + if (i==reqmir_p->dst_count) + { + js_p->error_code = -PVFS_EIO; + return SM_ACTION_COMPLETE; + } + + /*retain the session/flow identifier created in the PVFS_SERV_IO request */ + /*for each destination handle. */ + for (i=0; i < reqmir_p->dst_count; i++) + { + if (jobs[i].io_status == 0) + { + jobs[i].session_tag = msgarray_op->msgarray[i].session_tag; + } + else + { + jobs[i].session_tag = 0; /*session tags are NEVER zero*/ + /*PINT_util_get_next_tag() */ + } + } + + /*will free msgarray if necessary*/ + PINT_msgpairarray_destroy(msgarray_op); + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tIO_STATUS & SESSION TAG\n"); + for (i=0; i < reqmir_p->dst_count; i++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\t\tio_status[%d]:%d " + "\tsession_tag[%d]:%d\n", + i, jobs[i].io_status, + i, jobs[i].session_tag); + } + + + gossip_debug(GOSSIP_MIRROR_DEBUG,"Leaving MIRROR:cleanup msgpairarray.\n"); + + return SM_ACTION_COMPLETE; +}/*end action cleanup_msgpairarray*/ + +static PINT_sm_action post_ack_and_flow (struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing mirror:post_ack_and_flow..\n"); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + struct PVFS_servreq_mirror *reqmir_p = &(s_op->req->u.mirror); + PINT_server_mirror_op *mir_op = &(s_op->u.mirror); + write_job_t *jobs = mir_op->jobs; + + int ret,i; + unsigned long status_user_tag = 0; + + js_p->error_code = 0; + + mir_op->job_count = 0; + mir_op->max_resp_sz = PINT_encode_calc_max_size( PINT_ENCODE_RESP, + PVFS_SERV_WRITE_COMPLETION, + reqmir_p->encoding ); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tmax_resp_sz:%d\n", + mir_op->max_resp_sz); + + /*get flow info from the server configuration file.*/ + struct filesystem_configuration_s *cur_fs = NULL; + struct server_configuration_s *server_config = NULL; + + server_config = get_server_config_struct(); + cur_fs = 
PINT_config_find_fs_id(server_config,reqmir_p->fs_id); + + /*post write-ack and flow for each destination handle*/ + for (i=0; i < reqmir_p->dst_count; i++) + { + /* if the initial IO request for this destination handle failed, then + * skip it. */ + if (jobs[i].io_status != 0) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"io_status[%d] : %d ...skipping\n", + i, jobs[i].io_status); + continue; + } + + jobs[i].encoded_resp_p = BMI_memalloc( jobs[i].svr_addr, + mir_op->max_resp_sz, + BMI_RECV ); + if (!jobs[i].encoded_resp_p) + { + gossip_lerr("mirror:BMI_memalloc (for write ack) failed.\n"); + js_p->error_code = -PVFS_ENOMEM; + continue; + } + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tencoded response successfully " + "allocated.\n"); + + /* pre-post this recv with an infinite timeout and adjust it after the + * flow completes, since we don't know how long a flow can take at this + * point. */ + gossip_debug(GOSSIP_MIRROR_DEBUG, "\trecv_id:%d\n", + (int)jobs[i].recv_id); + status_user_tag = TAG(WRITE_ACK_RCV,i); + ret = job_bmi_recv( jobs[i].svr_addr, + jobs[i].encoded_resp_p, + mir_op->max_resp_sz, + jobs[i].session_tag, + BMI_PRE_ALLOC, + smcb, + status_user_tag, + &(jobs[i].recv_status), + &(jobs[i].recv_id), + server_job_context, + JOB_TIMEOUT_INF, + NULL ); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tWRITE_ACK_RCV:return code:%d\n", + ret); + /* we expect this job to __not__ complete immediately, since we have not + * posted the flow. */ + if (ret == 1 && jobs[i].recv_status.error_code) + { + /*Error posting the job*/ + js_p->error_code = jobs[i].recv_status.error_code; + continue; + } + else if (ret == 1) + { + /* Job completed immediately with no errors. In this context, + * immediate completion is an error. */ + js_p->error_code = -PVFS_EPERM; // operation not permitted (PVFS-encoded, like every other error here). 
+ continue; + } + else if (ret != 0) + { + /* Error adding job to the job_time_mgr */ + js_p->error_code = ret; + continue; + } + + /* increment once for successful post of write-ack */ + mir_op->job_count++; + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tWrite ACK recv successfully posted." + "\tjob_id:%d \tjob_count:%d\n", + (int)jobs[i].recv_id, + mir_op->job_count); + + /* issue flow request for the src datahandle using the session tag + * obtained from the PVFS_SERV_IO request. + * + * setup the flow descriptor. Read from the src datahandle and send to + * the destination BMI endpoint. The PVFS_SERV_IO has already setup the + * other end of the flow (BMI src/TROVE dest). */ + jobs[i].flow_desc = PINT_flow_alloc(); + if (!jobs[i].flow_desc) + { + js_p->error_code = -PVFS_ENOMEM; + gossip_lerr("unable to allocate memory for flow descriptor"); + job_bmi_cancel(jobs[i].recv_id,server_job_context); + mir_op->job_count--; + continue; + } + + PINT_flow_reset(jobs[i].flow_desc); + + jobs[i].flow_desc->src.endpoint_id = TROVE_ENDPOINT; + jobs[i].flow_desc->src.u.trove.handle = reqmir_p->src_handle; + jobs[i].flow_desc->src.u.trove.coll_id = reqmir_p->fs_id; + + jobs[i].flow_desc->dest.endpoint_id = BMI_ENDPOINT; + jobs[i].flow_desc->dest.u.bmi.address = jobs[i].svr_addr; + + jobs[i].flow_desc->buffer_size = cur_fs->fp_buffer_size; + jobs[i].flow_desc->buffers_per_flow = cur_fs->fp_buffers_per_flow; + + jobs[i].flow_desc->file_data.extend_flag = 1; + jobs[i].flow_desc->file_data.fsize = reqmir_p->bsize; + jobs[i].flow_desc->file_data.dist = reqmir_p->dist; + jobs[i].flow_desc->file_data.server_nr = 0; + jobs[i].flow_desc->file_data.server_ct = 1; + + jobs[i].flow_desc->file_req = PVFS_BYTE; + jobs[i].flow_desc->file_req_offset = 0; + jobs[i].flow_desc->mem_req = NULL; + + jobs[i].flow_desc->tag = jobs[i].session_tag; + jobs[i].flow_desc->type = reqmir_p->flow_type; + jobs[i].flow_desc->user_ptr = NULL; + jobs[i].flow_desc->aggregate_size = reqmir_p->bsize; + + 
gossip_debug(GOSSIP_MIRROR_DEBUG,"\tbsize:%lld \tdatafile:nr:%d\tct:%d" + "\toffset:%lld \ttag:%d\n", + lld(jobs[i].flow_desc->file_data.fsize), + jobs[i].flow_desc->file_data.server_nr, + jobs[i].flow_desc->file_data.server_ct, + lld(jobs[i].flow_desc->file_req_offset), + jobs[i].flow_desc->tag ); + + /* post the flow */ + status_user_tag = TAG(SRC_FLOW_POST,i); + ret = job_flow( jobs[i].flow_desc, + smcb, + status_user_tag, + &(jobs[i].flow_status), + &(jobs[i].flow_job_id), + server_job_context, + server_config->server_job_flow_timeout, + NULL ); + + /* if the flow cannot be posted or fails immediately, the branches below + * record the error in js_p->error_code, cancel the write-ack receive + * already posted for this destination, and undo its job_count + * increment before moving on to the next destination. */ + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tSRC_FLOW_POST:return code:%d\n", + ret); + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tflow job id:%d\n", + (int)jobs[i].flow_job_id); + if (ret<0) + { + /* a failure occurred while adding this job to the job_time_mgr */ + js_p->error_code = ret; + job_bmi_cancel(jobs[i].recv_id,server_job_context); + mir_op->job_count--; + continue; + } + else if (ret == 1 && jobs[i].flow_status.error_code == 0) + { + /* job completed immediately AND was successful */ + js_p->error_code = 0; + /* NOTE(review): job_count is incremented although the flow already completed (ret == 1) - confirm check_comm still receives a completion for it */ + mir_op->job_count++; + continue; + } + else if (jobs[i].flow_status.error_code) + { + /* job completed immediately AND was NOT successful */ + js_p->error_code = jobs[i].flow_status.error_code; + job_bmi_cancel(jobs[i].recv_id,server_job_context); + mir_op->job_count--; + continue; + } + + /* increment job_count again for successful post of flow */ + mir_op->job_count++; + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tsuccessfully posted flow " + "\tjob_count : %d...\n", + mir_op->job_count); + } /* end for each destination handle */ + + /* if the job_count > 0, then at least one of the IO requests 
was + * successful + * and the submission of the write-ack and flow were also successful. */ + if (mir_op->job_count > 0) + return SM_ACTION_DEFERRED; + else + { + /* if job_count is zero, then nothing worked. */ + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tNo jobs successfully posted in " + "post_ack_and_flow : %d\n", + js_p->error_code); + js_p->error_code = -PVFS_EIO; + return SM_ACTION_COMPLETE; + } +} /* end action post_ack_and_flow */ + + +static PINT_sm_action check_comm ( struct PINT_smcb *smcb, job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing check_comm...\n"); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + struct PVFS_servreq_mirror *reqmir_p = &(s_op->req->u.mirror); + + PINT_server_mirror_op *mir_op = &(s_op->u.mirror); + write_job_t *jobs = mir_op->jobs; + + job_aint status_user_tag = js_p->status_user_tag; + int ret,i; + + struct filesystem_configuration_s *cur_fs = NULL; + struct server_configuration_s *server_config = NULL; + + server_config = get_server_config_struct(); + cur_fs = PINT_config_find_fs_id(server_config,reqmir_p->fs_id); + + if( cur_fs == NULL ) + { + return -PVFS_EINVAL; + } + + /* status_user_tag's will only exist for those destination handles that had + * successful posts of a write-ack and flow. */ + switch(PHASE(status_user_tag)) + { + case SRC_FLOW_POST: + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tReceived SRC_FLOW_POST for " + "dst(%d)...\n", + (int)DST(status_user_tag)); + i = (int)DST(status_user_tag); + jobs[i].flow_status = *js_p; + mir_op->job_count--; + if (mir_op->job_count > 0) + { + ret = job_reset_timeout(jobs[i].recv_id, + server_config->server_job_bmi_timeout); + if (ret == 0 || ret == -PVFS_EINVAL) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, + "\ttimeout reset:%d(%0x)\n", ret,ret); + /* ack was reset or it has already completed. 
*/ + } + else + { + gossip_lerr("Unable to reset timeout"); + return(ret); + } + } + break; + } + case WRITE_ACK_RCV: + { + i = (int)DST(status_user_tag); + jobs[i].recv_status = *js_p; + mir_op->job_count--; + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tReceived WRITE_ACK_RCV for " + "dst(%d)...\n", + (int)DST(status_user_tag)); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tjob_count:%d\n", + mir_op->job_count); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\trecv_status.error_code:%d\n", + jobs[i].recv_status.error_code); + break; + } + default: + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tReceived unknown:%d\n", + (int)status_user_tag); + mir_op->job_count = 0; + break; + } + } /* end switch */ + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tjs_p->error_code:%d" + "\tjs_p->actual_size:%d\n", + js_p->error_code, + (int)js_p->actual_size); + + js_p->error_code = 0; + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tjob_count:%d\n", mir_op->job_count); + if (mir_op->job_count) /* more jobs to process */ + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tjs_p->error_code:%d " + "\tjob_count:%d " + "\tleaving deferred\n", + js_p->error_code, + mir_op->job_count); + return SM_ACTION_DEFERRED; + } + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tjs_p->error_code:%d" + "\tjob_count:%d" + "\tleaving complete\n", + js_p->error_code, + mir_op->job_count); + + if (mir_op->job_count == 0) /*no more jobs to process*/ + js_p->error_code = COMM_DONE; + + return SM_ACTION_COMPLETE; +} /* end action check_comm */ + + +static PINT_sm_action check_results( struct PINT_smcb *smcb, + job_status_s *js_p ) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing check_results...\n"); + struct PINT_server_op *s_op = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + struct PVFS_servreq_mirror *reqmir_p = &(s_op->req->u.mirror); + struct PVFS_servresp_mirror *respmir_p = &(s_op->resp.u.mirror); + PINT_server_mirror_op *mir_p = &(s_op->u.mirror); + write_job_t *jobs = mir_p->jobs; + + struct PINT_decoded_msg decoded_resp = {0}; + struct PVFS_server_resp *resp = 
NULL; + + int ret,i; + + js_p->error_code = 0; + + /*check result statuses for each destination handle*/ + for (i=0; idst_count; i++) + { + if (jobs[i].io_status != 0) + { + respmir_p->write_status_code[i] = jobs[i].io_status; + } + else if (jobs[i].recv_status.error_code) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tACK rcv failed:%d\n", + jobs[i].recv_status.error_code ); + respmir_p->write_status_code[i] = jobs[i].recv_status.error_code; + } + else if (jobs[i].flow_status.error_code) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tFLOW failed:%d\n", + jobs[i].flow_status.error_code); + respmir_p->write_status_code[i] = jobs[i].flow_status.error_code; + } + else + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tACK & FLOW succeeded\n"); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\trecv_status.actual_size:%d\n", + (int)jobs[i].recv_status.actual_size); + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tparameters sent into " + "PINT_serv_decode_resp:\n"); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\t\tfs_id : %d\n" + "\t\t\tencoded_resp_p : %p\n" + "\t\t\tdecoded_resp : %p\n" + "\t\t\tsvr_addr : %ld\n" + "\t\t\tactual_size : %d\n" + "\t\t\tresp : %p\n", + reqmir_p->fs_id, + (void *)jobs[i].encoded_resp_p, + &decoded_resp, + (long int)jobs[i].svr_addr, + (int)jobs[i].recv_status.actual_size, + resp); + ret = PINT_serv_decode_resp( reqmir_p->fs_id, + jobs[i].encoded_resp_p, + &decoded_resp, + &(jobs[i].svr_addr), + jobs[i].recv_status.actual_size, + &resp ); + gossip_debug(GOSSIP_MIRROR_DEBUG, + "\tafter PINT_serv_decode_resp..\n"); + if (ret == 0) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tsession_tag : %d\n", + jobs[i].session_tag); + respmir_p->bytes_written[i] = + resp->u.write_completion.total_completed; + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tafter bytes_written..\n"); + respmir_p->write_status_code[i] = jobs[i].recv_status.error_code; + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tafter write_status_code..\n"); + PINT_decode_release(&decoded_resp, PINT_DECODE_RESP); + gossip_debug(GOSSIP_MIRROR_DEBUG, 
"\tbytes written:%d " + "\tresp status:%d " + "\tresp op:%d\n", + respmir_p->bytes_written[i], + respmir_p->write_status_code[i], + resp->op ); + } + else + { + gossip_lerr("PINT_serv_decode_resp failed(%d)",ret); + respmir_p->write_status_code[i] = ret; + } + }/*end if*/ + + if (jobs[i].flow_desc) + PINT_flow_free(jobs[i].flow_desc); + if (jobs[i].encoded_resp_p) + BMI_memfree( jobs[i].svr_addr, + jobs[i].encoded_resp_p, + mir_p->max_resp_sz, + BMI_RECV); + } /* end for each destination handle */ + + /* if at least ONE of the writes was successful, then return a zero to + * indicate that ALL status codes must be checked. If NONE of the writes + * were successful, then return a non-zero to indicate that individual + * statuses do not need to be checked. */ + for (i=0; idst_count; i++) + { + if (respmir_p->write_status_code[i] == 0) + break; + } + if (i==reqmir_p->dst_count) + js_p->error_code = -PVFS_EIO; + else + js_p->error_code = 0; + + return SM_ACTION_COMPLETE; +} /* end action check_results */ + +static PINT_sm_action cleanup_mirror_work(struct PINT_smcb *smcb, + job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing cleanup_mirror_work.....\n"); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + struct PVFS_servresp_mirror *respmir_p = &(s_op->resp.u.mirror); + struct PVFS_servreq_mirror *reqmir_p = &(s_op->req->u.mirror); + struct PINT_server_mirror_op *mir_p = &(s_op->u.mirror); + write_job_t *jobs = mir_p->jobs; + int i,ret; + job_id_t job_id; + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tIN:js_p->error_code:%d\n", + js_p->error_code); + if (js_p->error_code == NO_DATA_TO_COPY) + { + js_p->error_code = 0; + } + + if (mir_p->jobs) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tJOBS IO STATUS...\n"); + for (i=0; idst_count; i++) + gossip_debug(GOSSIP_MIRROR_DEBUG,"\t\tio_status(%d):%d\n", i, + (int)jobs[i].io_status); + } + + if (mir_p->jobs) + free(mir_p->jobs); + + gossip_debug(GOSSIP_MIRROR_DEBUG, 
"\tOUT:js_p->error_code:%d\n", + js_p->error_code); + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\trespmir_p->src_handle:%llu\n", + llu(respmir_p->src_handle)); + + for (i=0; idst_count; i++) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "\t\tbytes_written[%d]:%d\n" + "\t\t\twrite_status_code[%d]:%d\n", + i, respmir_p->bytes_written[i], + i, respmir_p->write_status_code[i]); + } + gossip_debug(GOSSIP_MIRROR_DEBUG,"\ts_op->resp.status:%d\n", + s_op->resp.status); + + /*If s_op->schedule_id is non-zero, then we must release the object */ + /*from the scheduler. */ + if (s_op->scheduled_id) + { + ret = job_req_sched_release(s_op->scheduled_id, + smcb, + 0, + js_p, + &job_id, + server_job_context); + s_op->scheduled_id = 0; + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tLeaving cleanup_mirror_work.....\n"); + return (ret); + } + + gossip_debug(GOSSIP_MIRROR_DEBUG,"Leaving cleanup_mirror_work.....\n"); + return SM_ACTION_COMPLETE; +} /* end action cleanup_mirror_work */ + + + +int write_comp_fn(void *v_p, struct PVFS_server_resp *resp_p, int i) +{ + /* This function executes AFTER each msgpair has completed and is under the + * control of msgpairarray.sm. Here, we will capture the response from the + * PVFS_SERV_IO request; however, the response pertains only to the initial + * ACK and start of the flow on the remote server. We will retain the status + * from each response and then check it when we return from the jump to + * msgpairarray. We will always return a zero from this function, even if + * the request failed, so we can check it later. */ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing write_comp_fn.....\n"); + + PINT_smcb *smcb = v_p; + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + struct PINT_server_mirror_op *mir_p = &(s_op->u.mirror); + write_job_t *jobs = mir_p->jobs; + + /* resp_p contains the response from the PVFS_SERV_IO request after it + * has posted its initial write ack and flow. 
*/ + jobs[i].io_status = resp_p->status; + + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tInitial ACK from IO write request(%d). " + "bstream_size is %d.\n", + i, + (int)resp_p->u.io.bstream_size); + gossip_debug(GOSSIP_MIRROR_DEBUG, "\tstatus:%d\n", (int)jobs[i].io_status); + + if (resp_p->status != 0) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tNegative response from " + "PVFS_SERV_IO:%d\n", resp_p->status); + } + + return(0); +} /* end msgpair completion function mirror_comp_fn */ + +/* set handle and fs-id ... required by the state machine processor */ +static inline int PINT_get_object_ref_mirror( struct PVFS_server_req *req, + PVFS_fs_id *fs_id, + PVFS_handle *handle ) +{ + *fs_id = req->u.mirror.fs_id; + *handle = req->u.mirror.src_handle; + return 0; +}; + +/* request parameters */ +struct PINT_server_req_params pvfs2_mirror_params = +{ + .string_name = "mirror", + .perm = PINT_SERVER_CHECK_NONE, + .access_type = PINT_server_req_modify, + .sched_policy = PINT_SERVER_REQ_SCHEDULE, + .get_object_ref = PINT_get_object_ref_mirror, + .state_machine = &pvfs2_mirror_sm +}; diff --git a/src/server/mkdir.sm b/src/server/mkdir.sm index bc88758..451dce5 100644 --- a/src/server/mkdir.sm +++ b/src/server/mkdir.sm @@ -14,6 +14,8 @@ #include "pvfs2-util.h" #include "pvfs2-internal.h" #include "pint-util.h" +#include "pint-cached-config.h" +#include "trove-handle-mgmt.h" %% @@ -124,7 +126,7 @@ static PINT_sm_action mkdir_create( PVFS_TYPE_DIRECTORY, NULL, TROVE_SYNC, smcb, 0, js_p, &i, - server_job_context); + server_job_context, s_op->req->hints); return ret; } @@ -165,13 +167,13 @@ static PINT_sm_action mkdir_setattrib( s_op->u.mkdir.fs_id, s_op->resp.u.mkdir.handle, ds_attr, TROVE_SYNC, - smcb, 0, js_p, &j_id, server_job_context); + smcb, 0, js_p, &j_id, server_job_context, s_op->req->hints); return ret; } -static PINT_sm_action mkdir_create_dirdata_dspace( - struct PINT_smcb *smcb, job_status_s *js_p) +static int mkdir_create_dirdata_dspace( + struct PINT_smcb *smcb, 
job_status_s *js_p) { struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret = -PVFS_ENOMEM; @@ -197,7 +199,7 @@ static PINT_sm_action mkdir_create_dirdata_dspace( s_op->u.mkdir.fs_id, &extent_array, PVFS_TYPE_DIRDATA, NULL, TROVE_SYNC, smcb, 0, js_p, &i, - server_job_context); + server_job_context, s_op->req->hints); free(extent_array.extent_array); extent_array.extent_array = NULL; @@ -242,13 +244,12 @@ static PINT_sm_action mkdir_write_dirdata_handle( s_op->u.mkdir.fs_id, s_op->resp.u.mkdir.handle, &s_op->key, &s_op->val, 0, - NULL, smcb, 0, js_p, &i, server_job_context); + NULL, smcb, 0, js_p, &i, server_job_context, s_op->req->hints); return ret; } -static PINT_sm_action mkdir_error( - struct PINT_smcb *smcb, job_status_s *js_p) +static int mkdir_error(struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); s_op->resp.u.mkdir.handle = 0; @@ -257,7 +258,7 @@ static PINT_sm_action mkdir_error( { js_p->error_code = -PVFS_EINVAL; } - return SM_ACTION_COMPLETE; + return 1; } /* @@ -284,10 +285,40 @@ static PINT_sm_action mkdir_prep_sm( PVFS_object_attr *a_p = NULL; PVFS_ds_attributes *ds_attr = NULL; + PVFS_handle_extent_array data_handle_ext_array; + PINT_llist *cur = NULL; + struct host_alias_s *cur_alias; + server_configuration_s *config = get_server_config_struct(); + s_op->u.mkdir.fs_id = s_op->req->u.mkdir.fs_id; s_op->u.mkdir.handle_extent_array = s_op->req->u.mkdir.handle_extent_array; + if(config->osd_type == OSD_DATAFILE) + { + cur = config->host_aliases; + while(cur) + { + cur_alias = PINT_llist_head(cur); + if (!cur_alias) + { + break; + } + if(!strncmp(cur_alias->bmi_address, "osd", 3)) { + PINT_cached_config_get_server( + s_op->req->u.create.fs_id, + cur_alias->bmi_address, + PINT_SERVER_TYPE_IO, + &data_handle_ext_array); + } + cur = PINT_llist_next(cur); + } + + s_op->resp.u.mkdir.cid = malloc(sizeof(PVFS_handle)); + s_op->resp.u.mkdir.cid = 
trove_handle_alloc_from_range(s_op->req->u.create.fs_id, &data_handle_ext_array); + s_op->req->u.mkdir.attr.cid = s_op->resp.u.mkdir.cid; + } + a_p = &(s_op->req->u.mkdir.attr); if (a_p->objtype != PVFS_TYPE_DIRECTORY) @@ -319,6 +350,7 @@ static inline int PINT_get_object_ref_mkdir( struct PINT_server_req_params pvfs2_mkdir_params = { .string_name = "mkdir", + .get_object_ref = PINT_get_object_ref_mkdir, .perm = PINT_SERVER_CHECK_NONE, .access_type = PINT_server_req_modify, .state_machine = &pvfs2_mkdir_sm diff --git a/src/server/module.mk.in b/src/server/module.mk.in index 6998c75..175156d 100644 --- a/src/server/module.mk.in +++ b/src/server/module.mk.in @@ -6,9 +6,14 @@ ifdef BUILD_SERVER # automatically generated c files SERVER_SMCGEN := \ + $(DIR)/pjmp-machines.c \ $(DIR)/setparam.c \ $(DIR)/lookup.c \ $(DIR)/create.c \ + $(DIR)/mirror.c \ + $(DIR)/create-immutable-copies.c \ + $(DIR)/batch-create.c \ + $(DIR)/batch-remove.c \ $(DIR)/crdirent.c \ $(DIR)/set-attr.c \ $(DIR)/mkdir.c \ @@ -29,7 +34,6 @@ ifdef BUILD_SERVER $(DIR)/final-response.c \ $(DIR)/perf-update.c \ $(DIR)/perf-mon.c \ - $(DIR)/event-mon.c \ $(DIR)/iterate-handles.c \ $(DIR)/job-timer.c \ $(DIR)/proto-error.c \ @@ -40,15 +44,16 @@ ifdef BUILD_SERVER $(DIR)/set-eattr.c \ $(DIR)/del-eattr.c \ $(DIR)/list-eattr.c \ - $(DIR)/unexpected.c + $(DIR)/unexpected.c \ + $(DIR)/precreate-pool-refiller.c \ + $(DIR)/unstuff.c \ + $(DIR)/tree-communicate.c \ + $(DIR)/mgmt-get-uid.c # c files that should be added to server library SERVERSRC += \ $(SERVER_SMCGEN) - # server only file - SERVERSRC += $(DIR)/check.c - # track generate .c files to remove during dist clean, etc. 
SMCGEN += $(SERVER_SMCGEN) @@ -60,6 +65,11 @@ ifdef BUILD_SERVER MODCFLAGS_$(DIR)/statfs.c = \ -I$(srcdir)/src/io/trove/trove-handle-mgmt + MODCFLAGS_$(DIR)/create.c = \ + -I$(srcdir)/src/io/trove/trove-handle-mgmt + MODCFLAGS_$(DIR)/mkdir.c = \ + -I$(srcdir)/src/io/trove/trove-handle-mgmt + ifdef PVFS2_SEGV_BACKTRACE MODCFLAGS_$(DIR)/pvfs2-server.c := -D__PVFS2_SEGV_BACKTRACE__ endif diff --git a/src/server/perf-mon.sm b/src/server/perf-mon.sm index 1c8c0a8..4500e0f 100644 --- a/src/server/perf-mon.sm +++ b/src/server/perf-mon.sm @@ -13,11 +13,11 @@ #include #include "pvfs2-server.h" +#include "pint-util.h" #include "pint-perf-counter.h" -static uint64_t* static_start_time_array_ms = NULL; -static uint64_t* static_interval_array_ms = NULL; -static int64_t** static_value_matrix = NULL; +/* there had better not be but one of these requests at a time */ +static int64_t *static_value_array = NULL; static int static_history_size = 0; static int static_key_count = 0; @@ -25,38 +25,51 @@ static int reallocate_static_arrays_if_needed(void); #define MAX_NEXT_ID 1000000000 +#define STATIC_TIME(i) (static_value_array[((i) * (static_key_count + 2)) \ + + static_key_count]) +#define STATIC_INTV(i) (static_value_array[((i) * (static_key_count + 2)) \ + + static_key_count + 1]) +#define STATIC_SAMP(i) (static_value_array[((i) * (static_key_count + 2))]) + +#define SOP_PERF_SAMP(i) (s_op->resp.u.mgmt_perf_mon.perf_array[((i) \ + * (key_count + 2))]) +#define SOP_PERF_TIME(i) (s_op->resp.u.mgmt_perf_mon.perf_array[((i) \ + * (key_count + 2)) + key_count]) +#define SOP_PERF_INTV(i) (s_op->resp.u.mgmt_perf_mon.perf_array[((i) \ + * (key_count + 2)) + key_count + 1]) + %% machine pvfs2_perf_mon_sm { - state prelude - { - jump pvfs2_prelude_sm; - default => do_work; - } - - state do_work - { - run perf_mon_do_work; - default => final_response; - } - - state final_response - { - jump pvfs2_final_response_sm; - default => cleanup; - } - - state cleanup - { - run perf_mon_cleanup; - 
default => terminate; - } + state prelude + { + jump pvfs2_prelude_sm; + default => do_work; + } + + state do_work + { + run perf_mon_do_work; + default => final_response; + } + + state final_response + { + jump pvfs2_final_response_sm; + default => cleanup; + } + + state cleanup + { + run perf_mon_cleanup; + default => terminate; + } } %% -/* perf_mon_cleanup() +/** perf_mon_cleanup() * * cleans up any resources consumed by this state machine and ends * execution of the machine @@ -66,12 +79,12 @@ static PINT_sm_action perf_mon_cleanup( { struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); if(s_op->resp.u.mgmt_perf_mon.perf_array) - free(s_op->resp.u.mgmt_perf_mon.perf_array); + free(s_op->resp.u.mgmt_perf_mon.perf_array); return(server_state_machine_complete(smcb)); } -/* perf_mon_do_work() +/** perf_mon_do_work() * * gathers statistics and builds response */ @@ -79,12 +92,12 @@ static PINT_sm_action perf_mon_do_work( struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - struct timeval tv; int i; int valid_count = 0; uint32_t tmp_next_id; int ret = -1; - int idx; + uint32_t key_count = 0; + uint32_t sample_count = 0; #ifdef __PVFS2_DISABLE_PERF_COUNTERS__ gossip_err("Error: perf_mon request received, but perf counters are disabled.\n"); @@ -92,27 +105,65 @@ static PINT_sm_action perf_mon_do_work( return SM_ACTION_COMPLETE; #endif + /* how many keys and history intervals do we have in the perf counter? */ + ret = PINT_perf_get_info(PINT_server_pc, PINT_PERF_KEY_COUNT, + &key_count); + if(ret < 0) + { + return SM_ACTION_COMPLETE; + return(ret); + } + + /* get no more counters than exist, and no more than requested */ + if (key_count > s_op->req->u.mgmt_perf_mon.key_count) + { + key_count = s_op->req->u.mgmt_perf_mon.key_count; + } + + /* how many keys and history intervals do we have in the perf counter? 
*/ + ret = PINT_perf_get_info(PINT_server_pc, PINT_PERF_HISTORY_SIZE, + &sample_count); + if(ret < 0) + { + js_p->error_code = ret; + return SM_ACTION_COMPLETE; + } + + /* get no more counters than exist, and no more than requested */ + if (sample_count > s_op->req->u.mgmt_perf_mon.count) + { + sample_count = s_op->req->u.mgmt_perf_mon.count; + } + + if (key_count < 1 || sample_count < 1) + { + gossip_err("Error: perf mon request with key_count or sample_count less than 1.\n"); + js_p->error_code = -PVFS_EINVAL; + return SM_ACTION_COMPLETE; + } + /* allocate memory to hold statistics */ s_op->resp.u.mgmt_perf_mon.perf_array - = (struct PVFS_mgmt_perf_stat*)malloc(s_op->req->u.mgmt_perf_mon.count * - sizeof(struct PVFS_mgmt_perf_stat)); + = (int64_t *)malloc(sample_count * (key_count + 2) + * sizeof(int64_t)); if(!s_op->resp.u.mgmt_perf_mon.perf_array) { - js_p->error_code = -PVFS_ENOMEM; - return SM_ACTION_COMPLETE; + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; } + memset(s_op->resp.u.mgmt_perf_mon.perf_array, 0, + sample_count * (key_count + 2) * sizeof(int64_t)); /* fill in some of the response */ - gettimeofday(&tv, NULL); - s_op->resp.u.mgmt_perf_mon.cur_time_ms = tv.tv_sec*1000 + - tv.tv_usec/1000; + s_op->resp.u.mgmt_perf_mon.cur_time_ms = PINT_util_get_time_ms(); s_op->resp.u.mgmt_perf_mon.suggested_next_id - = s_op->req->u.mgmt_perf_mon.next_id; - s_op->resp.u.mgmt_perf_mon.perf_array_count - = s_op->req->u.mgmt_perf_mon.count; + = s_op->req->u.mgmt_perf_mon.next_id; + s_op->resp.u.mgmt_perf_mon.key_count = key_count; + s_op->resp.u.mgmt_perf_mon.perf_array_count = + (key_count + 2) * sample_count; - /* make sure we have scratch memory to use as an intermediate buffer for - * performance counters + /* make sure we have scratch memory to use as an intermediate + * buffer for performance counters */ ret = reallocate_static_arrays_if_needed(); if(ret < 0) @@ -124,22 +175,22 @@ static PINT_sm_action perf_mon_do_work( } 
PINT_perf_retrieve(PINT_server_pc, - static_value_matrix, - static_start_time_array_ms, - static_interval_array_ms, - static_key_count, - static_history_size); - - /* work through start times, and find the oldest one that is new enough - * to satisfy next_id - * NOTE: we encode hi order bits of timestamp as id values. That - * should be sufficient to maintain compatibility. + static_value_array, + static_key_count, + static_history_size); + + /* we are filling per_array in backwards, but this is the order + * karma and other programs that collect data over multiple + * intervals expect it */ + /* Work through start times, and find the oldest one that + * is new enough to satisfy next_id + * NOTE: we encode hi order bits of timestamp as id values. + * That * should be sufficient to maintain compatibility. */ - for(i=static_history_size-1; i>=0; i--) + valid_count = 0; + for(i = 1; i < static_history_size; i++) { - tmp_next_id = 0; - tmp_next_id += (uint32_t)(static_start_time_array_ms[i] % MAX_NEXT_ID); - + tmp_next_id = STATIC_TIME(i) % MAX_NEXT_ID; /* check three conditions: * 1) that this interval from the perf counter is valid (start time * not zero) @@ -148,74 +199,66 @@ static PINT_sm_action perf_mon_do_work( * 3) if the start time has rolled over within MAX_NEXT_ID */ if(tmp_next_id != 0 && - ((tmp_next_id >= s_op->req->u.mgmt_perf_mon.next_id) || - ((s_op->req->u.mgmt_perf_mon.next_id-tmp_next_id)>(MAX_NEXT_ID/2)))) + ((tmp_next_id >= s_op->req->u.mgmt_perf_mon.next_id) || + ((s_op->req->u.mgmt_perf_mon.next_id - tmp_next_id) > + (MAX_NEXT_ID / 2)))) { - /* found the first valid timestamp */ - valid_count = i+1; - /* compute a next id to suggest that the client use next time - * (newest time plus 1) - */ - tmp_next_id = 0; - tmp_next_id += (uint32_t)(static_start_time_array_ms[0] % - MAX_NEXT_ID); - tmp_next_id += 1; - s_op->resp.u.mgmt_perf_mon.suggested_next_id = tmp_next_id; + /* found oldest valid sample */ break; } - } - if(valid_count > 
s_op->req->u.mgmt_perf_mon.count) - { - valid_count = s_op->req->u.mgmt_perf_mon.count; - } - - /* fill in all of the valid values */ - for(i=0; ireq->u.mgmt_perf_mon.count; i++) + } + /* now copy newer, valid samples */ + for(; i < static_history_size && valid_count < sample_count; i++) { - if(iresp.u.mgmt_perf_mon.perf_array[idx].valid_flag = 1; - s_op->resp.u.mgmt_perf_mon.perf_array[idx].id = 0; - s_op->resp.u.mgmt_perf_mon.perf_array[idx].id += - (uint32_t)(static_start_time_array_ms[i] % 1000000000); - s_op->resp.u.mgmt_perf_mon.perf_array[idx].start_time_ms = - static_start_time_array_ms[i]; - s_op->resp.u.mgmt_perf_mon.perf_array[idx].read = - static_value_matrix[PINT_PERF_READ][i]; - s_op->resp.u.mgmt_perf_mon.perf_array[idx].write = - static_value_matrix[PINT_PERF_WRITE][i]; - s_op->resp.u.mgmt_perf_mon.perf_array[idx].metadata_read = - static_value_matrix[PINT_PERF_METADATA_READ][i]; - s_op->resp.u.mgmt_perf_mon.perf_array[idx].metadata_write = - static_value_matrix[PINT_PERF_METADATA_WRITE][i]; - s_op->resp.u.mgmt_perf_mon.perf_array[idx].dspace_queue = - static_value_matrix[PINT_PERF_METADATA_DSPACE_OPS][i]; - s_op->resp.u.mgmt_perf_mon.perf_array[idx].keyval_queue = - static_value_matrix[PINT_PERF_METADATA_KEYVAL_OPS][i]; + /* valid sample */ + memcpy(&SOP_PERF_SAMP(valid_count), + &STATIC_SAMP(i), + key_count * sizeof(int64_t)); + memcpy(&SOP_PERF_TIME(valid_count), + &STATIC_TIME(i), + 2 * sizeof(int64_t)); + + valid_count++; } - else + } + if(valid_count < sample_count) + { + /* copy sample zero - the newest sample */ + if(STATIC_TIME(0) != 0) { - s_op->resp.u.mgmt_perf_mon.perf_array[i].valid_flag = 0; + /* valid sample */ + memcpy(&SOP_PERF_SAMP(valid_count), + &STATIC_SAMP(0), + key_count * sizeof(int64_t)); + memcpy(&SOP_PERF_TIME(valid_count), + &STATIC_TIME(0), + 2 * sizeof(int64_t)); + + valid_count++; } } - + /* nextid is based on time stamp of last valid sample */ /* set final end time */ if(valid_count > 0) { 
s_op->resp.u.mgmt_perf_mon.end_time_ms = - static_start_time_array_ms[0] + - static_interval_array_ms[0]; + SOP_PERF_TIME(valid_count-1) + SOP_PERF_INTV(valid_count-1); + s_op->resp.u.mgmt_perf_mon.suggested_next_id = + (SOP_PERF_TIME(valid_count-1) + 1) % MAX_NEXT_ID; + } + else + { + s_op->resp.u.mgmt_perf_mon.end_time_ms = 0; } js_p->error_code = 0; + return SM_ACTION_COMPLETE; } -/* reallocate_static_arrays() +/** reallocate_static_arrays() * * allocates new arrays for temporary storage of performance counter data, * freeing old memory if needed @@ -227,87 +270,46 @@ static int reallocate_static_arrays_if_needed(void) unsigned int history_size; unsigned int key_count; int ret = -1; - int i; /* how many keys and history intervals do we have in the perf counter? */ ret = PINT_perf_get_info(PINT_server_pc, PINT_PERF_KEY_COUNT, - &key_count); + &key_count); if(ret < 0) { return(ret); } + + /* the key count shouldn't change once acquired */ + if (static_key_count == 0) + { + static_key_count = key_count; + } + assert(key_count == static_key_count); + + /* get server history size - this can change */ ret = PINT_perf_get_info(PINT_server_pc, PINT_PERF_HISTORY_SIZE, - &history_size); + &history_size); if(ret < 0) { return(ret); } + /* allocate array if needed */ if(history_size > static_history_size) { - if(static_start_time_array_ms) - { - free(static_start_time_array_ms); - } - if(static_interval_array_ms) + if(static_value_array) { - free(static_interval_array_ms); - } - - /* reallocate time arrays */ - static_start_time_array_ms = - (uint64_t*)malloc(history_size*sizeof(uint64_t)); - if(!static_start_time_array_ms) - { - return(-PVFS_ENOMEM); + free(static_value_array); } - static_interval_array_ms = - (uint64_t*)malloc(history_size*sizeof(uint64_t)); - if(!static_interval_array_ms) + static_value_array = (int64_t *) + malloc(history_size * (key_count + 2) * sizeof(int64_t)); + if(!static_value_array) { - free(static_start_time_array_ms); return(-PVFS_ENOMEM); } - 
- /* the key count shouldn't change once acquired */ - assert((static_key_count == 0)||(key_count == static_key_count)); - - /* allocate value matrix */ - if(!static_value_matrix) - { - static_value_matrix = - (int64_t**)malloc(key_count*sizeof(int64_t*)); - if(!static_value_matrix) - { - free(static_start_time_array_ms); - free(static_interval_array_ms); - return(-PVFS_ENOMEM); - } - memset(static_value_matrix, 0, key_count*sizeof(int64_t*)); - } - - for(i=0; i=0; i--) - { - free(static_value_matrix[i]); - } - free(static_start_time_array_ms); - free(static_interval_array_ms); - return(-PVFS_ENOMEM); - } - } + static_history_size = history_size; } - /* NOTE: we change the static counts, even if they are decreasing. It is - * ok if the arrays are bigger than needed. - */ - static_history_size = history_size; - static_key_count = key_count; return(0); } diff --git a/src/server/perf-update.sm b/src/server/perf-update.sm index 3aa015c..3230698 100644 --- a/src/server/perf-update.sm +++ b/src/server/perf-update.sm @@ -35,7 +35,7 @@ machine pvfs2_perf_update_sm %% -/* perf_update_error() +/** perf_update_error() * * cleans up any resources consumed by this state machine and ends * execution of the machine @@ -50,7 +50,7 @@ static PINT_sm_action perf_update_error( return(server_state_machine_complete(smcb)); } -/* perf_update_do_work() +/** perf_update_do_work() * * resets counters, updates metrices, etc- this is intended to be called * repeatedly on a regular interval @@ -65,7 +65,6 @@ static PINT_sm_action perf_update_do_work( char* ptr; char* token; char delim[] = "\n"; - struct server_configuration_s *user_opts = get_server_config_struct(); #if 0 PINT_STATE_DEBUG("do_work"); @@ -92,12 +91,12 @@ static PINT_sm_action perf_update_do_work( PINT_perf_rollover(PINT_server_pc); /* post another timer */ - return(job_req_sched_post_timer(user_opts->perf_update_interval, - smcb, - 0, - js_p, - &tmp_id, - server_job_context)); + 
return(job_req_sched_post_timer(PINT_server_pc->interval, + smcb, + 0, + js_p, + &tmp_id, + server_job_context)); } struct PINT_server_req_params pvfs2_perf_update_params = diff --git a/src/server/pjmp-machines.sm b/src/server/pjmp-machines.sm new file mode 100644 index 0000000..1ab252a --- /dev/null +++ b/src/server/pjmp-machines.sm @@ -0,0 +1,301 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + * + * Changes by Acxiom Corporation to add dirent_count field to attributes + * Copyright © Acxiom Corporation, 2005. + */ + +#include +#include + +#include "server-config.h" +#include "pvfs2-server.h" +#include "pvfs2-attr.h" +#include "pvfs2-types.h" +#include "pvfs2-types-debug.h" +#include "pvfs2-util.h" +#include "pint-util.h" +#include "pvfs2-internal.h" +#include "pint-cached-config.h" + +%% +machine pvfs2_pjmp_get_attr_work_sm +{ + state pjmp_get_attr_work_initialize + { + run pjmp_initialize; + default => pjmp_call_get_attr_work_sm; + } + + state pjmp_call_get_attr_work_sm + { + jump pvfs2_get_attr_work_sm; + default => pjmp_get_attr_work_execute_terminate; + } + + state pjmp_get_attr_work_execute_terminate + { + run pjmp_execute_terminate; + default => terminate; + } +} + + +machine pvfs2_pjmp_create_immutable_copies_sm +{ + state pjmp_create_immutable_copies_initialize + { + run pjmp_initialize; + default => pjmp_call_create_immutable_copies_sm; + } + + state pjmp_call_create_immutable_copies_sm + { + jump pvfs2_create_immutable_copies_sm; + default => pjmp_create_immutable_copies_execute_terminate; + } + + state pjmp_create_immutable_copies_execute_terminate + { + run pjmp_execute_terminate; + default => terminate; + } +} + + +machine pvfs2_pjmp_mirror_work_sm +{ + state pjmp_mirror_work_initialize + { + run pjmp_initialize; + default => pjmp_call_mirror_work_sm; + } + + state pjmp_call_mirror_work_sm + { + jump pvfs2_mirror_work_sm; + default => pjmp_mirror_work_execute_terminate; + } + + state 
pjmp_mirror_work_execute_terminate + { + run pjmp_execute_terminate; + default => terminate; + } +} + + +machine pvfs2_pjmp_remove_work_sm +{ + state pjmp_remove_work_initialize + { + run pjmp_remove_work_schedule_job; + success => pjmp_call_remove_work_sm; + default => pjmp_remove_work_execute_terminate; + } + + state pjmp_call_remove_work_sm + { + jump pvfs2_remove_work_sm; + default => pjmp_remove_work_release_job; + } + + state pjmp_remove_work_release_job + { + run pjmp_remove_work_release_job; + default => pjmp_remove_work_execute_terminate; + } + state pjmp_remove_work_execute_terminate + { + run pjmp_remove_work_execute_terminate; + default => terminate; + } +} + + +machine pvfs2_pjmp_get_attr_with_prelude_sm +{ + state pjmp_initialize + { + run pjmp_initialize; + default => pjmp_call_get_attr_with_prelude; + } + + state pjmp_call_get_attr_with_prelude + { + jump pvfs2_get_attr_with_prelude_sm; + default => pjmp_execute_terminate; + } + + state pjmp_execute_terminate + { + run pjmp_execute_terminate; + default => terminate; + } +} + + +machine pvfs2_pjmp_call_msgpairarray_sm +{ + state call_msgpairarray + { + run call_msgpairarray; + success => transfer_msgpair; + default => cleanup_msgpairarray; + } + + state transfer_msgpair + { + jump pvfs2_msgpairarray_sm; + default => cleanup_msgpairarray; + } + + state cleanup_msgpairarray + { + run cleanup_msgpairarray; + default => terminate; + } +} /*end state machine pvfs2_pjmp_call_msgpairarray_sm*/ +%% + +static PINT_sm_action pjmp_initialize( struct PINT_smcb *smcb + , job_status_s *js_p) +{ + js_p->error_code = 0; + return SM_ACTION_COMPLETE; +} + +static PINT_sm_action pjmp_execute_terminate( struct PINT_smcb *smcb + , job_status_s *js_p) +{ + return SM_ACTION_TERMINATE; +} + +/****************************************************************************/ +/* Actions for pvfs2_pjmp_call_msgpairarray_sm */ +/****************************************************************************/ + +/*We use a separate 
nested state machine to call msgpairarray because */ +/*msgpairarray.sm is setup to work only with a "jump". When "pjmp" */ +/*is used, the frame stack gets corrupted. */ +static PINT_sm_action call_msgpairarray (struct PINT_smcb *smcb + ,job_status_s *js_p) +{ + gossip_debug(GOSSIP_SERVER_DEBUG,"Executing pvfs2_pjmp_call_msgpairarray_sm:" + "call_msgpairarray for operation (%s)\n" + ,PINT_map_server_op_to_string(smcb->op)); + gossip_debug(GOSSIP_SERVER_DEBUG,"\tframe count is %d.\n",smcb->frame_count); + gossip_debug(GOSSIP_SERVER_DEBUG,"\t base frame is %d.\n",smcb->base_frame); + gossip_debug(GOSSIP_SERVER_DEBUG,"\t op nbr is #%d.\n",smcb->op); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_sm_msgarray_op *msgarray = &(s_op->msgarray_op); + + gossip_debug(GOSSIP_SERVER_DEBUG,"\ts_op:%p\n",s_op); + + js_p->error_code = 0; + + gossip_debug(GOSSIP_SERVER_DEBUG,"\tmsgarray->msgpair.req.op:%d\n" + ,msgarray->msgpair.req.op ); + + PINT_sm_push_frame(smcb,0,msgarray); + + gossip_debug(GOSSIP_SERVER_DEBUG,"\tAFTER PUSH:smcb->base_frame:%d" + "\tframe_count:%d\n" + ,smcb->base_frame,smcb->frame_count); + + return SM_ACTION_COMPLETE; +}/*end action call_msgpairarray*/ + +static PINT_sm_action cleanup_msgpairarray (struct PINT_smcb *smcb + ,job_status_s *js_p) +{ + gossip_debug(GOSSIP_SERVER_DEBUG,"Executing pvfs2_pjmp_call_msgpairarray_sm:cleanup_msgpairarray" + "....\n"); + struct PINT_server_op *s_op = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + PINT_sm_msgarray_op *msgarray = &(s_op->msgarray_op); + + gossip_debug(GOSSIP_SERVER_DEBUG,"\ts_op:%p\n",s_op); + + gossip_debug(GOSSIP_SERVER_DEBUG,"\tjs_p->error_code:%d\n" + ,js_p->error_code); + gossip_debug(GOSSIP_SERVER_DEBUG,"\tBEFORE POP:smcb->frame_base:%d" + "\tframe_count:%d\n" + ,smcb->base_frame,smcb->frame_count); + + + gossip_debug(GOSSIP_SERVER_DEBUG,"\tmsgarray->msgpair.req.op:%d\n" + ,msgarray->msgpair.req.op ); + + gossip_debug(GOSSIP_SERVER_DEBUG,"\ts_op->resp.status:%d\n" + 
,s_op->resp.status); + + return SM_ACTION_TERMINATE; +}/*end action cleanup_msgpairarray*/ + +static PINT_sm_action pjmp_remove_work_schedule_job(struct PINT_smcb *smcb, job_status_s *js_p) +{ + int ret; + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + ret = PINT_server_req_get_object_ref(s_op->req + ,&s_op->target_fs_id + ,&s_op->target_handle); + s_op->access_type = PINT_server_req_get_access_type(s_op->req); + s_op->sched_policy = PINT_server_req_get_sched_policy(s_op->req); + + js_p->error_code = 0; + ret = job_req_sched_post( s_op->op + ,s_op->target_fs_id + ,s_op->target_handle + ,s_op->access_type + ,s_op->sched_policy + ,smcb + ,0 + ,js_p + ,&(s_op->scheduled_id) + ,server_job_context); + return ret; +}/*end pjmp_remove_work_schedule_job*/ + + +static PINT_sm_action pjmp_remove_work_release_job(struct PINT_smcb *smcb, job_status_s *js_p) +{ + int ret = -1; + job_id_t tmp_id; + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + /* save the error-code returned from the previous step */ + s_op->u.remove.saved_error_code = js_p->error_code; + + ret = job_req_sched_release( s_op->scheduled_id + ,smcb + ,0 + ,js_p + ,&tmp_id + ,server_job_context); + + return ret; +}/*end pjmp_remove_work_release_job*/ + +static PINT_sm_action pjmp_remove_work_execute_terminate(struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + js_p->error_code = s_op->u.remove.saved_error_code; + + return SM_ACTION_TERMINATE; +}/*end pjmp_remove_execute_terminate */ + + +/* + * Local variables: + * mode: c + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/server/precreate-pool-refiller.sm b/src/server/precreate-pool-refiller.sm new file mode 100644 index 0000000..3706f64 --- /dev/null +++ b/src/server/precreate-pool-refiller.sm @@ -0,0 +1,317 @@ +/* + * (C) 2001 Clemson University and The University of 
Chicago + * + * See COPYING in top-level directory. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pvfs2-server.h" +#include "pint-perf-counter.h" +#include "server-config.h" +#include "pint-util.h" + +static int batch_create_comp_fn( + void *v_p, struct PVFS_server_resp *resp_p, int index); + +%% + +machine pvfs2_precreate_pool_refiller_sm +{ + state setup + { + run setup_fn; + success => wait_for_threshold; + default => error_retry; + } + + state wait_for_threshold + { + run wait_for_threshold_fn; + success => setup_batch_create; + default => error_retry; + } + + state setup_batch_create + { + run setup_batch_create_fn; + success => msgpair_xfer_batch_create; + default => error_retry; + } + + state msgpair_xfer_batch_create + { + jump pvfs2_msgpairarray_sm; + success => store_handles; + default => msgpair_retry; + } + + state msgpair_retry + { + run msgpair_retry_fn; + default => setup_batch_create; + } + + state store_handles + { + run store_handles_fn; + success => wait_for_threshold; + default => error_retry; + } + + state error_retry + { + run error_fn; + success => setup; + default => terminate; + } +} + +%% + +/* msgpair_retry_fn() + * + * handles anything that needs to happen between sets of msgpair retries + */ +static PINT_sm_action msgpair_retry_fn( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + job_id_t tmp_id; + + /* signal anyone waiting on get_handles() that we are having trouble */ + return(job_precreate_pool_fill_signal_error( + s_op->u.precreate_pool_refiller.pool_handle, + s_op->u.precreate_pool_refiller.fsid, + js_p->error_code, + smcb, + 0, + js_p, + &tmp_id, + server_job_context)); +} + +/* wait_for_threshold_fn() + * + * waits until the pool count has dropped below a low threshold before + * proceeding + */ +static PINT_sm_action wait_for_threshold_fn( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct 
PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + job_id_t tmp_id; + struct server_configuration_s *user_opts = get_server_config_struct(); + int index = 0; + + PVFS_ds_type_to_int(s_op->u.precreate_pool_refiller.type, &index); + + return(job_precreate_pool_check_level( + s_op->u.precreate_pool_refiller.pool_handle, + s_op->u.precreate_pool_refiller.fsid, + user_opts->precreate_low_threshold[index], + smcb, + 0, + js_p, + &tmp_id, + server_job_context)); +} + + /* store_handles_fn() + * + * stores a set of precreated handles persistently within precreate pools + */ +static PINT_sm_action store_handles_fn( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + job_id_t tmp_id; + struct server_configuration_s *user_opts = get_server_config_struct(); + int index = 0; + PVFS_ds_type_to_int( s_op->u.precreate_pool_refiller.type, &index ); + + return(job_precreate_pool_fill( + s_op->u.precreate_pool_refiller.pool_handle, + s_op->u.precreate_pool_refiller.fsid, + s_op->u.precreate_pool_refiller.precreate_handle_array, + user_opts->precreate_batch_size[index], + smcb, + 0, + js_p, + &tmp_id, + server_job_context, + NULL)); +} + + +/* setup_batch_create_fn() + * + * prepares a req/resp pair to another server to precreate a batch of + * handles + */ +static PINT_sm_action setup_batch_create_fn( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + PINT_sm_msgpair_state *msg_p = NULL; + PVFS_credentials creds; + struct server_configuration_s *user_opts = get_server_config_struct(); + int index = 0; + + gossip_debug(GOSSIP_SERVER_DEBUG, "setting up msgpair to get precreated " + "handles from %s, of type %u, and put store them in %llu.\n", + s_op->u.precreate_pool_refiller.host, + s_op->u.precreate_pool_refiller.type, + llu(s_op->u.precreate_pool_refiller.pool_handle)); + + PINT_msgpair_init(&s_op->msgarray_op); + msg_p = 
&s_op->msgarray_op.msgpair; + + /* note: we are acting like a client in this case, so use client timeout + * and delay values + */ + s_op->msgarray_op.params.job_timeout = user_opts->client_job_bmi_timeout; + s_op->msgarray_op.params.retry_delay = user_opts->client_retry_delay_ms; + s_op->msgarray_op.params.retry_limit = user_opts->client_retry_limit; + s_op->msgarray_op.params.quiet_flag = 1; + + msg_p->svr_addr = s_op->u.precreate_pool_refiller.host_addr; + PINT_util_gen_credentials(&creds); + + PVFS_ds_type_to_int( s_op->u.precreate_pool_refiller.type, &index ); + + PINT_SERVREQ_BATCH_CREATE_FILL( + msg_p->req, + creds, + s_op->u.precreate_pool_refiller.fsid, + s_op->u.precreate_pool_refiller.type, + user_opts->precreate_batch_size[index], + s_op->u.precreate_pool_refiller.handle_extent_array, + NULL); + + msg_p->fs_id = s_op->u.precreate_pool_refiller.fsid; + msg_p->handle = s_op->u.precreate_pool_refiller.handle_extent_array.extent_array[0].first; + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + msg_p->comp_fn = batch_create_comp_fn; + + PINT_sm_push_frame(smcb, 0, &s_op->msgarray_op); + js_p->error_code = 0; + return(SM_ACTION_COMPLETE); +} + + +/* setup_fn() + * + * initial state to allocate memory for use through the remainder of the + * state machine's life + */ +static PINT_sm_action setup_fn( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct server_configuration_s *user_opts = get_server_config_struct(); + int index = 0; + PVFS_ds_type_to_int( s_op->u.precreate_pool_refiller.type, &index); + + s_op->u.precreate_pool_refiller.precreate_handle_array = + malloc(user_opts->precreate_batch_size[index] * sizeof(PVFS_handle)); + if(!s_op->u.precreate_pool_refiller.precreate_handle_array) + { + js_p->error_code = -PVFS_ENOMEM; + return(SM_ACTION_COMPLETE); + } + + js_p->error_code = 0; + return(SM_ACTION_COMPLETE); +} + + +/* error_fn() + * + * handles error transitions + */ +static 
PINT_sm_action error_fn( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + job_id_t tmp_id; + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + gossip_err("Error: precreate_pool_refiller for %s encountered error.\n", + s_op->u.precreate_pool_refiller.host); + gossip_err("Error: sleeping for 30 seconds before retrying.\n"); + + if(s_op->u.precreate_pool_refiller.precreate_handle_array) + { + free(s_op->u.precreate_pool_refiller.precreate_handle_array); + } + + return(job_req_sched_post_timer( + (30*1000), + smcb, + 0, + js_p, + &tmp_id, + server_job_context)); +} + + +/* batch_create_comp_fn() + * + * msgpair completion function to handle processing batch create response i + * from another server + */ +static int batch_create_comp_fn(void *v_p, + struct PVFS_server_resp *resp_p, + int index) +{ + PINT_smcb *smcb = v_p; + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM); + int i; + + gossip_debug(GOSSIP_SERVER_DEBUG, "batch_create_comp_fn\n"); + + assert(resp_p->op == PVFS_SERV_BATCH_CREATE); + + if (resp_p->status != 0) + { + PVFS_perror_gossip("batch_create request got", resp_p->status); + return resp_p->status; + } + + for(i = 0; iu.batch_create.handle_count; i++) + { + s_op->u.precreate_pool_refiller.precreate_handle_array[i] = + resp_p->u.batch_create.handle_array[i]; + + gossip_debug(GOSSIP_SERVER_DEBUG, + "Got batch created handle: %llu from: %s\n", + llu(resp_p->u.batch_create.handle_array[i]), + s_op->u.precreate_pool_refiller.host); + } + + return 0; +} + +struct PINT_server_req_params pvfs2_precreate_pool_refiller_params = +{ + .string_name = "precreate_pool_refiller", + .perm = PINT_SERVER_CHECK_INVALID, + .state_machine = &pvfs2_precreate_pool_refiller_sm +}; + +/* + * Local variables: + * mode: c + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/server/prelude.sm b/src/server/prelude.sm index b6662b6..ac7bc16 100644 --- 
a/src/server/prelude.sm +++ b/src/server/prelude.sm @@ -15,7 +15,6 @@ #include "pint-util.h" #include "pvfs2-internal.h" #include "pint-perf-counter.h" -#include "check.h" /* prelude state machine: * This is a nested state machine that performs initial setup @@ -84,14 +83,29 @@ nested machine pvfs2_prelude_sm static PINT_sm_action prelude_setup( struct PINT_smcb *smcb, job_status_s *js_p) { + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing pvfs2_prelude_sm:prelude_setup...\n"); + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tbase frame:%d\tframe count:%d\n" + ,smcb->base_frame,smcb->frame_count); int ret; struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); ret = PINT_server_req_get_object_ref( s_op->req, &s_op->target_fs_id, &s_op->target_handle); + if( ret != 0 ) + { + js_p->error_code = -PVFS_EINVAL; + } s_op->access_type = PINT_server_req_get_access_type(s_op->req); s_op->sched_policy = PINT_server_req_get_sched_policy(s_op->req); + /* add the user to the uid mgmt system */ + ret = PINT_add_user_to_uid_mgmt(s_op->req->credentials.uid); + if (ret != 0) + { + gossip_debug(GOSSIP_MIRROR_DEBUG, "Unable to add user id to uid" + "management interface\n"); + } + return SM_ACTION_COMPLETE; } @@ -105,6 +119,17 @@ static PINT_sm_action prelude_req_sched( struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret = -PVFS_EINVAL; + if (s_op->prelude_mask & PRELUDE_SCHEDULER_DONE) { + gossip_debug(GOSSIP_SERVER_DEBUG, + "(%p) %s (prelude sm) state: req_sched already done... 
skipping.\n", s_op, + PINT_map_server_op_to_string(s_op->req->op)); + js_p->error_code = 0; + return SM_ACTION_COMPLETE; + } + + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing pvfs2_prelude_work_sm:prelude_req_sched\n"); + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tbase_frame:%d\tframe_count:%d\n" + ,smcb->base_frame,smcb->frame_count); gossip_debug(GOSSIP_SERVER_DEBUG, "(%p) %s (prelude sm) state: req_sched\n", s_op, PINT_map_server_op_to_string(s_op->req->op)); @@ -116,7 +141,9 @@ static PINT_sm_action prelude_req_sched( smcb, 0, js_p, &(s_op->scheduled_id), server_job_context); + /* these are two different counters - one instantaneous, one cumulative */ PINT_perf_count(PINT_server_pc, PINT_PERF_REQSCHED, 1, PINT_PERF_ADD); + PINT_perf_count(PINT_server_pc, PINT_PERF_REQUESTS, 1, PINT_PERF_ADD); return ret; } @@ -132,6 +159,14 @@ static PINT_sm_action prelude_getattr_if_needed( int ret = -PVFS_EINVAL; job_id_t tmp_id; + if (s_op->prelude_mask & PRELUDE_GETATTR_DONE) { + gossip_debug(GOSSIP_SERVER_DEBUG, + "(%p) %s (prelude sm) state: getattr already done... 
skipping.\n", s_op, + PINT_map_server_op_to_string(s_op->req->op)); + js_p->error_code = 0; + return SM_ACTION_COMPLETE; + } + PINT_ACCESS_DEBUG(s_op, GOSSIP_ACCESS_DETAIL_DEBUG, "start\n"); gossip_debug(GOSSIP_SERVER_DEBUG, @@ -144,6 +179,7 @@ static PINT_sm_action prelude_getattr_if_needed( */ if (s_op->target_handle == PVFS_HANDLE_NULL) { + js_p->error_code = 0; return SM_ACTION_COMPLETE; } @@ -158,7 +194,7 @@ static PINT_sm_action prelude_getattr_if_needed( ret = job_trove_dspace_getattr( s_op->target_fs_id, s_op->target_handle, smcb, &(s_op->ds_attr), - 0, js_p, &tmp_id, server_job_context); + 0, js_p, &tmp_id, server_job_context, s_op->req->hints); return ret; } @@ -279,7 +315,8 @@ static int iterate_ro_wildcards(struct filesystem_configuration_s *fsconfig, PVF { gossip_debug(GOSSIP_SERVER_DEBUG, "BMI_query_addr_range %lld, %s\n", lld(client_addr), fsconfig->ro_hosts[i]); - /* Does the client address match the wildcard specification and/or the netmask specification? */ + /* Does the client address match the wildcard specification and/or */ + /* the netmask specification? */ if (BMI_query_addr_range(client_addr, fsconfig->ro_hosts[i], fsconfig->ro_netmasks[i]) == 1) { @@ -289,15 +326,20 @@ static int iterate_ro_wildcards(struct filesystem_configuration_s *fsconfig, PVF return 0; } -static int permit_operation(PVFS_fs_id fsid, int read_only, PVFS_BMI_addr_t client_addr) +/* + * Return zero if this operation should be allowed. 
+ */ +static int permit_operation(PVFS_fs_id fsid, + enum PINT_server_req_access_type access_type, + PVFS_BMI_addr_t client_addr) { int exp_flags = 0; struct server_configuration_s *serv_config = NULL; struct filesystem_configuration_s * fsconfig = NULL; - if (read_only == 1) + if (access_type == PINT_SERVER_REQ_READONLY) { - return 0; + return 0; /* anything that doesn't modify state is okay */ } serv_config = PINT_get_server_config(); fsconfig = PINT_config_find_fs_id(serv_config, fsid); @@ -339,7 +381,6 @@ static PINT_sm_action prelude_perm_check( PVFS_ds_attributes *ds_attr = NULL; PVFS_uid translated_uid = s_op->req->credentials.uid; PVFS_gid translated_gid = s_op->req->credentials.gid; - PVFS_fs_id fsid = PVFS_FS_ID_NULL; int squashed_flag = 0; int skip_acl_flag = 0; @@ -358,6 +399,13 @@ static PINT_sm_action prelude_perm_check( /* Set the target object attribute pointer.. used later by the acl check */ s_op->target_object_attr = obj_attr; + if (s_op->prelude_mask & PRELUDE_PERM_CHECK_DONE) { + gossip_debug(GOSSIP_SERVER_DEBUG, + "(%p) %s (prelude sm) state: perm check already done... 
skipping.\n", s_op, + PINT_map_server_op_to_string(s_op->req->op)); + return SM_ACTION_COMPLETE; + } + if (s_op->target_fs_id != PVFS_FS_ID_NULL) { /* @@ -373,7 +421,7 @@ static PINT_sm_action prelude_perm_check( else { /* Translate the uid and gid's in case we need to do some squashing based on the export and the client address */ - if (translate_ids(fsid, s_op->req->credentials.uid, s_op->req->credentials.gid, + if (translate_ids(s_op->target_fs_id, s_op->req->credentials.uid, s_op->req->credentials.gid, &translated_uid, &translated_gid, s_op->addr) == 1) { squashed_flag = 1; @@ -390,6 +438,11 @@ static PINT_sm_action prelude_perm_check( s_op->req->u.mkdir.attr.owner = translated_uid; s_op->req->u.mkdir.attr.group = translated_gid; } + else if (s_op->req->op == PVFS_SERV_CREATE) + { + s_op->req->u.create.attr.owner = translated_uid; + s_op->req->u.create.attr.group = translated_gid; + } } } } @@ -456,31 +509,39 @@ static PINT_sm_action prelude_perm_check( } else /* setattr, seteattr, seteattr_list */ { - /* - NOTE: on other file systems, setattr doesn't - seem to require read permissions by the user, group - OR other, so long as the user or group matches (or - is root) - */ - if (((s_op->attr.mask & PVFS_ATTR_COMMON_UID) && - ((s_op->attr.owner == 0) || - (s_op->attr.owner == translated_uid))) || - (((s_op->attr.mask & PVFS_ATTR_COMMON_GID) && - ((s_op->attr.group == 0) || - (s_op->attr.group == translated_gid)))) || - (translated_uid == 0)) + if(s_op->attr.perms == 0 && s_op->attr.objtype == + PVFS_TYPE_SYMLINK) + { + /* if the object is of type symlink but has empty perms, + * then it must be a newly created symlink object that + * does not have its true attributes set yet. Let this + * operation through. 
+ */ + js_p->error_code = 0; + } + else if(s_op->attr.owner == translated_uid || translated_uid + == 0) { + /* owner of file and root can always set attributes (see + * iozone, which does a setattr as part of truncating a + * file with permission mask set to 0 + */ js_p->error_code = 0; } else { - js_p->error_code = -PVFS_EACCES; + /* normal setattr requires write permissions on existing + * objects + */ + js_p->error_code = PINT_check_mode( + &(s_op->attr), translated_uid, + translated_gid, PINT_ACCESS_WRITABLE); } } break; case PINT_SERVER_CHECK_NONE: if(squashed_flag && - !PINT_server_req_get_access_type(s_op->req) && + PINT_server_req_get_access_type(s_op->req) == PINT_SERVER_REQ_MODIFY && ((s_op->req->op == PVFS_SERV_IO) || (s_op->req->op == PVFS_SERV_SMALL_IO) || (s_op->req->op == PVFS_SERV_TRUNCATE))) @@ -572,7 +633,8 @@ static PINT_sm_action prelude_check_acls_if_needed( 0, js_p, &i, - server_job_context); + server_job_context, + s_op->req->hints); return ret; } @@ -587,7 +649,16 @@ static PINT_sm_action prelude_check_acls( obj_attr = s_op->target_object_attr; assert(obj_attr); - /* anything non-zero we treat as a real error */ + /* if we didn't find the acl attributes, then just treat it as a + * permission denied case. 
+ */ + if (js_p->error_code == -TROVE_ENOENT) + { + js_p->error_code = -PVFS_EACCES; + goto cleanup; + } + + /* anything else non-zero we treat as a real error */ if (js_p->error_code) { goto cleanup; diff --git a/src/server/proto-error.sm b/src/server/proto-error.sm index 390f50f..5ec0575 100644 --- a/src/server/proto-error.sm +++ b/src/server/proto-error.sm @@ -44,8 +44,6 @@ static PINT_sm_action proto_error_init( job_id_t tmp_id; struct server_configuration_s *user_opts = get_server_config_struct(); - BMI_set_info(s_op->addr, BMI_INC_ADDR_REF, NULL); - s_op->resp.op = PVFS_SERV_PROTO_ERROR; s_op->resp.status = -PVFS_EPROTO; @@ -54,7 +52,7 @@ static PINT_sm_action proto_error_init( PINT_ENCODE_RESP, &(s_op->encoded), s_op->addr, - ENCODING_DEFAULT); + PVFS2_ENCODING_DEFAULT); if (ret < 0) { gossip_lerr("Error: PINT_encode() failure.\n"); @@ -79,7 +77,7 @@ static PINT_sm_action proto_error_init( js_p, &tmp_id, server_job_context, - user_opts->server_job_bmi_timeout); + user_opts->server_job_bmi_timeout, NULL); return ret; } diff --git a/src/server/pvfs2-server-req.c b/src/server/pvfs2-server-req.c index f5051fa..95ec6e1 100644 --- a/src/server/pvfs2-server-req.c +++ b/src/server/pvfs2-server-req.c @@ -5,7 +5,7 @@ */ #include "pvfs2-server.h" -#include +#include "assert.h" /* server operation state machines */ extern struct PINT_server_req_params pvfs2_get_config_params; @@ -35,7 +35,6 @@ extern struct PINT_server_req_params pvfs2_perf_update_params; extern struct PINT_server_req_params pvfs2_job_timer_params; extern struct PINT_server_req_params pvfs2_proto_error_params; extern struct PINT_server_req_params pvfs2_perf_mon_params; -extern struct PINT_server_req_params pvfs2_event_mon_params; extern struct PINT_server_req_params pvfs2_iterate_handles_params; extern struct PINT_server_req_params pvfs2_get_eattr_params; extern struct PINT_server_req_params pvfs2_get_eattr_list_params; @@ -43,6 +42,16 @@ extern struct PINT_server_req_params pvfs2_set_eattr_params; 
extern struct PINT_server_req_params pvfs2_set_eattr_list_params; extern struct PINT_server_req_params pvfs2_del_eattr_params; extern struct PINT_server_req_params pvfs2_list_eattr_params; +extern struct PINT_server_req_params pvfs2_batch_create_params; +extern struct PINT_server_req_params pvfs2_batch_remove_params; +extern struct PINT_server_req_params pvfs2_unstuff_params; +extern struct PINT_server_req_params pvfs2_stuffed_create_params; +extern struct PINT_server_req_params pvfs2_precreate_pool_refiller_params; +extern struct PINT_server_req_params pvfs2_mirror_params; +extern struct PINT_server_req_params pvfs2_create_immutable_copies_params; +extern struct PINT_server_req_params pvfs2_tree_remove_params; +extern struct PINT_server_req_params pvfs2_tree_get_file_size_params; +extern struct PINT_server_req_params pvfs2_uid_mgmt_params; /* table of incoming request types and associated parameters */ struct PINT_server_req_entry PINT_server_req_table[] = @@ -70,7 +79,7 @@ struct PINT_server_req_entry PINT_server_req_table[] = /* 20 */ {PVFS_SERV_MGMT_PERF_MON, &pvfs2_perf_mon_params}, /* 21 */ {PVFS_SERV_MGMT_ITERATE_HANDLES, &pvfs2_iterate_handles_params}, /* 22 */ {PVFS_SERV_MGMT_DSPACE_INFO_LIST, NULL}, - /* 23 */ {PVFS_SERV_MGMT_EVENT_MON, &pvfs2_event_mon_params}, + /* 23 */ {PVFS_SERV_MGMT_EVENT_MON, NULL}, /* 24 */ {PVFS_SERV_MGMT_REMOVE_OBJECT, &pvfs2_mgmt_remove_object_params}, /* 25 */ {PVFS_SERV_MGMT_REMOVE_DIRENT, &pvfs2_mgmt_remove_dirent_params}, /* 26 */ {PVFS_SERV_MGMT_GET_DIRDATA_HANDLE, &pvfs2_mgmt_get_dirdata_handle_params}, @@ -82,18 +91,29 @@ struct PINT_server_req_entry PINT_server_req_table[] = /* 32 */ {PVFS_SERV_LISTEATTR, &pvfs2_list_eattr_params}, /* 33 */ {PVFS_SERV_SMALL_IO, &pvfs2_small_io_params}, /* 34 */ {PVFS_SERV_LISTATTR, &pvfs2_list_attr_params}, + /* 35 */ {PVFS_SERV_BATCH_CREATE, &pvfs2_batch_create_params}, + /* 36 */ {PVFS_SERV_BATCH_REMOVE, &pvfs2_batch_remove_params}, + /* 37 */ {PVFS_SERV_PRECREATE_POOL_REFILLER, 
&pvfs2_precreate_pool_refiller_params}, + /* 38 */ {PVFS_SERV_UNSTUFF, &pvfs2_unstuff_params}, + /* 39 */ {PVFS_SERV_MIRROR, &pvfs2_mirror_params}, + /* 40 */ {PVFS_SERV_IMM_COPIES, &pvfs2_create_immutable_copies_params}, + /* 41 */ {PVFS_SERV_TREE_REMOVE, &pvfs2_tree_remove_params}, + /* 42 */ {PVFS_SERV_TREE_GET_FILE_SIZE, &pvfs2_tree_get_file_size_params}, + /* 43 */ {PVFS_SERV_MGMT_GET_UID, &pvfs2_uid_mgmt_params}, }; #define CHECK_OP(_op_) assert(_op_ == PINT_server_req_table[_op_].op_type) -inline int PINT_server_req_readonly(struct PVFS_server_req *req) +enum PINT_server_req_access_type PINT_server_req_readonly( + struct PVFS_server_req *req) { - return 1; + return PINT_SERVER_REQ_READONLY; } -inline int PINT_server_req_modify(struct PVFS_server_req *req) +enum PINT_server_req_access_type PINT_server_req_modify( + struct PVFS_server_req *req) { - return 0; + return PINT_SERVER_REQ_MODIFY; } enum PINT_server_req_permissions diff --git a/src/server/pvfs2-server.c b/src/server/pvfs2-server.c index 2ac5c3a..e6ffcf6 100644 --- a/src/server/pvfs2-server.c +++ b/src/server/pvfs2-server.c @@ -16,14 +16,15 @@ #include #include #include -#include +#include #ifdef __PVFS2_SEGV_BACKTRACE__ #include -#define __USE_GNU #include #endif +#define __PINT_REQPROTO_ENCODE_FUNCS_C + #include "bmi.h" #include "gossip.h" #include "job.h" @@ -38,12 +39,14 @@ #include "quicklist.h" #include "pint-dist-utils.h" #include "pint-perf-counter.h" -#include "pint-event.h" #include "id-generator.h" #include "job-time-mgr.h" #include "pint-cached-config.h" #include "pvfs2-internal.h" #include "src/server/request-scheduler/request-scheduler.h" +#include "pint-event.h" +#include "pint-util.h" +#include "pint-uid-mgmt.h" #ifndef PVFS2_VERSION #define PVFS2_VERSION "Unknown" @@ -60,6 +63,7 @@ #endif #define PVFS2_VERSION_REQUEST 0xFF +#define PVFS2_HELP 0xFE /* this controls how many jobs we will test for per job_testcontext() * call. 
NOTE: this is currently independent of the config file @@ -68,19 +72,6 @@ */ #define PVFS_SERVER_TEST_COUNT 64 -/* track performance counters for the server */ -static struct PINT_perf_key server_keys[] = -{ - {"bytes read", PINT_PERF_READ, 0}, - {"bytes written", PINT_PERF_WRITE, 0}, - {"metadata reads", PINT_PERF_METADATA_READ, PINT_PERF_PRESERVE}, - {"metadata writes", PINT_PERF_METADATA_WRITE, PINT_PERF_PRESERVE}, - {"metadata dspace ops", PINT_PERF_METADATA_DSPACE_OPS, PINT_PERF_PRESERVE}, - {"metadata keyval ops", PINT_PERF_METADATA_KEYVAL_OPS, PINT_PERF_PRESERVE}, - {"request scheduler", PINT_PERF_REQSCHED, PINT_PERF_PRESERVE}, - {NULL, 0, 0}, -}; - /* For the switch statement to know what interfaces to shutdown */ static PINT_server_status_flag server_status_flag; @@ -93,6 +84,8 @@ static struct server_configuration_s server_config; static int signal_recvd_flag = 0; static pid_t server_controlling_pid = 0; +static PINT_event_id PINT_sm_event_id; + /* A list of all serv_op's posted for unexpected message alone */ QLIST_HEAD(posted_sop_list); /* A list of all serv_op's posted for expected messages alone */ @@ -113,7 +106,8 @@ typedef struct } options_t; static options_t s_server_options = { 0, 0, 1, NULL, NULL}; -static char *fs_conf = NULL; +static char fs_conf[PATH_MAX]; +static char startup_cwd[PATH_MAX+1]; /* each of the elements in this array consists of a string and its length. * we're able to use sizeof here because sizeof an inlined string ("") gives @@ -125,7 +119,9 @@ PINT_server_trove_keys_s Trove_Common_Keys[] = {DIRECTORY_ENTRY_KEYSTR, DIRECTORY_ENTRY_KEYLEN}, {DATAFILE_HANDLES_KEYSTR, DATAFILE_HANDLES_KEYLEN}, {METAFILE_DIST_KEYSTR, METAFILE_DIST_KEYLEN}, - {SYMLINK_TARGET_KEYSTR, SYMLINK_TARGET_KEYLEN} + {SYMLINK_TARGET_KEYSTR, SYMLINK_TARGET_KEYLEN}, + {METAFILE_LAYOUT_KEYSTR, METAFILE_LAYOUT_KEYLEN}, + {NUM_DFILES_REQ_KEYSTR, NUM_DFILES_REQ_KEYLEN} }; /* These three are used continuously in our wait loop. 
They could be @@ -148,7 +144,9 @@ static int server_setup_process_environment(int background); static int server_shutdown( PINT_server_status_flag status, int ret, int sig); +static void reload_config(void); static void server_sig_handler(int sig); +static void hup_sighandler(int sig, siginfo_t *info, void *secret); static int server_parse_cmd_line_args(int argc, char **argv); #ifdef __PVFS2_SEGV_BACKTRACE__ static void bt_sighandler(int sig, siginfo_t *info, void *secret); @@ -156,20 +154,26 @@ static void bt_sighandler(int sig, siginfo_t *info, void *secret); static int create_pidfile(char *pidfile); static void write_pidfile(int fd); static void remove_pidfile(void); -static int generate_shm_key_hint(void); -static char *guess_alias(void); +static int generate_shm_key_hint(int* server_index); + +static void precreate_pool_finalize(void); +static int precreate_pool_initialize(int server_index); + +static int precreate_pool_setup_server(const char* host, PVFS_ds_type type, + PVFS_fs_id fsid, PVFS_handle* pool_handle); +static int precreate_pool_launch_refiller(const char* host, PVFS_ds_type type, + PVFS_BMI_addr_t addr, PVFS_fs_id fsid, PVFS_handle pool_handle); +static int precreate_pool_count( + PVFS_fs_id fsid, PVFS_handle pool_handle, int* count); static TROVE_method_id trove_coll_to_method_callback(TROVE_coll_id); + struct server_configuration_s *PINT_get_server_config(void) { return &server_config; } -#ifdef PVFS_OSD_INTEGRATED -extern void *tgtd(void *arg); -#endif - int main(int argc, char **argv) { int ret = -1, siglevel = 0; @@ -199,6 +203,10 @@ int main(int argc, char **argv) { return 0; } + else if (ret == PVFS2_HELP) + { + return 0; + } else if (ret != 0) { goto server_shutdown; @@ -209,18 +217,16 @@ int main(int argc, char **argv) s_server_options.server_alias, PVFS2_VERSION); /* code to handle older two config file format */ - - ret = PINT_parse_config(&server_config, fs_conf, s_server_options.server_alias); - if (ret < 0) + ret = 
PINT_parse_config(&server_config, fs_conf, + s_server_options.server_alias, 1); + if (ret) { gossip_err("Error: Please check your config files.\n"); gossip_err("Error: Server aborting.\n"); - free(s_server_options.server_alias); + ret = -PVFS_EINVAL; goto server_shutdown; } - free(s_server_options.server_alias); - server_status_flag |= SERVER_CONFIG_INIT; if (!PINT_config_is_valid_configuration(&server_config)) @@ -305,15 +311,10 @@ int main(int argc, char **argv) goto server_shutdown; } -#ifdef PVFS_OSD_INTEGRATED - pthread_t tgtd_thread; // declare tgtd thread - pthread_create(&tgtd_thread, NULL, tgtd, server_config.storage_path); // create tgtd thread -#endif - +#if 0 #ifndef __PVFS2_DISABLE_PERF_COUNTERS__ /* kick off performance update state machine */ - ret = server_state_machine_alloc_noreq(PVFS_SERV_PERF_UPDATE, - &(tmp_op)); + ret = server_state_machine_alloc_noreq(PVFS_SERV_PERF_UPDATE, &(tmp_op)); if (ret == 0) { ret = server_state_machine_start_noreq(tmp_op); @@ -324,6 +325,7 @@ int main(int argc, char **argv) "state machine.\n", ret); goto server_shutdown; } +#endif #endif /* kick off timer for expired jobs */ @@ -333,7 +335,6 @@ int main(int argc, char **argv) { ret = server_state_machine_start_noreq(tmp_op); } - if (ret < 0) { PVFS_perror_gossip("Error: failed to start job timer " @@ -349,25 +350,40 @@ int main(int argc, char **argv) { int i, comp_ct = PVFS_SERVER_TEST_COUNT; - /* IF a signal was received and we have drained all the state machines - * that were in progress, then we initiate shutdown of the server - */ if (signal_recvd_flag != 0) { - /* - * If we received a signal, then find out if we can exit now - * by checking if all s_ops (for expected messages) have either - * finished or timed out, - */ - if (qlist_empty(&inprogress_sop_list)) + /* If the signal is a SIGHUP, catch and reload configuration */ + if (signal_recvd_flag == SIGHUP) { - ret = 0; - siglevel = signal_recvd_flag; - goto server_shutdown; + reload_config(); + + /* re-open 
log file to allow normal rotation */ + gossip_reopen_file(server_config.logfile, "a"); + gossip_set_debug_mask(1, GOSSIP_SERVER_DEBUG); + gossip_debug(GOSSIP_SERVER_DEBUG, + "Re-opened log %s, continuing\n", + server_config.logfile); + gossip_set_debug_mask(1, debug_mask); + signal_recvd_flag = 0; /* Reset the flag */ + } + else + { + /* + * If we received a signal and we have drained all the state + * machines that were in progress, we initiate a shutdown of + * the server. Find out if we can exit now * by checking if + * all s_ops (for expected messages) have either finished or + * timed out, + */ + if (qlist_empty(&inprogress_sop_list)) + { + ret = 0; + siglevel = signal_recvd_flag; + goto server_shutdown; + } + /* not completed. continue... */ } - /* not completed. continue... */ } - ret = job_testcontext(server_job_id_array, &comp_ct, server_completed_job_p_array, @@ -429,10 +445,20 @@ static void write_pidfile(int fd) pid_t pid = getpid(); char pid_str[16] = {0}; int len; + int ret; snprintf(pid_str, 16, "%d\n", pid); len = strlen(pid_str); - write(fd, pid_str, len); + ret = write(fd, pid_str, len); + if(ret < len) + { + gossip_err("Error: failed to write pid file.\n"); + close(fd); + remove_pidfile(); + return; + } + close(fd); + return; } static void remove_pidfile(void) @@ -477,9 +503,12 @@ static int server_initialize( /* redirect gossip to specified target if backgrounded */ if (s_server_options.server_background) { - freopen("/dev/null", "r", stdin); - freopen("/dev/null", "w", stdout); - freopen("/dev/null", "w", stderr); + if(!freopen("/dev/null", "r", stdin)) + gossip_err("Error: failed to reopen stdin.\n"); + if(!freopen("/dev/null", "w", stdout)) + gossip_err("Error: failed to reopen stdout.\n"); + if(!freopen("/dev/null", "w", stderr)) + gossip_err("Error: failed to reopen stderr.\n"); if(!strcmp(server_config.logtype, "syslog")) { @@ -488,13 +517,6 @@ static int server_initialize( else if(!strcmp(server_config.logtype, "file")) { ret = 
gossip_enable_file(server_config.logfile, "a"); -#ifdef PVFS_OSD_INTEGRATED - /* grab stdout, stderr from tgtd */ - freopen(server_config.logfile, "a", stdout); - setlinebuf(stdout); - freopen(server_config.logfile, "a", stderr); - setlinebuf(stderr); -#endif } else { @@ -530,8 +552,7 @@ static int server_initialize( ret = server_initialize_subsystems(server_status_flag); if (ret < 0) { - gossip_err("Error: Could not initialize server interfaces; " - "aborting.\n"); + gossip_err("Error: Could not initialize server subsystems\n"); return ret; } @@ -540,7 +561,7 @@ static int server_initialize( /* Post starting set of BMI unexpected msg buffers */ for (i = 0; i < server_config.initial_unexpected_requests; i++) { - ret = server_post_unexpected_recv(&job_status_structs[i]); + ret = server_post_unexpected_recv(); if (ret < 0) { gossip_err("Error posting unexpected recv\n"); @@ -621,8 +642,8 @@ static int server_setup_process_environment(int background) } if (pid_fd >= 0) { + /* note: pid_fd closed by write_pidfile() */ write_pidfile(pid_fd); - close(pid_fd); atexit(remove_pidfile); } server_controlling_pid = getpid(); @@ -654,10 +675,30 @@ static int server_initialize_subsystems( struct filesystem_configuration_s *cur_fs; TROVE_context_id trove_context = -1; char buf[16] = {0}; - PVFS_fs_id orig_fsid; + PVFS_fs_id orig_fsid=0; PVFS_ds_flags init_flags = 0; int bmi_flags = BMI_INIT_SERVER; int shm_key_hint; + int server_index; + + if(server_config.enable_events) + { + ret = PINT_event_init(PINT_EVENT_TRACE_TAU); + if (ret < 0) + { + gossip_err("Error initializing event interface.\n"); + return (ret); + } + + /* Define the state machine event: + * START: (client_id, request_id, rank, handle, op_id) + * STOP: () + */ + PINT_event_define_event( + NULL, "sm", "%d%d%d%llu%d", "", &PINT_sm_event_id); + + *server_status_flag |= SERVER_EVENT_INIT; + } /* Initialize distributions */ ret = PINT_dist_initialize(0); @@ -687,6 +728,12 @@ static int server_initialize_subsystems( 
bmi_flags |= BMI_TCP_BIND_SPECIFIC; } + /* Have bmi automatically increment reference count on addresses any + * time a new unexpected message appears. The server will decrement it + * once it has completed processing related to that request. + */ + bmi_flags |= BMI_AUTO_REF_COUNT; + ret = BMI_initialize(server_config.bmi_modules, server_config.host_id, bmi_flags); @@ -715,7 +762,7 @@ static int server_initialize_subsystems( assert(ret == 0); /* help trove chose a differentiating shm key if needed for Berkeley DB */ - shm_key_hint = generate_shm_key_hint(); + shm_key_hint = generate_shm_key_hint(&server_index); gossip_debug(GOSSIP_SERVER_DEBUG, "Server using shm key hint: %d\n", shm_key_hint); ret = trove_collection_setinfo(0, 0, TROVE_SHM_KEY_HINT, &shm_key_hint); assert(ret == 0); @@ -736,15 +783,16 @@ static int server_initialize_subsystems( ret = trove_initialize( server_config.trove_method, trove_coll_to_method_callback, - server_config.storage_path, + server_config.data_path, + server_config.meta_path, init_flags); if (ret < 0) { PVFS_perror_gossip("Error: trove_initialize", ret); gossip_err("\n***********************************************\n"); - gossip_err("Invalid Storage Space: %s\n\n", - server_config.storage_path); + gossip_err("Invalid Storage Space: %s or %s\n\n", + server_config.data_path, server_config.meta_path); gossip_err("Storage initialization failed. The most " "common reason\nfor this is that the storage space " "has not yet been\ncreated or is located on a " @@ -784,21 +832,53 @@ static int server_initialize_subsystems( break; } - ret = PINT_cached_config_handle_load_mapping(cur_fs); + ret = PINT_cached_config_handle_load_mapping(cur_fs, + &server_config); if(ret) { PVFS_perror("Error: PINT_handle_load_mapping", ret); return(ret); } - orig_fsid = cur_fs->coll_id; + /* + set storage hints if any. if any of these fail, we + can't error out since they're just hints. thus, we + complain in logging and continue. 
+ */ + ret = trove_collection_setinfo( + cur_fs->coll_id, 0, + TROVE_DIRECTIO_THREADS_NUM, + (void *)&cur_fs->directio_thread_num); + if (ret < 0) + { + gossip_err("Error setting directio threads num\n"); + } + + ret = trove_collection_setinfo( + cur_fs->coll_id, 0, + TROVE_DIRECTIO_OPS_PER_QUEUE, + (void *)&cur_fs->directio_ops_per_queue); + if (ret < 0) + { + gossip_err("Error setting directio ops per queue\n"); + } + + ret = trove_collection_setinfo( + cur_fs->coll_id, 0, + TROVE_DIRECTIO_TIMEOUT, + (void *)&cur_fs->directio_timeout); + if (ret < 0) + { + gossip_err("Error setting directio threads num\n"); + } + ret = trove_collection_lookup( cur_fs->trove_method, - cur_fs->file_system_name, &(cur_fs->coll_id), NULL, NULL); + cur_fs->file_system_name, &(orig_fsid), NULL, NULL); if (ret < 0) { - gossip_err("Error initializing filesystem %s\n", + gossip_err("Error initializing trove for filesystem %s\n", cur_fs->file_system_name); return ret; } @@ -806,8 +886,8 @@ static int server_initialize_subsystems( if(orig_fsid != cur_fs->coll_id) { gossip_err("Error: configuration file does not match storage collection.\n"); - gossip_err(" config file fs_id: %d\n", (int)orig_fsid); - gossip_err(" storage fs_id: %d\n", (int)cur_fs->coll_id); + gossip_err(" storage file fs_id: %d\n", (int)orig_fsid); + gossip_err(" config file fs_id: %d\n", (int)cur_fs->coll_id); gossip_err("Warning: This most likely means that the configuration\n"); gossip_err(" files have been regenerated without destroying and\n"); gossip_err(" recreating the corresponding storage collection.\n"); @@ -1001,6 +1081,18 @@ static int server_initialize_subsystems( gossip_debug(GOSSIP_SERVER_DEBUG, "%d filesystem(s) initialized\n", PINT_llist_count(server_config.file_systems)); + /* + * Migrate database if needed + */ + ret = trove_migrate(server_config.trove_method, + server_config.data_path, + server_config.meta_path); + if (ret < 0) + { + gossip_err("trove_migrate failed: ret=%d\n", ret); + return(ret); + } 
+ ret = job_time_mgr_init(); if(ret < 0) { @@ -1038,22 +1130,61 @@ static int server_initialize_subsystems( *server_status_flag |= SERVER_REQ_SCHED_INIT; #ifndef __PVFS2_DISABLE_PERF_COUNTERS__ + /* hist size should be in server config too */ PINT_server_pc = PINT_perf_initialize(server_keys); if(!PINT_server_pc) { gossip_err("Error initializing performance counters.\n"); return(ret); } + ret = PINT_perf_set_info(PINT_server_pc, PINT_PERF_UPDATE_INTERVAL, + server_config.perf_update_interval); + if (ret < 0) + { + gossip_err("Error PINT_perf_set_info (update interval)\n"); + return(ret); + } + /* if history_size is greater than 1, start the rollover SM */ + if (PINT_server_pc->running) + { + struct PINT_smcb *tmp_op = NULL; + ret = server_state_machine_alloc_noreq( + PVFS_SERV_PERF_UPDATE, &(tmp_op)); + if (ret == 0) + { + ret = server_state_machine_start_noreq(tmp_op); + } + if (ret < 0) + { + PVFS_perror_gossip("Error: failed to start perf update " + "state machine.\n", ret); + return(ret); + } + } + *server_status_flag |= SERVER_PERF_COUNTER_INIT; #endif - ret = PINT_event_initialize(PINT_EVENT_DEFAULT_RING_SIZE); + ret = PINT_uid_mgmt_initialize(); if (ret < 0) { - gossip_err("Error initializing event interface.\n"); + gossip_err("Error initializing the uid management interface\n"); return (ret); } - *server_status_flag |= SERVER_EVENT_INIT; + + *server_status_flag |= SERVER_UID_MGMT_INIT; + + if (!server_config.osd_type) + { + ret = precreate_pool_initialize(server_index); + if (ret < 0) + { + gossip_err("Error initializing precreate pool.\n"); + return (ret); + } + + *server_status_flag |= SERVER_PRECREATE_INIT; + } return ret; } @@ -1062,6 +1193,10 @@ static int server_setup_signal_handlers(void) { struct sigaction new_action; struct sigaction ign_action; + struct sigaction hup_action; + hup_action.sa_sigaction = (void *)hup_sighandler; + sigemptyset (&hup_action.sa_mask); + hup_action.sa_flags = SA_RESTART | SA_SIGINFO; #ifdef __PVFS2_SEGV_BACKTRACE__ 
struct sigaction segv_action; @@ -1082,13 +1217,15 @@ static int server_setup_signal_handlers(void) /* catch these */ sigaction (SIGILL, &new_action, NULL); sigaction (SIGTERM, &new_action, NULL); - sigaction (SIGHUP, &new_action, NULL); + sigaction (SIGHUP, &hup_action, NULL); sigaction (SIGINT, &new_action, NULL); sigaction (SIGQUIT, &new_action, NULL); #ifdef __PVFS2_SEGV_BACKTRACE__ sigaction (SIGSEGV, &segv_action, NULL); + sigaction (SIGABRT, &segv_action, NULL); #else sigaction (SIGSEGV, &new_action, NULL); + sigaction (SIGABRT, &new_action, NULL); #endif /* ignore these */ @@ -1106,7 +1243,7 @@ static int server_setup_signal_handlers(void) #elif defined(REG_RIP) # define REG_INSTRUCTION_POINTER REG_RIP #else -# error Unknown instruction pointer location for your architecture, configure without --enable-segv-backtrace. +# error Unknown instruction pointer location for your architecture, configure with --disable-segv-backtrace. #endif /* bt_signalhandler() @@ -1149,6 +1286,207 @@ static void bt_sighandler(int sig, siginfo_t *info, void *secret) } #endif +/* hup_signalhandler() + * + * Reload mutable configuration values. If there are errors, leave server in + * a running state. + * + * NOTE: this _only_ reloads configuration values related to squashing, + * readonly, and trusted settings. It does not allow reloading of arbitrary + * configuration file settings. 
+ * + * no return value + */ +static void hup_sighandler(int sig, siginfo_t *info, void *secret) +{ + uint64_t debug_mask; + int debug_on; + + /* Let's make sure this message is printed out */ + gossip_get_debug_mask(&debug_on, &debug_mask); /* Need to set back later */ + gossip_set_debug_mask(1, GOSSIP_SERVER_DEBUG); /* Make sure debug set */ + gossip_debug(GOSSIP_SERVER_DEBUG, "PVFS2 received server: signal %d\n", sig); + gossip_set_debug_mask(debug_on, debug_mask); /* Set to original values */ + + /* Set the flag so the next server loop picks it up and reloads config */ + signal_recvd_flag = sig; +} + +static void reload_config(void) +{ + struct server_configuration_s sighup_server_config; + struct server_configuration_s *orig_server_config; + PINT_llist *orig_filesystems = NULL; + PINT_llist *hup_filesystems = NULL; + struct filesystem_configuration_s *orig_fs; + struct filesystem_configuration_s *hup_fs; + int tmp_value = 0; + char **tmp_ptr = NULL; + int *tmp_int_ptr = NULL; + + gossip_debug(GOSSIP_SERVER_DEBUG, "Reloading configuration %s\n", + fs_conf); + /* We received a SIGHUP. 
Update configuration in place */ + if (PINT_parse_config(&sighup_server_config, fs_conf, + s_server_options.server_alias, 1) < 0) + { + gossip_err("Error: Please check your config files.\n"); + gossip_err("Error: SIGHUP unable to update configuration.\n"); + PINT_config_release(&sighup_server_config); /* Free memory */ + } + else /* Successful load of config */ + { + /* Get the current server configuration and update global items */ + orig_server_config = get_server_config_struct(); + if (orig_server_config->event_logging) + { + free(orig_server_config->event_logging); + } + + /* Copy the new logging mask into the current server configuration */ + orig_server_config->event_logging = strdup(sighup_server_config.event_logging); + + /* Reset the debug mask */ + gossip_set_debug_mask(1, PVFS_debug_eventlog_to_mask(orig_server_config->event_logging)); + + orig_filesystems = server_config.file_systems; + /* Loop and update all stored file systems */ + while(orig_filesystems) + { + int found_matching_config = 0; + + orig_fs = PINT_llist_head(orig_filesystems); + if(!orig_fs) + { + break; + } + hup_filesystems = sighup_server_config.file_systems; + + /* Find the matching fs from sighup */ + while(hup_filesystems) + { + hup_fs = PINT_llist_head(hup_filesystems); + if ( !hup_fs ) + { + break; + } + if( hup_fs->coll_id == orig_fs->coll_id ) + { + found_matching_config = 1; + break; + } + hup_filesystems = PINT_llist_head(hup_filesystems); + } + if(!found_matching_config) + { + gossip_err("Error: SIGHUP unable to update configuration" + "Matching configuration not found.\n"); + break; + } + /* Update root squashing. Prelude is only place to accesses + * these values, so no need to lock around them. 
Swap the + * needed pointers so that server config gets new values, + * and the old values get freed up + */ + orig_fs->exp_flags = hup_fs->exp_flags; + + tmp_value = orig_fs->root_squash_count; + orig_fs->root_squash_count = hup_fs->root_squash_count; + hup_fs->root_squash_count = tmp_value; + + tmp_ptr = orig_fs->root_squash_hosts; + orig_fs->root_squash_hosts = hup_fs->root_squash_hosts; + hup_fs->root_squash_hosts = tmp_ptr; + + tmp_int_ptr = orig_fs->root_squash_netmasks; + orig_fs->root_squash_netmasks = hup_fs->root_squash_netmasks; + hup_fs->root_squash_netmasks = tmp_int_ptr; + + tmp_value = orig_fs->root_squash_exceptions_count; + orig_fs->root_squash_exceptions_count = hup_fs->root_squash_exceptions_count; + hup_fs->root_squash_exceptions_count = tmp_value; + + tmp_ptr = orig_fs->root_squash_exceptions_hosts; + orig_fs->root_squash_exceptions_hosts = hup_fs->root_squash_exceptions_hosts; + hup_fs->root_squash_exceptions_hosts = tmp_ptr; + + tmp_int_ptr = orig_fs->root_squash_exceptions_netmasks; + orig_fs->root_squash_exceptions_netmasks = hup_fs->root_squash_exceptions_netmasks; + hup_fs->root_squash_exceptions_netmasks = tmp_int_ptr; + + /* Update all squashing. Prelude is only place to accesses + * these values, so no need to lock around them. Swap + * pointers so that server config gets new values, and + * the old values get freed up + */ + tmp_value = orig_fs->all_squash_count; + orig_fs->all_squash_count = hup_fs->all_squash_count; + hup_fs->all_squash_count = tmp_value; + + tmp_ptr = orig_fs->all_squash_hosts; + orig_fs->all_squash_hosts = hup_fs->all_squash_hosts; + hup_fs->all_squash_hosts = tmp_ptr; + + tmp_int_ptr = orig_fs->all_squash_netmasks; + orig_fs->all_squash_netmasks = hup_fs->all_squash_netmasks; + hup_fs->all_squash_netmasks = tmp_int_ptr; + + /* Update read only. Prelude is only place to accesses + * these values, so no need to lock around them. 
Swap + * pointers so that server config gets new values, and + * the old values get freed up + */ + tmp_value = orig_fs->ro_count; + orig_fs->ro_count = hup_fs->ro_count; + hup_fs->ro_count = tmp_value; + + tmp_ptr = orig_fs->ro_hosts; + orig_fs->ro_hosts = hup_fs->ro_hosts; + hup_fs->ro_hosts = tmp_ptr; + + tmp_int_ptr = orig_fs->ro_netmasks; + orig_fs->ro_netmasks = hup_fs->ro_netmasks; + hup_fs->ro_netmasks = tmp_int_ptr; + + orig_fs->exp_anon_uid = hup_fs->exp_anon_uid; + orig_fs->exp_anon_gid = hup_fs->exp_anon_gid; + + orig_filesystems = PINT_llist_next(orig_filesystems); + } +#ifdef USE_TRUSTED + server_config.ports_enabled = sighup_server_config.ports_enabled; + server_config.allowed_ports[0] = sighup_server_config.allowed_ports[0]; + server_config.allowed_ports[1] = sighup_server_config.allowed_ports[1]; + server_config.network_enabled = sighup_server_config.network_enabled; + + tmp_value = server_config.allowed_networks_count; + server_config.allowed_networks_count = sighup_server_config.allowed_networks_count; + sighup_server_config.allowed_networks_count = tmp_value; + + tmp_ptr = server_config.allowed_networks; + server_config.allowed_networks = sighup_server_config.allowed_networks; + sighup_server_config.allowed_networks = tmp_ptr; + + tmp_int_ptr = server_config.allowed_masks; + server_config.allowed_masks = sighup_server_config.allowed_masks; + sighup_server_config.allowed_masks = tmp_int_ptr; + + /* security and security_dtor will be updated in a call + * to BMI_set_info. 
Need to save old values so they are + * deleted on cleanup + */ + sighup_server_config.security = server_config.security; + sighup_server_config.security_dtor = server_config.security_dtor; + + /* The set_info call grabs the interface_mutex, so we are + * basically using that to lock this resource + */ + BMI_set_info(0, BMI_TRUSTED_CONNECTION, (void *) &server_config); +#endif + PINT_config_release(&sighup_server_config); /* Free memory */ + } +} + static int server_shutdown( PINT_server_status_flag status, int ret, int siglevel) @@ -1162,6 +1500,17 @@ static int server_shutdown( gossip_debug(GOSSIP_SERVER_DEBUG, "*** server shutdown in progress ***\n"); + free(s_server_options.server_alias); + + if (status & SERVER_PRECREATE_INIT) + { + gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting precreate pool " + " [ ... ]\n"); + precreate_pool_finalize(); + gossip_debug(GOSSIP_SERVER_DEBUG, "[-] precreate pool " + " [ stopped ]\n"); + } + if (status & SERVER_STATE_MACHINE_INIT) { gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting state machine " @@ -1241,8 +1590,24 @@ static int server_shutdown( if (status & SERVER_TROVE_INIT) { + PINT_llist *cur; + struct filesystem_configuration_s *cur_fs; gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting storage " "interface [ ... ]\n"); + + cur = server_config.file_systems; + while(cur) + { + cur_fs = PINT_llist_head(cur); + if (!cur_fs) + { + break; + } + trove_collection_clear(cur_fs->trove_method, cur_fs->coll_id); + + cur = PINT_llist_next(cur); + } + trove_finalize(server_config.trove_method); gossip_debug(GOSSIP_SERVER_DEBUG, "[-] storage " "interface [ stopped ]\n"); @@ -1275,6 +1640,16 @@ static int server_shutdown( "interface [ stopped ]\n"); } + if (status & SERVER_UID_MGMT_INIT) + { + gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting uid management " + "interface [ ... 
]\n"); + PINT_uid_mgmt_finalize(); + gossip_debug(GOSSIP_SERVER_DEBUG, "[-] uid management " + "interface [ stopped ]\n"); + + } + if (status & SERVER_GOSSIP_INIT) { gossip_debug(GOSSIP_SERVER_DEBUG, @@ -1314,12 +1689,6 @@ static void server_sig_handler(int sig) sig, (int)server_status_flag); } - if (sig == SIGHUP) - { - gossip_err("SIGHUP: pvfs2-server cannot restart; " - "shutting down instead.\n"); - } - /* ignore further invocations of this signal */ new_action.sa_handler = SIG_IGN; sigemptyset(&new_action.sa_mask); @@ -1362,7 +1731,7 @@ static int server_parse_cmd_line_args(int argc, char **argv) { int ret = 0, option_index = 0; int total_arguments = 0; - const char *cur_option = NULL; + const char *cur_option = NULL, *tmp_path = NULL; static struct option long_opts[] = { {"foreground",0,0,0}, @@ -1371,11 +1740,11 @@ static int server_parse_cmd_line_args(int argc, char **argv) {"rmfs",0,0,0}, {"version",0,0,0}, {"pidfile",1,0,0}, - {"alias",0,0,0}, + {"alias",1,0,0}, {0,0,0,0} }; - while ((ret = getopt_long(argc, argv,"dfhrvp:a:", + while ((ret = getopt_long(argc, argv,"dfhrvp:a:e", long_opts, &option_index)) != -1) { total_arguments++; @@ -1436,18 +1805,25 @@ static int server_parse_cmd_line_args(int argc, char **argv) s_server_options.pidfile = optarg; if(optarg[0] != '/') { - gossip_err("Error: pidfile must be specified with an absolute path.\n"); + gossip_err("Error: pidfile must be specified with an " + "absolute path.\n"); goto parse_cmd_line_args_failure; } break; case 'a': - do_alias: + do_alias: total_arguments++; s_server_options.server_alias = strdup(optarg); break; case '?': case 'h': do_help: + usage(argc, argv); + if(s_server_options.server_alias) + { + free(s_server_options.server_alias); + } + return PVFS2_HELP; default: parse_cmd_line_args_failure: usage(argc, argv); @@ -1465,7 +1841,64 @@ static int server_parse_cmd_line_args(int argc, char **argv) goto parse_cmd_line_args_failure; } - fs_conf = argv[optind++]; + if (argv[optind][0] != '/') + 
{ + if( (tmp_path = getcwd(startup_cwd, PATH_MAX)) == NULL ) + { + gossip_err("Failed to get current working directory to create " + "absolute path for configuration file: %s\n", + strerror(errno)); + } + + if( (strlen(argv[optind]) + strlen(startup_cwd) + 1) >= PATH_MAX ) + { + gossip_err("Config file path greater than %d characters\n", + PATH_MAX); + goto parse_cmd_line_args_failure; + } + + if( strncat(startup_cwd, "/", PATH_MAX) == NULL ) + { + gossip_err("Failure creating absolute path from relative " + "configuration file path\n"); + goto parse_cmd_line_args_failure; + } + + /* copy the relative path into the string for the user */ + if( strncat(startup_cwd, argv[optind++], PATH_MAX) == NULL ) + { + gossip_err("Failure creating absolute path from relative " + "configuration file path\n"); + goto parse_cmd_line_args_failure; + } + + if( strncpy(fs_conf, startup_cwd, PATH_MAX) == NULL ) + { + gossip_err("Failure copying created full path into configuration " + "file path\n"); + goto parse_cmd_line_args_failure; + } + } + else + { + if( strlen(argv[optind]) >= PATH_MAX ) + { + gossip_err("Config file path greater than %d characters\n", + PATH_MAX); + goto parse_cmd_line_args_failure; + } + if( strncpy( fs_conf, argv[optind++], PATH_MAX) == NULL ) + { + gossip_err("Failure copying configuration file path\n"); + goto parse_cmd_line_args_failure; + } + } + + if( fs_conf == NULL ) + { + gossip_err("Failure copying configuration file path\n"); + goto parse_cmd_line_args_failure; + } if(argc - total_arguments > 2) { @@ -1482,7 +1915,7 @@ static int server_parse_cmd_line_args(int argc, char **argv) if (s_server_options.server_alias == NULL) { /* Try to guess the alias from the hostname */ - s_server_options.server_alias = guess_alias(); + s_server_options.server_alias = PINT_util_guess_alias(); } return 0; } @@ -1493,44 +1926,59 @@ static int server_parse_cmd_line_args(int argc, char **argv) * * Returns 0 on success, -PVFS_error on failure. 
*/ -int server_post_unexpected_recv(job_status_s *js_p) +int server_post_unexpected_recv(void) { int ret = -PVFS_EINVAL; /* job_id_t j_id; */ struct PINT_smcb *smcb = NULL; struct PINT_server_op *s_op; + /* The job status structure js that is sent into PINT_state_machine_start() below is used until + * the job_bmi_unexp() function call is executed in pvfs2_unexpected_sm.unexpected_post. Once + * job_bmi_unexp() puts the unexp job on the job queue, then a job_status_s structure will be + * assigned from the global structure, server_job_status_array, when the job system processes it. + * (The job system performs this task in a later call to job_testcontext()). It is THIS job_status_s + * structure that carries into the next state of pvfs2_unexpected_sm. Thus, the original js + * structure is ONLY used by job_bmi_unexp() to return an error if something within job_bmi_unexp() + * fails, like a memory issue. It is NOT forwarded through the job system. In the case of an error, + * ret (below) will be SM_ACTION_TERMINATE upon return from PINT_state_machine_start() and the error_code + * is then properly propagated. + * + * NOTE: If an error occurs when this function is called during server_initialize() time, then the + * server will not start. If an error occurs during the pvfs2_unexpected_sm.unexpected_map state, the + * error is noted but the system continues to run. 
+ */ + job_status_s js={0}; + gossip_debug(GOSSIP_SERVER_DEBUG, "server_post_unexpected_recv\n"); - if (js_p) + ret = PINT_smcb_alloc(&smcb, BMI_UNEXPECTED_OP, + sizeof(struct PINT_server_op), + server_op_state_get_machine, + server_state_machine_terminate, + server_job_context); + if (ret < 0) { - ret = PINT_smcb_alloc(&smcb, BMI_UNEXPECTED_OP, - sizeof(struct PINT_server_op), - server_op_state_get_machine, - server_state_machine_terminate, - server_job_context); - if (ret < 0) - { - gossip_lerr("Error: failed to allocate SMCB " - "of op type %x\n", BMI_UNEXPECTED_OP); - return ret; - } - s_op = (struct PINT_server_op *)PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - memset(s_op, 0, sizeof(PINT_server_op)); - s_op->op = BMI_UNEXPECTED_OP; - s_op->target_handle = PVFS_HANDLE_NULL; - s_op->target_fs_id = PVFS_FS_ID_NULL; - /* Add an unexpected s_ops to the list */ - qlist_add_tail(&s_op->next, &posted_sop_list); - - ret = PINT_state_machine_start(smcb, js_p); - if(ret == SM_ACTION_TERMINATE) - { - /* error posting unexpected */ - PINT_smcb_free(smcb); - return js_p->error_code; - } + gossip_lerr("Error: failed to allocate SMCB " + "of op type %x\n", BMI_UNEXPECTED_OP); + return ret; + } + s_op = (struct PINT_server_op *)PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + memset(s_op, 0, sizeof(PINT_server_op)); + s_op->op = BMI_UNEXPECTED_OP; + s_op->target_handle = PVFS_HANDLE_NULL; + s_op->target_fs_id = PVFS_FS_ID_NULL; + + /* Add an unexpected s_ops to the list */ + qlist_add_tail(&s_op->next, &posted_sop_list); + + ret = PINT_state_machine_start(smcb, &js); + if(ret == SM_ACTION_TERMINATE) + { + /* error posting unexpected */ + PINT_smcb_free(smcb); + return js.error_code; } return ret; } @@ -1562,6 +2010,9 @@ static int server_purge_unexpected_recv_machines(void) /* mark the message for cancellation */ s_op->op_cancelled = 1; + + /* cancel the pending job_bmi_unexp operation */ + job_bmi_unexp_cancel(s_op->unexp_id); } return 0; } @@ -1605,7 +2056,13 @@ int 
server_state_machine_start( else if (ret == 0) { s_op->req = (struct PVFS_server_req *)s_op->decoded.buffer; + /* PINT_smcb_set_op returns 1 when the op's state machine has correctly + * been found and the first state has been set. + */ ret = PINT_smcb_set_op(smcb, s_op->req->op); + s_op->op = s_op->req->op; + PVFS_hint_add(&s_op->req->hints, PVFS_HINT_SERVER_ID_NAME, sizeof(uint32_t), &server_config.host_index); + PVFS_hint_add(&s_op->req->hints, PVFS_HINT_OP_ID_NAME, sizeof(uint32_t), &s_op->req->op); } else { @@ -1621,8 +2078,17 @@ int server_state_machine_start( if(s_op->req) { - PINT_event_timestamp(PVFS_EVENT_API_SM, (int32_t)s_op->req->op, - 0, tmp_id, PVFS_EVENT_FLAG_START); + gossip_debug(GOSSIP_SERVER_DEBUG, "client:%d, reqid:%d, rank:%d\n", + PINT_HINT_GET_CLIENT_ID(s_op->req->hints), + PINT_HINT_GET_REQUEST_ID(s_op->req->hints), + PINT_HINT_GET_RANK(s_op->req->hints)); + PINT_EVENT_START(PINT_sm_event_id, server_controlling_pid, + NULL, &s_op->event_id, + PINT_HINT_GET_CLIENT_ID(s_op->req->hints), + PINT_HINT_GET_REQUEST_ID(s_op->req->hints), + PINT_HINT_GET_RANK(s_op->req->hints), + PINT_HINT_GET_HANDLE(s_op->req->hints), + s_op->req->op); s_op->resp.op = s_op->req->op; } @@ -1631,6 +2097,9 @@ int server_state_machine_start( if (!ret) { + /* ret will be zero when PINT_smcb_set_op cannot find the state machine associated + * with the request's op. 
+ */ gossip_err("Error: server does not implement request type: %d\n", (int)s_op->req->op); PINT_decode_release(&(s_op->decoded),PINT_DECODE_REQ); @@ -1708,6 +2177,7 @@ int server_state_machine_start_noreq(struct PINT_smcb *smcb) if (new_op) { + /* add to list of state machines started without a request */ qlist_add_tail(&new_op->next, &noreq_sop_list); @@ -1741,18 +2211,36 @@ int server_state_machine_complete(PINT_smcb *smcb) /* set a timestamp on the completion of the state machine */ id_gen_fast_register(&tmp_id, s_op); - PINT_event_timestamp(PVFS_EVENT_API_SM, (int32_t)s_op->req->op, - 0, tmp_id, PVFS_EVENT_FLAG_END); + + if(s_op->req) + { + PINT_EVENT_END(PINT_sm_event_id, server_controlling_pid, + NULL, s_op->event_id, 0); + } /* release the decoding of the unexpected request */ if (ENCODING_IS_VALID(s_op->decoded.enc_type)) { + PVFS_hint_free(s_op->decoded.stub_dec.req.hints); + PINT_decode_release(&(s_op->decoded),PINT_DECODE_REQ); } - BMI_set_info(s_op->unexp_bmi_buff.addr, BMI_DEC_ADDR_REF, NULL); + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP,"server_state_machine_complete: smcb op code (%d).\n" + ,s_op->op); + gossip_ldebug(GOSSIP_BMI_DEBUG_TCP,"server_state_machine_complete: " + "s_op->unexp_bmi_buff.buffer (%p) " + "\tNULL(%s).\n" + ,s_op->unexp_bmi_buff.buffer + ,s_op->unexp_bmi_buff.buffer ? "NO" : "YES"); + + /* BMI_unexpected_free MUST execute BEFORE BMI_set_info, because BMI_set_info will */ + /* remove the addr info from the cur_ref_list if BMI_DEC_ADDR_REF causes the ref */ + /* count to become zero. The addr info holds the "unexpected-free" function */ + /* pointer. 
*/ BMI_unexpected_free(s_op->unexp_bmi_buff.addr, s_op->unexp_bmi_buff.buffer); + BMI_set_info(s_op->unexp_bmi_buff.addr, BMI_DEC_ADDR_REF, NULL); s_op->unexp_bmi_buff.buffer = NULL; @@ -1855,34 +2343,6 @@ void PINT_server_access_debug(PINT_server_op * s_op, } #endif -static char *guess_alias(void) -{ - char tmp_alias[1024]; - char *tmpstr; - char *alias; - int ret; - - /* hmm...failed to find alias as part of the server config filename, - * use the hostname to guess - */ - ret = gethostname(tmp_alias, 1024); - if(ret != 0) - { - gossip_err("Failed to get hostname while attempting to guess " - "alias. Use -a to specify the alias for this server " - "process directly\n"); - return NULL; - } - alias = tmp_alias; - - tmpstr = strstr(tmp_alias, "."); - if(tmpstr) - { - *tmpstr = 0; - } - return strdup(tmp_alias); -} - /* generate_shm_key_hint() * * Makes a best effort to produce a unique shm key (for Trove's Berkeley @@ -1891,10 +2351,12 @@ static char *guess_alias(void) * * returns integer key */ -static int generate_shm_key_hint(void) +static int generate_shm_key_hint(int* server_index) { - int server_index = 1; struct host_alias_s *cur_alias = NULL; + struct filesystem_configuration_s *first_fs; + + *server_index = 1; PINT_llist *cur = server_config.host_aliases; @@ -1912,10 +2374,11 @@ static int generate_shm_key_hint(void) /* space the shm keys out by 10 to allow for Berkeley DB using * using more than one key on each server */ - return(server_index*10); + first_fs = PINT_llist_head(server_config.file_systems); + return(first_fs->coll_id + (*server_index)*10); } - server_index++; + (*server_index)++; cur = PINT_llist_next(cur); } @@ -1927,6 +2390,459 @@ static int generate_shm_key_hint(void) return(rand()); } +/* precreate_pool_initialize() + * + * starts the infrastructure for managing pools of precreated handles + * + * returns 0 on success, -PVFS_error on failure + */ +static int precreate_pool_initialize(int server_index) +{ + PINT_llist *cur_f = 
server_config.file_systems; + struct filesystem_configuration_s *cur_fs; + int ret = -1; + PVFS_handle pool_handle; + int server_count; + PVFS_BMI_addr_t* addr_array; + const char* host; + int i, j; + int server_type; + int handle_count = 0; + int fs_count = 0; + unsigned int types_to_pool = 0; + struct server_configuration_s *user_opts = get_server_config_struct(); + assert(user_opts); + + /* iterate through list of file systems */ + while(cur_f) + { + cur_fs = PINT_llist_head(cur_f); + if (!cur_fs) + { + break; + } + + fs_count++; + + /* am I a meta server in this file system? */ + ret = PINT_cached_config_check_type( + cur_fs->coll_id, + server_config.host_id, + &server_type); + if(ret < 0) + { + gossip_err("Error: %s not found in configuration file.\n", + server_config.host_id); + gossip_err("Error: configuration file is inconsistent.\n"); + return(ret); + } + if(!(server_type & PINT_SERVER_TYPE_META)) + { + /* This server is not a meta server for this file system; + * skip doing any precreate setup steps. + */ + cur_f = PINT_llist_next(cur_f); + continue; + } + + /* how many servers do we have? 
*/ + ret = PINT_cached_config_count_servers( + cur_fs->coll_id, PINT_SERVER_TYPE_ALL, &server_count); + if(ret < 0) + { + gossip_err("Error: unable to count servers for fsid: %d\n", + (int)cur_fs->coll_id); + return(ret); + } + + addr_array = malloc(server_count*sizeof(PVFS_BMI_addr_t)); + if(!addr_array) + { + gossip_err("Error: unable to allocate book keeping information for " + "precreate pools.\n"); + return(-PVFS_ENOMEM); + } + + /* resolve addrs for each I/O server */ + ret = PINT_cached_config_get_server_array( + cur_fs->coll_id, PINT_SERVER_TYPE_ALL, + addr_array, &server_count); + if(ret < 0) + { + gossip_err("Error: unable retrieve servers for fsid: %d\n", + (int)cur_fs->coll_id); + return(ret); + } + + for(i=0; icoll_id, addr_array[i], &server_type); + if(!strcmp(host, server_config.host_id) == 0) + { + /* this is a peer server */ + /* make sure a pool exists for that server,type, fsid pair */ + + /* set ds type of handles to setup in the server's pool based + * on the server type */ + types_to_pool = PVFS_TYPE_NONE; + if( (server_type & PINT_SERVER_TYPE_IO) != 0 ) + { + types_to_pool |= PVFS_TYPE_DATAFILE; + } + + if( (server_type & PINT_SERVER_TYPE_META) != 0 ) + { + types_to_pool |= (PVFS_TYPE_METAFILE | PVFS_TYPE_DIRECTORY | + PVFS_TYPE_SYMLINK | PVFS_TYPE_DIRDATA | + PVFS_TYPE_INTERNAL); + } + + /* for each possible bit in the ds_type mask check if we should + * create a pool for it */ + for(j = 0; j < PVFS_DS_TYPE_COUNT; j++ ) + { + PVFS_ds_type t; + int_to_PVFS_ds_type(j, &t); + + /* skip setting up a pool when it doesn't make sense i.e. + * when the remote host doesn't have handle types we want. 
+ * or in the special case that we don't get TYPE_NONE + * handles from IO servers*/ + if(((t & types_to_pool) == 0 ) || + ((t == PVFS_TYPE_NONE) && + (server_type == PINT_SERVER_TYPE_IO)) ) + { + continue; + } + + gossip_debug(GOSSIP_SERVER_DEBUG, "%s: setting up pool on " + "%s, type: %u, fs_id: %llu, handle: %llu\n", + __func__, host, t, + (long long unsigned int)cur_fs->coll_id, + llu(pool_handle)); + ret = precreate_pool_setup_server(host, t, + cur_fs->coll_id, &pool_handle); + if(ret < 0) + { + gossip_err("Error: precreate_pool_initialize failed to " + "setup pool for %s, type %u\n", + server_config.host_id, t); + return(ret); + } + + /* count current handles */ + ret = precreate_pool_count(cur_fs->coll_id, pool_handle, + &handle_count); + if(ret < 0) + { + gossip_err("Error: precreate_pool_initialize failed to " + "count pool for %s\n", + server_config.host_id); + return(ret); + } + + /* prepare the job interface to use this pool */ + ret = job_precreate_pool_register_server(host, t, + cur_fs->coll_id, pool_handle, handle_count, + user_opts->precreate_batch_size); + + /* launch sm to take care of refilling */ + /* the refiller will only actually launch if the batch count + * for the specified type, t, is greater than 0. Otherwise, + * there is no reason to have a refiller running. */ + ret = precreate_pool_launch_refiller(host, t, addr_array[i], + cur_fs->coll_id, pool_handle); + if(ret < 0) + { + gossip_err("Error: precreate_pool_initialize failed to " + "launch refiller SM for %s\n", + server_config.host_id); + return(ret); + } + } // for each PVFS_ds_type + } + } + + job_precreate_pool_set_index(server_index); + + cur_f = PINT_llist_next(cur_f); + free(addr_array); // local variable, malloc'd above to get BMI addrs + + } + + return(0); +} + +/* precreate_pool_finalize() + * + * shuts down infrastructure for managing pools of precreated handles + */ +static void precreate_pool_finalize(void) +{ + /* TODO: anything to do here? 
*/ + /* TODO: maybe try to stop pending refiller sms? */ + return; +} + +/* precreate_pool_setup_server() + * + * This function makes sure that a pool is present for the specified server, + * fsid, and type + * + * host: hostname of server the pool is associated with + * type: DS type of handles to store in the pool + * fsid: fsid of the filesystem the pool is associated with + * handle: out value of the handle of the pool + * + */ +static int precreate_pool_setup_server(const char* host, PVFS_ds_type type, + PVFS_fs_id fsid, PVFS_handle* pool_handle) +{ + job_status_s js; + job_id_t job_id; + int ret; + int outcount; + PVFS_handle_extent_array ext_array; + + PVFS_ds_keyval key; + PVFS_ds_keyval val; + + /* look for the pool handle for this server */ + + /* the key for the pool must now be server name plus handle type. + * since the key is currently a string it makes some sense to keep + * the whole thing printable instead of just tacking on a PVFS_ds_type + * to the end of the buffer. So, we'll sprint the type as an int and + * tack that on the end. Better that just tacking the bits on? + * Maybe not. 
*/ + char type_string[11] = { 0 }; /* 32 bit type only needs 10 digits */ + snprintf(type_string, 11, "%u", type); + + key.buffer_sz = strlen(host) + strlen(type_string) + + strlen("precreate-pool-") + 2; + key.buffer = malloc(key.buffer_sz); + if(!key.buffer) + { + return(-ENOMEM); + } + snprintf((char*)key.buffer, key.buffer_sz, "precreate-pool-%s-%s", + host, type_string); + key.read_sz = 0; + + val.buffer = pool_handle; + val.buffer_sz = sizeof(*pool_handle); + val.read_sz = 0; + + ret = job_trove_fs_geteattr(fsid, &key, &val, 0, NULL, 0, &js, + &job_id, server_job_context, NULL); + while(ret == 0) + { + ret = job_test(job_id, &outcount, NULL, &js, + PVFS2_SERVER_DEFAULT_TIMEOUT_MS, server_job_context); + } + if(ret < 0) + { + gossip_err("Error: precreate_pool failed to read fs eattrs.\n"); + free(key.buffer); + return(ret); + } + if(js.error_code && js.error_code != -TROVE_ENOENT) + { + gossip_err("Error: precreate_pool failed to read fs eattrs.\n"); + free(key.buffer); + return(js.error_code); + } + else if(js.error_code == -TROVE_ENOENT) + { + /* handle doesn't exist yet; let's create it */ + gossip_debug(GOSSIP_SERVER_DEBUG, "precreate_pool didn't find handle " + "for %s, type %s; creating now.\n", host, type_string); + + /* find extent array for ourselves */ + ret = PINT_cached_config_get_server( + fsid, server_config.host_id, PINT_SERVER_TYPE_META, &ext_array); + if(ret < 0) + { + gossip_err("Error: PINT_cached_config_get_meta() failure.\n"); + free(key.buffer); + return(ret); + } + + /* create a trove object for the pool */ + ret = job_trove_dspace_create(fsid, &ext_array, PVFS_TYPE_INTERNAL, + NULL, TROVE_SYNC, NULL, 0, &js, &job_id, server_job_context, NULL); + while(ret == 0) + { + ret = job_test(job_id, &outcount, NULL, &js, + PVFS2_SERVER_DEFAULT_TIMEOUT_MS, server_job_context); + } + if(ret < 0 || js.error_code) + { + gossip_err("Error: precreate_pool failed to create pool.\n"); + free(key.buffer); + return(ret < 0 ? 
ret : js.error_code); + } + + *pool_handle = js.handle; + + /* store reference to pool handle as collection eattr */ + ret = job_trove_fs_seteattr(fsid, &key, &val, TROVE_SYNC, NULL, 0, &js, + &job_id, server_job_context, NULL); + while(ret == 0) + { + ret = job_test(job_id, &outcount, NULL, &js, + PVFS2_SERVER_DEFAULT_TIMEOUT_MS, server_job_context); + } + if(ret < 0 || js.error_code) + { + gossip_err("Error: failed to record precreate pool handle.\n"); + gossip_err("Warning: fsck may be needed to recover lost handle.\n"); + free(key.buffer); + return(ret < 0 ? ret : js.error_code); + } + gossip_debug(GOSSIP_SERVER_DEBUG, "precreate_pool created handle %llu " + "for %s, type %s.\n", llu(*pool_handle), host, + type_string); + + } + else + { + /* handle already exists */ + gossip_debug(GOSSIP_SERVER_DEBUG, "precreate_pool found handle %llu " + "for %s, type %s.\n", llu(*pool_handle), host, + type_string); + } + free(key.buffer); + return(0); +} + +/* precreate_pool_count() + * + * counts the number of handles stored in a persistent precreate pool + */ +static int precreate_pool_count( + PVFS_fs_id fsid, PVFS_handle pool_handle, int* count) +{ + int ret; + job_status_s js; + job_id_t job_id; + int outcount; + PVFS_ds_keyval_handle_info handle_info; + + /* try to get the current number of handles from the pool */ + ret = job_trove_keyval_get_handle_info( + fsid, pool_handle, TROVE_KEYVAL_HANDLE_COUNT, &handle_info, + NULL, 0, &js, &job_id, server_job_context, NULL); + while(ret == 0) + { + ret = job_test(job_id, &outcount, NULL, &js, + PVFS2_SERVER_DEFAULT_TIMEOUT_MS, server_job_context); + } + if(ret < 0) + { + return(ret); + } + + if(js.error_code == -TROVE_ENOENT) + { + /* this really means there aren't any keyvals there yet */ + handle_info.count = 0; + } + else if(js.error_code != 0) + { + return(js.error_code); + } + + *count = handle_info.count; + + return(0); +} + +/* + * starts a precreate pool refiller state machine for the specified host and + * type of 
handle. + * host: the remote host to get handles from + * type: the DS type of handle the refiller will be refilling + * addr: the BMI addr of the remote host + * fsid: the filesystem ID of the fs the pool refiller is associated with + * pool_handle: the handle of the pool itself + * + * This will only be called for a host/type that matches and needs a filler + * so a remote server that is I/O only will only get refillers for datafile + * handles. + */ +static int precreate_pool_launch_refiller(const char* host, PVFS_ds_type type, + PVFS_BMI_addr_t addr, PVFS_fs_id fsid, PVFS_handle pool_handle) +{ + struct PINT_smcb *tmp_smcb = NULL; + struct PINT_server_op *s_op; + int ret, index = 0; + struct server_configuration_s *user_opts = get_server_config_struct(); + + assert(user_opts); + PVFS_ds_type_to_int(type, &index); + + if( user_opts->precreate_batch_size[index] == 0 ) + { + gossip_debug(GOSSIP_SERVER_DEBUG, "%s: NOT launching refiller for " + "host %s, type %d, pool: %llu, batch_size is 0\n", + __func__, host, type, llu(pool_handle)); + return 0; + } + + /* allocate smcb */ + ret = server_state_machine_alloc_noreq(PVFS_SERV_PRECREATE_POOL_REFILLER, + &(tmp_smcb)); + if (ret < 0) + { + return(ret); + } + + s_op = PINT_sm_frame(tmp_smcb, PINT_FRAME_CURRENT); + s_op->u.precreate_pool_refiller.host = strdup(host); + if(!s_op->u.precreate_pool_refiller.host) + { + PINT_smcb_free(tmp_smcb); + return(ret); + } + + /* set this refillers handle range based on the type of handle it will + * hold. If it's a datafile get an IO server range, otherwise get a meta + * range. */ + ret = PINT_cached_config_get_server( fsid, host, + ((type == PVFS_TYPE_DATAFILE) ? 
PINT_SERVER_TYPE_IO : + PINT_SERVER_TYPE_META), + &s_op->u.precreate_pool_refiller.handle_extent_array); + if(ret < 0) + { + free(s_op->u.precreate_pool_refiller.host); + PINT_smcb_free(tmp_smcb); + return(ret); + } + + gossip_debug(GOSSIP_SERVER_DEBUG, "%s: launching refiller for host %s, " + "type %d, pool: %llu, batch size %d (index %d)\n", __func__, + s_op->u.precreate_pool_refiller.host, type, llu(pool_handle), + user_opts->precreate_batch_size[index], index); + + s_op->u.precreate_pool_refiller.pool_handle = pool_handle; + s_op->u.precreate_pool_refiller.fsid = fsid; + s_op->u.precreate_pool_refiller.type = type; + s_op->u.precreate_pool_refiller.host_addr = addr; + + /* start sm */ + ret = server_state_machine_start_noreq(tmp_smcb); + if (ret < 0) + { + free(s_op->u.precreate_pool_refiller.host); + PINT_smcb_free(tmp_smcb); + return(ret); + } + + return(0); +} + /* * Local variables: * c-indent-level: 4 diff --git a/src/server/pvfs2-server.h b/src/server/pvfs2-server.h index 9a92f9f..7e3f4c3 100644 --- a/src/server/pvfs2-server.h +++ b/src/server/pvfs2-server.h @@ -17,8 +17,10 @@ #include #include +#ifndef WIN32 #include #include +#endif #include #include "pvfs2-debug.h" #include "pvfs2-storage.h" @@ -30,15 +32,11 @@ #include "PINT-reqproto-encode.h" #include "msgpairarray.h" #include "pvfs2-req-proto.h" +#include "pvfs2-mirror.h" #include "state-machine.h" +#include "pint-event.h" -/* skip everything except #includes if __SM_CHECK_DEP is already - * defined; this allows us to get the dependencies right for - * msgpairarray.sm which relies on conflicting headers for dependency - * information - */ -#ifndef __SM_CHECK_DEP extern job_context_id server_job_context; #define PVFS2_SERVER_DEFAULT_TIMEOUT_MS 100 @@ -62,6 +60,13 @@ extern job_context_id server_job_context; /* number of milliseconds that clients will delay between retries */ #define PVFS2_CLIENT_RETRY_DELAY_MS_DEFAULT 2000 +/* Specifies the number of handles to be preceated at a time from each + * 
server using the batch create request. + */ +#define PVFS2_PRECREATE_BATCH_SIZE_DEFAULT 512 +/* precreate pools will be topped off if they fall below this value */ +#define PVFS2_PRECREATE_LOW_THRESHOLD_DEFAULT 256 + /* types of permission checking that a server may need to perform for * incoming requests */ @@ -77,18 +82,6 @@ enum PINT_server_req_permissions needs write and execute */ }; -enum PINT_server_req_access_type -{ - PINT_SERVER_REQ_READONLY = 0, - PINT_SERVER_REQ_MODIFY -}; - -enum PINT_server_sched_policy -{ - PINT_SERVER_REQ_BYPASS = 0, - PINT_SERVER_REQ_SCHEDULE -}; - #define PINT_GET_OBJECT_REF_DEFINE(req_name) \ static inline int PINT_get_object_ref_##req_name( \ struct PVFS_server_req *req, PVFS_fs_id *fs_id, PVFS_handle *handle) \ @@ -98,10 +91,10 @@ static inline int PINT_get_object_ref_##req_name( \ return 0; \ } -typedef int (*PINT_server_req_access_callback)(struct PVFS_server_req *req); - -int PINT_server_req_readonly(struct PVFS_server_req *req); -int PINT_server_req_modify(struct PVFS_server_req *req); +enum PINT_server_req_access_type PINT_server_req_readonly( + struct PVFS_server_req *req); +enum PINT_server_req_access_type PINT_server_req_modify( + struct PVFS_server_req *req); struct PINT_server_req_params { @@ -122,7 +115,8 @@ struct PINT_server_req_params * Default functions PINT_server_req_readonly and PINT_server_req_modify * are used for requests that always require the same access type. */ - PINT_server_req_access_callback access_type; + enum PINT_server_req_access_type (*access_type)( + struct PVFS_server_req *req); /* Specifies the scheduling policy for the request. 
In some cases, * we can bypass the request scheduler and proceed directly with the @@ -176,6 +170,8 @@ enum METAFILE_HANDLES_KEY = 2, METAFILE_DIST_KEY = 3, SYMLINK_TARGET_KEY = 4, + METAFILE_LAYOUT_KEY = 5, + NUM_DFILES_REQ_KEY = 6 }; /* optional; user-settable keys */ @@ -186,6 +182,9 @@ enum NUM_DFILES_KEY = 2, NUM_SPECIAL_KEYS = 3, /* not an index */ METAFILE_HINT_KEY = 3, + MIRROR_COPIES_KEY = 4, + MIRROR_HANDLES_KEY = 5, + MIRROR_STATUS_KEY = 6, }; typedef enum @@ -209,8 +208,185 @@ typedef enum SERVER_JOB_TIME_MGR_INIT = (1 << 15), SERVER_DIST_INIT = (1 << 16), SERVER_CACHED_CONFIG_INIT = (1 << 17), + SERVER_PRECREATE_INIT = (1 << 18), + SERVER_UID_MGMT_INIT = (1 << 19), } PINT_server_status_flag; +typedef enum +{ + PRELUDE_SCHEDULER_DONE = (1 << 0), + PRELUDE_GETATTR_DONE = (1 << 1), + PRELUDE_PERM_CHECK_DONE = (1 << 2), + PRELUDE_LOCAL_CALL = (1 << 3), +} PINT_prelude_flag; + +struct PINT_server_create_op +{ + const char **io_servers; + const char **remote_io_servers; + int num_io_servers; + PVFS_handle* handle_array_local; + PVFS_handle* handle_array_remote; + int handle_array_local_count; + int handle_array_remote_count; + PVFS_error saved_error_code; + int handle_index; +}; + +/*MIRROR structures*/ +typedef struct +{ + /* session identifier created in the PVFS_SERV_IO request. also used as */ + /* the flow identifier. 
*/ + bmi_msg_tag_t session_tag; + + /*destination server address*/ + PVFS_BMI_addr_t svr_addr; + + /*status from PVFS_SERV_IO*/ + PVFS_error io_status; + + /*variables used to setup write completion ack*/ + void *encoded_resp_p; + job_status_s recv_status; + job_id_t recv_id; + + /*variables used to setup flow between the src & dest datahandle*/ + flow_descriptor *flow_desc; + job_status_s flow_status; + job_id_t flow_job_id; + +} write_job_t; + + +/*This structure is used during the processing of a "mirror" request.*/ +struct PINT_server_mirror_op +{ + /*keep up with the number of outstanding jobs*/ + int job_count; + + /*maximum response size for the write request*/ + int max_resp_sz; + + /*info about each job*/ + write_job_t *jobs; +}; +typedef struct PINT_server_mirror_op PINT_server_mirror_op; + +/* Source refers to the handle being copied, and destination refers to */ +/* its copy. */ +struct PINT_server_create_copies_op +{ + /*number of I/O servers required to meet the mirroring request. */ + uint32_t io_servers_required; + + /*mirroring mode. attribute key is user.pvfs2.mirror.mode*/ + MIRROR_MODE mirror_mode; + + /*the expected mirroring mode tells us how to edit the retrieved mirroring*/ + /*mode. Example: if mirroring was called when immutable was set, then */ + /*the expected mirroring mode would be MIRROR_ON_IMMUTABLE. */ + MIRROR_MODE expected_mirror_mode; + + /*buffer holding list of remote servers for all copies of the file*/ + char **my_remote_servers; + + /*saved error code*/ + PVFS_error saved_error_code; + + /*number of copies desired. 
value of user.pvfs2.mirror.copies attribute*/ + uint32_t copies; + + /*successful/failed writes array in order of source handles */ + /*0=>successful !UINT64_HIGH=>failure UINT64_HIGH=>initial state */ + /*accessed as if a 2-dimensional array [SrcHandleNR][#ofCopies] */ + PVFS_handle *writes_completed; + + /*number of attempts at writing handles*/ + int retry_count; + + /*list of server names that will be used as destination servers*/ + char **io_servers; + + /*source remote server names in distribution*/ + char **remote_io_servers; + + /*source local server names in distribution*/ + char **local_io_servers; + + /*number of source server names in the distribution*/ + int num_io_servers; + + /*number of source remote server names in distribution*/ + int remote_io_servers_count; + + /*number of source local server names in distribution*/ + int local_io_servers_count; + + /*source datahandles in order of distribution*/ + PVFS_handle *handle_array_base; + + /*local source datahandles*/ + PVFS_handle *handle_array_base_local; + + /*destination datahandles in order of distribution*/ + PVFS_handle *handle_array_copies; + + /*local destination datahandles*/ + PVFS_handle *handle_array_copies_local; + + /*remote destination datahandles*/ + PVFS_handle *handle_array_copies_remote; + + /*number of local source datahandles*/ + int handle_array_base_local_count; + + /*number of local destination datahandles*/ + int handle_array_copies_local_count; + + /*number of remote destination datahandles*/ + int handle_array_copies_remote_count; + + /*number of source datahandles*/ + uint32_t dfile_count; + + /*source metadata handle*/ + PVFS_handle metadata_handle; + + /*source file system*/ + PVFS_fs_id fs_id; + + /*number of io servers defined in the current file system*/ + int io_servers_count; + + /*size of the source distribution structure */ + uint32_t dist_size; + + /*distribution structure for basic_dist*/ + PINT_dist *dist; + + /*local source handles' attribute structure*/ + 
/*populates bstream_array_base_local with byte stream size*/ + PVFS_ds_attributes *ds_attr_a; + + /*local source handles' byte stream size*/ + /*index corresponds to handle_array_base*/ + PVFS_size *bstream_array_base_local; +}; +typedef struct PINT_server_create_copies_op PINT_server_create_copies_op; + + +/*This macro is used to initialize a PINT_server_op structure when pjmp'ing */ +/*to pvfs2_create_immutable_copies_sm. */ +#define PVFS_SERVOP_IMM_COPIES_FILL(__new_p,__cur_p) \ +do { \ + memcpy(__new_p,__cur_p,sizeof(struct PINT_server_op)); \ + (__new_p)->op = PVFS_SERV_IMM_COPIES; \ + memset(&((__new_p)->u.create_copies),0,sizeof((__new_p)->u.create_copies)); \ +}while(0) + + + /* struct PINT_server_lookup_op * * All the data needed during lookup processing: @@ -284,6 +460,7 @@ struct PINT_server_remove_op int key_count; int index; int remove_keyvals_state; + int saved_error_code; /* holds error_code from previous state. */ }; struct PINT_server_mgmt_remove_dirent_op @@ -291,6 +468,29 @@ struct PINT_server_mgmt_remove_dirent_op PVFS_handle dirdata_handle; }; +struct PINT_server_precreate_pool_refiller_op +{ + PVFS_handle pool_handle; + PVFS_handle* precreate_handle_array; + PVFS_fs_id fsid; + char* host; + PVFS_BMI_addr_t host_addr; + PVFS_handle_extent_array handle_extent_array; + PVFS_ds_type type; +}; + +struct PINT_server_batch_create_op +{ + int saved_error_code; + int batch_index; +}; + +struct PINT_server_batch_remove_op +{ + int handle_index; + int error_code; +}; + struct PINT_server_mgmt_get_dirdata_op { PVFS_handle dirdata_handle; @@ -343,20 +543,16 @@ struct PINT_server_getattr_op PVFS_error* err_array; PVFS_ds_keyval_handle_info keyval_handle_info; PVFS_handle dirent_handle; + int num_dfiles_req; + PVFS_handle *mirror_dfile_status_array; }; struct PINT_server_listattr_op { - uint32_t nhandles; - PVFS_handle *handles; - PVFS_size dirent_count; - PVFS_fs_id fs_id; PVFS_object_attr *attr_a; PVFS_ds_attributes *ds_attr_a; PVFS_error *errors; - 
uint32_t attrmask; - uint32_t handle_index; - PVFS_ds_keyval_handle_info keyval_handle_info; + int parallel_sms; }; /* this is used in both set_eattr, get_eattr and list_eattr */ @@ -364,7 +560,27 @@ struct PINT_server_eattr_op { void *buffer; }; - + +struct PINT_server_unstuff_op +{ + PVFS_handle* dfile_array; + int num_dfiles_req; + PVFS_sys_layout layout; + void* encoded_layout; +}; + +struct PINT_server_tree_communicate_op +{ + int num_partitions; + PVFS_handle* handle_array_local; + PVFS_handle* handle_array_remote; + uint32_t *local_join_size; + uint32_t *remote_join_size; + int handle_array_local_count; + int handle_array_remote_count; + int handle_index; +}; + /* This structure is passed into the void *ptr * within the job interface. Used to tell us where * to go next in our state machine. @@ -373,8 +589,12 @@ typedef struct PINT_server_op { struct qlist_head next; /* used to queue structures used for unexp style messages */ int op_cancelled; /* indicates unexp message was cancelled */ + job_id_t unexp_id; + enum PVFS_server_op op; /* type of operation that we are servicing */ + PINT_event_id event_id; + /* holds id from request scheduler so we can release it later */ job_id_t scheduled_id; @@ -382,9 +602,15 @@ typedef struct PINT_server_op PVFS_ds_keyval key, val; PVFS_ds_keyval *key_a; PVFS_ds_keyval *val_a; + int *error_a; + int keyval_count; int free_val; + /* generic int for use by state machines that are accessing + * PINT_server_op structs before pjumping to them. 
*/ + int local_index; + /* attributes structure associated with target of operation; may be * partially filled in by prelude nested state machine (for * permission checking); may be used/modified by later states as well @@ -408,24 +634,23 @@ typedef struct PINT_server_op struct PINT_encoded_msg encoded; struct PINT_decoded_msg decoded; - /* generic msgpair used with msgpair substate */ - PINT_sm_msgpair_state msgpair; - - /* state information for msgpairarray nested state machine */ - int msgarray_count; - PINT_sm_msgpair_state *msgarray; - PINT_sm_msgpair_params msgarray_params; + PINT_sm_msgarray_op msgarray_op; PVFS_handle target_handle; PVFS_fs_id target_fs_id; PVFS_object_attr *target_object_attr; + PINT_prelude_flag prelude_mask; + enum PINT_server_req_access_type access_type; enum PINT_server_sched_policy sched_policy; + int num_pjmp_frames; + union { /* request-specific scratch spaces for use during processing */ + struct PINT_server_create_op create; struct PINT_server_eattr_op eattr; struct PINT_server_getattr_op getattr; struct PINT_server_listattr_op listattr; @@ -443,10 +668,42 @@ typedef struct PINT_server_op struct PINT_server_mkdir_op mkdir; struct PINT_server_mgmt_remove_dirent_op mgmt_remove_dirent; struct PINT_server_mgmt_get_dirdata_op mgmt_get_dirdata_handle; + struct PINT_server_precreate_pool_refiller_op + precreate_pool_refiller; + struct PINT_server_batch_create_op batch_create; + struct PINT_server_batch_remove_op batch_remove; + struct PINT_server_unstuff_op unstuff; + struct PINT_server_create_copies_op create_copies; + struct PINT_server_mirror_op mirror; + struct PINT_server_tree_communicate_op tree_communicate; } u; } PINT_server_op; +#define PINT_CREATE_SUBORDINATE_SERVER_FRAME(__smcb, __s_op, __handle, __fs_id, __location, __req, __task_id) \ + do { \ + char server_name[1024]; \ + struct server_configuration_s *server_config = get_server_config_struct(); \ + __s_op = malloc(sizeof(struct PINT_server_op)); \ + if(!__s_op) { return 
-PVFS_ENOMEM; } \ + memset(__s_op, 0, sizeof(struct PINT_server_op)); \ + __s_op->req = &__s_op->decoded.stub_dec.req; \ + PINT_sm_push_frame(__smcb, __task_id, __s_op); \ + if (__location != LOCAL_OPERATION && __location != REMOTE_OPERATION && __handle) { \ + PINT_cached_config_get_server_name(server_name, 1024, __handle, __fs_id); \ + } \ + if (__location != REMOTE_OPERATION && (__location == LOCAL_OPERATION || ( __handle && ! strcmp(server_config->host_id, server_name)))) { \ + __location = LOCAL_OPERATION; \ + __req = __s_op->req; \ + __s_op->prelude_mask = PRELUDE_SCHEDULER_DONE | PRELUDE_PERM_CHECK_DONE | PRELUDE_LOCAL_CALL; \ + } \ + else { \ + memset(&__s_op->msgarray_op, 0, sizeof(PINT_sm_msgarray_op)); \ + PINT_serv_init_msgarray_params(__s_op, __fs_id); \ + } \ + } while (0) + + /* PINT_ACCESS_DEBUG() * * macro for consistent printing of access records @@ -454,32 +711,64 @@ typedef struct PINT_server_op * no return value */ #ifdef GOSSIP_DISABLE_DEBUG +#ifdef WIN32 +#define PINT_ACCESS_DEBUG(__s_op, __mask, format, ...) do {} while (0) +#else #define PINT_ACCESS_DEBUG(__s_op, __mask, format, f...) do {} while (0) +#endif +#else +#ifdef WIN32 +#define PINT_ACCESS_DEBUG(__s_op, __mask, format, ...) \ + PINT_server_access_debug(__s_op, __mask, format, __VA_ARGS__) #else #define PINT_ACCESS_DEBUG(__s_op, __mask, format, f...) \ PINT_server_access_debug(__s_op, __mask, format, ##f) #endif +#endif +#ifdef WIN32 +void PINT_server_access_debug(PINT_server_op * s_op, + int64_t debug_mask, + const char * format, + ...); +#else void PINT_server_access_debug(PINT_server_op * s_op, int64_t debug_mask, const char * format, ...) 
__attribute__((format(printf, 3, 4))); +#endif + +/* server side state machines */ +extern struct PINT_state_machine_s pvfs2_mirror_sm; +extern struct PINT_state_machine_s pvfs2_pjmp_call_msgpairarray_sm; +extern struct PINT_state_machine_s pvfs2_pjmp_get_attr_with_prelude_sm; +extern struct PINT_state_machine_s pvfs2_pjmp_remove_work_sm; +extern struct PINT_state_machine_s pvfs2_pjmp_mirror_work_sm; +extern struct PINT_state_machine_s pvfs2_pjmp_create_immutable_copies_sm; +extern struct PINT_state_machine_s pvfs2_pjmp_get_attr_work_sm; /* nested state machines */ extern struct PINT_state_machine_s pvfs2_get_attr_work_sm; +extern struct PINT_state_machine_s pvfs2_get_attr_with_prelude_sm; extern struct PINT_state_machine_s pvfs2_prelude_sm; extern struct PINT_state_machine_s pvfs2_prelude_work_sm; extern struct PINT_state_machine_s pvfs2_final_response_sm; extern struct PINT_state_machine_s pvfs2_check_entry_not_exist_sm; extern struct PINT_state_machine_s pvfs2_remove_work_sm; +extern struct PINT_state_machine_s pvfs2_remove_with_prelude_sm; extern struct PINT_state_machine_s pvfs2_mkdir_work_sm; extern struct PINT_state_machine_s pvfs2_unexpected_sm; +extern struct PINT_state_machine_s pvfs2_create_immutable_copies_sm; +extern struct PINT_state_machine_s pvfs2_mirror_work_sm; +extern struct PINT_state_machine_s pvfs2_tree_remove_work_sm; +extern struct PINT_state_machine_s pvfs2_tree_get_file_size_work_sm; +extern struct PINT_state_machine_s pvfs2_call_msgpairarray_sm; /* Exported Prototypes */ struct server_configuration_s *get_server_config_struct(void); /* exported state machine resource reclamation function */ -int server_post_unexpected_recv(job_status_s *js_p); +int server_post_unexpected_recv(void); int server_state_machine_start( PINT_smcb *smcb, job_status_s *js_p); int server_state_machine_complete(PINT_smcb *smcb); int server_state_machine_terminate(PINT_smcb *smcb, job_status_s *js_p); @@ -506,7 +795,6 @@ int server_state_machine_start_noreq( struct 
PINT_state_machine_s *server_op_state_get_machine(int); -#endif /* __SM_CHECK_DEP */ #endif /* __PVFS_SERVER_H */ /* diff --git a/src/server/readdir.sm b/src/server/readdir.sm index 5900703..1b2963e 100644 --- a/src/server/readdir.sm +++ b/src/server/readdir.sm @@ -124,7 +124,7 @@ static PINT_sm_action readdir_read_dirdata_handle( &s_op->key, &s_op->val, 0, NULL, smcb, 0, js_p, &i, - server_job_context); + server_job_context, s_op->req->hints); return ret; } @@ -204,8 +204,8 @@ static PINT_sm_action readdir_iterate_on_entries( s_op->req->u.readdir.token, s_op->key_a, s_op->val_a, s_op->req->u.readdir.dirent_count, 0, - NULL, smcb, 0, js_p, - &j_id, server_job_context); + NULL, smcb, 0, js_p, + &j_id, server_job_context, s_op->req->hints); return ret; } diff --git a/src/server/remove.sm b/src/server/remove.sm index 0dcd8d1..0be77a0 100644 --- a/src/server/remove.sm +++ b/src/server/remove.sm @@ -115,24 +115,27 @@ nested machine pvfs2_remove_work_sm } -machine pvfs2_remove_sm +nested machine pvfs2_remove_with_prelude_sm { state prelude { jump pvfs2_prelude_sm; - success => setup_work; - default => final_response; + success => do_work; + default => return; } - state setup_work - { - run remove_setup_work; - default => work; + state do_work + { + jump pvfs2_remove_work_sm; + default => return; } +} +machine pvfs2_remove_sm +{ state work { - jump pvfs2_remove_work_sm; + jump pvfs2_remove_with_prelude_sm; default => final_response; } @@ -235,16 +238,16 @@ static PINT_sm_action remove_read_dirdata_handle( gossip_debug(GOSSIP_SERVER_DEBUG, " trying to read dirdata handle (coll_id = %d, " "handle = %llu, key = %s (%d), val_buf = %p (%d))\n", - s_op->u.remove.fs_id, llu(s_op->u.remove.handle), + s_op->req->u.remove.fs_id, llu(s_op->req->u.remove.handle), (char *)s_op->key.buffer, s_op->key.buffer_sz, s_op->val.buffer, s_op->val.buffer_sz); ret = job_trove_keyval_read( - s_op->u.remove.fs_id, s_op->u.remove.handle, - &s_op->key, &s_op->val, - 0, + s_op->req->u.remove.fs_id, 
s_op->req->u.remove.handle, + &s_op->key, &s_op->val, + 0, NULL, smcb, 0, js_p, - &j_id, server_job_context); + &j_id, server_job_context, s_op->req->hints); return ret; } @@ -264,7 +267,7 @@ static PINT_sm_action remove_get_dirent_count( ret = job_trove_keyval_iterate_keys( - s_op->u.remove.fs_id, + s_op->req->u.remove.fs_id, s_op->u.remove.dirdata_handle, PVFS_ITERATE_START, &s_op->u.remove.key, @@ -275,7 +278,7 @@ static PINT_sm_action remove_get_dirent_count( 0, js_p, &tmp_id, - server_job_context); + server_job_context, s_op->req->hints); return ret; } @@ -293,8 +296,8 @@ static PINT_sm_action remove_check_dirdata_entries( " detected non-empty " "directory (fsid: %u, handle: %llu) " "-- returning error\n", - s_op->u.remove.fs_id, - llu(s_op->u.remove.handle)); + s_op->req->u.remove.fs_id, + llu(s_op->req->u.remove.handle)); js_p->error_code = -PVFS_ENOTEMPTY; } } @@ -325,14 +328,14 @@ static PINT_sm_action remove_remove_dirdata_dspace( llu(s_op->u.remove.dirdata_handle)); ret = job_trove_dspace_remove( - s_op->u.remove.fs_id, + s_op->req->u.remove.fs_id, s_op->u.remove.dirdata_handle, 0, /* don't sync here, we do a dspace remove on the dir handle next */ smcb, 0, js_p, &j_id, - server_job_context); + server_job_context, s_op->req->hints); return ret; } @@ -350,14 +353,14 @@ static PINT_sm_action remove_remove_dspace( int ret = -PVFS_EINVAL; job_id_t j_id; gossip_debug(GOSSIP_SERVER_DEBUG, "(%p) remove: removing dspace " - "object %llu,%d\n", s_op, llu(s_op->u.remove.handle), - s_op->u.remove.fs_id); + "object %llu,%d\n", s_op, llu(s_op->req->u.remove.handle), + s_op->req->u.remove.fs_id); ret = job_trove_dspace_remove( - s_op->u.remove.fs_id, s_op->u.remove.handle, - TROVE_SYNC, + s_op->req->u.remove.fs_id, s_op->req->u.remove.handle, + TROVE_SYNC, smcb, 0, js_p, - &j_id, server_job_context); + &j_id, server_job_context, s_op->req->hints); return ret; } @@ -374,16 +377,6 @@ static PINT_sm_action remove_cleanup( return(server_state_machine_complete(smcb)); } 
-static PINT_sm_action remove_setup_work( - struct PINT_smcb *smcb, job_status_s *js_p) -{ - struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - s_op->u.remove.handle = s_op->req->u.remove.handle; - s_op->u.remove.fs_id = s_op->req->u.remove.fs_id; - js_p->error_code = 0; - return SM_ACTION_COMPLETE; -} - PINT_GET_OBJECT_REF_DEFINE(remove); struct PINT_server_req_params pvfs2_remove_params = diff --git a/src/server/request-scheduler/request-scheduler.c b/src/server/request-scheduler/request-scheduler.c index e3e7cd4..0c0594a 100644 --- a/src/server/request-scheduler/request-scheduler.c +++ b/src/server/request-scheduler/request-scheduler.c @@ -25,7 +25,9 @@ #include #include +#ifndef WIN32 #include +#endif #include #include "request-scheduler.h" @@ -82,8 +84,8 @@ struct req_sched_element enum req_sched_states state; /* state of this element */ PVFS_handle handle; struct timeval tv; /* used for timer events */ - int readonly_flag; /* indicates a read only operation */ - + /* indicates type of access needed by this op */ + enum PINT_server_req_access_type access_type; int mode_change; /* specifies that the element is a mode change */ enum PVFS_server_mode mode; /* the mode to change to */ }; @@ -145,6 +147,30 @@ int PINT_req_sched_initialize( return (0); } +/** Free resources held by the timer queue + */ +int PINT_timer_queue_finalize(void) +{ + struct qlist_head *scratch=NULL; + struct qlist_head *iterator=NULL; + struct req_sched_element *element=NULL; + + qlist_for_each_safe(iterator,scratch,&timer_queue) + { + element = qlist_entry(iterator,struct req_sched_element,list_link); + qlist_del(&(element->list_link)); + if (element && element->user_ptr) + free(element->user_ptr); + if (element) + free(element); + element=NULL; + } + + return(0); +} + + + /** Tears down the request scheduler and its data structures * * \return 0 on success, -errno on failure @@ -207,6 +233,7 @@ int PINT_req_sched_change_mode(enum PVFS_server_mode mode, { return 
(-errno); } + memset(mode_element, 0, sizeof(*mode_element)); mode_element->user_ptr = user_ptr; id_gen_fast_register(id, mode_element); @@ -305,14 +332,13 @@ static void PINT_req_sched_do_change_mode( * \return 1 if request should proceed immediately, 0 if the * request will be scheduled later, and -errno on failure */ -int PINT_req_sched_post( - enum PVFS_server_op op, - PVFS_fs_id fs_id, - PVFS_handle handle, - int readonly_flag, - int schedule, - void *in_user_ptr, - req_sched_id * out_id) +int PINT_req_sched_post(enum PVFS_server_op op, + PVFS_fs_id fs_id, + PVFS_handle handle, + enum PINT_server_req_access_type access_type, + enum PINT_server_sched_policy sched_policy, + void *in_user_ptr, + req_sched_id * out_id) { struct qlist_head *hash_link; int ret = -1; @@ -324,9 +350,9 @@ int PINT_req_sched_post( struct qlist_head *iterator; int tmp_flag; - if(!schedule) + if(sched_policy == PINT_SERVER_REQ_BYPASS) { - if(!readonly_flag && !PVFS_SERV_IS_MGMT_OP(op)) + if(access_type == PINT_SERVER_REQ_MODIFY && !PVFS_SERV_IS_MGMT_OP(op)) { /* if this requests modifies the file system, we have to check * to see if we are in admin mode or about to enter admin mode @@ -354,6 +380,7 @@ int PINT_req_sched_post( { return (-errno); } + memset(tmp_element, 0, sizeof(*tmp_element)); tmp_element->op = op; tmp_element->user_ptr = in_user_ptr; @@ -362,9 +389,10 @@ int PINT_req_sched_post( tmp_element->state = REQ_QUEUED; tmp_element->handle = handle; tmp_element->list_head = NULL; - tmp_element->readonly_flag = readonly_flag; + tmp_element->access_type = access_type; + tmp_element->mode_change = 0; - if(!readonly_flag && !PVFS_SERV_IS_MGMT_OP(op)) + if(access_type == PINT_SERVER_REQ_MODIFY && !PVFS_SERV_IS_MGMT_OP(op)) { if(PINT_req_sched_in_admin_mode()) { @@ -405,7 +433,9 @@ int PINT_req_sched_post( /* return 1 if the list is empty before we add this entry */ ret = qlist_empty(&(tmp_list->req_list)); if (ret == 1) + { tmp_element->state = REQ_SCHEDULED; + } else { /* check queue 
to see if we can apply any optimizations */ @@ -452,7 +482,7 @@ int PINT_req_sched_post( ret = 0; } } - else if (readonly_flag && + else if (access_type == PINT_SERVER_REQ_READONLY && next_element->state == REQ_SCHEDULED && last_element->state == REQ_SCHEDULED) { @@ -465,7 +495,7 @@ int PINT_req_sched_post( { tmp_element2 = qlist_entry(iterator, struct req_sched_element, list_link); - if(!tmp_element2->readonly_flag) + if(tmp_element2->access_type == PINT_SERVER_REQ_MODIFY) { tmp_flag = 1; break; @@ -495,7 +525,7 @@ int PINT_req_sched_post( * dirent request to proceed. */ tmp_element->state = REQ_SCHEDULED; - tmp_element->readonly_flag = 1; + tmp_element->access_type = PINT_SERVER_REQ_READONLY; gossip_debug(GOSSIP_REQ_SCHED_DEBUG, "REQ SCHED allowing " "concurrent dirent op, handle: %llu\n", llu(handle)); @@ -554,6 +584,7 @@ int PINT_req_sched_post_timer( { return (-errno); } + memset(tmp_element, 0, sizeof(*tmp_element)); tmp_element->user_ptr = in_user_ptr; id_gen_fast_register(out_id, tmp_element); @@ -562,6 +593,7 @@ int PINT_req_sched_post_timer( tmp_element->handle = PVFS_HANDLE_NULL; gettimeofday(&tmp_element->tv, NULL); tmp_element->list_head = NULL; + tmp_element->mode_change = 0; /* set time to future, based on msecs arg */ tmp_element->tv.tv_sec += msecs/1000; @@ -807,13 +839,13 @@ int PINT_req_sched_release( } } } - else if(next_element->readonly_flag) + else if(next_element->access_type == PINT_SERVER_REQ_READONLY) { /* keep going as long as the operations are read only; * we let these all go concurrently */ while (next_element && - (next_element->readonly_flag) && + (next_element->access_type == PINT_SERVER_REQ_READONLY) && (next_element->list_link.next != &(tmp_list->req_list))) { next_element = @@ -821,7 +853,7 @@ int PINT_req_sched_release( struct req_sched_element, list_link); if (next_element && - (next_element->readonly_flag)) + (next_element->access_type == PINT_SERVER_REQ_READONLY)) { gossip_debug( GOSSIP_REQ_SCHED_DEBUG, diff --git 
a/src/server/request-scheduler/request-scheduler.h b/src/server/request-scheduler/request-scheduler.h index f8431da..2c18727 100644 --- a/src/server/request-scheduler/request-scheduler.h +++ b/src/server/request-scheduler/request-scheduler.h @@ -24,6 +24,16 @@ typedef PVFS_id_gen_t req_sched_id; typedef int req_sched_error_code; +enum PINT_server_req_access_type +{ + PINT_SERVER_REQ_READONLY = 0, + PINT_SERVER_REQ_MODIFY +}; +enum PINT_server_sched_policy +{ + PINT_SERVER_REQ_BYPASS = 0, + PINT_SERVER_REQ_SCHEDULE +}; /* setup and teardown */ int PINT_req_sched_initialize( @@ -32,13 +42,16 @@ int PINT_req_sched_initialize( int PINT_req_sched_finalize( void); +int PINT_timer_queue_finalize(void); + + /* retrieving information about incoming requests */ /* scheduler submission */ int PINT_req_sched_post(enum PVFS_server_op op, PVFS_fs_id fs_id, PVFS_handle handle, - int read_only_flag, - int schedule, + enum PINT_server_req_access_type access_type, + enum PINT_server_sched_policy sched_policy, void *in_user_ptr, req_sched_id * out_id); diff --git a/src/server/rmdirent.sm b/src/server/rmdirent.sm index 686df7f..4c6cfc2 100644 --- a/src/server/rmdirent.sm +++ b/src/server/rmdirent.sm @@ -13,6 +13,7 @@ #include "gossip.h" #include "pvfs2-util.h" #include "pvfs2-internal.h" +#include "pint-util.h" enum { @@ -125,7 +126,7 @@ static int rmdirent_verify_parent_metadata_and_read_directory_entry_handle( 0, js_p, &i, - server_job_context); + server_job_context, s_op->req->hints); return ret; } @@ -169,7 +170,7 @@ static int rmdirent_remove_directory_entry( &s_op->key, &s_op->val, flags, - NULL, smcb, 0, js_p, &j_id, server_job_context); + NULL, smcb, 0, js_p, &j_id, server_job_context, s_op->req->hints); /* * Removing an entry causes an update of directory timestamps @@ -219,7 +220,7 @@ static PINT_sm_action rmdirent_update_directory_attr( s_op->req->u.rmdirent.fs_id, s_op->req->u.rmdirent.handle, ds_attr, TROVE_SYNC, - smcb, 0, js_p, &j_id, server_job_context); + smcb, 0, 
js_p, &j_id, server_job_context, s_op->req->hints); return ret; } diff --git a/src/server/set-attr.sm b/src/server/set-attr.sm index 9e381c1..4e28784 100644 --- a/src/server/set-attr.sm +++ b/src/server/set-attr.sm @@ -245,6 +245,9 @@ static PINT_sm_action setattr_setobj_attribs( dspace_a_p = &s_op->attr; a_p = &s_op->req->u.setattr.attr; + a_p->cid = s_op->req->u.setattr.cid; + dspace_a_p->cid = a_p->cid; + if (a_p->mask & PVFS_ATTR_META_DFILES) { gossip_debug(GOSSIP_SETATTR_DEBUG, " request has dfile_count of " @@ -294,12 +297,14 @@ static PINT_sm_action setattr_setobj_attribs( GOSSIP_SETATTR_DEBUG, " WRITING attrs: [owner = %d, group = %d\n\t" "perms = %o, type = %d, atime = %llu, mtime = %llu\n\t" - "ctime = %llu | dfile_count = %d | dist_size = %d]\n", + "ctime = %llu | dfile_count = %d | dist_size = %d\n\t" + "handle = %llu | S = %p | mask = %d]\n", dspace_a_p->owner, dspace_a_p->group, dspace_a_p->perms, dspace_a_p->objtype, llu(dspace_a_p->atime), llu(PINT_util_mkversion_time(dspace_a_p->mtime)), llu(dspace_a_p->ctime), (int)dspace_a_p->u.meta.dfile_count, - (int)dspace_a_p->u.meta.dist_size); + (int)dspace_a_p->u.meta.dist_size, + llu(s_op->req->u.setattr.handle), s_op, a_p->mask); /* translate attrs to storage attr format */ ds_attr = &(s_op->ds_attr); @@ -309,7 +314,7 @@ static PINT_sm_action setattr_setobj_attribs( s_op->req->u.setattr.fs_id, s_op->req->u.setattr.handle, ds_attr, TROVE_SYNC, - smcb, 0, js_p, &j_id, server_job_context); + smcb, 0, js_p, &j_id, server_job_context, s_op->req->hints); return ret; } @@ -363,7 +368,7 @@ static PINT_sm_action setattr_write_metafile_datafile_handles_if_required( s_op->req->u.setattr.fs_id, s_op->req->u.setattr.handle, &(s_op->key), &(s_op->val), 0, - NULL, smcb, 0, js_p, &j_id, server_job_context); + NULL, smcb, 0, js_p, &j_id, server_job_context, s_op->req->hints); return ret; } @@ -414,7 +419,7 @@ static PINT_sm_action setattr_write_metafile_distribution_if_required( s_op->req->u.setattr.fs_id, 
s_op->req->u.setattr.handle, &(s_op->key), &(s_op->val), TROVE_SYNC, - NULL, smcb, 0, js_p, &j_id, server_job_context); + NULL, smcb, 0, js_p, &j_id, server_job_context, s_op->req->hints); return ret; } @@ -459,7 +464,7 @@ static PINT_sm_action setattr_write_symlink_target_if_required( s_op->req->u.setattr.fs_id, s_op->req->u.setattr.handle, &(s_op->key), &(s_op->val), TROVE_SYNC, - NULL, smcb, 0, js_p, &j_id, server_job_context); + NULL, smcb, 0, js_p, &j_id, server_job_context, s_op->req->hints); return ret; } diff --git a/src/server/set-eattr.sm b/src/server/set-eattr.sm index 98a376f..c273044 100644 --- a/src/server/set-eattr.sm +++ b/src/server/set-eattr.sm @@ -14,6 +14,17 @@ #include "pvfs2-util.h" #include "pint-util.h" #include "pint-eattr.h" +#include "pvfs2-mirror.h" + +/*helper functions' prototypes*/ +static void init_keyval_structs(PINT_server_op *s_op, int free_val); + +/*data structures*/ +enum { + PVFS2_MAKE_IMMUTABLE_COPIES = 200, + CREATE_IMM_COPIES = 300, + SKIP_UPDATE = 400 +}; %% @@ -36,9 +47,47 @@ machine pvfs2_set_eattr_sm state setobj_eattrib { run seteattr_setobj_eattribs; + success => check_mirror_mode; + default => final_response; + } + + state check_mirror_mode + { + run seteattr_get_metahint_flag; + SKIP_UPDATE => check_immutable; + default => update_flag; + } + + state update_flag + { + run seteattr_set_metahint_flag; + success => check_immutable; + default => final_response; + } + + state check_immutable + { + run check_immutable; + PVFS2_MAKE_IMMUTABLE_COPIES => generate_copies; + default => final_response; + } + + state generate_copies + { + pjmp setup_create_immutable_copies + { + CREATE_IMM_COPIES => pvfs2_pjmp_create_immutable_copies_sm; + } + success => inspect_imm_copies; default => final_response; } + state inspect_imm_copies + { + run inspect_imm_copies; + default => final_response; + } + state final_response { jump pvfs2_final_response_sm; @@ -56,13 +105,16 @@ machine pvfs2_set_eattr_sm /* * This routine checks keys to 
confirm that the name spaces are valid. - * It also prints debugging information. */ static int seteattr_verify_eattribs( struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - int i; + int i; + + gossip_debug(GOSSIP_MIRROR_DEBUG,"Current Frame Pointer in set-eattr.sm " + "is %p.\n",s_op); + PVFS_object_attr *a_p = NULL; a_p = &s_op->attr; @@ -75,17 +127,51 @@ static int seteattr_verify_eattribs( PINT_util_get_object_type(a_p->objtype), a_p->owner, a_p->group, a_p->perms, a_p->objtype); + if( s_op->req->u.seteattr.nkey > PVFS_MAX_XATTR_LISTLEN ) + { + js_p->error_code = -PVFS_EINVAL; + return SM_ACTION_COMPLETE; + } + + for( i = 0; i < s_op->req->u.seteattr.nkey; i++ ) + { + if( s_op->req->u.seteattr.key[i].buffer_sz > PVFS_MAX_XATTR_NAMELEN ) + { + js_p->error_code = -PVFS_EINVAL; + return SM_ACTION_COMPLETE; + } + + if( s_op->req->u.seteattr.val[i].buffer_sz > PVFS_MAX_XATTR_VALUELEN ) + { + js_p->error_code = -PVFS_EINVAL; + return SM_ACTION_COMPLETE; + } + } + /* iterate through the keys that are being written */ + int j = 0; + char *valBuf = NULL; for (i = 0; i < s_op->req->u.seteattr.nkey; i++) { + gossip_debug(GOSSIP_MIRROR_DEBUG + ,"\tkey:%s\n" + ,(char *)s_op->req->u.seteattr.key[i].buffer); + valBuf = (char *)s_op->req->u.seteattr.val[i].buffer; + for (j=0; jreq->u.seteattr.val[i].buffer_sz; j++) + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tval[%d]=%#x\n" + ,j + ,(unsigned int)valBuf[j]); js_p->error_code = PINT_eattr_namespace_verify( &s_op->req->u.seteattr.key[i], &s_op->req->u.seteattr.val[i]); - if(!js_p->error_code) + + if(js_p->error_code) { return SM_ACTION_COMPLETE; } - } + + } /*end for*/ + return SM_ACTION_COMPLETE; } @@ -142,11 +228,363 @@ static int seteattr_setobj_eattribs( 0, js_p, &j_id, - server_job_context); + server_job_context, s_op->req->hints); return ret; } +/*This function determines if the meta-hint flag should be updated and + *retrieves it, if necessary. 
+*/ +static PINT_sm_action seteattr_get_metahint_flag(struct PINT_smcb *smcb + ,job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing %s ...\n",__func__); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PVFS_servreq_seteattr *seteattr = &(s_op->req->u.seteattr); + char mir_mode_key[] = USER_PVFS2_MIRROR_MODE; + char meta_hint_key[] = "user.pvfs2.meta_hint"; + int ret = 0; + job_id_t j_id; + int i; + + for (i=0; inkey; i++) + { + if (strncmp(mir_mode_key,seteattr->key[i].buffer,strlen(mir_mode_key)) == 0) + { + break; + } + } + + if (i == seteattr->nkey) + { + /*mirror.mode key not found, so we don't need to change meta-hint*/ + js_p->error_code = SKIP_UPDATE; + return SM_ACTION_COMPLETE; + } + + /*setup call to retrieve meta-hint*/ + init_keyval_structs(s_op,s_op->free_val); + + /*setup key for user.pvfs2.meta_hint*/ + s_op->key.buffer = malloc(strlen(meta_hint_key) + 1); + if (!s_op->key.buffer) + { + gossip_lerr("Cannot allocation memory for key.buffer\n"); + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + memset(s_op->key.buffer,0,strlen(meta_hint_key) + 1); + memcpy(s_op->key.buffer,meta_hint_key,strlen(meta_hint_key)); + s_op->key.buffer_sz = strlen(meta_hint_key) + 1; + + /*assign space for key retrieval*/ + s_op->val.buffer = &(s_op->attr.u.meta.hint.flags); + memset(s_op->val.buffer,0,sizeof(s_op->attr.u.meta.hint.flags)); + s_op->val.buffer_sz = sizeof(s_op->attr.u.meta.hint.flags); + s_op->free_val = 0; + + /*retrieve meta-hint key/value pair*/ + ret = job_trove_keyval_read( + seteattr->fs_id, + seteattr->handle, + &s_op->key, + &s_op->val, + TROVE_SYNC, + NULL, + smcb, + 0, + js_p, + &j_id, + server_job_context, + s_op->req->hints); + + return ret; + +error_exit: + init_keyval_structs(s_op,s_op->free_val); + + return SM_ACTION_COMPLETE; +}/*end seteattr_get_metahint_flag*/ + + +/*This function updates the meta-hint flag, whenever the mirroring mode + *changes. 
+*/ +static PINT_sm_action seteattr_set_metahint_flag(struct PINT_smcb *smcb + ,job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing %s ...\n",__func__); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PVFS_servreq_seteattr *seteattr = &(s_op->req->u.seteattr); + char mir_mode_key[] = USER_PVFS2_MIRROR_MODE; + char meta_hint_key[] = "user.pvfs2.meta_hint"; + enum MIRROR_MODE_t mir_mode; + PVFS_flags *hint_flags = &(s_op->attr.u.meta.hint.flags); + PVFS_flags flags = 0; + int ret = 0; + job_id_t j_id; + int i; + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tCurrent value of hints:%0x\n" + ,(unsigned int)*hint_flags); + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tAttribute object type:%d\n" + ,s_op->attr.objtype); + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tAttribute mask:%x\n" + ,s_op->attr.mask); + /*check return code: if we have an error other than a not found condition, + *then return the error; otherwise, keep processing. + */ + if (js_p->error_code < 0 && js_p->error_code != -TROVE_ENOENT) + return SM_ACTION_COMPLETE; + + for (i=0; inkey; i++) + { + if (strncmp(mir_mode_key,seteattr->key[i].buffer,strlen(mir_mode_key)) == 0) + { + mir_mode = *(enum MIRROR_MODE_t *)seteattr->val[i].buffer; + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tMirroring Mode is %d.\n" + ,mir_mode); + if (mir_mode == NO_MIRRORING) + /*turn off mirroring*/ + flags = *hint_flags & ~PVFS_MIRROR_FL; + else + /*turn on mirroring*/ + flags = *hint_flags | PVFS_MIRROR_FL; + break; + } + } + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tValue of flags:%0x\n" + ,(unsigned int)flags); + + /*setup job to modify user.pvfs2.meta-hints flag*/ + init_keyval_structs(s_op,s_op->free_val); + s_op->keyval_count = 1; + + s_op->key_a = malloc(sizeof(PVFS_ds_keyval) * s_op->keyval_count); + s_op->val_a = malloc(sizeof(PVFS_ds_keyval) * s_op->keyval_count); + if (!(s_op->key_a && s_op->val_a)) + { + gossip_lerr("Cannot allocate memory for key_a and/or val_a\n"); + js_p->error_code = -PVFS_ENOMEM; 
+ goto error_exit; + } + + i=0; /*setup key user.pvfs2.meta_hint*/ + s_op->key_a[i].buffer = malloc(strlen(meta_hint_key) + 1); + if (!s_op->key_a[i].buffer) + { + gossip_lerr("Cannot allocation memory for key_a[%d].buffer\n",i); + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + memset(s_op->key_a[i].buffer,0,strlen(meta_hint_key) + 1); + memcpy(s_op->key_a[i].buffer,meta_hint_key,strlen(meta_hint_key)); + s_op->key_a[i].buffer_sz = strlen(meta_hint_key) + 1; + + /*add/remove PVFS_MIRROR_FL from user.pvfs2.meta_hint*/ + s_op->val_a[i].buffer = malloc(sizeof(PVFS_flags)); + if (!s_op->val_a[i].buffer) + { + gossip_lerr("Cannot allocate memory for val_a[%d].buffer\n",i); + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + memset(s_op->val_a[i].buffer,0,sizeof(PVFS_flags)); + memcpy(s_op->val_a[i].buffer,&flags,sizeof(PVFS_flags)); + s_op->val_a[i].buffer_sz = sizeof(PVFS_flags); + + s_op->free_val = 1; + + /*make update to key/value pair*/ + ret = job_trove_keyval_write_list( + seteattr->fs_id, + seteattr->handle, + s_op->key_a, + s_op->val_a, + s_op->keyval_count, + TROVE_SYNC, + NULL, + smcb, + 0, + js_p, + &j_id, + server_job_context, + s_op->req->hints); + + return ret; + +error_exit: + init_keyval_structs(s_op,s_op->free_val); + return SM_ACTION_COMPLETE; +}/*end seteattr_set_metahint_flag*/ + + + + + +static PINT_sm_action setup_create_immutable_copies(struct PINT_smcb *smcb + ,job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing setup_create_immutable_copies.." 
+ "\n"); + struct PINT_server_op *s_op = PINT_sm_frame(smcb,PINT_FRAME_CURRENT); + int ret; + + js_p->error_code = 0; + + struct PINT_server_op *new_op = malloc(sizeof(struct PINT_server_op)); + if (!new_op) + { + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + memset(new_op,0,sizeof(struct PINT_server_op)); + + PVFS_SERVOP_IMM_COPIES_FILL(new_op,s_op); + new_op->u.create_copies.expected_mirror_mode = MIRROR_ON_IMMUTABLE; + + ret = PINT_sm_push_frame(smcb,CREATE_IMM_COPIES,new_op); + if (ret != 0) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tseteattr: failed to setup " + "nested sm.\n"); + js_p->error_code = ret; + } + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tseteattr:s_op:%p " + "\tnew_op:%p\n",s_op,new_op); + return SM_ACTION_COMPLETE; +}/*end action setup_create_immutable_copies*/ + + +static PINT_sm_action inspect_imm_copies(struct PINT_smcb *smcb + ,job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing inspect_imm_copies....\n"); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb,PINT_FRAME_CURRENT) + ,*imm_op = NULL; + int task_id, error_code, remaining; + + /*js_p->error_code will be zero upon entering this function*/ + + /*error_code will have the value returned in js_p->error_code via the */ + /*cleanup function in create-immutable-copies.sm */ + imm_op = PINT_sm_pop_frame(smcb, &task_id, &error_code, &remaining); + + assert(imm_op->op == PVFS_SERV_IMM_COPIES); + + /*check error codes from mirror copies*/ + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tstatus of copies:%d\n",error_code); + + if ( PVFS_get_errno_mapping(error_code) == EPERM ) + { + /*If EPERM, then create-immutable-copies detected that the */ + /*mirror mode is NO_MIRRORING or the mode didn't exist. */ + js_p->error_code = 0; + } else { + /*set error_code based on return from copies. 
*/ + /*this error_code will then be passed to the caller of set-eattr*/ + js_p->error_code = error_code; + } + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tfrom pop, imm_op:%p\n" + ,imm_op); + + gossip_debug(GOSSIP_MIRROR_DEBUG,"\tcurrent, s_op:%p\n",s_op); + + if (js_p->error_code) + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"File NOT mirrored " + "successfully(%d)\n" + ,js_p->error_code); + } + else + { + gossip_debug(GOSSIP_MIRROR_DEBUG,"File successfully mirrored.\n"); + } + + /*free memory from popped stack frame*/ + free(imm_op); + + return SM_ACTION_COMPLETE; +}/*end action inspect_imm_copies*/ + + + + +/* + * Function: check_immutable + * + * Params: server_op *b, + * job_status_s *js_p + * + * Returns: SM_action + * + * Synopsis: If the immutable flag is set, then make a mirror. + * + */ +static PINT_sm_action check_immutable(struct PINT_smcb *smcb + ,job_status_s *js_p) +{ + gossip_debug(GOSSIP_MIRROR_DEBUG,"Executing check_immutable....\n"); + + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int i; + + PVFS_flags immutable_is_on = 0; + PVFS_metafile_hint hint; + PVFS_ds_keyval *k=NULL, *v=NULL; + + js_p->error_code = 0; + + for (i = 0; i < s_op->req->u.seteattr.nkey; i++) + { + v = &s_op->req->u.seteattr.val[i]; + k = &s_op->req->u.seteattr.key[i]; + + /* Are we working with the correct namespace? The immutable flag is */ + /* a hint that can be set in the "user.pvfs2.meta_hint" namespace. */ + if (strcmp(k->buffer,"user.pvfs2.meta_hint") != 0){ + continue; + } + + memset(&hint, 0, sizeof(hint)); + memcpy(&hint, v->buffer, sizeof(v->buffer)); + + gossip_debug(GOSSIP_MIRROR_DEBUG,"My converted buffer is %llu.\n" + ,llu(hint.flags)); + + immutable_is_on = hint.flags & PVFS_IMMUTABLE_FL; + + gossip_debug(GOSSIP_MIRROR_DEBUG + ,"My key is %s. 
My key value is %llu" + "\n" + "PVFS_IMMUTABLE_FL is %llu.\n" + ,(char *)k->buffer + ,llu(hint.flags) + ,llu(PVFS_IMMUTABLE_FL) ); + gossip_debug(GOSSIP_MIRROR_DEBUG,"immutable_is_on is %llu.\n" + ,llu(immutable_is_on)); + gossip_debug(GOSSIP_MIRROR_DEBUG,"IMMUTABLE is %s.\n", + immutable_is_on ? "ON" : "OFF"); + + if (immutable_is_on){ + js_p->error_code = PVFS2_MAKE_IMMUTABLE_COPIES; + break; + } + } /*end for*/ + + return SM_ACTION_COMPLETE; +}/*end action check_immutable*/ + + + + /* * Function: setattr_cleanup * @@ -161,10 +599,79 @@ static int seteattr_setobj_eattribs( static int seteattr_cleanup( struct PINT_smcb *smcb, job_status_s *js_p) { - /* do I need to free memory from decoding key and val?*/ + PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + /*free key/val structs from the s_op*/ + init_keyval_structs(s_op,s_op->free_val); + + return(server_state_machine_complete(smcb)); } +/*This function initializes the keyval structures. It also assumes that the key + *is always allocated, while the val may not be. free_val is turned on, if + *the val must be deallocated; otherwise, val is not deallocated, just initial- + *ized. 
+*/ +static void init_keyval_structs(PINT_server_op *s_op, int free_val) +{ + int i; + + /*initialize val*/ + if (free_val) + { + if (s_op->val.buffer) + { + free(s_op->val.buffer); + memset(&(s_op->val),0,sizeof(PVFS_ds_keyval)); + } + memset(&(s_op->val),0,sizeof(PVFS_ds_keyval)); + for (i=0; ikeyval_count; i++) + { + if (s_op->val_a && s_op->val_a[i].buffer) + free(s_op->val_a[i].buffer); + } + if (s_op->val_a) + free(s_op->val_a); + s_op->val_a = NULL; + } + else + { + memset(&(s_op->val),0,sizeof(PVFS_ds_keyval)); + if (s_op->val_a) + free(s_op->val_a); + s_op->val_a = NULL; + } + + /*initialize key*/ + if (s_op->key.buffer) + { + free(s_op->key.buffer); + memset(&(s_op->key),0,sizeof(PVFS_ds_keyval)); + } + for (i=0; ikeyval_count; i++) + { + if (s_op->key_a && s_op->key_a[i].buffer) + free(s_op->key_a[i].buffer); + } + if (s_op->key_a) + { + free(s_op->key_a); + s_op->key_a = NULL; + } + + /*initialize the rest*/ + if (s_op->error_a) + free(s_op->error_a); + s_op->error_a = NULL; + s_op->keyval_count = 0; + s_op->free_val = 0; + + return; +}/*end init_keyval_structs*/ + + + PINT_GET_OBJECT_REF_DEFINE(seteattr); struct PINT_server_req_params pvfs2_set_eattr_params = diff --git a/src/server/setparam.sm b/src/server/setparam.sm index d4890a3..dab98ad 100644 --- a/src/server/setparam.sm +++ b/src/server/setparam.sm @@ -15,9 +15,11 @@ #include "pvfs2-server.h" #include "pint-event.h" #include "pvfs2-internal.h" +#include "gossip.h" #include "request-scheduler/request-scheduler.h" static int check_fs_id(PVFS_fs_id fs_id); +static int drop_caches(void); %% @@ -59,22 +61,20 @@ static PINT_sm_action setparam_work( struct PINT_smcb *smcb, job_status_s *js_p) { struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); - int ret = -1, tmp_on = 0, old_event_on = 0; + int ret = -1, tmp_on = 0; job_id_t tmp_id; uint64_t tmp_mask = 0; - int32_t old_api_mask = 0, old_op_mask = 0; PVFS_handle tmp_handle = PVFS_HANDLE_NULL; struct server_configuration_s 
*user_opts; struct filesystem_configuration_s *fs_conf; char buf[16] = {0}; - + switch(s_op->req->u.mgmt_setparam.param) { case PVFS_SERV_PARAM_GOSSIP_MASK: gossip_get_debug_mask(&tmp_on, &tmp_mask); - s_op->resp.u.mgmt_setparam.old_value = tmp_mask; gossip_set_debug_mask( - 1, s_op->req->u.mgmt_setparam.value); + 1, s_op->req->u.mgmt_setparam.value.u.value); js_p->error_code = 0; return SM_ACTION_COMPLETE; case PVFS_SERV_PARAM_INVALID: @@ -83,39 +83,26 @@ static PINT_sm_action setparam_work( js_p->error_code = -PVFS_ENOSYS; return SM_ACTION_COMPLETE; case PVFS_SERV_PARAM_FSID_CHECK: - s_op->resp.u.mgmt_setparam.old_value = 0; js_p->error_code = check_fs_id( - (PVFS_fs_id)s_op->req->u.mgmt_setparam.value); + (PVFS_fs_id)s_op->req->u.mgmt_setparam.value.u.value); return SM_ACTION_COMPLETE; case PVFS_SERV_PARAM_ROOT_CHECK: - tmp_handle = (PVFS_handle)s_op->req->u.mgmt_setparam.value; - s_op->resp.u.mgmt_setparam.old_value = 0; + tmp_handle = (PVFS_handle)s_op->req->u.mgmt_setparam.value.u.value; gossip_debug(GOSSIP_SERVER_DEBUG, " - ROOT_CHECK looking for" " handle %llu, on fs_id %d\n", llu(tmp_handle), s_op->req->u.mgmt_setparam.fs_id); ret = job_trove_dspace_verify( s_op->req->u.mgmt_setparam.fs_id, tmp_handle, 0, - smcb, 0, js_p, &tmp_id, server_job_context); + smcb, 0, js_p, &tmp_id, server_job_context, s_op->req->hints); return(ret); - case PVFS_SERV_PARAM_EVENT_ON: + case PVFS_SERV_PARAM_EVENT_ENABLE: ret = 0; - PINT_event_get_masks( - &old_event_on, &old_api_mask, &old_op_mask); - PINT_event_set_masks( - (int)s_op->req->u.mgmt_setparam.value, - old_api_mask, old_op_mask); - s_op->resp.u.mgmt_setparam.old_value = old_event_on; + PINT_event_enable(s_op->req->u.mgmt_setparam.value.u.string_value); js_p->error_code = ret; return SM_ACTION_COMPLETE; - case PVFS_SERV_PARAM_EVENT_MASKS: - PINT_event_get_masks( - &old_event_on, &old_api_mask, &old_op_mask); - PINT_event_set_masks(old_event_on, - (int32_t)(s_op->req->u.mgmt_setparam.value & 0x0FFFFFFFF), - 
(int32_t)(s_op->req->u.mgmt_setparam.value >> 32)); - s_op->resp.u.mgmt_setparam.old_value = old_api_mask + - ((int64_t)old_op_mask << 32); + case PVFS_SERV_PARAM_EVENT_DISABLE: + PINT_event_disable(s_op->req->u.mgmt_setparam.value.u.string_value); js_p->error_code = 0; return SM_ACTION_COMPLETE; case PVFS_SERV_PARAM_SYNC_META: @@ -124,7 +111,7 @@ static PINT_sm_action setparam_work( s_op->req->u.mgmt_setparam.fs_id); if(fs_conf) { - if(s_op->req->u.mgmt_setparam.value) + if(s_op->req->u.mgmt_setparam.value.u.value) fs_conf->trove_sync_meta = TROVE_SYNC; else fs_conf->trove_sync_meta = 0; @@ -137,7 +124,7 @@ static PINT_sm_action setparam_work( s_op->req->u.mgmt_setparam.fs_id); if(fs_conf) { - if(s_op->req->u.mgmt_setparam.value) + if(s_op->req->u.mgmt_setparam.value.u.value) { snprintf(buf, 16, "%d,%d", s_op->req->u.mgmt_setparam.fs_id, TROVE_SYNC); @@ -156,13 +143,16 @@ static PINT_sm_action setparam_work( return SM_ACTION_COMPLETE; case PVFS_SERV_PARAM_MODE: - s_op->resp.u.mgmt_setparam.old_value = PINT_req_sched_get_mode(); - ret = job_req_sched_change_mode(s_op->req->u.mgmt_setparam.value, - NULL, 0, js_p, &s_op->scheduled_id, - server_job_context); + ret = job_req_sched_change_mode( + s_op->req->u.mgmt_setparam.value.u.value, + NULL, 0, js_p, &s_op->scheduled_id, + server_job_context); js_p->error_code = 0; return ret; + case PVFS_SERV_PARAM_DROP_CACHES: + js_p->error_code = drop_caches(); + return SM_ACTION_COMPLETE; } gossip_lerr("Error: mgmt_setparam for unknown parameter %d.\n", @@ -259,11 +249,49 @@ struct PINT_server_req_params pvfs2_setparam_params = .string_name = "mgmt_setparam", .perm = PINT_SERVER_CHECK_NONE, .access_type = PINT_server_req_modify, - .sched_policy = PINT_SERVER_REQ_SCHEDULE, .get_object_ref = PINT_get_object_ref_setparam, .state_machine = &pvfs2_setparam_sm }; +/* drop_caches() + * + * Linux specific, but should fail cleanly on other platforms. 
+ * + * This function asks the operating system to sync and drop any in memory + * caches that it may have. Applies globally to all file systems on the + * server, not just the PVFS storage space. + */ +static int drop_caches(void) +{ + int fd; + int ret; + + /* try to commit buffer cache first */ + sync(); + + /* open Linux specific control file if present */ + fd = open("/proc/sys/vm/drop_caches", O_WRONLY); + if(fd < 0) + { + gossip_debug(GOSSIP_SERVER_DEBUG, + "Warning: drop_caches not supported.\n"); + return(-PVFS_EOPNOTSUPP); + } + + /* free page cache, dentries, and inodes */ + ret = write(fd, "3", 2); + if(ret < 0) + { + gossip_debug(GOSSIP_SERVER_DEBUG, + "Warning: found drop_caches file but failed to write to it.\n"); + close(fd); + return(-PVFS_EOPNOTSUPP); + } + + close(fd); + return(0); +} + /* * Local variables: * mode: c diff --git a/src/server/small-io.sm b/src/server/small-io.sm index 38b78c7..49167b1 100644 --- a/src/server/small-io.sm +++ b/src/server/small-io.sm @@ -17,6 +17,7 @@ #include "pvfs2-request.h" #include "pint-distribution.h" #include "pint-request.h" +#include "pint-perf-counter.h" %% @@ -59,6 +60,8 @@ machine pvfs2_small_io_sm static PINT_sm_action small_io_start_job( struct PINT_smcb *smcb, job_status_s *js_p) { + gossip_debug(GOSSIP_IO_DEBUG,"Executing small_io_start_job...\n"); + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); int ret; job_id_t tmp_id; @@ -77,7 +80,7 @@ static PINT_sm_action small_io_start_job( s_op->resp.u.small_io.io_type = s_op->req->u.small_io.io_type; if(s_op->req->u.small_io.io_type == PVFS_IO_READ && - s_op->ds_attr.b_size == 0) + s_op->ds_attr.u.datafile.b_size == 0) { /* nothing to read. 
return SM_ACTION_DEFERRED */ js_p->error_code = 0; @@ -102,8 +105,8 @@ static PINT_sm_action small_io_start_job( s_op->req->u.small_io.file_req_offset + s_op->req->u.small_io.aggregate_size); - s_op->resp.u.small_io.bstream_size = s_op->ds_attr.b_size; - fdata.fsize = s_op->ds_attr.b_size; + s_op->resp.u.small_io.bstream_size = s_op->ds_attr.u.datafile.b_size; + fdata.fsize = s_op->ds_attr.u.datafile.b_size; fdata.extend_flag = (s_op->req->u.small_io.io_type == PVFS_IO_READ) ? 0 : 1; @@ -160,7 +163,8 @@ static PINT_sm_action small_io_start_job( 0, js_p, &tmp_id, - server_job_context); + server_job_context, + s_op->req->hints); if(ret < 0) { gossip_err("small_io: Failed to post trove bstream write\n"); @@ -178,6 +182,10 @@ static PINT_sm_action small_io_start_job( } s_op->u.small_io.result_bytes = result.bytes; + + gossip_debug(GOSSIP_IO_DEBUG, + "\tsubmitting job_trove_bstream_read_list for handle %llu\n" + ,llu(s_op->req->u.small_io.handle)); ret = job_trove_bstream_read_list( s_op->req->u.small_io.fs_id, s_op->req->u.small_io.handle, @@ -194,7 +202,8 @@ static PINT_sm_action small_io_start_job( 0, js_p, &tmp_id, - server_job_context); + server_job_context, + s_op->req->hints); if(ret < 0) { gossip_err("small-io: Failed to post trove bstream read\n"); @@ -223,6 +232,28 @@ static PINT_sm_action small_io_check_size( s_op->resp.u.small_io.result_size = s_op->u.small_io.result_bytes; } } + if(s_op->req->u.small_io.io_type == PVFS_IO_READ) + { + PINT_perf_count(PINT_server_pc, + PINT_PERF_READ, + s_op->resp.u.small_io.result_size, + PINT_PERF_ADD); + PINT_perf_count(PINT_server_pc, + PINT_PERF_SMALL_READ, + s_op->resp.u.small_io.result_size, + PINT_PERF_ADD); + } + else + { + PINT_perf_count(PINT_server_pc, + PINT_PERF_WRITE, + s_op->resp.u.small_io.result_size, + PINT_PERF_ADD); + PINT_perf_count(PINT_server_pc, + PINT_PERF_SMALL_WRITE, + s_op->resp.u.small_io.result_size, + PINT_PERF_ADD); + } return SM_ACTION_COMPLETE; } @@ -241,14 +272,14 @@ static PINT_sm_action 
small_io_cleanup( return server_state_machine_complete(smcb); } -static inline int PINT_server_req_access_small_io( +static inline enum PINT_server_req_access_type PINT_server_req_access_small_io( struct PVFS_server_req *req) { if(req->u.io.io_type == PVFS_IO_READ) { - return 1; + return PINT_SERVER_REQ_READONLY; } - return 0; + return PINT_SERVER_REQ_MODIFY; } PINT_GET_OBJECT_REF_DEFINE(small_io); diff --git a/src/server/tree-communicate.sm b/src/server/tree-communicate.sm new file mode 100644 index 0000000..f3da1d8 --- /dev/null +++ b/src/server/tree-communicate.sm @@ -0,0 +1,755 @@ +/* + * (C) 2008 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/** \file + * \ingroup sysint + * + * PVFS2 system interface routines for creating files. + */ + +#include +#include + +#include "server-config.h" +#include "pvfs2-server.h" +#include "pvfs2-attr.h" + +#include "client-state-machine.h" +#include "pvfs2-debug.h" +#include "pvfs2-dist-simple-stripe.h" +#include "job.h" +#include "gossip.h" +#include "str-utils.h" +#include "pint-cached-config.h" +#include "pint-distribution.h" +#include "PINT-reqproto-encode.h" +#include "pint-util.h" +#include "pint-dist-utils.h" +#include "ncache.h" +#include "pvfs2-internal.h" +#include "extent-utils.h" + +//#define MAX_PARTITIONS 2 + +enum +{ + LOCAL_OPERATION = 2, + REMOTE_OPERATION = 3 +}; + +/* completion function prototypes */ +static int tree_get_file_size_comp_fn( + void *v_p, struct PVFS_server_resp *resp_p, int index); + +%% + +machine pvfs2_tree_remove_sm +{ + state tree_remove_do_work + { + jump pvfs2_tree_remove_work_sm; + default => tree_remove_final_response; + } + + state tree_remove_final_response + { + jump pvfs2_final_response_sm; + default => tree_remove_cleanup; + } + + state tree_remove_cleanup + { + run tree_remove_cleanup; + default => terminate; + } +} + +nested machine pvfs2_tree_remove_work_sm +{ + state tree_remove_work_do_work + { + pjmp tree_remove_setup + 
{ + REMOTE_OPERATION => pvfs2_pjmp_call_msgpairarray_sm; + LOCAL_OPERATION => pvfs2_pjmp_remove_work_sm; + } + default => tree_remove_work_cleanup; + } + + state tree_remove_work_cleanup + { + run tree_remove_work_cleanup; + default => return; + } +} + +machine pvfs2_tree_get_file_size_sm +{ + state tree_get_file_size_do_work + { + jump pvfs2_tree_get_file_size_work_sm; + default => tree_get_file_size_final_response; + } + + state tree_get_file_size_final_response + { + jump pvfs2_final_response_sm; + default => tree_get_file_size_cleanup; + } + + state tree_get_file_size_cleanup + { + run tree_get_file_size_cleanup; + default => terminate; + } +} + +nested machine pvfs2_tree_get_file_size_work_sm +{ + state tree_get_file_size_work_do_work + { + pjmp tree_get_file_size_setup + { + REMOTE_OPERATION => pvfs2_pjmp_call_msgpairarray_sm; + LOCAL_OPERATION => pvfs2_pjmp_get_attr_with_prelude_sm; + } + default => tree_get_file_size_work_cleanup; + } + + state tree_get_file_size_work_cleanup + { + run tree_get_file_size_work_cleanup; + default => return; + } +} + +%% + +static PINT_sm_action tree_communicate_partition_handles(struct PINT_smcb *smcb + ,job_status_s *js_p + ,int num_data_files + ,PVFS_fs_id fs_id + ,enum PVFS_server_op operation + ,PVFS_handle *handle_array) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PINT_server_op *tree_communicate_s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PVFS_server_req *this_req = s_op->req; + int num_partitions, num_files_per_server; + int i; + char server_name[1024]; + struct server_configuration_s *server_config = get_server_config_struct(); + int ret = -PVFS_EINVAL; + struct PVFS_server_req *req = NULL; + + s_op->u.tree_communicate.handle_array_local = calloc( + num_data_files, sizeof(*s_op->u.tree_communicate.handle_array_local)); + s_op->u.tree_communicate.handle_array_remote = calloc( + num_data_files, sizeof(*s_op->u.tree_communicate.handle_array_remote)); + 
s_op->u.tree_communicate.local_join_size = calloc( + num_data_files, sizeof(*s_op->u.tree_communicate.local_join_size)); + s_op->u.tree_communicate.remote_join_size = calloc( + num_data_files, sizeof(*s_op->u.tree_communicate.remote_join_size)); + + if (!s_op->u.tree_communicate.handle_array_local || + !s_op->u.tree_communicate.handle_array_remote || + !s_op->u.tree_communicate.local_join_size || + !s_op->u.tree_communicate.remote_join_size) + { + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + + /* Separate the handles into local and remote. */ + for (i = 0; i < num_data_files; i++) + { + PINT_cached_config_get_server_name(server_name, 1024, handle_array[i], fs_id); + if (! strcmp(server_config->host_id, server_name)) /* this one is local */ + { + gossip_debug(GOSSIP_SERVER_DEBUG, + "tree_communicate_partition_handles: local handle = %llu\n", + llu(handle_array[i])); + s_op->u.tree_communicate.handle_array_local[ + s_op->u.tree_communicate.handle_array_local_count] = handle_array[i]; + s_op->u.tree_communicate.local_join_size[ + s_op->u.tree_communicate.handle_array_local_count] = i; + s_op->u.tree_communicate.handle_array_local_count++; + } + else + { + gossip_debug(GOSSIP_SERVER_DEBUG, + "tree_communicate_partition_handles: remote handle = %llu\n", + llu(handle_array[i])); + s_op->u.tree_communicate.handle_array_remote[ + s_op->u.tree_communicate.handle_array_remote_count] = handle_array[i]; + s_op->u.tree_communicate.remote_join_size[ + s_op->u.tree_communicate.handle_array_remote_count] = i; + s_op->u.tree_communicate.handle_array_remote_count++; + } + }/*end for*/ + + if (s_op->u.tree_communicate.handle_array_local_count > 0) { + + for (i=0; iu.tree_communicate.handle_array_local_count; i++) + { + /* Create a stack frame for the local operation. 
*/ + s_op->num_pjmp_frames++; + js_p->error_code = LOCAL_OPERATION; + + PINT_CREATE_SUBORDINATE_SERVER_FRAME(smcb, tree_communicate_s_op, + s_op->u.tree_communicate.handle_array_local[i], + fs_id, js_p->error_code, req, LOCAL_OPERATION); + + switch (operation) + { + case PVFS_SERV_TREE_REMOVE: + { + PINT_SERVREQ_REMOVE_FILL( + *req, + s_op->req->credentials, + fs_id, + s_op->u.tree_communicate.handle_array_local[i], + s_op->req->hints); + + break; + } + + case PVFS_SERV_TREE_GET_FILE_SIZE: + { + PINT_SERVREQ_GETATTR_FILL( + *req, + s_op->req->credentials, + fs_id, + s_op->u.tree_communicate.handle_array_local[i], + PVFS_ATTR_DATA_SIZE, + s_op->req->hints); + + /*this identifies which "local" frame is being populated.*/ + /* we need to store this int in a location not in the union. + * it will be accessed as u.getattr in the state machine + * so we can't rely on a value in u.tree_communicate here. + * All we need is an int, I'm loathe to do a malloc instead of + * just re-purposing one of the existing ints in the s_op. */ + tree_communicate_s_op->local_index = i; + + break; + } + + default: + break; + }/*end switch*/ + }/*end for*/ + }/*end if local*/ + + if (s_op->u.tree_communicate.handle_array_remote_count > 0) { + + /* Decide how to divide the remote handles. If there are only a few (fewer than + tree_threshhold from the config file) then go ahead and send to each remaining server individually. 
*/ + if (s_op->u.tree_communicate.handle_array_remote_count > server_config->tree_threshhold) + { + num_partitions = server_config->tree_width; + num_files_per_server = s_op->u.tree_communicate.handle_array_remote_count / + server_config->tree_width; + if (num_partitions * num_files_per_server < + s_op->u.tree_communicate.handle_array_remote_count) { + num_files_per_server++; + } + } + else + { + num_partitions = s_op->u.tree_communicate.handle_array_remote_count; + num_files_per_server = 1; + } + + gossip_debug(GOSSIP_SERVER_DEBUG, + "tree_communicate_partition_handles: num_data_files = %d," + " num_remote_handles = %d, " + "num_partitions = %d, num_files_per_server = %d\n" + ,num_data_files + ,s_op->u.tree_communicate.handle_array_remote_count + ,num_partitions + ,num_files_per_server); + + /* We need to send tree-based messages to other servers */ + js_p->error_code = REMOTE_OPERATION; + s_op->num_pjmp_frames++; + + /* Prepare the stack for pjmp. */ + PINT_CREATE_SUBORDINATE_SERVER_FRAME(smcb, tree_communicate_s_op, + s_op->u.tree_communicate.handle_array_remote[0], + fs_id, js_p->error_code, req, REMOTE_OPERATION); + + /*store the number of partitions*/ + s_op->u.tree_communicate.num_partitions = num_partitions; + + /*keep info in new op structure for later use*/ + tree_communicate_s_op->resp = s_op->resp; + tree_communicate_s_op->u.tree_communicate = s_op->u.tree_communicate; + + ret = PINT_msgpairarray_init(&tree_communicate_s_op->msgarray_op + ,num_partitions); + if (ret) + { + gossip_lerr("tree_communicate: failed to allocate msgarray\n"); + return -PVFS_ENOMEM; + } + /* Fill in the msgarray. */ + for (i = 0; i < num_partitions; i++) + { + PINT_sm_msgpair_state *msg_p; + int num_data_files_for_this_server; + + /* Handle the case where the last partition has fewer files than the + *other partitions. 
+ */ + for (num_data_files_for_this_server = num_files_per_server; + i*num_files_per_server + num_data_files_for_this_server > + s_op->u.tree_communicate.handle_array_remote_count; + num_data_files_for_this_server--) + { + /* Do nothing; */ ; + } + + msg_p = &tree_communicate_s_op->msgarray_op.msgarray[i]; + + switch (operation) + { + case PVFS_SERV_TREE_REMOVE: + { + PINT_SERVREQ_TREE_REMOVE_FILL( + msg_p->req, + s_op->req->credentials, + fs_id, + num_data_files_for_this_server, + &s_op->u.tree_communicate.handle_array_remote[i*num_files_per_server], + s_op->req->hints); + msg_p->comp_fn = NULL; + break; + } + + case PVFS_SERV_TREE_GET_FILE_SIZE: + { + +/* TODO: Need to pass along hints. */ + PINT_SERVREQ_TREE_GET_FILE_SIZE_FILL( + msg_p->req, + s_op->req->credentials, + fs_id, + (i * num_files_per_server), + num_data_files_for_this_server, + &s_op->u.tree_communicate.handle_array_remote[i*num_files_per_server], + this_req->u.tree_get_file_size.retry_msgpair_at_leaf, + NULL); + msg_p->comp_fn = tree_get_file_size_comp_fn; + break; + } + + default: + break; + } + msg_p->fs_id = fs_id; + msg_p->handle = + s_op->u.tree_communicate.handle_array_remote[i*num_files_per_server]; + + /* if the logical file is mirrored, then we want the mirroring logic + * to handle retries, when we are processing the leaf of the tree, + * i.e., this tree request contains msgpairs having only one handle + * each. Otherwise, we let msgpairarray handle retries. 
+ */ + if ( (this_req->u.tree_get_file_size.retry_msgpair_at_leaf) && + (num_files_per_server == 1)) + { + gossip_debug(GOSSIP_SERVER_DEBUG,"%s:retry_flag:" + "PVFS_MSGPAIR_NO_RETRY\n" + ,__func__); + msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY; + } + else + { + gossip_debug(GOSSIP_SERVER_DEBUG,"%s:retry_flag:" + "PVFS_MSGPAIR_RETRY\n" + ,__func__); + msg_p->retry_flag = PVFS_MSGPAIR_RETRY; + } + + ret = PINT_cached_config_map_to_server(&msg_p->svr_addr + ,msg_p->handle + ,msg_p->fs_id); + if (ret) + { + gossip_err("Failed to map server address\n"); + } + }/*end for*/ + }/*end if remote*/ + + js_p->error_code = 0; + return SM_ACTION_COMPLETE; +} + +static PINT_sm_action tree_remove_setup(struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + assert(s_op->req->op == PVFS_SERV_TREE_REMOVE); + + return (tree_communicate_partition_handles(smcb, js_p, s_op->req->u.tree_remove.num_data_files, + s_op->req->u.tree_remove.fs_id, PVFS_SERV_TREE_REMOVE, + s_op->req->u.tree_remove.handle_array)); +} + +static int tree_remove_work_cleanup( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + /* get frame from bottom of stack */ + PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int i, task_id, error_code; + PINT_server_op *old_frame; + int return_error_code; + + if (js_p->error_code) + { + return_error_code = js_p->error_code; + } + else + { + return_error_code = 0; + } + + assert(s_op->req->op == PVFS_SERV_TREE_REMOVE); + + /* for each state machine spawned, pop a frame */ + for (i = 0; i < s_op->num_pjmp_frames; i++) + { + old_frame = PINT_sm_pop_frame(smcb, &task_id, &error_code, NULL); + if (task_id == REMOTE_OPERATION) { + PINT_msgpairarray_destroy(&old_frame->msgarray_op); + } + + gossip_debug(GOSSIP_SERVER_DEBUG,"%s:frame #%d \ttype:%s \terror_code:%d\n" + ,__func__ + ,i + ,(task_id==LOCAL_OPERATION ? 
"LOCAL" : "REMOTE") + ,error_code); + /*retain first error encountered.*/ + if (error_code && !return_error_code) + return_error_code = error_code; + + free(old_frame); + } + + /*deallocate resources*/ + if (s_op->u.tree_communicate.handle_array_local) + free(s_op->u.tree_communicate.handle_array_local); + if (s_op->u.tree_communicate.handle_array_remote) + free(s_op->u.tree_communicate.handle_array_remote); + if (s_op->u.tree_communicate.local_join_size) + free(s_op->u.tree_communicate.local_join_size); + if (s_op->u.tree_communicate.remote_join_size) + free(s_op->u.tree_communicate.remote_join_size); + s_op->u.tree_communicate.handle_array_local = NULL; + s_op->u.tree_communicate.handle_array_remote = NULL; + s_op->u.tree_communicate.local_join_size = NULL; + s_op->u.tree_communicate.remote_join_size = NULL; + + js_p->error_code = return_error_code; + return SM_ACTION_COMPLETE; +} + +static int tree_remove_cleanup( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + return(server_state_machine_complete(smcb)); +} + +static PINT_sm_action tree_get_file_size_setup(struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PVFS_server_req *req = s_op->req; + + assert(s_op->req->op == PVFS_SERV_TREE_GET_FILE_SIZE); + + gossip_debug(GOSSIP_SERVER_DEBUG,"%s:req->retry_msgpair_at_leaf:%s\n" + ,__func__ + ,(req->u.tree_get_file_size.retry_msgpair_at_leaf ? 
"YES" : "NO")); + + s_op->resp.u.tree_get_file_size.caller_handle_index = + s_op->req->u.tree_get_file_size.caller_handle_index; + s_op->resp.u.tree_get_file_size.handle_count = + s_op->req->u.tree_get_file_size.num_data_files; + + gossip_debug(GOSSIP_SERVER_DEBUG,"%s: frame:%p \ttree.caller_handle_index:%d\n" + "\t\ttree.handle_count:%d " + "\ttree.op:%d\n" + ,__func__ + ,s_op + ,s_op->resp.u.tree_get_file_size.caller_handle_index + ,s_op->resp.u.tree_get_file_size.handle_count + ,s_op->resp.op); + + /* allocate response arrays */ + s_op->resp.u.tree_get_file_size.error = (PVFS_error *) + calloc(s_op->req->u.tree_get_file_size.num_data_files, sizeof(PVFS_error)); + s_op->resp.u.tree_get_file_size.size = (PVFS_size *) + calloc(s_op->req->u.tree_get_file_size.num_data_files, sizeof(PVFS_size)); + if (! s_op->resp.u.tree_get_file_size.error || + ! s_op->resp.u.tree_get_file_size.size) + { + gossip_err("tree_get_file_size: failed to allocate arrays\n"); + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + + return (tree_communicate_partition_handles(smcb, js_p, + s_op->req->u.tree_get_file_size.num_data_files, + s_op->req->u.tree_get_file_size.fs_id, PVFS_SERV_TREE_GET_FILE_SIZE, + s_op->req->u.tree_get_file_size.handle_array)); +} + +static int tree_get_file_size_comp_fn( + void *v_p, struct PVFS_server_resp *resp_p, int index) +{ + PINT_smcb *smcb = v_p; + PINT_server_op *s_op = PINT_sm_frame(smcb, (PINT_MSGPAIR_PARENT_SM)); + struct PVFS_server_resp *op_resp = &(s_op->resp); /*op resp structure*/ + struct PVFS_server_resp *m_resp = resp_p; /*msgpair resp structure*/ + struct PVFS_servresp_tree_get_file_size *op_tree = &(op_resp->u.tree_get_file_size); + struct PVFS_servresp_tree_get_file_size *m_tree = &(m_resp->u.tree_get_file_size); + struct PINT_server_tree_communicate_op *s_tree_comm = &(s_op->u.tree_communicate); + int i; + uint32_t size_array_index=0, + error_array_index=0; + + gossip_debug(GOSSIP_SERVER_DEBUG, + 
"tree_get_file_size_comp_fn[%d], caller_handle_index = %d\n", + index, resp_p->u.tree_get_file_size.caller_handle_index); + + assert(m_resp->op == PVFS_SERV_TREE_GET_FILE_SIZE); + + if (m_resp->status != 0) + { + PVFS_perror_gossip("Get file size failure", m_resp->status); + return resp_p->status; + } + + /* stash the sizes and error codes for each file handle */ + gossip_debug(GOSSIP_SERVER_DEBUG,"%s: m_resp->u.tree_get_file_size.handle_count" + " is %d\n" + ,__func__ + ,m_resp->u.tree_get_file_size.handle_count); + + for (i = 0; i < m_tree->handle_count; i++) + { + gossip_debug(GOSSIP_SERVER_DEBUG,"%s: size of datafile[%d] is %lld\n" + ,__func__ + ,i + ,lld(m_tree->size[i])); + gossip_debug(GOSSIP_SERVER_DEBUG,"%s: caller_handle_index:%d " + "\tarray_local_count:%d\n" + ,__func__ + ,m_tree->caller_handle_index + ,s_tree_comm->handle_array_local_count); + gossip_debug(GOSSIP_SERVER_DEBUG,"%s: error code for datafile %d is %d\n" + ,__func__ + ,i + ,m_tree->error[i]); + + size_array_index = s_op->u.tree_communicate.remote_join_size[ i + + resp_p->u.tree_get_file_size.caller_handle_index]; + error_array_index = size_array_index; + + op_tree->size[size_array_index] = m_tree->size[i]; + + op_tree->error[error_array_index] = m_tree->error[i]; + } + + return 0; +} + +static int tree_get_file_size_work_cleanup( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + /* get frame from bottom of stack */ + PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PVFS_servresp_tree_get_file_size *s_tree = &(s_op->resp.u.tree_get_file_size); + int i, j, k, task_id, error_code; + uint32_t size_array_index=0, error_array_index=0; + PINT_server_op *old_frame; + struct PVFS_servreq_tree_get_file_size *tree_req = NULL; + PINT_sm_msgpair_state *tree_msgarray = NULL; + PINT_sm_msgarray_op *tree_msgop = NULL; + struct PINT_server_tree_communicate_op *s_tree_comm = &(s_op->u.tree_communicate); + + + assert(s_op->req->op == PVFS_SERV_TREE_GET_FILE_SIZE); + 
gossip_debug(GOSSIP_SERVER_DEBUG, + "%s: num_pjmp_frames = %d\n" + ,__func__ + ,s_op->num_pjmp_frames); + + /* for each state machine spawned, pop a frame */ + for (i = 0; i < s_op->num_pjmp_frames; i++) + { + old_frame = PINT_sm_pop_frame(smcb, &task_id, &error_code, NULL); + if (old_frame) + { + /* sub-tree msgpair array*/ + tree_msgarray = old_frame->msgarray_op.msgarray; + + /* sub-tree msgop structure */ + tree_msgop = &(old_frame->msgarray_op); + } + + gossip_debug(GOSSIP_SERVER_DEBUG,"%s: Value of task_id is %d\n" + ,__func__,task_id); + gossip_debug(GOSSIP_SERVER_DEBUG + ,"%s: Value of handle_count is %d\n" + ,__func__ + ,old_frame->resp.u.tree_get_file_size.handle_count); + gossip_debug(GOSSIP_SERVER_DEBUG + ,"%s: Value of error_code is %d\n" + ,__func__ + ,error_code); + + if (task_id == REMOTE_OPERATION) + { + if (error_code != 0) + { /*sub-tree request failed; all handles identified in each msgpair + *must be retried. response structure is invalid; use request. + */ + gossip_debug(GOSSIP_SERVER_DEBUG + ,"%s: REMOTE OPERATION encountered error:%d\n" + ,__func__ + ,error_code); + for (j=0; j < tree_msgop->count; j++) + { + tree_req = &(tree_msgarray[j].req.u.tree_get_file_size); + uint32_t index = 0; + for (k=0; k < tree_req->num_data_files; k++) + { + index = tree_req->caller_handle_index + + k + + s_op->u.tree_communicate.handle_array_local_count; + s_op->resp.u.tree_get_file_size.size[index] = 0; + s_op->resp.u.tree_get_file_size.error[index] = error_code; + gossip_err("%s:index:%d \terror[%d]:%d\n" + ,__func__ + ,index + ,index + ,s_op->resp.u.tree_get_file_size.error[index]); + }/*end for*/ + }/*end for*/ + }/*end if non-zero error-code*/ + + PINT_msgpairarray_destroy(&old_frame->msgarray_op); + } + else { /* LOCAL OPERATION */ + gossip_debug(GOSSIP_SERVER_DEBUG, + "%s: size of local datafile is %lld\n" + ,__func__ + ,lld(old_frame->resp.u.getattr.attr.u.data.size)); + gossip_debug(GOSSIP_SERVER_DEBUG,"%s:handle_index:%d\n" + ,__func__ + 
,old_frame->local_index); + size_array_index = s_op->u.tree_communicate.local_join_size[ + old_frame->local_index]; + error_array_index=size_array_index; + s_op->resp.u.tree_get_file_size.size[size_array_index] = old_frame->resp.u.getattr.attr.u.data.size; + s_op->resp.u.tree_get_file_size.error[error_array_index] = error_code; + } + free(old_frame); + }/*end for*/ + + for (i=0; ihandle_count; i++) + { + gossip_debug(GOSSIP_SERVER_DEBUG,"%s: resp->size[%d]:%u " + "\tresp->error[%d]:%d\n" + ,__func__ + ,i + ,(unsigned int)s_tree->size[i] + ,i + ,s_tree->error[i]); + } + + gossip_debug(GOSSIP_SERVER_DEBUG,"%s: Resp op:%d \tResp status:%d\n" + ,__func__ + ,s_op->resp.op + ,s_op->resp.status); + gossip_debug(GOSSIP_SERVER_DEBUG,"%s: s_op->addr:%d\n" + ,__func__ + ,(int)s_op->addr); + gossip_debug(GOSSIP_SERVER_DEBUG,"%s: resp->caller_handle_index:%d " + "\tresp->handle_count:%d\n" + ,__func__ + ,s_tree->caller_handle_index + ,s_tree->handle_count); + + /*cleanup tree-communicate structure*/ + if (s_tree_comm->handle_array_local) + free(s_tree_comm->handle_array_local); + if (s_tree_comm->handle_array_remote) + free(s_tree_comm->handle_array_remote); + if (s_tree_comm->local_join_size) + free(s_tree_comm->local_join_size); + if (s_tree_comm->remote_join_size) + free(s_tree_comm->remote_join_size); + s_tree_comm->handle_array_local = NULL; + s_tree_comm->handle_array_remote = NULL; + s_tree_comm->local_join_size = NULL; + s_tree_comm->remote_join_size = NULL; + + js_p->error_code = 0; + return SM_ACTION_COMPLETE; +}/*end tree_get_file_size_work_cleanup*/ + +static int tree_get_file_size_cleanup( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PVFS_servresp_tree_get_file_size *s_tree = &(s_op->resp.u.tree_get_file_size); + + /*cleanup response structure*/ + if (s_tree->size) + free(s_tree->size); + if (s_tree->error) + free(s_tree->error); + + return(server_state_machine_complete(smcb)); +} + 
+static inline int PINT_get_object_ref_tree_remove( + struct PVFS_server_req *req, PVFS_fs_id *fs_id, PVFS_handle *handle) +{ + *fs_id = req->u.tree_remove.fs_id; + *handle = PVFS_HANDLE_NULL; + return 0; +}; + +struct PINT_server_req_params pvfs2_tree_remove_params = +{ + .string_name = "tree_remove", + .get_object_ref = PINT_get_object_ref_tree_remove, + .perm = PINT_SERVER_CHECK_NONE, + .access_type = PINT_server_req_modify, + .state_machine = &pvfs2_tree_remove_sm +}; + +static inline int PINT_get_object_ref_tree_get_file_size( + struct PVFS_server_req *req, PVFS_fs_id *fs_id, PVFS_handle *handle) +{ + *fs_id = req->u.tree_get_file_size.fs_id; + *handle = PVFS_HANDLE_NULL; + return 0; +}; + +struct PINT_server_req_params pvfs2_tree_get_file_size_params = +{ + .string_name = "tree_get_file_size", + .get_object_ref = PINT_get_object_ref_tree_get_file_size, + .perm = PINT_SERVER_CHECK_NONE, + .access_type = PINT_server_req_readonly, + .state_machine = &pvfs2_tree_get_file_size_sm +}; + diff --git a/src/server/truncate.sm b/src/server/truncate.sm index c154c50..186315d 100644 --- a/src/server/truncate.sm +++ b/src/server/truncate.sm @@ -62,7 +62,8 @@ static PINT_sm_action truncate_resize( ret = job_trove_bstream_resize( s_op->req->u.truncate.fs_id, s_op->req->u.truncate.handle, s_op->req->u.truncate.size, s_op->req->u.truncate.flags, - NULL, smcb, 0, js_p, &i, server_job_context); + NULL, smcb, 0, js_p, &i, server_job_context, + s_op->req->hints); return ret; } diff --git a/src/server/unexpected.sm b/src/server/unexpected.sm index afeda2e..f2b21f3 100644 --- a/src/server/unexpected.sm +++ b/src/server/unexpected.sm @@ -40,7 +40,6 @@ static PINT_sm_action unexpected_post( struct PINT_smcb *smcb, job_status_s *js_p) { int ret = -PVFS_EINVAL; - job_id_t j_id; struct PINT_server_op *s_op = (struct PINT_server_op *)PINT_sm_frame(smcb, PINT_FRAME_CURRENT); @@ -53,15 +52,13 @@ static PINT_sm_action unexpected_post( unexpected message (it is an output parameter). 
*/ ret = job_bmi_unexp(&s_op->unexp_bmi_buff, smcb, 0, - js_p, &j_id, JOB_NO_IMMED_COMPLETE, + js_p, &s_op->unexp_id, JOB_NO_IMMED_COMPLETE, server_job_context); - if (ret < 0) + if(ret == SM_ACTION_COMPLETE) { PVFS_perror_gossip("Error: job_bmi_unexp failure", ret); - js_p->error_code = ret; return SM_ACTION_TERMINATE; } - return SM_ACTION_DEFERRED; } @@ -82,14 +79,13 @@ static PINT_sm_action unexpected_map( /* If op was cancelled, kill the SM */ if (s_op->op_cancelled) { - /* is there a reason to do any cleanup? */ return SM_ACTION_TERMINATE; } /* Else move it to the inprogress_sop_list */ qlist_add_tail(&s_op->next, &inprogress_sop_list); /* start replacement unexpected recv */ - ret = server_post_unexpected_recv(js_p); + ret = server_post_unexpected_recv(); if (ret < 0) { /* TODO: do something here, the return value was @@ -97,12 +93,9 @@ static PINT_sm_action unexpected_map( * put something here to make it exit for the * moment. -Phil */ - gossip_lerr("Error: post unexpected failure when restarting.\n"); + PVFS_perror_gossip("pvfs2_unexpected_sm.unexpected_map: server_post_unexpected_recv() returned an error: ",ret); } - /* Bump up the reference count on the bmi address that we are using */ - BMI_set_info(s_op->unexp_bmi_buff.addr, BMI_INC_ADDR_REF, NULL); - /* restart as new request state machine */ memset(js_p, 0, sizeof(job_status_s)); ret = server_state_machine_start(smcb, js_p); diff --git a/src/server/unstuff.sm b/src/server/unstuff.sm new file mode 100644 index 0000000..102955e --- /dev/null +++ b/src/server/unstuff.sm @@ -0,0 +1,500 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + * + * Changes by Acxiom Corporation to add dirent_count field to attributes + * Copyright © Acxiom Corporation, 2005. 
+ */ + +#include +#include + +#include "server-config.h" +#include "pvfs2-server.h" +#include "pvfs2-attr.h" +#include "pvfs2-types.h" +#include "pvfs2-types-debug.h" +#include "pvfs2-util.h" +#include "pint-util.h" +#include "pvfs2-internal.h" +#include "pint-cached-config.h" + +#define STATE_UNSTUFF 33 + +%% + +machine pvfs2_unstuff_sm +{ + state prelude + { + jump pvfs2_prelude_sm; + success => getattr_setup; + default => final_response; + } + + state getattr_setup + { + run getattr_setup; + success => getattr_do_work; + default => final_response; + } + + state getattr_do_work + { + jump pvfs2_get_attr_work_sm; + default => getattr_interpret; + } + + state getattr_interpret + { + run getattr_interpret; + STATE_UNSTUFF => get_keyvals; + default => final_response; + } + + state get_keyvals + { + run get_keyvals; + success => inspect_keyvals; + default => final_response; + } + + state inspect_keyvals + { + run inspect_keyvals; + success => get_handles; + default => final_response; + } + + state get_handles + { + run get_handles; + default => set_handles_on_object; + } + + state set_handles_on_object + { + run set_handles_on_object; + success => update_dfile_count; + default => final_response; + } + + state update_dfile_count + { + run update_dfile_count; + success => remove_layout; + default => final_response; + } + + state remove_layout + { + run remove_layout; + default => final_response; + } + + state final_response + { + jump pvfs2_final_response_sm; + default => cleanup; + } + + state cleanup + { + run cleanup; + default => terminate; + } +} + +%% + +static PINT_sm_action get_keyvals( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + int kind = 0; + int ret; + job_id_t job_id; + + s_op->keyval_count = 2; + s_op->key_a = malloc(sizeof(*s_op->key_a) * s_op->keyval_count); + if(!s_op->key_a) + { + js_p->error_code = -PVFS_ENOMEM; + goto error_exit; + } + + s_op->val_a =
malloc(sizeof(*s_op->val_a) * s_op->keyval_count); + if(!s_op->val_a) + { + js_p->error_code = -PVFS_ENOMEM; + goto free_key_array; + } + + s_op->error_a = malloc(sizeof(*s_op->error_a) * s_op->keyval_count); + if(!s_op->error_a) + { + js_p->error_code = -PVFS_ENOMEM; + goto free_val_array; + } + memset(s_op->error_a, 0, sizeof(*s_op->error_a) * s_op->keyval_count); + + s_op->u.unstuff.encoded_layout = malloc(PVFS_REQ_LIMIT_LAYOUT); + if(!s_op->u.unstuff.encoded_layout) + { + js_p->error_code = -PVFS_ENOMEM; + goto free_error_array; + } + memset(s_op->u.unstuff.encoded_layout, 0, PVFS_REQ_LIMIT_LAYOUT); + + /* kind = 0: the encoded layout chosen when the file was stuffed */ + s_op->key_a[kind].buffer = Trove_Common_Keys[METAFILE_LAYOUT_KEY].key; + s_op->key_a[kind].buffer_sz = Trove_Common_Keys[METAFILE_LAYOUT_KEY].size; + + s_op->val_a[kind].buffer = s_op->u.unstuff.encoded_layout; + s_op->val_a[kind].buffer_sz = PVFS_REQ_LIMIT_LAYOUT; + + ++kind; + /* kind = 1: the number of datafiles originally requested */ + s_op->key_a[kind].buffer = Trove_Common_Keys[NUM_DFILES_REQ_KEY].key; + s_op->key_a[kind].buffer_sz = Trove_Common_Keys[NUM_DFILES_REQ_KEY].size; + + s_op->val_a[kind].buffer = &s_op->u.unstuff.num_dfiles_req; + s_op->val_a[kind].buffer_sz = sizeof(s_op->u.unstuff.num_dfiles_req); + + ret = job_trove_keyval_read_list( + s_op->req->u.unstuff.fs_id, + s_op->req->u.unstuff.handle, + s_op->key_a, s_op->val_a, s_op->error_a, s_op->keyval_count, + 0, NULL, smcb, 0, js_p, &job_id, server_job_context, + s_op->req->hints); + return ret; + +free_error_array: + free(s_op->error_a); + s_op->error_a = NULL; +free_val_array: + free(s_op->val_a); + s_op->val_a = NULL; +free_key_array: + free(s_op->key_a); + s_op->key_a = NULL; +error_exit: + return SM_ACTION_COMPLETE; +} + +static PINT_sm_action inspect_keyvals( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + char* tmpbuf; + + if(js_p->error_code == 0) + { + /* check keys; we have a big problem if one of
them is missing */ + if(s_op->error_a[0]) + { + js_p->error_code = s_op->error_a[0]; + } + else if(s_op->error_a[1]) + { + js_p->error_code = s_op->error_a[1]; + } + + if(js_p->error_code == 0) + { + + /* sanity check num dfiles */ + if(s_op->u.unstuff.num_dfiles_req < 1) + { + js_p->error_code = -PVFS_EINVAL; + } + + /* decode layout information */ + tmpbuf = s_op->u.unstuff.encoded_layout; + decode_PVFS_sys_layout(&tmpbuf, &s_op->u.unstuff.layout); + } + } + + /* pass along error code for next state to handle */ + return SM_ACTION_COMPLETE; +} + +static PINT_sm_action get_handles( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + int ret; + job_id_t j_id; + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + if(s_op->u.unstuff.layout.algorithm != PVFS_SYS_LAYOUT_ROUND_ROBIN) + { + /* see create.sm; for now the only layout that we use stuffing on is + * ROUND_ROBIN. The storage format supports other layouts if we + * want to add support for others later + */ + gossip_err("Error: unstuff doesn't support layout algorithm: %d\n", + s_op->u.unstuff.layout.algorithm); + js_p->error_code = -PVFS_ENOSYS; + return SM_ACTION_COMPLETE; + } + + /* allocate room for final number of handles we want */ + s_op->u.unstuff.dfile_array = + malloc(s_op->u.unstuff.num_dfiles_req * sizeof(PVFS_handle)); + if(!s_op->u.unstuff.dfile_array) + { + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + + /* the very first handle should be our current stuffed handle */ + s_op->u.unstuff.dfile_array[0] + = s_op->resp.u.unstuff.attr.u.meta.dfile_array[0]; + + if(s_op->u.unstuff.num_dfiles_req == 1) + { + /* special case; we are unstuffing to 1 datafile. 
There is no need + * to retrieve any additional handles + */ + js_p->error_code = 0; + return SM_ACTION_COMPLETE; + } + + /* NOTE: we are leaving the existing resp attr structure alone until we + * get a successful answer from get_handles() and commit to disk + */ + ret = job_precreate_pool_get_handles( + s_op->req->u.unstuff.fs_id, + (s_op->u.unstuff.num_dfiles_req-1), + PVFS_TYPE_DATAFILE, + NULL, + &s_op->u.unstuff.dfile_array[1], + 0, + smcb, + 0, + js_p, + &j_id, + server_job_context, + s_op->req->hints); + return ret; +} + +static PINT_sm_action set_handles_on_object( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + job_id_t j_id; + + gossip_debug(GOSSIP_SERVER_DEBUG, "job_precreate_pool_get_handles() returned %d\n", js_p->error_code); + + if(js_p->error_code < 0) + { + /* we failed to retrieve any handles */ + if(s_op->u.unstuff.dfile_array) + { + free(s_op->u.unstuff.dfile_array); + /* preserve error code */ + return SM_ACTION_COMPLETE; + } + } + + /* replace dfile information in attr structure */ + free(s_op->resp.u.unstuff.attr.u.meta.dfile_array); + s_op->resp.u.unstuff.attr.u.meta.dfile_array + = s_op->u.unstuff.dfile_array; + s_op->resp.u.unstuff.attr.u.meta.dfile_count + = s_op->u.unstuff.num_dfiles_req; + + /* write new datafile handles to disk */ + s_op->key.buffer = Trove_Common_Keys[METAFILE_HANDLES_KEY].key; + s_op->key.buffer_sz = Trove_Common_Keys[METAFILE_HANDLES_KEY].size; + + s_op->val.buffer = + s_op->resp.u.unstuff.attr.u.meta.dfile_array; + s_op->val.buffer_sz = + s_op->resp.u.unstuff.attr.u.meta.dfile_count * sizeof(PVFS_handle); + + return job_trove_keyval_write( + s_op->req->u.unstuff.fs_id, + s_op->req->u.unstuff.handle, + &s_op->key, + &s_op->val, + 0, NULL, smcb, 0, js_p, &j_id, server_job_context, + s_op->req->hints); +} + +static PINT_sm_action update_dfile_count( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = 
PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + job_id_t j_id; + + /* take the current on-disk attribute in object_attr form + * (acquired from prelude) with the modified datafile array, + * and convert it to the dspace form for writing. + */ + PVFS_object_attr_to_ds_attr(&s_op->resp.u.unstuff.attr, &s_op->ds_attr); + + return job_trove_dspace_setattr( + s_op->req->u.unstuff.fs_id, s_op->req->u.unstuff.handle, + &s_op->ds_attr, + TROVE_SYNC, + smcb, 0, js_p, &j_id, server_job_context, + s_op->req->hints); +} + +static PINT_sm_action remove_layout( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + job_id_t j_id; + + /* remove the layout and num_dfiles_req keyvals as the layout has + * now been chosen. + */ + return job_trove_keyval_remove_list( + s_op->req->u.unstuff.fs_id, s_op->req->u.unstuff.handle, + s_op->key_a, + s_op->val_a, + s_op->error_a, + 2, + TROVE_SYNC, NULL, + smcb, 0, js_p, &j_id, server_job_context, + s_op->req->hints); +} + +static PINT_sm_action cleanup( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + if(s_op->u.unstuff.layout.server_list.servers) + { + free(s_op->u.unstuff.layout.server_list.servers); + } + if(s_op->u.unstuff.encoded_layout) + { + free(s_op->u.unstuff.encoded_layout); + } + if(s_op->val_a) + { + free(s_op->val_a); + } + if(s_op->key_a) + { + free(s_op->key_a); + } + if(s_op->error_a) + { + free(s_op->error_a); + } + + PINT_free_object_attr(&s_op->resp.u.getattr.attr); + return (server_state_machine_complete(smcb)); +} + +static PINT_sm_action getattr_setup( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + struct PINT_server_op *getattr_op; + int ret; + + js_p->error_code = 0; + + getattr_op = malloc(sizeof(*getattr_op)); + if(!getattr_op) + { + js_p->error_code = -PVFS_ENOMEM; + return SM_ACTION_COMPLETE; + } + 
memset(getattr_op, 0, sizeof(*getattr_op)); + + /* TODO: can we come up with a way to clean up and nail down what has + * to be set in order to run this nested machine? This seems fragile. + */ + + /* need attrs that the prelude read already */ + getattr_op->attr = s_op->attr; + /* need a valid request structure for some generic features like access + * logging + */ + getattr_op->req = s_op->req; + /* need to fill in the input parameters to the getattr nested machine */ + getattr_op->u.getattr.fs_id = s_op->req->u.unstuff.fs_id; + getattr_op->u.getattr.handle = s_op->req->u.unstuff.handle; + getattr_op->u.getattr.attrmask = s_op->req->u.unstuff.attrmask; + + ret = PINT_sm_push_frame(smcb, 0, getattr_op); + if(ret < 0) + { + js_p->error_code = ret; + } + + return SM_ACTION_COMPLETE; +} + +static PINT_sm_action getattr_interpret( + struct PINT_smcb *smcb, job_status_s *js_p) +{ + struct PINT_server_op *getattr_op; + struct PINT_server_op *s_op; + int task_id; + int remaining; + + getattr_op = PINT_sm_pop_frame(smcb, &task_id, &js_p->error_code, + &remaining); + s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); + + s_op->resp.u.unstuff.attr = getattr_op->resp.u.getattr.attr; + + free(getattr_op); + + if(js_p->error_code) + { + gossip_debug(GOSSIP_SERVER_DEBUG, + "unstuff failed to retrieve existing attrs.\n"); + return(SM_ACTION_COMPLETE); + } + + if(s_op->resp.u.unstuff.attr.mask & PVFS_ATTR_META_UNSTUFFED) + { + gossip_debug(GOSSIP_SERVER_DEBUG, + "unstuff found file already unstuffed; return existing attrs.\n"); + js_p->error_code = 0; + return(SM_ACTION_COMPLETE); + } + +/* + gossip_err("Attributes show file as stuffed.\n"); +*/ + gossip_debug(GOSSIP_SERVER_DEBUG, + "unstuff found stuffed file.\n"); + js_p->error_code = STATE_UNSTUFF; + return SM_ACTION_COMPLETE; +} + + +PINT_GET_OBJECT_REF_DEFINE(unstuff); + +struct PINT_server_req_params pvfs2_unstuff_params = +{ + .string_name = "unstuff", + .perm = PINT_SERVER_CHECK_ATTR, + .access_type = PINT_server_req_modify, 
+ .sched_policy = PINT_SERVER_REQ_SCHEDULE, + .get_object_ref = PINT_get_object_ref_unstuff, + .state_machine = &pvfs2_unstuff_sm +}; + +/* + * Local variables: + * mode: c + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ + diff --git a/test/Makefile.in b/test/Makefile.in index b7331e8..829d83f 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -84,7 +84,9 @@ INCLUDES := \ ${pvfs2_srcdir}/src/proto \ ${pvfs2_srcdir}/src/server \ ${pvfs2_srcdir}/src/server/request-scheduler \ - ${pvfs2_srcdir} + ${pvfs2_srcdir} \ + ${pvfs2_srcdir} \ + ${pvfs2_builddir}/include ################################################################# # Setup global flags @@ -318,7 +320,7 @@ $(MPIIOTESTS): %: %.o $(LIBRARIES) # default rule for building executables from object files %: %.o $(LIBRARIES) $(MISCOBJS) $(SHAREDOBJS) $(Q) " LD $@" - $(E)$(LD) $(LDFLAGS) $(call modldflags,$<) $< $(MISCOBJS) $(SHAREDOBJS) $(LIBS) -o $@ + $(E)$(LD) $(LDFLAGS) $< $(MISCOBJS) $(SHAREDOBJS) $(LIBS) $(call modldflags,$<) -o $@ # default rule for building objects %.o: %.c diff --git a/test/automated/README.tests b/test/automated/README.tests index 6e80640..bea8f1d 100644 --- a/test/automated/README.tests +++ b/test/automated/README.tests @@ -1,5 +1,5 @@ # -# $Date: 2006/09/13 20:23:03 $ +# $Date: 2010-04-30 20:00:56 $ # # A brief description of the contents of the test/automated directory diff --git a/test/automated/mpi-vfs-tests.d/Makefile b/test/automated/mpi-vfs-tests.d/Makefile new file mode 100644 index 0000000..79ac53a --- /dev/null +++ b/test/automated/mpi-vfs-tests.d/Makefile @@ -0,0 +1,11 @@ + +CC=mpicc +CFLAGS=-g -O0 + +all: fsx-mpi + +fsx-mpi: fsx-mpi.c + $(CC) $(CFLAGS) -o fsx-mpi fsx-mpi.c + +clean: ; + @rm fsx-mpi diff --git a/test/automated/mpi-vfs-tests.d/fsx-mpi.c b/test/automated/mpi-vfs-tests.d/fsx-mpi.c new file mode 100644 index 0000000..b77d1b3 --- /dev/null +++ b/test/automated/mpi-vfs-tests.d/fsx-mpi.c @@ -0,0 +1,1281 @@ +/* 
+ * Copyright (c) 1998-2001 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 2.0 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + * + * File: fsx.c + * Author: Avadis Tevanian, Jr. + * + * File system exerciser. + * + * Rewrite and enhancements 1998-2001 Conrad Minshall -- conrad@mac.com + * + * Various features from Joe Sokol, Pat Dirks, and Clark Warner. + * + * Small changes to work under Linux -- davej@suse.de + * + * Sundry porting patches from Guy Harris 12/2001 + * + * Checks for mmap last-page zero fill. + * + * Updated license to APSL 2.0, 2004/7/27 - Jordan Hubbard + * + * Modified to test file system semantics using MPI. 
07/2008 - Sumit Narayan + * + * $FreeBSD: src/tools/regression/fsx/fsx.c,v 1.5 2007/06/26 13:51:53 delphij Exp $ + * + */ + +#include +#include +#include +#ifdef _UWIN +# include +# include +# include +# include +#endif +#include +#include +#ifndef MAP_FILE +# define MAP_FILE 0 +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#define NUMPRINTCOLUMNS 32 /* # columns of data to print on each line */ + +#define FSX_MPI /* Enable tests using MPI */ + +#ifdef FSX_MPI +#include +int rank; +#endif + +/* + * A log entry is an operation and a bunch of arguments. + */ + +struct log_entry { + int operation; + int args[3]; +}; + +#define LOGSIZE 1000 + +struct log_entry oplog[LOGSIZE]; /* the log */ +int logptr = 0; /* current position in log */ +int logcount = 0; /* total ops */ + +/* + * Define operations + */ + +#define OP_READ 1 +#define OP_WRITE 2 +#define OP_TRUNCATE 3 +#define OP_CLOSEOPEN 4 +#define OP_MAPREAD 5 +#define OP_MAPWRITE 6 +#define OP_SKIPPED 7 + +int page_size; +int page_mask; + +char *original_buf; /* a pointer to the original data */ +char *good_buf; /* a pointer to the correct data */ +char *temp_buf; /* a pointer to the current data */ +char *fname; /* name of our test file */ +int fd; /* fd for our test file */ + +off_t file_size = 0; +off_t biggest = 0; +char state[256]; +unsigned long testcalls = 0; /* calls to function "test" */ + +unsigned long simulatedopcount = 0; /* -b flag */ +int closeprob = 0; /* -c flag */ +int debug = 0; /* -d flag */ +unsigned long debugstart = 0; /* -D flag */ +unsigned long maxfilelen = 256 * 1024; /* -l flag */ +int sizechecks = 1; /* -n flag disables them */ +int maxoplen = 64 * 1024; /* -o flag */ +int quiet = 0; /* -q flag */ +unsigned long progressinterval = 0; /* -p flag */ +int readbdy = 1; /* -r flag */ +int style = 0; /* -s flag */ +int truncbdy = 1; /* -t flag */ +int writebdy = 1; /* -w flag */ +long monitorstart = -1; /* -m flag */ +long monitorend = -1; /* -m flag 
*/ +int lite = 0; /* -L flag */ +long numops = -1; /* -N flag */ +int randomoplen = 1; /* -O flag disables it */ +int seed = 1; /* -S flag */ +int mapped_writes = 1; /* -W flag disables */ +int mapped_reads = 1; /* -R flag disables it */ +int fsxgoodfd = 0; +FILE * fsxlogf = NULL; +int badoff = -1; +int closeopen = 0; + + +void +vwarnc(code, fmt, ap) + int code; + const char *fmt; + va_list ap; +{ + fprintf(stderr, "fsx: "); + if (fmt != NULL) { + vfprintf(stderr, fmt, ap); + fprintf(stderr, ": "); + } + fprintf(stderr, "%s\n", strerror(code)); +} + + +void +warn(const char * fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vwarnc(errno, fmt, ap); + va_end(ap); +} + + +void +prt(char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + vfprintf(stdout, fmt, args); + + va_end(args); + + if (fsxlogf) { + va_start(args, fmt); + vfprintf(fsxlogf, fmt, args); + va_end(args); + } +} + +void +prterr(char *prefix) +{ + prt("%s%s%s\n", prefix, prefix ? ": " : "", strerror(errno)); +} + + +void +log4(int operation, int arg0, int arg1, int arg2) +{ + struct log_entry *le; + + le = &oplog[logptr]; + le->operation = operation; + if (closeopen) + le->operation = ~ le->operation; + le->args[0] = arg0; + le->args[1] = arg1; + le->args[2] = arg2; + logptr++; + logcount++; + if (logptr >= LOGSIZE) + logptr = 0; +} + + +void +logdump(void) +{ + int i, count, down; + struct log_entry *lp; + + prt("LOG DUMP (%d total operations):\n", logcount); + if (logcount < LOGSIZE) { + i = 0; + count = logcount; + } else { + i = logptr; + count = LOGSIZE; + } + for ( ; count > 0; count--) { + int opnum; + + opnum = i+1 + (logcount/LOGSIZE)*LOGSIZE; + prt("%d(%d mod 256): ", opnum, opnum%256); + lp = &oplog[i]; + if ((closeopen = lp->operation < 0)) + lp->operation = ~ lp->operation; + + switch (lp->operation) { + case OP_MAPREAD: + prt("MAPREAD\t0x%x thru 0x%x\t(0x%x bytes)", + lp->args[0], lp->args[0] + lp->args[1] - 1, + lp->args[1]); + if (badoff >= lp->args[0] && badoff < + lp->args[0] + 
lp->args[1]) + prt("\t***RRRR***"); + break; + case OP_MAPWRITE: + prt("MAPWRITE 0x%x thru 0x%x\t(0x%x bytes)", + lp->args[0], lp->args[0] + lp->args[1] - 1, + lp->args[1]); + if (badoff >= lp->args[0] && badoff < + lp->args[0] + lp->args[1]) + prt("\t******WWWW"); + break; + case OP_READ: + prt("READ\t0x%x thru 0x%x\t(0x%x bytes)", + lp->args[0], lp->args[0] + lp->args[1] - 1, + lp->args[1]); + if (badoff >= lp->args[0] && + badoff < lp->args[0] + lp->args[1]) + prt("\t***RRRR***"); + break; + case OP_WRITE: + prt("WRITE\t0x%x thru 0x%x\t(0x%x bytes)", + lp->args[0], lp->args[0] + lp->args[1] - 1, + lp->args[1]); + if (lp->args[0] > lp->args[2]) + prt(" HOLE"); + else if (lp->args[0] + lp->args[1] > lp->args[2]) + prt(" EXTEND"); + if ((badoff >= lp->args[0] || badoff >=lp->args[2]) && + badoff < lp->args[0] + lp->args[1]) + prt("\t***WWWW"); + break; + case OP_TRUNCATE: + down = lp->args[0] < lp->args[1]; + prt("TRUNCATE %s\tfrom 0x%x to 0x%x", + down ? "DOWN" : "UP", lp->args[1], lp->args[0]); + if (badoff >= lp->args[!down] && + badoff < lp->args[!!down]) + prt("\t******WWWW"); + break; + case OP_SKIPPED: + prt("SKIPPED (no operation)"); + break; + default: + prt("BOGUS LOG ENTRY (operation code = %d)!", + lp->operation); + } + if (closeopen) + prt("\n\t\tCLOSE/OPEN"); + prt("\n"); + i++; + if (i == LOGSIZE) + i = 0; + } +} + + +void +save_buffer(char *buffer, off_t bufferlength, int fd) +{ + off_t ret; + ssize_t byteswritten; + + if (fd <= 0 || bufferlength == 0) + return; + + if (bufferlength > SSIZE_MAX) { + prt("fsx flaw: overflow in save_buffer\n"); + exit(67); + } + if (lite) { + off_t size_by_seek = lseek(fd, (off_t)0, SEEK_END); + if (size_by_seek == (off_t)-1) + prterr("save_buffer: lseek eof"); + else if (bufferlength > size_by_seek) { + warn("save_buffer: .fsxgood file too short... 
will save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek, + (unsigned long long)bufferlength); + bufferlength = size_by_seek; + } + } + + ret = lseek(fd, (off_t)0, SEEK_SET); + if (ret == (off_t)-1) + prterr("save_buffer: lseek 0"); + + byteswritten = write(fd, buffer, (size_t)bufferlength); + if (byteswritten != bufferlength) { + if (byteswritten == -1) + prterr("save_buffer write"); + else + warn("save_buffer: short write, 0x%x bytes instead of 0x%llx\n", + (unsigned)byteswritten, + (unsigned long long)bufferlength); + } +} + + +void +report_failure(int status) +{ + logdump(); + + if (fsxgoodfd) { + if (good_buf) { + save_buffer(good_buf, file_size, fsxgoodfd); + prt("Correct content saved for comparison\n"); + prt("(maybe hexdump \"%s\" vs \"%s.fsxgood\")\n", + fname, fname); + } + close(fsxgoodfd); + } + exit(status); +} + + +#define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \ + *(((unsigned char *)(cp)) + 1))) + +void +check_buffers(unsigned offset, unsigned size) +{ + unsigned char c, t; + unsigned i = 0; + unsigned n = 0; + unsigned op = 0; + unsigned bad = 0; + + if (memcmp(good_buf + offset, temp_buf, size) != 0) { + prt("READ BAD DATA: offset = 0x%x, size = 0x%x\n", + offset, size); + prt("OFFSET\tGOOD\tBAD\tRANGE\n"); + while (size > 0) { + c = good_buf[offset]; + t = temp_buf[i]; + if (c != t) { + if (n == 0) { + bad = short_at(&temp_buf[i]); + prt("0x%5x\t0x%04x\t0x%04x", offset, + short_at(&good_buf[offset]), bad); + op = temp_buf[offset & 1 ? 
i+1 : i]; + } + n++; + badoff = offset; + } + offset++; + i++; + size--; + } + if (n) { + prt("\t0x%5x\n", n); + if (bad) + prt("operation# (mod 256) for the bad data may be %u\n", ((unsigned)op & 0xff)); + else + prt("operation# (mod 256) for the bad data unknown, check HOLE and EXTEND ops\n"); + } else + prt("????????????????\n"); + report_failure(110); + } +} + +void +check_size(void) +{ + struct stat statbuf; + off_t size_by_seek; + + if (fstat(fd, &statbuf)) { + prterr("check_size: fstat"); + statbuf.st_size = -1; + } + size_by_seek = lseek(fd, (off_t)0, SEEK_END); + if (file_size != statbuf.st_size || file_size != size_by_seek) { + prt("Size error: expected 0x%llx stat 0x%llx seek 0x%llx\n", + (unsigned long long)file_size, + (unsigned long long)statbuf.st_size, + (unsigned long long)size_by_seek); + report_failure(120); + } +} + + +void +check_trunc_hack(void) +{ + struct stat statbuf; + + ftruncate(fd, (off_t)0); + ftruncate(fd, (off_t)100000); + fstat(fd, &statbuf); + if (statbuf.st_size != (off_t)100000) { + prt("no extend on truncate! 
not posix!\n"); + exit(130); + } + ftruncate(fd, (off_t)0); +} + + +void +doread(unsigned offset, unsigned size) +{ + off_t ret; + unsigned iret; + + offset -= offset % readbdy; + if (size == 0) { + if (!quiet && testcalls > simulatedopcount) + prt("skipping zero size read\n"); + log4(OP_SKIPPED, OP_READ, offset, size); + return; + } + if (size + offset > file_size) { + if (!quiet && testcalls > simulatedopcount) + prt("skipping seek/read past end of file\n"); + log4(OP_SKIPPED, OP_READ, offset, size); + return; + } + + log4(OP_READ, offset, size, 0); + + if (testcalls <= simulatedopcount) + return; + + if (!quiet && ((progressinterval && + testcalls % progressinterval == 0) || + (debug && + (monitorstart == -1 || + (offset + size > monitorstart && + (monitorend == -1 || offset <= monitorend)))))) + prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls, + offset, offset + size - 1, size); + ret = lseek(fd, (off_t)offset, SEEK_SET); + if (ret == (off_t)-1) { + prterr("doread: lseek"); + report_failure(140); + } + iret = read(fd, temp_buf, size); + if (iret != size) { + if (iret == -1) + prterr("doread: read"); + else + prt("short read: 0x%x bytes instead of 0x%x\n", + iret, size); + report_failure(141); + } + check_buffers(offset, size); +} + + +void +check_eofpage(char *s, unsigned offset, char *p, int size) +{ + uintptr_t last_page, should_be_zero; + + if (offset + size <= (file_size & ~page_mask)) + return; + /* + * we landed in the last page of the file + * test to make sure the VM system provided 0's + * beyond the true end of the file mapping + * (as required by mmap def in 1996 posix 1003.1) + */ + last_page = ((uintptr_t)p + (offset & page_mask) + size) & ~page_mask; + + for (should_be_zero = last_page + (file_size & page_mask); + should_be_zero < last_page + page_size; + should_be_zero++) + if (*(char *)should_be_zero) { + prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%x is 0x%04x\n", + s, file_size - 1, should_be_zero & page_mask, + 
short_at(should_be_zero)); + report_failure(205); + } +} + + +void +domapread(unsigned offset, unsigned size) +{ + unsigned pg_offset; + unsigned map_size; + char *p; + + offset -= offset % readbdy; + if (size == 0) { + if (!quiet && testcalls > simulatedopcount) + prt("skipping zero size read\n"); + log4(OP_SKIPPED, OP_MAPREAD, offset, size); + return; + } + if (size + offset > file_size) { + if (!quiet && testcalls > simulatedopcount) + prt("skipping seek/read past end of file\n"); + log4(OP_SKIPPED, OP_MAPREAD, offset, size); + return; + } + + log4(OP_MAPREAD, offset, size, 0); + + if (testcalls <= simulatedopcount) + return; + + if (!quiet && ((progressinterval && + testcalls % progressinterval == 0) || + (debug && + (monitorstart == -1 || + (offset + size > monitorstart && + (monitorend == -1 || offset <= monitorend)))))) + prt("%lu mapread\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls, + offset, offset + size - 1, size); + + pg_offset = offset & page_mask; + map_size = pg_offset + size; + + if ((p = (char *)mmap(0, map_size, PROT_READ, MAP_FILE | MAP_SHARED, fd, + (off_t)(offset - pg_offset))) == (char *)-1) { + prterr("domapread: mmap"); + report_failure(190); + } + memcpy(temp_buf, p + pg_offset, size); + + check_eofpage("Read", offset, p, size); + + if (munmap(p, map_size) != 0) { + prterr("domapread: munmap"); + report_failure(191); + } + + check_buffers(offset, size); +} + + +void +gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size) +{ + while (size--) { + good_buf[offset] = testcalls % 256; + if (offset % 2) + good_buf[offset] += original_buf[offset]; + offset++; + } +} + + +void +dowrite(unsigned offset, unsigned size) +{ + off_t ret; + unsigned iret; + + offset -= offset % writebdy; + if (size == 0) { + if (!quiet && testcalls > simulatedopcount) + prt("skipping zero size write\n"); + log4(OP_SKIPPED, OP_WRITE, offset, size); + return; + } + + log4(OP_WRITE, offset, size, file_size); + + gendata(original_buf, good_buf, offset, 
size); + if (file_size < offset + size) { + if (file_size < offset) + memset(good_buf + file_size, '\0', offset - file_size); + file_size = offset + size; + if (lite) { + warn("Lite file size bug in fsx!"); + report_failure(149); + } + } + + if (testcalls <= simulatedopcount) + return; + + if (!quiet && ((progressinterval && + testcalls % progressinterval == 0) || + (debug && + (monitorstart == -1 || + (offset + size > monitorstart && + (monitorend == -1 || offset <= monitorend)))))) + prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls, + offset, offset + size - 1, size); + ret = lseek(fd, (off_t)offset, SEEK_SET); + if (ret == (off_t)-1) { + prterr("dowrite: lseek"); + report_failure(150); + } + +#ifdef FSX_MPI + if(rank == 0) + { +#endif + iret = write(fd, good_buf + offset, size); + if (iret != size) { + if (iret == -1) + prterr("dowrite: write"); + else + prt("short write: 0x%x bytes instead of 0x%x\n", + iret, size); + report_failure(151); + } +#ifdef FSX_MPI + } +#endif +} + +void +domapwrite(unsigned offset, unsigned size) +{ + unsigned pg_offset; + unsigned map_size; + off_t cur_filesize; + char *p; + + offset -= offset % writebdy; + if (size == 0) { + if (!quiet && testcalls > simulatedopcount) + prt("skipping zero size write\n"); + log4(OP_SKIPPED, OP_MAPWRITE, offset, size); + return; + } + cur_filesize = file_size; + + log4(OP_MAPWRITE, offset, size, 0); + + gendata(original_buf, good_buf, offset, size); + if (file_size < offset + size) { + if (file_size < offset) + memset(good_buf + file_size, '\0', offset - file_size); + file_size = offset + size; + if (lite) { + warn("Lite file size bug in fsx!"); + report_failure(200); + } + } + + if (testcalls <= simulatedopcount) + return; + + if (!quiet && ((progressinterval && + testcalls % progressinterval == 0) || + (debug && + (monitorstart == -1 || + (offset + size > monitorstart && + (monitorend == -1 || offset <= monitorend)))))) + prt("%lu mapwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls, + 
offset, offset + size - 1, size); + + if (file_size > cur_filesize) { + if (ftruncate(fd, file_size) == -1) { + prterr("domapwrite: ftruncate"); + exit(201); + } + } + pg_offset = offset & page_mask; + map_size = pg_offset + size; + + if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE, + MAP_FILE | MAP_SHARED, fd, + (off_t)(offset - pg_offset))) == (char *)-1) { + prterr("domapwrite: mmap"); + report_failure(202); + } + memcpy(p + pg_offset, good_buf + offset, size); + if (msync(p, map_size, 0) != 0) { + prterr("domapwrite: msync"); + report_failure(203); + } + + check_eofpage("Write", offset, p, size); + + if (munmap(p, map_size) != 0) { + prterr("domapwrite: munmap"); + report_failure(204); + } +} + +void +dotruncate(unsigned size) +{ + int oldsize = file_size; + + size -= size % truncbdy; + if (size > biggest) { + biggest = size; +#ifdef FSX_MPI + if(rank == 0) +#endif + if (!quiet && testcalls > simulatedopcount) + prt("truncating to largest ever: 0x%x\n", size); + } + + log4(OP_TRUNCATE, size, (unsigned)file_size, 0); + + if (size > file_size) + memset(good_buf + file_size, '\0', size - file_size); + file_size = size; + + if (testcalls <= simulatedopcount) + return; + +#ifdef FSX_MPI + if(rank == 0) + { +#endif + if ((progressinterval && testcalls % progressinterval == 0) || + (debug && (monitorstart == -1 || monitorend == -1 || + size <= monitorend))) + prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size); + if (ftruncate(fd, (off_t)size) == -1) { + prt("ftruncate1: %x\n", size); + prterr("dotruncate: ftruncate"); + report_failure(160); + } +#ifdef FSX_MPI + } + MPI_Bcast(&file_size, 1, MPI_LONG, 0, MPI_COMM_WORLD); +#endif +} + + +void +writefileimage() +{ + ssize_t iret; + + if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) { + prterr("writefileimage: lseek"); + report_failure(171); + } + iret = write(fd, good_buf, file_size); + if ((off_t)iret != file_size) { + if (iret == -1) + prterr("writefileimage: write"); + else + prt("short write: 
0x%x bytes instead of 0x%llx\n", + iret, (unsigned long long)file_size); + report_failure(172); + } + if (lite ? 0 : ftruncate(fd, file_size) == -1) { + prt("ftruncate2: %llx\n", (unsigned long long)file_size); + prterr("writefileimage: ftruncate"); + report_failure(173); + } +} + + +void +docloseopen(void) +{ + if (testcalls <= simulatedopcount) + return; + + if (debug) + prt("%lu close/open\n", testcalls); + if (close(fd)) { + prterr("docloseopen: close"); + report_failure(180); + } + fd = open(fname, O_RDWR, 0); + if (fd < 0) { + prterr("docloseopen: open"); + report_failure(181); + } +} + + +void +test(void) +{ + unsigned long offset; + unsigned long size = maxoplen; + unsigned long rv = random(); + unsigned long op = rv % (3 + !lite + mapped_writes); + + /* turn off the map read if necessary */ + + if (op == 2 && !mapped_reads) + op = 0; + +#ifdef FSX_MPI + MPI_Bcast(&op, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); + if(rank == 0) +#endif + if (simulatedopcount > 0 && testcalls == simulatedopcount) + writefileimage(); + + testcalls++; + + if (closeprob) + { + closeopen = (rv >> 3) < (1 << 28) / closeprob; +#ifdef FSX_MPI + MPI_Bcast(&closeopen, 1, MPI_INT, 0, MPI_COMM_WORLD); +#endif + } + if (debugstart > 0 && testcalls >= debugstart) + debug = 1; + + if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0) + prt("%lu...\n", testcalls); + + /* + * READ: op = 0 + * WRITE: op = 1 + * MAPREAD: op = 2 + * TRUNCATE: op = 3 + * MAPWRITE: op = 3 or 4 + */ + + if (lite ? 0 : op == 3 && style == 0) /* vanilla truncate? */ + dotruncate(random() % maxfilelen); + else { + if (randomoplen) + size = random() % (maxoplen+1); + if (lite ? 0 : op == 3) + dotruncate(size); + else { + offset = random(); + if (op == 1 || op == (lite ? 
3 : 4)) { + offset %= maxfilelen; + if (offset + size > maxfilelen) + size = maxfilelen - offset; + if (op != 1) + domapwrite(offset, size); + else + { +#ifdef FSX_MPI + MPI_Bcast(&offset, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); + MPI_Bcast(&size, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); +#endif + dowrite(offset, size); + } + +#ifdef FSX_MPI + MPI_Barrier(MPI_COMM_WORLD); + if(rank != 0 && size != 0) + doread(offset, size); +#endif + } else { + if (file_size) + offset %= file_size; + else + offset = 0; + if (offset + size > file_size) + size = file_size - offset; + if (op != 0) + domapread(offset, size); + else + { +#ifdef FSX_MPI + MPI_Bcast(&offset, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); + MPI_Bcast(&size, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); +#endif + doread(offset, size); + } + } + } + } + if (sizechecks && testcalls > simulatedopcount) + { +#ifdef FSX_MPI + MPI_Barrier(MPI_COMM_WORLD); +#endif + check_size(); + } + if (closeopen) + docloseopen(); +} + + +void +cleanup(sig) + int sig; +{ + if (sig) + prt("signal %d\n", sig); + prt("testcalls = %lu\n", testcalls); + exit(sig); +} + + +void +usage(void) +{ + fprintf(stdout, "usage: %s", + "fsx [-dnqLOW] [-b opnum] [-c Prob] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\ + -b opnum: beginning operation number (default 1)\n\ + -c P: 1 in P chance of file close+open at each op (default infinity)\n\ + -d: debug output for all operations\n\ + -l flen: the upper bound on file size (default 262144)\n\ + -m startop:endop: monitor (print debug output) specified byte range (default 0:infinity)\n\ + -n: no verifications of file size\n\ + -o oplen: the upper bound on operation size (default 65536)\n\ + -p progressinterval: debug output at specified operation interval\n\ + -q: quieter operation\n\ + -r readbdy: 4096 would make reads page aligned (default 1)\n\ + -s style: 1 gives smaller 
truncates (default 0)\n\ + -t truncbdy: 4096 would make truncates page aligned (default 1)\n\ + -w writebdy: 4096 would make writes page aligned (default 1)\n\ + -D startingop: debug output starting at specified operation\n\ + -L: fsxLite - no file creations & no file size changes\n\ + -N numops: total # operations to do (default infinity)\n\ + -O: use oplen (see -o flag) for every op (default random)\n\ + -P dirpath: save .fsxlog and .fsxgood files in dirpath (default ./)\n\ + -S seed: for random # generator (default 1) 0 gets timestamp\n\ + -W: mapped write operations DISabled\n\ + -R: mapped read operations DISabled)\n\ + fname: this filename is REQUIRED (no default)\n"); + exit(90); +} + + +int +getnum(char *s, char **e) +{ + int ret = -1; + + *e = (char *) 0; + ret = strtol(s, e, 0); + if (*e) + switch (**e) { + case 'b': + case 'B': + ret *= 512; + *e = *e + 1; + break; + case 'k': + case 'K': + ret *= 1024; + *e = *e + 1; + break; + case 'm': + case 'M': + ret *= 1024*1024; + *e = *e + 1; + break; + case 'w': + case 'W': + ret *= 4; + *e = *e + 1; + break; + } + return (ret); +} + + +int +main(int argc, char **argv) +{ + int i, ch; + char *endp; + char goodfile[1024]; + char logfile[1024]; + +#ifdef FSX_MPI + int num_processors, ss; + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); +#endif + + goodfile[0] = 0; + logfile[0] = 0; + + page_size = getpagesize(); + page_mask = page_size - 1; + + setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */ + + while ((ch = getopt(argc, argv, "b:c:dl:m:no:p:qr:s:t:w:D:LN:OP:RS:W")) + != EOF) + switch (ch) { + case 'b': + simulatedopcount = getnum(optarg, &endp); + if (!quiet) + fprintf(stdout, "Will begin at operation %ld\n", + simulatedopcount); + if (simulatedopcount == 0) + usage(); + simulatedopcount -= 1; + break; + case 'c': + closeprob = getnum(optarg, &endp); + if (!quiet) + fprintf(stdout, + "Chance of close/open is 1 in %d\n", + closeprob); + if (closeprob <= 0) + usage(); + break; 
+ case 'd': + debug = 1; + break; + case 'l': + maxfilelen = getnum(optarg, &endp); + if (maxfilelen <= 0) + usage(); + break; + case 'm': + monitorstart = getnum(optarg, &endp); + if (monitorstart < 0) + usage(); + if (!endp || *endp++ != ':') + usage(); + monitorend = getnum(endp, &endp); + if (monitorend < 0) + usage(); + if (monitorend == 0) + monitorend = -1; /* aka infinity */ + debug = 1; + case 'n': + sizechecks = 0; + break; + case 'o': + maxoplen = getnum(optarg, &endp); + if (maxoplen <= 0) + usage(); + break; + case 'p': + progressinterval = getnum(optarg, &endp); + if (progressinterval < 0) + usage(); + break; + case 'q': + quiet = 1; + break; + case 'r': + readbdy = getnum(optarg, &endp); + if (readbdy <= 0) + usage(); + break; + case 's': + style = getnum(optarg, &endp); + if (style < 0 || style > 1) + usage(); + break; + case 't': + truncbdy = getnum(optarg, &endp); + if (truncbdy <= 0) + usage(); + break; + case 'w': + writebdy = getnum(optarg, &endp); + if (writebdy <= 0) + usage(); + break; + case 'D': + debugstart = getnum(optarg, &endp); + if (debugstart < 1) + usage(); + break; + case 'L': + lite = 1; + break; + case 'N': + numops = getnum(optarg, &endp); + if (numops < 0) + usage(); + break; + case 'O': + randomoplen = 0; + break; + case 'P': + strncpy(goodfile, optarg, sizeof(goodfile)); + strcat(goodfile, "/"); + strncpy(logfile, optarg, sizeof(logfile)); + strcat(logfile, "/"); + break; + case 'R': + mapped_reads = 0; + if (!quiet) + fprintf(stdout, "mapped reads DISABLED\n"); + break; + case 'S': + seed = getnum(optarg, &endp); + if (seed == 0) + seed = time(0) % 10000; + if (!quiet) + fprintf(stdout, "Seed set to %d\n", seed); + if (seed < 0) + usage(); + break; + case 'W': + mapped_writes = 0; + if (!quiet) + fprintf(stdout, "mapped writes DISABLED\n"); + break; + + default: + usage(); + /* NOTREACHED */ + } + argc -= optind; + argv += optind; + if (argc != 1) + usage(); + fname = argv[0]; + + signal(SIGHUP, cleanup); + signal(SIGINT, 
cleanup); + signal(SIGPIPE, cleanup); + signal(SIGALRM, cleanup); + signal(SIGTERM, cleanup); + signal(SIGXCPU, cleanup); + signal(SIGXFSZ, cleanup); + signal(SIGVTALRM, cleanup); + signal(SIGUSR1, cleanup); + signal(SIGUSR2, cleanup); + + initstate(seed, state, 256); + setstate(state); +#ifdef FSX_MPI + if(rank == 0) + { +#endif + fd = open(fname, O_RDWR|(lite ? 0 : O_CREAT|O_TRUNC), 0666); + if (fd < 0) { + prterr(fname); + exit(91); + } + strncat(goodfile, fname, 256); + strcat (goodfile, ".fsxgood"); + fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666); + if (fsxgoodfd < 0) { + prterr(goodfile); + exit(92); + } + strncat(logfile, fname, 256); + strcat (logfile, ".fsxlog"); + fsxlogf = fopen(logfile, "w"); + if (fsxlogf == NULL) { + prterr(logfile); + exit(93); + } + if (lite) { + off_t ret; + file_size = maxfilelen = lseek(fd, (off_t)0, SEEK_END); + if (file_size == (off_t)-1) { + prterr(fname); + warn("main: lseek eof"); + exit(94); + } + ret = lseek(fd, (off_t)0, SEEK_SET); + if (ret == (off_t)-1) { + prterr(fname); + warn("main: lseek 0"); + exit(95); + } + } + original_buf = (char *) malloc(maxfilelen); + for (i = 0; i < maxfilelen; i++) + original_buf[i] = random() % 256; + good_buf = (char *) malloc(maxfilelen); + memset(good_buf, '\0', maxfilelen); + temp_buf = (char *) malloc(maxoplen); + memset(temp_buf, '\0', maxoplen); + if (lite) { /* zero entire existing file */ + ssize_t written; + + written = write(fd, good_buf, (size_t)maxfilelen); + if (written != maxfilelen) { + if (written == -1) { + prterr(fname); + warn("main: error on write"); + } else + warn("main: short write, 0x%x bytes instead of 0x%x\n", + (unsigned)written, maxfilelen); + exit(98); + } + } else + check_trunc_hack(); +#ifdef FSX_MPI + } + else + { + original_buf = (char *) malloc(maxfilelen); + good_buf = (char *) malloc(maxfilelen); + memset(good_buf, '\0', maxfilelen); + temp_buf = (char *) malloc(maxoplen); + memset(temp_buf, '\0', maxoplen); + } + + /* Broadcast the original 
buffer. */ + MPI_Bcast(original_buf, maxfilelen, MPI_CHAR, 0, MPI_COMM_WORLD); + + if(rank != 0) + { + fd = open(fname, O_RDWR| O_TRUNC, 0666); + if(fd < 0) + { + prterr(fname); + exit(91); + } + + strncat(goodfile, fname, 256); + strcat (goodfile, ".fsxgood"); + fsxgoodfd = open(goodfile, O_RDWR | O_TRUNC, 0666); + if (fsxgoodfd < 0) { + prterr(goodfile); + exit(92); + } + + strncat(logfile, fname, 256); + strcat (logfile, ".fsxlog"); + fsxlogf = fopen(logfile, "w"); + if (fsxlogf == NULL) { + prterr(logfile); + exit(93); + } + } + + /* Barrier - let all process have the file open. */ + MPI_Barrier(MPI_COMM_WORLD); +#endif + + while (numops == -1 || numops--) + { +#ifdef FSX_MPI + MPI_Barrier(MPI_COMM_WORLD); +#endif + test(); + } + + if (close(fd)) { + prterr("close"); + report_failure(99); + } + +#ifdef FSX_MPI + if(rank == 0) +#endif + prt("All operations completed A-OK!\n"); + +#ifdef FSX_MPI + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); +#endif + + exit(0); + return 0; +} + diff --git a/test/automated/mpiio-tests.d/romio-async b/test/automated/mpiio-tests.d/romio-async index 6e5e21b..b67b728 100755 --- a/test/automated/mpiio-tests.d/romio-async +++ b/test/automated/mpiio-tests.d/romio-async @@ -13,7 +13,7 @@ TESTNAME=async TEST_DEST=${CLUSTER_DIR}/$TESTNAME -(cd ${PVFS2_DEST}/mpich2-snap-*/build/src/mpi/romio/test && \ +(cd ${PVFS2_DEST}/mpich2-snapshot/build/src/mpi/romio/test && \ make $TESTNAME && cp $TESTNAME $TEST_DEST) if [ $? -ne 0 ] ; then diff --git a/test/automated/mpiio-tests.d/romio-coll_test b/test/automated/mpiio-tests.d/romio-coll_test index a8b0bf3..128f58c 100755 --- a/test/automated/mpiio-tests.d/romio-coll_test +++ b/test/automated/mpiio-tests.d/romio-coll_test @@ -13,7 +13,7 @@ TESTNAME=coll_test TEST_DEST=${CLUSTER_DIR}/$TESTNAME -(cd ${PVFS2_DEST}/mpich2-snap-*/build/src/mpi/romio/test && \ +(cd ${PVFS2_DEST}/mpich2-snapshot/build/src/mpi/romio/test && \ make $TESTNAME && cp $TESTNAME $TEST_DEST) if [ $? 
-ne 0 ] ; then diff --git a/test/automated/mpiio-tests.d/romio-error b/test/automated/mpiio-tests.d/romio-error index 56a4c67..9692cc2 100755 --- a/test/automated/mpiio-tests.d/romio-error +++ b/test/automated/mpiio-tests.d/romio-error @@ -13,7 +13,7 @@ TESTNAME=error TEST_DEST=${CLUSTER_DIR}/$TESTNAME -(cd ${PVFS2_DEST}/mpich2-snap-*/build/src/mpi/romio/test && \ +(cd ${PVFS2_DEST}/mpich2-snapshot/build/src/mpi/romio/test && \ make $TESTNAME && cp $TESTNAME $TEST_DEST) if [ $? -ne 0 ] ; then diff --git a/test/automated/mpiio-tests.d/romio-excl b/test/automated/mpiio-tests.d/romio-excl index d735cad..050e120 100755 --- a/test/automated/mpiio-tests.d/romio-excl +++ b/test/automated/mpiio-tests.d/romio-excl @@ -13,7 +13,7 @@ TESTNAME=excl TEST_DEST=${CLUSTER_DIR}/$TESTNAME -(cd ${PVFS2_DEST}/mpich2-snap-*/build/src/mpi/romio/test && \ +(cd ${PVFS2_DEST}/mpich2-snapshot/build/src/mpi/romio/test && \ make $TESTNAME && cp $TESTNAME $TEST_DEST) if [ $? -ne 0 ] ; then diff --git a/test/automated/mpiio-tests.d/romio-file_info b/test/automated/mpiio-tests.d/romio-file_info index 20aeb4a..1b8694c 100755 --- a/test/automated/mpiio-tests.d/romio-file_info +++ b/test/automated/mpiio-tests.d/romio-file_info @@ -13,7 +13,7 @@ TESTNAME=file_info TEST_DEST=${CLUSTER_DIR}/$TESTNAME -(cd ${PVFS2_DEST}/mpich2-snap-*/build/src/mpi/romio/test && \ +(cd ${PVFS2_DEST}/mpich2-snapshot/build/src/mpi/romio/test && \ make $TESTNAME && cp $TESTNAME $TEST_DEST) if [ $? -ne 0 ] ; then diff --git a/test/automated/mpiio-tests.d/romio-noncontig_coll2 b/test/automated/mpiio-tests.d/romio-noncontig_coll2 index 7cf7460..9624910 100755 --- a/test/automated/mpiio-tests.d/romio-noncontig_coll2 +++ b/test/automated/mpiio-tests.d/romio-noncontig_coll2 @@ -13,7 +13,7 @@ TESTNAME=noncontig_coll2 TEST_DEST=${CLUSTER_DIR}/$TESTNAME -(cd ${PVFS2_DEST}/mpich2-snap-*/build/src/mpi/romio/test && \ +(cd ${PVFS2_DEST}/mpich2-snapshot/build/src/mpi/romio/test && \ make $TESTNAME && cp $TESTNAME $TEST_DEST) if [ $? 
-ne 0 ] ; then diff --git a/test/automated/mpiio-tests.d/romio-psimple b/test/automated/mpiio-tests.d/romio-psimple index 777afba..fa3fde0 100755 --- a/test/automated/mpiio-tests.d/romio-psimple +++ b/test/automated/mpiio-tests.d/romio-psimple @@ -13,7 +13,7 @@ TESTNAME=psimple TEST_DEST=${CLUSTER_DIR}/$TESTNAME -(cd ${PVFS2_DEST}/mpich2-snap-*/build/src/mpi/romio/test && \ +(cd ${PVFS2_DEST}/mpich2-snapshot/build/src/mpi/romio/test && \ make $TESTNAME && cp $TESTNAME $TEST_DEST) if [ $? -ne 0 ] ; then diff --git a/test/automated/mpiio-tests.d/romio-simple b/test/automated/mpiio-tests.d/romio-simple index 0be7f41..477bb8c 100755 --- a/test/automated/mpiio-tests.d/romio-simple +++ b/test/automated/mpiio-tests.d/romio-simple @@ -13,7 +13,7 @@ TESTNAME=simple TEST_DEST=${CLUSTER_DIR}/$TESTNAME -(cd ${PVFS2_DEST}/mpich2-snap-*/build/src/mpi/romio/test && \ +(cd ${PVFS2_DEST}/mpich2-snapshot/build/src/mpi/romio/test && \ make $TESTNAME && cp $TESTNAME $TEST_DEST) if [ $? -ne 0 ] ; then diff --git a/test/automated/mpiio-tests.d/romio-split_coll b/test/automated/mpiio-tests.d/romio-split_coll index b8a0aef..80e9eb9 100755 --- a/test/automated/mpiio-tests.d/romio-split_coll +++ b/test/automated/mpiio-tests.d/romio-split_coll @@ -13,7 +13,7 @@ TESTNAME=split_coll TEST_DEST=${CLUSTER_DIR}/$TESTNAME -(cd ${PVFS2_DEST}/mpich2-snap-*/build/src/mpi/romio/test && \ +(cd ${PVFS2_DEST}/mpich2-snapshot/build/src/mpi/romio/test && \ make $TESTNAME && cp $TESTNAME $TEST_DEST) if [ $? -ne 0 ] ; then diff --git a/test/automated/mpiio-tests.d/romio-status b/test/automated/mpiio-tests.d/romio-status index 4d307b0..664c245 100755 --- a/test/automated/mpiio-tests.d/romio-status +++ b/test/automated/mpiio-tests.d/romio-status @@ -13,7 +13,7 @@ TESTNAME=status TEST_DEST=${CLUSTER_DIR}/$TESTNAME -(cd ${PVFS2_DEST}/mpich2-snap-*/build/src/mpi/romio/test && \ +(cd ${PVFS2_DEST}/mpich2-snapshot/build/src/mpi/romio/test && \ make $TESTNAME && cp $TESTNAME $TEST_DEST) if [ $? 
-ne 0 ] ; then diff --git a/test/automated/sysint-tests.d/misc b/test/automated/sysint-tests.d/misc new file mode 100755 index 0000000..5775942 --- /dev/null +++ b/test/automated/sysint-tests.d/misc @@ -0,0 +1,143 @@ +#!/bin/bash + +# try all of the normal pvfs2 command line utilites and make sure they work +# for basic cases + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-touch $PVFS2_MOUNTPOINT/miscfile +if [ "${?}" != 0 ] +then + echo "pvfs2-touch failure." 1>&2 + exit 1 +fi + +MYGROUP=`groups | cut -d ' ' -f 1` +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-chown $USER $MYGROUP $PVFS2_MOUNTPOINT/miscfile +if [ "${?}" != 0 ] +then + echo "pvfs2-chown failure." 1>&2 + exit 1 +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-chmod 777 $PVFS2_MOUNTPOINT/miscfile +if [ "${?}" != 0 ] +then + echo "pvfs2-chmod failure." 1>&2 + exit 1 +fi + +# look at the error message from this tool. If we got an operation not +# supported error, it just means that the server either doesn't support the +# vm drop caches operation, or doesn't have permission. +DC_OUT=`${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-drop-caches -m $PVFS2_MOUNTPOINT 2>&1` +if [ "${?}" != 0 ] +then + TEST=`echo $DC_OUT | grep "not supported"` + if [ "${?}" != 0 ] + then + echo $DC_OUT 1>&2 + echo "pvfs2-drop-caches failure." 1>&2 + exit 1 + fi +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-fsck -m $PVFS2_MOUNTPOINT +if [ "${?}" != 0 ] +then + echo "pvfs2-fsck failure." 1>&2 + exit 1 +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-validate -d $PVFS2_MOUNTPOINT -c -f -s +if [ "${?}" != 0 ] +then + echo "pvfs2-statfs failure." 1>&2 + exit 1 +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-fs-dump -m $PVFS2_MOUNTPOINT +if [ "${?}" != 0 ] +then + echo "pvfs2-fs-dump failure." 1>&2 + exit 1 +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-ls $PVFS2_MOUNTPOINT +if [ "${?}" != 0 ] +then + echo "pvfs2-ls failure." 
1>&2 + exit 1 +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-perror 0 +if [ "${?}" != 0 ] +then + echo "pvfs2-perror failure." 1>&2 + exit 1 +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-stat $PVFS2_MOUNTPOINT/miscfile +if [ "${?}" != 0 ] +then + echo "pvfs2-stat failure." 1>&2 + exit 1 +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-viewdist -f $PVFS2_MOUNTPOINT/miscfile +if [ "${?}" != 0 ] +then + echo "pvfs2-viewdist failure." 1>&2 + exit 1 +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-xattr -s -k user.foo -v bar $PVFS2_MOUNTPOINT/miscfile +if [ "${?}" != 0 ] +then + echo "pvfs2-xattr failure." 1>&2 + exit 1 +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-rm $PVFS2_MOUNTPOINT/miscfile +if [ "${?}" != 0 ] +then + echo "pvfs2-rm failure." 1>&2 + exit 1 +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-set-debugmask -m $PVFS2_MOUNTPOINT "none" +if [ "${?}" != 0 ] +then + echo "pvfs2-set-debugmask failure." 1>&2 + exit 1 +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-set-mode -m $PVFS2_MOUNTPOINT "admin" +if [ "${?}" != 0 ] +then + echo "pvfs2-set-mode failure." 1>&2 + exit 1 +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-set-mode -m $PVFS2_MOUNTPOINT "normal" +if [ "${?}" != 0 ] +then + echo "pvfs2-set-mode failure." 1>&2 + exit 1 +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-set-sync -m $PVFS2_MOUNTPOINT -D 0 -M 1 +if [ "${?}" != 0 ] +then + echo "pvfs2-set-sync failure." 1>&2 + exit 1 +fi + +${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-statfs -m $PVFS2_MOUNTPOINT +if [ "${?}" != 0 ] +then + echo "pvfs2-statfs failure." 
1>&2 + exit 1 +fi + + + + diff --git a/test/automated/testscrpt.sh b/test/automated/testscrpt.sh index 0cbfb8f..0e71ff6 100755 --- a/test/automated/testscrpt.sh +++ b/test/automated/testscrpt.sh @@ -10,11 +10,14 @@ # you can override these settings in nightly-tests.cfg export PVFS2_DEST=/tmp/pvfs2-nightly export PVFS2_MOUNTPOINT=/pvfs2-nightly -export EXTRA_TESTS=${HOME}/src/benchmarks +export EXTRA_TESTS=/tmp/${USER}/src/benchmarks +#export EXTRA_TESTS=/tmp/src/benchmarks +export URL=http://devorange.clemson.edu/pvfs +export BENCHMARKS=benchmarks-20060512.tar.gz # look for a 'nightly-test.cfg' in the same directory as this script -if [ -f $(cd `dirname $0`; pwd)/nightly-tests.cfg ] ; then - . $(cd `dirname $0`; pwd)/nightly-tests.cfg +if [ -f /tmp/$USER/nightly-tests.cfg ] ; then + . /tmp/$USER/nightly-tests.cfg fi @@ -24,10 +27,12 @@ export CVS_TAG="${CVS_TAG:-HEAD}" # no need to modify these. they make their own gravy STARTTIME=`date +%s` TINDERSCRIPT=$(cd `dirname $0`; pwd)/tinder-pvfs2-status -SYSINT_SCRIPTS=${PVFS2_DEST}/pvfs2-${CVS_TAG}/test/automated/sysint-tests.d -VFS_SCRIPTS=${PVFS2_DEST}/pvfs2-${CVS_TAG}/test/automated/vfs-tests.d +SYSINT_SCRIPTS=~+/sysint-tests.d +VFS_SCRIPTS=~+/vfs-tests.d +VFS_SCRIPT="dbench" MPIIO_DRIVER=${PVFS2_DEST}/pvfs2-${CVS_TAG}/test/automated/testscrpt-mpi.sh REPORT_LOG=${PVFS2_DEST}/alltests-${CVS_TAG}.log +BENCHMARKS=benchmarks-20060512.tar.gz # for debugging and testing, you might need to set the above to your working # direcory.. 
.unless you like checking in broken scripts @@ -37,10 +42,17 @@ MPIIO_DRIVER=$(cd `dirname $0`; pwd)/testscrpt-mpi.sh TESTNAME="`hostname -s`-nightly" +# before starting any client apps, we need to deal with the possiblity that we +# might have built with shared libraries +export LD_LIBRARY_PATH=${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/lib:${LD_LIBRARY_PATH} # we only have a few hosts that meet all the earlier stated prereqs -VFS_HOSTS="gil lain stan" +VFS_HOSTS="devorange2 devorange35 andy jeffrey" +# +# Detect basic heap corruption +# +export MALLOC_CHECK_=2 # takes one argument: a tag or branch in CVS pull_and_build_pvfs2 () { @@ -93,12 +105,14 @@ teardown_vfs() { } setup_vfs() { + sudo dmesg -c >/dev/null sudo /sbin/insmod ${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/lib/modules/`uname -r`/kernel/fs/pvfs2/pvfs2.ko - sudo ${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/sbin/pvfs2-client \ - -p ${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/sbin/pvfs2-client-core \ - -L ${PVFS2_DEST}/pvfs2-client-${CVS_TAG}.log +# sudo LD_LIBRARY_PATH=${LD_LIBRARY_PATH} ${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/sbin/pvfs2-client \ +# -p ${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/sbin/pvfs2-client-core \ +# -L ${PVFS2_DEST}/pvfs2-client-${CVS_TAG}.log # sudo screen -d -m cgdb -x ${PVFS2_DEST}/.gdbinit --args ${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/sbin/pvfs2-client-core -L ${PVFS2_DEST}/pvfs2-client-${CVS_TAG}.log #sudo valgrind --log-file=${PVFS2_DEST}/pvfs2-client.vg ${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/sbin/pvfs2-client-core -L ${PVFS2_DEST}/pvfs2-client-${CVS_TAG}.log & + sudo ${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/sbin/pvfs2-client -p ${PVFS2_DEST}/INSTALL-pvfs2-${CVS_TAG}/sbin/pvfs2-client-core -L ${PVFS2_DEST}/pvfs2-client-${CVS_TAG}.log sudo chmod 644 ${PVFS2_DEST}/pvfs2-client-${CVS_TAG}.log sudo mount -t pvfs2 tcp://`hostname -s`:3399/pvfs2-fs ${PVFS2_MOUNTPOINT} } @@ -131,7 +145,11 @@ setup_pvfs2() { grep -q 'pvfs2-nightly' /etc/fstab if [ $? 
-ne 0 -a $do_vfs -eq 0 ] ; then export PVFS2TAB_FILE=${PVFS2_DEST}/pvfs2tab - fi + fi + #turn on degging on each server + export PVFS2TAB_FILE=${PVFS2_DEST}/pvfs2tab + echo "....setting server-side debug mask" + INSTALL-pvfs2-${CVS_TAG}/bin/pvfs2-set-debugmask -m ${PVFS2_MOUNTPOINT} "all" } teardown_pvfs2() { @@ -164,7 +182,7 @@ buildfail() { setupfail() { echo "Failure in setup" - dmesg | tail -20 > ${PVFS2_DEST}/dmesg + dmesg > ${PVFS2_DEST}/dmesg cat ${PVFS2_DEST}/dmesg ${PVFS2_DEST}/pvfs2-client-${CVS_TAG}.log \ ${PVFS2_DEST}/pvfs2-server-${CVS_TAG}.log* | \ ${TINDERSCRIPT} ${TESTNAME}-${CVS_TAG} test_failed $STARTTIME @@ -190,9 +208,9 @@ run_parts() { # skip CVS [ -d $f ] && continue if [ -x $f ] ; then - echo -n "====== running $f ..." + echo -n "====== `date` == running $f ..." ./$f > ${PVFS2_DEST}/${f}-${CVS_TAG}.log - if [ $? == 0 ] ; then + if [ $? -eq 0 ] ; then nr_passed=$((nr_passed + 1)) echo "OK" else @@ -204,6 +222,20 @@ run_parts() { done } +#run only one script +run_one() { + cd $1 + echo -n "===== `date` == running ${1}/${2} ..." + ${1}/${2} > ${PVFS2_DEST}/${2}-${CVS_TAG}.log + if [ $? -eq 0 ] ; then + nr_passed=$((nr_passed + 1)) + echo "OK" + else + nr_failed=$((nr_failed + 1)) + failure_logs="$failure_logs ${PVFS2_DEST}/${2}-${CVS_TAG}.log" + echo "FAILED" + fi +} ### ### entry point for script ### @@ -220,18 +252,57 @@ for s in $(echo $VFS_HOSTS); do fi done -failure_logs="" # a space-delimited list of logs that failed -# compile and install +# "install" benchmark software, if EXTRA_TESTS is not null +if [ $EXTRA_TESTS ] +then + echo "Installing benchmark software...." + my_cwd=`pwd` + + #create directory, if not already there + mkdir -p $EXTRA_TESTS + if [ $? != 0 ] + then + echo "benchmarks: mkdir failed" + setupfail + fi + + #remove existing tar file and/or subdirectories + cd $EXTRA_TESTS/.. + sudo /bin/rm -rf * + + #get new tar file + wget ${URL}/${BENCHMARKS} + if [ $? 
!= 0 ] + then + echo "benchmarks: wget failed" + setupfail + fi + + #untar the file + tar -xzf ${BENCHMARKS} + if [ $? != 0 ] + then + echo "benchmarks: tar failed" + setupfail + fi + + #go back to original working directory + cd $my_cwd +fi + +echo "pull_and_build_pvfs2" pull_and_build_pvfs2 $CVS_TAG || buildfail +echo "setup_pvfs2" teardown_pvfs2 && setup_pvfs2 if [ $? != 0 ] ; then echo "setup failed" - setupfail + setupfail fi if [ $do_vfs -eq 1 ] ; then + echo "setup_vfs" teardown_vfs && setup_vfs if [ $? != 0 ] ; then @@ -255,11 +326,14 @@ exec 7<&2 exec 1> ${REPORT_LOG} exec 2>&1 +echo "running sysint scripts" run_parts ${SYSINT_SCRIPTS} if [ $do_vfs -eq 1 ] ; then + echo "running vfs scripts" export VFS_SCRIPTS run_parts ${VFS_SCRIPTS} +# run_one ${VFS_SCRIPTS} ${VFS_SCRIPT} fi # down the road (as we get our hands on more clusters) we'll need a more @@ -288,5 +362,5 @@ else tinder_report success fi -[ $do_vfs -eq 1 ] && teardown_vfs -teardown_pvfs2 +#[ $do_vfs -eq 1 ] && teardown_vfs +#teardown_pvfs2 diff --git a/test/automated/tinder-pvfs2-status b/test/automated/tinder-pvfs2-status index a88857c..26cf1bb 100755 --- a/test/automated/tinder-pvfs2-status +++ b/test/automated/tinder-pvfs2-status @@ -1,68 +1,3 @@ -#!/usr/bin/perl -w - -use strict; - -require HTTP::Request; -require LWP::UserAgent; - -if($#ARGV < 2) -{ - print "\nusage: tinder-pvfs2-status " . - " [status info]\n\n"; - exit 1; -} - -my $buildname=shift @ARGV; -my $status=shift @ARGV; -my $starttime=shift @ARGV; - -my $binfo = ""; -if($#ARGV > -1) -{ - $binfo = "TinderboxPrint: " . 
join(" ", @ARGV); -} - -my $admin="slang\@mcs.anl.gov"; -my $datestr=time(); - -my $logoutput = ""; -my $line; -while (defined($line = )) -{ - $logoutput .= $line; -} - -my $msg = <<"BODYEOF"; - -tinderbox: administrator: $admin -tinderbox: starttime: $starttime -tinderbox: buildname: $buildname -tinderbox: status: $status -tinderbox: timenow: $datestr -tinderbox: tree: PVFS2 -tinderbox: errorparser: unix -tinderbox: END - -$binfo - -$logoutput - -BODYEOF -; - -my $req = HTTP::Request->new( - "POST" => "http://www.pvfs.org/cgi-bin/pvfs2/tinderbox2/process_build" ); -$req->content( $msg ); - -my $ua = LWP::UserAgent->new; - -my $resp = $ua->request( $req ); -if( $resp->is_success ) -{ - print $resp->content; -} -else -{ - die $resp->status_line; -} - +#!/bin/sh +echo "do nothing" +exit diff --git a/test/automated/vfs-tests.d/dbench b/test/automated/vfs-tests.d/dbench index e488f6a..033c348 100755 --- a/test/automated/vfs-tests.d/dbench +++ b/test/automated/vfs-tests.d/dbench @@ -1,7 +1,60 @@ #!/bin/sh -cd ${EXTRA_TESTS}/dbench-3.03 +#this script assumes that the benchmark tar file is already copied into $EXTRA_TESTS +if [ ! $BENCHMARKS ] +then + #this is the existing name of the tar file + BENCHMARKS=benchmarks-20060512.tar.gz +fi + +#make directory, if it doesn't exist +mkdir -p ${EXTRA_TESTS}/dbench-3.03 + +#capture current working directory +cwd=`pwd` + +#goto benchmark directory and cleanup +cd ${EXTRA_TESTS} +sudo /bin/rm -rf dbench-3.03 + +#go back +cd .. + +#untar the dbench directory +tar -xzf ${BENCHMARKS} benchmarks/dbench-3.03/* + +#goto the dbench directory +cd benchmarks/dbench-3.03 + +#configure the dbench software ./configure -q + +#apply patches +patch -p3 < ${VFS_SCRIPTS}/dbench.patch +if [ "${?}" != 0 ] +then + echo "Error: failed to apply patches to DBENCH." + cd $cwd + exit 1; +fi + +#compile the system make 2>&1 + +#setup dbench test and execute. cp client.txt ${PVFS2_MOUNTPOINT} +if [ $? 
!= 0 ] +then + echo "failed to copy client.txt to ${PVFS2_MOUNTPOINT}" + cd $cwd + exit 1 +fi cd ${PVFS2_MOUNTPOINT} && ${EXTRA_TESTS}/dbench-3.03/dbench -c client.txt 10 -t 300 2>&1 +if [ $? != 0 ] +then + echo "dbench failed" + cd $cwd + exit 1 +fi + +cd $cwd diff --git a/test/automated/vfs-tests.d/dbench.patch b/test/automated/vfs-tests.d/dbench.patch new file mode 100644 index 0000000..e321792 --- /dev/null +++ b/test/automated/vfs-tests.d/dbench.patch @@ -0,0 +1,43 @@ +--- test/benchmarks/dbench-3.03/snprintf.c 2004-12-12 21:26:32.000000000 -0500 ++++ src/benchmarks/dbench-3.03/snprintf.c 2010-07-13 16:07:48.000000000 -0400 +@@ -81,6 +81,7 @@ + #include + #endif + ++/* + #ifndef VA_COPY + #ifdef HAVE_VA_COPY + #define VA_COPY(dest, src) __va_copy(dest, src) +@@ -88,7 +89,20 @@ + #define VA_COPY(dest, src) (dest) = (src) + #endif + #endif +- ++#define VA_COPY(dest, src) __va_copy(dest, src) ++*/ ++#ifdef va_copy ++/*using va_copy macro*/ ++#define VA_COPY(d,s) va_copy(d,s) ++#else ++#ifdef __va_copy ++/*using __va_copy macro*/ ++#define VA_COPY(d,s) __va_copy(d,s) ++#else ++/*using va_copy library function*/ ++#define VA_COPY(d,s) va_copy(d,s) ++#endif ++#endif + + #if defined(HAVE_SNPRINTF) && defined(HAVE_VSNPRINTF) && defined(HAVE_C99_VSNPRINTF) + /* only include stdio.h if we are not re-defining snprintf or vsnprintf */ +--- test/benchmarks/dbench-3.03/tbench_srv.c 2004-12-12 21:47:59.000000000 -0500 ++++ src/benchmarks/dbench-3.03/tbench_srv.c 2010-07-13 16:12:42.000000000 -0400 +@@ -64,7 +64,7 @@ + + while (1) { + struct sockaddr addr; +- int in_addrlen = sizeof(addr); ++ socklen_t in_addrlen = sizeof(addr); + int fd; + + while (waitpid((pid_t)-1,(int *)NULL, WNOHANG) > 0) ; diff --git a/test/automated/vfs-tests.d/fsx.c b/test/automated/vfs-tests.d/fsx.c index aca4781..63cc494 100644 --- a/test/automated/vfs-tests.d/fsx.c +++ b/test/automated/vfs-tests.d/fsx.c @@ -116,9 +116,13 @@ prt(char *fmt, ...) 
va_start(args, fmt); vfprintf(stdout, fmt, args); - if (fsxlogf) - vfprintf(fsxlogf, fmt, args); va_end(args); + if (fsxlogf) + { + va_start(args,fmt); + vfprintf(fsxlogf, fmt, args); + va_end(args); + } } void @@ -954,6 +958,8 @@ main(int argc, char **argv) break; case 'R': mapped_reads = 0; + if (!quiet) + fprintf(stdout, "mapped reads DISABLED\n"); break; case 'S': seed = getnum(optarg, &endp); diff --git a/test/automated/vfs-tests.d/iozone b/test/automated/vfs-tests.d/iozone index 9b77c1f..0281804 100755 --- a/test/automated/vfs-tests.d/iozone +++ b/test/automated/vfs-tests.d/iozone @@ -1,6 +1,12 @@ #!/bin/sh cd ${EXTRA_TESTS}/iozone3_239/src/current || exit 1 +patch -p5 < ${VFS_SCRIPTS}/iozone.patch +if [ $? != 0 ] +then + echo "Attemp to patch iozone software failed." + exit 1 +fi make linux || exit 1 # -y min record size # -q max record size diff --git a/test/automated/vfs-tests.d/iozone.patch b/test/automated/vfs-tests.d/iozone.patch new file mode 100644 index 0000000..615d01d --- /dev/null +++ b/test/automated/vfs-tests.d/iozone.patch @@ -0,0 +1,1058 @@ +diff -Naur patch/benchmarks/iozone3_239/src/current/fileop.c src/benchmarks/iozone3_239/src/current/fileop.c +--- patch/benchmarks/iozone3_239/src/current/fileop.c 2005-04-22 17:09:54.000000000 -0400 ++++ src/benchmarks/iozone3_239/src/current/fileop.c 2010-07-26 17:25:19.000000000 -0400 +@@ -184,8 +184,6 @@ + file_stat(int x) + { + int i,j,k,y; +- int fd; +- int ret; + char buf[100]; + struct stat mystat; + for(i=0;i>%c<< Expecting >>%c<<\n", *where2,*pattern_ptr); + return(1); +@@ -6142,6 +6168,7 @@ + long long *data2; + #endif + { ++ long long int tmp; + double starttime1; + double writetime[2]; + double walltime[2], cputime[2]; +@@ -6458,9 +6485,15 @@ + else + fprintf(rwqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((qtime_stop-qtime_start-time_res))*1000000,reclen); + #else +- fprintf(wqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((qtime_stop-qtime_start-time_res))*1000000,reclen); ++ { ++ 
tmp = (long long int)traj_offset; ++ fprintf(wqfd,"%10.1lld %10.0f %10.1lld\n",(tmp)/1024,((qtime_stop-qtime_start-time_res))*1000000,reclen); ++ } + else +- fprintf(rwqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((qtime_stop-qtime_start-time_res))*1000000,reclen); ++ { ++ tmp = (long long int)traj_offset; ++ fprintf(rwqfd,"%10.1lld %10.0f %10.1lld\n",(tmp)/1024,((qtime_stop-qtime_start-time_res))*1000000,reclen); ++ } + #endif + } + w_traj_ops_completed++; +@@ -6883,7 +6916,7 @@ + printf("\nError freading block %ld %x\n", i, + (unsigned long long)buffer); + #else +- printf("\nError freading block %ld %x\n", i, ++ printf("\nError freading block %ld %llx\n", i, + (unsigned long long)buffer); + #endif + #else +@@ -6997,6 +7030,7 @@ + long long *data1,*data2; + #endif + { ++ long long int tmp = 0; + double starttime2; + double compute_val = (double)0; + double readtime[2]; +@@ -7137,7 +7171,7 @@ + if(read(fd, (void *)nbuff, (size_t) page_size) != page_size) + { + #ifdef _64BIT_ARCH_ +- printf("\nError reading block %d %x\n", 0, ++ printf("\nError reading block %d %llx\n", 0, + (unsigned long long)nbuff); + #else + printf("\nError reading block %d %lx\n", 0, +@@ -7226,7 +7260,8 @@ + printf("\nError reading block %ld %lx\n", i, + (unsigned long long)nbuff); + #else +- printf("\nError reading block %lld %llx\n", i, ++ tmp = (long long int)i; ++ printf("\nError reading block %lld %llx\n", tmp, + (unsigned long long)nbuff); + #endif + #else +@@ -7269,9 +7304,15 @@ + else + fprintf(rrqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,(qtime_stop-qtime_start-time_res)*1000000,reclen); + #else +- fprintf(rqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,(qtime_stop-qtime_start-time_res)*1000000,reclen); ++ { ++ tmp = (long long int)traj_offset; ++ fprintf(rqfd,"%10.1lld %10.0f %10.1lld\n",(tmp)/1024,(qtime_stop-qtime_start-time_res)*1000000,reclen); ++ } + else +- fprintf(rrqfd,"%10.1lld %10.0f 
%10.1lld\n",(traj_offset)/1024,(qtime_stop-qtime_start-time_res)*1000000,reclen); ++ { ++ tmp = (long long int)traj_offset; ++ fprintf(rrqfd,"%10.1lld %10.0f %10.1lld\n",(tmp)/1024,(qtime_stop-qtime_start-time_res)*1000000,reclen); ++ } + #endif + } + r_traj_ops_completed++; +@@ -7418,6 +7459,7 @@ + long long *data1, *data2; + #endif + { ++ long long int tmp = 0; + double randreadtime[2]; + double starttime2; + double walltime[2], cputime[2]; +@@ -7593,8 +7635,9 @@ + printf("\nError reading block at %ld\n", + offset64); + #else ++ tmp = (long long int)offset64; + printf("\nError reading block at %lld\n", +- offset64); ++ tmp); + #endif + perror("read"); + exit(70); +@@ -7711,8 +7754,9 @@ + printf("\nError writing block at %ld\n", + offset64); + #else ++ tmp = (long long int)offset64; + printf("\nError writing block at %lld\n", +- offset64); ++ tmp); + #endif + if(wval==-1) + perror("write"); +@@ -7828,6 +7872,7 @@ + long long *data1,*data2; + #endif + { ++ long long int tmp = 0; + double revreadtime[2]; + double starttime2; + double walltime[2], cputime[2]; +@@ -7969,7 +8014,8 @@ + #ifdef NO_PRINT_LLD + printf("\nError reading block %ld\n", i); + #else +- printf("\nError reading block %lld\n", i); ++ tmp = (long long int)i; ++ printf("\nError reading block %lld\n", tmp); + #endif + perror("read"); + exit(79); +@@ -8365,6 +8411,7 @@ + long long *data1, *data2; + #endif + { ++ long long int tmp = 0; + double strideintime; + double starttime1; + double compute_val = (double)0; +@@ -8501,8 +8548,10 @@ + printf("\nError reading block %ld, fd= %d Filename %s Read returned %ld\n", i, fd,filename,uu); + printf("\nSeeked to %ld Reclen = %ld\n", savepos64,reclen); + #else +- printf("\nError reading block %lld, fd= %d Filename %s Read returned %lld\n", i, fd,filename,uu); +- printf("\nSeeked to %lld Reclen = %lld\n", savepos64,reclen); ++ tmp = (long long int)i; ++ printf("\nError reading block %lld, fd= %d Filename %s Read returned %lld\n", tmp, fd,filename,uu); ++ tmp = 
(long long int)savepos64; ++ printf("\nSeeked to %lld Reclen = %lld\n",tmp,reclen); + #endif + perror("read"); + exit(88); +@@ -9919,6 +9968,7 @@ + long long who; + #endif + { ++ long long int tmp = 0; + long long i; + off64_t current_file_size; + off64_t rec_size; +@@ -9943,7 +9993,11 @@ + #ifdef NO_PRINT_LLD + if(!silent) printf(" %c%ld%c",042,rec_size/1024,042); + #else +- if(!silent) printf(" %c%lld%c",042,rec_size/1024,042); ++ if(!silent) ++ { ++ tmp = (long long int)rec_size; ++ printf(" %c%lld%c",042,tmp/1024,042); ++ } + #endif + } + if(!silent) printf("\n"); +@@ -9961,7 +10015,11 @@ + #ifdef NO_PRINT_LLD + if(!silent) printf("%c%ld%c ",042,current_file_size,042); + #else +- if(!silent) printf("%c%lld%c ",042,current_file_size,042); ++ if(!silent) ++ { ++ tmp = (long long int)current_file_size; ++ printf("%c%lld%c ",042,tmp,042); ++ } + #endif + for(i=0;i<=max_y;i++){ + if(report_array[0][i] != current_file_size){ +@@ -9976,7 +10034,11 @@ + #ifdef NO_PRINT_LLD + if(!silent) printf("%c%ld%c ",042,current_file_size,042); + #else +- if(!silent) printf("%c%lld%c ",042,current_file_size,042); ++ if(!silent) ++ { ++ tmp = (long long int)current_file_size; ++ printf("%c%lld%c ",042,tmp,042); ++ } + #endif + } + if(bif_flag) +@@ -9984,7 +10046,11 @@ + #ifdef NO_PRINT_LLD + if(!silent) printf(" %ld ",report_array[who][i]); + #else +- if(!silent) printf(" %lld ",report_array[who][i]); ++ if(!silent) ++ { ++ tmp = (long long int)report_array[who][i]; ++ printf(" %lld ",tmp); ++ } + #endif + } + if(bif_flag) +@@ -10156,6 +10222,7 @@ + long long who; + #endif + { ++ long long int tmp = 0; + long long i; + off64_t current_file_size; + off64_t rec_size; +@@ -10173,7 +10240,11 @@ + #ifdef NO_PRINT_LLD + if(!silent) printf(" %c%ld%c",042,rec_size/1024,042); + #else +- if(!silent) printf(" %c%lld%c",042,rec_size/1024,042); ++ if(!silent) ++ { ++ tmp = (long long int)rec_size; ++ printf(" %c%lld%c",042,tmp/1024,042); ++ } + #endif + } + if(!silent) printf("\n"); +@@ -10191,7 
+10262,11 @@ + #ifdef NO_PRINT_LLD + if(!silent) printf("%c%ld%c ",042,current_file_size,042); + #else +- if(!silent) printf("%c%lld%c ",042,current_file_size,042); ++ if(!silent) ++ { ++ tmp = (long long int)current_file_size; ++ printf("%c%lld%c ",042,tmp,042); ++ } + #endif + for (i = 0; i <= max_y; i++) { + if (report_array[0][i] != current_file_size) { +@@ -10206,7 +10281,11 @@ + #ifdef NO_PRINT_LLD + if(!silent) printf("%c%ld%c ",042,current_file_size,042); + #else +- if(!silent) printf("%c%lld%c ",042,current_file_size,042); ++ if(!silent) ++ { ++ tmp = (long long int)current_file_size; ++ printf("%c%lld%c ",042,tmp,042); ++ } + #endif + } + if (bif_flag) +@@ -10416,7 +10495,7 @@ + #ifdef NO_PRINT_LLD + printf("shmid = %d, size = %ld, size1 = %d, Error %d\n",shmid,size,(size_t)size1,errno); + #else +- printf("shmid = %d, size = %lld, size1 = %d, Error %d\n",shmid,size,(size_t)size1,errno); ++ printf("shmid = %d, size = %lld, size1 = %lx, Error %d\n",shmid,size,(size_t)size1,errno); + #endif + exit(119); + } +@@ -10628,6 +10707,7 @@ + #endif + { + ++ long long int tmp = 0; + struct child_stats *child_stat; + double starttime1 = 0; + double temp_time; +@@ -11029,7 +11109,8 @@ + #ifdef NO_PRINT_LLD + fprintf(thread_wqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #else +- fprintf(thread_wqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); ++ tmp = (long long int)traj_offset; ++ fprintf(thread_wqfd,"%10.1lld %10.0f %10.1lld\n",(tmp)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #endif + } + w_traj_ops_completed++; +@@ -11186,6 +11267,7 @@ + #endif + { + ++ long long int tmp = 0; + struct child_stats *child_stat; + double starttime1 = 0; + double temp_time; +@@ -11588,7 +11670,8 @@ + #ifdef NO_PRINT_LLD + fprintf(thread_wqfd,"%10.1ld %10.0f 
%10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #else +- fprintf(thread_wqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); ++ tmp = (long long int)traj_offset; ++ fprintf(thread_wqfd,"%10.1lld %10.0f %10.1lld\n",(tmp)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #endif + } + w_traj_ops_completed++; +@@ -11739,6 +11822,7 @@ + thread_rwrite_test(x) + #endif + { ++ long long int tmp = 0; + /************************/ + /* Children only here */ + /************************/ +@@ -12071,7 +12155,8 @@ + #ifdef NO_PRINT_LLD + fprintf(thread_rwqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #else +- fprintf(thread_rwqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); ++ tmp = (long long int)traj_offset; ++ fprintf(thread_rwqfd,"%10.1lld %10.0f %10.1lld\n",(tmp)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #endif + } + if(rlocking) +@@ -12205,6 +12290,7 @@ + thread_read_test(x) + #endif + { ++ long long int tmp = 0; + long long xx,xx2; + struct child_stats *child_stat; + double walltime, cputime; +@@ -12478,7 +12564,8 @@ + printf("\nError reading block %ld, fd= %d\n", i, + fd); + #else +- printf("\nError reading block %lld, fd= %d\n", i, ++ tmp = (long long int)i; ++ printf("\nError reading block %lld, fd= %d\n", tmp, + fd); + #endif + perror("read"); +@@ -12525,7 +12612,8 @@ + #ifdef NO_PRINT_LLD + fprintf(thread_rqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #else +- fprintf(thread_rqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); ++ tmp = (long long int)traj_offset; ++ fprintf(thread_rqfd,"%10.1lld %10.0f 
%10.1lld\n",(tmp)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #endif + } + +@@ -12658,6 +12746,7 @@ + thread_pread_test(x) + #endif + { ++ long long int tmp = 0; + long long xx,xx2; + struct child_stats *child_stat; + double walltime, cputime; +@@ -12931,7 +13020,8 @@ + printf("\nError preading block %ld, fd= %d\n", i, + fd); + #else +- printf("\nError preading block %lld, fd= %d\n", i, ++ tmp = (long long int)i; ++ printf("\nError preading block %lld, fd= %d\n", tmp, + fd); + #endif + perror("pread"); +@@ -12978,7 +13068,8 @@ + #ifdef NO_PRINT_LLD + fprintf(thread_rqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #else +- fprintf(thread_rqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); ++ tmp = (long long int)traj_offset; ++ fprintf(thread_rqfd,"%10.1lld %10.0f %10.1lld\n",(tmp)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #endif + } + +@@ -13112,6 +13203,7 @@ + thread_rread_test(x) + #endif + { ++ long long int tmp = 0; + long long xx,xx2; + char *nbuff; + struct child_stats *child_stat; +@@ -13384,7 +13476,8 @@ + printf("\nError writing block %ld, fd= %d\n", i, + fd); + #else +- printf("\nError writing block %lld, fd= %d\n", i, ++ tmp = (long long int)i; ++ printf("\nError writing block %lld, fd= %d\n", tmp, + fd); + #endif + perror("read"); +@@ -13431,7 +13524,8 @@ + #ifdef NO_PRINT_LLD + fprintf(thread_rrqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #else +- fprintf(thread_rrqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); ++ tmp = (long long int)traj_offset; ++ fprintf(thread_rrqfd,"%10.1lld %10.0f %10.1lld\n",(tmp)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #endif + } + +@@ -13562,6 +13656,7 
@@ + thread_reverse_read_test(x) + #endif + { ++ long long int tmp = 0; + long long xx,xx2; + char *nbuff; + struct child_stats *child_stat; +@@ -13818,7 +13913,8 @@ + #ifdef NO_PRINT_LLD + printf("\nError reading block %ld\n", i); + #else +- printf("\nError reading block %lld\n", i); ++ tmp = (long long int)i; ++ printf("\nError reading block %lld\n", tmp); + #endif + perror("read"); + if (!no_unlink) +@@ -13873,7 +13969,8 @@ + #ifdef NO_PRINT_LLD + fprintf(thread_revqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #else +- fprintf(thread_revqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); ++ tmp = (long long int)traj_offset; ++ fprintf(thread_revqfd,"%10.1lld %10.0f %10.1lld\n",(tmp)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #endif + } + } +@@ -13990,6 +14087,7 @@ + thread_stride_read_test(x) + #endif + { ++ long long int tmp = 0; + long long xx,xx2; + char *nbuff=0; + struct child_stats *child_stat; +@@ -14239,7 +14337,8 @@ + #ifdef NO_PRINT_LLD + printf("\nError reading block %ld, fd= %d\n", i, fd); + #else +- printf("\nError reading block %lld, fd= %d\n", i, fd); ++ tmp = (long long int)i; ++ printf("\nError reading block %lld, fd= %d\n", tmp, fd); + #endif + perror("read"); + if (!no_unlink) +@@ -14323,7 +14422,8 @@ + #ifdef NO_PRINT_LLD + fprintf(thread_strqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #else +- fprintf(thread_strqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); ++ tmp = (long long int)traj_offset; ++ fprintf(thread_strqfd,"%10.1lld %10.0f %10.1lld\n",(tmp)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #endif + } + } +@@ -14509,6 +14609,7 @@ + thread_ranread_test(x) + #endif + { ++ long long int tmp = 0; + 
long long xx,xx2; + struct child_stats *child_stat; + double walltime, cputime; +@@ -14795,8 +14896,9 @@ + printf("\nError reading block at %ld\n", + offset); + #else ++ tmp = (long long int)offset; + printf("\nError reading block at %lld\n", +- offset); ++ tmp); + #endif + perror("ranread"); + if (!no_unlink) +@@ -14846,7 +14948,8 @@ + #ifdef NO_PRINT_LLD + fprintf(thread_randrfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #else +- fprintf(thread_randrfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); ++ tmp = (long long int)traj_offset; ++ fprintf(thread_randrfd,"%10.1lld %10.0f %10.1lld\n",(tmp)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #endif + } + } +@@ -14968,6 +15071,7 @@ + #endif + { + ++ long long int tmp = 0; + struct child_stats *child_stat; + double starttime1 = 0; + double temp_time; +@@ -15375,7 +15479,8 @@ + #ifdef NO_PRINT_LLD + fprintf(thread_randwqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #else +- fprintf(thread_randwqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); ++ tmp = (long long int)traj_offset; ++ fprintf(thread_randwqfd,"%10.1lld %10.0f %10.1lld\n",(tmp)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); + #endif + } + w_traj_ops_completed++; +@@ -15629,7 +15734,7 @@ + if(debug1 ) + { + printf("\nthread created has an id of %lx\n",ts); +- printf("meme %ld\n",meme); ++ printf("meme %lld\n",meme); + } + return((long long)meme); + } +@@ -17473,8 +17578,8 @@ + bzero(&outbuf,sizeof(struct client_neutral_command)); + if(mdebug) + { +- printf("Master_neutral_command size = %d\n",sizeof(struct master_neutral_command)); +- printf("Client_neutral_command size = %d\n",sizeof(struct client_neutral_command)); ++ 
printf("Master_neutral_command size = %zu\n",sizeof(struct master_neutral_command)); /* sizeof yields size_t: print it in decimal with %zu */ ++ printf("Client_neutral_command size = %zu\n",sizeof(struct client_neutral_command)); + } + /* + * Convert internal commands to string format for neutral format/portability +@@ -17807,6 +17912,7 @@ + #endif + { + int me,ns; ++ socklen_t me2; + struct sockaddr_in *addr; + if(flag) + { +@@ -17827,6 +17933,7 @@ + } + } + me=sizeof(struct sockaddr_in); ++ me2 = (socklen_t)me; + if(cdebug) + { + printf("Child %d enters listen\n",(int)chid); +@@ -17838,7 +17945,7 @@ + printf("Child %d enters accept\n",(int)chid); + fflush(stdout); + } +- ns=accept(s,(void *)addr,&me); ++ ns=accept(s,(void *)addr,&me2); + if(cdebug) + { + printf("Child %d attached for receive. Sock %d %d\n",(int)chid, ns,errno); +@@ -19807,6 +19914,7 @@ + { + int tsize; + int s,ns,me; ++ socklen_t me2; + int rc; + int xx; + int tmp_port; +@@ -19815,6 +19923,7 @@ + int recv_buf_size=65536; + xx = 0; + me=sizeof(struct sockaddr_in); ++ me2 = (socklen_t)me; + tsize=size_of_message; /* Number of messages to receive */ + s = socket(AF_INET, SOCK_STREAM, 0); + if (s < 0) +@@ -19863,7 +19972,7 @@ + printf("Child enters accept\n"); + fflush(stdout); + } +- ns=accept(s,(void *)addr,&me); ++ ns=accept(s,(void *)addr,&me2); + if(cdebug) + { + printf("Child attached for receive. Sock %d %d\n", ns,errno); +@@ -20028,6 +20137,7 @@ + { + int tsize; + int s,ns,me; ++ socklen_t me2; + int rc; + int xx; + int tmp_port; +@@ -20036,6 +20146,7 @@ + int recv_buf_size=65536; + xx = 0; + me=sizeof(struct sockaddr_in); ++ me2 = (socklen_t)me; + tsize=sp_size_of_message; /* Number of messages to receive */ + s = socket(AF_INET, SOCK_STREAM, 0); + if (s < 0) +@@ -20084,7 +20195,7 @@ + printf("Master enters accept\n"); + fflush(stdout); + } +- ns=accept(s,(void *)addr,&me); ++ ns=accept(s,(void *)addr,&me2); + if(mdebug) + { + printf("Master attached for receive. 
Sock %d %d\n", ns,errno); +diff -Naur patch/benchmarks/iozone3_239/src/current/libasync.c src/benchmarks/iozone3_239/src/current/libasync.c +--- patch/benchmarks/iozone3_239/src/current/libasync.c 2005-04-22 17:09:54.000000000 -0400 ++++ src/benchmarks/iozone3_239/src/current/libasync.c 2010-07-26 16:28:31.000000000 -0400 +@@ -301,6 +301,7 @@ + off64_t max; + long long depth; + { ++ long long int tmp = 0; + off64_t a_offset,r_offset; + long long a_size; + struct cache_ent *ce,*first_ce=0; +@@ -331,7 +332,7 @@ + } + if(ret) + { +- printf("aio_error 1: ret %d %d\n",ret,errno); ++ printf("aio_error 1: ret %zd %d\n",ret,errno); + } + #ifdef _LARGEFILE64_SOURCE + #ifdef __LP64__ +@@ -369,10 +370,10 @@ + if(retval < ce->myaiocb.aio_nbytes) + #endif + { +- printf("aio_return error1: ret %d %d\n",retval,errno); ++ printf("aio_return error1: ret %zd %d\n",retval,errno); + #ifdef _LARGEFILE64_SOURCE + #ifdef __LP64__ +- printf("aio_return error1: fd %d offset %ld buffer %lx size %d Opcode %d\n", ++ printf("aio_return error1: fd %d offset %ld buffer %lx size %zd Opcode %d\n", + ce->myaiocb.aio_fildes, + ce->myaiocb.aio_offset, + (long)(ce->myaiocb.aio_buf), +@@ -421,7 +422,7 @@ + if(errno==EAGAIN) + goto again; + else +- printf("error returned from aio_read(). Ret %d errno %d\n",ret,errno); ++ printf("error returned from aio_read(). 
Ret %zd errno %d\n",ret,errno); + } + } + if(stride==0) /* User does not want read-ahead */ +@@ -477,7 +478,7 @@ + ; + } + if(ret) +- printf("aio_error 2: ret %d %d\n",ret,errno); ++ printf("aio_error 2: ret %zd %d\n",ret,errno); + #ifdef _LARGEFILE64_SOURCE + #ifdef __LP64__ + retval=aio_return(&first_ce->myaiocb); +@@ -497,12 +498,13 @@ + if(retval < first_ce->myaiocb.aio_nbytes) + #endif + { +- printf("aio_return error2: ret %d %d\n",retval,errno); ++ printf("aio_return error2: ret %zd %d\n",retval,errno); + #ifdef _LARGEFILE64_SOURCE + #ifdef __LP64__ +- printf("aio_return error2: fd %d offset %lld buffer %lx size %d Opcode %d\n", ++ tmp = (long long int)first_ce->myaiocb.aio_offset; ++ printf("aio_return error2: fd %d offset %lld buffer %lx size %zd Opcode %d\n", + first_ce->myaiocb.aio_fildes, +- first_ce->myaiocb.aio_offset, ++ tmp, + (long)(first_ce->myaiocb.aio_buf), + first_ce->myaiocb.aio_nbytes, + first_ce->myaiocb.aio_lio_opcode +@@ -806,6 +808,7 @@ + off64_t max; + long long depth; + { ++ long long int tmp = 0; + off64_t a_offset,r_offset; + long long a_size; + struct cache_ent *ce,*first_ce=0; +@@ -835,7 +838,7 @@ + ; + } + if(ret) +- printf("aio_error 3: ret %d %d\n",ret,errno); ++ printf("aio_error 3: ret %zd %d\n",ret,errno); + #ifdef _LARGEFILE64_SOURCE + #ifdef __LP64__ + if(ce->oldbuf != ce->myaiocb.aio_buf || +@@ -885,12 +888,13 @@ + if(retval < ce->myaiocb.aio_nbytes) + #endif + { +- printf("aio_return error4: ret %d %d\n",retval,errno); ++ printf("aio_return error4: ret %zd %d\n",retval,errno); + #ifdef _LARGEFILE64_SOURCE + #ifdef __LP64__ +- printf("aio_return error4: fd %d offset %lld buffer %lx size %d Opcode %d\n", ++ tmp = (long long int)ce->myaiocb.aio_offset; ++ printf("aio_return error4: fd %d offset %lld buffer %lx size %zd Opcode %d\n", + ce->myaiocb.aio_fildes, +- ce->myaiocb.aio_offset, ++ tmp, + (long)(ce->myaiocb.aio_buf), + ce->myaiocb.aio_nbytes, + ce->myaiocb.aio_lio_opcode +@@ -903,9 +907,10 @@ + 
ce->myaiocb64.aio_lio_opcode + #endif + #else +- printf("aio_return error4: fd %d offset %d buffer %lx size %d Opcode %d\n", ++ tmp = (long long int)ce->myaiocb.aio_offset; /* widened to long long, so the format below must use %lld */ ++ printf("aio_return error4: fd %d offset %lld buffer %lx size %zd Opcode %d\n", + ce->myaiocb.aio_fildes, +- ce->myaiocb.aio_offset, ++ tmp, + (long)(ce->myaiocb.aio_buf), + ce->myaiocb.aio_nbytes, + ce->myaiocb.aio_lio_opcode +@@ -948,7 +953,7 @@ + if(errno==EAGAIN) + goto again; + else +- printf("error returned from aio_read(). Ret %d errno %d\n",ret,errno); ++ printf("error returned from aio_read(). Ret %zd errno %d\n",ret,errno); + } + } + if(stride==0) /* User does not want read-ahead */ +@@ -1013,7 +1018,7 @@ + ; + } + if(ret) +- printf("aio_error 4: ret %d %d\n",ret,errno); ++ printf("aio_error 4: ret %zd %d\n",ret,errno); + #ifdef _LARGEFILE64_SOURCE + #ifdef __LP64__ + if(first_ce->oldbuf != first_ce->myaiocb.aio_buf || +@@ -1045,12 +1050,13 @@ + if(retval < first_ce->myaiocb.aio_nbytes) + #endif + { +- printf("aio_return error5: ret %d %d\n",retval,errno); ++ printf("aio_return error5: ret %zd %d\n",retval,errno); + #ifdef _LARGEFILE64_SOURCE + #ifdef __LP64__ +- printf("aio_return error5: fd %d offset %lld buffer %lx size %d Opcode %d\n", ++ tmp = (long long int)first_ce->myaiocb.aio_offset; ++ printf("aio_return error5: fd %d offset %lld buffer %lx size %zd Opcode %d\n", + first_ce->myaiocb.aio_fildes, +- first_ce->myaiocb.aio_offset, ++ tmp, + (long)(first_ce->myaiocb.aio_buf), + first_ce->myaiocb.aio_nbytes, + first_ce->myaiocb.aio_lio_opcode +@@ -1063,9 +1069,10 @@ + first_ce->myaiocb64.aio_lio_opcode + #endif + #else +- printf("aio_return error5: fd %d offset %ld buffer %lx size %d Opcode %d\n", ++ tmp = (long long int)first_ce->myaiocb.aio_offset; /* widened to long long, so the format below must use %lld */ ++ printf("aio_return error5: fd %d offset %lld buffer %lx size %zd Opcode %d\n", + first_ce->myaiocb.aio_fildes, +- first_ce->myaiocb.aio_offset, ++ tmp, + (long)(first_ce->myaiocb.aio_buf), + first_ce->myaiocb.aio_nbytes, + 
first_ce->myaiocb.aio_lio_opcode +@@ -1204,7 +1211,7 @@ + } + else + { +- printf("Error in aio_write: ret %d errno %d count %lld\n",ret,errno,gc->w_count); ++ printf("Error in aio_write: ret %zd errno %d count %lld\n",ret,errno,gc->w_count); + /* + printf("aio_write_no_copy: fd %d buffer %x offset %lld size %d\n", + ce->myaiocb64.aio_fildes, +@@ -1367,6 +1374,7 @@ + async_wait_for_write(gc) + struct cache *gc; + { ++ long long int tmp = 0; + struct cache_ent *ce; + size_t ret,retval; + if(gc->w_head==0) +@@ -1397,12 +1405,13 @@ + } + if(ret) + { +- printf("aio_error 5: ret %d %d\n",ret,errno); ++ printf("aio_error 5: ret %zd %d\n",ret,errno); + #ifdef _LARGEFILE64_SOURCE + #ifdef __LP64__ +- printf("fd %d offset %lld size %d\n", ++ tmp = (long long int)ce->myaiocb.aio_offset; /* same aiocb member the other arguments of this branch read */ ++ printf("fd %d offset %lld size %zd\n", + ce->myaiocb.aio_fildes, +- ce->myaiocb.aio_offset, ++ tmp, + ce->myaiocb.aio_nbytes); + #else + printf("fd %d offset %lld size %d\n", +@@ -1466,6 +1475,7 @@ + long long depth; + char *free_addr; + { ++ long long int tmp = 0; + struct cache_ent *ce; + size_t ret; + long long direct = 1; +@@ -1503,13 +1513,14 @@ + } + else + { +- printf("Error in aio_write: ret %d errno %d\n",ret,errno); ++ printf("Error in aio_write: ret %zd errno %d\n",ret,errno); + #ifdef _LARGEFILE64_SOURCE + #ifdef __LP64__ +- printf("aio_write_no_copy: fd %d buffer %lx offset %lld size %d\n", ++ tmp = (long long int)ce->myaiocb.aio_offset; ++ printf("aio_write_no_copy: fd %d buffer %lx offset %lld size %zd\n", + ce->myaiocb.aio_fildes, + (long)(ce->myaiocb.aio_buf), +- ce->myaiocb.aio_offset, ++ tmp, + ce->myaiocb.aio_nbytes); + #else + printf("aio_write_no_copy: fd %d buffer %lx offset %lld size %d\n", diff --git a/test/automated/vfs-tests.d/ltp b/test/automated/vfs-tests.d/ltp new file mode 100755 index 0000000..b557532 --- /dev/null +++ b/test/automated/vfs-tests.d/ltp @@ -0,0 +1,103 @@ +#!/bin/bash + +#default URL, in case this script is run separately +if [ ! 
$URL ] +then + URL=http://devorange.clemson.edu/pvfs +fi + +LTPVER="20081130" +THISDATE=`date +%F-%R` +LTP_ARCHIVE=ltp-full-${LTPVER}.tgz +DOWNLOAD=${URL}/${LTP_ARCHIVE} + +cd ${EXTRA_TESTS} + +# get rid of any old copies +sudo rm -rf ltp-full-${LTPVER} +sudo rm -f ltp-full-${LTPVER}.tgz + +# download +echo Downloading LTP... +wget -q ${DOWNLOAD} > /dev/null +if [ "${?}" != 0 ] +then + echo "Error: failed to download ${DOWNLOAD}." + exit 1; +fi + +tar -xvzf ${LTP_ARCHIVE} > /dev/null +if [ "${?}" != 0 ] +then + echo "Error: failed to untar ${DOWNLOAD}." + exit 1; +fi + +# fix some pvfs specific problems +cd ltp-full-${LTPVER} +for patch in \ + ${VFS_SCRIPTS}/ltp-20080630-zoo-path.patch \ + ${VFS_SCRIPTS}/ltp-20080630-accept4-wrapper.patch \ + ${VFS_SCRIPTS}/ltp-full-20081130-no-signalfd.patch \ + ${VFS_SCRIPTS}/ltp-full-20081130-unzip.patch; do + patch -p1 < $patch || { echo "Error: failed to apply $patch to LTP."; exit 1; } +done +if [ "${?}" != 0 ] +then + echo "Error: failed to apply patches to LTP." + exit 1; +fi + +echo Compiling LTP... +export CFLAGS="-g" +make >& /dev/null +if [ "${?}" != 0 ] +then + echo "Error: failed to build LTP." + exit 1; +fi + +# NOTE: this does not install anything outside of the ltp directory. It +# just configures the test programs so that they can be executed. We +# deliberately avoid running make install at the top level because that +# _would_ install files in /opt/ltp unnecessarily. +cd testcases +sudo make install > /dev/null +if [ "${?}" != 0 ] +then + echo "Error: failed to make install LTP testcases." + exit 1; +fi +cd ../ + +# copy pvfs friendly test cases; we should pass all of these +cp ${VFS_SCRIPTS}/ltp-pvfs-testcases runtest/ +mkdir -p ${PVFS2_MOUNTPOINT}/ltp-tmp +chmod 777 ${PVFS2_MOUNTPOINT}/ltp-tmp +umask 0 + +# run ltp +echo Running LTP... 
+sudo ./runltp -p -l `pwd`/../ltp-pvfs-testcases-${THISDATE}.log -d ${PVFS2_MOUNTPOINT}/ltp-tmp -f ltp-pvfs-testcases -z ${EXTRA_TESTS}/zoo.tmp >& `pwd`/../ltp-pvfs-testcases-$THISDATE.output +LTPRET=${?} +if [ "${LTPRET}" != 0 ] +then + echo "Error: either failed to invoke LTP, or at least one test failed" +fi + +cd .. + +FAILCOUNT=`grep FAIL ltp-pvfs-testcases-$THISDATE.log | wc -l` + +if [ "${FAILCOUNT}" != 0 -o "${LTPRET}" != 0 ] +then + echo "Error: failed the following LTP test cases:" + grep FAIL ltp-pvfs-testcases-$THISDATE.log + echo "log file: ${EXTRA_TESTS}/ltp-pvfs-testcases-$THISDATE.log" + echo "record of stdout and stderr: ${EXTRA_TESTS}/ltp-pvfs-testcases-$THISDATE.output" + exit 1 +fi + +echo "Completed LTP tests." + +exit 0 diff --git a/test/automated/vfs-tests.d/ltp-20080630-accept4-wrapper.patch b/test/automated/vfs-tests.d/ltp-20080630-accept4-wrapper.patch new file mode 100644 index 0000000..cbfbc80 --- /dev/null +++ b/test/automated/vfs-tests.d/ltp-20080630-accept4-wrapper.patch @@ -0,0 +1,21 @@ +diff -Naupr ltp-full-20081130/testcases/kernel/syscalls/accept4/accept4_01.c ltp-full-20081130-mod/testcases/kernel/syscalls/accept4/accept4_01.c +--- ltp-full-20081130/testcases/kernel/syscalls/accept4/accept4_01.c 2008-11-19 04:34:38.000000000 -0600 ++++ ltp-full-20081130-mod/testcases/kernel/syscalls/accept4/accept4_01.c 2010-02-02 15:40:12.000000000 -0600 +@@ -147,7 +147,7 @@ setup() + + + static int +-accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags) ++accept4_internal(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags) + { + #ifdef DEBUG + tst_resm(TINFO, "Calling accept4(): flags = %x", flags); +@@ -202,7 +202,7 @@ do_test(int lfd, struct sockaddr_in *con + die("Connect Error"); + + addrlen = sizeof(struct sockaddr_in); +- acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen, ++ acceptfd = accept4_internal(lfd, (struct sockaddr *) &claddr, &addrlen, + closeonexec_flag | nonblock_flag); + if 
(acceptfd == -1) + die("accept4() Error"); diff --git a/test/automated/vfs-tests.d/ltp-20080630-hackbench-limits.patch b/test/automated/vfs-tests.d/ltp-20080630-hackbench-limits.patch new file mode 100644 index 0000000..73a7204 --- /dev/null +++ b/test/automated/vfs-tests.d/ltp-20080630-hackbench-limits.patch @@ -0,0 +1,11 @@ +diff -burpN -x cscope.out -x MakefileHost -x MakefileTarget ltp-full-20080630/testcases/kernel/sched/cfs-scheduler/hackbench.c ltp-full-20080630.modified/testcases/kernel/sched/cfs-scheduler/hackbench.c +--- ltp-full-20080630/testcases/kernel/sched/cfs-scheduler/hackbench.c 2008-06-28 15:19:08.000000000 -0500 ++++ ltp-full-20080630.modified/testcases/kernel/sched/cfs-scheduler/hackbench.c 2008-07-25 10:47:08.000000000 -0500 +@@ -48,6 +48,7 @@ + /* */ + /******************************************************************************/ + #include ++#include + #include + #include + #include diff --git a/test/automated/vfs-tests.d/ltp-20080630-zoo-path.patch b/test/automated/vfs-tests.d/ltp-20080630-zoo-path.patch new file mode 100644 index 0000000..f3ce559 --- /dev/null +++ b/test/automated/vfs-tests.d/ltp-20080630-zoo-path.patch @@ -0,0 +1,53 @@ +--- ltp-full-20080630/runltp 2008-06-28 16:19:02.000000000 -0400 ++++ ltp-full-20080630-new/runltp 2008-07-21 15:47:54.000000000 -0400 +@@ -79,6 +79,7 @@ setup() + export TMP="${TMPBASE}/ltp-$$" + export TMPDIR="${TMP}" + export PATH="${PATH}:${LTPROOT}/testcases/bin" ++ export ZOOFILE="$$" + + [ -d $LTPROOT/testcases/bin ] || + { +@@ -152,6 +153,7 @@ usage() + -v Print more verbose output to screen. + -w CMDFILEADDR Uses wget to get the user's list of testcases. + -x INSTANCES Run multiple instances of this testsuite. ++ -z ZOOFILE Specify an alternate path to zoo file. 
+ + example: ./${0##*/} -c 2 -i 2 -m 2,4,10240,1 -D 2,10,10240,1 -p -q -l /tmp/result-log.$$ -o /tmp/result-output.$$ -C /tmp/result-failed.$$ -d ${PWD} + +@@ -191,7 +193,7 @@ main() + local DEFAULT_FILE_NAME_GENERATION_TIME=`date +"%Y_%b_%d-%Hh_%Mm_%Ss"` + version_date=`head -n 1 $LTPROOT/ChangeLog` + +- while getopts a:c:C:d:D:f:ehi:g:l:m:Nno:pqr:s:t:T:vw:x: arg ++ while getopts a:c:C:d:D:f:ehi:g:l:m:Nno:pqr:s:t:T:vw:x:z: arg + do case $arg in + a) EMAIL_TO=$OPTARG + ALT_EMAIL_OUT=1;; +@@ -377,6 +379,7 @@ main() + sleep 10 + INSTANCES="-x $OPTARG -O ${TMP}";; + ++ z) ZOOFILE=$OPTARG;; + \?) usage;; + esac + done +@@ -624,7 +627,7 @@ main() + fi + + [ ! -z "$QUIET_MODE" ] && { echo "INFO: Test start time: $(date)" ; } +- PAN_COMMAND="${LTPROOT}/pan/pan $QUIET_MODE -e -S $INSTANCES $DURATION -a $$ \ ++ PAN_COMMAND="${LTPROOT}/pan/pan $QUIET_MODE -e -S $INSTANCES $DURATION -a $ZOOFILE \ + -n $$ $PRETTY_PRT -f ${TMP}/alltests $LOGFILE $OUTPUTFILE $FAILCMDFILE" + if [ ! -z "$VERBOSE_MODE" ] ; then + echo "COMMAND: $PAN_COMMAND" +@@ -652,7 +655,7 @@ main() + + echo "Running tests......." + test_start_time=$(date) +- ${LTPROOT}/pan/pan $QUIET_MODE -e -S $INSTANCES $DURATION -a $$ -n $$ $PRETTY_PRT -f ${TMP}/alltests $LOGFILE $OUTPUTFILE $FAILCMDFILE ++ ${LTPROOT}/pan/pan $QUIET_MODE -e -S $INSTANCES $DURATION -a $ZOOFILE -n $$ $PRETTY_PRT -f ${TMP}/alltests $LOGFILE $OUTPUTFILE $FAILCMDFILE + + if [ $? 
-eq 0 ]; then + echo "INFO: pan reported all tests PASS" diff --git a/test/automated/vfs-tests.d/ltp-full-20081130-no-signalfd.patch b/test/automated/vfs-tests.d/ltp-full-20081130-no-signalfd.patch new file mode 100644 index 0000000..e925942 --- /dev/null +++ b/test/automated/vfs-tests.d/ltp-full-20081130-no-signalfd.patch @@ -0,0 +1,14 @@ +--- ltp-full-20081130/include/config.h.default 2008-11-12 14:30:36.000000000 -0500 ++++ ltp-full-20081130-new/include/config.h.default 2010-06-29 11:47:50.000000000 -0400 +@@ -8,6 +8,11 @@ + #define HAVE_LINUX_TYPES_H 1 + + /* signalfd() is in glibc-2.7+ */ ++/* OrangeFS is currently running glibc-2.5, so we will comment out these ++ * definitions for now. ++*/ ++/* + #define HAVE_SYS_SIGNALFD_H 1 + #define HAVE_SIGNALFD 1 + #define HAVE_SIGNALFD_SIGINFO_SSI_SIGNO 1 ++*/ diff --git a/test/automated/vfs-tests.d/ltp-full-20081130-unzip.patch b/test/automated/vfs-tests.d/ltp-full-20081130-unzip.patch new file mode 100644 index 0000000..82f87e8 --- /dev/null +++ b/test/automated/vfs-tests.d/ltp-full-20081130-unzip.patch @@ -0,0 +1,61 @@ +diff -Naupr ltp-full-20081130/testcases/commands/unzip/Makefile ltp-full-20081130-new/testcases/commands/unzip/Makefile +--- ltp-full-20081130/testcases/commands/unzip/Makefile 2003-03-04 00:37:41.000000000 -0500 ++++ ltp-full-20081130-new/testcases/commands/unzip/Makefile 2010-06-29 14:25:12.000000000 -0400 +@@ -4,7 +4,7 @@ all: + install: + ln -f unzip_tests.sh ../../bin/unzip_tests.sh + ln -f tst_unzip_file.zip ../../bin/tst_unzip_file.zip +- ln -f tst_unzip_file.zip /tmp/tst_unzip_file.zip ++ ln -f tst_unzip_file.zip ../../tmp/tst_unzip_file.zip + + clean: + rm -f tst_unzip_file.zip +diff -Naupr ltp-full-20081130/testcases/commands/unzip/unzip_genfile.sh ltp-full-20081130-new/testcases/commands/unzip/unzip_genfile.sh +--- ltp-full-20081130/testcases/commands/unzip/unzip_genfile.sh 2003-03-04 00:37:41.000000000 -0500 ++++ ltp-full-20081130-new/testcases/commands/unzip/unzip_genfile.sh 2010-06-29 
14:26:19.000000000 -0400 +@@ -31,17 +31,17 @@ + + # Create directories and fill them with files. + +-numdirs=3 # number of directories to create +-numfiles=3 # number of file to create in each directory +-dirname=/tmp/tst_unzip.dir # name of the base directory +-dircnt=0 # index into number of dirs created in loop +-fcnt=0 # index into number of files created in loop +-RC=0 # return value from commands ++numdirs=3 # number of directories to create ++numfiles=3 # number of file to create in each directory ++dirname=../../tmp/tst_unzip.dir # name of the base directory ++dircnt=0 # index into number of dirs created in loop ++fcnt=0 # index into number of files created in loop ++RC=0 # return value from commands + + while [ $dircnt -lt $numdirs ] + do +- dirname=$dirname/d.$dircnt +- mkdir -p $dirname &>/dev/null || RC=$? ++ dirname2=$dirname/d.$dircnt ++ mkdir -p $dirname2 &>/dev/null || RC=$? + if [ $RC -ne 0 ] + then + echo "unzip_genfile.sh: ERROR: while creating $numdirs dirs." +@@ -50,7 +50,7 @@ do + fcnt=0 + while [ $fcnt -lt $numfiles ] + do +- touch $dirname/f.$fcnt ++ touch $dirname2/f.$fcnt + if [ $RC -ne 0 ] + then + echo "unzip_genfile.sh: ERROR: creating $numdirs dirs." +@@ -63,8 +63,8 @@ done + + # Create ZIP file. 
+ +-zip -r tst_unzip_file.zip /tmp/tst_unzip.dir &>/dev/null ++zip -r tst_unzip_file.zip $dirname &>/dev/null + +-rm -fr /tmp/tst_unzip.* &>/dev/null ++rm -fr $dirname &>/dev/null + + exit $RC diff --git a/test/automated/vfs-tests.d/ltp-pvfs-testcases b/test/automated/vfs-tests.d/ltp-pvfs-testcases new file mode 100644 index 0000000..528ff14 --- /dev/null +++ b/test/automated/vfs-tests.d/ltp-pvfs-testcases @@ -0,0 +1,323 @@ +# LTP test cases for PVFS +# known to work with LTP version 20060717, not sure about later versions + +access03 access03 +asyncio02 asyncio02 +chdir02 chdir02 +chown01 chown01 +close08 close08 +creat09 creat09 +dup01 dup01 +dup02 dup02 +dup03 dup03 +dup04 dup04 +fchmod01 fchmod01 +fchown01 fchown01 +fcntl02 fcntl02 +fcntl03 fcntl03 +fcntl04 fcntl04 +fcntl07 fcntl07 +fcntl08 fcntl08 +fpathconf01 fpathconf01 +fstat01 fstat01 +fstatfs01 fstatfs01 +fsync01 fsync01 +lseek01 lseek01 +lseek02 lseek02 +lseek03 lseek03 +lseek05 lseek05 +lstat02 lstat02 +mkdir01 mkdir01 +mkdir08 mkdir08 +open03 open03 +pathconf01 pathconf01 +read01 read01 +readdir01 readdir01 +readlink02 readlink02 +rename02 rename02 +rmdir04 rmdir04 +rmdir05 rmdir05 +select01 select01 +select02 select02 +stat05 stat05 +stat06 stat06 +statfs01 statfs01 +sync01 sync01 +umask01 umask01 +unlink05 unlink05 +unlink07 unlink07 +unlink08 unlink08 +write01 write01 +symlink01 symlink01 +symlink02 symlink02 +readlink01 symlink01 -T readlink01 +lstat01 symlink01 -T lstat01 +mkdir05 symlink01 -T mkdir05 +rmdir03 symlink01 -T rmdir03 +chdir01 symlink01 -T chdir01 +unlink01 symlink01 -T unlink01 +chmod01 symlink01 -T chmod01 +utime01 symlink01 -T utime01 +rename01 symlink01 -T rename01 +open01 symlink01 -T open01 +abort01 ulimit -c 1024;abort01 +access02 access02 +access04 access04 +access05 access05 +chdir01 chdir01 +chdir01A symlink01 -T chdir01 +chdir03 chdir03 +chdir04 chdir04 +chmod01A symlink01 -T chmod01 +chown05 chown05 +chroot01 chroot01 +chroot02 chroot02 +chroot03 chroot03 +chroot04 
chroot04 +close01 close01 +close02 close02 +creat01 creat01 +creat03 creat03 +creat04 creat04 +creat05 creat05 +creat06 creat06 +creat07 creat07 -F $LTPROOT/testcases/bin/test1 +creat08 creat08 +dup06 dup06 +dup07 dup07 +dup201 dup201 +dup202 dup202 +dup203 dup203 +dup204 dup204 +dup205 dup205 +fchdir01 fchdir01 +fchdir02 fchdir02 +fchdir03 fchdir03 +fchown04 export change_owner=$LTPROOT/testcases/bin/change_owner;fchown04 +fchown05 fchown05 +fcntl01 fcntl01 +fcntl06 fcntl06 +fcntl12 fcntl12 +fcntl13 fcntl13 +fcntl18 fcntl18 +fdatasync01 fdatasync01 +fdatasync02 fdatasync02 +fstat02 fstat02 +fstat03 fstat03 +fstat04 fstat04 +fstat05 fstat05 +fstatfs02 fstatfs02 +fsync03 fsync03 +ftruncate01 ftruncate01 +ftruncate02 ftruncate02 +ftruncate03 ftruncate03 +ftruncate04 ftruncate04 +getcwd01 getcwd01 +getcwd02 getcwd02 +getcwd03 getcwd03 +getdents01 getdents01 +getdents02 getdents02 +getdents03 getdents03 +getdents04 getdents04 +ioperm01 ioperm01 +iopl01 iopl01 +iopl02 iopl02 +lchown01 lchown01 +link06 link06 +link07 link07 +llseek01 llseek01 +llseek02 llseek02 +lseek06 lseek06 +lseek07 lseek07 +lseek08 lseek08 +lseek09 lseek09 +lstat01 lstat01 +lstat03 lstat03 +mallopt01 mallopt01 +mkdir02 mkdir02 +mkdir03 mkdir03 +mkdir04 mkdir04 +mkdir05 mkdir05 +mkdir05A symlink01 -T mkdir05 +mkdir09 mkdir09 +mmap06 mmap06 +mmap07 mmap07 +mmap08 mmap08 +modify_ldt01 modify_ldt01 +modify_ldt02 modify_ldt02 +mprotect01 mprotect01 +mremap02 mremap02 +mremap03 mremap03 +mremap04 mremap04 +msync03 msync03 +msync04 msync04 +msync05 msync05 +nftw01 nftw01 +nftw6401 nftw6401 +open01A symlink01 -T open01 +open02 open02 +open04 open04 +open05 open05 +open07 open07 +open08 open08 +open09 open09 +open10 open10 +pipe01 pipe01 +pipe02 pipe02 +pipe03 pipe03 +pipe04 pipe04 +pipe05 pipe05 +pipe06 pipe06 +pipe07 pipe07 +pipe08 pipe08 +pipe09 pipe09 +pipe10 pipe10 +pipe11 pipe11 +prctl01 prctl01 +prctl02 prctl02 +pread01 pread01 +pread02 pread02 +pread03 pread03 +profil01 profil01 +ptrace01 ptrace01 
+ptrace02 ptrace02 +ptrace03 ptrace03 +pwrite01 pwrite01 +pwrite02 pwrite02 +pwrite03 pwrite03 +pwrite04 pwrite04 +read02 read02 +read04 read04 +readdir02 readdir02 +readlink01A symlink01 -T readlink01 +readlink01 readlink01 +readlink03 readlink03 +readlink04 cp -f $LTPROOT/testcases/bin/creat_slink $TMP; readlink04 +readv01 readv01 +readv02 readv02 +readv03 readv03 +rename01 rename01 +rename01A symlink01 -T rename01 +rename03 rename03 +rename04 rename04 +rename05 rename05 +rename06 rename06 +rename07 rename07 +rename08 rename08 +rename09 rename09 +rename10 rename10 +rename14 rename14 +rmdir01 rmdir01 +rmdir02 rmdir02 +rmdir03A symlink01 -T rmdir03 +stat01 stat01 +stat02 stat02 +stat03 stat03 +statfs02 statfs02 +statfs03 statfs03 +symlink03 symlink03 +symlink04 symlink04 +symlink05 symlink05 +sync02 sync02 +syscall01 syscall01 +sysconf01 sysconf01 +sysfs02 sysfs02 +sysfs03 sysfs03 +sysfs04 sysfs04 +sysfs05 sysfs05 +sysfs06 sysfs06 +sysinfo01 sysinfo01 +sysinfo02 sysinfo02 +truncate01 truncate01 +truncate02 truncate02 +truncate03 truncate03 +truncate04 truncate04 +umask02 umask02 +umask03 umask03 +uname01 uname01 +uname02 uname02 +uname03 uname03 +ustat01 ustat01 +ustat02 ustat02 +utime01 utime01 +utime01A symlink01 -T utime01 +utime02 utime02 +utime03 utime03 +utime04 utime04 +utime05 utime05 +utime06 utime06 +write02 write02 +write03 write03 +write05 write05 +writev02 writev02 +writev05 writev05 +pipeio_3 pipeio -T pipeio_3 -c 5 -s 4090 -i 100 -u -b -f x80 +pipeio_4 pipeio -T pipeio_4 -c 5 -s 4090 -i 100 -u -f x80 +pipeio_6 pipeio -T pipeio_6 -c 5 -s 5000 -i 10 -b -u -f x80 +pipeio_8 pipeio -T pipeio_8 -c 5 -s 5000 -i 10 -u -f x80 +openfile01 openfile -f10 -t10 +statvfs01 statvfs01 +stat04 symlink01 -T stat04 +lstat01A symlink01 -T lstat01 + + +# TESTS THAT ARE KNOWN TO FAIL +############################################### + +# this is a known issue, not likely to be fixed. 
The posix semantics are not +# well defined for writev in some cases and pvfs2 differs in its +# interpretation from ext3 +# http://www.beowulf-underground.org/pipermail/pvfs2-developers/2005-August/001449.html + +# writev01 writev01 +# writev03 writev03 +# writev04 writev04 + +# PVFS does not support mkfifo, needed by a few pipeio,lseek,unlink tests + +# These tests use tests/pipeio to put pipes (named or unnamed) through a workout +# +# pipeio_1 pipeio -T pipeio_1 -c 5 -s 4090 -i 100 -b -f x80 + +# spawns 5 children to write 100 chunks of 4090 bytes to an unnamed pipe +# using non-blocking I/O +# pipeio_5 pipeio -T pipeio_5 -c 5 -s 5000 -i 10 -b -f x80 +# unlink06 unlink06 + +# PVFS does not support mknod, needed by this lseek,open,read test +# lseek10 lseek10 +# open06 open06 +# read03 read03 +# write04 write04 + +# PVFS does not support the S_ISVTX (sticky) permission bit +# rename12 rename12 +# rmdir03 rmdir03 + +# PVFS does not support hard links +# rename13 rename13 + +# PVFS does not support close-on-exec (see F_SETFD in fcntl man page) +# open01 1 FAIL : Save test bit cleared, but should not have been +# open01 open01 + +# PVFS does not support suid? 
+# chown04 cp -p $LTPROOT/testcases/bin/change_owner $TMP;chown04 +# fchmod06 cp -p $LTPROOT/testcases/bin/change_owner $TMP;fchmod06 +# lchown02 cp -p $LTPROOT/testcases/bin/create_link $TMP; lchown02 + +# PVFS does not support flocks +# fcntl05 fcntl05 +# fcntl22 fcntl22 +# fcntl09 fcntl09 +# fcntl10 fcntl10 +# fcntl11 fcntl11 +# fcntl14 fcntl14 +# fcntl15 fcntl15 +# fcntl17 fcntl17 +# fcntl19 fcntl19 +# fcntl20 fcntl20 +# fcntl21 fcntl21 + +# this test is specific to ext2 +# sysfs01 sysfs01 diff --git a/test/client/mpi-io/module.mk.in b/test/client/mpi-io/module.mk.in index 8fcd37f..5310cc4 100644 --- a/test/client/mpi-io/module.mk.in +++ b/test/client/mpi-io/module.mk.in @@ -3,4 +3,6 @@ DIR := client/mpi-io MPIIOTESTSRC += \ $(DIR)/mpi-io-test.c \ $(DIR)/mpi-md-test.c \ - $(DIR)/mpi-unbalanced-test.c + $(DIR)/mpi-unbalanced-test.c \ + $(DIR)/multi-md-test.c \ + $(DIR)/multi-md-test-size-sweep.c diff --git a/test/client/mpi-io/mpi-io-test.c b/test/client/mpi-io/mpi-io-test.c index 9075d53..81548d2 100644 --- a/test/client/mpi-io/mpi-io-test.c +++ b/test/client/mpi-io/mpi-io-test.c @@ -36,6 +36,7 @@ /* DEFAULT VALUES FOR OPTIONS */ static int64_t opt_block = 16*1024*1024; static int opt_iter = 1; +static int opt_individual_file = 0; static int opt_coll = 0; static int opt_correct = 0; static int opt_sync = 0; @@ -51,6 +52,7 @@ static int opt_pvfstab_set = 0; static int parse_args(int argc, char **argv); static void usage(void); static void handle_error(int errcode, char *str); +static int check_count(int count, MPI_Datatype type, MPI_Status *status); /* global vars */ static int mynod = 0; @@ -79,6 +81,7 @@ int main(int argc, char **argv) int nchars=0; int namelen; char processor_name[MPI_MAX_PROCESSOR_NAME]; + char file_name[1024]; /* startup MPI and determine the rank of this process */ MPI_Init(&argc,&argv); @@ -91,6 +94,15 @@ int main(int argc, char **argv) if (opt_verbose) fprintf(stdout,"Process %d of %d is on %s\n", mynod, nprocs, processor_name); + + if 
(opt_individual_file) + { + sprintf(file_name, "%s_%d", opt_file, mynod); + } + else + { + strncpy(file_name, opt_file, 1023); + } if (mynod == 0) printf("# Using mpi-io calls.\n"); @@ -125,7 +137,7 @@ int main(int argc, char **argv) else { comm = MPI_COMM_SELF; } - err = MPI_File_open(comm, opt_file, + err = MPI_File_open(comm, file_name, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); if (err != MPI_SUCCESS) { handle_error(err, "MPI_File_open"); @@ -142,11 +154,22 @@ int main(int argc, char **argv) /* reading and writing to the same block is cheating, but sometimes * we want to measure cached performance of file servers */ if (opt_single == 1) + { seek_position = 0; + } else + { + if (opt_individual_file) + { + seek_position = (j*opt_block); + } + else + { /* seek to an appropriate position depending on the iteration * and rank of the current process */ - seek_position = (j*iter_jump)+(mynod*opt_block); + seek_position = (j*iter_jump)+(mynod*opt_block); + } + } MPI_File_seek(fh, seek_position, MPI_SEEK_SET); @@ -167,13 +190,15 @@ int main(int argc, char **argv) err = MPI_File_write(fh, buf, nchars, MPI_CHAR, &status); } if(err){ - fprintf(stderr, "node %d, write error: %s\n", mynod, - strerror(errno)); + handle_error(err, "MPI_File_write/write_all"); } + if (opt_correct && !check_count(nchars, MPI_CHAR, &status)) { + my_correct = 0; + fprintf(stderr, "short write"); + } if (opt_sync) sync_err = MPI_File_sync(fh); if (sync_err) { - fprintf(stderr, "node %d, sync error: %s\n", mynod, - strerror(errno)); + handle_error(err, "MPI_File_sync"); } /* discover the ending time of the operation */ @@ -187,7 +212,7 @@ int main(int argc, char **argv) err = MPI_File_close(&fh); if(err){ - fprintf(stderr, "node %d, close error after write\n", mynod); + handle_error(err, "MPI_File_close"); } /* wait for everyone to synchronize at this point */ @@ -197,7 +222,7 @@ int main(int argc, char **argv) err = MPI_File_open(comm, opt_file, MPI_MODE_CREATE | MPI_MODE_RDWR, 
MPI_INFO_NULL, &fh); if (err < 0) { - fprintf(stderr, "node %d, open error: %s\n", mynod, strerror(errno)); + handle_error(err, "MPI_File_open"); goto die_jar_jar_die; } @@ -207,14 +232,23 @@ int main(int argc, char **argv) for (j=0; j < opt_iter; j++) { /* reading and writing to the same block is cheating, but sometimes * we want to measure cached performance of file servers */ - if (opt_single == 1) { - seek_position = 0; - } - else { - /* seek to an appropriate position depending on the iteration - * and rank of the current process */ - seek_position = (j*iter_jump)+(mynod*opt_block); - } + if (opt_single == 1) + { + seek_position = 0; + } + else + { + if (opt_individual_file) + { + seek_position = (j*opt_block); + } + else + { + /* seek to an appropriate position depending on the iteration + * and rank of the current process */ + seek_position = (j*iter_jump)+(mynod*opt_block); + } + } MPI_File_seek(fh, seek_position, MPI_SEEK_SET); @@ -236,8 +270,11 @@ int main(int argc, char **argv) read_tim += (etim - stim); if (err < 0) { - fprintf(stderr, "node %d, read error, loc = %lld: %s\n", - mynod, (long long) mynod*opt_block, strerror(myerrno)); + handle_error(err, "MPI_File_write/write_all"); + } + if (opt_correct && !check_count(nchars, MPI_CHAR, &status)) { + my_correct = 0; + fprintf(stderr, "short read"); } /* if the user wanted to check correctness, compare the write @@ -270,7 +307,7 @@ int main(int argc, char **argv) /* close the file */ err = MPI_File_close(&fh); if (err) { - fprintf(stderr, "node %d, close error after write\n", mynod); + handle_error(err, "MPI_File_close"); } /* compute the read and write times */ @@ -350,7 +387,7 @@ static int parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "b:i:f:p:CcyShvrw")) != EOF) { + while ((c = getopt(argc, argv, "b:i:f:p:CcyShvrwI")) != EOF) { switch (c) { case 'b': /* block size */ opt_block = atoi(optarg); @@ -358,6 +395,9 @@ static int parse_args(int argc, char **argv) case 'i': /* 
iterations */ opt_iter = atoi(optarg); break; + case 'I': /* individual file per process */ + opt_individual_file = 1; + break; case 'f': /* filename */ strncpy(opt_file, optarg, 255); break; @@ -398,6 +438,13 @@ static int parse_args(int argc, char **argv) break; } } + + if ( opt_individual_file && opt_coll ) + { + printf("Flags individual file in combination with collective " + "I/O operation not compatible\n"); + exit(1); + } return(0); } @@ -409,6 +456,7 @@ static void usage(void) printf(" -c verify correctness of file data [default: off]\n"); printf(" -C perform operations Collectively [default: off]\n"); printf(" -i iterations [default: 1]\n"); + printf(" -I individual file per process [default: off]\n"); printf(" -f filename [default: /foo/test.out]\n"); printf(" -p path to pvfs2tab file to use [default: notset]\n"); printf(" -S all process write to same Single region of file [default: off]\n"); @@ -426,7 +474,13 @@ static void handle_error(int errcode, char *str) MPI_Error_string(errcode, msg, &resultlen); fprintf(stderr, "%s: %s\n", str, msg); - MPI_Abort(MPI_COMM_WORLD, 1); +} + +static int check_count(int count, MPI_Datatype type, MPI_Status *status) +{ + int statcount; + MPI_Get_count(status, type, &statcount); + return (statcount==count); } /* @@ -435,8 +489,6 @@ static void handle_error(int errcode, char *str) * c-basic-offset: 3 * tab-width: 3 * - * vim: ts=3 * End: + * vim: ts=3 */ - - diff --git a/test/client/mpi-io/mpi-md-test.c b/test/client/mpi-io/mpi-md-test.c index 68a9c43..4ac87ef 100644 --- a/test/client/mpi-io/mpi-md-test.c +++ b/test/client/mpi-io/mpi-md-test.c @@ -8,7 +8,11 @@ * The timing and command-line parsing were so useful that this was further * extended to test resize operations * - * usage: -d /path/to/directory -n number_of_files [-O] [-R] + * And while the default (and most useful) mode is to compare collective + * open/create/resize, it is sometimes instructive to compare with independent + * access + * + * usage: -d 
/path/to/directory -n number_of_files [-O] [-R] [-D] [-i] */ #include @@ -138,26 +142,33 @@ int opt_nfiles; char opt_basedir[PATH_MAX]; int opt_do_open=0; int opt_do_resize=0; +int opt_do_delete=0; +int opt_do_indep=0; void usage(char *name); int parse_args(int argc, char **argv); void handle_error(int errcode, char *str); int test_opens(int nfiles, char * test_dir, MPI_Info info); int test_resize(int rank, int iterations, char * test_dir, MPI_Info info); +int test_delete(int rank, int size, int nfiles, char * test_dir, MPI_Info info); void usage(char *name) { - fprintf(stderr, "usage: %s -d /path/to/directory -n #_of_files [TEST}\n", name); + fprintf(stderr, "usage: %s -d /path/to/directory -n #_of_files [TEST] [MODE]\n", name); fprintf(stderr, " where TEST is one of:\n" - " -O test file open times\n" - " -R test file resize times\n"); + " -O test file open times (if files do not exist tests file creation times)\n" + " -R test file resize times\n" + " -D test file deletion times\n" + " and MODE is one of (applies to -O and -R only):\n" + " -i independent operations\n" + " -c collective operations (default)\n"); exit(-1); } int parse_args(int argc, char **argv) { int c; - while ( (c = getopt(argc, argv, "d:n:OR")) != -1 ) { + while ( (c = getopt(argc, argv, "d:n:ORDic")) != -1 ) { switch (c) { case 'd': strncpy(opt_basedir, optarg, PATH_MAX); @@ -171,13 +182,19 @@ int parse_args(int argc, char **argv) case 'R': opt_do_resize = 1; break; + case 'D': + opt_do_delete = 1; + break; + case 'i': + opt_do_indep = 1; + break; case '?': case ':': default: usage(argv[0]); } } - if ( (opt_do_open == 0) && (opt_do_resize == 0) ) { + if ( (opt_do_open == 0) && (opt_do_resize == 0) && (opt_do_delete == 0)) { usage(argv[0]); } return 0; @@ -214,6 +231,8 @@ int main(int argc, char **argv) test_opens(opt_nfiles, opt_basedir, info); else if (opt_do_resize) test_resize(rank, opt_nfiles, opt_basedir, info); + else if (opt_do_delete) + test_delete(rank, nprocs, opt_nfiles, opt_basedir, 
info); test_end = MPI_Wtime(); test_time = test_end - test_start; @@ -224,13 +243,19 @@ int main(int argc, char **argv) if (rank == 0) { printf("%d procs ", nprocs); if (opt_do_open) { - printf("%f seconds to open %d files: %f secs/open\n", - total_time, opt_nfiles, - (total_time)/opt_nfiles); + printf("%f seconds to open %d files: %f secs/open: %s\n", + total_time, opt_nfiles, + (total_time)/opt_nfiles, + (opt_do_indep? "independent" : "collective")); } else if (opt_do_resize) { - printf("%f seconds to perform %d resize ops: %f secs/operation\n", - total_time, opt_nfiles, - (total_time)/opt_nfiles); + printf("%f seconds to perform %d resize ops: %f secs/operation: %s\n", + total_time, opt_nfiles, + (total_time)/opt_nfiles, + (opt_do_indep? "independent" : "collective")); + } else if (opt_do_delete) { + printf("%f seconds to perform %d delete ops: %f secs/operation\n", + total_time, opt_nfiles, + (total_time)/opt_nfiles); } } @@ -245,11 +270,15 @@ int test_opens(int nfiles, char * test_dir, MPI_Info info) int i; char test_file[PATH_MAX]; MPI_File fh; + MPI_Comm comm = MPI_COMM_WORLD; int errcode; + if (opt_do_indep) + comm = MPI_COMM_SELF; + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "pvfs2.h" + +void vfs_mktestdir(int rank, int* n_ops); +void vfs_rmtestdir(int rank, int* n_ops); +void vfs_create(int rank, int* n_ops); +void vfs_rm(int rank, int* n_ops); +void vfs_prep(int rank, int* n_ops); +void vfs_write(int rank, int* n_ops); +void vfs_read(int rank, int* n_ops); +void vfs_close(int rank, int* n_ops); +void vfs_print_error(int errcode, char *str); + +void mpi_print_error(int errcode, char *str); + +void pvfs_prep(int rank, int* n_ops); +void pvfs_mktestdir(int rank, int* n_ops); +void pvfs_rmtestdir(int rank, int* n_ops); +void pvfs_create(int rank, int* n_ops); +void pvfs_rm(int rank, int* n_ops); +void pvfs_read(int rank, int* n_ops); +void pvfs_write(int rank, int* n_ops); + 
+void pvfs_print_error(int errcode, char *str); + +int* vfs_fds = NULL; +struct stat* vfs_stats = NULL; +char* vfs_buf = NULL; + +#define PVFS_DIRENT_COUNT 32 +PVFS_object_ref* pvfs_refs = NULL; +char* pvfs_buf = NULL; +PVFS_object_ref pvfs_basedir; +PVFS_object_ref pvfs_testdir; +PVFS_credentials pvfs_creds; + +struct api_ops +{ + char *name; + void (*prep) (int rank, int* n_ops); + void (*mktestdir) (int rank, int* n_ops); + void (*rmtestdir) (int rank, int* n_ops); + void (*create) (int rank, int* n_ops); + void (*rm) (int rank, int* n_ops); + void (*write) (int rank, int* n_ops); + void (*read) (int rank, int* n_ops); + void (*close) (int rank, int* n_ops); + void (*print_error) (int errorcode, char* str); +}; + +struct api_ops api_table[] = { + { + .name = "VFS", + .prep = vfs_prep, + .mktestdir = vfs_mktestdir, + .rmtestdir = vfs_rmtestdir, + .create = vfs_create, + .rm = vfs_rm, + .read = vfs_read, + .write = vfs_write, + .close = vfs_close, + .print_error = vfs_print_error, + }, + { + .name = "PVFS_sys", + .prep = pvfs_prep, + .mktestdir = pvfs_mktestdir, + .rmtestdir = pvfs_rmtestdir, + .create = pvfs_create, + .rm = pvfs_rm, + .read = pvfs_read, + .write = pvfs_write, + .close = NULL, + .print_error = pvfs_print_error, + }, + { + .name = "MPI-IO", + .prep = NULL, + .mktestdir = vfs_mktestdir, /* borrow vfs mkdir */ + .rmtestdir = vfs_rmtestdir, /* borrow vfs rmdir */ + .create = NULL, + .rm = NULL, + .read = NULL, + .write = NULL, + .close = NULL, + .print_error = mpi_print_error, + }, + {0} +}; + +struct test_results +{ + char* op; + int n_ops; + double time; + int size; + int nprocs; +}; + +#define MAX_TEST_COUNT 1000 + +#define CHECK_MAX_TEST(__x) \ +do { \ + if(__x > MAX_TEST_COUNT) \ + { \ + fprintf(stderr, "Error: exceeded MAX_TEST_COUNT.\n"); \ + MPI_Abort(MPI_COMM_WORLD, 1); \ + } \ +} while(0) + +struct test_results* result_array; + +#ifndef PATH_MAX +#define PATH_MAX FILENAME_MAX +#endif + +extern char *optarg; +int opt_nfiles = -1; +char 
opt_basedir[PATH_MAX] = {0}; +int opt_api = -1; +int opt_size = -1; +int opt_start_size = -1; +int opt_end_size = -1; +int opt_interval_size = -1; +unsigned int opt_timeout = 100; + +void usage(char *name); +int parse_args(int argc, char **argv); +void handle_error(int errcode, char *str); +int run_test_phase(double* elapsed_time, int* size, int* n_ops, char* fn_name, + void (*fn)(int, int*), int rank, int procs); +void print_result(int rank, struct test_results* result); + +void usage(char *name) +{ + int i = 0; + + fprintf(stderr, + "usage: %s -d base_dir -n num_files_per_proc -s size_spec -a api <-t tcache_timeout_ms>\n", name); + fprintf(stderr, " where api is one of:\n"); + while(api_table[i].name != NULL) + { + fprintf(stderr, " %d: %s\n", i, api_table[i].name); + i++; + } + fprintf(stderr, " and size_spec is of the form:\n"); + fprintf(stderr, " ,,\n"); + fprintf(stderr, " and must be non-zero.\n"); + + exit(-1); +} + +int parse_args( + int argc, + char **argv) +{ + int c; + int ret; + while ((c = getopt(argc, argv, "d:n:a:s:t:")) != -1) + { + switch (c) + { + case 'd': + strncpy(opt_basedir, optarg, PATH_MAX); + break; + case 'n': + opt_nfiles = atoi(optarg); + break; + case 'a': + opt_api = atoi(optarg); + break; + case 't': + ret = sscanf(optarg, "%u", &opt_timeout); + if(ret != 1) + { + usage(argv[0]); + exit(-1); + } + break; + case 's': + ret = sscanf(optarg, "%d,%d,%d", + &opt_start_size, &opt_interval_size, &opt_end_size); + if(ret != 3) + { + usage(argv[0]); + exit(-1); + } + break; + case '?': + case ':': + default: + usage(argv[0]); + exit(-1); + } + } + if(opt_basedir[0] == 0 || opt_nfiles < 1 || opt_api < 0 || opt_start_size < 1 || opt_end_size < 1 || opt_interval_size < 1 || opt_start_size > opt_end_size) + { + usage(argv[0]); + exit(-1); + } + + return 0; +} + +void handle_error( + int errcode, + char *str) +{ + if(api_table[opt_api].print_error) + { + api_table[opt_api].print_error(errcode, str); + } + else + { + fprintf(stderr, "Error: %s: 
%d\n", str, errcode); + } + MPI_Abort(MPI_COMM_WORLD, 1); +} + +void pvfs_print_error( + int errcode, + char *str) +{ + PVFS_perror(str, errcode); +} + + +void vfs_print_error( + int errcode, + char *str) +{ + fprintf(stderr, "%s: %s\n", str, strerror(errcode)); +} + +void mpi_print_error( + int errcode, + char *str) +{ + char msg[MPI_MAX_ERROR_STRING]; + int resultlen; + MPI_Error_string(errcode, msg, &resultlen); + fprintf(stderr, "%s: %s\n", str, msg); +} + +int main( + int argc, + char **argv) +{ + int rank, nprocs; + int test = 0; + + /* NOTE: could probably skip tracking each phase and just print and + * discard results after each phase. Hanging onto this for now in case + * it is useful later + */ + result_array = malloc(MAX_TEST_COUNT*sizeof(*result_array)); + if(!result_array) + { + perror("malloc"); + return(-1); + } + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + parse_args(argc, argv); + + /* key for data tables */ + if(rank == 0) + { + printf("# sysint tests using acache and ncache timeout of: %u ms.\n", + opt_timeout); + printf("# \t\t\t\t\t\t