Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
pvfs2-osd/examples/heartbeat/Filesystem-qla-monitor
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
836 lines (755 sloc)
23.2 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
# | |
# $Id: Filesystem-qla-monitor,v 1.2 2008/01/28 19:45:17 pcarns Exp $ | |
# | |
# Support: linux-ha@lists.linux-ha.org | |
# License: GNU General Public License (GPL) | |
# | |
# Filesystem | |
# Description: Manages a Filesystem on a shared storage medium. | |
# Original Author: Eric Z. Ayers (eric.ayers@compgen.com) | |
# Original Release: 25 Oct 2000 | |
# | |
# Modified to support monitoring of a QLogic adapter, 2007 | |
# Relies on underlying scripts named fs-instance-alarm.pl and | |
# fs-power-control.pl to take action in the event of monitoring failure | |
# | |
# usage: ./Filesystem {start|stop|status|monitor|validate-all|meta-data} | |
# | |
# OCF parameters are as below: | |
# OCF_RESKEY_device | |
# OCF_RESKEY_directory | |
# OCF_RESKEY_fstype | |
# OCF_RESKEY_options | |
# OCF_RESKEY_fsname | |
# OCF_RESKEY_conf_dir | |
# | |
#OCF_RESKEY_device : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0 | |
# Or a -U or -L option for mount, or an NFS mount specification | |
#OCF_RESKEY_directory : the mount point for the filesystem | |
#OCF_RESKEY_fstype : optional name of the filesystem type. e.g. ext2 | |
#OCF_RESKEY_options : options to be given to the mount command via -o | |
#OCF_RESKEY_fsname : file system name (PVFS2) | |
#OCF_RESKEY_conf_dir : file system conf directory (PVFS2) | |
# | |
# | |
# An example usage in /etc/ha.d/haresources: | |
# node1 10.0.0.170 Filesystem::/dev/sda1::/data1::ext2 | |
# or | |
# node1 10.0.0.170 Filesystem::-Ldata1::/data1::ext2 | |
# or | |
# node1 10.0.0.170 Filesystem::server:/data1::/data1::nfs::ro | |
# | |
# This assumes you want to manage a filesystem on a shared (scsi) bus. | |
# Do not put this filesystem in /etc/fstab. This script manages all of | |
# that for you. | |
# | |
# If you are interested in High Availability, you will probably also want | |
# some sort of external hardware RAID controller in front of the actual | |
# disks. I don't mean a RAID controller embedded in the host controller - | |
# it has to be an external controller. | |
# | |
# It can also be an internal RAID controller if the controller supports | |
# failover. IBM's ServeRAID controller does this, and it automatically | |
# prohibits concurrent access too, so it's pretty cool in this application. | |
# | |
# There is a script for software RAID-1 included in this directory. Right | |
# now, I wouldn't recommend using software RAID (see notes in the Raid1 script) | |
# | |
# NOTE: There is no locking (such as a SCSI reservation) being done here. | |
# I would if the SCSI driver could properly maintain the reservation, | |
# which it cannot, even with the 'scsi reservation' patch submitted | |
# earlier this year by James Bottomley. The patch minimizes the | |
# bus resets caused by a RESERVATION_CONFLICT return, and helps the | |
# reservation stay when 2 nodes contend for a reservation, | |
# but it does not attempt to recover the reservation in the | |
# case of a bus reset. | |
# | |
# What all this means is that if 2 nodes mount the same file system | |
# read-write, the filesystem is going to become corrupted. | |
# | |
# As a result, you should use this together with the stonith option | |
# and redundant, independent communications paths. | |
# | |
# If you don't do this, don't blame us when you scramble your disk. | |
# | |
# Note: the ServeRAID controller does prohibit concurrent access
# In this case, you don't actually need STONITH, but redundant comm is | |
# still an excellent idea. | |
# | |
####################################################################### | |
# Initialization:
# Source the OCF shell helper library.  Newer versions of heartbeat ship it
# as a dot-file under resource.d; fall back to the older location otherwise.
if [ ! -f /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs ]; then
    . /usr/lib/heartbeat/ocf-shellfuncs
else
    . /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs
fi

#######################################################################

# Absolute paths of the external utilities used by this script
MODPROBE=/sbin/modprobe
FSCK=/sbin/fsck
FUSER=/sbin/fuser
MOUNT=/bin/mount
UMOUNT=/bin/umount
BLOCKDEV=/sbin/blockdev
# check_util PATH
# Succeed when PATH is an executable file; otherwise log a setup error and
# terminate the whole script with $OCF_ERR_GENERIC.
check_util () {
    [ -x "$1" ] && return 0

    ocf_log err "Setup problem: Couldn't find utility $1"
    exit $OCF_ERR_GENERIC
}
# Print the supported actions followed by the RCS identification string.
usage() {
    # The heredoc is unquoted so that $0 expands.  The "$" signs of the RCS
    # keyword are escaped; otherwise the shell would expand "$Id" as an
    # (unset) variable and drop it from the output.
    cat <<-EOT
usage: $0 {start|stop|status|monitor|validate-all|meta-data}
\$Id: Filesystem-qla-monitor,v 1.2 2008/01/28 19:45:17 pcarns Exp \$
EOT
}
# Print the OCF resource-agent meta-data (XML) describing the parameters and
# actions of this agent on stdout.
#
# "fstype" is advertised as not required: its description calls it optional
# and the start/validate code paths explicitly handle an empty fstype.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Filesystem">
<version>1.0</version>
<longdesc lang="en">
Resource script for Filesystem. It manages a Filesystem on a shared storage medium.
</longdesc>
<shortdesc lang="en">Filesystem resource agent</shortdesc>
<parameters>
<parameter name="device" unique="0" required="1">
<longdesc lang="en">
The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification.
</longdesc>
<shortdesc lang="en">block device</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="directory" unique="0" required="1">
<longdesc lang="en">
The mount point for the filesystem.
</longdesc>
<shortdesc lang="en">mount point</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="fsname" unique="0" required="1">
<longdesc lang="en">
Name of PVFS2 file system
</longdesc>
<shortdesc lang="en">File system</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="conf_dir" unique="0" required="1">
<longdesc lang="en">
Path to file system conf directory
</longdesc>
<shortdesc lang="en">File system conf directory</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="fstype" unique="0" required="0">
<longdesc lang="en">
The optional type of filesystem to be mounted.
</longdesc>
<shortdesc lang="en">filesystem type</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="options" unique="0">
<longdesc lang="en">
Any extra options to be given as -o options to mount.
</longdesc>
<shortdesc lang="en">options</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="ocfs2_cluster" unique="0">
<longdesc lang="en">
The name (UUID) of the OCFS2 cluster this filesystem is part of,
iff this is an OCFS2 resource and there's more than one cluster. You
should not need to specify this.
</longdesc>
<shortdesc lang="en">OCFS2 cluster name/UUID</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="ocfs2_configfs" unique="0">
<longdesc lang="en">
Mountpoint of the cluster hierarchy below configfs. You should not
need to specify this.
</longdesc>
<shortdesc lang="en">OCFS2 configfs root</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="notify" timeout="60" />
<action name="status" depth="0" timeout="10" interval="10" start-delay="10" />
<action name="monitor" depth="0" timeout="10" interval="10" start-delay="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#
# flushbufs DEVICE
# Ask the kernel to drop its buffers for DEVICE via "blockdev --flushbufs".
# Intended to be called after unmounting and before mounting.  It may be
# unnecessary on 2.4 and later kernels, but it should not hurt either.
#
# Does nothing (and returns 0) unless $BLOCKDEV is set and executable and
# the resource was classified as a block device ($blockdevice = yes).
# Otherwise returns the exit status of blockdev.
#
flushbufs() {
    [ -n "$BLOCKDEV" ] || return 0
    [ -x "$BLOCKDEV" ] || return 0
    [ "$blockdevice" = "yes" ] || return 0

    $BLOCKDEV --flushbufs $1
}
# Print the current mount table, normalized to one "dev mountpoint fstype"
# triple per line.  /proc/mounts is used when it is a readable regular file;
# otherwise the output of the portable mount command is parsed instead.
list_mounts() {
    if [ -f "/proc/mounts" ] && [ -r "/proc/mounts" ]; then
        cut -d' ' -f1,2,3 /proc/mounts
        return
    fi
    $MOUNT | cut -d' ' -f1,3,5
}
# Decide whether the mounted filesystem sits on a real block device.
#
# Returns immediately when $blockdevice is already "yes".  For anything but
# network filesystems (nfs, smbfs, cifs) it looks up the device currently
# mounted on $MOUNTPOINT (the configured name could be a -L or -U option),
# stores it in $DEVICE and sets blockdevice=yes when that is a block device.
determine_blockdevice() {
    # Use "=" (not "=="): this script runs under /bin/sh, where "==" is not
    # a portable test operator.  The variable is quoted so that an unset
    # value cannot turn the test into a syntax error.
    if [ "$blockdevice" = "yes" ]; then
        return 0
    fi

    # Get the current real device name, if possible.
    case "$FSTYPE" in
    nfs|smbfs|cifs)
        ;;
    *)
        DEVICE=`list_mounts | grep " $MOUNTPOINT " | cut -d' ' -f1`
        if [ -b "$DEVICE" ]; then
            blockdevice=yes
        fi
        ;;
    esac
}
# list_submounts PATH
# Print the mount points of every filesystem mounted below PATH (PATH itself
# excluded), in reverse sort order.
list_submounts() {
    local below_pattern=" $1/"

    list_mounts \
        | grep "$below_pattern" \
        | cut -d' ' -f2 \
        | sort -r
}
# Remove this filesystem's OCFS2 heartbeat membership directory
# ($OCFS2_FS_ROOT) once we have unmounted.
#
# We'll never see the post-stop notification: we're gone now, have
# unmounted, and thus should remove the membership ourselves.  This is done
# regardless of whether we were unmounted already, because the admin might
# have unmounted manually without clearing the membership directory.
ocfs2_cleanup() {
    if [ -d "$OCFS2_FS_ROOT" ]; then
        ocf_log info "$OCFS2_FS_ROOT: Removing membership directory."
        # Quoted: an unquoted expansion would be word-split and could make
        # "rm -rf" act on unintended paths.
        rm -rf "$OCFS2_FS_ROOT/"
    else
        ocf_log info "$OCFS2_FS_ROOT: Filesystem membership already gone."
    fi
}
# Check & initialize the OCFS2 specific variables.
#
# Sets OCFS2_CONFIGFS, OCFS2_UUID, OCFS2_CLUSTER, OCFS2_CLUSTER_ROOT and
# OCFS2_FS_ROOT.  Exits the script with $OCF_ERR_GENERIC when the resource
# is not a clone, is not on a block device, configfs is not available, the
# UUID cannot be read, or the cluster cannot be determined unambiguously.
ocfs2_init()
{
    if [ -z "$OCF_RESKEY_CRM_meta_clone_max" ]; then
        ocf_log err "ocfs2 must be run as a clone."
        exit $OCF_ERR_GENERIC
    fi

    if [ "$blockdevice" = "no" ]; then
        ocf_log err "$DEVICE: ocfs2 needs a block device instead."
        exit $OCF_ERR_GENERIC
    fi

    # First existing directory wins; the user-supplied one is tried first.
    for f in "$OCF_RESKEY_ocfs2_configfs" /sys/kernel/config/cluster /configfs/cluster ; do
        if [ -n "$f" ] && [ -d "$f" ]; then
            OCFS2_CONFIGFS="$f"
            break
        fi
    done
    if [ ! -d "$OCFS2_CONFIGFS" ]; then
        ocf_log err "ocfs2 needs configfs mounted."
        exit $OCF_ERR_GENERIC
    fi

    # Third column of the last line of "mounted.ocfs2 -d", dashes removed
    # and upper-cased.  A result of "UUID" means only the header was printed.
    OCFS2_UUID=$(mounted.ocfs2 -d "$DEVICE"|tail -1|awk '{print $3}'|tr -d -- -|tr a-z A-Z)
    if [ -z "$OCFS2_UUID" ] || [ "$OCFS2_UUID" = "UUID" ]; then
        ocf_log err "$DEVICE: Could not determine ocfs2 UUID."
        exit $OCF_ERR_GENERIC
    fi

    if [ -n "$OCF_RESKEY_ocfs2_cluster" ]; then
        OCFS2_CLUSTER=$(echo "$OCF_RESKEY_ocfs2_cluster" | tr a-z A-Z)
    else
        # One name per line: without the trailing newline, find would print
        # all directory names back to back and several clusters would be
        # counted as a single (bogus) one below.
        OCFS2_CLUSTER=$(find "$OCFS2_CONFIGFS" -maxdepth 1 -mindepth 1 -type d -printf '%f\n' 2>/dev/null)
        # Count the names by loading them into the positional parameters
        # (function-local, so the caller's arguments are unaffected).
        set -- $OCFS2_CLUSTER
        local n="$#"
        if [ $n -gt 1 ]; then
            ocf_log err "$OCFS2_CLUSTER: several clusters found."
            exit $OCF_ERR_GENERIC
        fi
        if [ $n -eq 0 ]; then
            ocf_log err "$OCFS2_CONFIGFS: no clusters found."
            exit $OCF_ERR_GENERIC
        fi
    fi

    OCFS2_CLUSTER_ROOT="$OCFS2_CONFIGFS/$OCFS2_CLUSTER"
    if [ ! -d "$OCFS2_CLUSTER_ROOT" ]; then
        ocf_log err "$OCFS2_CLUSTER: Cluster doesn't exist. Maybe o2cb hasn't been run?"
        exit $OCF_ERR_GENERIC
    fi

    OCFS2_FS_ROOT=$OCFS2_CLUSTER_ROOT/heartbeat/$OCFS2_UUID
}
#
# START: Start up the filesystem
#
# Loads the needed kernel modules, optionally runs fsck on block devices,
# and mounts $DEVICE on $MOUNTPOINT.
#
# Returns $OCF_SUCCESS when the filesystem is (or already was) mounted,
# $OCF_ERR_ARGS when the filesystem type is unknown to the kernel, and
# $OCF_ERR_GENERIC when fsck or mount fails.  Exits with $OCF_ERR_ARGS when
# the block device or the mount point directory does not exist.
#
Filesystem_start()
{
    local fsck_rc

    if [ "$FSTYPE" = "ocfs2" ]; then
        # "start" now has the notification data available; that
        # we're being started means we didn't get the
        # pre-notification, because we weren't running, so
        # process the information now first.
        ocf_log info "$OCFS2_UUID: Faking pre-notification on start."
        OCF_RESKEY_CRM_meta_notify_type="pre"
        OCF_RESKEY_CRM_meta_notify_operation="start"
        Filesystem_notify
    fi

    # See if the device is already mounted.
    if Filesystem_status >/dev/null 2>&1 ; then
        ocf_log info "Filesystem $MOUNTPOINT is already mounted."
        return $OCF_SUCCESS
    fi

    # Insert SCSI module
    # TODO: This probably should go away. Why should the filesystem
    # RA magically load a kernel module?
    $MODPROBE scsi_hostadapter >/dev/null 2>&1

    # With no $FSTYPE given we rely on the system already having the right
    # filesystem support.
    if [ -n "$FSTYPE" ]; then
        # Insert Filesystem module
        $MODPROBE $FSTYPE >/dev/null 2>&1
        # Compare the last field of each line exactly; a plain suffix match
        # would accept e.g. "fat" when only "vfat" is available.
        if ! awk -v fstype="$FSTYPE" '$NF == fstype { found = 1 } END { exit !found }' /proc/filesystems ; then
            ocf_log err "Couldn't find filesystem $FSTYPE in /proc/filesystems"
            return $OCF_ERR_ARGS
        fi
    fi

    # Check the filesystem & auto repair.
    # NOTE: Some filesystem types don't need this step... Please modify
    # accordingly
    if [ "$blockdevice" = "yes" ]; then
        if [ ! -b "$DEVICE" ] ; then
            ocf_log err "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
            exit $OCF_ERR_ARGS
        fi

        case "$FSTYPE" in
        ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs|cifs|smbfs|ocfs2)
            # No fsck is run for these types.
            ;;
        *)
            ocf_log info "Starting filesystem check on $DEVICE"
            if [ -z "$FSTYPE" ]; then
                $FSCK -a $DEVICE
            else
                $FSCK -t $FSTYPE -a $DEVICE
            fi
            fsck_rc=$?
            # NOTE: if any errors at all are detected, it returns non-zero
            # if the error is >= 4 then there is a big problem
            if [ $fsck_rc -ge 4 ]; then
                ocf_log err "Couldn't sucessfully fsck filesystem for $DEVICE"
                return $OCF_ERR_GENERIC
            fi
            ;;
        esac
    fi

    if [ ! -d "$MOUNTPOINT" ] ; then
        ocf_log err "Couldn't find directory [$MOUNTPOINT] to use as a mount point"
        exit $OCF_ERR_ARGS
    fi

    flushbufs $DEVICE

    # Mount the filesystem.
    # $options and $DEVICE are deliberately left unquoted: $options holds
    # "-o <opts>" and $DEVICE may be a mount option such as -L or -U, so
    # both may need to split into separate words.
    if [ -z "$FSTYPE" ]; then
        $MOUNT $options $DEVICE "$MOUNTPOINT"
    else
        $MOUNT -t $FSTYPE $options $DEVICE "$MOUNTPOINT"
    fi
    if [ $? -ne 0 ]; then
        ocf_log err "Couldn't mount filesystem $DEVICE on $MOUNTPOINT"
        if [ "$FSTYPE" = "ocfs2" ]; then
            ocfs2_cleanup
        fi
        return $OCF_ERR_GENERIC
    fi

    return $OCF_SUCCESS
}
# end of Filesystem_start
# Process notifications; this is the essential glue level for giving
# user-space membership events to a cluster-aware filesystem.  Right now,
# only OCFS2 is supported.
#
#  - pre-start:  set up all the nodes which will be active in our membership
#                for the filesystem.  (For the resource to be started, this
#                happens at the time of the actual 'start' operation.)
#  - post-start: nothing to do; ignored.
#  - pre-stop:   meaningless; a node cannot be removed before it has
#                unmounted.  Ignored.
#  - post-stop:  the stopped nodes are removed from the membership of the
#                other nodes.
#
# Note that this expects that the base cluster is already active; ie o2cb
# has been started and populated $OCFS2_CLUSTER_ROOT/node/ already.  This can
# be achieved by simply having o2cb run on all nodes by the CRM too.
#
# Reads the OCF_RESKEY_CRM_meta_notify_* variables.  Returns $OCF_SUCCESS
# for ignored notifications and $OCF_ERR_GENERIC for a non-ocfs2 mount or
# when the local node is not on the active list; otherwise returns the
# status of the last membership operation.
Filesystem_notify() {
    if [ "$FSTYPE" != "ocfs2" ]; then
        # One of the cases which shouldn't occur; it should have
        # been caught much earlier. Still, you know ...
        ocf_log err "$DEVICE: Notification received for non-ocfs2 mount."
        return $OCF_ERR_GENERIC
    fi

    local n_type="$OCF_RESKEY_CRM_meta_notify_type"
    local n_op="$OCF_RESKEY_CRM_meta_notify_operation"
    local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname"
    local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname"
    local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname"
    # Start from an empty list so nothing leaks in from the environment or
    # from an earlier call.
    local n_exists=""
    local n_kept=""
    local n_myself n_fs node entry

    ocf_log info "$OCFS2_UUID: notify: $n_type for $n_op"
    ocf_log info "$OCFS2_UUID: notify active: $n_active"
    ocf_log info "$OCFS2_UUID: notify stop: $n_stop"
    ocf_log info "$OCFS2_UUID: notify start: $n_start"

    case "$n_type" in
    pre)
        case "$n_op" in
        stop)
            ocf_log info "$OCFS2_UUID: ignoring pre-notify for stop."
            return $OCF_SUCCESS
            ;;
        start)
            # These are about to become active; prepare to
            # communicate with them.
            n_active="$n_active $n_start"
            ;;
        esac
        ;;
    post)
        case "$n_op" in
        stop)
            # Drop every node named in notify_stop_uname; these have been
            # stopped and can no longer be considered active.  Names are
            # compared as whole words, so stopping "node1" does not mangle
            # "node10", and no bash-only substitution is needed.
            for node in $n_active; do
                case " $n_stop " in
                *" $node "*) ;;
                *) n_kept="$n_kept $node" ;;
                esac
            done
            n_active="$n_kept"
            ;;
        start)
            ocf_log info "$OCFS2_UUID: ignoring post-notify for start."
            return $OCF_SUCCESS
            ;;
        esac
        ;;
    esac

    ocf_log info "$OCFS2_UUID: post-processed active: $n_active"

    n_myself=${HA_CURHOST:-$(uname -n | tr A-Z a-z)}
    ocf_log info "$OCFS2_UUID: I am node $n_myself."

    case " $n_active " in
    *" $n_myself "*) ;;
    *)  ocf_log err "$OCFS2_UUID: $n_myself (local) not on active list!"
        return $OCF_ERR_GENERIC
        ;;
    esac

    # (1) Walk the current membership: remember nodes that stay, remove
    #     nodes that are no longer active.
    if [ -d "$OCFS2_FS_ROOT" ]; then
        for entry in "$OCFS2_FS_ROOT"/* ; do
            # An empty directory leaves the glob unexpanded; skip that.
            # -L keeps dangling symlinks eligible for removal.
            [ -e "$entry" ] || [ -L "$entry" ] || continue
            n_fs="${entry##*/}"
            case " $n_active " in
            *" $n_fs "*)
                # Construct a list of nodes which are present
                # already in the membership.
                n_exists="$n_exists $n_fs"
                ocf_log info "$OCFS2_UUID: Keeping node: $n_fs"
                ;;
            *)
                # Node is in the membership currently, but not on our
                # active list. Must be removed.
                if [ "$n_op" = "start" ]; then
                    ocf_log warn "$OCFS2_UUID: Removing nodes on start"
                fi
                ocf_log info "$OCFS2_UUID: Removing dead node: $n_fs"
                if ! rm -f "$entry" ; then
                    ocf_log err "$OCFS2_UUID: Removal of $n_fs failed!"
                fi
                ;;
            esac
        done
    else
        ocf_log info "$OCFS2_UUID: heartbeat directory doesn't exist yet, creating."
        mkdir -p "$OCFS2_FS_ROOT"
    fi

    ocf_log info "$OCFS2_UUID: Existing node list: $n_exists"

    # (2) Link in every active node that is not in the membership yet.
    for entry in $n_active ; do
        case " $n_exists " in
        *" $entry "*)
            ocf_log info "$OCFS2_UUID: Already active: $entry"
            ;;
        *)
            if [ "$n_op" = "stop" ]; then
                ocf_log warn "$OCFS2_UUID: Adding nodes on stop"
            fi
            ocf_log info "$OCFS2_UUID: Activating node: $entry"
            if ! ln -s "$OCFS2_CLUSTER_ROOT/node/$entry" "$OCFS2_FS_ROOT/$entry" ; then
                ocf_log err "$OCFS2_CLUSTER_ROOT/node/$entry: failed to link"
            fi
            ;;
        esac
    done
}
#
# STOP: Unmount the filesystem
#
# Unmounts every filesystem mounted below $MOUNTPOINT and then $MOUNTPOINT
# itself.  Each unmount is attempted up to six times; after a failed attempt
# the processes using the mount are signalled through fuser (three rounds of
# SIGTERM, then three of SIGKILL) with a one second pause in between.
#
# Returns $OCF_SUCCESS when nothing was mounted or the last unmount
# succeeded, $OCF_ERR_GENERIC otherwise.
#
Filesystem_stop()
{
    # Only set for network filesystems below; cleared here so a stray value
    # from the environment is never passed to umount.
    umount_force=""

    # See if the device is currently mounted
    Filesystem_status >/dev/null 2>&1
    if [ $? -ne $OCF_NOT_RUNNING ]; then
        # Determine the real blockdevice this is mounted on (if
        # possible) prior to unmounting.
        determine_blockdevice

        # For networked filesystems, there's merit in trying -f:
        case "$FSTYPE" in
        nfs|cifs|smbfs) umount_force="-f" ;;
        esac

        # Umount all sub-filesystems mounted under $MOUNTPOINT/ too.
        for SUB in `list_submounts $MOUNTPOINT` $MOUNTPOINT; do
            # Report the path actually being unmounted in this iteration.
            ocf_log info "Trying to unmount $SUB"
            for sig in SIGTERM SIGTERM SIGTERM SIGKILL SIGKILL SIGKILL; do
                if $UMOUNT $umount_force $SUB ; then
                    rc=$OCF_SUCCESS
                    ocf_log info "unmounted $SUB successfully"
                    break
                else
                    rc=$OCF_ERR_GENERIC
                    ocf_log err "Couldn't unmount $SUB; trying cleanup with $sig"
                    # fuser returns a non-zero return code if none of the
                    # specified files is accessed or in case of a fatal
                    # error.
                    if $FUSER -$sig -m -k $SUB ; then
                        ocf_log info "Some processes on $SUB were signalled"
                    else
                        ocf_log info "No processes on $SUB were signalled"
                    fi
                    sleep 1
                fi
            done

            if [ $rc -ne $OCF_SUCCESS ]; then
                ocf_log err "Couldn't unmount $SUB, giving up!"
            fi
        done
    else
        # Already unmounted, wonderful.
        rc=$OCF_SUCCESS
    fi

    flushbufs $DEVICE

    if [ "$FSTYPE" = "ocfs2" ]; then
        ocfs2_cleanup
    fi

    return $rc
}
# end of Filesystem_stop
#
# STATUS: is the file system mounted and the SAN path alive?
#
# Runs the plain mount check first and passes its result through unchanged
# when the filesystem is not mounted.  When it is mounted, qla-monitor.pl is
# asked about $DEVICE; a non-zero exit from it is logged, reported through
# fs-instance-alarm.pl, followed by a "reboot" command for this host via
# fs-power-control.pl, and the function returns $OCF_ERR_GENERIC.
#
Filesystem_status_qla_monitor()
{
    Filesystem_status >/dev/null 2>&1
    ORIG_RET=$?

    # Not mounted: there is no path to probe.
    [ $ORIG_RET -eq $OCF_SUCCESS ] || return $ORIG_RET

    # the fs is mounted. Are the paths ok?
    if /usr/bin/qla-monitor.pl --device $DEVICE; then
        return $ORIG_RET
    fi

    # log as much as we can
    ocf_log info "Error: $DEVICE is dead."
    echo "Error: $DEVICE is dead on node $(hostname), failing over" >> /var/log/pvfs2/pvfs2-failover.log
    /usr/bin/fs-instance-alarm.pl --fs-name $fsname --ce $(hostname) --type PVFS2_HA --msg "$DEVICE is dead, failing over."
    # shoot ourselves in the head
    /usr/bin/fs-power-control.pl --fs-name $fsname --conf-dir $conf_dir --host $(hostname) --command reboot

    return $OCF_ERR_GENERIC
}
# end of Filesystem_status_qla_monitor
#
# STATUS: is the filesystem mounted or not?
#
# Returns $OCF_SUCCESS when $MOUNTPOINT appears in the mount table and
# $OCF_NOT_RUNNING otherwise.  The result is logged only for the "status"
# operation.  Leaves the result in $rc and the message in $msg.
#
Filesystem_status()
{
    # Assume "not mounted" until the mount table says otherwise.
    rc=$OCF_NOT_RUNNING
    msg="$MOUNTPOINT is unmounted (stopped)"

    if list_mounts | grep -q " $MOUNTPOINT " >/dev/null 2>&1; then
        rc=$OCF_SUCCESS
        msg="$MOUNTPOINT is mounted (running)"
    fi

    # TODO: For ocfs2, or other cluster filesystems, should we be
    # checking connectivity to other nodes here, or the IO path to
    # the storage?

    if [ "$OP" = "status" ]; then
        ocf_log info "$msg"
    fi

    return $rc
}
# end of Filesystem_status
#
# VALIDATE_ALL: Are the instance parameters valid?
# FIXME!! The only part that's useful is the return code.
# This code always returns $OCF_SUCCESS (!)
#
# Logs a warning when $MOUNTPOINT is set but is not a directory, and an
# informational message when $FSTYPE is found neither in /proc/filesystems
# nor in the running kernel's modules.dep.
#
Filesystem_validate_all()
{
    # Quoted operands: unquoted, an empty or whitespace-containing value
    # turns the test into a malformed expression and the warning is lost.
    if [ -n "$MOUNTPOINT" ] && [ ! -d "$MOUNTPOINT" ]; then
        ocf_log warn "Mountpoint $MOUNTPOINT does not exist"
    fi

    # Check if the $FSTYPE is workable
    # NOTE: Without inserting the $FSTYPE module, this step may be imprecise
    # TODO: This is Linux specific crap.
    if [ -n "$FSTYPE" ]; then
        if ! cut -f2 /proc/filesystems | grep -q "^$FSTYPE\$"; then
            modpath=/lib/modules/`uname -r`
            moddep=$modpath/modules.dep
            # Do we have $FSTYPE in modules.dep?
            if ! cut -d' ' -f1 "$moddep" | grep -q "^$modpath.*$FSTYPE\.k\?o:$"; then
                ocf_log info "It seems we do not have $FSTYPE support"
            fi
        fi
    fi

    #TODO: How to check the $options ?

    return $OCF_SUCCESS
}
# Check the arguments passed to this script: exactly one operation name.
if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi

OP=$1

# These operations do not require instance parameters, so they are served
# before any OCF_RESKEY_* validation; otherwise "meta-data" would fail
# whenever fsname or conf_dir is not set in the environment.
case $OP in
meta-data)
    meta_data
    exit $OCF_SUCCESS
    ;;
usage)
    usage
    exit $OCF_SUCCESS
    ;;
esac

# fsname and conf_dir are mandatory; they are passed to the alarm and power
# control scripts when monitoring fails.
if [ -z "$OCF_RESKEY_fsname" ]; then
    ocf_log err "Filesystem-qla-monitor must specify fsname!"
    # usage
    exit $OCF_ERR_GENERIC
fi

if [ -z "$OCF_RESKEY_conf_dir" ]; then
    ocf_log err "Filesystem-qla-monitor must specify conf_dir!"
    # usage
    exit $OCF_ERR_GENERIC
fi

# Check the OCF_RESKEY_ environment variables...
DEVICE=$OCF_RESKEY_device
FSTYPE=$OCF_RESKEY_fstype
if [ -n "$OCF_RESKEY_options" ]; then
    options="-o $OCF_RESKEY_options"
fi
fsname=$OCF_RESKEY_fsname
conf_dir=$OCF_RESKEY_conf_dir
# Classify $DEVICE: anything that is not empty, a mount option, an NFS
# specification or an SMB specification is treated as a block device.
blockdevice=no
case $DEVICE in
"")
    ocf_log err "Please set OCF_RESKEY_device to the device to be managed"
    exit $OCF_ERR_ARGS
    ;;
-*)
    # Oh... An option to mount instead... Typically -U or -L
    ;;
[^/]*:/*)
    # An NFS filesystem specification...
    ;;
//[^/]*/*)
    # An SMB filesystem specification...
    ;;
*)
    # On "start" a missing device is reported by Filesystem_start itself.
    if [ ! -b "$DEVICE" ] && [ "X$OP" != Xstart ] ; then
        ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
    fi
    blockdevice=yes
    ;;
esac

# Only ocfs2 (and nfs) may be run as a clone; refuse everything else.
case $FSTYPE in
ocfs2)
    ocfs2_init
    ;;
nfs)
    : # this is kind of safe too
    ;;
*)
    if [ -n "$OCF_RESKEY_CRM_meta_clone_max" ]; then
        ocf_log err "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!"
        ocf_log err "DO NOT RUN IT AS A CLONE!"
        ocf_log err "Politely refusing to proceed to avoid data corruption."
        exit $OCF_ERR_GENERIC
    fi
    ;;
esac

# It is possible that OCF_RESKEY_directory has one or even multiple trailing "/".
# But the output of `mount` and /proc/mounts do not.
# The directory is quoted throughout so that a path containing whitespace
# neither breaks the tests nor gets its whitespace collapsed by echo.
if [ -z "$OCF_RESKEY_directory" ]; then
    if [ "X$OP" = "Xstart" ] || [ "$blockdevice" = "no" ]; then
        ocf_log err "Please specify the directory"
        exit $OCF_ERR_ARGS
    fi
else
    MOUNTPOINT=$(echo "$OCF_RESKEY_directory" | sed 's/\/*$//')
    : ${MOUNTPOINT:=/}
    # At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/"
    # TODO: / mounted via Filesystem sounds dangerous. On stop, we'll
    # kill the whole system. Is that a good idea?
fi
# Check to make sure the utilities are found
for required_util in $MODPROBE $FSCK $FUSER $MOUNT $UMOUNT; do
    check_util $required_util
done

# "monitor" is not logged; every other operation is announced.
[ "$OP" = "monitor" ] || ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT"

# Dispatch the requested operation; its exit status becomes ours.
case $OP in
start)
    Filesystem_start
    ;;
notify)
    Filesystem_notify
    ;;
stop)
    Filesystem_stop
    ;;
status|monitor)
    Filesystem_status_qla_monitor
    ;;
validate-all)
    Filesystem_validate_all
    ;;
*)
    usage
    exit $OCF_ERR_UNIMPLEMENTED
    ;;
esac
exit $?