#!/usr/bin/env bash

# version 2024-03-11

# rgw-restore-bucket-index is an EXPERIMENTAL tool to use in case
# bucket index entries for objects in the bucket are somehow lost. It
# is expected to be needed and used rarely. A bucket name is provided
# and the data pool for that bucket is scanned for all head objects
# matching the bucket's marker. The rgw object name is then extracted
# from the rados object name, and `radosgw-admin object reindex ...`
# is used to add the bucket index entries.
#
# Because this script must process json objects, the `jq` tool must be
# installed on the system.
#
# Usage: see the usage() function below for details
#
# This tool is designed to be interactive, allowing the user to
# examine the list of objects to be reindexed before
# proceeding. However, if the "-y" option is provided, the
# script will not prompt the user and simply proceed.

# SIGTERM is the signal super_exit sends to abort the script: remove
# the temporary files, then leave with a failure status
trap 'clean ; exit 1' TERM
TOP_PID=$$
export TOP_PID

# IMPORTANT: the C locale pins down the order produced by 'sort', and
# 'ceph-diff-sorted' relies on this ordering
LC_ALL=C
export LC_ALL

# 1 means the temporary files are removed on completion; clean()
# leaves them alone for any other value
clean_temps=1
export clean_temps

# a named tab character, so explicit tabs are easier to see in code
TAB=$'\t'
export TAB


#
# helper functions
#

# Abort the whole script from wherever we are (function, pipeline
# stage or command substitution) by sending SIGTERM to the process
# group whose id is the top-level shell's pid (TOP_PID).
super_exit() {
    kill -s TERM "-${TOP_PID}"
}

# Print the command-line help on stderr, then abort the script via
# super_exit. The synopsis lists every option accepted by the getopts
# loop ("b:l:p:r:g:z:ydt:").
usage() {
  >&2 cat << EOF

Usage: $0 -b <bucket-name> [-l <rados-ls-file>] [-r <realm-name>]
          [-g <zonegroup-name>] [-z <zone-name>] [-p <pool>]
          [-t <tmp-dir>] [-y] [-d]

where:
  -b <bucket-name>     Required - name of the bucket to operate on
  -l <rados-ls-file>   Optional - file containing the output of 'rados ls -p <pool>'
  -r <realm-name>      Optional - specify the realm if not applying to the default realm
  -g <zonegroup-name>  Optional - specify the zonegroup if not applying to the default zonegroup
  -z <zone-name>       Optional - specify the zone if not applying to the default zone
  -p <pool>            Optional - data pool; if not provided will be inferred from bucket and zone information
  -t <tmp-dir>         Optional - specify a directory for temporary files other than the default of /tmp
  -y                   Optional - proceed with restoring without confirming with the user
                       USE WITH CAUTION.
  -d                   Optional - run with debugging output
EOF
  super_exit
}

# cleans all temporary files
# Remove the script's temporary files. Does nothing unless clean_temps
# is exactly 1.
clean() {
  [[ "$clean_temps" == 1 ]] || return 0

  # the expansions are left unquoted on purpose: temp_file_list is a
  # space-separated list that has to be split into individual paths
  rm -f $bkt_entry $temp_file_list \
     $zone_info $olh_info_enc $olh_info_json
}

# Abort the script (via super_exit) when the partition holding the
# temporary directory has no data space or no inodes left.
# Globals read: temp_dir
test_temp_space() {
    local pcent ipcent

    # use df to determine percentage of data and inodes used; strip
    # out spaces and percent signs from the output, so we just have a
    # number from 0 to 100
    pcent=$(df -k --output=pcent -- "$temp_dir" | tail -1 | sed 's/[ %]//g')
    ipcent=$(df -k --output=ipcent -- "$temp_dir" | tail -1 | sed 's/[ %]//g')

    # df reports "-" when it has no figure for a filesystem, and
    # prints nothing if it fails; neither can be compared numerically,
    # so treat such a value as "not full" and still honor the other one
    [[ "$pcent" =~ ^[0-9]+$ ]] || pcent=0
    [[ "$ipcent" =~ ^[0-9]+$ ]] || ipcent=0

    if [ "$pcent" -eq 100 ] || [ "$ipcent" -eq 100 ] ;then
        >&2 echo "ERROR: the temporary directory's partition is full, preventing continuation."
        >&2 echo "    NOTE: the temporary directory is \"${temp_dir}\"."
        >&2 df -h --output="target,used,avail,pcent,iused,iavail,ipcent" -- "$temp_dir"
        >&2 echo "    NOTE: cleaning temporary files before exiting...."
        super_exit
    fi
}

# number of seconds for a bucket index pending op to be completed via
# dir_suggest mechanism
export pending_op_secs=120

#
# sanity checks
#

export exit_code=0

# External tools checked for up front. Every missing tool is reported
# before exiting, rather than stopping at the first one.
tool_list="radosgw-admin ceph-dencoder jq"
for t in $tool_list ;do
    # `command -v` is a shell builtin; `which` is a separate program
    # that may itself be absent, which would make every tool look
    # missing
    if ! command -v "$t" > /dev/null 2>&1 ;then
        echo "ERROR: must have tool \`$t\` installed and on \$PATH for operation."
        exit_code=1
    fi
done
if [ "$exit_code" -ne 0 ] ;then
    exit $exit_code
fi

# Types that ceph-dencoder must list in its `list_types` output; the
# versioned-bucket code path decodes RGWOLHInfo with it.
dencode_list="RGWOLHInfo"
for t in $dencode_list ;do
    if ! ceph-dencoder list_types | grep -q -- "$t" ;then
        echo "ERROR: ceph-dencoder lacking module to decode ${t}."
        exit_code=1
    fi
done
if [ "$exit_code" -ne 0 ] ;then
    exit $exit_code
fi

# Prints the name of the bucket's data pool on stdout. Two sources in
# the bucket instance data are consulted, in priority order: the
# "explicit_placement" data pool, and then the "placement_rule",
# which is resolved against the zone's placement pools. (A pool given
# on the command line with -p outranks both; the caller only invokes
# this function when -p was not given, so no argument is read here.)
#
# Globals read: bkt_inst, zone_info, multisite_spec
#
# Meant to be called as $(get_pool). Because stdout is captured by the
# caller, the failure message goes to stderr; the script is then
# aborted via super_exit.
get_pool() {
  local expl_pool plmt_rule plmt_pool plmt_class plmt_rest pool

  # explicit_placement; "// empty" makes a missing or null value
  # produce no output instead of the literal string "null"
  expl_pool=$(jq -r '.data.bucket_info.bucket.explicit_placement.data_pool // empty' "$bkt_inst")
  if [ -n "$expl_pool" ] ;then
    echo "$expl_pool"
    return 0
  fi

  # placement_rule, of the form <placement-id>[/<storage-class>]
  plmt_rule=$(jq -r '.data.bucket_info.placement_rule // empty' "$bkt_inst")
  IFS=/ read -r plmt_pool plmt_class plmt_rest <<<"$plmt_rule"
  if [ -z "$plmt_class" ] ;then
    plmt_class=STANDARD
  fi

  # $multisite_spec is deliberately unquoted: it holds zero or more
  # separate command-line options
  radosgw-admin zone get $multisite_spec >"$zone_info" 2>/dev/null
  test_temp_space

  # The placement id must match exactly (a substring match would let
  # "default-placement" also select "default-placement-2"). Both
  # values are passed as jq variables so that names containing
  # characters such as "-" cannot be misparsed as jq syntax.
  pool=$(jq -r --arg placement_id "$plmt_pool" --arg storage_class "$plmt_class" \
      '.placement_pools[] | select(.key == $placement_id) | .val.storage_classes[$storage_class].data_pool // empty' \
      "$zone_info")

  if [ -z "$pool" ] ;then
      >&2 echo "ERROR: unable to determine pool."
      super_exit
      return 1
  fi
  echo "$pool"
}

# defaults for everything the command line can set
export bucket=""
export temp_dir=/tmp
pool=""
multisite_spec=""
lsoutput=""
debug=0
# Set to 1 by -y. Initialized explicitly so that a "proceed" variable
# inherited from the environment cannot silently skip the
# confirmation prompt.
proceed=""

while getopts "b:l:p:r:g:z:ydt:" o; do
    case "${o}" in
        b)
            bucket="${OPTARG}"
            ;;
        l)
            if [ -e "${OPTARG}" ]; then
                lsoutput="${OPTARG}"
            else
                echo 
                echo "ERROR: Provided 'rados ls' output file name does not exist. ${OPTARG}"
                exit 1
            fi
            ;;
        p)
            pool="${OPTARG}"
            ;;
        # -r, -g and -z accumulate into one string of radosgw-admin
        # options that is appended to every radosgw-admin call
        r)
            multisite_spec="$multisite_spec --rgw-realm=${OPTARG}"
            ;;
        g)
            multisite_spec="$multisite_spec --rgw-zonegroup=${OPTARG}"
            ;;
        z)
            multisite_spec="$multisite_spec --rgw-zone=${OPTARG}"
            ;;
        y)
            echo "NOTICE: This tool is currently considered EXPERIMENTAL."
            proceed=1
            ;;
        d)
            echo Debugging On
            debug=1
            # keep the temporary files around for inspection
            clean_temps=0
            ;;
        t)
            temp_dir="${OPTARG}"
            ;;
        *)
            echo
            usage
            exit 1 # usage should exit also
            ;;
    esac
done
shift $((OPTIND-1))

# Extra text spliced into the eval'd radosgw-admin command lines: in
# debug mode, verbose logging options; otherwise a redirection that
# discards radosgw-admin's stderr.
if [ "$debug" == 1 ] ;then
    export debugging_rgwadmin=" --debug-rgw=20 --debug-ms=20 "
else
    export debugging_rgwadmin=" 2>/dev/null "
fi

if [ ! -d "$temp_dir" ] ;then
    echo "ERROR: temporary directory $temp_dir is not a directory"
    exit 1
fi

# temporary files; the shell's pid keeps concurrent runs apart
export bkt_entry=${temp_dir}/rgwrbi-bkt-entry.$$
export bkt_inst=${temp_dir}/rgwrbi-bkt-inst.$$
export marker_ls=${temp_dir}/rgwrbi-marker-ls.$$
export obj_list=${temp_dir}/rgwrbi-object-list.$$
export obj_list_ver=${temp_dir}/rgwrbi-object-list-ver.$$
export zone_info=${temp_dir}/rgwrbi-zone-info.$$
export olh_info_enc=${temp_dir}/rgwrbi-olh-info-enc.$$
export olh_info_json=${temp_dir}/rgwrbi-olh-info-json.$$
export debug_log=${temp_dir}/rgwrbi-debug-log.$$

# the files that clean() removes; the debug log is not part of it
export temp_file_list="$bkt_entry $bkt_inst $marker_ls $obj_list $obj_list_ver $zone_info $olh_info_enc $olh_info_json"

# special code path for versioned buckets
#
# For every object name read from $obj_list this function:
#   1. finds the object's OLH line in $marker_ls (the line ending in
#      "_<name>", optionally followed by a tab and locator, that does
#      not contain "__:"),
#   2. reads that rados object's user.rgw.olh.info xattr, decodes it
#      as RGWOLHInfo and extracts .target.key.instance (the final
#      instance),
#   3. runs `rados stat2` on every $marker_ls line matching the name,
#      sorts the results on field 3 onward (mtime, per the comments
#      below), drops lines matching the final instance, and reduces
#      each remaining line to "<name><TAB><version-id>",
#   4. emits "<name><TAB><final instance>" last.
# The combined output, with trailing tabs removed, is written to
# $obj_list_ver. Stderr of the whole loop is discarded.
#
# Globals read: obj_list, marker_ls, pool, marker, debug, debug_log,
#               temp_dir, olh_info_enc, olh_info_json, TAB
# Globals written: olh_line, olh_obj, olh_loc, last_instance,
#                  filter_out_last_instance (none are declared local)
handle_versioned() {
    while read o ;do

	# determine object and locator for OLH
	# NOTE(review): $o is spliced into the awk regex as-is, so
	# regex metacharacters (and "/") in an object name are
	# interpreted by awk -- confirm this is acceptable
	olh_line=$(awk "/_$o(\t.*)?$/"' && !/__:/' $marker_ls)
	olh_obj=$(echo "$olh_line" | sed 's/\t.*//') # obj everything before tab
	# NOTE(review): if the line contains no tab the substitution
	# below does not match, leaving olh_loc equal to the whole line
	olh_loc=$(echo "$olh_line" | sed 's/^.*\t\(.*\)/\1/') # locator everything after tab

	# process OLH object; determine final instance or delete-marker
	rados -p $pool getxattr $olh_obj user.rgw.olh.info --object-locator "$olh_loc" >$olh_info_enc
	test_temp_space
	ceph-dencoder import $olh_info_enc type RGWOLHInfo decode dump_json >$olh_info_json
	test_temp_space
	last_instance=$(jq -r ".target.key.instance" $olh_info_json)
	if [ -z "$last_instance" ] ;then
	    # filters out entry without an instance
	    filter_out_last_instance="${marker}_[^_]"
	else
	    # filters out entry with an instance
	    filter_out_last_instance="$last_instance"
	fi

	# the redirection on "fi" sends all three echoes to the debug log
	if [ "$debug" == 1 ] ;then
	    echo "working on versioned $o"
	    echo "last instance is $last_instance"
	    echo "filter_out_last_instance is $filter_out_last_instance"
	fi >>$debug_log
	test_temp_space

	# we currently don't need the delete marker, but we can have access to it
	# delete_marker=$(jq -r ".removed" $olh_info_json) # true or false

	# NOTE(review): the IFS='\t' prefix below only sets IFS in
	# grep's environment, and its value is a literal backslash
	# followed by "t" rather than a tab. The `read obj loc` in the
	# loop therefore splits on the shell's current IFS (whitespace
	# by default), which looks like it would split object names
	# containing spaces -- confirm intended behavior.
	IFS='\t' grep -E "(__:.*[^_])?_$o(\t.*)?$" $marker_ls | # versioned head objects
	    while read obj loc ;do
		if [ "$debug" == 1 ] ;then
		    echo "obj=$obj ; loc=$loc" >>$debug_log
		fi
		test_temp_space
		rados -p $pool stat2 $obj --object-locator "$loc"
	    done | # output of stat2, which includes mtime
	    sort -T $temp_dir -k 3 | # stat2 but sorted by mtime earlier to later
	    grep -v -e "$filter_out_last_instance" | # remove the final instance in case it's not last

	    # sed 1) removes pool and marker, 2) removes indicator of
	    # version id, 3) removes obj name including escaped
	    # leading underscores, 4) inserts object name and tab at
	    # front of line, and 5) removes trailing tab. Note: after
	    # 3) the line will be empty or contain a version id, so 5)
	    # is for when that line is empty and 4 inserts a tab
	    # (step 5 is the `sed 's/\t$//'` applied to the whole
	    # loop's output at the closing "done" below)
	    sed -E \
		-e "s/.*${marker}//" \
		-e 's/^__://' \
		-e "s/_+${o}.*//" \
		-e "s/^/${o}\t/"
	echo "${o}${TAB}$last_instance" # now add the final instance; could be delete marker
    done <$obj_list 2>/dev/null | sed 's/\t$//' >$obj_list_ver
    test_temp_space

} # handle_versioned

if [ -z "$bucket" ]; then
  echo
  echo "ERROR: Bucket option ( -b ) is required."
  usage
  exit 1 # usage should exit also
fi

# The radosgw-admin calls below go through eval so that a stderr
# redirection carried in $debugging_rgwadmin takes effect. User-supplied
# values are shell-quoted with printf %q first, so that eval cannot
# re-interpret characters in them.
bucket_q=$(printf '%q' "$bucket")

# read bucket entry metadata
eval "radosgw-admin metadata get bucket:${bucket_q} $debugging_rgwadmin $multisite_spec >$(printf '%q' "$bkt_entry")"
test_temp_space

# "// empty" makes jq print nothing, rather than the string "null",
# when a key is missing, so the emptiness checks below really fire
marker=$(jq -r '.data.bucket.marker // empty' "$bkt_entry")
bucket_id=$(jq -r '.data.bucket.bucket_id // empty' "$bkt_entry")
export marker bucket_id
if [ -z "$marker" ] || [ -z "$bucket_id" ] ;then
    echo "ERROR: Unable to read entry-point metadata for bucket \"$bucket\"."
    echo "       Please make sure that <bucket-name> is correct and, if not using"
    echo "       the defaults, that <realm-name>, <zonegroup-name>, and/or"
    echo "       <zone-name> are correctly specified."
    clean
    usage
    exit 1
fi

echo "marker is $marker"
echo "bucket_id is $bucket_id"

# read bucket instance metadata
eval "radosgw-admin metadata get bucket.instance:${bucket_q}:$(printf '%q' "$bucket_id") $multisite_spec $debugging_rgwadmin >$(printf '%q' "$bkt_inst")"
test_temp_space

# examine number of bucket index shards
num_shards=$(jq ".data.bucket_info.num_shards" "$bkt_inst")
echo "number of bucket index shards is $num_shards"

# determine data pool; a pool given with -p wins over the metadata
if [ -z "$pool" ]; then
  pool=$(get_pool)
fi
echo "data pool is $pool"

# bucket flags, used afterwards to detect versioned buckets; being
# unable to read them means the instance metadata was not retrieved
bkt_flags=$(jq '.data.bucket_info.flags // empty' "$bkt_inst")
export bkt_flags
if [ -z "$bkt_flags" ] ;then
    echo "ERROR: unable to read instance metadata for bucket \"$bucket\"."
    clean
    exit 1
fi

# Collect every rados object whose name starts with "<marker>_" into
# $marker_ls, taking the listing either live from the data pool or
# from the file supplied with -l. Errors are deliberately discarded.
if [ -n "$lsoutput" ]; then
  { grep "^${marker}_" "${lsoutput}" >$marker_ls ; } 2>/dev/null
else
  { rados -p $pool ls | grep "^${marker}_" >$marker_ls ; } 2>/dev/null
fi
test_temp_space

# Reduce that listing to plain rgw object names in $obj_list: drop the
# optional locator that follows a tab, skip objects that live in a
# namespace (indicated by an extra underscore and colon), and strip
# the marker prefix. Initial underscores are quoted with an
# underscore, so the first double underscore becomes a single one.
{
  sed -E 's/\t.*//' $marker_ls |
    grep -v -E "^${marker}__[^_]+_" |
    sed -E -e "s/^${marker}_(.*)/\1/" -e 's/^__/_/' >$obj_list
} 2>/dev/null
test_temp_space

# the 2 bit of the bucket flags indicates a versioned bucket; the 4
# bit is kept as is_suspended but is not consulted in this section
is_versioned=$(( $bkt_flags & 2))
is_suspended=$(( $bkt_flags & 4))
export is_versioned is_suspended

if [ "$is_versioned" -ne 0 ] ;then
    echo "INFO: this bucket appears to be versioned."
    handle_versioned
else
    # nothing version-specific to add, so the final list is simply a
    # hard link to the plain object list
    ln $obj_list $obj_list_ver
fi

# nothing to restore when the resulting object list file is empty
if ! [ -s $obj_list_ver ] ;then
    echo "NOTICE: No head objects for bucket \"$bucket\" were found in pool \"$pool\", so nothing was recovered."
    clean
    exit 0
fi

# Unless -y was given, show where the object list is and wait for the
# user to explicitly confirm, view the list, or quit.
if [ -z "$proceed" ] ;then
    # warn user and get permission to proceed
    echo "NOTICE: This tool is currently considered EXPERIMENTAL."
    echo "The list of objects that we will attempt to restore can be found in \"$obj_list_ver\"."
    echo "Please review the object names in that file (either below or in another window/terminal) before proceeding."
    while true ; do
        # read fails at end of input (stdin closed, or redirected from
        # a source that has run dry). Without this check the loop
        # would spin forever on an empty response. A final line that
        # lacks a trailing newline still sets $action, so it is
        # honored.
        if ! read -r -p "Type \"proceed!\" to proceed, \"view\" to view object list, or \"q\" to quit: " action && [ -z "$action" ] ;then
            echo
            echo "ERROR: no response could be read from standard input; exiting without restoring."
            clean
            exit 1
        fi
        if [ "$action" == "q" ] ;then
            echo "Exiting..."
            clean
            exit 0
        elif [ "$action" == "view" ] ;then
            echo "Viewing..."
            less $obj_list_ver
        elif [ "$action" == "proceed!" ] ;then
            echo "Proceeding..."
            break
        else
            echo "Error: response \"$action\" is not understood."
        fi
    done
fi

# Hand the object list to radosgw-admin to add the bucket index
# entries. eval is needed so that a stderr redirection carried in
# $debugging_rgwadmin takes effect; the bucket name and the file path
# are shell-quoted with printf %q so eval cannot re-interpret them.
reindex_status=0
eval "radosgw-admin object reindex --bucket=$(printf '%q' "$bucket") --objects-file=$(printf '%q' "$obj_list_ver") $multisite_spec --yes-i-really-mean-it $debugging_rgwadmin" || reindex_status=$?

clean

# do not claim success (or exit 0) when the reindex itself failed
if [ "$reindex_status" -ne 0 ] ;then
    >&2 echo "ERROR: \`radosgw-admin object reindex\` failed with exit status ${reindex_status}; the bucket index may not have been fully restored."
    if [ "$debug" != 1 ] ;then
        >&2 echo "       Re-run with -d to see the diagnostic output of radosgw-admin."
    fi
    exit "$reindex_status"
fi
echo Done
