Fetching Hive schema definitions using WebHCat

The following shell script retrieves the schema information from Hive using the WebHCat server.

 

#!/bin/sh
# fetch_webhcat.sh, v0.1, 2016-04-00, [email protected]
#
# Tools that must be available on the PATH: jq, curl, python (json.tool)

# Default connection settings. Both can be replaced on the command line:
#   -u | --user    <user name>    value sent as the user.name query parameter
#   -s | --server  <server:port>  host and port of the WebHCat server
_USER_NAME='JohnDoe'
_WEBHCAT_SERVER='server:50111'

# usage
# Print the command-line synopsis to stderr and terminate with status 2.
usage() {
    printf "Usage: %s: [-s <server:port> -u <user name>]\n" "$0" >&2
    exit 2
}

# parse_args ARG...
# Process the command-line options:
#   -u | --user   <user name>    stored in _USER_NAME
#   -s | --server <server:port>  stored in _WEBHCAT_SERVER
# Any unknown argument, or an option given without its value, prints the
# usage text and exits with status 2.
#
# Written with POSIX `[ ... ]` and a numeric -gt test so it runs under
# /bin/sh; every remaining argument is inspected, so a trailing stray
# argument is reported instead of being silently ignored.
parse_args() {
    while [ "$#" -gt 0 ]; do
        case "$1" in
            -u|--user)
                # The option needs a value after it.
                [ "$#" -ge 2 ] || usage
                _USER_NAME="$2"
                shift 2
                ;;
            -s|--server)
                [ "$#" -ge 2 ] || usage
                _WEBHCAT_SERVER="$2"
                shift 2
                ;;
            *)
                # unknown option
                usage
                ;;
        esac
    done
}

parse_args "$@"

# Download the Hive schema through the WebHCat DDL resources:
#   1. the list of databases         -> databases.json
#   2. the table list of each db     -> db-<db>.json
#   3. the column list of each table -> db-<db>-table-<table>.json
# Reads _WEBHCAT_SERVER and _USER_NAME, which are set earlier in the script.

_URL_SUFFIX="?user.name=$_USER_NAME"
_URL_PREFIX="http://$_WEBHCAT_SERVER/templeton/v1/ddl/database"

# fetch_json URL FILE
# Download URL and write the response, pretty-printed by python's json.tool,
# to FILE. Returns non-zero when the download fails (FILE is then left
# untouched) or when the response cannot be formatted as JSON.
fetch_json() {
    # -f turns HTTP error statuses into a failing exit code; -sS hides the
    # progress meter but keeps error messages. stdin is detached so curl can
    # never consume the name list feeding the surrounding `while read` loop.
    _body=$(curl -fsS "$1" </dev/null) || return 1
    printf '%s\n' "$_body" | python -m json.tool > "$2"
}

# curl hit and get all databases
_URL_START="${_URL_PREFIX}${_URL_SUFFIX}"
echo "Fetching Hive server databases from $_URL_START to databases.json"
if ! curl -fsS "$_URL_START" > databases.json; then
    # Without the database list there is nothing else to fetch.
    echo "Error: could not fetch the database list from $_URL_START" >&2
    exit 1
fi

# One database name per line; read line by line instead of relying on
# word-splitting (and globbing) of an unquoted variable.
jq -r '.databases[]' ./databases.json | while IFS= read -r db; do
    _URL_TABLES="${_URL_PREFIX}/${db}/table${_URL_SUFFIX}"
    _FILE_TABLE="db-$db.json"
    echo "Fetching database $_URL_TABLES to $_FILE_TABLE"

    # curl hit and get json for each URL
    if ! fetch_json "$_URL_TABLES" "$_FILE_TABLE"; then
        echo "Warning: could not fetch tables of database $db; skipping it" >&2
        continue
    fi

    jq -r '.tables[]' "$_FILE_TABLE" | while IFS= read -r table; do
        _URL_COLUMNS="${_URL_PREFIX}/${db}/table/${table}/column${_URL_SUFFIX}"
        # The name is built from scratch for every table; it must not carry
        # over the name used for the previous table.
        _FILE_TABLE_COLUMNS="db-$db-table-$table.json"

        echo "Fetching table $_URL_COLUMNS to $_FILE_TABLE_COLUMNS"
        # curl hit and get json for each URL
        fetch_json "$_URL_COLUMNS" "$_FILE_TABLE_COLUMNS" ||
            echo "Warning: could not fetch columns of table $db.$table" >&2
    done
done

 

You may also like...

Leave a Reply

Your email address will not be published. Required fields are marked *