#!/bin/bash --
#
# Shell script to query Wikipedia.
#
# It can print Wikipedia articles to the console, or simply open the
# article in a browser of your choice.
#
# Author: Christian Brabandt <cb@256bit.org>
# License: BSD

# Release number; printed by getVersion.
VERSION="0.11"
# SVN-ID: $Id: wikipedia2text 31 2009-07-21 15:42:44Z cb $:

# Abort the script as soon as an unguarded command fails.
set -o errexit

# Print the usage text to standard output.
#
# The program name shown in the text is derived from $0. It is computed
# once and quoted, so a script path containing whitespace does not break
# the output.
display_help() {
    local prog
    prog=$(basename "$0")
cat << EOF
NAME

    This script uses text-browser to query and render Wikipedia
    articles. The output will be printed to standard out.
    
SYNOPSIS
    ${prog} [-BCnNoOpPsSuU] [-b prog] [-c patt] [-i patt] [-l lang] query
    ${prog} -o [-b prog] [-l lang] query
    ${prog} [-h]
    ${prog} -v|-r
   
    -n	do not colorize			    -N	simple colorization (alias -C)
    -p	display using a pager		    -P  don't use pager
    -o	open Wikipedia article in browser   -O	don't open in browser
    -s	display only a summary		    -S	display whole article
    -u	Just output the query URL	    -U	open URL in browser
    -v	display version			    -h	display help		   

    -r          open Random Page
    -i patt	colorize pattern (case insensitive)
    -I patt	colorize pattern (case-sensitive, alias -c)
    -b prog	use prog as browser (by default to invoke elinks, links2,
		links, lynx or w3m, if found)
    -l lang	use language (currently supported are: af, als, ca, cs, da, 
		de, en, eo, es, fi, fr, hu, ia, is, it, la, lb, nds, nl, nn,
		no, pl, pt, rm, ro, simple, sk, sl, sv, tr)
    -W url      use url as base-URL for wikipedia (e.g. use a different
		Wiki). Querying this URL will happen by appending the search
		term.

    Query can be any term to search for at Wikipedia. Special
    characters will be taken care of. Note that only one query term is
    supported, however this term can consist of one or more words.

    Configuration can also be controlled by creating a runcontrol file
    .${prog}rc in your home directory.

    Note that when requesting to open the article in a browser, other
    parameters will be ignored. The same holds for the options -h and
    -v.
EOF
}

# Print "<program name> Version: <VERSION>" (indented by four spaces) on
# standard output. The program name is the basename of $0.
getVersion() {
    printf '    %s Version: %s\n' "$(basename "${0}")" "${VERSION}"
}


# Print all arguments, joined by spaces, as one line on standard error
# and terminate the script with exit status 3.
#
# printf is used instead of echo so that a message which happens to look
# like an echo option (e.g. "-n") is printed verbatim rather than
# swallowed.
errorExit() {
    printf '%s\n' "$*" >&2
    exit 3
}

# Filter: copy standard input to standard output, wrapping every match of
# the regular expression ${PATT} in the ANSI sequences for red text
# (ESC[0;31m ... ESC[0m).
#
# Globals read:
#   PATT     basic regular expression to highlight
#   IGNCASE  "true" selects case-insensitive matching (sed "i" flag)
#
# The text itself is passed through untouched: backslash sequences that
# occur in the input are not interpreted. With an empty PATT the input is
# copied unchanged, because an empty regex would match at every position.
colorize() {
    local esc delim flags

    if [ -z "${PATT}" ]; then
        cat
        return
    fi

    esc=$(printf '\033')
    # A control character serves as the s-command delimiter so that PATT
    # may contain "|" and "/" and can use "\|" for alternation.
    delim=$(printf '\001')
    flags='g'
    if [ "${IGNCASE}" = "true" ]; then
        flags='gi'
    fi

    sed -e "s${delim}\(${PATT}\)${delim}${esc}[0;31m\1${esc}[0m${delim}${flags}"
}

# Turn the arguments into a string usable as the last URL path component.
#
# Despite its name this function ENCODES: all arguments are joined with
# spaces, every space becomes "_", double quotes are removed, and the
# result is percent-escaped by Perl's URI::Escape::uri_escape. The encoded
# string is written to standard output.
#
# The input is handed to perl with printf '%s' so that backslashes in the
# query are encoded literally instead of being expanded into control
# characters first.
#
# Requires: perl with the URI::Escape module.
uri_decode() {
    printf '%s\n' "$*" \
        | perl -MURI::Escape -lne 's/ /_/g;s/"//g;print uri_escape($_);'
}

# Select the language dependent settings for the Wikipedia edition named
# by ${LOCAL}.
#
# Globals:
#   LOCAL    (read/written) language code; defaults to "en" and is
#            normalised to lower case here, so that e.g. "-l DE" picks the
#            German settings
#   MARKER   (written) regex for the first line of the trailing part of the
#            page dump; stripOutput deletes from that line to the end
#   MARKER2  (written, known languages only) text of the bracketed "edit"
#            tag that stripOutput removes
#   RANDOMP  (written) title of the "random article" special page
#
# Unknown language codes get a generic MARKER and the English RANDOMP;
# MARKER2 is left untouched in that case.
localize() {
    LOCAL=$(printf '%s' "${LOCAL:-en}" | tr '[:upper:]' '[:lower:]')

    case "${LOCAL}" in
        de)
            MARKER='^\s*Kategorien\?:'
            MARKER2='Bearbeiten'
            RANDOMP='Spezial:Zufällige Seite'
            ;;
        en|simple)
            MARKER='^\s*Categories:\|^\s*Category:'
            MARKER2='edit'
            RANDOMP='Special:Random'
            ;;
        fr)
            MARKER='^\s*Catégories :'
            MARKER2='modifier'
            RANDOMP='Special:Random'
            ;;
        nl)
            MARKER='^\s*Categorie:'
            MARKER2='bewerk'
            RANDOMP='Speciaal:Willekeurig'
            ;;
        sv)
            MARKER='^\s*Kategorier:'
            MARKER2='redigera'
            RANDOMP='Special:Random'
            ;;
        es)
            MARKER='^\s*Categorías:'
            MARKER2='editar'
            RANDOMP='Especial:Random'
            ;;
        pt)
            MARKER='^\s*Categorias:'
            MARKER2='editar'
            RANDOMP='Especial:Random'
            ;;
        pl)
            MARKER='^\s*Kategorie:'
            MARKER2='Edytuj'
            RANDOMP='Specjalna:Losowa_strona'
            ;;
        it)
            MARKER='^\s*Categorie:'
            MARKER2='modifica'
            RANDOMP='Speciale:PaginaCasuale'
            ;;
        da)
            MARKER='^\s*Kategori:'
            MARKER2='\(redigér\|rediger\)'
            RANDOMP='Speciel:Tilfældig_side'
            ;;
        eo)
            MARKER='^\s*Kategorio:'
            MARKER2='redaktu'
            RANDOMP='Speciala:Random'
            ;;
        no)
            MARKER='^\s*Kategorier:'
            MARKER2='rediger'
            RANDOMP='Spesial:Tilfeldig_side'
            ;;
        nn)
            MARKER='^\s*Kategoriar:'
            MARKER2='endre'
            RANDOMP='Special:Random'
            ;;
        fi)
            MARKER='^\s*Luokat:'
            MARKER2='muokkaa'
            RANDOMP='Toiminnot:Satunnainen_sivu'
            ;;
        ca)
            MARKER='^\s*Categoria:'
            MARKER2='edita'
            RANDOMP='Especial:Random'
            ;;
        ro)
            MARKER='^\s*Categorii:'
            MARKER2='modifica'
            RANDOMP='Special:Random'
            ;;
        cs)
            MARKER='^\s*Kategorie:'
            MARKER2='editovat'
            RANDOMP='Speciální:Random'
            ;;
        sk)
            MARKER='^\s*Kategórie:'
            MARKER2='úprava'
            RANDOMP='Špeciálne:Random'
            ;;
        sl)
            MARKER='^\s*Kategorije:'
            MARKER2='spremeni'
            RANDOMP='Posebno:Random'
            ;;
        lb)
            MARKER='^\s*Kategorie:'
            MARKER2='Änneren'
            RANDOMP='Special:Random'
            ;;
        la)
            MARKER='^\s*Categoriae:'
            MARKER2='recensere'
            RANDOMP='Specialis:Random'
            ;;
        rm)
            MARKER='Views'
            MARKER2='edit'
            RANDOMP='Special:Random'
            ;;
        ia)
            MARKER='Views'
            MARKER2='modificar'
            RANDOMP='Special:Random'
            ;;
        is)
            MARKER='^\s*Flokkar:'
            MARKER2='breyta'
            RANDOMP='Kerfissíða:Random'
            ;;
        hu)
            MARKER='^\s*Kategóriák:'
            MARKER2='szerkesztés'
            RANDOMP='Speciális:Lap_találomra'
            ;;
        tr)
            MARKER='^\s*Sayfa kategorisi:'
            MARKER2='degistir'
            RANDOMP='Özel:Random'
            ;;
        af)
            MARKER='^\s*Kategorieë: '
            MARKER2='wysig'
            RANDOMP='Spesiaal:Random'
            ;;
        nds)
            MARKER='^\s*Kategorien:'
            MARKER2='Ännern'
            RANDOMP='Spezial:Random'
            ;;
        als)
            MARKER='^\s*Kategorie:'
            MARKER2='ändere'
            RANDOMP='Spezial:Zufällige_Seite'
            ;;
        *)
            # Unknown edition: fall back to generic section names.
            MARKER='\(Views\|References\|Visible links\)'
            RANDOMP='Special:Random'
            ;;
    esac
}

# Filter: clean up a text-browser page dump read from standard input.
#
#   * removes numeric link references such as "[12]" and "[IMG]" tags
#   * deletes everything from the first line matching ${MARKER} to the end
#     of the input (that part is only the link dump)
#   * removes the bracketed edit tag "[${MARKER2}]" when MARKER2 is set
#
# Globals read: MARKER, MARKER2 (both set by localize).
#
# The sed expressions are collected in an array instead of being assembled
# as a string and run through eval. The range delete is only added when
# MARKER is non-empty, because an empty address ("//,$") is not a valid
# sed script on its own.
stripOutput() {
    local -a sed_args

    sed_args=(
        -e 's|\[[0-9]*\]||g'
        -e 's|\[IMG\]||g'
    )
    if [ -n "${MARKER}" ]; then
        sed_args+=( -e "/${MARKER}/,\$ D" )
    fi
    if [ -n "${MARKER2}" ]; then
        sed_args+=( -e "s#\[${MARKER2}\]##g" )
    fi

    # $(cat) drops trailing blank lines; printf puts a single newline back.
    printf '%s\n' "$(cat)" | sed "${sed_args[@]}"
}

# Hand ${URL} to the program named by ${BROWSER}; the browser's output and
# exit status are passed through unchanged.
openurl() {
    "${BROWSER}" "${URL}"
}

# Print a short excerpt of the article.
#
# The page is dumped with "${BROWSER} -dump ${URL}", lines containing
# "copyright" are dropped, and of the first 22 remaining lines the last 17
# are kept and run through stripOutput. When ${COLOR} is "true" the result
# is additionally piped through colorize.
summary() {
    case "${COLOR}" in
        true)
            "${BROWSER}" -dump "${URL}" \
                | grep -v copyright \
                | head -n 22 \
                | tail -n 17 \
                | stripOutput \
                | colorize
            ;;
        *)
            "${BROWSER}" -dump "${URL}" \
                | grep -v copyright \
                | head -n 22 \
                | tail -n 17 \
                | stripOutput
            ;;
    esac
}

# Print the whole article: dump the page with "${BROWSER} -dump ${URL}" and
# clean it with stripOutput. Unless ${COLOR} is exactly "false", the result
# is also piped through colorize.
getInfo() {
    if [ "${COLOR}" != "false" ]; then
        "${BROWSER}" -dump "${URL}" | stripOutput | colorize
    else
        "${BROWSER}" -dump "${URL}" | stripOutput
    fi
}

# First read in the run-control file ~/.<script name>rc, if a readable one
# exists. After sourcing it, whatever ${BROWSER} holds is copied to
# ABROWSER, the variable the browser selection below works with.
#
# The path is built once and always used quoted, so the file is also found
# when the script name contains whitespace.
W2T_RCFILE=~/."$(basename "$0")rc"
if [ -r "${W2T_RCFILE}" ]; then
    source "${W2T_RCFILE}"
    ABROWSER=${BROWSER}
fi

# Process commandline parameters.
#
# Options taking an argument: -b -c -i -I -l -W. "I:" has to be part of the
# option string, otherwise getopts rejects the documented -I option and
# its case arm below can never run. -h and -v print their text and exit
# immediately; any unknown option prints the help and exits with status 1.
while getopts "BCnNoOpPsSuvhrUl:b:c:i:I:W:" ARGS; do
    case "${ARGS}" in
        b) ABROWSER=${OPTARG} ;;
        B) ABROWSER='' ;;
        c) IGNCASE="false"; COLOR="true"; PATT=${OPTARG} ;;
        C) COLOR="true" ;;
        i) IGNCASE="true"; COLOR="true"; PATT=${OPTARG} ;;
        I) IGNCASE="false"; COLOR="true"; PATT=${OPTARG} ;;
        l) LOCAL=${OPTARG} ;;
        n) COLOR="false" ;;
        N) COLOR="true" ;;
        o) USEBROWSER="true" ;;
        O) USEBROWSER="false" ;;
        p) PAGER="true" ;;
        P) PAGER="false" ;;
        r) RAND="true" ;;
        s) SHORT="true" ;;
        S) SHORT="false" ;;
        u) OUTPUTURL="true" ;;
        U) OPENURL="true" ;;
        v) getVersion; exit 0 ;;
        W) WURL=${OPTARG} ;;
        h) display_help; exit 0 ;;
        *) display_help; exit 1 ;;
    esac
done

# Drop the parsed options; what remains in "$@" is the query.
shift $((OPTIND - 1))

# Load the language dependent markers for ${LOCAL}.
localize

# Without a query and without -r there is nothing to look up.
# [[ ]] is used because "[ -z "$1" -a ... ]" becomes ambiguous when the
# query itself is something like "(" or "!".
if [[ -z "$1" && -z "${RAND}" ]]; then
    display_help
    exit 1
fi

# Every switch that was set neither on the command line nor in the rc file
# is off by default.
: "${IGNCASE:=false}"
: "${PAGER:=false}"
: "${OPENURL:=false}"
: "${RAND:=false}"

# -p only says "use a pager"; find a concrete program for it.
if [[ "${PAGER}" == "true" ]]; then
    PAGER=$(command -v less) || PAGER=$(command -v more) || errorExit "No Pager found!"
fi

# less needs -R/-r to pass the colour escape sequences through.
PAGER=${PAGER/less/less -Rr}
: "${COLOR:=false}"

# Colour requested without an explicit pattern: highlight the query itself.
if [[ "${COLOR}" == "true" && -z "${PATT}" ]]; then
    PATT="$*"
fi

# With -U the arguments are taken as the URL itself.
if [[ "${OPENURL}" == "true" ]]; then
    URL="$*"
fi



# Pick the text browser.
#
# An explicitly requested browser (ABROWSER, from -b or the rc file) must
# exist. Otherwise the first one found of w3m, elinks, links2, lynx,
# links.main and links is used. Only ABROWSER decides between the two
# paths: a BROWSER value that is already set must not force the explicit
# path, because with an empty ABROWSER (e.g. after -B) that would look up
# an empty program name and abort instead of auto-detecting.
if [ -n "${ABROWSER}" ]; then
    BROWSER=$(command -v "${ABROWSER}") || errorExit "${ABROWSER} not found"
else
    BROWSER=$(command -v w3m) \
        || BROWSER=$(command -v elinks) \
        || BROWSER=$(command -v links2) \
        || BROWSER=$(command -v lynx) \
        || BROWSER=$(command -v links.main) \
        || BROWSER=$(command -v links) \
        || errorExit "No Browser found"
fi


# Open page in Browser?
: "${USEBROWSER:=false}"

# Output only a summary?
: "${SHORT:=false}"

# Output only the URL?
: "${OUTPUTURL:=false}"

# Now we do some input sanitizing: encode the query for use in a URL and
# normalise the language code to lower case.
ARGUMENT="$(uri_decode "$*")"
LOCAL="$(echo "${LOCAL}"|tr '[:upper:]' '[:lower:]')"

# Random page? Then the "query" is the localized special page title.
if [ "${RAND}" = "true" ]; then
    ARGUMENT="$(uri_decode "${RANDOMP}")"
fi

# Build the article URL unless one was already given.
if [ -z "${URL}" ]; then
    URL="http://${LOCAL}.wikipedia.org/wiki/${ARGUMENT}"
fi

# -W: query a different wiki. This replaces the URL built above.
if [ -n "${WURL}" ]; then
    WURL=${WURL%/}
    case "${WURL}" in
        # A base URL that already carries a scheme is used as it is;
        # https:// has to be recognised too, or it would end up as
        # "http://https://...".
        http://*|https://*) URL="${WURL}/wiki/${ARGUMENT}" ;;
        *) URL="http://${WURL}/wiki/${ARGUMENT}" ;;
    esac
    # Force the english locale; it is used to strip the tags [edit], eg.
    # The markers were computed before this point, so they have to be
    # recomputed for the new value to have any effect.
    LOCAL="en"
    localize
fi


#errorExit "PAGER: $PAGER Browser: $BROWSER Local: $LOCAL COLOR: $COLOR PATT: $PATT IGNCASE: $IGNCASE URL: $URL"

# Decide what to do. The first matching mode wins and ends the script:
#   1. open the article in the browser
#   2. print only the summary
#   3. print only the URL (in blue unless colour is switched off)
if [ "${USEBROWSER}" = "true" ]; then
    openurl
    exit 0
elif [ "${SHORT}" = "true" ]; then
    summary
    exit 0
elif [ "${OUTPUTURL}" = "true" ]; then
    case "${COLOR}" in
        false) echo "${URL}" ;;
        *) echo -e "\033[0;34m${URL}\033[0m" ;;
    esac
    exit 0
fi

# Default mode: print the whole article, through the pager if there is one.
# ${PAGER} is left unquoted on purpose, it may carry options ("less -Rr").
case "${PAGER}" in
    false) getInfo ;;
    *) getInfo | ${PAGER} ;;
esac
# vim: ft=sh
