#!/bin/bash -
#
# Check the URLs referenced by the files of a project directory.
#
# Usage: <this script> PROJECT_DIR
#
# Exits with status 2 when PROJECT_DIR is missing or is not a directory.

set -o nounset        # Treat unset variables as an error

# ${1:-} lets a missing argument reach the directory check below instead of
# aborting with an "unbound variable" error.
PROJECT=${1:-}
echo "Verifying url links of: ${PROJECT}"
if [[ ! -d "${PROJECT}" ]]; then
    # Diagnostics go to stderr so they are not mixed into the report output.
    echo "Directory passed does not exist" >&2
    exit 2
fi

# Browser-like User-Agent string handed to curl for every request.
USER_AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36"
# Overall result of the run: 0 until a check reports an error.
SCRIPT_RET=0

# Associative array (requires bash >= 4).
declare -A dict

#######################################
# Check every URL referenced below ${PROJECT} and exit with the overall result.
#
# Lines containing http:// or https:// are collected recursively (binary
# files are read as text, *.exe files and .git directories are skipped). The
# URLs on those lines are extracted, de-duplicated and sorted, and each one
# is requested once with curl.
#
# NOTE(review): the name shadows the shell's `test` builtin (`[` and `[[`
# are unaffected); it is kept because the script invokes the function under
# this name.
# NOTE(review): among the non-200 statuses only 403 is a hard error, every
# other status (404 included) is just a warning - confirm this is intended.
#
# Globals:
#   PROJECT     (read)    directory that is searched
#   USER_AGENT  (read)    User-Agent string sent with every request
#   SCRIPT_RET  (written) set to 1 once any URL is reported as ERROR
# Arguments:
#   None
# Outputs:
#   One report block per unique URL on stdout.
# Returns:
#   Does not return: exits 0 if SCRIPT_RET is 0, otherwise exits 1.
#######################################
function test {
    local url http_code curl_rc

    while IFS= read -r url; do
        echo "================================="
        echo "Checking URL: ${url}"

        # -o /dev/null -w '%{http_code}' makes curl print only the status
        # code, so no pipeline is needed and the captured status really is
        # curl's own exit status (not that of a trailing `cut`).
        # stdin is detached so curl can never consume the URL list.
        curl_rc=0
        http_code=$(curl -s -o /dev/null -w '%{http_code}' \
            --user-agent "${USER_AGENT}" "${url}" 2>/dev/null </dev/null) \
            || curl_rc=$?

        case "${http_code}" in
            200)
                echo "SUCCESS: Result is: ${http_code}"
                ;;
            '' | 000)
                # No HTTP response was received (curl prints 000 in that
                # case): report curl's exit status instead.
                SCRIPT_RET=1
                echo "ERROR: Result is: ${curl_rc}"
                ;;
            403)
                SCRIPT_RET=1
                echo "ERROR: Result is: ${http_code}"
                ;;
            *)
                echo "WARNING: Result is: ${http_code}"
                ;;
        esac
        echo "================================="
    done < <(
        # First grep: select the lines mentioning http(s), without the
        # "file:" prefix. Second grep: emit every URL on those lines, one
        # per line (-a because the stream may contain binary data).
        # sort -u: check each URL once, in a reproducible order.
        grep -RIah -e 'https\?://' --exclude='*.exe' --exclude-dir=.git \
            -- "${PROJECT}" \
            | grep -aoE '\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]' \
            | LC_ALL=C sort -u
    )

    if [[ "${SCRIPT_RET}" -eq 0 ]]; then
        exit 0
    else
        exit 1
    fi
}

# Run the link check; the function ends the script itself by calling exit
# with the final status.
test
66