#!/usr/bin/env python
#
# Checks that all links in the readme markdown files are valid
#
# Copyright 2020 Espressif Systems (Shanghai) PTE LTD
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import concurrent.futures
import os
import re
import sys
import urllib.error
import urllib.request
from collections import defaultdict, namedtuple
from pathlib import Path

EXCLUDE_DOCS_LIST = ['examples/peripherals/secure_element/atecc608_ecdsa/components/esp-cryptoauthlib/cryptoauthlib/**']

# The Apple app links are not accessible from the company network for some reason
EXCLUDE_URL_LIST = ['https://apps.apple.com/in/app/esp-ble-provisioning/id1473590141', 'https://apps.apple.com/in/app/esp-softap-provisioning/id1474040630']

Link = namedtuple('Link', ['file', 'url'])
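# Link.file is the Path of the markdown file containing the link;
# Link.url is the raw link target (a relative path or an absolute URL).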


class ReadmeLinkError(Exception):
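    """Base class for link errors; subclasses format the error message."""
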
    def __init__(self, file, url):
        self.file = file
        self.url = url


class RelativeLinkError(ReadmeLinkError):
    def __str__(self):
        return 'Relative link error, file - {} not found, linked from {}'.format(self.url, self.file)


class UrlLinkError(ReadmeLinkError):
    def __init__(self, file, url, error_code):
        self.error_code = error_code
        super().__init__(file, url)

    def __str__(self):
        files = [str(f) for f in self.file]
        return 'URL error, url - {} in files - {} is not accessible, request returned {}'.format(self.url, ', '.join(files), self.error_code)


# We do not want the test to fail just because of bad network conditions;
# for non-404 errors we simply print a warning.
def check_url(url, files, timeout):
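    """Open *url* and raise UrlLinkError if the server returns 404.

    Any other failure (timeout, 5xx, DNS error, ...) is assumed to be
    transient and is only logged as a warning.
    """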
    try:
        with urllib.request.urlopen(url, timeout=timeout):
            return
    except urllib.error.HTTPError as e:
        if e.code == 404:
            raise UrlLinkError(files, url, str(e))
        else:
            print('Unable to access {}, err = {}'.format(url, str(e)))
    except Exception as e:
        print('Unable to access {}, err = {}'.format(url, str(e)))


def check_web_links(web_links):
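    """Check all web URLs concurrently and return a list of UrlLinkError."""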

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        errors = []
        future_to_url = {executor.submit(check_url, url, files, timeout=30): (url, files) for url, files in web_links.items()}
        for future in concurrent.futures.as_completed(future_to_url):
            try:
                future.result()
            except UrlLinkError as e:
                errors.append(e)

        return errors


def check_file_links(file_links):
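    """Verify that every relative link resolves to an existing file."""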
    errors = []

    for link in file_links:
        link_path = link.file.parent / link.url

        if not link_path.exists():
            errors.append(RelativeLinkError(link.file, link.url))

    print('Found {} errors with relative links'.format(len(errors)))
    return errors


def get_md_links(folder):
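    """Collect all non-anchor links from *.md files under IDF_PATH/folder."""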
    MD_LINK_RE = r'\[.+?\]\((.+?)(#.+)?\)'
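    # Group 1 captures the link target, group 2 an optional '#anchor', e.g.
    # '[docs](../README.md#setup)' -> ('../README.md', '#setup')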

    idf_path_str = os.getenv('IDF_PATH')
    if not idf_path_str:
        raise RuntimeError('IDF_PATH must point to the ESP-IDF root directory')
    idf_path = Path(idf_path_str)
    links = []

    for path in (idf_path / folder).rglob('*.md'):
        if any(path.relative_to(idf_path).match(exclude_doc) for exclude_doc in EXCLUDE_DOCS_LIST):
            print('{} - excluded'.format(path))
            continue

        with path.open(encoding='utf8') as f:
            content = f.read()

        for target, _anchor in re.findall(MD_LINK_RE, content):
            link = Link(path, target.lstrip())
            # Ignore "local" links
            if not link.url.startswith('#'):
                links.append(link)

    return links


def check_readme_links(args):
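    """Gather links from example READMEs, check them and return an exit code."""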

    links = get_md_links('examples')
    print('Found {} links'.format(len(links)))

    errors = []

    web_links = defaultdict(list)
    file_links = []

    # Sort links into file and web links
    for link in links:
        if link.url.startswith('http'):
            web_links[link.url].append(link.file)
        else:
            file_links.append(link)

    for url in EXCLUDE_URL_LIST:
        web_links.pop(url, None)

    errors.extend(check_file_links(file_links))

    if not args.skip_weburl:
        errors.extend(check_web_links(web_links))

    print('Found {} errors:'.format(len(errors)))
    for e in errors:
        print(e)

    return 1 if errors else 0


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='check_readme_links.py: Checks for dead links in example READMEs', prog='check_readme_links.py')
    parser.add_argument('--skip-weburl', '-w', action='store_true', help='Skip checking of web URLs, only check links to local files')
    args = parser.parse_args()

    sys.exit(check_readme_links(args))
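
# Example usage (IDF_PATH must point at the ESP-IDF checkout that contains
# the 'examples' directory):
#
#   export IDF_PATH=~/esp/esp-idf       # e.g.; adjust to your checkout
#   ./check_readme_links.py             # check relative links and web URLs
#   ./check_readme_links.py -w          # check relative links only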