#!/usr/bin/env python
#
# Checks that all links in the README markdown files are valid
#
# Copyright 2020 Espressif Systems (Shanghai) PTE LTD
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import concurrent.futures
import os
import re
import sys
import urllib.error
import urllib.request
from collections import defaultdict, namedtuple
from pathlib import Path

EXCLUDE_DOCS_LIST = ['examples/peripherals/secure_element/atecc608_ecdsa/components/esp-cryptoauthlib/cryptoauthlib/**']

# The Apple App Store links are not accessible from the company network for some reason
EXCLUDE_URL_LIST = ['https://apps.apple.com/in/app/esp-ble-provisioning/id1473590141', 'https://apps.apple.com/in/app/esp-softap-provisioning/id1474040630']

Link = namedtuple('Link', ['file', 'url'])


class ReadmeLinkError(Exception):
    def __init__(self, file, url):
        self.file = file
        self.url = url


class RelativeLinkError(ReadmeLinkError):
    def __str__(self):
        return 'Relative link error, file - {} not found, linked from {}'.format(self.url, self.file)


class UrlLinkError(ReadmeLinkError):
    def __init__(self, file, url, error_code):
        self.error_code = error_code
        super().__init__(file, url)

    def __str__(self):
        files = [str(f) for f in self.file]
        return 'URL error, url - {} in files - {} is not accessible, request returned {}'.format(self.url, ', '.join(files), self.error_code)


# We do not want the test to fail just because of bad network conditions;
# only a hard 404 raises an error, any other failure just prints a warning.
def check_url(url, files, timeout):
    try:
        with urllib.request.urlopen(url, timeout=timeout):
            return
    except urllib.error.HTTPError as e:
        if e.code == 404:
            raise UrlLinkError(files, url, str(e))
        else:
            print('Unable to access {}, err = {}'.format(url, str(e)))
    except Exception as e:
        print('Unable to access {}, err = {}'.format(url, str(e)))


def check_web_links(web_links):
    # Check all web links in parallel, collecting only the hard (404) failures
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        errors = []
        future_to_url = {executor.submit(check_url, url, files, timeout=30): (url, files) for url, files in web_links.items()}
        for future in concurrent.futures.as_completed(future_to_url):
            try:
                future.result()
            except UrlLinkError as e:
                errors.append(e)

        return errors


def check_file_links(file_links):
    errors = []

    # Relative links are resolved against the directory of the README that contains them
    for link in file_links:
        link_path = link.file.parent / link.url

        if not link_path.exists():
            errors.append(RelativeLinkError(link.file, link.url))

    print('Found {} errors with relative links'.format(len(errors)))
    return errors


def get_md_links(folder):
    MD_LINK_RE = r'\[.+?\]\((.+?)(#.+)?\)'

    idf_path = Path(os.getenv('IDF_PATH'))  # the script expects IDF_PATH to be set
    links = []

    for path in (idf_path / folder).rglob('*.md'):
        if any(path.relative_to(idf_path).match(exclude_doc) for exclude_doc in EXCLUDE_DOCS_LIST):
            print('{} - excluded'.format(path))
            continue

        with path.open(encoding='utf8') as f:
            content = f.read()

        for match in re.findall(MD_LINK_RE, content):
            link = Link(path, match[0].lstrip())
            # Ignore "local" links
            if not link.url.startswith('#'):
                links.append(link)

    return links
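

# An informal illustration of what MD_LINK_RE matches (not used by the script):
# the first group captures the link target, the optional second group captures
# an in-page '#fragment', which get_md_links() discards:
#
#     >>> re.findall(r'\[.+?\]\((.+?)(#.+)?\)', 'see [setup](README.md#install)')
#     [('README.md', '#install')]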


def check_readme_links(args):
    links = get_md_links('examples')
    print('Found {} links'.format(len(links)))

    errors = []

    web_links = defaultdict(list)
    file_links = []

    # Sort links into file and web links
    for link in links:
        if link.url.startswith('http'):
            web_links[link.url].append(link.file)
        else:
            file_links.append(link)

    # pop() with a default so an excluded URL that never appears does not raise KeyError
    for url in EXCLUDE_URL_LIST:
        web_links.pop(url, None)

    errors.extend(check_file_links(file_links))

    if not args.skip_weburl:
        errors.extend(check_web_links(web_links))

    print('Found {} errors:'.format(len(errors)))
    for e in errors:
        print(e)

    return 1 if len(errors) > 0 else 0


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='check_readme_links.py: Checks for dead links in example READMEs', prog='check_readme_links.py')
    parser.add_argument('--skip-weburl', '-w', action='store_true', help='Skip checking of web URLs, only check links to local files')
    args = parser.parse_args()

    sys.exit(check_readme_links(args))
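
# Example invocations (a sketch; assumes IDF_PATH points at an ESP-IDF checkout):
#
#     python check_readme_links.py                # check relative links and web URLs
#     python check_readme_links.py --skip-weburl  # check relative links only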