# Copyright (c) 2020, 2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0

import re

# One character that is allowed in an SPDX ID: ASCII letter, ASCII digit,
# '.' or '-'. The class is spelled out explicitly because str.isalpha() and
# str.isdigit() also accept non-ASCII characters (e.g. "é", "²"), which are
# not valid in an SPDX ID.
_SPDX_ID_SAFE_CHAR = re.compile(r"[A-Za-z0-9.-]")

# Complement of the above: any one character that must be replaced.
_SPDX_ID_UNSAFE_CHAR = re.compile(r"[^A-Za-z0-9.-]")

# A trailing "-<number>", i.e. something that looks like a de-duplication
# suffix appended by getUniqueFileID.
_TRAILING_NUMBER_SUFFIX = re.compile(r"-\d+$")


def getSPDXIDSafeCharacter(c: str) -> str:
    """
    Converts a character to an SPDX-ID-safe character.

    Arguments:
        - c: single character to test
    Returns: c if it is SPDX-ID-safe (ASCII letter, ASCII digit, '-' or '.');
             '-' otherwise (including for non-ASCII letters and digits)
    """
    if _SPDX_ID_SAFE_CHAR.fullmatch(c):
        return c
    return "-"


def convertToSPDXIDSafe(s: str) -> str:
    """
    Converts a filename or other string to only SPDX-ID-safe characters.
    Note that a separate check (such as in getUniqueFileID, below) will need
    to be used to confirm that this is still a unique identifier, after
    conversion, because distinct inputs can map to the same output.

    Arguments:
        - s: string to be converted.
    Returns: string of the same length with every character other than an
             ASCII letter, ASCII digit, '-' or '.' replaced with a dash.
    """
    return _SPDX_ID_UNSAFE_CHAR.sub("-", s)


def getUniqueFileID(filenameOnly: str, timesSeen: dict) -> str:
    """
    Find an SPDX ID that is unique among others seen so far.

    The first occurrence of a converted filename gets
    "SPDXRef-File-<converted>"; the Nth occurrence (N > 1) gets
    "SPDXRef-File-<converted>-<N>".

    Arguments:
        - filenameOnly: filename only (directories omitted) seeking ID.
        - timesSeen: dict of all converted (SPDX-ID-safe) filenames to
                     number of times seen.
    Returns: unique SPDX ID; updates timesSeen to include it.
    """
    converted = convertToSPDXIDSafe(filenameOnly)
    spdxID = f"SPDXRef-File-{converted}"

    # determine whether spdxID is unique so far, or not
    filenameTimesSeen = timesSeen.get(converted, 0) + 1
    if filenameTimesSeen > 1:
        # seen before: append the # of times seen to the end
        spdxID += f"-{filenameTimesSeen}"
    elif _TRAILING_NUMBER_SUFFIX.search(converted):
        # first time seeing this filename
        # edge case: if the filename itself ends in "-{number}", then we
        # need to add a "-1" to it, so that we don't end up overlapping
        # with an appended number from a similarly-named file.
        spdxID += "-1"

    timesSeen[converted] = filenameTimesSeen
    return spdxID