# Copyright (c) 2020, 2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0

import re

# Characters permitted in an SPDX ID: ASCII letters, ASCII digits, '.' and
# '-'. An explicit ASCII class is used because str.isalpha()/str.isdigit()
# also accept non-ASCII Unicode letters and digits.
_SPDX_ID_SAFE_CHAR = re.compile(r"[A-Za-z0-9.-]")

# Matches a converted name that already ends in "-<number>".
_TRAILING_NUMBER = re.compile(r"-\d+$")


def getSPDXIDSafeCharacter(c):
    """
    Converts a character to an SPDX-ID-safe character.

    Arguments:
        - c: character to test
    Returns: c if it is SPDX-ID-safe (ASCII letter, ASCII digit, '-' or
             '.'); '-' otherwise (including for non-ASCII letters/digits)
    """
    if _SPDX_ID_SAFE_CHAR.fullmatch(c):
        return c
    return "-"


def convertToSPDXIDSafe(s):
    """
    Converts a filename or other string to only SPDX-ID-safe characters.
    Note that a separate check (such as in getUniqueFileID, below) will
    need to be used to confirm that this is still a unique identifier,
    after conversion.

    Arguments:
        - s: string to be converted.
    Returns: string of the same length, with every non-safe character
             replaced with a dash.
    """
    return "".join(getSPDXIDSafeCharacter(c) for c in s)


def getUniqueFileID(filenameOnly, timesSeen):
    """
    Find an SPDX ID that is unique among others seen so far.

    Arguments:
        - filenameOnly: filename only (directories omitted) seeking ID.
        - timesSeen: dict of all converted (SPDX-ID-safe) filenames to
                     number of times seen.
    Returns: unique SPDX ID; updates timesSeen to include it.
    """
    converted = convertToSPDXIDSafe(filenameOnly)
    spdxID = f"SPDXRef-File-{converted}"

    # The count is keyed on the converted name, so two different filenames
    # that collapse to the same safe string still get distinct IDs.
    filenameTimesSeen = timesSeen.get(converted, 0) + 1
    if filenameTimesSeen > 1:
        # seen before: append the # of times seen to the end
        spdxID += f"-{filenameTimesSeen}"
    elif _TRAILING_NUMBER.search(converted):
        # first time seeing this filename
        # edge case: if the filename itself ends in "-{number}", then we
        # need to add a "-1" to it, so that we don't end up overlapping
        # with an appended number from a similarly-named file.
        spdxID += "-1"

    timesSeen[converted] = filenameTimesSeen
    return spdxID