1# Copyright (c) 2020, 2021 The Linux Foundation
2#
3# SPDX-License-Identifier: Apache-2.0
4
5import re
6
# Characters permitted in an SPDX ID: ASCII letters, ASCII digits, '-' and '.'.
# Spelled out explicitly because str.isalpha() / str.isdigit() are
# Unicode-aware and would also accept characters such as "é" or "²".
_SPDX_ID_SAFE_CHARS = frozenset(
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    "-."
)

def getSPDXIDSafeCharacter(c):
    """
    Converts a character to an SPDX-ID-safe character.

    Arguments:
        - c: character to test
    Returns: c if it is SPDX-ID-safe (ASCII letter, ASCII digit, '-'
             or '.'); '-' otherwise (including for non-ASCII letters
             and digits, and for the empty string)
    """
    # set membership (rather than substring search in a str) so that an
    # empty string is not mistakenly treated as safe
    if c in _SPDX_ID_SAFE_CHARS:
        return c
    return "-"
19
# Matches any single character that is NOT allowed in an SPDX ID.
# The class lists ASCII ranges explicitly, so non-ASCII letters and digits
# (which str.isalpha() / str.isdigit() would accept) are treated as unsafe.
_SPDX_ID_UNSAFE_CHAR_RE = re.compile(r"[^A-Za-z0-9.-]")

def convertToSPDXIDSafe(s):
    """
    Converts a filename or other string to only SPDX-ID-safe characters
    (ASCII letters, ASCII digits, '-' and '.').
    Note that a separate check (such as in getUniqueFileID, below) will
    need to be used to confirm that this is still a unique identifier,
    after conversion, since distinct inputs can map to the same output.

    Arguments:
        - s: string to be converted.
    Returns: string of the same length as s, with every non-safe
             character replaced with a dash.
    """
    # one substitution per unsafe character keeps the length unchanged
    return _SPDX_ID_UNSAFE_CHAR_RE.sub("-", s)
32
def getUniqueFileID(filenameOnly, timesSeen):
    """
    Build an SPDX ID for a file that does not clash with IDs handed out
    earlier for the same timesSeen dict.

    Arguments:
        - filenameOnly: filename only (directories omitted) seeking ID.
        - timesSeen: dict mapping each SPDX-ID-safe (converted) filename
                     to the number of times it has been seen so far.
    Returns: unique SPDX ID; updates timesSeen to record this occurrence.
    """
    converted = convertToSPDXIDSafe(filenameOnly)
    occurrence = timesSeen.get(converted, 0) + 1

    if occurrence > 1:
        # repeat of an already-seen name: disambiguate with its count
        suffix = f"-{occurrence}"
    elif re.search(r"-\d+$", converted):
        # first sighting, but the name itself already ends in "-{number}";
        # tack on "-1" so it cannot overlap with the counted ID of a
        # similarly-named file
        suffix = "-1"
    else:
        # first sighting of an ordinary name: no suffix needed
        suffix = ""

    timesSeen[converted] = occurrence
    return f"SPDXRef-File-{converted}{suffix}"
62