1# Copyright (c) 2020, 2021 The Linux Foundation
2#
3# SPDX-License-Identifier: Apache-2.0
4
5import re
6
7
def getSPDXIDSafeCharacter(c):
    """
    Converts a character to an SPDX-ID-safe character.

    Only ASCII letters and digits are treated as safe. str.isalpha() and
    str.isdigit() on their own also accept non-ASCII characters (accented
    letters, CJK ideographs, superscript digits, ...), which are not
    permitted in an SPDX ID, so an ASCII check is applied as well.

    Arguments:
        - c: character to test
    Returns: c if it is SPDX-ID-safe (ASCII letter, ASCII digit, '-' or
             '.'); '-' otherwise
    """
    if c == "-" or c == ".":
        return c
    # isascii() guards against Unicode letters/digits slipping through
    if c.isascii() and (c.isalpha() or c.isdigit()):
        return c
    return "-"
20
21
def convertToSPDXIDSafe(s):
    """
    Converts a filename or other string to only SPDX-ID-safe characters.

    The safe set is ASCII letters, ASCII digits, '-' and '.'. Every other
    character (including non-ASCII letters and digits) is replaced with a
    single dash, so the result has the same length as the input.

    Note that a separate check (such as in getUniqueFileID, below) will
    need to be used to confirm that this is still a unique identifier,
    after conversion, since distinct inputs can map to the same output.

    Arguments:
        - s: string to be converted.
    Returns: string with all non-safe characters replaced with dashes.
    """
    # explicit ASCII class: str.isalpha()/isdigit() would also accept
    # Unicode letters and digits, which are not valid in an SPDX ID
    return re.sub(r"[^A-Za-z0-9.-]", "-", s)
34
35
def getUniqueFileID(filenameOnly, timesSeen):
    """
    Build an SPDX file ID that does not clash with any ID handed out so far.

    The filename is first converted to SPDX-ID-safe characters; the
    converted name (not the raw filename) is the key used in timesSeen.

    Arguments:
        - filenameOnly: filename only (directories omitted) seeking ID.
        - timesSeen: dict mapping converted filename to the number of
                     times it has been seen; modified in place.
    Returns: unique SPDX ID; updates timesSeen to include it.
    """
    safeName = convertToSPDXIDSafe(filenameOnly)

    # record this sighting up front; the count drives the suffix choice
    seenCount = timesSeen.get(safeName, 0) + 1
    timesSeen[safeName] = seenCount

    baseID = f"SPDXRef-File-{safeName}"

    # repeat sighting: disambiguate with the running count
    if seenCount > 1:
        return f"{baseID}-{seenCount}"

    # first sighting, but the name itself already looks like
    # "<something>-<number>": append "-1" so it cannot collide with the
    # count suffix generated for a similarly-named file
    if re.search(r"-\d+$", safeName):
        return f"{baseID}-1"

    return baseID
65