1# Copyright (c) 2020-2021 The Linux Foundation
2#
3# SPDX-License-Identifier: Apache-2.0
4
5import os
6import yaml
7import re
8
9from west import log
10from west.util import west_topdir, WestNotFound
11
12from zspdx.cmakecache import parseCMakeCacheFile
13from zspdx.cmakefileapijson import parseReply
14from zspdx.datatypes import DocumentConfig, Document, File, PackageConfig, Package, RelationshipDataElementType, RelationshipData, Relationship
15from zspdx.getincludes import getCIncludes
16import zspdx.spdxids
17
18# WalkerConfig contains configuration data for the Walker.
class WalkerConfig:
    """Configuration settings consumed by the Walker."""

    def __init__(self):
        super().__init__()

        # whether an SPDX document should also be added for the SDK
        self.includeSDK = False

        # whether included header files should be analyzed as well
        self.analyzeIncludes = False

        # location of the build directory
        self.buildDir = ""

        # prefix for Document namespaces; should not end with "/"
        self.namespacePrefix = ""
34
35# Walker is the main analysis class: it walks through the CMake codemodel,
36# build files, and corresponding source and SDK files, and gathers the
37# information needed to build the SPDX data classes.
38class Walker:
39    # initialize with WalkerConfig
40    def __init__(self, cfg):
41        super(Walker, self).__init__()
42
43        # configuration - WalkerConfig
44        self.cfg = cfg
45
46        # the various Documents that we will be building
47        self.docBuild = None
48        self.docZephyr = None
49        self.docApp = None
50        self.docSDK = None
51        self.docModulesExtRefs = None
52
53        # dict of absolute file path => the Document that owns that file
54        self.allFileLinks = {}
55
56        # queue of pending source Files to create, process and assign
57        self.pendingSources = []
58
59        # queue of pending relationships to create, process and assign
60        self.pendingRelationships = []
61
62        # parsed CMake codemodel
63        self.cm = None
64
65        # parsed CMake cache dict, once we have the build path
66        self.cmakeCache = {}
67
68        # C compiler path from parsed CMake cache
69        self.compilerPath = ""
70
71        # SDK install path from parsed CMake cache
72        self.sdkPath = ""
73
74    def _build_purl(self, url, version=None):
75        if not url:
76            return None
77
78        purl = None
79        # This is designed to match repository with the following url pattern:
80        # '<protocol><base_url>/<namespace>/<package>
81        COMMON_GIT_URL_REGEX=r'((git@|http(s)?:\/\/)(?P<base_url>[\w\.@]+)(\/|:))(?P<namespace>[\w,\-,\_]+)\/(?P<package>[\w,\-,\_]+)(.git){0,1}((\/){0,1})$'
82
83        match = re.fullmatch(COMMON_GIT_URL_REGEX, url)
84        if match:
85            purl = f'pkg:{match.group("base_url")}/{match.group("namespace")}/{match.group("package")}'
86
87        if purl and (version or len(version) > 0):
88            purl += f'@{version}'
89
90        return purl
91
92    def _add_describe_relationship(self, doc, cfgpackage):
93        # create DESCRIBES relationship data
94        rd = RelationshipData()
95        rd.ownerType = RelationshipDataElementType.DOCUMENT
96        rd.ownerDocument = doc
97        rd.otherType = RelationshipDataElementType.PACKAGEID
98        rd.otherPackageID = cfgpackage.spdxID
99        rd.rlnType = "DESCRIBES"
100
101        # add it to pending relationships queue
102        self.pendingRelationships.append(rd)
103
104    # primary entry point
105    def makeDocuments(self):
106        # parse CMake cache file and get compiler path
107        log.inf("parsing CMake Cache file")
108        self.getCacheFile()
109
110        # check if meta file is generated
111        if not self.metaFile:
112            log.err("CONFIG_BUILD_OUTPUT_META must be enabled to generate spdx files; bailing")
113            return False
114
115        # parse codemodel from Walker cfg's build dir
116        log.inf("parsing CMake Codemodel files")
117        self.cm = self.getCodemodel()
118        if not self.cm:
119            log.err("could not parse codemodel from CMake API reply; bailing")
120            return False
121
122        # set up Documents
123        log.inf("setting up SPDX documents")
124        retval = self.setupDocuments()
125        if not retval:
126            return False
127
128        # walk through targets in codemodel to gather information
129        log.inf("walking through targets")
130        self.walkTargets()
131
132        # walk through pending sources and create corresponding files
133        log.inf("walking through pending sources files")
134        self.walkPendingSources()
135
136        # walk through pending relationship data and create relationships
137        log.inf("walking through pending relationships")
138        self.walkRelationships()
139
140        return True
141
142    # parse cache file and pull out relevant data
143    def getCacheFile(self):
144        cacheFilePath = os.path.join(self.cfg.buildDir, "CMakeCache.txt")
145        self.cmakeCache = parseCMakeCacheFile(cacheFilePath)
146        if self.cmakeCache:
147            self.compilerPath = self.cmakeCache.get("CMAKE_C_COMPILER", "")
148            self.sdkPath = self.cmakeCache.get("ZEPHYR_SDK_INSTALL_DIR", "")
149            self.metaFile =  self.cmakeCache.get("KERNEL_META_PATH", "")
150
151    # determine path from build dir to CMake file-based API index file, then
152    # parse it and return the Codemodel
153    def getCodemodel(self):
154        log.dbg("getting codemodel from CMake API reply files")
155
156        # make sure the reply directory exists
157        cmakeReplyDirPath = os.path.join(self.cfg.buildDir, ".cmake", "api", "v1", "reply")
158        if not os.path.exists(cmakeReplyDirPath):
159            log.err(f'cmake api reply directory {cmakeReplyDirPath} does not exist')
160            log.err('was query directory created before cmake build ran?')
161            return None
162        if not os.path.isdir(cmakeReplyDirPath):
163            log.err(f'cmake api reply directory {cmakeReplyDirPath} exists but is not a directory')
164            return None
165
166        # find file with "index" prefix; there should only be one
167        indexFilePath = ""
168        for f in os.listdir(cmakeReplyDirPath):
169            if f.startswith("index"):
170                indexFilePath = os.path.join(cmakeReplyDirPath, f)
171                break
172        if indexFilePath == "":
173            # didn't find it
174            log.err(f'cmake api reply index file not found in {cmakeReplyDirPath}')
175            return None
176
177        # parse it
178        return parseReply(indexFilePath)
179
180    def setupAppDocument(self):
181        # set up app document
182        cfgApp = DocumentConfig()
183        cfgApp.name = "app-sources"
184        cfgApp.namespace = self.cfg.namespacePrefix + "/app"
185        cfgApp.docRefID = "DocumentRef-app"
186        self.docApp = Document(cfgApp)
187
188        # also set up app sources package
189        cfgPackageApp = PackageConfig()
190        cfgPackageApp.name = "app-sources"
191        cfgPackageApp.spdxID = "SPDXRef-app-sources"
192        cfgPackageApp.primaryPurpose = "SOURCE"
193        # relativeBaseDir is app sources dir
194        cfgPackageApp.relativeBaseDir = self.cm.paths_source
195        pkgApp = Package(cfgPackageApp, self.docApp)
196        self.docApp.pkgs[pkgApp.cfg.spdxID] = pkgApp
197
198        self._add_describe_relationship(self.docApp, cfgPackageApp)
199
200    def setupBuildDocument(self):
201        # set up build document
202        cfgBuild = DocumentConfig()
203        cfgBuild.name = "build"
204        cfgBuild.namespace = self.cfg.namespacePrefix + "/build"
205        cfgBuild.docRefID = "DocumentRef-build"
206        self.docBuild = Document(cfgBuild)
207
208        # we'll create the build packages in walkTargets()
209
210        # the DESCRIBES relationship for the build document will be
211        # with the zephyr_final package
212        rd = RelationshipData()
213        rd.ownerType = RelationshipDataElementType.DOCUMENT
214        rd.ownerDocument = self.docBuild
215        rd.otherType = RelationshipDataElementType.TARGETNAME
216        rd.otherTargetName = "zephyr_final"
217        rd.rlnType = "DESCRIBES"
218
219        # add it to pending relationships queue
220        self.pendingRelationships.append(rd)
221
222    def setupZephyrDocument(self, zephyr, modules):
223        # set up zephyr document
224        cfgZephyr = DocumentConfig()
225        cfgZephyr.name = "zephyr-sources"
226        cfgZephyr.namespace = self.cfg.namespacePrefix + "/zephyr"
227        cfgZephyr.docRefID = "DocumentRef-zephyr"
228        self.docZephyr = Document(cfgZephyr)
229
230        # relativeBaseDir is Zephyr sources topdir
231        try:
232            relativeBaseDir = west_topdir(self.cm.paths_source)
233        except WestNotFound:
234            log.err(f"cannot find west_topdir for CMake Codemodel sources path {self.cm.paths_source}; bailing")
235            return False
236
237        # set up zephyr sources package
238        cfgPackageZephyr = PackageConfig()
239        cfgPackageZephyr.name = "zephyr-sources"
240        cfgPackageZephyr.spdxID = "SPDXRef-zephyr-sources"
241        cfgPackageZephyr.relativeBaseDir = relativeBaseDir
242
243        zephyr_url = zephyr.get("remote", "")
244        if zephyr_url:
245            cfgPackageZephyr.url = zephyr_url
246
247        if zephyr.get("revision"):
248            cfgPackageZephyr.revision = zephyr.get("revision")
249
250        purl = None
251        zephyr_tags = zephyr.get("tags", "")
252        if zephyr_tags:
253            # Find tag vX.Y.Z
254            for tag in zephyr_tags:
255                version = re.fullmatch(r'^v(?P<version>\d+\.\d+\.\d+)$', tag)
256                purl = self._build_purl(zephyr_url, tag)
257
258                if purl:
259                    cfgPackageZephyr.externalReferences.append(purl)
260
261                # Extract version from tag once
262                if cfgPackageZephyr.version == "" and version:
263                    cfgPackageZephyr.version = version.group('version')
264
265        if len(cfgPackageZephyr.version) > 0:
266            cpe = f'cpe:2.3:o:zephyrproject:zephyr:{cfgPackageZephyr.version}:-:*:*:*:*:*:*'
267            cfgPackageZephyr.externalReferences.append(cpe)
268
269        pkgZephyr = Package(cfgPackageZephyr, self.docZephyr)
270        self.docZephyr.pkgs[pkgZephyr.cfg.spdxID] = pkgZephyr
271
272        self._add_describe_relationship(self.docZephyr, cfgPackageZephyr)
273
274        for module in modules:
275            module_name = module.get("name", None)
276            module_path = module.get("path", None)
277            module_url = module.get("remote", None)
278            module_revision = module.get("revision", None)
279
280            if not module_name:
281                log.err(f"cannot find module name in meta file; bailing")
282                return False
283
284            # set up zephyr sources package
285            cfgPackageZephyrModule = PackageConfig()
286            cfgPackageZephyrModule.name = module_name + "-sources"
287            cfgPackageZephyrModule.spdxID = "SPDXRef-" + module_name + "-sources"
288            cfgPackageZephyrModule.relativeBaseDir = module_path
289            cfgPackageZephyrModule.primaryPurpose = "SOURCE"
290
291            if module_revision:
292                cfgPackageZephyrModule.revision = module_revision
293
294            if module_url:
295                cfgPackageZephyrModule.url = module_url
296
297            pkgZephyrModule = Package(cfgPackageZephyrModule, self.docZephyr)
298            self.docZephyr.pkgs[pkgZephyrModule.cfg.spdxID] = pkgZephyrModule
299
300            self._add_describe_relationship(self.docZephyr, cfgPackageZephyrModule)
301
302        return True
303
304    def setupSDKDocument(self):
305        # set up SDK document
306        cfgSDK = DocumentConfig()
307        cfgSDK.name = "sdk"
308        cfgSDK.namespace = self.cfg.namespacePrefix + "/sdk"
309        cfgSDK.docRefID = "DocumentRef-sdk"
310        self.docSDK = Document(cfgSDK)
311
312        # also set up zephyr sdk package
313        cfgPackageSDK = PackageConfig()
314        cfgPackageSDK.name = "sdk"
315        cfgPackageSDK.spdxID = "SPDXRef-sdk"
316        # relativeBaseDir is SDK dir
317        cfgPackageSDK.relativeBaseDir = self.sdkPath
318        pkgSDK = Package(cfgPackageSDK, self.docSDK)
319        self.docSDK.pkgs[pkgSDK.cfg.spdxID] = pkgSDK
320
321        # create DESCRIBES relationship data
322        rd = RelationshipData()
323        rd.ownerType = RelationshipDataElementType.DOCUMENT
324        rd.ownerDocument = self.docSDK
325        rd.otherType = RelationshipDataElementType.PACKAGEID
326        rd.otherPackageID = cfgPackageSDK.spdxID
327        rd.rlnType = "DESCRIBES"
328
329        # add it to pending relationships queue
330        self.pendingRelationships.append(rd)
331
332    def setupModulesDocument(self, modules):
333        # set up zephyr document
334        cfgModuleExtRef = DocumentConfig()
335        cfgModuleExtRef.name = "modules-deps"
336        cfgModuleExtRef.namespace = self.cfg.namespacePrefix + "/modules-deps"
337        cfgModuleExtRef.docRefID = "DocumentRef-modules-deps"
338        self.docModulesExtRefs = Document(cfgModuleExtRef)
339
340        for module in modules:
341            module_name = module.get("name", None)
342            module_security = module.get("security", None)
343
344            if not module_name:
345                log.err(f"cannot find module name in meta file; bailing")
346                return False
347
348            module_ext_ref = []
349            if module_security:
350                module_ext_ref = module_security.get("external-references")
351
352            # set up zephyr sources package
353            cfgPackageModuleExtRef = PackageConfig()
354            cfgPackageModuleExtRef.name = module_name + "-deps"
355            cfgPackageModuleExtRef.spdxID = "SPDXRef-" + module_name + "-deps"
356
357            for ref in module_ext_ref:
358                cfgPackageModuleExtRef.externalReferences.append(ref)
359
360            pkgModule = Package(cfgPackageModuleExtRef, self.docModulesExtRefs)
361            self.docModulesExtRefs.pkgs[pkgModule.cfg.spdxID] = pkgModule
362
363            self._add_describe_relationship(self.docModulesExtRefs, cfgPackageModuleExtRef)
364
365
366    # set up Documents before beginning
367    def setupDocuments(self):
368        log.dbg("setting up placeholder documents")
369
370        self.setupBuildDocument()
371
372        try:
373            with open(self.metaFile) as file:
374                content = yaml.load(file.read(), yaml.SafeLoader)
375                if not self.setupZephyrDocument(content["zephyr"], content["modules"]):
376                    return False
377        except (FileNotFoundError, yaml.YAMLError):
378            log.err(f"cannot find a valid zephyr_meta.yml required for SPDX generation; bailing")
379            return False
380
381        self.setupAppDocument()
382
383        if self.cfg.includeSDK:
384            self.setupSDKDocument()
385
386        self.setupModulesDocument(content["modules"])
387
388        return True
389
390    # walk through targets and gather information
391    def walkTargets(self):
392        log.dbg("walking targets from codemodel")
393
394        # assuming just one configuration; consider whether this is incorrect
395        cfgTargets = self.cm.configurations[0].configTargets
396        for cfgTarget in cfgTargets:
397            # build the Package for this target
398            pkg = self.initConfigTargetPackage(cfgTarget)
399
400            # see whether this target has any build artifacts at all
401            if len(cfgTarget.target.artifacts) > 0:
402                # add its build file
403                bf = self.addBuildFile(cfgTarget, pkg)
404                if pkg.cfg.name == "zephyr_final":
405                    pkg.cfg.primaryPurpose = "APPLICATION"
406                else:
407                    pkg.cfg.primaryPurpose = "LIBRARY"
408
409                # get its source files if build file is found
410                if bf:
411                    self.collectPendingSourceFiles(cfgTarget, pkg, bf)
412            else:
413                log.dbg(f"  - target {cfgTarget.name} has no build artifacts")
414
415            # get its target dependencies
416            self.collectTargetDependencies(cfgTargets, cfgTarget, pkg)
417
418    # build a Package in the Build doc for the given ConfigTarget
419    def initConfigTargetPackage(self, cfgTarget):
420        log.dbg(f"  - initializing Package for target: {cfgTarget.name}")
421
422        # create target Package's config
423        cfg = PackageConfig()
424        cfg.name = cfgTarget.name
425        cfg.spdxID = "SPDXRef-" + zspdx.spdxids.convertToSPDXIDSafe(cfgTarget.name)
426        cfg.relativeBaseDir = self.cm.paths_build
427
428        # build Package
429        pkg = Package(cfg, self.docBuild)
430
431        # add Package to build Document
432        self.docBuild.pkgs[cfg.spdxID] = pkg
433        return pkg
434
435    # create a target's build product File and add it to its Package
436    # call with:
437    #   1) ConfigTarget
438    #   2) Package for that target
439    # returns: File
440    def addBuildFile(self, cfgTarget, pkg):
441        # assumes only one artifact in each target
442        artifactPath = os.path.join(pkg.cfg.relativeBaseDir, cfgTarget.target.artifacts[0])
443        log.dbg(f"  - adding File {artifactPath}")
444        log.dbg(f"    - relativeBaseDir: {pkg.cfg.relativeBaseDir}")
445        log.dbg(f"    - artifacts[0]: {cfgTarget.target.artifacts[0]}")
446
447        # don't create build File if artifact path points to nonexistent file
448        if not os.path.exists(artifactPath):
449            log.dbg(f"  - target {cfgTarget.name} lists build artifact {artifactPath} but file not found after build; skipping")
450            return None
451
452        # create build File
453        bf = File(self.docBuild, pkg)
454        bf.abspath = artifactPath
455        bf.relpath = cfgTarget.target.artifacts[0]
456        # can use nameOnDisk b/c it is just the filename w/out directory paths
457        bf.spdxID = zspdx.spdxids.getUniqueFileID(cfgTarget.target.nameOnDisk, self.docBuild.timesSeen)
458        # don't fill hashes / licenses / rlns now, we'll do that after walking
459
460        # add File to Package
461        pkg.files[bf.spdxID] = bf
462
463        # add file path link to Document and global links
464        self.docBuild.fileLinks[bf.abspath] = bf
465        self.allFileLinks[bf.abspath] = self.docBuild
466
467        # also set this file as the target package's build product file
468        pkg.targetBuildFile = bf
469
470        return bf
471
472    # collect a target's source files, add to pending sources queue, and
473    # create pending relationship data entry
474    # call with:
475    #   1) ConfigTarget
476    #   2) Package for that target
477    #   3) build File for that target
478    def collectPendingSourceFiles(self, cfgTarget, pkg, bf):
479        log.dbg(f"  - collecting source files and adding to pending queue")
480
481        targetIncludesSet = set()
482
483        # walk through target's sources
484        for src in cfgTarget.target.sources:
485            log.dbg(f"    - add pending source file and relationship for {src.path}")
486            # get absolute path if we don't have it
487            srcAbspath = src.path
488            if not os.path.isabs(src.path):
489                srcAbspath = os.path.join(self.cm.paths_source, src.path)
490
491            # check whether it even exists
492            if not (os.path.exists(srcAbspath) and os.path.isfile(srcAbspath)):
493                log.dbg(f"  - {srcAbspath} does not exist but is referenced in sources for target {pkg.cfg.name}; skipping")
494                continue
495
496            # add it to pending source files queue
497            self.pendingSources.append(srcAbspath)
498
499            # create relationship data
500            rd = RelationshipData()
501            rd.ownerType = RelationshipDataElementType.FILENAME
502            rd.ownerFileAbspath = bf.abspath
503            rd.otherType = RelationshipDataElementType.FILENAME
504            rd.otherFileAbspath = srcAbspath
505            rd.rlnType = "GENERATED_FROM"
506
507            # add it to pending relationships queue
508            self.pendingRelationships.append(rd)
509
510            # collect this source file's includes
511            if self.cfg.analyzeIncludes and self.compilerPath:
512                includes = self.collectIncludes(cfgTarget, pkg, bf, src)
513                for inc in includes:
514                    targetIncludesSet.add(inc)
515
516        # make relationships for the overall included files,
517        # avoiding duplicates for multiple source files including
518        # the same headers
519        targetIncludesList = list(targetIncludesSet)
520        targetIncludesList.sort()
521        for inc in targetIncludesList:
522            # add it to pending source files queue
523            self.pendingSources.append(inc)
524
525            # create relationship data
526            rd = RelationshipData()
527            rd.ownerType = RelationshipDataElementType.FILENAME
528            rd.ownerFileAbspath = bf.abspath
529            rd.otherType = RelationshipDataElementType.FILENAME
530            rd.otherFileAbspath = inc
531            rd.rlnType = "GENERATED_FROM"
532
533            # add it to pending relationships queue
534            self.pendingRelationships.append(rd)
535
536    # collect the include files corresponding to this source file
537    # call with:
538    #   1) ConfigTarget
539    #   2) Package for this target
540    #   3) build File for this target
541    #   4) TargetSource entry for this source file
542    # returns: sorted list of include files for this source file
543    def collectIncludes(self, cfgTarget, pkg, bf, src):
544        # get the right compile group for this source file
545        if len(cfgTarget.target.compileGroups) < (src.compileGroupIndex + 1):
546            log.dbg(f"    - {cfgTarget.target.name} has compileGroupIndex {src.compileGroupIndex} but only {len(cfgTarget.target.compileGroups)} found; skipping included files search")
547            return []
548        cg = cfgTarget.target.compileGroups[src.compileGroupIndex]
549
550        # currently only doing C includes
551        if cg.language != "C":
552            log.dbg(f"    - {cfgTarget.target.name} has compile group language {cg.language} but currently only searching includes for C files; skipping included files search")
553            return []
554
555        srcAbspath = src.path
556        if src.path[0] != "/":
557            srcAbspath = os.path.join(self.cm.paths_source, src.path)
558        return getCIncludes(self.compilerPath, srcAbspath, cg)
559
560    # collect relationships for dependencies of this target Package
561    # call with:
562    #   1) all ConfigTargets from CodeModel
563    #   2) this particular ConfigTarget
564    #   3) Package for this Target
565    def collectTargetDependencies(self, cfgTargets, cfgTarget, pkg):
566        log.dbg(f"  - collecting target dependencies for {pkg.cfg.name}")
567
568        # walk through target's dependencies
569        for dep in cfgTarget.target.dependencies:
570            # extract dep name from its id
571            depFragments = dep.id.split(":")
572            depName = depFragments[0]
573            log.dbg(f"    - adding pending relationship for {depName}")
574
575            # create relationship data between dependency packages
576            rd = RelationshipData()
577            rd.ownerType = RelationshipDataElementType.TARGETNAME
578            rd.ownerTargetName = pkg.cfg.name
579            rd.otherType = RelationshipDataElementType.TARGETNAME
580            rd.otherTargetName = depName
581            rd.rlnType = "HAS_PREREQUISITE"
582
583            # add it to pending relationships queue
584            self.pendingRelationships.append(rd)
585
586            # if this is a target with any build artifacts (e.g. non-UTILITY),
587            # also create STATIC_LINK relationship for dependency build files,
588            # together with this Package's own target build file
589            if len(cfgTarget.target.artifacts) == 0:
590                continue
591
592            # find the filename for the dependency's build product, using the
593            # codemodel (since we might not have created this dependency's
594            # Package or File yet)
595            depAbspath = ""
596            for ct in cfgTargets:
597                if ct.name == depName:
598                    # skip utility targets
599                    if len(ct.target.artifacts) == 0:
600                        continue
601                    # all targets use the same relativeBaseDir, so this works
602                    # even though pkg is the owner package
603                    depAbspath = os.path.join(pkg.cfg.relativeBaseDir, ct.target.artifacts[0])
604                    break
605            if depAbspath == "":
606                continue
607
608            # create relationship data between build files
609            rd = RelationshipData()
610            rd.ownerType = RelationshipDataElementType.FILENAME
611            rd.ownerFileAbspath = pkg.targetBuildFile.abspath
612            rd.otherType = RelationshipDataElementType.FILENAME
613            rd.otherFileAbspath = depAbspath
614            rd.rlnType = "STATIC_LINK"
615
616            # add it to pending relationships queue
617            self.pendingRelationships.append(rd)
618
619    # walk through pending sources and create corresponding files,
620    # assigning them to the appropriate Document and Package
621    def walkPendingSources(self):
622        log.dbg(f"walking pending sources")
623
624        # only one package in each doc; get it
625        pkgZephyr = list(self.docZephyr.pkgs.values())[0]
626        pkgApp = list(self.docApp.pkgs.values())[0]
627        if self.cfg.includeSDK:
628            pkgSDK = list(self.docSDK.pkgs.values())[0]
629
630        for srcAbspath in self.pendingSources:
631            # check whether we've already seen it
632            srcDoc = self.allFileLinks.get(srcAbspath, None)
633            srcPkg = None
634            if srcDoc:
635                log.dbg(f"  - {srcAbspath}: already seen, assigned to {srcDoc.cfg.name}")
636                continue
637
638            # not yet assigned; figure out where it goes
639            pkgBuild = self.findBuildPackage(srcAbspath)
640            pkgZephyr = self.findZephyrPackage(srcAbspath)
641
642            if pkgBuild:
643                log.dbg(f"  - {srcAbspath}: assigning to build document, package {pkgBuild.cfg.name}")
644                srcDoc = self.docBuild
645                srcPkg = pkgBuild
646            elif self.cfg.includeSDK and os.path.commonpath([srcAbspath, pkgSDK.cfg.relativeBaseDir]) == pkgSDK.cfg.relativeBaseDir:
647                log.dbg(f"  - {srcAbspath}: assigning to sdk document")
648                srcDoc = self.docSDK
649                srcPkg = pkgSDK
650            elif os.path.commonpath([srcAbspath, pkgApp.cfg.relativeBaseDir]) == pkgApp.cfg.relativeBaseDir:
651                log.dbg(f"  - {srcAbspath}: assigning to app document")
652                srcDoc = self.docApp
653                srcPkg = pkgApp
654            elif pkgZephyr:
655                log.dbg(f"  - {srcAbspath}: assigning to zephyr document")
656                srcDoc = self.docZephyr
657                srcPkg = pkgZephyr
658            else:
659                log.dbg(f"  - {srcAbspath}: can't determine which document should own; skipping")
660                continue
661
662            # create File and assign it to the Package and Document
663            sf = File(srcDoc, srcPkg)
664            sf.abspath = srcAbspath
665            sf.relpath = os.path.relpath(srcAbspath, srcPkg.cfg.relativeBaseDir)
666            filenameOnly = os.path.split(srcAbspath)[1]
667            sf.spdxID = zspdx.spdxids.getUniqueFileID(filenameOnly, srcDoc.timesSeen)
668            # don't fill hashes / licenses / rlns now, we'll do that after walking
669
670            # add File to Package
671            srcPkg.files[sf.spdxID] = sf
672
673            # add file path link to Document and global links
674            srcDoc.fileLinks[sf.abspath] = sf
675            self.allFileLinks[sf.abspath] = srcDoc
676
677    # figure out which Package contains the given file, if any
    # call with:
    #   1) Document whose Packages should be searched
    #   2) absolute path for source filename being searched
680    def findPackageFromSrcAbsPath(self, document, srcAbspath):
681        # Multiple target Packages might "contain" the file path, if they
682        # are nested. If so, the one with the longest path would be the
683        # most deeply-nested target directory, so that's the one which
684        # should get the file path.
685        pkgLongestMatch = None
686        for pkg in document.pkgs.values():
687            if os.path.commonpath([srcAbspath, pkg.cfg.relativeBaseDir]) == pkg.cfg.relativeBaseDir:
688                # the package does contain this file; is it the deepest?
689                if pkgLongestMatch:
690                    if len(pkg.cfg.relativeBaseDir) > len(pkgLongestMatch.cfg.relativeBaseDir):
691                        pkgLongestMatch = pkg
692                else:
693                    # first package containing it, so assign it
694                    pkgLongestMatch = pkg
695
696        return pkgLongestMatch
697
698    def findBuildPackage(self, srcAbspath):
699        return self.findPackageFromSrcAbsPath(self.docBuild, srcAbspath)
700
701    def findZephyrPackage(self, srcAbspath):
702        return self.findPackageFromSrcAbsPath(self.docZephyr, srcAbspath)
703
704    # walk through pending RelationshipData entries, create corresponding
705    # Relationships, and assign them to the applicable Files / Packages
706    def walkRelationships(self):
707        for rlnData in self.pendingRelationships:
708            rln = Relationship()
709            # get left side of relationship data
710            docA, spdxIDA, rlnsA = self.getRelationshipLeft(rlnData)
711            if not docA or not spdxIDA:
712                continue
713            rln.refA = spdxIDA
714            # get right side of relationship data
715            spdxIDB = self.getRelationshipRight(rlnData, docA)
716            if not spdxIDB:
717                continue
718            rln.refB = spdxIDB
719            rln.rlnType = rlnData.rlnType
720            rlnsA.append(rln)
721            log.dbg(f"  - adding relationship to {docA.cfg.name}: {rln.refA} {rln.rlnType} {rln.refB}")
722
723    # get owner (left side) document and SPDX ID of Relationship for given RelationshipData
724    # returns: doc, spdxID, rlnsArray (for either Document, Package, or File, as applicable)
725    def getRelationshipLeft(self, rlnData):
726        if rlnData.ownerType == RelationshipDataElementType.FILENAME:
727            # find the document for this file abspath, and then the specific file's ID
728            ownerDoc = self.allFileLinks.get(rlnData.ownerFileAbspath, None)
729            if not ownerDoc:
730                log.dbg(f"  - searching for relationship, can't find document with file {rlnData.ownerFileAbspath}; skipping")
731                return None, None, None
732            sf = ownerDoc.fileLinks.get(rlnData.ownerFileAbspath, None)
733            if not sf:
734                log.dbg(f"  - searching for relationship for file {rlnData.ownerFileAbspath} points to document {ownerDoc.cfg.name} but file not found; skipping")
735                return None, None, None
736            # found it
737            if not sf.spdxID:
738                log.dbg(f"  - searching for relationship for file {rlnData.ownerFileAbspath} found file, but empty ID; skipping")
739                return None, None, None
740            return ownerDoc, sf.spdxID, sf.rlns
741        elif rlnData.ownerType == RelationshipDataElementType.TARGETNAME:
742            # find the document for this target name, and then the specific package's ID
743            # for target names, must be docBuild
744            ownerDoc = self.docBuild
745            # walk through target Packages and check names
746            for pkg in ownerDoc.pkgs.values():
747                if pkg.cfg.name == rlnData.ownerTargetName:
748                    if not pkg.cfg.spdxID:
749                        log.dbg(f"  - searching for relationship for target {rlnData.ownerTargetName} found package, but empty ID; skipping")
750                        return None, None, None
751                    return ownerDoc, pkg.cfg.spdxID, pkg.rlns
752            log.dbg(f"  - searching for relationship for target {rlnData.ownerTargetName}, target not found in build document; skipping")
753            return None, None, None
754        elif rlnData.ownerType == RelationshipDataElementType.DOCUMENT:
755            # will always be SPDXRef-DOCUMENT
756            return rlnData.ownerDocument, "SPDXRef-DOCUMENT", rlnData.ownerDocument.relationships
757        else:
758            log.dbg(f"  - unknown relationship type {rlnData.ownerType}; skipping")
759            return None, None, None
760
761    # get other (right side) SPDX ID of Relationship for given RelationshipData
762    def getRelationshipRight(self, rlnData, docA):
763        if rlnData.otherType == RelationshipDataElementType.FILENAME:
764            # find the document for this file abspath, and then the specific file's ID
765            otherDoc = self.allFileLinks.get(rlnData.otherFileAbspath, None)
766            if not otherDoc:
767                log.dbg(f"  - searching for relationship, can't find document with file {rlnData.otherFileAbspath}; skipping")
768                return None
769            bf = otherDoc.fileLinks.get(rlnData.otherFileAbspath, None)
770            if not bf:
771                log.dbg(f"  - searching for relationship for file {rlnData.otherFileAbspath} points to document {otherDoc.cfg.name} but file not found; skipping")
772                return None
773            # found it
774            if not bf.spdxID:
775                log.dbg(f"  - searching for relationship for file {rlnData.otherFileAbspath} found file, but empty ID; skipping")
776                return None
777            # figure out whether to append DocumentRef
778            spdxIDB = bf.spdxID
779            if otherDoc != docA:
780                spdxIDB = otherDoc.cfg.docRefID + ":" + spdxIDB
781                docA.externalDocuments.add(otherDoc)
782            return spdxIDB
783        elif rlnData.otherType == RelationshipDataElementType.TARGETNAME:
784            # find the document for this target name, and then the specific package's ID
785            # for target names, must be docBuild
786            otherDoc = self.docBuild
787            # walk through target Packages and check names
788            for pkg in otherDoc.pkgs.values():
789                if pkg.cfg.name == rlnData.otherTargetName:
790                    if not pkg.cfg.spdxID:
791                        log.dbg(f"  - searching for relationship for target {rlnData.otherTargetName} found package, but empty ID; skipping")
792                        return None
793                    spdxIDB = pkg.cfg.spdxID
794                    if otherDoc != docA:
795                        spdxIDB = otherDoc.cfg.docRefID + ":" + spdxIDB
796                        docA.externalDocuments.add(otherDoc)
797                    return spdxIDB
798            log.dbg(f"  - searching for relationship for target {rlnData.otherTargetName}, target not found in build document; skipping")
799            return None
800        elif rlnData.otherType == RelationshipDataElementType.PACKAGEID:
801            # will just be the package ID that was passed in
802            return rlnData.otherPackageID
803        else:
804            log.dbg(f"  - unknown relationship type {rlnData.otherType}; skipping")
805            return None
806