1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // (C) Copyright IBM Corporation 2021
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #ifndef RAPIDJSON_URI_H_
16 #define RAPIDJSON_URI_H_
17 
18 #include "internal/strfunc.h"
19 
20 #if defined(__clang__)
21 RAPIDJSON_DIAG_PUSH
22 RAPIDJSON_DIAG_OFF(c++98-compat)
23 #elif defined(_MSC_VER)
24 RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
25 #endif
26 
27 RAPIDJSON_NAMESPACE_BEGIN
28 
29 ///////////////////////////////////////////////////////////////////////////////
30 // GenericUri
31 
32 template <typename ValueType, typename Allocator=CrtAllocator>
33 class GenericUri {
34 public:
35     typedef typename ValueType::Ch Ch;
36 #if RAPIDJSON_HAS_STDSTRING
37     typedef std::basic_string<Ch> String;
38 #endif
39 
40     //! Constructors
uri_()41     GenericUri(Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
42     }
43 
uri_()44     GenericUri(const Ch* uri, SizeType len, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
45         Parse(uri, len);
46     }
47 
uri_()48     GenericUri(const Ch* uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
49         Parse(uri, internal::StrLen<Ch>(uri));
50     }
51 
52     // Use with specializations of GenericValue
uri_()53     template<typename T> GenericUri(const T& uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
54         const Ch* u = uri.template Get<const Ch*>(); // TypeHelper from document.h
55         Parse(u, internal::StrLen<Ch>(u));
56     }
57 
58 #if RAPIDJSON_HAS_STDSTRING
uri_()59     GenericUri(const String& uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
60         Parse(uri.c_str(), internal::StrLen<Ch>(uri.c_str()));
61     }
62 #endif
63 
64     //! Copy constructor
GenericUri(const GenericUri & rhs)65     GenericUri(const GenericUri& rhs) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(), ownAllocator_() {
66         *this = rhs;
67     }
68 
69     //! Copy constructor
GenericUri(const GenericUri & rhs,Allocator * allocator)70     GenericUri(const GenericUri& rhs, Allocator* allocator) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
71         *this = rhs;
72     }
73 
74     //! Destructor.
~GenericUri()75     ~GenericUri() {
76         Free();
77         RAPIDJSON_DELETE(ownAllocator_);
78     }
79 
80     //! Assignment operator
81     GenericUri& operator=(const GenericUri& rhs) {
82         if (this != &rhs) {
83             // Do not delete ownAllocator
84             Free();
85             Allocate(rhs.GetStringLength());
86             auth_ = CopyPart(scheme_, rhs.scheme_, rhs.GetSchemeStringLength());
87             path_ = CopyPart(auth_, rhs.auth_, rhs.GetAuthStringLength());
88             query_ = CopyPart(path_, rhs.path_, rhs.GetPathStringLength());
89             frag_ = CopyPart(query_, rhs.query_, rhs.GetQueryStringLength());
90             base_ = CopyPart(frag_, rhs.frag_, rhs.GetFragStringLength());
91             uri_ = CopyPart(base_, rhs.base_, rhs.GetBaseStringLength());
92             CopyPart(uri_, rhs.uri_, rhs.GetStringLength());
93         }
94         return *this;
95     }
96 
97     //! Getters
98     // Use with specializations of GenericValue
Get(T & uri,Allocator & allocator)99     template<typename T> void Get(T& uri, Allocator& allocator) {
100         uri.template Set<const Ch*>(this->GetString(), allocator); // TypeHelper from document.h
101     }
102 
GetString()103     const Ch* GetString() const { return uri_; }
GetStringLength()104     SizeType GetStringLength() const { return uri_ == 0 ? 0 : internal::StrLen<Ch>(uri_); }
GetBaseString()105     const Ch* GetBaseString() const { return base_; }
GetBaseStringLength()106     SizeType GetBaseStringLength() const { return base_ == 0 ? 0 : internal::StrLen<Ch>(base_); }
GetSchemeString()107     const Ch* GetSchemeString() const { return scheme_; }
GetSchemeStringLength()108     SizeType GetSchemeStringLength() const { return scheme_ == 0 ? 0 : internal::StrLen<Ch>(scheme_); }
GetAuthString()109     const Ch* GetAuthString() const { return auth_; }
GetAuthStringLength()110     SizeType GetAuthStringLength() const { return auth_ == 0 ? 0 : internal::StrLen<Ch>(auth_); }
GetPathString()111     const Ch* GetPathString() const { return path_; }
GetPathStringLength()112     SizeType GetPathStringLength() const { return path_ == 0 ? 0 : internal::StrLen<Ch>(path_); }
GetQueryString()113     const Ch* GetQueryString() const { return query_; }
GetQueryStringLength()114     SizeType GetQueryStringLength() const { return query_ == 0 ? 0 : internal::StrLen<Ch>(query_); }
GetFragString()115     const Ch* GetFragString() const { return frag_; }
GetFragStringLength()116     SizeType GetFragStringLength() const { return frag_ == 0 ? 0 : internal::StrLen<Ch>(frag_); }
117 
118 #if RAPIDJSON_HAS_STDSTRING
Get(const GenericUri & uri)119     static String Get(const GenericUri& uri) { return String(uri.GetString(), uri.GetStringLength()); }
GetBase(const GenericUri & uri)120     static String GetBase(const GenericUri& uri) { return String(uri.GetBaseString(), uri.GetBaseStringLength()); }
GetScheme(const GenericUri & uri)121     static String GetScheme(const GenericUri& uri) { return String(uri.GetSchemeString(), uri.GetSchemeStringLength()); }
GetAuth(const GenericUri & uri)122     static String GetAuth(const GenericUri& uri) { return String(uri.GetAuthString(), uri.GetAuthStringLength()); }
GetPath(const GenericUri & uri)123     static String GetPath(const GenericUri& uri) { return String(uri.GetPathString(), uri.GetPathStringLength()); }
GetQuery(const GenericUri & uri)124     static String GetQuery(const GenericUri& uri) { return String(uri.GetQueryString(), uri.GetQueryStringLength()); }
GetFrag(const GenericUri & uri)125     static String GetFrag(const GenericUri& uri) { return String(uri.GetFragString(), uri.GetFragStringLength()); }
126 #endif
127 
128     //! Equality operators
129     bool operator==(const GenericUri& rhs) const {
130         return Match(rhs, true);
131     }
132 
133     bool operator!=(const GenericUri& rhs) const {
134         return !Match(rhs, true);
135     }
136 
137     bool Match(const GenericUri& uri, bool full = true) const {
138         Ch* s1;
139         Ch* s2;
140         if (full) {
141             s1 = uri_;
142             s2 = uri.uri_;
143         } else {
144             s1 = base_;
145             s2 = uri.base_;
146         }
147         if (s1 == s2) return true;
148         if (s1 == 0 || s2 == 0) return false;
149         return internal::StrCmp<Ch>(s1, s2) == 0;
150     }
151 
152     //! Resolve this URI against another (base) URI in accordance with URI resolution rules.
153     // See https://tools.ietf.org/html/rfc3986
154     // Use for resolving an id or $ref with an in-scope id.
155     // Returns a new GenericUri for the resolved URI.
156     GenericUri Resolve(const GenericUri& baseuri, Allocator* allocator = 0) {
157         GenericUri resuri;
158         resuri.allocator_ = allocator;
159         // Ensure enough space for combining paths
160         resuri.Allocate(GetStringLength() + baseuri.GetStringLength() + 1); // + 1 for joining slash
161 
162         if (!(GetSchemeStringLength() == 0)) {
163             // Use all of this URI
164             resuri.auth_ = CopyPart(resuri.scheme_, scheme_, GetSchemeStringLength());
165             resuri.path_ = CopyPart(resuri.auth_, auth_, GetAuthStringLength());
166             resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength());
167             resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
168             resuri.RemoveDotSegments();
169         } else {
170             // Use the base scheme
171             resuri.auth_ = CopyPart(resuri.scheme_, baseuri.scheme_, baseuri.GetSchemeStringLength());
172             if (!(GetAuthStringLength() == 0)) {
173                 // Use this auth, path, query
174                 resuri.path_ = CopyPart(resuri.auth_, auth_, GetAuthStringLength());
175                 resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength());
176                 resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
177                 resuri.RemoveDotSegments();
178             } else {
179                 // Use the base auth
180                 resuri.path_ = CopyPart(resuri.auth_, baseuri.auth_, baseuri.GetAuthStringLength());
181                 if (GetPathStringLength() == 0) {
182                     // Use the base path
183                     resuri.query_ = CopyPart(resuri.path_, baseuri.path_, baseuri.GetPathStringLength());
184                     if (GetQueryStringLength() == 0) {
185                         // Use the base query
186                         resuri.frag_ = CopyPart(resuri.query_, baseuri.query_, baseuri.GetQueryStringLength());
187                     } else {
188                         // Use this query
189                         resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
190                     }
191                 } else {
192                     if (path_[0] == '/') {
193                         // Absolute path - use all of this path
194                         resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength());
195                         resuri.RemoveDotSegments();
196                     } else {
197                         // Relative path - append this path to base path after base path's last slash
198                         size_t pos = 0;
199                         if (!(baseuri.GetAuthStringLength() == 0) && baseuri.GetPathStringLength() == 0) {
200                             resuri.path_[pos] = '/';
201                             pos++;
202                         }
203                         size_t lastslashpos = baseuri.GetPathStringLength();
204                         while (lastslashpos > 0) {
205                             if (baseuri.path_[lastslashpos - 1] == '/') break;
206                             lastslashpos--;
207                         }
208                         std::memcpy(&resuri.path_[pos], baseuri.path_, lastslashpos * sizeof(Ch));
209                         pos += lastslashpos;
210                         resuri.query_ = CopyPart(&resuri.path_[pos], path_, GetPathStringLength());
211                         resuri.RemoveDotSegments();
212                     }
213                     // Use this query
214                     resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
215                 }
216             }
217         }
218         // Always use this frag
219         resuri.base_ = CopyPart(resuri.frag_, frag_, GetFragStringLength());
220 
221         // Re-constitute base_ and uri_
222         resuri.SetBase();
223         resuri.uri_ = resuri.base_ + resuri.GetBaseStringLength() + 1;
224         resuri.SetUri();
225         return resuri;
226     }
227 
228     //! Get the allocator of this GenericUri.
GetAllocator()229     Allocator& GetAllocator() { return *allocator_; }
230 
231 private:
232     // Allocate memory for a URI
233     // Returns total amount allocated
Allocate(std::size_t len)234     std::size_t Allocate(std::size_t len) {
235         // Create own allocator if user did not supply.
236         if (!allocator_)
237             ownAllocator_ =  allocator_ = RAPIDJSON_NEW(Allocator)();
238 
239         // Allocate one block containing each part of the URI (5) plus base plus full URI, all null terminated.
240         // Order: scheme, auth, path, query, frag, base, uri
241         // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
242         size_t total = (3 * len + 7) * sizeof(Ch);
243         scheme_ = static_cast<Ch*>(allocator_->Malloc(total));
244         *scheme_ = '\0';
245         auth_ = scheme_;
246         auth_++;
247         *auth_ = '\0';
248         path_ = auth_;
249         path_++;
250         *path_ = '\0';
251         query_ = path_;
252         query_++;
253         *query_ = '\0';
254         frag_ = query_;
255         frag_++;
256         *frag_ = '\0';
257         base_ = frag_;
258         base_++;
259         *base_ = '\0';
260         uri_ = base_;
261         uri_++;
262         *uri_ = '\0';
263         return total;
264     }
265 
266     // Free memory for a URI
Free()267     void Free() {
268         if (scheme_) {
269             Allocator::Free(scheme_);
270             scheme_ = 0;
271         }
272     }
273 
274     // Parse a URI into constituent scheme, authority, path, query, & fragment parts
275     // Supports URIs that match regex ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? as per
276     // https://tools.ietf.org/html/rfc3986
Parse(const Ch * uri,std::size_t len)277     void Parse(const Ch* uri, std::size_t len) {
278         std::size_t start = 0, pos1 = 0, pos2 = 0;
279         Allocate(len);
280 
281         // Look for scheme ([^:/?#]+):)?
282         if (start < len) {
283             while (pos1 < len) {
284                 if (uri[pos1] == ':') break;
285                 pos1++;
286             }
287             if (pos1 != len) {
288                 while (pos2 < len) {
289                     if (uri[pos2] == '/') break;
290                     if (uri[pos2] == '?') break;
291                     if (uri[pos2] == '#') break;
292                     pos2++;
293                 }
294                 if (pos1 < pos2) {
295                     pos1++;
296                     std::memcpy(scheme_, &uri[start], pos1 * sizeof(Ch));
297                     scheme_[pos1] = '\0';
298                     start = pos1;
299                 }
300             }
301         }
302         // Look for auth (//([^/?#]*))?
303         // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
304         auth_ = scheme_ + GetSchemeStringLength();
305         auth_++;
306         *auth_ = '\0';
307         if (start < len - 1 && uri[start] == '/' && uri[start + 1] == '/') {
308             pos2 = start + 2;
309             while (pos2 < len) {
310                 if (uri[pos2] == '/') break;
311                 if (uri[pos2] == '?') break;
312                 if (uri[pos2] == '#') break;
313                 pos2++;
314             }
315             std::memcpy(auth_, &uri[start], (pos2 - start) * sizeof(Ch));
316             auth_[pos2 - start] = '\0';
317             start = pos2;
318         }
319         // Look for path ([^?#]*)
320         // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
321         path_ = auth_ + GetAuthStringLength();
322         path_++;
323         *path_ = '\0';
324         if (start < len) {
325             pos2 = start;
326             while (pos2 < len) {
327                 if (uri[pos2] == '?') break;
328                 if (uri[pos2] == '#') break;
329                 pos2++;
330             }
331             if (start != pos2) {
332                 std::memcpy(path_, &uri[start], (pos2 - start) * sizeof(Ch));
333                 path_[pos2 - start] = '\0';
334                 if (path_[0] == '/')
335                     RemoveDotSegments();   // absolute path - normalize
336                 start = pos2;
337             }
338         }
339         // Look for query (\?([^#]*))?
340         // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
341         query_ = path_ + GetPathStringLength();
342         query_++;
343         *query_ = '\0';
344         if (start < len && uri[start] == '?') {
345             pos2 = start + 1;
346             while (pos2 < len) {
347                 if (uri[pos2] == '#') break;
348                 pos2++;
349             }
350             if (start != pos2) {
351                 std::memcpy(query_, &uri[start], (pos2 - start) * sizeof(Ch));
352                 query_[pos2 - start] = '\0';
353                 start = pos2;
354             }
355         }
356         // Look for fragment (#(.*))?
357         // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
358         frag_ = query_ + GetQueryStringLength();
359         frag_++;
360         *frag_ = '\0';
361         if (start < len && uri[start] == '#') {
362             std::memcpy(frag_, &uri[start], (len - start) * sizeof(Ch));
363             frag_[len - start] = '\0';
364         }
365 
366         // Re-constitute base_ and uri_
367         base_ = frag_ + GetFragStringLength() + 1;
368         SetBase();
369         uri_ = base_ + GetBaseStringLength() + 1;
370         SetUri();
371     }
372 
373     // Reconstitute base
SetBase()374     void SetBase() {
375         Ch* next = base_;
376         std::memcpy(next, scheme_, GetSchemeStringLength() * sizeof(Ch));
377         next+= GetSchemeStringLength();
378         std::memcpy(next, auth_, GetAuthStringLength() * sizeof(Ch));
379         next+= GetAuthStringLength();
380         std::memcpy(next, path_, GetPathStringLength() * sizeof(Ch));
381         next+= GetPathStringLength();
382         std::memcpy(next, query_, GetQueryStringLength() * sizeof(Ch));
383         next+= GetQueryStringLength();
384         *next = '\0';
385     }
386 
387     // Reconstitute uri
SetUri()388     void SetUri() {
389         Ch* next = uri_;
390         std::memcpy(next, base_, GetBaseStringLength() * sizeof(Ch));
391         next+= GetBaseStringLength();
392         std::memcpy(next, frag_, GetFragStringLength() * sizeof(Ch));
393         next+= GetFragStringLength();
394         *next = '\0';
395     }
396 
397     // Copy a part from one GenericUri to another
398     // Return the pointer to the next part to be copied to
CopyPart(Ch * to,Ch * from,std::size_t len)399     Ch* CopyPart(Ch* to, Ch* from, std::size_t len) {
400         RAPIDJSON_ASSERT(to != 0);
401         RAPIDJSON_ASSERT(from != 0);
402         std::memcpy(to, from, len * sizeof(Ch));
403         to[len] = '\0';
404         Ch* next = to + len + 1;
405         return next;
406     }
407 
408     // Remove . and .. segments from the path_ member.
409     // https://tools.ietf.org/html/rfc3986
410     // This is done in place as we are only removing segments.
RemoveDotSegments()411     void RemoveDotSegments() {
412         std::size_t pathlen = GetPathStringLength();
413         std::size_t pathpos = 0;  // Position in path_
414         std::size_t newpos = 0;   // Position in new path_
415 
416         // Loop through each segment in original path_
417         while (pathpos < pathlen) {
418             // Get next segment, bounded by '/' or end
419             size_t slashpos = 0;
420             while ((pathpos + slashpos) < pathlen) {
421                 if (path_[pathpos + slashpos] == '/') break;
422                 slashpos++;
423             }
424             // Check for .. and . segments
425             if (slashpos == 2 && path_[pathpos] == '.' && path_[pathpos + 1] == '.') {
426                 // Backup a .. segment in the new path_
427                 // We expect to find a previously added slash at the end or nothing
428                 RAPIDJSON_ASSERT(newpos == 0 || path_[newpos - 1] == '/');
429                 size_t lastslashpos = newpos;
430                 // Make sure we don't go beyond the start segment
431                 if (lastslashpos > 1) {
432                     // Find the next to last slash and back up to it
433                     lastslashpos--;
434                     while (lastslashpos > 0) {
435                         if (path_[lastslashpos - 1] == '/') break;
436                         lastslashpos--;
437                     }
438                     // Set the new path_ position
439                     newpos = lastslashpos;
440                 }
441             } else if (slashpos == 1 && path_[pathpos] == '.') {
442                 // Discard . segment, leaves new path_ unchanged
443             } else {
444                 // Move any other kind of segment to the new path_
445                 RAPIDJSON_ASSERT(newpos <= pathpos);
446                 std::memmove(&path_[newpos], &path_[pathpos], slashpos * sizeof(Ch));
447                 newpos += slashpos;
448                 // Add slash if not at end
449                 if ((pathpos + slashpos) < pathlen) {
450                     path_[newpos] = '/';
451                     newpos++;
452                 }
453             }
454             // Move to next segment
455             pathpos += slashpos + 1;
456         }
457         path_[newpos] = '\0';
458     }
459 
460     Ch* uri_;    // Everything
461     Ch* base_;   // Everything except fragment
462     Ch* scheme_; // Includes the :
463     Ch* auth_;   // Includes the //
464     Ch* path_;   // Absolute if starts with /
465     Ch* query_;  // Includes the ?
466     Ch* frag_;   // Includes the #
467 
468     Allocator* allocator_;      //!< The current allocator. It is either user-supplied or equal to ownAllocator_.
469     Allocator* ownAllocator_;   //!< Allocator owned by this Uri.
470 };
471 
//! GenericUri for Value (UTF-8, default allocator).
// The allocator is GenericUri's default template argument, CrtAllocator.
// NOTE(review): Value is not declared in this header or by the include above;
// presumably document.h must be included before this file - confirm.
typedef GenericUri<Value> Uri;
474 
475 RAPIDJSON_NAMESPACE_END
476 
477 #if defined(__clang__)
478 RAPIDJSON_DIAG_POP
479 #endif
480 
481 #endif // RAPIDJSON_URI_H_
482