1 // Tencent is pleased to support the open source community by making RapidJSON available. 2 // 3 // (C) Copyright IBM Corporation 2021 4 // 5 // Licensed under the MIT License (the "License"); you may not use this file except 6 // in compliance with the License. You may obtain a copy of the License at 7 // 8 // http://opensource.org/licenses/MIT 9 // 10 // Unless required by applicable law or agreed to in writing, software distributed 11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 // specific language governing permissions and limitations under the License. 14 15 #ifndef RAPIDJSON_URI_H_ 16 #define RAPIDJSON_URI_H_ 17 18 #include "internal/strfunc.h" 19 20 #if defined(__clang__) 21 RAPIDJSON_DIAG_PUSH 22 RAPIDJSON_DIAG_OFF(c++98-compat) 23 #elif defined(_MSC_VER) 24 RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated 25 #endif 26 27 RAPIDJSON_NAMESPACE_BEGIN 28 29 /////////////////////////////////////////////////////////////////////////////// 30 // GenericUri 31 32 template <typename ValueType, typename Allocator=CrtAllocator> 33 class GenericUri { 34 public: 35 typedef typename ValueType::Ch Ch; 36 #if RAPIDJSON_HAS_STDSTRING 37 typedef std::basic_string<Ch> String; 38 #endif 39 40 //! Constructors uri_()41 GenericUri(Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() { 42 } 43 uri_()44 GenericUri(const Ch* uri, SizeType len, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() { 45 Parse(uri, len); 46 } 47 uri_()48 GenericUri(const Ch* uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() { 49 Parse(uri, internal::StrLen<Ch>(uri)); 50 } 51 52 // Use with specializations of GenericValue uri_()53 template<typename T> GenericUri(const T& uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() { 54 const Ch* u = uri.template Get<const Ch*>(); // TypeHelper from document.h 55 Parse(u, internal::StrLen<Ch>(u)); 56 } 57 58 #if RAPIDJSON_HAS_STDSTRING uri_()59 GenericUri(const String& uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() { 60 Parse(uri.c_str(), internal::StrLen<Ch>(uri.c_str())); 61 } 62 #endif 63 64 //! Copy constructor GenericUri(const GenericUri & rhs)65 GenericUri(const GenericUri& rhs) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(), ownAllocator_() { 66 *this = rhs; 67 } 68 69 //! Copy constructor GenericUri(const GenericUri & rhs,Allocator * allocator)70 GenericUri(const GenericUri& rhs, Allocator* allocator) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() { 71 *this = rhs; 72 } 73 74 //! Destructor. ~GenericUri()75 ~GenericUri() { 76 Free(); 77 RAPIDJSON_DELETE(ownAllocator_); 78 } 79 80 //! Assignment operator 81 GenericUri& operator=(const GenericUri& rhs) { 82 if (this != &rhs) { 83 // Do not delete ownAllocator 84 Free(); 85 Allocate(rhs.GetStringLength()); 86 auth_ = CopyPart(scheme_, rhs.scheme_, rhs.GetSchemeStringLength()); 87 path_ = CopyPart(auth_, rhs.auth_, rhs.GetAuthStringLength()); 88 query_ = CopyPart(path_, rhs.path_, rhs.GetPathStringLength()); 89 frag_ = CopyPart(query_, rhs.query_, rhs.GetQueryStringLength()); 90 base_ = CopyPart(frag_, rhs.frag_, rhs.GetFragStringLength()); 91 uri_ = CopyPart(base_, rhs.base_, rhs.GetBaseStringLength()); 92 CopyPart(uri_, rhs.uri_, rhs.GetStringLength()); 93 } 94 return *this; 95 } 96 97 //! Getters 98 // Use with specializations of GenericValue Get(T & uri,Allocator & allocator)99 template<typename T> void Get(T& uri, Allocator& allocator) { 100 uri.template Set<const Ch*>(this->GetString(), allocator); // TypeHelper from document.h 101 } 102 GetString()103 const Ch* GetString() const { return uri_; } GetStringLength()104 SizeType GetStringLength() const { return uri_ == 0 ? 0 : internal::StrLen<Ch>(uri_); } GetBaseString()105 const Ch* GetBaseString() const { return base_; } GetBaseStringLength()106 SizeType GetBaseStringLength() const { return base_ == 0 ? 0 : internal::StrLen<Ch>(base_); } GetSchemeString()107 const Ch* GetSchemeString() const { return scheme_; } GetSchemeStringLength()108 SizeType GetSchemeStringLength() const { return scheme_ == 0 ? 0 : internal::StrLen<Ch>(scheme_); } GetAuthString()109 const Ch* GetAuthString() const { return auth_; } GetAuthStringLength()110 SizeType GetAuthStringLength() const { return auth_ == 0 ? 0 : internal::StrLen<Ch>(auth_); } GetPathString()111 const Ch* GetPathString() const { return path_; } GetPathStringLength()112 SizeType GetPathStringLength() const { return path_ == 0 ? 0 : internal::StrLen<Ch>(path_); } GetQueryString()113 const Ch* GetQueryString() const { return query_; } GetQueryStringLength()114 SizeType GetQueryStringLength() const { return query_ == 0 ? 0 : internal::StrLen<Ch>(query_); } GetFragString()115 const Ch* GetFragString() const { return frag_; } GetFragStringLength()116 SizeType GetFragStringLength() const { return frag_ == 0 ? 0 : internal::StrLen<Ch>(frag_); } 117 118 #if RAPIDJSON_HAS_STDSTRING Get(const GenericUri & uri)119 static String Get(const GenericUri& uri) { return String(uri.GetString(), uri.GetStringLength()); } GetBase(const GenericUri & uri)120 static String GetBase(const GenericUri& uri) { return String(uri.GetBaseString(), uri.GetBaseStringLength()); } GetScheme(const GenericUri & uri)121 static String GetScheme(const GenericUri& uri) { return String(uri.GetSchemeString(), uri.GetSchemeStringLength()); } GetAuth(const GenericUri & uri)122 static String GetAuth(const GenericUri& uri) { return String(uri.GetAuthString(), uri.GetAuthStringLength()); } GetPath(const GenericUri & uri)123 static String GetPath(const GenericUri& uri) { return String(uri.GetPathString(), uri.GetPathStringLength()); } GetQuery(const GenericUri & uri)124 static String GetQuery(const GenericUri& uri) { return String(uri.GetQueryString(), uri.GetQueryStringLength()); } GetFrag(const GenericUri & uri)125 static String GetFrag(const GenericUri& uri) { return String(uri.GetFragString(), uri.GetFragStringLength()); } 126 #endif 127 128 //! Equality operators 129 bool operator==(const GenericUri& rhs) const { 130 return Match(rhs, true); 131 } 132 133 bool operator!=(const GenericUri& rhs) const { 134 return !Match(rhs, true); 135 } 136 137 bool Match(const GenericUri& uri, bool full = true) const { 138 Ch* s1; 139 Ch* s2; 140 if (full) { 141 s1 = uri_; 142 s2 = uri.uri_; 143 } else { 144 s1 = base_; 145 s2 = uri.base_; 146 } 147 if (s1 == s2) return true; 148 if (s1 == 0 || s2 == 0) return false; 149 return internal::StrCmp<Ch>(s1, s2) == 0; 150 } 151 152 //! Resolve this URI against another (base) URI in accordance with URI resolution rules. 153 // See https://tools.ietf.org/html/rfc3986 154 // Use for resolving an id or $ref with an in-scope id. 155 // Returns a new GenericUri for the resolved URI. 156 GenericUri Resolve(const GenericUri& baseuri, Allocator* allocator = 0) { 157 GenericUri resuri; 158 resuri.allocator_ = allocator; 159 // Ensure enough space for combining paths 160 resuri.Allocate(GetStringLength() + baseuri.GetStringLength() + 1); // + 1 for joining slash 161 162 if (!(GetSchemeStringLength() == 0)) { 163 // Use all of this URI 164 resuri.auth_ = CopyPart(resuri.scheme_, scheme_, GetSchemeStringLength()); 165 resuri.path_ = CopyPart(resuri.auth_, auth_, GetAuthStringLength()); 166 resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength()); 167 resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength()); 168 resuri.RemoveDotSegments(); 169 } else { 170 // Use the base scheme 171 resuri.auth_ = CopyPart(resuri.scheme_, baseuri.scheme_, baseuri.GetSchemeStringLength()); 172 if (!(GetAuthStringLength() == 0)) { 173 // Use this auth, path, query 174 resuri.path_ = CopyPart(resuri.auth_, auth_, GetAuthStringLength()); 175 resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength()); 176 resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength()); 177 resuri.RemoveDotSegments(); 178 } else { 179 // Use the base auth 180 resuri.path_ = CopyPart(resuri.auth_, baseuri.auth_, baseuri.GetAuthStringLength()); 181 if (GetPathStringLength() == 0) { 182 // Use the base path 183 resuri.query_ = CopyPart(resuri.path_, baseuri.path_, baseuri.GetPathStringLength()); 184 if (GetQueryStringLength() == 0) { 185 // Use the base query 186 resuri.frag_ = CopyPart(resuri.query_, baseuri.query_, baseuri.GetQueryStringLength()); 187 } else { 188 // Use this query 189 resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength()); 190 } 191 } else { 192 if (path_[0] == '/') { 193 // Absolute path - use all of this path 194 resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength()); 195 resuri.RemoveDotSegments(); 196 } else { 197 // Relative path - append this path to base path after base path's last slash 198 size_t pos = 0; 199 if (!(baseuri.GetAuthStringLength() == 0) && baseuri.GetPathStringLength() == 0) { 200 resuri.path_[pos] = '/'; 201 pos++; 202 } 203 size_t lastslashpos = baseuri.GetPathStringLength(); 204 while (lastslashpos > 0) { 205 if (baseuri.path_[lastslashpos - 1] == '/') break; 206 lastslashpos--; 207 } 208 std::memcpy(&resuri.path_[pos], baseuri.path_, lastslashpos * sizeof(Ch)); 209 pos += lastslashpos; 210 resuri.query_ = CopyPart(&resuri.path_[pos], path_, GetPathStringLength()); 211 resuri.RemoveDotSegments(); 212 } 213 // Use this query 214 resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength()); 215 } 216 } 217 } 218 // Always use this frag 219 resuri.base_ = CopyPart(resuri.frag_, frag_, GetFragStringLength()); 220 221 // Re-constitute base_ and uri_ 222 resuri.SetBase(); 223 resuri.uri_ = resuri.base_ + resuri.GetBaseStringLength() + 1; 224 resuri.SetUri(); 225 return resuri; 226 } 227 228 //! Get the allocator of this GenericUri. GetAllocator()229 Allocator& GetAllocator() { return *allocator_; } 230 231 private: 232 // Allocate memory for a URI 233 // Returns total amount allocated Allocate(std::size_t len)234 std::size_t Allocate(std::size_t len) { 235 // Create own allocator if user did not supply. 236 if (!allocator_) 237 ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)(); 238 239 // Allocate one block containing each part of the URI (5) plus base plus full URI, all null terminated. 240 // Order: scheme, auth, path, query, frag, base, uri 241 // Note need to set, increment, assign in 3 stages to avoid compiler warning bug. 242 size_t total = (3 * len + 7) * sizeof(Ch); 243 scheme_ = static_cast<Ch*>(allocator_->Malloc(total)); 244 *scheme_ = '\0'; 245 auth_ = scheme_; 246 auth_++; 247 *auth_ = '\0'; 248 path_ = auth_; 249 path_++; 250 *path_ = '\0'; 251 query_ = path_; 252 query_++; 253 *query_ = '\0'; 254 frag_ = query_; 255 frag_++; 256 *frag_ = '\0'; 257 base_ = frag_; 258 base_++; 259 *base_ = '\0'; 260 uri_ = base_; 261 uri_++; 262 *uri_ = '\0'; 263 return total; 264 } 265 266 // Free memory for a URI Free()267 void Free() { 268 if (scheme_) { 269 Allocator::Free(scheme_); 270 scheme_ = 0; 271 } 272 } 273 274 // Parse a URI into constituent scheme, authority, path, query, & fragment parts 275 // Supports URIs that match regex ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? as per 276 // https://tools.ietf.org/html/rfc3986 Parse(const Ch * uri,std::size_t len)277 void Parse(const Ch* uri, std::size_t len) { 278 std::size_t start = 0, pos1 = 0, pos2 = 0; 279 Allocate(len); 280 281 // Look for scheme ([^:/?#]+):)? 282 if (start < len) { 283 while (pos1 < len) { 284 if (uri[pos1] == ':') break; 285 pos1++; 286 } 287 if (pos1 != len) { 288 while (pos2 < len) { 289 if (uri[pos2] == '/') break; 290 if (uri[pos2] == '?') break; 291 if (uri[pos2] == '#') break; 292 pos2++; 293 } 294 if (pos1 < pos2) { 295 pos1++; 296 std::memcpy(scheme_, &uri[start], pos1 * sizeof(Ch)); 297 scheme_[pos1] = '\0'; 298 start = pos1; 299 } 300 } 301 } 302 // Look for auth (//([^/?#]*))? 303 // Note need to set, increment, assign in 3 stages to avoid compiler warning bug. 304 auth_ = scheme_ + GetSchemeStringLength(); 305 auth_++; 306 *auth_ = '\0'; 307 if (start < len - 1 && uri[start] == '/' && uri[start + 1] == '/') { 308 pos2 = start + 2; 309 while (pos2 < len) { 310 if (uri[pos2] == '/') break; 311 if (uri[pos2] == '?') break; 312 if (uri[pos2] == '#') break; 313 pos2++; 314 } 315 std::memcpy(auth_, &uri[start], (pos2 - start) * sizeof(Ch)); 316 auth_[pos2 - start] = '\0'; 317 start = pos2; 318 } 319 // Look for path ([^?#]*) 320 // Note need to set, increment, assign in 3 stages to avoid compiler warning bug. 321 path_ = auth_ + GetAuthStringLength(); 322 path_++; 323 *path_ = '\0'; 324 if (start < len) { 325 pos2 = start; 326 while (pos2 < len) { 327 if (uri[pos2] == '?') break; 328 if (uri[pos2] == '#') break; 329 pos2++; 330 } 331 if (start != pos2) { 332 std::memcpy(path_, &uri[start], (pos2 - start) * sizeof(Ch)); 333 path_[pos2 - start] = '\0'; 334 if (path_[0] == '/') 335 RemoveDotSegments(); // absolute path - normalize 336 start = pos2; 337 } 338 } 339 // Look for query (\?([^#]*))? 340 // Note need to set, increment, assign in 3 stages to avoid compiler warning bug. 341 query_ = path_ + GetPathStringLength(); 342 query_++; 343 *query_ = '\0'; 344 if (start < len && uri[start] == '?') { 345 pos2 = start + 1; 346 while (pos2 < len) { 347 if (uri[pos2] == '#') break; 348 pos2++; 349 } 350 if (start != pos2) { 351 std::memcpy(query_, &uri[start], (pos2 - start) * sizeof(Ch)); 352 query_[pos2 - start] = '\0'; 353 start = pos2; 354 } 355 } 356 // Look for fragment (#(.*))? 357 // Note need to set, increment, assign in 3 stages to avoid compiler warning bug. 358 frag_ = query_ + GetQueryStringLength(); 359 frag_++; 360 *frag_ = '\0'; 361 if (start < len && uri[start] == '#') { 362 std::memcpy(frag_, &uri[start], (len - start) * sizeof(Ch)); 363 frag_[len - start] = '\0'; 364 } 365 366 // Re-constitute base_ and uri_ 367 base_ = frag_ + GetFragStringLength() + 1; 368 SetBase(); 369 uri_ = base_ + GetBaseStringLength() + 1; 370 SetUri(); 371 } 372 373 // Reconstitute base SetBase()374 void SetBase() { 375 Ch* next = base_; 376 std::memcpy(next, scheme_, GetSchemeStringLength() * sizeof(Ch)); 377 next+= GetSchemeStringLength(); 378 std::memcpy(next, auth_, GetAuthStringLength() * sizeof(Ch)); 379 next+= GetAuthStringLength(); 380 std::memcpy(next, path_, GetPathStringLength() * sizeof(Ch)); 381 next+= GetPathStringLength(); 382 std::memcpy(next, query_, GetQueryStringLength() * sizeof(Ch)); 383 next+= GetQueryStringLength(); 384 *next = '\0'; 385 } 386 387 // Reconstitute uri SetUri()388 void SetUri() { 389 Ch* next = uri_; 390 std::memcpy(next, base_, GetBaseStringLength() * sizeof(Ch)); 391 next+= GetBaseStringLength(); 392 std::memcpy(next, frag_, GetFragStringLength() * sizeof(Ch)); 393 next+= GetFragStringLength(); 394 *next = '\0'; 395 } 396 397 // Copy a part from one GenericUri to another 398 // Return the pointer to the next part to be copied to CopyPart(Ch * to,Ch * from,std::size_t len)399 Ch* CopyPart(Ch* to, Ch* from, std::size_t len) { 400 RAPIDJSON_ASSERT(to != 0); 401 RAPIDJSON_ASSERT(from != 0); 402 std::memcpy(to, from, len * sizeof(Ch)); 403 to[len] = '\0'; 404 Ch* next = to + len + 1; 405 return next; 406 } 407 408 // Remove . and .. segments from the path_ member. 409 // https://tools.ietf.org/html/rfc3986 410 // This is done in place as we are only removing segments. RemoveDotSegments()411 void RemoveDotSegments() { 412 std::size_t pathlen = GetPathStringLength(); 413 std::size_t pathpos = 0; // Position in path_ 414 std::size_t newpos = 0; // Position in new path_ 415 416 // Loop through each segment in original path_ 417 while (pathpos < pathlen) { 418 // Get next segment, bounded by '/' or end 419 size_t slashpos = 0; 420 while ((pathpos + slashpos) < pathlen) { 421 if (path_[pathpos + slashpos] == '/') break; 422 slashpos++; 423 } 424 // Check for .. and . segments 425 if (slashpos == 2 && path_[pathpos] == '.' && path_[pathpos + 1] == '.') { 426 // Backup a .. segment in the new path_ 427 // We expect to find a previously added slash at the end or nothing 428 RAPIDJSON_ASSERT(newpos == 0 || path_[newpos - 1] == '/'); 429 size_t lastslashpos = newpos; 430 // Make sure we don't go beyond the start segment 431 if (lastslashpos > 1) { 432 // Find the next to last slash and back up to it 433 lastslashpos--; 434 while (lastslashpos > 0) { 435 if (path_[lastslashpos - 1] == '/') break; 436 lastslashpos--; 437 } 438 // Set the new path_ position 439 newpos = lastslashpos; 440 } 441 } else if (slashpos == 1 && path_[pathpos] == '.') { 442 // Discard . segment, leaves new path_ unchanged 443 } else { 444 // Move any other kind of segment to the new path_ 445 RAPIDJSON_ASSERT(newpos <= pathpos); 446 std::memmove(&path_[newpos], &path_[pathpos], slashpos * sizeof(Ch)); 447 newpos += slashpos; 448 // Add slash if not at end 449 if ((pathpos + slashpos) < pathlen) { 450 path_[newpos] = '/'; 451 newpos++; 452 } 453 } 454 // Move to next segment 455 pathpos += slashpos + 1; 456 } 457 path_[newpos] = '\0'; 458 } 459 460 Ch* uri_; // Everything 461 Ch* base_; // Everything except fragment 462 Ch* scheme_; // Includes the : 463 Ch* auth_; // Includes the // 464 Ch* path_; // Absolute if starts with / 465 Ch* query_; // Includes the ? 466 Ch* frag_; // Includes the # 467 468 Allocator* allocator_; //!< The current allocator. It is either user-supplied or equal to ownAllocator_. 469 Allocator* ownAllocator_; //!< Allocator owned by this Uri. 470 }; 471 472 //! GenericUri for Value (UTF-8, default allocator). 473 typedef GenericUri<Value> Uri; 474 475 RAPIDJSON_NAMESPACE_END 476 477 #if defined(__clang__) 478 RAPIDJSON_DIAG_POP 479 #endif 480 481 #endif // RAPIDJSON_URI_H_ 482