Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 20 additions & 12 deletions libraries/s3api/src/s3api/s3api_methods.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ Request PutObject(
Request req;
req.method = clients::http::HttpMethod::kPut;
req.bucket = bucket;
req.req = path;
req.req = http::EncodeS3Key(path);

req.headers[USERVER_NAMESPACE::http::headers::kContentLength] = std::to_string(data.size());
req.headers[USERVER_NAMESPACE::http::headers::kContentType] = content_type;
Expand All @@ -80,15 +80,15 @@ Request DeleteObject(std::string_view bucket, std::string_view path) {
Request req;
req.method = clients::http::HttpMethod::kDelete;
req.bucket = bucket;
req.req = path;
req.req = http::EncodeS3Key(path);
return req;
}

Request GetObject(std::string_view bucket, std::string_view path, std::optional<std::string_view> version) {
Request req;
req.method = clients::http::HttpMethod::kGet;
req.bucket = bucket;
req.req = path;
req.req = http::EncodeS3Key(path);

if (version) {
req.req += "?" + USERVER_NAMESPACE::http::MakeQuery({{"versionId", *version}});
Expand All @@ -101,7 +101,7 @@ Request GetObjectHead(std::string_view bucket, std::string_view path) {
Request req;
req.method = clients::http::HttpMethod::kHead;
req.bucket = bucket;
req.req = path;
req.req = http::EncodeS3Key(path);
return req;
}

Expand Down Expand Up @@ -152,9 +152,9 @@ Request CopyObject(
Request req;
req.method = clients::http::HttpMethod::kPut;
req.bucket = dest_bucket;
req.req = dest_key;
req.req = http::EncodeS3Key(dest_key);

req.headers[headers::kAmzCopySource] = fmt::format("/{}/{}", source_bucket, source_key);
req.headers[headers::kAmzCopySource] = fmt::format("/{}/{}", source_bucket, http::EncodeS3Key(source_key));
req.headers[USERVER_NAMESPACE::http::headers::kContentType] = content_type;

return req;
Expand All @@ -167,7 +167,7 @@ Request CreateInternalApiRequest(
Request result;
result.method = clients::http::HttpMethod::kPost;
result.bucket = bucket;
result.req = fmt::format("{}?uploads", request.key);
result.req = fmt::format("{}?uploads", http::EncodeS3Key(request.key));

if (request.content_type) {
result.headers[http::headers::kContentType] = *request.content_type;
Expand Down Expand Up @@ -195,7 +195,11 @@ Request CreateInternalApiRequest(
Request result;
result.method = clients::http::HttpMethod::kDelete;
result.bucket = bucket;
result.req = fmt::format("{}?{}", request.key, http::MakeQuery({{query_args::kUploadId, request.upload_id}}));
result.req = fmt::format(
"{}?{}",
http::EncodeS3Key(request.key),
http::MakeQuery({{query_args::kUploadId, request.upload_id}})
);
return result;
}

Expand All @@ -206,7 +210,11 @@ Request CreateInternalApiRequest(
Request result;
result.method = clients::http::HttpMethod::kPost;
result.bucket = bucket;
result.req = fmt::format("{}?{}", request.key, http::MakeQuery({{query_args::kUploadId, request.upload_id}}));
result.req = fmt::format(
"{}?{}",
http::EncodeS3Key(request.key),
http::MakeQuery({{query_args::kUploadId, request.upload_id}})
);

pugi::xml_document doc;
auto multipart_upload_node = doc.append_child("CompleteMultipartUpload");
Expand Down Expand Up @@ -243,7 +251,7 @@ Request CreateInternalApiRequest(const std::string& bucket, const multipart_uplo
result.bucket = bucket;
result.req = fmt::format(
"{}?{}",
request.key,
http::EncodeS3Key(request.key),
http::MakeQuery(
{{query_args::kUploadId, request.upload_id}, {query_args::kPartNumber, std::to_string(request.part_number)}}
)
Expand All @@ -256,7 +264,7 @@ Request CreateInternalApiRequest(const std::string& bucket, const multipart_uplo
Request result;
result.method = clients::http::HttpMethod::kGet;
result.bucket = bucket;
result.req = request.key;
result.req = http::EncodeS3Key(request.key);

http::Args params{{std::string{query_args::kUploadId}, request.upload_id}};

Expand All @@ -268,7 +276,7 @@ Request CreateInternalApiRequest(const std::string& bucket, const multipart_uplo
params.emplace(query_args::kPartNumberMarker, std::to_string(part_number_marker));
}

result.req = fmt::format("{}?{}", request.key, http::MakeQuery(params));
result.req = fmt::format("{}?{}", http::EncodeS3Key(request.key), http::MakeQuery(params));

return result;
}
Expand Down
23 changes: 23 additions & 0 deletions universal/include/userver/http/url.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,29 @@ struct DecomposedUrlView {
/// @endcode
std::string UrlEncode(std::string_view input_string);

/// @brief Encode a single URL path segment (for use in S3 and similar APIs)
/// @param input_string String to encode; it is processed byte by byte
/// @returns URL-encoded string: ASCII letters, digits and the path-safe characters
/// (-, _, ., ~, $, &, ,, :, =, @) are kept unescaped, every other byte is
/// encoded as an uppercase %XX sequence
/// @note '/' is not in the path-safe set, so it is encoded as %2F. To keep '/'
/// as a separator between segments use EncodeS3Key instead.
/// @note This is less aggressive than UrlEncode and is suitable for encoding path segments
/// where you want to preserve readability of certain special characters
/// @code
/// auto encoded = UrlEncodePathSegment("file-name_with spaces.txt");
/// // Returns: "file-name_with%20spaces.txt"
/// @endcode
std::string UrlEncodePathSegment(std::string_view input_string);

/// @brief Encode an S3 object key for use in URL path
/// @param key S3 object key (may contain '/' as part of the key name)
/// @returns URL-encoded path where each '/'-separated segment is encoded with the
/// same rules as UrlEncodePathSegment, while every '/' in the key (including a
/// leading one) is copied through unchanged. An empty key yields an empty string.
/// @note S3 object keys can contain '/' which should be preserved as path separators,
/// while other special characters should be encoded
/// @code
/// auto encoded = EncodeS3Key("folder/file with spaces.txt");
/// // Returns: "folder/file%20with%20spaces.txt"
/// @endcode
std::string EncodeS3Key(std::string_view key);

using Args = std::unordered_map<std::string, std::string, utils::StrCaseHash>;
using MultiArgs = std::multimap<std::string, std::string>;
using PathArgs = std::unordered_map<std::string, std::string>;
Expand Down
80 changes: 74 additions & 6 deletions universal/src/http/url.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,41 @@ constexpr std::string_view kSchemaSeparator = "://";
constexpr char kQuerySeparator = '?';
constexpr char kFragmentSeparator = '#';

// Decides whether a byte may be emitted verbatim inside a path segment.
// The accepted set is ASCII letters and digits, the RFC 3986 unreserved
// punctuation (- _ . ~), and a handful of extra characters kept readable
// for S3-style paths ($ & , : = @). Any other byte is reported as unsafe.
bool IsPathSafeChar(unsigned char c) {
    // Explicit ASCII range checks for letters and digits.
    const bool is_digit = c >= '0' && c <= '9';
    const bool is_upper = c >= 'A' && c <= 'Z';
    const bool is_lower = c >= 'a' && c <= 'z';
    if (is_digit || is_upper || is_lower) {
        return true;
    }

    // Unreserved punctuation first, then the S3-friendly extras.
    constexpr std::string_view kSafePunctuation = "-_.~$&,:=@";
    return kSafePunctuation.find(static_cast<char>(c)) != std::string_view::npos;
}

// Appends the percent-encoded form of `symbol` to `result`: a '%' followed by
// the byte's value as two uppercase hexadecimal digits (high nibble first).
void EncodeByte(unsigned char symbol, std::string& result) {
    constexpr char kHexDigits[] = "0123456789ABCDEF";
    const char escaped[] = {'%', kHexDigits[symbol >> 4], kHexDigits[symbol & 0x0F]};
    result.append(escaped, sizeof(escaped));
}

void UrlEncodeTo(std::string_view input_string, std::string& result) {
for (const char symbol : input_string) {
if (isalnum(symbol)) {
Expand All @@ -37,17 +72,22 @@ void UrlEncodeTo(std::string_view input_string, std::string& result) {
result.append(1, symbol);
break;
default:
std::array<char, 3> bytes = {'%', 0, 0};
bytes[1] = (symbol & 0xF0) / 16;
bytes[1] += (bytes[1] > 9) ? 'A' - 10 : '0';
bytes[2] = symbol & 0x0F;
bytes[2] += (bytes[2] > 9) ? 'A' - 10 : '0';
result.append(bytes.data(), bytes.size());
EncodeByte(static_cast<unsigned char>(symbol), result);
break;
}
}
}

// Appends the path-segment encoding of `input_string` to `result`, byte by byte:
// bytes accepted by IsPathSafeChar are copied verbatim, every other byte is
// appended in percent-encoded form via EncodeByte.
void UrlEncodePathSegmentTo(std::string_view input_string, std::string& result) {
    for (const char raw : input_string) {
        // Classify and encode using the unsigned value of the byte.
        const auto byte = static_cast<unsigned char>(raw);
        if (!IsPathSafeChar(byte)) {
            EncodeByte(byte, result);
            continue;
        }
        result.push_back(raw);
    }
}

} // namespace

std::string UrlEncode(std::string_view input_string) {
Expand All @@ -58,6 +98,34 @@ std::string UrlEncode(std::string_view input_string) {
return result;
}

// Returns the path-segment encoding of `input_string` as a new string.
std::string UrlEncodePathSegment(std::string_view input_string) {
    std::string encoded;
    // Reserve room for the case where every input byte becomes a three-character escape.
    encoded.reserve(input_string.size() * 3);
    UrlEncodePathSegmentTo(input_string, encoded);
    return encoded;
}

// Encodes `key` segment by segment: the text between '/' characters goes through
// UrlEncodePathSegmentTo, and each '/' itself is copied to the output unchanged.
std::string EncodeS3Key(std::string_view key) {
    std::string encoded;
    encoded.reserve(key.size());

    std::string_view rest = key;
    while (!rest.empty()) {
        const auto slash = rest.find('/');
        // When no slash is left, `slash` is npos and substr() yields the whole remainder.
        UrlEncodePathSegmentTo(rest.substr(0, slash), encoded);
        if (slash == std::string_view::npos) {
            break;
        }
        encoded.push_back('/');
        rest.remove_prefix(slash + 1);
    }

    return encoded;
}

std::string UrlDecode(std::string_view range) { return impl::UrlDecode(utils::impl::InternalTag{}, range); }

namespace {
Expand Down
94 changes: 94 additions & 0 deletions universal/src/http/url_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

USERVER_NAMESPACE_BEGIN

using http::EncodeS3Key;
using http::UrlEncode;
using http::UrlEncodePathSegment;

namespace {

Expand All @@ -22,6 +24,98 @@ TEST(UrlEncode, Latin) {
EXPECT_EQ(str, UrlEncode(str));
}

// An empty segment encodes to an empty string.
TEST(UrlEncodePathSegment, Empty) {
    const auto encoded = UrlEncodePathSegment("");
    EXPECT_EQ("", encoded);
}

// ASCII letters and digits pass through unchanged.
TEST(UrlEncodePathSegment, Latin) {
    const std::string_view input{"SomeText1234567890"};
    const auto encoded = UrlEncodePathSegment(input);
    EXPECT_EQ(input, encoded);
}

// The RFC 3986 unreserved punctuation (- _ . ~) is left as is.
TEST(UrlEncodePathSegment, UnreservedChars) {
    const std::string_view input{"file-name_test.txt~backup"};
    const auto encoded = UrlEncodePathSegment(input);
    EXPECT_EQ(input, encoded);
}

// The additional path-safe characters ($ & , : = @) are left as is.
TEST(UrlEncodePathSegment, PathSafeSpecialChars) {
    const std::string_view input{"price$100&tax:50=total@rate"};
    const auto encoded = UrlEncodePathSegment(input);
    EXPECT_EQ(input, encoded);
}

// Spaces become %20 while the safe characters around them stay untouched.
TEST(UrlEncodePathSegment, SpacesAndSpecial) {
    const auto encoded = UrlEncodePathSegment("file with spaces.txt");
    EXPECT_EQ("file%20with%20spaces.txt", encoded);
}

// Within a single path segment '/' is not a separator, so it must be
// percent-encoded. (The test was previously named SlashShouldNotBeEncoded,
// which contradicted the assertion below.)
TEST(UrlEncodePathSegment, SlashIsEncoded) {
    constexpr std::string_view str = "folder/file";
    EXPECT_EQ("folder%2Ffile", UrlEncodePathSegment(str));
}

// The query ('?') and fragment ('#') delimiters are percent-encoded.
TEST(UrlEncodePathSegment, QueryChars) {
    const auto encoded = UrlEncodePathSegment("file?query#fragment");
    EXPECT_EQ("file%3Fquery%23fragment", encoded);
}

// An empty key encodes to an empty string.
TEST(EncodeS3Key, Empty) {
    const auto encoded = EncodeS3Key("");
    EXPECT_EQ("", encoded);
}

// A key made only of safe characters is returned unchanged.
TEST(EncodeS3Key, SimpleKey) {
    const std::string_view input{"simple-key.txt"};
    const auto encoded = EncodeS3Key(input);
    EXPECT_EQ(input, encoded);
}

// Spaces inside a key are encoded as %20.
TEST(EncodeS3Key, WithSpaces) {
    const auto encoded = EncodeS3Key("file with spaces.txt");
    EXPECT_EQ("file%20with%20spaces.txt", encoded);
}

// '/' separators between segments are kept as is.
TEST(EncodeS3Key, WithSlashes) {
    const auto encoded = EncodeS3Key("folder/subfolder/file.txt");
    EXPECT_EQ("folder/subfolder/file.txt", encoded);
}

// Separators are preserved while spaces within a segment are encoded.
TEST(EncodeS3Key, WithSlashesAndSpaces) {
    const auto encoded = EncodeS3Key("folder/file with spaces.txt");
    EXPECT_EQ("folder/file%20with%20spaces.txt", encoded);
}

// Parentheses are outside the path-safe set (RFC 3986 unreserved + S3-safe),
// so they are encoded along with the spaces.
TEST(EncodeS3Key, ComplexKey) {
    const auto encoded = EncodeS3Key("path/to/my file (copy).txt");
    EXPECT_EQ("path/to/my%20file%20%28copy%29.txt", encoded);
}

// Every path-safe character is preserved, so this key comes back unchanged.
TEST(EncodeS3Key, SpecialCharsInSegments) {
    const std::string_view input{"folder/file-name_with.dots~and$symbols&commas,colons:equals=at@sign"};
    const auto encoded = EncodeS3Key(input);
    EXPECT_EQ(input, encoded);
}

// A leading '/' is kept as a separator rather than encoded.
TEST(EncodeS3Key, LeadingSlash) {
    const auto encoded = EncodeS3Key("/leading/slash.txt");
    EXPECT_EQ("/leading/slash.txt", encoded);
}

// Spaces are encoded in every segment, not only in the last one.
TEST(EncodeS3Key, MultipleSpacesInPath) {
    const auto encoded = EncodeS3Key("folder with spaces/file with spaces.txt");
    EXPECT_EQ("folder%20with%20spaces/file%20with%20spaces.txt", encoded);
}

// Non-ASCII input is percent-encoded byte by byte. UTF-8 bytes of the Cyrillic letters used below:
// 'ф' (U+0444) = D1 84, 'а' (U+0430) = D0 B0, 'й' (U+0439) = D0 B9, 'л' (U+043B) = D0 BB
TEST(EncodeS3Key, UnicodeCharacters) {
    constexpr std::string_view key = "folder/файл.txt";
    // The exact comparison also covers the "folder/" prefix and the ".txt" suffix.
    EXPECT_EQ("folder/%D1%84%D0%B0%D0%B9%D0%BB.txt", EncodeS3Key(key));
}

TEST(UrlEncode, Special) {
constexpr std::string_view str = "Text with spaces,?&=";
EXPECT_EQ("Text%20with%20spaces%2C%3F%26%3D", UrlEncode(str));
Expand Down
Loading