/*  Copyright (C) 2019
 *
 *  This file is part of the osdev components suite
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; either version 2, or (at your option) any
 *  later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 */
#include "uriparser.h"

// std
#include <algorithm>
#include <cstdint>
#include <functional>
#include <iterator>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

// gnu-c
#include <arpa/inet.h>
#include <netdb.h>

// boost
#include <boost/algorithm/string/case_conv.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/regex.hpp>

// osdev::components::mqtt
#include "bimap.h"
#include "stringutils.h"
#include "uriutils.h"

// mlogic::common::logger
// #include "mlogic/common/logger/loggerprovider.h"
// #include "mlogic/common/invalidargumentexception.h"
// #include "mlogic/common/nullptrexception.h"
// #include "mlogic/common/systemexception.h"

using namespace osdev::components::mqtt;

namespace {

/**
 * @brief Copies an item from the from container to the to container.
 * @tparam TFrom The type of the container from which to copy.
 * @tparam TTo The type of the container to which to copy.
 * @param itemName The name of the item to copy.
 * @param from The container from which to copy.
 * @param to The container to which to copy.
 * @param transformation Apply transformation function to the input. Default no transformation.
 */
template <typename TFrom, typename TTo>
void copyItem(const std::string& itemName,
              const TFrom& from,
              TTo& to,
              const std::function<std::string(const std::string&)>& transformation = std::function<std::string(const std::string&)>())
{
    // Convert once to std::string so both branches insert the same value type.
    const std::string value = from[itemName];
    if (transformation) {
        to.insert(std::make_pair(itemName, transformation(value)));
    }
    else {
        to.insert(std::make_pair(itemName, value));
    }
}

/**
 * @brief Looks up an item in a string map.
 * @param source The map to search.
 * @param itemName The key to look for.
 * @return A reference to the mapped value, or to a static empty string when the key is absent.
 */
const std::string& getItem(const std::map<std::string, std::string>& source, const std::string& itemName)
{
    static const std::string s_empty;
    const auto cit = source.find(itemName);
    if (cit != source.end()) {
        return cit->second;
    }
    return s_empty;
}

/**
 * @brief Gives the bidirectional lookup table between characters and their percent encoded form.
 * The left view maps a character to its encoded token, the right view maps a token back to its character.
 * @return The lookup table, built once on first use.
 */
static const boost::bimap<char, std::string>& getReservedCharacterMap()
{
    static const auto s_lookupTable = makeBimap<char, std::string>(
        { { ':', percentEncode<':'>() },
            { '/', percentEncode<'/'>() },
            { '?', percentEncode<'?'>() },
            { '#', percentEncode<'#'>() },
            { '[', percentEncode<'['>() },
            { ']', percentEncode<']'>() },
            { '@', percentEncode<'@'>() },
            { '!', percentEncode<'!'>() },
            { '$', percentEncode<'$'>() },
            { '&', percentEncode<'&'>() },
            { '\'', percentEncode<'\''>() },
            { '(', percentEncode<'('>() },
            { ')', percentEncode<')'>() },
            { '*', percentEncode<'*'>() },
            { '+', percentEncode<'+'>() },
            { ',', percentEncode<','>() },
            { ';', percentEncode<';'>() },
            { '=', percentEncode<'='>() },

            { '"', percentEncode<'"'>() },
            { '%', percentEncode<'%'>() },
            { '-', percentEncode<'-'>() },
            { '.', percentEncode<'.'>() },
            { '<', percentEncode<'<'>() },
            { '>', percentEncode<'>'>() },
            { '\\', percentEncode<'\\'>() },
            { '^', percentEncode<'^'>() },
            { '_', percentEncode<'_'>() },
            { '`', percentEncode<'`'>() },
            { '{', percentEncode<'{'>() },
            { '|', percentEncode<'|'>() },
            { '}', percentEncode<'}'>() },
            { '~', percentEncode<'~'>() } });

    return s_lookupTable;
}

/**
 * @brief Replaces every percent encoded token that is present in the lookup table by its character.
 * Tokens that are not in the table, and a truncated token at the end of the input, are left untouched.
 * @param in The text to decode.
 * @return The decoded text.
 */
std::string decode(const std::string& in)
{
    static constexpr std::size_t encodingTokenSize = 3; // example: %20 encodes a space character.
    const auto& reservedLookup = getReservedCharacterMap();

    std::string out = in;
    std::size_t pos = 0;
    while ((pos = out.find('%', pos)) != std::string::npos) {
        if (pos + encodingTokenSize > out.size()) {
            // Not enough characters left for a complete token, so nothing further can be decoded.
            break;
        }
        const auto cit = reservedLookup.right.find(out.substr(pos, encodingTokenSize));
        if (reservedLookup.right.end() != cit) {
            // string& replace (size_t pos, size_t len, size_t n, char c)
            // where n is the number of fill characters (1 in this case).
            out.replace(pos, encodingTokenSize, 1, cit->second);
        }
        // Step over the character at pos, so a decoded '%' is never interpreted as the start of a new token.
        ++pos;
    }
    return out;
}

/**
 * @brief Replaces every character that is present in the lookup table by its percent encoded token.
 * @param in The text to encode.
 * @return The encoded text.
 */
std::string encode(const std::string& in)
{
    const auto& reservedLookup = getReservedCharacterMap();

    std::string out = in;
    for (std::size_t pos = 0; pos < out.size(); ++pos) {
        const auto cit = reservedLookup.left.find(out[pos]);
        if (reservedLookup.left.end() != cit) {
            out.replace(pos, 1, cit->second);
            // Together with the loop increment this skips the three characters of the inserted token.
            pos += 2;
        }
    }
    return out;
}

} // anonymous

/**
 * @brief Splits a uri into its components.
 * The result contains the keys scheme, user, password, ipv6, host, port, path, query and fragment.
 * @param uri The uri to parse. Scheme and authority must both be present.
 * @return The parsed components.
 * @throws std::invalid_argument when the uri or its authority part does not match the expected form.
 */
// static
ParsedUri UriParser::parse(const std::string& uri)
{
    // Before turning to regular expressions, the following libraries were evaluated to achieve this functionality:
    // Qt QUrlParser: http://doc.qt.io/qt-4.8/qurl.html
    // cpp-netlib: https://github.com/cpp-netlib/uri
    // uriparser: http://uriparser.sourceforge.net/
    // From the above, cpp-netlib was the most compelling because of its pending standardization for C++(17?).
    // However, none of these libraries could handle strings for port service names, so a custom implementation seems necessary.
    // As an additional validation step, one of the above libraries could be used after the port service name was replaced (see below).
    //
    // Split the uri in two stages. First break down the uri in its components: scheme (cannot be empty), authority (cannot be empty), path, query, fragment.
    // The path, query and fragment parts are optional. Because the scheme and authority part cannot be empty only a subset of uri's is handled by this function.
    // In the second stage the authority is parsed to get the hostname and the port. In order to use an ipv6 host address it must be
    // wrapped in brackets. The addresses are not validated whether they are correct. When an open bracket is found the part that is between brackets
    // must contain at least two colons. This is enough to discern something that resembles an ipv6 address from the other possibilities.
    static const std::string regexUriString(
        R"regex(^(?<scheme>[^:/?#]+)://(?<authority>[^/?#]+)(?<path>[^?#]*)(?<querypart>\?(?<query>[^#]*))?(?<fragmentpart>#(?<fragment>.*))?)regex");

    // This regex only checks for non occurrence of the @ symbol since the input is the result from the regexUri where it is already validated that characters "/?#" do not exist
    // in the authority part. This regexAuthority uses the if then else construct "(?(?=regex)then|else)" to choose between ipv6 or something else.
    static const std::string regexAuthorityString(
        R"regex(^((?<user>[^:@]+)(:(?<password>[^@]+))?@)?(?(?=(?<ipv6>\[))\[(?=(.*:){2,}.*)(?<host>[^@]+)\]|(?<host>[^:@]+))(?<portpart>:(?<port>[^:@]+))?$)regex");

    static const boost::regex uriRegex(regexUriString);
    boost::cmatch whatUri;
    if (!boost::regex_match(uri.c_str(), whatUri, uriRegex)) {
        // The match results must not be read after a failed match.
        throw std::invalid_argument("No match for the specified uri: '" + uri + "'");
    }

    static const boost::regex authorityRegex(regexAuthorityString);
    boost::cmatch whatAuthority;
    // Keep the authority in a named string: whatAuthority refers into its buffer.
    const std::string authority = whatUri["authority"];
    if (!boost::regex_match(authority.c_str(), whatAuthority, authorityRegex)) {
        throw std::invalid_argument("Uri contains invalid authority part: '" + authority + "'");
    }

    static const auto toLower = [](const std::string& in) -> std::string {
        return boost::to_lower_copy(in);
    };

    ParsedUri parsedUri;
    copyItem("scheme", whatUri, parsedUri, toLower);
    copyItem("user", whatAuthority, parsedUri, &decode);
    copyItem("password", whatAuthority, parsedUri, &decode);
    copyItem("ipv6", whatAuthority, parsedUri); // Acts as a flag. Empty means not ipv6, not empty means ipv6 (this is not validated in the parse however!)
    copyItem("host", whatAuthority, parsedUri, toLower);
    copyItem("port", whatAuthority, parsedUri, toLower);
    copyItem("path", whatUri, parsedUri);
    copyItem("query", whatUri, parsedUri);
    copyItem("fragment", whatUri, parsedUri, &decode);

    return parsedUri;
}

/**
 * @brief Splits the query part of a parsed uri into decoded key/value pairs.
 * Elements are separated by '&'. Elements without an '=' are skipped.
 * @param parsedUri The parsed uri to take the query from.
 * @return The decoded key/value pairs. Empty when there is no query item.
 */
// static
ParsedQuery UriParser::parseQuery(const ParsedUri& parsedUri)
{
    const auto cit = parsedUri.find("query");
    if (parsedUri.end() == cit) {
        return {};
    }

    const auto& queryString = cit->second;

    std::vector<std::string> keyValues;
    boost::algorithm::split(keyValues, queryString, [](char ch) { return ch == '&'; });

    std::map<std::string, std::string> retval;
    for (const auto& query : keyValues) {
        const auto pos = query.find('=');
        if (std::string::npos != pos) {
            // substr(pos + 1) is empty when '=' is the last character, so no special case is needed.
            retval[decode(query.substr(0, pos))] = decode(query.substr(pos + 1));
        }
    }
    return retval;
}

/**
 * @brief Splits the path part of a parsed uri into decoded path elements.
 * The first character of the path (the leading '/') is dropped before splitting on '/'.
 * @param parsedUri The parsed uri to take the path from.
 * @return The decoded path elements. Empty when there is no path item or the path is empty.
 */
// static
ParsedPath UriParser::parsePath(const ParsedUri& parsedUri)
{
    const auto cit = parsedUri.find("path");
    if (parsedUri.end() == cit) {
        return {};
    }

    const auto& pathString = cit->second;

    ParsedPath pathElements;
    if (!pathString.empty()) {
        const auto path = pathString.substr(1);
        boost::algorithm::split(pathElements, path, [](char ch) { return ch == '/'; }); // empty string will lead to a single pathElement
        std::transform(pathElements.begin(), pathElements.end(), pathElements.begin(), &decode);
    }
    return pathElements;
}

/**
 * @brief Parses a uri, replaces a port that is not numeric by the port number of that service name and rebuilds the uri.
 * @param uri The uri to normalize.
 * @return The rebuilt uri.
 * @throws std::invalid_argument when the uri cannot be parsed or the service name cannot be resolved.
 */
// static
std::string UriParser::normalize(const std::string& uri)
{
    auto parsedUri = parse(uri);

    auto& portString = parsedUri["port"];
    if (!portString.empty() && !is_numeric(portString)) {
        const auto portnumber = getPortnumber(portString);
        portString = boost::lexical_cast<std::string>(portnumber);
    }

    return toString(parsedUri);
}

/**
 * @brief Builds a uri string from its parsed components.
 * User, password, path elements, query keys and values and the fragment are encoded again.
 * Scheme, host and port are written as they are. An empty scheme or host is not rejected.
 * @param parsedUri The components to build the uri from.
 * @return The uri string.
 */
// static
std::string UriParser::toString(const ParsedUri& parsedUri)
{
    const auto& schemeString = getItem(parsedUri, "scheme");
    const auto& hostString = getItem(parsedUri, "host");
    const auto& user = getItem(parsedUri, "user");
    const auto& password = getItem(parsedUri, "password");
    const auto& portString = getItem(parsedUri, "port");
    const auto& pathString = getItem(parsedUri, "path");
    const auto& queryString = getItem(parsedUri, "query");
    const auto& fragmentString = getItem(parsedUri, "fragment");

    // wrap hostname in brackets only if the incoming url uses them
    const bool ipv6 = !getItem(parsedUri, "ipv6").empty();

    std::ostringstream oss;
    oss << schemeString << "://";

    if (!user.empty()) {
        oss << encode(user);
        if (!password.empty()) {
            oss << ":" << encode(password);
        }
        oss << "@";
    }

    if (ipv6) {
        oss << '[';
    }
    oss << hostString;
    if (ipv6) {
        oss << ']';
    }

    if (!portString.empty()) {
        oss << ':' << portString;
    }

    if (!pathString.empty()) {
        const auto pathElements = UriParser::parsePath(parsedUri);
        for (const auto& element : pathElements) {
            oss << "/" << encode(element);
        }
    }

    if (!queryString.empty()) {
        const auto queryElements = UriParser::parseQuery(parsedUri);
        oss << '?';
        for (auto cit = queryElements.cbegin(); queryElements.cend() != cit; ++cit) {
            oss << encode(cit->first) << '=' << encode(cit->second);
            if (std::next(cit) != queryElements.cend()) {
                oss << '&';
            }
        }
    }

    if (!fragmentString.empty()) {
        oss << '#' << encode(fragmentString);
    }
    return oss.str();
}

/**
 * @brief Looks up the port number of a service in the services database.
 * @param service The name of the service.
 * @param protocolName The name of the protocol the service runs on.
 * @return The port number in host byte order.
 * @throws std::invalid_argument when no entry is found for the service and protocol.
 */
// static
int UriParser::getPortnumber(const std::string& service, const std::string& protocolName)
{
    const unsigned int bufSize = 1024;
    struct servent data;
    struct servent* result = nullptr;
    char buf[bufSize]; // contains the strings that data points to

    const int rc = ::getservbyname_r(service.c_str(), protocolName.c_str(), &data, buf, bufSize, &result);
    if (0 != rc || nullptr == result) {
        // data is not filled in when the lookup fails, so it must not be read.
        throw std::invalid_argument(
            "Could not determine the portnumber for the specified service: " + service + " for protocol: " + protocolName + ". Please check the portnumber configuration in /etc/services.");
    }

// htonx functions need -Wold-style-cast disabled
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
    const int portnumber = static_cast<int>(ntohs(static_cast<uint16_t>(result->s_port)));
#pragma GCC diagnostic pop
    return portnumber;
}