From 979ff60a6b76aa954a20bad9699955abf15b366c Mon Sep 17 00:00:00 2001 From: Dimon4eg Date: Tue, 7 May 2019 00:45:02 +0300 Subject: [PATCH] Use LUrlParser to fix issue of Windows (#53) LGTM --- CMakeLists.txt | 2 + ixwebsocket/IXHttpClient.cpp | 3 +- ixwebsocket/IXUrlParser.cpp | 41 ++--- ixwebsocket/IXUrlParser.h | 10 +- ixwebsocket/IXWebSocketHandshake.cpp | 1 - ixwebsocket/IXWebSocketTransport.cpp | 3 +- ixwebsocket/LUrlParser.cpp | 265 +++++++++++++++++++++++++++ ixwebsocket/LUrlParser.h | 78 ++++++++ test/IXUrlParserTest.cpp | 15 +- 9 files changed, 366 insertions(+), 52 deletions(-) create mode 100644 ixwebsocket/LUrlParser.cpp create mode 100644 ixwebsocket/LUrlParser.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b13e6f2..f86fcf95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,6 +37,7 @@ set( IXWEBSOCKET_SOURCES ixwebsocket/IXWebSocketHttpHeaders.cpp ixwebsocket/IXHttpClient.cpp ixwebsocket/IXUrlParser.cpp + ixwebsocket/LUrlParser.cpp ixwebsocket/IXSelectInterrupt.cpp ixwebsocket/IXSelectInterruptFactory.cpp ixwebsocket/IXConnectionState.cpp @@ -65,6 +66,7 @@ set( IXWEBSOCKET_HEADERS ixwebsocket/libwshandshake.hpp ixwebsocket/IXHttpClient.h ixwebsocket/IXUrlParser.h + ixwebsocket/LUrlParser.h ixwebsocket/IXSelectInterrupt.h ixwebsocket/IXSelectInterruptFactory.h ixwebsocket/IXConnectionState.h diff --git a/ixwebsocket/IXHttpClient.cpp b/ixwebsocket/IXHttpClient.cpp index a6730c82..ed612218 100644 --- a/ixwebsocket/IXHttpClient.cpp +++ b/ixwebsocket/IXHttpClient.cpp @@ -47,9 +47,8 @@ namespace ix std::string protocol, host, path, query; int port; - bool websocket = false; - if (!UrlParser::parse(url, protocol, host, path, query, port, websocket)) + if (!UrlParser::parse(url, protocol, host, path, query, port)) { std::stringstream ss; ss << "Cannot parse url: " << url; diff --git a/ixwebsocket/IXUrlParser.cpp b/ixwebsocket/IXUrlParser.cpp index 00f4a3dd..f9722a1b 100644 --- a/ixwebsocket/IXUrlParser.cpp +++ b/ixwebsocket/IXUrlParser.cpp @@ -5,43 +5,32 @@ */ #include "IXUrlParser.h" +#include "LUrlParser.h" #include -#include - namespace ix { - // - // The only difference between those 2 regex is the protocol - // - std::regex UrlParser::_httpRegex("(http|https)://([^/ :]+):?([^/ ]*)(/?[^ #?]*)\\x3f?([^ #]*)#?([^ ]*)"); - std::regex UrlParser::_webSocketRegex("(ws|wss)://([^/ :]+):?([^/ ]*)(/?[^ #?]*)\\x3f?([^ #]*)#?([^ ]*)"); - bool UrlParser::parse(const std::string& url, std::string& protocol, std::string& host, std::string& path, std::string& query, - int& port, - bool websocket) + int& port) { - std::cmatch what; - if (!regex_match(url.c_str(), what, - websocket ? _webSocketRegex : _httpRegex)) + LUrlParser::clParseURL res = LUrlParser::clParseURL::ParseURL(url); + + if (!res.IsValid()) { return false; } - std::string portStr; + protocol = res.m_Scheme; + host = res.m_Host; + path = res.m_Path; + query = res.m_Query; - protocol = std::string(what[1].first, what[1].second); - host = std::string(what[2].first, what[2].second); - portStr = std::string(what[3].first, what[3].second); - path = std::string(what[4].first, what[4].second); - query = std::string(what[5].first, what[5].second); - - if (portStr.empty()) + if (!res.GetPort(&port)) { if (protocol == "ws" || protocol == "http") { @@ -58,12 +47,6 @@ namespace ix return false; } } - else - { - std::stringstream ss; - ss << portStr; - ss >> port; - } if (path.empty()) { @@ -83,12 +66,12 @@ namespace ix return true; } - void UrlParser::printUrl(const std::string& url, bool websocket) + void UrlParser::printUrl(const std::string& url) { std::string protocol, host, path, query; int port {0}; - if (!parse(url, protocol, host, path, query, port, websocket)) + if (!parse(url, protocol, host, path, query, port)) { return; } diff --git a/ixwebsocket/IXUrlParser.h b/ixwebsocket/IXUrlParser.h index 3c3bdbbe..9215f088 100644 --- a/ixwebsocket/IXUrlParser.h +++ b/ixwebsocket/IXUrlParser.h @@ -7,7 +7,6 @@ #pragma once #include -#include namespace ix { @@ -19,13 +18,8 @@ namespace ix std::string& host, std::string& path, std::string& query, - int& port, - bool websocket); + int& port); - static void printUrl(const std::string& url, bool websocket); - - private: - static std::regex _httpRegex; - static std::regex _webSocketRegex; + static void printUrl(const std::string& url); }; } diff --git a/ixwebsocket/IXWebSocketHandshake.cpp b/ixwebsocket/IXWebSocketHandshake.cpp index 7986e749..61ca73e2 100644 --- a/ixwebsocket/IXWebSocketHandshake.cpp +++ b/ixwebsocket/IXWebSocketHandshake.cpp @@ -12,7 +12,6 @@ #include #include -#include #include #include diff --git a/ixwebsocket/IXWebSocketTransport.cpp b/ixwebsocket/IXWebSocketTransport.cpp index 91c89241..fff230c6 100644 --- a/ixwebsocket/IXWebSocketTransport.cpp +++ b/ixwebsocket/IXWebSocketTransport.cpp @@ -137,9 +137,8 @@ namespace ix { std::string protocol, host, path, query; int port; - bool websocket = true; - if (!UrlParser::parse(url, protocol, host, path, query, port, websocket)) + if (!UrlParser::parse(url, protocol, host, path, query, port)) { return WebSocketInitResult(false, 0, std::string("Could not parse URL ") + url); diff --git a/ixwebsocket/LUrlParser.cpp b/ixwebsocket/LUrlParser.cpp new file mode 100644 index 00000000..ffe345b6 --- /dev/null +++ b/ixwebsocket/LUrlParser.cpp @@ -0,0 +1,265 @@ +/* + * Lightweight URL & URI parser (RFC 1738, RFC 3986) + * https://github.com/corporateshark/LUrlParser + * + * The MIT License (MIT) + * + * Copyright (C) 2015 Sergey Kosarevsky (sk@linderdaum.com) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "LUrlParser.h" + +#include +#include +#include + +// check if the scheme name is valid +static bool IsSchemeValid( const std::string& SchemeName ) +{ + for ( auto c : SchemeName ) + { + if ( !isalpha( c ) && c != '+' && c != '-' && c != '.' ) return false; + } + + return true; +} + +bool LUrlParser::clParseURL::GetPort( int* OutPort ) const +{ + if ( !IsValid() ) { return false; } + + int Port = atoi( m_Port.c_str() ); + + if ( Port <= 0 || Port > 65535 ) { return false; } + + if ( OutPort ) { *OutPort = Port; } + + return true; +} + +// based on RFC 1738 and RFC 3986 +LUrlParser::clParseURL LUrlParser::clParseURL::ParseURL( const std::string& URL ) +{ + LUrlParser::clParseURL Result; + + const char* CurrentString = URL.c_str(); + + /* + * : + * := [a-z\+\-\.]+ + * For resiliency, programs interpreting URLs should treat upper case letters as equivalent to lower case in scheme names + */ + + // try to read scheme + { + const char* LocalString = strchr( CurrentString, ':' ); + + if ( !LocalString ) + { + return clParseURL( LUrlParserError_NoUrlCharacter ); + } + + // save the scheme name + Result.m_Scheme = std::string( CurrentString, LocalString - CurrentString ); + + if ( !IsSchemeValid( Result.m_Scheme ) ) + { + return clParseURL( LUrlParserError_InvalidSchemeName ); + } + + // scheme should be lowercase + std::transform( Result.m_Scheme.begin(), Result.m_Scheme.end(), Result.m_Scheme.begin(), ::tolower ); + + // skip ':' + CurrentString = LocalString+1; + } + + /* + * //:@:/ + * any ":", "@" and "/" must be normalized + */ + + // skip "//" + if ( *CurrentString++ != '/' ) return clParseURL( LUrlParserError_NoDoubleSlash ); + if ( *CurrentString++ != '/' ) return clParseURL( LUrlParserError_NoDoubleSlash ); + + // check if the user name and password are specified + bool bHasUserName = false; + + const char* LocalString = CurrentString; + + while ( *LocalString ) + { + if ( *LocalString == '@' ) + { + // user name and password are specified + bHasUserName = true; + break; + } + else if ( *LocalString == '/' ) + { + // end of : specification + bHasUserName = false; + break; + } + + LocalString++; + } + + // user name and password + LocalString = CurrentString; + + if ( bHasUserName ) + { + // read user name + while ( *LocalString && *LocalString != ':' && *LocalString != '@' ) LocalString++; + + Result.m_UserName = std::string( CurrentString, LocalString - CurrentString ); + + // proceed with the current pointer + CurrentString = LocalString; + + if ( *CurrentString == ':' ) + { + // skip ':' + CurrentString++; + + // read password + LocalString = CurrentString; + + while ( *LocalString && *LocalString != '@' ) LocalString++; + + Result.m_Password = std::string( CurrentString, LocalString - CurrentString ); + + CurrentString = LocalString; + } + + // skip '@' + if ( *CurrentString != '@' ) + { + return clParseURL( LUrlParserError_NoAtSign ); + } + + CurrentString++; + } + + bool bHasBracket = ( *CurrentString == '[' ); + + // go ahead, read the host name + LocalString = CurrentString; + + while ( *LocalString ) + { + if ( bHasBracket && *LocalString == ']' ) + { + // end of IPv6 address + LocalString++; + break; + } + else if ( !bHasBracket && ( *LocalString == ':' || *LocalString == '/' ) ) + { + // port number is specified + break; + } + + LocalString++; + } + + Result.m_Host = std::string( CurrentString, LocalString - CurrentString ); + + CurrentString = LocalString; + + // is port number specified? + if ( *CurrentString == ':' ) + { + CurrentString++; + + // read port number + LocalString = CurrentString; + + while ( *LocalString && *LocalString != '/' ) LocalString++; + + Result.m_Port = std::string( CurrentString, LocalString - CurrentString ); + + CurrentString = LocalString; + } + + // end of string + if ( !*CurrentString ) + { + Result.m_ErrorCode = LUrlParserError_Ok; + + return Result; + } + + // skip '/' + if ( *CurrentString != '/' ) + { + return clParseURL( LUrlParserError_NoSlash ); + } + + CurrentString++; + + // parse the path + LocalString = CurrentString; + + while ( *LocalString && *LocalString != '#' && *LocalString != '?' ) LocalString++; + + Result.m_Path = std::string( CurrentString, LocalString - CurrentString ); + + CurrentString = LocalString; + + // check for query + if ( *CurrentString == '?' ) + { + // skip '?' + CurrentString++; + + // read query + LocalString = CurrentString; + + while ( *LocalString && *LocalString != '#' ) LocalString++; + + Result.m_Query = std::string( CurrentString, LocalString - CurrentString ); + + CurrentString = LocalString; + } + + // check for fragment + if ( *CurrentString == '#' ) + { + // skip '#' + CurrentString++; + + // read fragment + LocalString = CurrentString; + + while ( *LocalString ) LocalString++; + + Result.m_Fragment = std::string( CurrentString, LocalString - CurrentString ); + + CurrentString = LocalString; + } + + Result.m_ErrorCode = LUrlParserError_Ok; + + return Result; +} diff --git a/ixwebsocket/LUrlParser.h b/ixwebsocket/LUrlParser.h new file mode 100644 index 00000000..e347b369 --- /dev/null +++ b/ixwebsocket/LUrlParser.h @@ -0,0 +1,78 @@ +/* + * Lightweight URL & URI parser (RFC 1738, RFC 3986) + * https://github.com/corporateshark/LUrlParser + * + * The MIT License (MIT) + * + * Copyright (C) 2015 Sergey Kosarevsky (sk@linderdaum.com) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#include + +namespace LUrlParser +{ + enum LUrlParserError + { + LUrlParserError_Ok = 0, + LUrlParserError_Uninitialized = 1, + LUrlParserError_NoUrlCharacter = 2, + LUrlParserError_InvalidSchemeName = 3, + LUrlParserError_NoDoubleSlash = 4, + LUrlParserError_NoAtSign = 5, + LUrlParserError_UnexpectedEndOfLine = 6, + LUrlParserError_NoSlash = 7, + }; + + class clParseURL + { + public: + LUrlParserError m_ErrorCode; + std::string m_Scheme; + std::string m_Host; + std::string m_Port; + std::string m_Path; + std::string m_Query; + std::string m_Fragment; + std::string m_UserName; + std::string m_Password; + + clParseURL() + : m_ErrorCode( LUrlParserError_Uninitialized ) + {} + + /// return 'true' if the parsing was successful + bool IsValid() const { return m_ErrorCode == LUrlParserError_Ok; } + + /// helper to convert the port number to int, return 'true' if the port is valid (within the 0..65535 range) + bool GetPort( int* OutPort ) const; + + /// parse the URL + static clParseURL ParseURL( const std::string& URL ); + + private: + explicit clParseURL( LUrlParserError ErrorCode ) + : m_ErrorCode( ErrorCode ) + {} + }; + +} // namespace LUrlParser diff --git a/test/IXUrlParserTest.cpp b/test/IXUrlParserTest.cpp index 444957b2..39220b7c 100644 --- a/test/IXUrlParserTest.cpp +++ b/test/IXUrlParserTest.cpp @@ -23,10 +23,9 @@ TEST_CASE("urlParser", "[urlParser]") std::string url = "http://google.com"; std::string protocol, host, path, query; int port; - bool websocket = false; bool res; - res = UrlParser::parse(url, protocol, host, path, query, port, websocket); + res = UrlParser::parse(url, protocol, host, path, query, port); REQUIRE(res); REQUIRE(protocol == "http"); @@ -41,10 +40,9 @@ TEST_CASE("urlParser", "[urlParser]") std::string url = "https://google.com"; std::string protocol, host, path, query; int port; - bool websocket = false; bool res; - res = UrlParser::parse(url, protocol, host, path, query, port, websocket); + res = UrlParser::parse(url, protocol, host, path, query, port); REQUIRE(res); REQUIRE(protocol == "https"); @@ -59,10 +57,9 @@ TEST_CASE("urlParser", "[urlParser]") std::string url = "ws://google.com"; std::string protocol, host, path, query; int port; - bool websocket = true; bool res; - res = UrlParser::parse(url, protocol, host, path, query, port, websocket); + res = UrlParser::parse(url, protocol, host, path, query, port); REQUIRE(res); REQUIRE(protocol == "ws"); @@ -77,10 +74,9 @@ TEST_CASE("urlParser", "[urlParser]") std::string url = "wss://google.com/ws?arg=value&arg2=value2"; std::string protocol, host, path, query; int port; - bool websocket = true; bool res; - res = UrlParser::parse(url, protocol, host, path, query, port, websocket); + res = UrlParser::parse(url, protocol, host, path, query, port); REQUIRE(res); REQUIRE(protocol == "wss"); @@ -95,10 +91,9 @@ TEST_CASE("urlParser", "[urlParser]") std::string url = "ws://127.0.0.1:7350/ws?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE1NTcxNzAwNzIsInVpZCI6ImMwZmZjOGE1LTk4OTktNDAwYi1hNGU5LTJjNWM3NjFmNWQxZiIsInVzbiI6InN2YmhOdlNJSmEifQ.5L8BUbpTA4XAHlSrdwhIVlrlIpRtjExepim7Yh5eEO4&status=true&format=protobuf"; std::string protocol, host, path, query; int port; - bool websocket = true; bool res; - res = UrlParser::parse(url, protocol, host, path, query, port, websocket); + res = UrlParser::parse(url, protocol, host, path, query, port); REQUIRE(res); REQUIRE(protocol == "ws");