can GET some pages

This commit is contained in:
Benjamin Sergeant 2019-02-14 20:11:42 -08:00
parent 29c96f287f
commit 0b7c3ec235
11 changed files with 399 additions and 183 deletions

View File

@ -28,7 +28,9 @@ set( IXWEBSOCKET_SOURCES
ixwebsocket/IXWebSocketPerMessageDeflate.cpp
ixwebsocket/IXWebSocketPerMessageDeflateCodec.cpp
ixwebsocket/IXWebSocketPerMessageDeflateOptions.cpp
ixwebsocket/IXWebSocketHttpHeaders.cpp
ixwebsocket/IXHttpClient.cpp
ixwebsocket/IXUrlParser.cpp
)
set( IXWEBSOCKET_HEADERS
@ -51,6 +53,7 @@ set( IXWEBSOCKET_HEADERS
ixwebsocket/IXWebSocketHttpHeaders.h
ixwebsocket/libwshandshake.hpp
ixwebsocket/IXHttpClient.h
ixwebsocket/IXUrlParser.h
)
# Platform specific code

View File

@ -13,7 +13,25 @@ using namespace ix;
// Fetch `url` with an HTTP GET and dump the outcome on stdout.
//
// The response tuple returned by HttpClient::get is unpacked as
// (status/error code, headers, payload, error message). Headers are printed
// first, one "name: value" per line, then the code, then the error message
// (only when non-empty), and finally the payload.
void run(const std::string& url)
{
    HttpClient httpClient;

    // Issue the request exactly once. A second, result-discarding call to
    // get() would only repeat the network round trip for nothing.
    const bool verbose = true;
    const auto out = httpClient.get(url, verbose);

    // Bind by reference: the tuple outlives these names, no copies needed.
    const auto& errorCode = std::get<0>(out);
    const auto& headers = std::get<1>(out);
    const auto& payload = std::get<2>(out);
    const auto& errorMsg = std::get<3>(out);

    for (const auto& it : headers)
    {
        std::cout << it.first << ": " << it.second << std::endl;
    }

    std::cout << "error code: " << errorCode << std::endl;
    if (!errorMsg.empty())
    {
        std::cout << "error message: " << errorMsg << std::endl;
    }

    std::cout << "payload: " << payload << std::endl;
}

View File

@ -5,8 +5,20 @@
*/
#include "IXHttpClient.h"
#include "IXUrlParser.h"
#include "IXWebSocketHttpHeaders.h"
#if defined(__APPLE__) or defined(__linux__)
# ifdef __APPLE__
# include <ixwebsocket/IXSocketAppleSSL.h>
# else
# include <ixwebsocket/IXSocketOpenSSL.h>
# endif
#endif
#include <iostream>
#include <sstream>
#include <vector>
namespace ix
{
@ -20,18 +32,52 @@ namespace ix
}
HttpResponse HttpClient::get(const std::string& url)
HttpResponse HttpClient::get(const std::string& url,
bool verbose)
{
int code = 0;
WebSocketHttpHeaders headers;
std::string payload;
std::string protocol, host, path, query;
int port;
if (!parseUrl(url, protocol, host, path, query, port))
{
code = 0; // 0 ?
std::string errorMsg("Cannot parse url");
return std::make_tuple(code, headers, payload, errorMsg);
}
if (protocol == "http")
{
_socket = std::make_shared<Socket>();
}
else if (protocol == "https")
{
# ifdef __APPLE__
_socket = std::make_shared<SocketAppleSSL>();
# else
_socket = std::make_shared<SocketOpenSSL>();
# endif
}
else
{
code = 0; // 0 ?
std::string errorMsg("Bad protocol");
return std::make_tuple(code, headers, payload, errorMsg);
}
// FIXME: missing url parsing
std::string host("www.cnn.com");
int port = 80;
std::string request("GET / HTTP/1.1\r\n\r\n");
int expectedStatus = 200;
std::stringstream ss;
ss << "GET " << path << " HTTP/1.1\r\n";
ss << "Host: " << host << "\r\n";
ss << "User-Agent: ixwebsocket/1.0.0" << "\r\n";
ss << "Accept: */*" << "\r\n";
ss << "\r\n";
std::string request(ss.str());
int timeoutSecs = 3;
std::string errMsg;
@ -39,40 +85,138 @@ namespace ix
auto isCancellationRequested =
makeCancellationRequestWithTimeout(timeoutSecs, requestInitCancellation);
bool success = _socket.connect(host, port, errMsg, isCancellationRequested);
bool success = _socket->connect(host, port, errMsg, isCancellationRequested);
if (!success)
{
int code = 0; // 0 ?
return std::make_tuple(code, headers, payload);
code = 0; // 0 ?
std::string errorMsg("Cannot connect to url");
return std::make_tuple(code, headers, payload, errorMsg);
}
if (verbose)
{
std::cout << "Sending request: " << request
<< "to " << host << ":" << port
<< std::endl;
if (!_socket.writeBytes(request, isCancellationRequested))
{
int code = 0; // 0 ?
return std::make_tuple(code, headers, payload);
}
auto lineResult = _socket.readLine(isCancellationRequested);
if (!_socket->writeBytes(request, isCancellationRequested))
{
code = 0; // 0 ?
std::string errorMsg("Cannot send request");
return std::make_tuple(code, headers, payload, errorMsg);
}
auto lineResult = _socket->readLine(isCancellationRequested);
auto lineValid = lineResult.first;
auto line = lineResult.second;
std::cout << "first line: " << line << std::endl;
int status = -1;
sscanf(line.c_str(), "HTTP/1.1 %d", &status) == 1;
return std::make_tuple(code, headers, payload);
if (!lineValid)
{
code = 0; // 0 ?
std::string errorMsg("Cannot retrieve status line");
return std::make_tuple(code, headers, payload, errorMsg);
}
HttpResponse HttpClient::post(const std::string& url)
if (verbose)
{
int code = 0;
WebSocketHttpHeaders headers;
std::string payload;
std::cout << "first line: " << line << std::endl;
}
return std::make_tuple(code, headers, payload);
code = -1;
if (sscanf(line.c_str(), "HTTP/1.1 %d", &code) != 1)
{
code = 0; // 0 ?
std::string errorMsg("Cannot parse response code from status line");
return std::make_tuple(code, headers, payload, errorMsg);
}
auto result = parseHttpHeaders(_socket, isCancellationRequested);
auto headersValid = result.first;
headers = result.second;
if (!headersValid)
{
code = 0; // 0 ?
std::string errorMsg("Cannot parse http headers");
return std::make_tuple(code, headers, payload, errorMsg);
}
// Parse response:
// http://bryce-thomas.blogspot.com/2012/01/technical-parsing-http-to-extract.html
if (headers.find("content-length") == headers.end())
{
code = 0; // 0 ?
std::string errorMsg("No content length header");
return std::make_tuple(code, headers, payload, errorMsg);
}
ssize_t contentLength = -1;
ss.str("");
ss << headers["content-length"];
ss >> contentLength;
payload.reserve(contentLength);
// very inefficient way to read bytes, but it works...
for (int i = 0; i < contentLength; ++i)
{
char c;
if (!_socket->readByte(&c, isCancellationRequested))
{
ss.str("");
ss << "Cannot read byte";
return std::make_tuple(-1, headers, payload, ss.str());
}
payload += c;
}
return std::make_tuple(code, headers, payload, "");
}
}
#if 0
std::vector<uint8_t> rxbuf;
while (true)
{
int N = (int) _rxbuf.size();
_rxbuf.resize(N + 1500);
ssize_t ret = _socket->recv((char*)&_rxbuf[0] + N, 1500);
if (ret < 0 && (_socket->getErrno() == EWOULDBLOCK ||
_socket->getErrno() == EAGAIN)) {
_rxbuf.resize(N);
break;
}
else if (ret <= 0)
{
_rxbuf.resize(N);
_socket->close();
setReadyState(CLOSED);
break;
}
else
{
_rxbuf.resize(N + ret);
}
}
ssize_t ret = _socket->recv((char*)&rxbuf[0], contentLength);
payload = std::string(rxbuf.begin(), rxbuf.end());
std::cerr << "socket->recv: " << ret << std::endl;
if (ret != contentLength)
{
ss.str("");
ss << "Cannot retrieve all bytes"
<< " want: " << contentLength
<< ", got: " << ret;
std::cerr << "adscasdcadcasdc" << std::endl;
std::cerr << ss.str() << std::endl;
return std::make_tuple(-1, headers, payload, ss.str());
}
#endif

View File

@ -11,13 +11,14 @@
#include <mutex>
#include <atomic>
#include <tuple>
#include <memory>
#include "IXSocket.h"
#include "IXWebSocketHttpHeaders.h"
namespace ix
{
using HttpResponse = std::tuple<int, WebSocketHttpHeaders, std::string>;
using HttpResponse = std::tuple<int, WebSocketHttpHeaders, std::string, std::string>;
class HttpClient {
public:
@ -25,10 +26,9 @@ namespace ix
~HttpClient();
// Static methods ?
HttpResponse get(const std::string& url);
HttpResponse post(const std::string& url);
HttpResponse get(const std::string& url, bool verbose);
private:
Socket _socket;
std::shared_ptr<Socket> _socket;
};
}

View File

@ -0,0 +1,98 @@
/*
* IXUrlParser.cpp
* Author: Benjamin Sergeant
* Copyright (c) 2019 Machine Zone, Inc. All rights reserved.
*/
#include "IXUrlParser.h"
#include <regex>
#include <iostream>
#include <sstream>
namespace ix
{
// Split a ws/wss/http/https URL into its components.
//
// url      - the URL to parse.
// protocol - receives the scheme ("ws", "wss", "http" or "https").
// host     - receives the host name.
// path     - receives the path, always starting with '/'; when the URL has
//            a query string, "?" + query is appended to it.
// query    - receives the query string without the leading '?'.
// port     - receives the explicit port, or 80 (ws/http) / 443 (wss/https)
//            when the URL does not specify one.
//
// Returns true on success. Returns false, leaving every output argument
// untouched, when the URL does not match the expected shape or when the
// port is not a decimal number in the range [1, 65535].
bool parseUrl(const std::string& url,
              std::string& protocol,
              std::string& host,
              std::string& path,
              std::string& query,
              int& port)
{
    // Compiled once: building a std::regex is expensive and the pattern
    // never changes between calls.
    static const std::regex ex("(ws|wss|http|https)://([^/ :]+):?([^/ ]*)(/?[^ #?]*)\\x3f?([^ #]*)#?([^ ]*)");

    std::smatch what;
    if (!std::regex_match(url, what, ex))
    {
        return false;
    }

    const std::string scheme = what[1].str();
    const std::string portStr = what[3].str();

    int parsedPort = 0;
    if (portStr.empty())
    {
        if (scheme == "ws" || scheme == "http")
        {
            parsedPort = 80;
        }
        else if (scheme == "wss" || scheme == "https")
        {
            parsedPort = 443;
        }
        else
        {
            // Invalid protocol. Should be caught by regex check
            // but this missing branch trigger cpplint linter.
            return false;
        }
    }
    else
    {
        // The regex accepts any non '/' characters in the port slot, so the
        // digits have to be validated here. Five digits at most keeps the
        // accumulation well inside the range of an int.
        if (portStr.size() > 5)
        {
            return false;
        }
        for (const char c : portStr)
        {
            if (c < '0' || c > '9')
            {
                return false;
            }
            parsedPort = parsedPort * 10 + (c - '0');
        }
        if (parsedPort < 1 || parsedPort > 65535)
        {
            return false;
        }
    }

    std::string parsedPath = what[4].str();
    const std::string parsedQuery = what[5].str();

    if (parsedPath.empty())
    {
        parsedPath = "/";
    }
    else if (parsedPath[0] != '/')
    {
        parsedPath = '/' + parsedPath;
    }

    if (!parsedQuery.empty())
    {
        parsedPath += "?";
        parsedPath += parsedQuery;
    }

    // Everything validated: publish the results.
    protocol = scheme;
    host = what[2].str();
    path = parsedPath;
    query = parsedQuery;
    port = parsedPort;

    return true;
}
void printUrl(const std::string& url)
{
std::string protocol, host, path, query;
int port {0};
if (!parseUrl(url, protocol, host, path, query, port))
{
return;
}
std::cout << "[" << url << "]" << std::endl;
std::cout << protocol << std::endl;
std::cout << host << std::endl;
std::cout << port << std::endl;
std::cout << path << std::endl;
std::cout << query << std::endl;
std::cout << "-------------------------------" << std::endl;
}
}

21
ixwebsocket/IXUrlParser.h Normal file
View File

@ -0,0 +1,21 @@
/*
* IXUrlParser.h
* Author: Benjamin Sergeant
* Copyright (c) 2019 Machine Zone, Inc. All rights reserved.
*/
#pragma once
#include <string>
namespace ix
{
// Split a ws/wss/http/https URL into its components.
//
// On success returns true and fills the output arguments:
//   protocol - the scheme ("ws", "wss", "http" or "https")
//   host     - the host name
//   path     - the path, defaulting to "/" and always starting with '/';
//              "?" + query is appended when a query string is present
//   query    - the query string, without the leading '?'
//   port     - the explicit port, or 80 for ws/http and 443 for wss/https
//              when the URL does not carry one
//
// Returns false when the URL does not match the expected pattern.
bool parseUrl(const std::string& url,
std::string& protocol,
std::string& host,
std::string& path,
std::string& query,
int& port);
// Debugging aid: parses `url` with parseUrl and prints its components
// (protocol, host, port, path, query) on stdout. Prints nothing when the URL
// cannot be parsed.
void printUrl(const std::string& url);
}

View File

@ -6,6 +6,7 @@
#include "IXWebSocketHandshake.h"
#include "IXSocketConnect.h"
#include "IXUrlParser.h"
#include "libwshandshake.hpp"
@ -32,90 +33,6 @@ namespace ix
}
// Split a ws/wss URL into its components.
//
// url      - the URL to parse.
// protocol - receives the scheme ("ws" or "wss").
// host     - receives the host name.
// path     - receives the path, always starting with '/'; when the URL has
//            a query string, "?" + query is appended to it.
// query    - receives the query string without the leading '?'.
// port     - receives the explicit port, or 80 (ws) / 443 (wss) when the
//            URL does not specify one.
//
// Returns true on success. Returns false, leaving every output argument
// untouched, when the URL does not match the expected shape or when the
// port is not a decimal number in the range [1, 65535].
bool WebSocketHandshake::parseUrl(const std::string& url,
                                  std::string& protocol,
                                  std::string& host,
                                  std::string& path,
                                  std::string& query,
                                  int& port)
{
    // Compiled once: building a std::regex is expensive and the pattern
    // never changes between calls.
    static const std::regex ex("(ws|wss)://([^/ :]+):?([^/ ]*)(/?[^ #?]*)\\x3f?([^ #]*)#?([^ ]*)");

    std::smatch what;
    if (!std::regex_match(url, what, ex))
    {
        return false;
    }

    const std::string scheme = what[1].str();
    const std::string portStr = what[3].str();

    int parsedPort = 0;
    if (portStr.empty())
    {
        if (scheme == "ws")
        {
            parsedPort = 80;
        }
        else if (scheme == "wss")
        {
            parsedPort = 443;
        }
        else
        {
            // Invalid protocol. Should be caught by regex check
            // but this missing branch trigger cpplint linter.
            return false;
        }
    }
    else
    {
        // The regex accepts any non '/' characters in the port slot, so the
        // digits have to be validated here. Five digits at most keeps the
        // accumulation well inside the range of an int.
        if (portStr.size() > 5)
        {
            return false;
        }
        for (const char c : portStr)
        {
            if (c < '0' || c > '9')
            {
                return false;
            }
            parsedPort = parsedPort * 10 + (c - '0');
        }
        if (parsedPort < 1 || parsedPort > 65535)
        {
            return false;
        }
    }

    std::string parsedPath = what[4].str();
    const std::string parsedQuery = what[5].str();

    if (parsedPath.empty())
    {
        parsedPath = "/";
    }
    else if (parsedPath[0] != '/')
    {
        parsedPath = '/' + parsedPath;
    }

    if (!parsedQuery.empty())
    {
        parsedPath += "?";
        parsedPath += parsedQuery;
    }

    // Everything validated: publish the results.
    protocol = scheme;
    host = what[2].str();
    path = parsedPath;
    query = parsedQuery;
    port = parsedPort;

    return true;
}
// Debugging aid: parse `url` and write its components to stdout, one per
// line (protocol, host, port, path, query), framed by the bracketed URL and
// a dashed separator. Prints nothing when the URL cannot be parsed.
void WebSocketHandshake::printUrl(const std::string& url)
{
    std::string protocol;
    std::string host;
    std::string path;
    std::string query;
    int port = 0;

    const bool parsed =
        WebSocketHandshake::parseUrl(url, protocol, host, path, query, port);
    if (parsed)
    {
        std::cout << "[" << url << "]" << std::endl
                  << protocol << std::endl
                  << host << std::endl
                  << port << std::endl
                  << path << std::endl
                  << query << std::endl
                  << "-------------------------------" << std::endl;
    }
}
std::string WebSocketHandshake::trim(const std::string& str)
{
std::string out(str);
@ -192,61 +109,6 @@ namespace ix
return s;
}
// Read HTTP header lines from _socket until the blank line that ends the
// header section.
//
// isCancellationRequested - forwarded to every socket read.
//
// Returns (true, headers) once the terminating blank line has been read.
// Header names are lower-cased; a repeated name keeps the last value seen.
// Lines without a ':' (or starting with one) are ignored.
// Returns (false, headers parsed so far) when a read fails or when a single
// line exceeds maxLineLength bytes.
std::pair<bool, WebSocketHttpHeaders> WebSocketHandshake::parseHttpHeaders(
    const CancellationRequest& isCancellationRequested)
{
    // Upper bound for one header line. Guards against a peer that never
    // sends a line terminator; an over-long line is reported as a failure
    // instead of being silently split into bogus headers.
    const std::string::size_type maxLineLength = 16 * 1024;

    WebSocketHttpHeaders headers;
    std::string line;

    while (true)
    {
        line.clear();

        // Read one line byte by byte, up to and including its '\n'.
        while (line.empty() || line.back() != '\n')
        {
            if (line.size() >= maxLineLength)
            {
                return std::make_pair(false, headers);
            }

            char c;
            if (!_socket->readByte(&c, isCancellationRequested))
            {
                return std::make_pair(false, headers);
            }
            line += c;
        }

        // Drop the line terminator ("\r\n", tolerating a bare "\n").
        line.pop_back();
        if (!line.empty() && line.back() == '\r')
        {
            line.pop_back();
        }

        // A blank line marks the end of the headers.
        if (line.empty())
        {
            break;
        }

        // line is a single header entry. split by ':', and add it to our
        // header map. ignore lines with no colon (or with an empty name).
        const std::string::size_type colon = line.find(':', 1);
        if (colon == std::string::npos)
        {
            continue;
        }

        std::string name(line, 0, colon);

        // The value is whatever follows the colon, minus optional
        // surrounding whitespace. Computed with find_* so that a missing
        // space or an empty value can never index past the end of the line.
        std::string value;
        const std::string::size_type first = line.find_first_not_of(" \t", colon + 1);
        if (first != std::string::npos)
        {
            const std::string::size_type last = line.find_last_not_of(" \t");
            value = line.substr(first, last - first + 1);
        }

        // Make the name lower case. Go through unsigned char: passing a
        // negative char to tolower is undefined behaviour.
        std::transform(name.begin(), name.end(), name.begin(),
                       [](unsigned char ch) { return static_cast<char>(::tolower(ch)); });

        headers[name] = value;
    }

    return std::make_pair(true, headers);
}
WebSocketInitResult WebSocketHandshake::sendErrorResponse(int code, const std::string& reason)
{
std::stringstream ss;
@ -355,7 +217,7 @@ namespace ix
return WebSocketInitResult(false, status, ss.str());
}
auto result = parseHttpHeaders(isCancellationRequested);
auto result = parseHttpHeaders(_socket, isCancellationRequested);
auto headersValid = result.first;
auto headers = result.second;
@ -450,7 +312,7 @@ namespace ix
}
// Retrieve and validate HTTP headers
auto result = parseHttpHeaders(isCancellationRequested);
auto result = parseHttpHeaders(_socket, isCancellationRequested);
auto headersValid = result.first;
auto headers = result.second;

View File

@ -59,19 +59,10 @@ namespace ix
WebSocketInitResult serverHandshake(int fd,
int timeoutSecs);
static bool parseUrl(const std::string& url,
std::string& protocol,
std::string& host,
std::string& path,
std::string& query,
int& port);
private:
static void printUrl(const std::string& url);
std::string genRandomString(const int len);
// Parse HTTP headers
std::pair<bool, WebSocketHttpHeaders> parseHttpHeaders(const CancellationRequest& isCancellationRequested);
WebSocketInitResult sendErrorResponse(int code, const std::string& reason);
std::tuple<std::string, std::string, std::string> parseRequestLine(const std::string& line);

View File

@ -0,0 +1,69 @@
/*
* IXWebSocketHttpHeaders.h
* Author: Benjamin Sergeant
* Copyright (c) 2018 Machine Zone, Inc. All rights reserved.
*/
#include "IXWebSocketHttpHeaders.h"
#include "IXSocket.h"
#include <string>
#include <unordered_map>
namespace ix
{
// Read HTTP header lines from `socket` until the blank line that ends the
// header section.
//
// socket                  - connection to read from, one byte at a time.
// isCancellationRequested - forwarded to every socket read.
//
// Returns (true, headers) once the terminating blank line has been read.
// Header names are lower-cased; a repeated name keeps the last value seen.
// Lines without a ':' (or starting with one) are ignored.
// Returns (false, headers parsed so far) when a read fails or when a single
// line exceeds maxLineLength bytes.
std::pair<bool, WebSocketHttpHeaders> parseHttpHeaders(
    std::shared_ptr<Socket> socket,
    const CancellationRequest& isCancellationRequested)
{
    // Upper bound for one header line. Guards against a peer that never
    // sends a line terminator; an over-long line is reported as a failure
    // instead of being silently split into bogus headers.
    const std::string::size_type maxLineLength = 16 * 1024;

    WebSocketHttpHeaders headers;
    std::string line;

    while (true)
    {
        line.clear();

        // Read one line byte by byte, up to and including its '\n'.
        while (line.empty() || line.back() != '\n')
        {
            if (line.size() >= maxLineLength)
            {
                return std::make_pair(false, headers);
            }

            char c;
            if (!socket->readByte(&c, isCancellationRequested))
            {
                return std::make_pair(false, headers);
            }
            line += c;
        }

        // Drop the line terminator ("\r\n", tolerating a bare "\n").
        line.pop_back();
        if (!line.empty() && line.back() == '\r')
        {
            line.pop_back();
        }

        // A blank line marks the end of the headers.
        if (line.empty())
        {
            break;
        }

        // line is a single header entry. split by ':', and add it to our
        // header map. ignore lines with no colon (or with an empty name).
        const std::string::size_type colon = line.find(':', 1);
        if (colon == std::string::npos)
        {
            continue;
        }

        std::string name(line, 0, colon);

        // The value is whatever follows the colon, minus optional
        // surrounding whitespace. Computed with find_* so that a missing
        // space or an empty value can never index past the end of the line.
        std::string value;
        const std::string::size_type first = line.find_first_not_of(" \t", colon + 1);
        if (first != std::string::npos)
        {
            const std::string::size_type last = line.find_last_not_of(" \t");
            value = line.substr(first, last - first + 1);
        }

        // Make the name lower case. Go through unsigned char: passing a
        // negative char to tolower is undefined behaviour.
        std::transform(name.begin(), name.end(), name.begin(),
                       [](unsigned char ch) { return static_cast<char>(::tolower(ch)); });

        headers[name] = value;
    }

    return std::make_pair(true, headers);
}
}

View File

@ -6,10 +6,20 @@
#pragma once
#include "IXCancellationRequest.h"
#include <string>
#include <unordered_map>
#include <memory>
#include <algorithm>
namespace ix
{
class Socket;
using WebSocketHttpHeaders = std::unordered_map<std::string, std::string>;
// Read HTTP headers from `socket`, one byte at a time, until the blank line
// that ends the header section.
//
// Returns a (success, headers) pair. Header names are stored lower-cased.
// success is false when a socket read fails; `isCancellationRequested` is
// passed to every read.
std::pair<bool, WebSocketHttpHeaders> parseHttpHeaders(
std::shared_ptr<Socket> socket,
const CancellationRequest& isCancellationRequested);
}

View File

@ -11,6 +11,7 @@
#include "IXWebSocketTransport.h"
#include "IXWebSocketHandshake.h"
#include "IXWebSocketHttpHeaders.h"
#include "IXUrlParser.h"
#ifdef IXWEBSOCKET_USE_TLS
# ifdef __APPLE__
@ -68,8 +69,7 @@ namespace ix
std::string protocol, host, path, query;
int port;
if (!WebSocketHandshake::parseUrl(url, protocol, host,
path, query, port))
if (!parseUrl(url, protocol, host, path, query, port))
{
return WebSocketInitResult(false, 0,
std::string("Could not parse URL ") + url);