cobra bots: handle stalled connection by disconnecting and reconnecting instead of quitting, and expecting kubernete to restart us
This commit is contained in:
parent
e49bf24d2d
commit
ecfca1f905
@ -44,6 +44,7 @@ namespace ix
|
||||
std::atomic<bool> stop(false);
|
||||
std::atomic<bool> throttled(false);
|
||||
std::atomic<bool> fatalCobraError(false);
|
||||
std::atomic<bool> stalledConnection(false);
|
||||
int minuteCounter = 0;
|
||||
|
||||
auto timer = [&sentCount,
|
||||
@ -95,7 +96,13 @@ namespace ix
|
||||
|
||||
std::thread t1(timer);
|
||||
|
||||
auto heartbeat = [&sentCount, &receivedCount, &stop, &enableHeartbeat, &heartBeatTimeout, &fatalCobraError] {
|
||||
auto heartbeat = [&sentCount,
|
||||
&receivedCount,
|
||||
&stop,
|
||||
&enableHeartbeat,
|
||||
&heartBeatTimeout,
|
||||
&stalledConnection]
|
||||
{
|
||||
setThreadName("Bot heartbeat");
|
||||
std::string state("na");
|
||||
|
||||
@ -111,9 +118,12 @@ namespace ix
|
||||
|
||||
if (currentState == state)
|
||||
{
|
||||
CoreLogger::error("no messages received or sent for 1 minute, exiting");
|
||||
fatalCobraError = true;
|
||||
break;
|
||||
ss.str("");
|
||||
ss << "no messages received or sent for "
|
||||
<< heartBeatTimeout << " seconds, reconnecting";
|
||||
|
||||
CoreLogger::error(ss.str());
|
||||
stalledConnection = true;
|
||||
}
|
||||
state = currentState;
|
||||
|
||||
@ -234,6 +244,13 @@ namespace ix
|
||||
std::this_thread::sleep_for(duration);
|
||||
|
||||
if (fatalCobraError) break;
|
||||
|
||||
if (stalledConnection)
|
||||
{
|
||||
conn.disconnect();
|
||||
conn.connect();
|
||||
stalledConnection = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Run for a duration, used by unittesting now
|
||||
@ -245,6 +262,13 @@ namespace ix
|
||||
std::this_thread::sleep_for(duration);
|
||||
|
||||
if (fatalCobraError) break;
|
||||
|
||||
if (stalledConnection)
|
||||
{
|
||||
conn.disconnect();
|
||||
conn.connect();
|
||||
stalledConnection = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -123,7 +123,7 @@ namespace ix
|
||||
}
|
||||
|
||||
// periodically display all device ids
|
||||
if (sentCount % 100 == 0)
|
||||
if (sentCount % 1000 == 0)
|
||||
{
|
||||
ss.str(""); // reset the stringstream
|
||||
ss << "## " << deviceIdCounters.size() << " unique device ids ##" << std::endl;
|
||||
|
Loading…
x
Reference in New Issue
Block a user