Browse Source

Rewrite message parsing using string_view

It's a bit too early yet to require C++17 so the implementation from
BackportCpp (string_view-standalone) is used instead.

Fixes https://crbug.com/oss-fuzz/34413 - slow message parsing on huge
messages. In the real world, messages can't be that big, because CSocket
enforces a line length limit.

This can be considered a regression in 1.7.0: before that release, instead
of gathering params into a vector, the code searched for the 1st word in
the string, then the 2nd word, then the 3rd word, starting from the
beginning each time. It was not very efficient, but the number of passes
over the string was limited.
pull/1785/head
Alexey Sokolov 1 year ago
parent
commit
fd71a69fab
  1. 1
      CMakeLists.txt
  2. 1
      NOTICE
  3. 2
      include/znc/Message.h
  4. 1
      src/CMakeLists.txt
  5. 61
      src/Message.cpp
  6. 10
      test/MessageTest.cpp
  7. 1424
      third_party/bpstd/bpstd/string_view.hpp

1
CMakeLists.txt

@ -284,7 +284,6 @@ if(append_git_version)
endif()
file(GLOB csocket_files LIST_DIRECTORIES FALSE
"${PROJECT_SOURCE_DIR}/third_party/Csocket/Csocket.*")
if(csocket_files STREQUAL "")

1
NOTICE

@ -16,6 +16,7 @@ ZNC includes code from jQuery UI (http://jqueryui.com/), licensed under the MIT
ZNC includes code from Selectize (http://brianreavis.github.io/selectize.js/), licensed under the Apache License 2.0.
ZNC includes modified code from CMakeFindFrameworks.cmake by Kitware, Inc., licensed under BSD License.
ZNC includes modified code from TestLargeFiles.cmake, licensed under Boost Software License, Version 1.0.
ZNC includes code from BackportCpp (https://github.com/bitwizeshift/string_view-standalone), licensed under the MIT license.
ZNC is developed by these people:

2
include/znc/Message.h

@ -161,7 +161,7 @@ class CMessage {
};
CString ToString(unsigned int uFlags = IncludeAll) const;
void Parse(CString sMessage);
void Parse(const CString& sMessage);
// Implicit and explicit conversion to a subclass reference.
#ifndef SWIG

1
src/CMakeLists.txt

@ -60,6 +60,7 @@ add_custom_target(version
add_dependencies(znclib copy_csocket_h copy_csocket_cc version)
set(znc_include_dirs
"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/bpstd>"
"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
"$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_FULL_INCLUDEDIR}>")

61
src/Message.cpp

@ -16,6 +16,7 @@
#include <znc/Message.h>
#include <znc/Utils.h>
#include "bpstd/string_view.hpp"
CMessage::CMessage(const CString& sMessage) {
Parse(sMessage);
@ -157,19 +158,43 @@ CString CMessage::ToString(unsigned int uFlags) const {
return sMessage;
}
void CMessage::Parse(CString sMessage) {
void CMessage::Parse(const CString& sMessage) {
const char* begin = sMessage.c_str();
const char* const end = begin + sMessage.size();
auto next_word = [&]() {
// Find the end of the first word
const char* p = begin;
while (p < end && *p != ' ') ++p;
bpstd::string_view result(begin, p - begin);
begin = p;
// Prepare for the following word
while (begin < end && *begin == ' ') ++begin;
return result;
};
// <tags>
m_mssTags.clear();
if (sMessage.StartsWith("@")) {
VCString vsTags;
sMessage.Token(0).TrimPrefix_n("@").Split(";", vsTags, false);
for (const CString& sTag : vsTags) {
CString sKey = sTag.Token(0, false, "=", true);
CString sValue = sTag.Token(1, true, "=", true);
if (begin < end && *begin == '@') {
bpstd::string_view svTags = next_word().substr(1);
std::vector<bpstd::string_view> vsTags;
// Split by ';'
while (true) {
auto delim = svTags.find_first_of(';');
if (delim == bpstd::string_view::npos) {
vsTags.push_back(svTags);
break;
}
vsTags.push_back(svTags.substr(0, delim));
svTags = svTags.substr(delim + 1);
}
// Save key and value
for (bpstd::string_view svTag : vsTags) {
auto delim = svTag.find_first_of('=');
CString sKey = std::string(delim == bpstd::string_view::npos ? svTag : svTag.substr(0, delim));
CString sValue = delim == bpstd::string_view::npos ? std::string() : std::string(svTag.substr(delim + 1));
m_mssTags[sKey] =
sValue.Escape(CString::EMSGTAG, CString::CString::EASCII);
}
sMessage = sMessage.Token(1, true);
}
// <message> ::= [':' <prefix> <SPACE> ] <command> <params> <crlf>
@ -183,26 +208,24 @@ void CMessage::Parse(CString sMessage) {
// NUL or CR or LF>
// <prefix>
if (sMessage.TrimPrefix(":")) {
m_Nick.Parse(sMessage.Token(0));
sMessage = sMessage.Token(1, true);
if (begin < end && *begin == ':') {
m_Nick.Parse(std::string(next_word().substr(1)));
}
// <command>
m_sCommand = sMessage.Token(0);
sMessage = sMessage.Token(1, true);
m_sCommand = std::string(next_word());
// <params>
m_bColon = false;
m_vsParams.clear();
while (!sMessage.empty()) {
m_bColon = sMessage.TrimPrefix(":");
while (begin < end) {
m_bColon = *begin == ':';
if (m_bColon) {
m_vsParams.push_back(sMessage);
sMessage.clear();
++begin;
m_vsParams.push_back(std::string(begin, end - begin));
begin = end;
} else {
m_vsParams.push_back(sMessage.Token(0));
sMessage = sMessage.Token(1, true);
m_vsParams.push_back(std::string(next_word()));
}
}

10
test/MessageTest.cpp

@ -22,6 +22,7 @@
using ::testing::IsEmpty;
using ::testing::ContainerEq;
using ::testing::ElementsAre;
using ::testing::SizeIs;
TEST(MessageTest, SetParam) {
CMessage msg;
@ -609,3 +610,12 @@ TEST(MessageTest, ParseWithoutSourceAndTags) {
EXPECT_EQ(msg.GetCommand(), "COMMAND");
EXPECT_EQ(msg.GetParams(), VCString());
}
// Regression test for slow parsing of huge messages
// (https://crbug.com/oss-fuzz/34413): builds a line of one million
// space-separated single-character words and parses it in one go.
TEST(MessageTest, HugeParse) {
CString line;
for (int i = 0; i < 1000000; ++i) {
line += "a ";
}
CMessage msg(line);
// The line has no tags or prefix, so the first word is taken as the
// command; the remaining 999999 words become params.
EXPECT_THAT(msg.GetParams(), SizeIs(999999));
}

1424
third_party/bpstd/bpstd/string_view.hpp vendored

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save