1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 21:37:36 +00:00

Spreadsheet: Make the XSV parser start with a preview parse

Previously the constructor parsed the whole document up front, which is
really wasteful and super slow.
This commit is contained in:
Ali Mohammad Pur 2021-06-16 08:34:19 +04:30 committed by Ali Mohammad Pur
parent 88b168ff16
commit b11b3c2f1c
4 changed files with 36 additions and 7 deletions

View file

@ -147,7 +147,7 @@ auto CSVImportDialogPage::make_reader() -> Optional<Reader::XSV>
if (should_trim_trailing) if (should_trim_trailing)
behaviours = behaviours | Reader::ParserBehaviour::TrimTrailingFieldSpaces; behaviours = behaviours | Reader::ParserBehaviour::TrimTrailingFieldSpaces;
return Reader::XSV(m_csv, traits, behaviours); return Reader::XSV(m_csv, move(traits), behaviours);
}; };
void CSVImportDialogPage::update_preview() void CSVImportDialogPage::update_preview()
@ -195,6 +195,7 @@ Result<NonnullRefPtrVector<Sheet>, String> ImportDialog::make_and_run_for(String
NonnullRefPtrVector<Sheet> sheets; NonnullRefPtrVector<Sheet> sheets;
if (reader.has_value()) { if (reader.has_value()) {
reader->parse();
if (reader.value().has_error()) if (reader.value().has_error())
return String::formatted("CSV Import failed: {}", reader.value().error_string()); return String::formatted("CSV Import failed: {}", reader.value().error_string());

View file

@ -18,6 +18,7 @@ TEST_CASE(should_parse_valid_data)
4, 5, 6 4, 5, 6
"""x", y"z, 9)~~~"; """x", y"z, 9)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
csv.parse();
EXPECT(!csv.has_error()); EXPECT(!csv.has_error());
EXPECT_EQ(csv[0]["Foo"], "1"); EXPECT_EQ(csv[0]["Foo"], "1");
@ -31,6 +32,7 @@ TEST_CASE(should_parse_valid_data)
4, "5 " , 6 4, "5 " , 6
"""x", y"z, 9 )~~~"; """x", y"z, 9 )~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces | Reader::ParserBehaviour::TrimTrailingFieldSpaces }; auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces | Reader::ParserBehaviour::TrimTrailingFieldSpaces };
csv.parse();
EXPECT(!csv.has_error()); EXPECT(!csv.has_error());
EXPECT_EQ(csv[0]["Foo"], "1"); EXPECT_EQ(csv[0]["Foo"], "1");
@ -46,6 +48,7 @@ TEST_CASE(should_fail_nicely)
auto data = R"~~~(Foo, Bar, Baz auto data = R"~~~(Foo, Bar, Baz
x, y)~~~"; x, y)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
csv.parse();
EXPECT(csv.has_error()); EXPECT(csv.has_error());
EXPECT_EQ(csv.error(), Reader::ReadError::NonConformingColumnCount); EXPECT_EQ(csv.error(), Reader::ReadError::NonConformingColumnCount);
} }
@ -54,6 +57,7 @@ TEST_CASE(should_fail_nicely)
auto data = R"~~~(Foo, Bar, Baz auto data = R"~~~(Foo, Bar, Baz
x, y, "z)~~~"; x, y, "z)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
csv.parse();
EXPECT(csv.has_error()); EXPECT(csv.has_error());
EXPECT_EQ(csv.error(), Reader::ReadError::QuoteFailure); EXPECT_EQ(csv.error(), Reader::ReadError::QuoteFailure);
} }
@ -66,6 +70,7 @@ TEST_CASE(should_iterate_rows)
4, 5, 6 4, 5, 6
"""x", y"z, 9)~~~"; """x", y"z, 9)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
csv.parse();
EXPECT(!csv.has_error()); EXPECT(!csv.has_error());
bool ran = false; bool ran = false;
@ -82,6 +87,7 @@ BENCHMARK_CASE(fairly_big_data)
auto data = file_or_error.value()->read_all(); auto data = file_or_error.value()->read_all();
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders }; auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders };
csv.parse();
EXPECT(!csv.has_error()); EXPECT(!csv.has_error());
EXPECT_EQ(csv.size(), 100000u); EXPECT_EQ(csv.size(), 100000u);

View file

@ -11,12 +11,12 @@ namespace Reader {
ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right) ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right)
{ {
return static_cast<ParserBehaviour>(static_cast<u32>(left) & static_cast<u32>(right)); return static_cast<ParserBehaviour>(to_underlying(left) & to_underlying(right));
} }
ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right) ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right)
{ {
return static_cast<ParserBehaviour>(static_cast<u32>(left) | static_cast<u32>(right)); return static_cast<ParserBehaviour>(to_underlying(left) | to_underlying(right));
} }
void XSV::set_error(ReadError error) void XSV::set_error(ReadError error)
@ -43,8 +43,22 @@ Vector<String> XSV::headers() const
return headers; return headers;
} }
// Parses only the beginning of the source: resets the parser state, reads the
// header row when ReadHeaders is requested, then appends rows until an error
// is recorded, the lexer reaches end of input, or the preview cap is hit.
void XSV::parse_preview()
{
    // Upper bound on the number of rows collected for a preview.
    constexpr size_t preview_row_limit = 10;

    reset();

    bool const wants_headers = (m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None;
    if (wants_headers)
        read_headers();

    // The error/EOF checks stay ahead of the size check, as in the original loop.
    while (!has_error() && !m_lexer.is_eof() && m_rows.size() < preview_row_limit)
        m_rows.append(read_row());
}
void XSV::parse() void XSV::parse()
{ {
reset();
if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None) if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None)
read_headers(); read_headers();

View file

@ -59,17 +59,18 @@ constexpr ParserBehaviour default_behaviours()
class XSV { class XSV {
public: public:
XSV(StringView source, const ParserTraits& traits, ParserBehaviour behaviours = default_behaviours()) XSV(StringView source, ParserTraits traits, ParserBehaviour behaviours = default_behaviours())
: m_source(source) : m_source(source)
, m_lexer(m_source) , m_lexer(m_source)
, m_traits(traits) , m_traits(traits)
, m_behaviours(behaviours) , m_behaviours(behaviours)
{ {
parse(); parse_preview();
} }
virtual ~XSV() { } virtual ~XSV() { }
void parse();
bool has_error() const { return m_error != ReadError::None; } bool has_error() const { return m_error != ReadError::None; }
ReadError error() const { return m_error; } ReadError error() const { return m_error; }
String error_string() const String error_string() const
@ -180,8 +181,15 @@ private:
} }
}; };
void set_error(ReadError error); void set_error(ReadError error);
void parse(); void parse_preview();
void read_headers(); void read_headers();
// Returns the parser to its initial state: no recorded error, no header
// names, no rows, and a freshly constructed lexer over m_source.
void reset()
{
    m_error = ReadError::None;
    m_names.clear();
    m_rows.clear();
    m_lexer = GenericLexer { m_source };
}
Vector<Field> read_row(bool header_row = false); Vector<Field> read_row(bool header_row = false);
Field read_one_field(); Field read_one_field();
Field read_one_quoted_field(); Field read_one_quoted_field();
@ -189,7 +197,7 @@ private:
StringView m_source; StringView m_source;
GenericLexer m_lexer; GenericLexer m_lexer;
const ParserTraits& m_traits; ParserTraits m_traits;
ParserBehaviour m_behaviours; ParserBehaviour m_behaviours;
Vector<Field> m_names; Vector<Field> m_names;
Vector<Vector<Field>> m_rows; Vector<Vector<Field>> m_rows;