mirror of
https://github.com/RGBCube/serenity
synced 2025-07-26 21:37:36 +00:00
Spreadsheet: Make the XSV parser start with a preview parse
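Previously the constructor parsed the whole document up front, which is really wasteful and super slow. The constructor now only parses a short preview; callers that need every row must call parse() explicitly.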
This commit is contained in:
parent
88b168ff16
commit
b11b3c2f1c
4 changed files with 36 additions and 7 deletions
|
@ -147,7 +147,7 @@ auto CSVImportDialogPage::make_reader() -> Optional<Reader::XSV>
|
||||||
if (should_trim_trailing)
|
if (should_trim_trailing)
|
||||||
behaviours = behaviours | Reader::ParserBehaviour::TrimTrailingFieldSpaces;
|
behaviours = behaviours | Reader::ParserBehaviour::TrimTrailingFieldSpaces;
|
||||||
|
|
||||||
return Reader::XSV(m_csv, traits, behaviours);
|
return Reader::XSV(m_csv, move(traits), behaviours);
|
||||||
};
|
};
|
||||||
|
|
||||||
void CSVImportDialogPage::update_preview()
|
void CSVImportDialogPage::update_preview()
|
||||||
|
@ -195,6 +195,7 @@ Result<NonnullRefPtrVector<Sheet>, String> ImportDialog::make_and_run_for(String
|
||||||
NonnullRefPtrVector<Sheet> sheets;
|
NonnullRefPtrVector<Sheet> sheets;
|
||||||
|
|
||||||
if (reader.has_value()) {
|
if (reader.has_value()) {
|
||||||
|
reader->parse();
|
||||||
if (reader.value().has_error())
|
if (reader.value().has_error())
|
||||||
return String::formatted("CSV Import failed: {}", reader.value().error_string());
|
return String::formatted("CSV Import failed: {}", reader.value().error_string());
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ TEST_CASE(should_parse_valid_data)
|
||||||
4, 5, 6
|
4, 5, 6
|
||||||
"""x", y"z, 9)~~~";
|
"""x", y"z, 9)~~~";
|
||||||
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
|
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
|
||||||
|
csv.parse();
|
||||||
EXPECT(!csv.has_error());
|
EXPECT(!csv.has_error());
|
||||||
|
|
||||||
EXPECT_EQ(csv[0]["Foo"], "1");
|
EXPECT_EQ(csv[0]["Foo"], "1");
|
||||||
|
@ -31,6 +32,7 @@ TEST_CASE(should_parse_valid_data)
|
||||||
4, "5 " , 6
|
4, "5 " , 6
|
||||||
"""x", y"z, 9 )~~~";
|
"""x", y"z, 9 )~~~";
|
||||||
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces | Reader::ParserBehaviour::TrimTrailingFieldSpaces };
|
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces | Reader::ParserBehaviour::TrimTrailingFieldSpaces };
|
||||||
|
csv.parse();
|
||||||
EXPECT(!csv.has_error());
|
EXPECT(!csv.has_error());
|
||||||
|
|
||||||
EXPECT_EQ(csv[0]["Foo"], "1");
|
EXPECT_EQ(csv[0]["Foo"], "1");
|
||||||
|
@ -46,6 +48,7 @@ TEST_CASE(should_fail_nicely)
|
||||||
auto data = R"~~~(Foo, Bar, Baz
|
auto data = R"~~~(Foo, Bar, Baz
|
||||||
x, y)~~~";
|
x, y)~~~";
|
||||||
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
|
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
|
||||||
|
csv.parse();
|
||||||
EXPECT(csv.has_error());
|
EXPECT(csv.has_error());
|
||||||
EXPECT_EQ(csv.error(), Reader::ReadError::NonConformingColumnCount);
|
EXPECT_EQ(csv.error(), Reader::ReadError::NonConformingColumnCount);
|
||||||
}
|
}
|
||||||
|
@ -54,6 +57,7 @@ TEST_CASE(should_fail_nicely)
|
||||||
auto data = R"~~~(Foo, Bar, Baz
|
auto data = R"~~~(Foo, Bar, Baz
|
||||||
x, y, "z)~~~";
|
x, y, "z)~~~";
|
||||||
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
|
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
|
||||||
|
csv.parse();
|
||||||
EXPECT(csv.has_error());
|
EXPECT(csv.has_error());
|
||||||
EXPECT_EQ(csv.error(), Reader::ReadError::QuoteFailure);
|
EXPECT_EQ(csv.error(), Reader::ReadError::QuoteFailure);
|
||||||
}
|
}
|
||||||
|
@ -66,6 +70,7 @@ TEST_CASE(should_iterate_rows)
|
||||||
4, 5, 6
|
4, 5, 6
|
||||||
"""x", y"z, 9)~~~";
|
"""x", y"z, 9)~~~";
|
||||||
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
|
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
|
||||||
|
csv.parse();
|
||||||
EXPECT(!csv.has_error());
|
EXPECT(!csv.has_error());
|
||||||
|
|
||||||
bool ran = false;
|
bool ran = false;
|
||||||
|
@ -82,6 +87,7 @@ BENCHMARK_CASE(fairly_big_data)
|
||||||
|
|
||||||
auto data = file_or_error.value()->read_all();
|
auto data = file_or_error.value()->read_all();
|
||||||
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders };
|
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders };
|
||||||
|
csv.parse();
|
||||||
|
|
||||||
EXPECT(!csv.has_error());
|
EXPECT(!csv.has_error());
|
||||||
EXPECT_EQ(csv.size(), 100000u);
|
EXPECT_EQ(csv.size(), 100000u);
|
||||||
|
|
|
@ -11,12 +11,12 @@ namespace Reader {
|
||||||
|
|
||||||
ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right)
|
ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right)
|
||||||
{
|
{
|
||||||
return static_cast<ParserBehaviour>(static_cast<u32>(left) & static_cast<u32>(right));
|
return static_cast<ParserBehaviour>(to_underlying(left) & to_underlying(right));
|
||||||
}
|
}
|
||||||
|
|
||||||
ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right)
|
ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right)
|
||||||
{
|
{
|
||||||
return static_cast<ParserBehaviour>(static_cast<u32>(left) | static_cast<u32>(right));
|
return static_cast<ParserBehaviour>(to_underlying(left) | to_underlying(right));
|
||||||
}
|
}
|
||||||
|
|
||||||
void XSV::set_error(ReadError error)
|
void XSV::set_error(ReadError error)
|
||||||
|
@ -43,8 +43,22 @@ Vector<String> XSV::headers() const
|
||||||
return headers;
|
return headers;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void XSV::parse_preview()
|
||||||
|
{
|
||||||
|
reset();
|
||||||
|
if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None)
|
||||||
|
read_headers();
|
||||||
|
|
||||||
|
while (!has_error() && !m_lexer.is_eof()) {
|
||||||
|
if (m_rows.size() >= 10)
|
||||||
|
break;
|
||||||
|
m_rows.append(read_row());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void XSV::parse()
|
void XSV::parse()
|
||||||
{
|
{
|
||||||
|
reset();
|
||||||
if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None)
|
if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None)
|
||||||
read_headers();
|
read_headers();
|
||||||
|
|
||||||
|
|
|
@ -59,17 +59,18 @@ constexpr ParserBehaviour default_behaviours()
|
||||||
|
|
||||||
class XSV {
|
class XSV {
|
||||||
public:
|
public:
|
||||||
XSV(StringView source, const ParserTraits& traits, ParserBehaviour behaviours = default_behaviours())
|
XSV(StringView source, ParserTraits traits, ParserBehaviour behaviours = default_behaviours())
|
||||||
: m_source(source)
|
: m_source(source)
|
||||||
, m_lexer(m_source)
|
, m_lexer(m_source)
|
||||||
, m_traits(traits)
|
, m_traits(traits)
|
||||||
, m_behaviours(behaviours)
|
, m_behaviours(behaviours)
|
||||||
{
|
{
|
||||||
parse();
|
parse_preview();
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~XSV() { }
|
virtual ~XSV() { }
|
||||||
|
|
||||||
|
void parse();
|
||||||
bool has_error() const { return m_error != ReadError::None; }
|
bool has_error() const { return m_error != ReadError::None; }
|
||||||
ReadError error() const { return m_error; }
|
ReadError error() const { return m_error; }
|
||||||
String error_string() const
|
String error_string() const
|
||||||
|
@ -180,8 +181,15 @@ private:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
void set_error(ReadError error);
|
void set_error(ReadError error);
|
||||||
void parse();
|
void parse_preview();
|
||||||
void read_headers();
|
void read_headers();
|
||||||
|
void reset()
|
||||||
|
{
|
||||||
|
m_lexer = GenericLexer { m_source };
|
||||||
|
m_rows.clear();
|
||||||
|
m_names.clear();
|
||||||
|
m_error = ReadError::None;
|
||||||
|
}
|
||||||
Vector<Field> read_row(bool header_row = false);
|
Vector<Field> read_row(bool header_row = false);
|
||||||
Field read_one_field();
|
Field read_one_field();
|
||||||
Field read_one_quoted_field();
|
Field read_one_quoted_field();
|
||||||
|
@ -189,7 +197,7 @@ private:
|
||||||
|
|
||||||
StringView m_source;
|
StringView m_source;
|
||||||
GenericLexer m_lexer;
|
GenericLexer m_lexer;
|
||||||
const ParserTraits& m_traits;
|
ParserTraits m_traits;
|
||||||
ParserBehaviour m_behaviours;
|
ParserBehaviour m_behaviours;
|
||||||
Vector<Field> m_names;
|
Vector<Field> m_names;
|
||||||
Vector<Vector<Field>> m_rows;
|
Vector<Vector<Field>> m_rows;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue