mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 06:37:44 +00:00
Spreadsheet: Make the CSV reader more lenient
This adds an option "Lenient" that makes the reader conform to what appears to be the norm in spreadsheet-land:
- Treat missing values as empty ones
- Update previously read rows if another row with more columns is seen afterwards
This commit is contained in:
parent
102065a8a9
commit
894bfa30a2
3 changed files with 28 additions and 5 deletions
|
@ -158,7 +158,7 @@ auto CSVImportDialogPage::make_reader() -> Optional<Reader::XSV>
|
|||
quote_escape,
|
||||
};
|
||||
|
||||
auto behaviours = Reader::default_behaviours();
|
||||
auto behaviours = Reader::default_behaviours() | Reader::ParserBehaviour::Lenient;
|
||||
|
||||
if (should_read_headers)
|
||||
behaviours = behaviours | Reader::ParserBehaviour::ReadHeaders;
|
||||
|
|
|
@ -103,10 +103,29 @@ Vector<XSV::Field> XSV::read_row(bool header_row)
|
|||
}
|
||||
}
|
||||
|
||||
if (!header_row && (m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None && row.size() != m_names.size())
|
||||
set_error(ReadError::NonConformingColumnCount);
|
||||
else if (!header_row && !has_explicit_headers() && !m_rows.is_empty() && m_rows.first().size() != row.size())
|
||||
set_error(ReadError::NonConformingColumnCount);
|
||||
auto is_lenient = (m_behaviours & ParserBehaviour::Lenient) != ParserBehaviour::None;
|
||||
if (is_lenient) {
|
||||
if (m_rows.is_empty())
|
||||
return row;
|
||||
|
||||
auto& last_row = m_rows.last();
|
||||
if (row.size() < last_row.size()) {
|
||||
if (!m_names.is_empty())
|
||||
row.resize(m_names.size());
|
||||
else
|
||||
row.resize(last_row.size());
|
||||
} else if (row.size() > last_row.size()) {
|
||||
auto new_size = row.size();
|
||||
for (auto& row : m_rows)
|
||||
row.resize(new_size);
|
||||
}
|
||||
} else {
|
||||
auto should_read_headers = (m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None;
|
||||
if (!header_row && should_read_headers && row.size() != m_names.size())
|
||||
set_error(ReadError::NonConformingColumnCount);
|
||||
else if (!header_row && !has_explicit_headers() && !m_rows.is_empty() && m_rows.first().size() != row.size())
|
||||
set_error(ReadError::NonConformingColumnCount);
|
||||
}
|
||||
|
||||
return row;
|
||||
}
|
||||
|
|
|
@ -41,6 +41,10 @@ enum class ParserBehaviour : u32 {
|
|||
TrimLeadingFieldSpaces = ReadHeaders << 2,
|
||||
TrimTrailingFieldSpaces = ReadHeaders << 3,
|
||||
QuoteOnlyInFieldStart = ReadHeaders << 4,
|
||||
Lenient = ReadHeaders << 5, // This is the typical "spreadsheet import" behavior
|
||||
// Currently, it:
|
||||
// - fills in missing fields with empty values
|
||||
// - updates previous rows with extra columns
|
||||
};
|
||||
|
||||
ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue