mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 06:37:44 +00:00
Spreadsheet: Make the CSV reader more lenient
This adds an option "Lenient" that makes the reader conform to what appears to be the norm in spreadsheet-land:
- Treat missing values as empty ones
- Update previously read rows if another row with more columns is seen afterwards
This commit is contained in:
parent
102065a8a9
commit
894bfa30a2
3 changed files with 28 additions and 5 deletions
|
@ -158,7 +158,7 @@ auto CSVImportDialogPage::make_reader() -> Optional<Reader::XSV>
|
||||||
quote_escape,
|
quote_escape,
|
||||||
};
|
};
|
||||||
|
|
||||||
auto behaviours = Reader::default_behaviours();
|
auto behaviours = Reader::default_behaviours() | Reader::ParserBehaviour::Lenient;
|
||||||
|
|
||||||
if (should_read_headers)
|
if (should_read_headers)
|
||||||
behaviours = behaviours | Reader::ParserBehaviour::ReadHeaders;
|
behaviours = behaviours | Reader::ParserBehaviour::ReadHeaders;
|
||||||
|
|
|
@ -103,10 +103,29 @@ Vector<XSV::Field> XSV::read_row(bool header_row)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!header_row && (m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None && row.size() != m_names.size())
|
auto is_lenient = (m_behaviours & ParserBehaviour::Lenient) != ParserBehaviour::None;
|
||||||
set_error(ReadError::NonConformingColumnCount);
|
if (is_lenient) {
|
||||||
else if (!header_row && !has_explicit_headers() && !m_rows.is_empty() && m_rows.first().size() != row.size())
|
if (m_rows.is_empty())
|
||||||
set_error(ReadError::NonConformingColumnCount);
|
return row;
|
||||||
|
|
||||||
|
auto& last_row = m_rows.last();
|
||||||
|
if (row.size() < last_row.size()) {
|
||||||
|
if (!m_names.is_empty())
|
||||||
|
row.resize(m_names.size());
|
||||||
|
else
|
||||||
|
row.resize(last_row.size());
|
||||||
|
} else if (row.size() > last_row.size()) {
|
||||||
|
auto new_size = row.size();
|
||||||
|
for (auto& row : m_rows)
|
||||||
|
row.resize(new_size);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
auto should_read_headers = (m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None;
|
||||||
|
if (!header_row && should_read_headers && row.size() != m_names.size())
|
||||||
|
set_error(ReadError::NonConformingColumnCount);
|
||||||
|
else if (!header_row && !has_explicit_headers() && !m_rows.is_empty() && m_rows.first().size() != row.size())
|
||||||
|
set_error(ReadError::NonConformingColumnCount);
|
||||||
|
}
|
||||||
|
|
||||||
return row;
|
return row;
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,6 +41,10 @@ enum class ParserBehaviour : u32 {
|
||||||
TrimLeadingFieldSpaces = ReadHeaders << 2,
|
TrimLeadingFieldSpaces = ReadHeaders << 2,
|
||||||
TrimTrailingFieldSpaces = ReadHeaders << 3,
|
TrimTrailingFieldSpaces = ReadHeaders << 3,
|
||||||
QuoteOnlyInFieldStart = ReadHeaders << 4,
|
QuoteOnlyInFieldStart = ReadHeaders << 4,
|
||||||
|
Lenient = ReadHeaders << 5, // This is the typical "spreadsheet import" behavior
|
||||||
|
// Currently, it:
|
||||||
|
// - fills in missing fields with empty values
|
||||||
|
// - updates previous rows with extra columns
|
||||||
};
|
};
|
||||||
|
|
||||||
ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right);
|
ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue