From 31523f6c644f193dea221b82f37ef91c1077b723 Mon Sep 17 00:00:00 2001 From: AnotherTest Date: Mon, 23 Nov 2020 11:52:45 +0330 Subject: [PATCH] Spreadsheet: Add a CSV reader and writer This is not utilised yet. --- Applications/Spreadsheet/CMakeLists.txt | 1 + Applications/Spreadsheet/Readers/CSV.h | 43 +++ .../Spreadsheet/Readers/Test/TestXSV.cpp | 110 +++++++ Applications/Spreadsheet/Readers/XSV.cpp | 272 ++++++++++++++++++ Applications/Spreadsheet/Readers/XSV.h | 208 ++++++++++++++ Applications/Spreadsheet/Writers/CSV.h | 44 +++ .../Writers/Test/TestXSVWriter.cpp | 96 +++++++ Applications/Spreadsheet/Writers/XSV.h | 215 ++++++++++++++ 8 files changed, 989 insertions(+) create mode 100644 Applications/Spreadsheet/Readers/CSV.h create mode 100644 Applications/Spreadsheet/Readers/Test/TestXSV.cpp create mode 100644 Applications/Spreadsheet/Readers/XSV.cpp create mode 100644 Applications/Spreadsheet/Readers/XSV.h create mode 100644 Applications/Spreadsheet/Writers/CSV.h create mode 100644 Applications/Spreadsheet/Writers/Test/TestXSVWriter.cpp create mode 100644 Applications/Spreadsheet/Writers/XSV.h diff --git a/Applications/Spreadsheet/CMakeLists.txt b/Applications/Spreadsheet/CMakeLists.txt index a80b4f07b7..9ba555450d 100644 --- a/Applications/Spreadsheet/CMakeLists.txt +++ b/Applications/Spreadsheet/CMakeLists.txt @@ -15,6 +15,7 @@ set(SOURCES CondFormattingViewUI.h HelpWindow.cpp JSIntegration.cpp + Readers/XSV.cpp Spreadsheet.cpp SpreadsheetModel.cpp SpreadsheetView.cpp diff --git a/Applications/Spreadsheet/Readers/CSV.h b/Applications/Spreadsheet/Readers/CSV.h new file mode 100644 index 0000000000..866ae67141 --- /dev/null +++ b/Applications/Spreadsheet/Readers/CSV.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "XSV.h" +#include +#include + +namespace Reader { + +class CSV : public XSV { +public: + CSV(StringView source, ParserBehaviour behaviours = default_behaviours()) + : XSV(source, { ",", "\"", ParserTraits::Repeat }, behaviours) + { + } +}; + +} diff --git a/Applications/Spreadsheet/Readers/Test/TestXSV.cpp b/Applications/Spreadsheet/Readers/Test/TestXSV.cpp new file mode 100644 index 0000000000..b80093d556 --- /dev/null +++ b/Applications/Spreadsheet/Readers/Test/TestXSV.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "../CSV.h" +#include "../XSV.h" +#include + +TEST_CASE(should_parse_valid_data) +{ + { + auto data = R"~~~(Foo, Bar, Baz + 1, 2, 3 + 4, 5, 6 + """x", y"z, 9)~~~"; + auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; + EXPECT(!csv.has_error()); + + EXPECT_EQ(csv[0]["Foo"], "1"); + EXPECT_EQ(csv[2]["Foo"], "\"x"); + EXPECT_EQ(csv[2]["Bar"], "y\"z"); + } + + { + auto data = R"~~~(Foo, Bar, Baz + 1 , 2, 3 + 4, "5 " , 6 + """x", y"z, 9 )~~~"; + auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces | Reader::ParserBehaviour::TrimTrailingFieldSpaces }; + EXPECT(!csv.has_error()); + + EXPECT_EQ(csv[0]["Foo"], "1"); + EXPECT_EQ(csv[1]["Bar"], "5 "); + EXPECT_EQ(csv[2]["Foo"], "\"x"); + EXPECT_EQ(csv[2]["Baz"], "9"); + } +} + +TEST_CASE(should_fail_nicely) +{ + { + auto data = R"~~~(Foo, Bar, Baz + x, y)~~~"; + auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; + EXPECT(csv.has_error()); + EXPECT_EQ(csv.error(), Reader::ReadError::NonConformingColumnCount); + } + + { + auto data = R"~~~(Foo, Bar, Baz + x, y, "z)~~~"; + auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; + EXPECT(csv.has_error()); + EXPECT_EQ(csv.error(), Reader::ReadError::QuoteFailure); + } +} + +TEST_CASE(should_iterate_rows) +{ + auto data = R"~~~(Foo, Bar, Baz + 1, 2, 3 + 4, 5, 6 + """x", y"z, 9)~~~"; + auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; + EXPECT(!csv.has_error()); + + bool ran = false; + for (auto row : csv) + ran = !row[0].is_empty(); + + EXPECT(ran); +} + 
+BENCHMARK_CASE(fairly_big_data) +{ + auto file_or_error = Core::File::open(__FILE__ ".data", Core::IODevice::OpenMode::ReadOnly); + EXPECT_EQ_FORCE(file_or_error.is_error(), false); + + auto data = file_or_error.value()->read_all(); + auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders }; + + EXPECT(!csv.has_error()); + EXPECT_EQ(csv.size(), 100000u); +} + +TEST_MAIN(XSV) diff --git a/Applications/Spreadsheet/Readers/XSV.cpp b/Applications/Spreadsheet/Readers/XSV.cpp new file mode 100644 index 0000000000..d87c86748e --- /dev/null +++ b/Applications/Spreadsheet/Readers/XSV.cpp @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "XSV.h" +#include + +namespace Reader { + +ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right) +{ + return static_cast(static_cast(left) & static_cast(right)); +} + +ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right) +{ + return static_cast(static_cast(left) | static_cast(right)); +} + +void XSV::set_error(ReadError error) +{ + if (m_error == ReadError::None) + m_error = error; +} + +Vector XSV::headers() const +{ + Vector headers; + for (auto& field : m_names) + headers.append(field.is_string_view ? 
field.as_string_view : field.as_string.view()); + + return headers; +} + +void XSV::parse() +{ + if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None) + read_headers(); + + while (!has_error() && !m_lexer.is_eof()) + m_rows.append(read_row()); + + if (!m_lexer.is_eof()) + set_error(ReadError::DataPastLogicalEnd); +} + +void XSV::read_headers() +{ + if (!m_names.is_empty()) { + set_error(ReadError::InternalError); + m_names.clear(); + } + + m_names = read_row(true); +} + +Vector XSV::read_row(bool header_row) +{ + Vector row; + bool first = true; + while (!(m_lexer.is_eof() || m_lexer.next_is('\n') || m_lexer.next_is("\r\n")) && (first || m_lexer.consume_specific(m_traits.separator))) { + first = false; + row.append(read_one_field()); + } + + if (!m_lexer.is_eof()) { + auto crlf_ok = m_lexer.consume_specific("\r\n"); + if (!crlf_ok) { + auto lf_ok = m_lexer.consume_specific('\n'); + if (!lf_ok) + set_error(ReadError::DataPastLogicalEnd); + } + } + + if (!header_row && (m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None && row.size() != m_names.size()) + set_error(ReadError::NonConformingColumnCount); + + return row; +} + +XSV::Field XSV::read_one_field() +{ + if ((m_behaviours & ParserBehaviour::TrimLeadingFieldSpaces) != ParserBehaviour::None) + m_lexer.consume_while(is_any_of(" \t\v")); + + bool is_quoted = false; + Field field; + if (m_lexer.next_is(m_traits.quote.view())) { + is_quoted = true; + field = read_one_quoted_field(); + } else { + field = read_one_unquoted_field(); + } + + if ((m_behaviours & ParserBehaviour::TrimTrailingFieldSpaces) != ParserBehaviour::None) { + m_lexer.consume_while(is_any_of(" \t\v")); + + if (!is_quoted) { + // Also have to trim trailing spaces from unquoted fields. 
+ StringView view; + if (field.is_string_view) + view = field.as_string_view; + else + view = field.as_string; + + if (!view.is_empty()) { + ssize_t i = view.length() - 1; + for (; i >= 0; --i) { + if (!view.substring_view(i, 1).is_one_of(" ", "\t", "\v")) + break; + } + view = view.substring_view(0, i + 1); + } + + if (field.is_string_view) + field.as_string_view = view; + else + field.as_string = field.as_string.substring(0, view.length()); + } + } + + return field; +} + +XSV::Field XSV::read_one_quoted_field() +{ + if (!m_lexer.consume_specific(m_traits.quote)) + set_error(ReadError::InternalError); + + size_t start = m_lexer.tell(), end = start; + bool is_copy = false; + StringBuilder builder; + auto allow_newlines = (m_behaviours & ParserBehaviour::AllowNewlinesInFields) != ParserBehaviour::None; + + for (; !m_lexer.is_eof();) { + char ch; + switch (m_traits.quote_escape) { + case ParserTraits::Backslash: + if (m_lexer.consume_specific('\\') && m_lexer.consume_specific(m_traits.quote)) { + // If there is an escaped quote, we have no choice but to make a copy. + if (!is_copy) { + is_copy = true; + builder.append(m_source.substring_view(start, end - start)); + } + builder.append(m_traits.quote); + end = m_lexer.tell(); + continue; + } + break; + case ParserTraits::Repeat: + if (m_lexer.consume_specific(m_traits.quote)) { + if (m_lexer.consume_specific(m_traits.quote)) { + // If there is an escaped quote, we have no choice but to make a copy. 
+ if (!is_copy) { + is_copy = true; + builder.append(m_source.substring_view(start, end - start)); + } + builder.append(m_traits.quote); + end = m_lexer.tell(); + continue; + } + for (size_t i = 0; i < m_traits.quote.length(); ++i) + m_lexer.retreat(); + goto end; + } + break; + } + + if (m_lexer.next_is(m_traits.quote.view())) + goto end; + + if (!allow_newlines) { + if (m_lexer.next_is('\n') || m_lexer.next_is("\r\n")) + goto end; + } + + ch = m_lexer.consume(); + if (is_copy) + builder.append(ch); + end = m_lexer.tell(); + continue; + + end: + break; + } + + if (!m_lexer.consume_specific(m_traits.quote)) + set_error(ReadError::QuoteFailure); + + if (is_copy) + return { {}, builder.to_string(), false }; + + return { m_source.substring_view(start, end - start), {}, true }; +} + +XSV::Field XSV::read_one_unquoted_field() +{ + size_t start = m_lexer.tell(), end = start; + bool allow_quote_in_field = (m_behaviours & ParserBehaviour::QuoteOnlyInFieldStart) != ParserBehaviour::None; + + for (; !m_lexer.is_eof();) { + if (m_lexer.next_is(m_traits.separator.view())) + break; + + if (m_lexer.next_is("\r\n") || m_lexer.next_is("\n")) + break; + + if (m_lexer.consume_specific(m_traits.quote)) { + if (!allow_quote_in_field) + set_error(ReadError::QuoteFailure); + end = m_lexer.tell(); + continue; + } + + m_lexer.consume(); + end = m_lexer.tell(); + } + + return { m_source.substring_view(start, end - start), {}, true }; +} + +StringView XSV::Row::operator[](StringView name) const +{ + ASSERT(!m_xsv.m_names.is_empty()); + auto it = m_xsv.m_names.find([&](auto&& entry) { return name == entry; }); + ASSERT(!it.is_end()); + + return (*this)[it.index()]; +} + +StringView XSV::Row::operator[](size_t column) const +{ + auto& field = m_xsv.m_rows[m_index][column]; + if (field.is_string_view) + return field.as_string_view; + return field.as_string; +} + +const XSV::Row XSV::operator[](size_t index) const +{ + return const_cast(*this)[index]; +} + +XSV::Row XSV::operator[](size_t 
index) +{ + ASSERT(m_rows.size() > index); + return Row { *this, index }; +} + +} diff --git a/Applications/Spreadsheet/Readers/XSV.h b/Applications/Spreadsheet/Readers/XSV.h new file mode 100644 index 0000000000..0b32ca767d --- /dev/null +++ b/Applications/Spreadsheet/Readers/XSV.h @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace Reader { + +enum class ParserBehaviour : u32 { + None = 0, + ReadHeaders = 1, + AllowNewlinesInFields = ReadHeaders << 1, + TrimLeadingFieldSpaces = ReadHeaders << 2, + TrimTrailingFieldSpaces = ReadHeaders << 3, + QuoteOnlyInFieldStart = ReadHeaders << 4, +}; + +ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right); +ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right); + +struct ParserTraits { + String separator; + String quote { "\"" }; + enum { + Repeat, + Backslash, + } quote_escape { Repeat }; +}; + +#define ENUMERATE_READ_ERRORS() \ + E(None, "No errors") \ + E(NonConformingColumnCount, "Header count does not match given column count") \ + E(QuoteFailure, "Quoting failure") \ + E(InternalError, "Internal error") \ + E(DataPastLogicalEnd, "Extra data past the logical end of the rows") + +enum class ReadError { +#define E(name, _) name, + ENUMERATE_READ_ERRORS() +#undef E +}; + +inline constexpr ParserBehaviour default_behaviours() +{ + return ParserBehaviour::QuoteOnlyInFieldStart; +} + +class XSV { +public: + XSV(StringView source, const ParserTraits& traits, ParserBehaviour behaviours = default_behaviours()) + : m_source(source) + , m_lexer(m_source) + , m_traits(traits) + , m_behaviours(behaviours) + { + parse(); + } + + virtual ~XSV() { } + + bool has_error() const { return m_error != ReadError::None; } + ReadError error() const { return m_error; } + String error_string() const + { + switch (m_error) { +#define E(x, y) \ + case ReadError::x: \ + return y; + + ENUMERATE_READ_ERRORS(); +#undef E + } + ASSERT_NOT_REACHED(); + } + + size_t size() const { return m_rows.size(); } + Vector headers() const; + + class Row { + public: + explicit Row(XSV& xsv, size_t index) + : m_xsv(xsv) + , m_index(index) + { + } + + StringView operator[](StringView name) const; + StringView operator[](size_t column) const; + + size_t index() const { return 
m_index; } + + // FIXME: Implement begin() and end(), keeping `Field' out of the API. + + private: + XSV& m_xsv; + size_t m_index { 0 }; + }; + + template + class RowIterator { + public: + explicit RowIterator(const XSV& xsv, size_t init_index = 0) requires(const_) + : m_xsv(const_cast(xsv)) + , m_index(init_index) + { + } + + explicit RowIterator(XSV& xsv, size_t init_index = 0) requires(!const_) + : m_xsv(xsv) + , m_index(init_index) + { + } + + Row operator*() const { return Row { m_xsv, m_index }; } + Row operator*() requires(!const_) { return Row { m_xsv, m_index }; } + + RowIterator& operator++() + { + ++m_index; + return *this; + } + + bool is_end() const { return m_index == m_xsv.m_rows.size(); } + bool operator==(const RowIterator& other) const + { + return m_index == other.m_index && &m_xsv == &other.m_xsv; + } + bool operator==(const RowIterator& other) const + { + return m_index == other.m_index && &m_xsv == &other.m_xsv; + } + + private: + XSV& m_xsv; + size_t m_index { 0 }; + }; + + const Row operator[](size_t index) const; + Row operator[](size_t index); + + auto begin() { return RowIterator(*this); } + auto end() { return RowIterator(*this, m_rows.size()); } + + auto begin() const { return RowIterator(*this); } + auto end() const { return RowIterator(*this, m_rows.size()); } + + using ConstIterator = RowIterator; + using Iterator = RowIterator; + +private: + struct Field { + StringView as_string_view; + String as_string; // This member is only used if the parser couldn't use the original source verbatim. 
+ bool is_string_view { true }; + + bool operator==(StringView other) const + { + if (is_string_view) + return other == as_string_view; + return as_string == other; + } + }; + void set_error(ReadError error); + void parse(); + void read_headers(); + Vector read_row(bool header_row = false); + Field read_one_field(); + Field read_one_quoted_field(); + Field read_one_unquoted_field(); + + StringView m_source; + GenericLexer m_lexer; + const ParserTraits& m_traits; + ParserBehaviour m_behaviours; + Vector m_names; + Vector> m_rows; + ReadError m_error { ReadError::None }; +}; + +} diff --git a/Applications/Spreadsheet/Writers/CSV.h b/Applications/Spreadsheet/Writers/CSV.h new file mode 100644 index 0000000000..49940fbdf1 --- /dev/null +++ b/Applications/Spreadsheet/Writers/CSV.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "XSV.h" +#include +#include + +namespace Writer { + +template +class CSV : public XSV { +public: + CSV(OutputStream& output, const ContainerType& data, const Vector& headers = {}, WriterBehaviour behaviours = default_behaviours()) + : XSV(output, data, { ",", "\"", WriterTraits::Repeat }, headers, behaviours) + { + } +}; + +} diff --git a/Applications/Spreadsheet/Writers/Test/TestXSVWriter.cpp b/Applications/Spreadsheet/Writers/Test/TestXSVWriter.cpp new file mode 100644 index 0000000000..4971658431 --- /dev/null +++ b/Applications/Spreadsheet/Writers/Test/TestXSVWriter.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "../CSV.h" +#include "../XSV.h" +#include + +TEST_CASE(can_write) +{ + Vector> data = { + { 1, 2, 3 }, + { 4, 5, 6 }, + { 7, 8, 9 }, + }; + + auto buffer = ByteBuffer::create_uninitialized(1024); + OutputMemoryStream stream { buffer }; + + Writer::CSV csv(stream, data); + + auto expected_output = R"~(1,2,3 +4,5,6 +7,8,9 +)~"; + + EXPECT_EQ(StringView { stream.bytes() }, expected_output); +} + +TEST_CASE(can_write_with_header) +{ + Vector> data = { + { 1, 2, 3 }, + { 4, 5, 6 }, + { 7, 8, 9 }, + }; + + auto buffer = ByteBuffer::create_uninitialized(1024); + OutputMemoryStream stream { buffer }; + + Writer::CSV csv(stream, data, { "A", "B\"", "C" }); + + auto expected_output = R"~(A,"B""",C +1,2,3 +4,5,6 +7,8,9 +)~"; + + EXPECT_EQ(StringView { stream.bytes() }, expected_output); +} + +TEST_CASE(can_write_with_different_behaviours) +{ + Vector> data = { + { "Well", "Hello\"", "Friends" }, + { "We\"ll", "Hello,", " Friends" }, + }; + + auto buffer = ByteBuffer::create_uninitialized(1024); + OutputMemoryStream stream { buffer }; + + Writer::CSV csv(stream, data, { "A", "B\"", "C" }, Writer::WriterBehaviour::QuoteOnlyInFieldStart | 
Writer::WriterBehaviour::WriteHeaders); + + auto expected_output = R"~(A,B",C +Well,Hello",Friends +We"ll,"Hello,", Friends +)~"; + + EXPECT_EQ(StringView { stream.bytes() }, expected_output); +} + +TEST_MAIN(XSV) diff --git a/Applications/Spreadsheet/Writers/XSV.h b/Applications/Spreadsheet/Writers/XSV.h new file mode 100644 index 0000000000..7a065f87d1 --- /dev/null +++ b/Applications/Spreadsheet/Writers/XSV.h @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace Writer { + +enum class WriterBehaviour : u32 { + None = 0, + WriteHeaders = 1, + AllowNewlinesInFields = WriteHeaders << 1, + QuoteOnlyInFieldStart = WriteHeaders << 2, + QuoteAll = WriteHeaders << 3, +}; + +inline WriterBehaviour operator&(WriterBehaviour left, WriterBehaviour right) +{ + return static_cast(static_cast(left) & static_cast(right)); +} + +inline WriterBehaviour operator|(WriterBehaviour left, WriterBehaviour right) +{ + return static_cast(static_cast(left) | static_cast(right)); +} + +struct WriterTraits { + String separator; + String quote { "\"" }; + enum { + Repeat, + Backslash, + } quote_escape { Repeat }; +}; + +#define ENUMERATE_WRITE_ERRORS() \ + E(None, "No errors") \ + E(NonConformingColumnCount, "Header count does not match given column count") \ + E(InternalError, "Internal error") + +enum class WriteError { +#define E(name, _) name, + ENUMERATE_WRITE_ERRORS() +#undef E +}; + +inline constexpr WriterBehaviour default_behaviours() +{ + return WriterBehaviour::None; +} + +template +class XSV { +public: + XSV(OutputStream& output, const ContainerType& data, const WriterTraits& traits, const Vector& headers = {}, WriterBehaviour behaviours = default_behaviours()) + : m_data(data) + , m_traits(traits) + , m_behaviours(behaviours) + , m_names(headers) + , m_output(output) + { + if (!headers.is_empty()) + m_behaviours = m_behaviours | WriterBehaviour::WriteHeaders; + + generate(); + } + + virtual ~XSV() { } + + bool has_error() const { return m_error != WriteError::None; } + WriteError error() const { return m_error; } + String error_string() const + { + switch (m_error) { +#define E(x, y) \ + case WriteError::x: \ + return y; + + ENUMERATE_WRITE_ERRORS(); +#undef E + } + ASSERT_NOT_REACHED(); + } + +private: + void set_error(WriteError error) + { + if (m_error == WriteError::None) + m_error = error; + } + + void generate() + { + auto 
with_headers = (m_behaviours & WriterBehaviour::WriteHeaders) != WriterBehaviour::None; + if (with_headers) { + write_row(m_names); + if (m_output.write({ "\n", 1 }) != 1) + set_error(WriteError::InternalError); + } + + for (auto&& row : m_data) { + if (with_headers) { + if (row.size() != m_names.size()) + set_error(WriteError::NonConformingColumnCount); + } + + write_row(row); + if (m_output.write({ "\n", 1 }) != 1) + set_error(WriteError::InternalError); + } + } + + template + void write_row(T&& row) + { + bool first = true; + for (auto&& entry : row) { + if (!first) { + if (m_output.write(m_traits.separator.bytes()) != m_traits.separator.length()) + set_error(WriteError::InternalError); + } + first = false; + write_entry(entry); + } + } + + template + void write_entry(T&& entry) + { + auto string = String::formatted("{}", FormatIfSupported(entry)); + + auto safe_to_write_normally = !string.contains("\n") && !string.contains(m_traits.separator); + if (safe_to_write_normally) { + if ((m_behaviours & WriterBehaviour::QuoteOnlyInFieldStart) == WriterBehaviour::None) + safe_to_write_normally = !string.contains(m_traits.quote); + else + safe_to_write_normally = !string.starts_with(m_traits.quote); + } + if (safe_to_write_normally) { + if (m_output.write(string.bytes()) != string.length()) + set_error(WriteError::InternalError); + return; + } + + if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length()) + set_error(WriteError::InternalError); + + GenericLexer lexer(string); + while (!lexer.is_eof()) { + if (lexer.consume_specific(m_traits.quote)) { + switch (m_traits.quote_escape) { + case WriterTraits::Repeat: + if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length()) + set_error(WriteError::InternalError); + if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length()) + set_error(WriteError::InternalError); + break; + case WriterTraits::Backslash: + if (m_output.write({ "\\", 1 }) != 1) + set_error(WriteError::InternalError); + if 
(m_output.write(m_traits.quote.bytes()) != m_traits.quote.length()) + set_error(WriteError::InternalError); + break; + } + continue; + } + + auto ch = lexer.consume(); + if (m_output.write({ &ch, 1 }) != 1) + set_error(WriteError::InternalError); + } + + if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length()) + set_error(WriteError::InternalError); + } + + const ContainerType& m_data; + const WriterTraits& m_traits; + WriterBehaviour m_behaviours; + const Vector& m_names; + WriteError m_error { WriteError::None }; + OutputStream& m_output; +}; + +}