From 6e7a6e2d026aef77395518593b3417a8e6b6e6a7 Mon Sep 17 00:00:00 2001
From: Timothy Flynn
Date: Tue, 14 Feb 2023 12:13:53 -0500
Subject: [PATCH] LibUnicode: Support finding the next/previous text segmentation boundary

---
Reviewer notes (placed after the three-dash line, so they are not part of
the commit message):

 * next_*_segmentation_boundary(view, index) returns the first boundary
   reported by the matching for_each_*_segmentation_boundary() that is
   strictly greater than index. The Optional stays empty when no reported
   boundary is greater than index.
 * previous_*_segmentation_boundary(view, index) keeps the most recently
   reported boundary for as long as boundaries are strictly less than index,
   and stops iterating at the first boundary >= index. The Optional stays
   empty when the first reported boundary is already >= index.
   NOTE(review): this is the closest preceding boundary only if boundaries
   are reported in ascending order -- presumably so, but the iteration
   functions are not defined in this patch; confirm.
 * NOTE(review): the From: header carries no e-mail address; git-am expects
   one, so restore it before applying.
 * NOTE(review): the #include targets in the first hunk were reconstructed
   from the names this header uses; confirm against the tree.

 Userland/Libraries/LibUnicode/Segmentation.h | 103 +++++++++++++++++++
 1 file changed, 103 insertions(+)

diff --git a/Userland/Libraries/LibUnicode/Segmentation.h b/Userland/Libraries/LibUnicode/Segmentation.h
index 6e0f24f184..126e5db78c 100644
--- a/Userland/Libraries/LibUnicode/Segmentation.h
+++ b/Userland/Libraries/LibUnicode/Segmentation.h
@@ -10,6 +10,7 @@
 #include <AK/Forward.h>
 #include <AK/Function.h>
 #include <AK/IterationDecision.h>
+#include <AK/Optional.h>
 #include <AK/Types.h>
 #include <AK/Vector.h>
 
@@ -34,6 +35,40 @@ Vector<size_t> find_grapheme_segmentation_boundaries(ViewType const& view)
     return boundaries;
 }
 
+template<typename ViewType>
+Optional<size_t> next_grapheme_segmentation_boundary(ViewType const& view, size_t index)
+{
+    Optional<size_t> result;
+
+    for_each_grapheme_segmentation_boundary(view, [&](auto boundary) {
+        if (boundary > index) {
+            result = boundary;
+            return IterationDecision::Break;
+        }
+
+        return IterationDecision::Continue;
+    });
+
+    return result;
+}
+
+template<typename ViewType>
+Optional<size_t> previous_grapheme_segmentation_boundary(ViewType const& view, size_t index)
+{
+    Optional<size_t> result;
+
+    for_each_grapheme_segmentation_boundary(view, [&](auto boundary) {
+        if (boundary < index) {
+            result = boundary;
+            return IterationDecision::Continue;
+        }
+
+        return IterationDecision::Break;
+    });
+
+    return result;
+}
+
 void for_each_word_segmentation_boundary(Utf8View const&, SegmentationCallback);
 void for_each_word_segmentation_boundary(Utf16View const&, SegmentationCallback);
 void for_each_word_segmentation_boundary(Utf32View const&, SegmentationCallback);
@@ -51,6 +86,40 @@ Vector<size_t> find_word_segmentation_boundaries(ViewType const& view)
     return boundaries;
 }
 
+template<typename ViewType>
+Optional<size_t> next_word_segmentation_boundary(ViewType const& view, size_t index)
+{
+    Optional<size_t> result;
+
+    for_each_word_segmentation_boundary(view, [&](auto boundary) {
+        if (boundary > index) {
+            result = boundary;
+            return IterationDecision::Break;
+        }
+
+        return IterationDecision::Continue;
+    });
+
+    return result;
+}
+
+template<typename ViewType>
+Optional<size_t> previous_word_segmentation_boundary(ViewType const& view, size_t index)
+{
+    Optional<size_t> result;
+
+    for_each_word_segmentation_boundary(view, [&](auto boundary) {
+        if (boundary < index) {
+            result = boundary;
+            return IterationDecision::Continue;
+        }
+
+        return IterationDecision::Break;
+    });
+
+    return result;
+}
+
 void for_each_sentence_segmentation_boundary(Utf8View const&, SegmentationCallback);
 void for_each_sentence_segmentation_boundary(Utf16View const&, SegmentationCallback);
 void for_each_sentence_segmentation_boundary(Utf32View const&, SegmentationCallback);
@@ -68,4 +137,38 @@ Vector<size_t> find_sentence_segmentation_boundaries(ViewType const& view)
     return boundaries;
 }
 
+template<typename ViewType>
+Optional<size_t> next_sentence_segmentation_boundary(ViewType const& view, size_t index)
+{
+    Optional<size_t> result;
+
+    for_each_sentence_segmentation_boundary(view, [&](auto boundary) {
+        if (boundary > index) {
+            result = boundary;
+            return IterationDecision::Break;
+        }
+
+        return IterationDecision::Continue;
+    });
+
+    return result;
+}
+
+template<typename ViewType>
+Optional<size_t> previous_sentence_segmentation_boundary(ViewType const& view, size_t index)
+{
+    Optional<size_t> result;
+
+    for_each_sentence_segmentation_boundary(view, [&](auto boundary) {
+        if (boundary < index) {
+            result = boundary;
+            return IterationDecision::Continue;
+        }
+
+        return IterationDecision::Break;
+    });
+
+    return result;
+}
+
 }