1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 23:47:45 +00:00

LibUnicode: Allow iterating over text segmentation boundaries

This will be useful for e.g. finding the next boundary after a specific
index - we can just stop iterating once a condition is satisfied.
This commit is contained in:
Timothy Flynn 2023-02-14 12:03:35 -05:00 committed by Linus Groh
parent dd4c47456e
commit abe7786a81
2 changed files with 97 additions and 57 deletions

View file

@ -8,21 +8,64 @@
#pragma once
#include <AK/Forward.h>
#include <AK/Function.h>
#include <AK/IterationDecision.h>
#include <AK/Types.h>
#include <AK/Vector.h>
namespace Unicode {
// Per-view declarations returning grapheme segmentation boundaries as a Vector<size_t>.
// NOTE(review): a function template with the same name is defined further down in this
// header; confirm these non-template declarations are still meant to coexist with it.
Vector<size_t> find_grapheme_segmentation_boundaries(Utf8View const&);
Vector<size_t> find_grapheme_segmentation_boundaries(Utf16View const&);
Vector<size_t> find_grapheme_segmentation_boundaries(Utf32View const&);
// Callback type accepted by the for_each_*_segmentation_boundary() functions. It receives a
// single size_t and returns an IterationDecision.
// NOTE(review): presumably the size_t is a boundary position within the view and returning
// something other than IterationDecision::Continue stops the iteration early — the
// implementation is not visible in this header; confirm there.
using SegmentationCallback = Function<IterationDecision(size_t)>;
// Per-view declarations returning word segmentation boundaries as a Vector<size_t>.
// NOTE(review): a same-named function template is also defined later in this header.
Vector<size_t> find_word_segmentation_boundaries(Utf8View const&);
Vector<size_t> find_word_segmentation_boundaries(Utf16View const&);
Vector<size_t> find_word_segmentation_boundaries(Utf32View const&);
// Callback-driven grapheme segmentation, declared once per supported view type.
// The callback is passed by value; definitions live outside this header.
void for_each_grapheme_segmentation_boundary(Utf8View const&, SegmentationCallback);
void for_each_grapheme_segmentation_boundary(Utf16View const&, SegmentationCallback);
void for_each_grapheme_segmentation_boundary(Utf32View const&, SegmentationCallback);
// Per-view declarations returning sentence segmentation boundaries as a Vector<size_t>.
// NOTE(review): a same-named function template is also defined later in this header.
Vector<size_t> find_sentence_segmentation_boundaries(Utf8View const&);
Vector<size_t> find_sentence_segmentation_boundaries(Utf16View const&);
Vector<size_t> find_sentence_segmentation_boundaries(Utf32View const&);
// Convenience wrapper around for_each_grapheme_segmentation_boundary(): gathers every
// value handed to the callback into a Vector, in the order it was reported, and returns it.
template<typename ViewType>
Vector<size_t> find_grapheme_segmentation_boundaries(ViewType const& view)
{
    Vector<size_t> collected;

    for_each_grapheme_segmentation_boundary(view, [&collected](size_t position) -> IterationDecision {
        collected.append(position);
        // Always continue: this helper never asks the iteration to stop.
        return IterationDecision::Continue;
    });

    return collected;
}
// Callback-driven word segmentation, declared once per supported view type.
// The SegmentationCallback is passed by value and is handed a size_t per invocation;
// definitions live outside this header.
// NOTE(review): presumably invoked once per word boundary, stopping when the callback
// asks to — confirm against the implementation.
void for_each_word_segmentation_boundary(Utf8View const&, SegmentationCallback);
void for_each_word_segmentation_boundary(Utf16View const&, SegmentationCallback);
void for_each_word_segmentation_boundary(Utf32View const&, SegmentationCallback);
// Convenience wrapper around for_each_word_segmentation_boundary(): gathers every value
// handed to the callback into a Vector, in the order it was reported, and returns it.
template<typename ViewType>
Vector<size_t> find_word_segmentation_boundaries(ViewType const& view)
{
    Vector<size_t> collected;

    for_each_word_segmentation_boundary(view, [&collected](size_t position) -> IterationDecision {
        collected.append(position);
        // Always continue: this helper never asks the iteration to stop.
        return IterationDecision::Continue;
    });

    return collected;
}
// Callback-driven sentence segmentation, declared once per supported view type.
// The SegmentationCallback is passed by value and is handed a size_t per invocation;
// definitions live outside this header.
// NOTE(review): presumably invoked once per sentence boundary, stopping when the callback
// asks to — confirm against the implementation.
void for_each_sentence_segmentation_boundary(Utf8View const&, SegmentationCallback);
void for_each_sentence_segmentation_boundary(Utf16View const&, SegmentationCallback);
void for_each_sentence_segmentation_boundary(Utf32View const&, SegmentationCallback);
// Convenience wrapper around for_each_sentence_segmentation_boundary(): gathers every value
// handed to the callback into a Vector, in the order it was reported, and returns it.
template<typename ViewType>
Vector<size_t> find_sentence_segmentation_boundaries(ViewType const& view)
{
    Vector<size_t> collected;

    for_each_sentence_segmentation_boundary(view, [&collected](size_t position) -> IterationDecision {
        collected.append(position);
        // Always continue: this helper never asks the iteration to stop.
        return IterationDecision::Continue;
    });

    return collected;
}
}