mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 23:47:45 +00:00
LibUnicode: Allow iterating over text segmentation boundaries
This will be useful for e.g. finding the next boundary after a specific index - we can just stop iterating once a condition is satisfied.
This commit is contained in:
parent
dd4c47456e
commit
abe7786a81
2 changed files with 97 additions and 57 deletions
|
@ -8,21 +8,64 @@
|
|||
#pragma once
|
||||
|
||||
#include <AK/Forward.h>
|
||||
#include <AK/Function.h>
|
||||
#include <AK/IterationDecision.h>
|
||||
#include <AK/Types.h>
|
||||
#include <AK/Vector.h>
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
// Non-template declarations of find_grapheme_segmentation_boundaries, one overload per UTF view type,
// each returning a Vector<size_t>. Their definitions are not visible in this listing.
// NOTE(review): a template with the same name is defined later in this listing; given the hunk header
// (-8,21 +8,64) these three lines appear to be the pre-change side of the diff — confirm.
Vector<size_t> find_grapheme_segmentation_boundaries(Utf8View const&);
|
||||
Vector<size_t> find_grapheme_segmentation_boundaries(Utf16View const&);
|
||||
Vector<size_t> find_grapheme_segmentation_boundaries(Utf32View const&);
|
||||
// Callback type accepted by the for_each_*_segmentation_boundary functions declared in this header:
// it is called with a size_t boundary value and returns an IterationDecision.
using SegmentationCallback = Function<IterationDecision(size_t)>;
|
||||
|
||||
// Non-template declarations of find_word_segmentation_boundaries, one overload per UTF view type,
// each returning a Vector<size_t>. Their definitions are not visible in this listing.
// NOTE(review): a template with the same name is defined later in this listing; given the hunk header
// (-8,21 +8,64) these three lines appear to be the pre-change side of the diff — confirm.
Vector<size_t> find_word_segmentation_boundaries(Utf8View const&);
|
||||
Vector<size_t> find_word_segmentation_boundaries(Utf16View const&);
|
||||
Vector<size_t> find_word_segmentation_boundaries(Utf32View const&);
|
||||
// Callback-based iteration over grapheme segmentation boundaries, one overload per UTF view type.
// The SegmentationCallback receives a size_t boundary and returns an IterationDecision.
// NOTE(review): definitions are not visible here; presumably returning IterationDecision::Break
// stops the iteration early — confirm against the implementation.
void for_each_grapheme_segmentation_boundary(Utf8View const&, SegmentationCallback);
|
||||
void for_each_grapheme_segmentation_boundary(Utf16View const&, SegmentationCallback);
|
||||
void for_each_grapheme_segmentation_boundary(Utf32View const&, SegmentationCallback);
|
||||
|
||||
// Non-template declarations of find_sentence_segmentation_boundaries, one overload per UTF view type,
// each returning a Vector<size_t>. Their definitions are not visible in this listing.
// NOTE(review): a template with the same name is defined later in this listing; given the hunk header
// (-8,21 +8,64) these three lines appear to be the pre-change side of the diff — confirm.
Vector<size_t> find_sentence_segmentation_boundaries(Utf8View const&);
|
||||
Vector<size_t> find_sentence_segmentation_boundaries(Utf16View const&);
|
||||
Vector<size_t> find_sentence_segmentation_boundaries(Utf32View const&);
|
||||
template<typename ViewType>
|
||||
Vector<size_t> find_grapheme_segmentation_boundaries(ViewType const& view)
|
||||
{
|
||||
Vector<size_t> boundaries;
|
||||
|
||||
for_each_grapheme_segmentation_boundary(view, [&](auto boundary) {
|
||||
boundaries.append(boundary);
|
||||
return IterationDecision::Continue;
|
||||
});
|
||||
|
||||
return boundaries;
|
||||
}
|
||||
|
||||
// Callback-based iteration over word segmentation boundaries, one overload per UTF view type.
// The SegmentationCallback receives a size_t boundary and returns an IterationDecision.
// NOTE(review): definitions are not visible here; presumably returning IterationDecision::Break
// stops the iteration early — confirm against the implementation.
void for_each_word_segmentation_boundary(Utf8View const&, SegmentationCallback);
|
||||
void for_each_word_segmentation_boundary(Utf16View const&, SegmentationCallback);
|
||||
void for_each_word_segmentation_boundary(Utf32View const&, SegmentationCallback);
|
||||
|
||||
template<typename ViewType>
|
||||
Vector<size_t> find_word_segmentation_boundaries(ViewType const& view)
|
||||
{
|
||||
Vector<size_t> boundaries;
|
||||
|
||||
for_each_word_segmentation_boundary(view, [&](auto boundary) {
|
||||
boundaries.append(boundary);
|
||||
return IterationDecision::Continue;
|
||||
});
|
||||
|
||||
return boundaries;
|
||||
}
|
||||
|
||||
// Callback-based iteration over sentence segmentation boundaries, one overload per UTF view type.
// The SegmentationCallback receives a size_t boundary and returns an IterationDecision.
// NOTE(review): definitions are not visible here; presumably returning IterationDecision::Break
// stops the iteration early — confirm against the implementation.
void for_each_sentence_segmentation_boundary(Utf8View const&, SegmentationCallback);
|
||||
void for_each_sentence_segmentation_boundary(Utf16View const&, SegmentationCallback);
|
||||
void for_each_sentence_segmentation_boundary(Utf32View const&, SegmentationCallback);
|
||||
|
||||
template<typename ViewType>
|
||||
Vector<size_t> find_sentence_segmentation_boundaries(ViewType const& view)
|
||||
{
|
||||
Vector<size_t> boundaries;
|
||||
|
||||
for_each_sentence_segmentation_boundary(view, [&](auto boundary) {
|
||||
boundaries.append(boundary);
|
||||
return IterationDecision::Continue;
|
||||
});
|
||||
|
||||
return boundaries;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue