1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-24 01:15:07 +00:00
serenity/Userland/Libraries/LibUnicode/Segmentation.h
Timothy Flynn abe7786a81 LibUnicode: Allow iterating over text segmentation boundaries
This will be useful for e.g. finding the next boundary after a specific
index - we can just stop iterating once a condition is satisfied.
2023-02-15 12:36:47 +01:00

71 lines
2.1 KiB
C++

/*
* Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Forward.h>
#include <AK/Function.h>
#include <AK/IterationDecision.h>
#include <AK/Types.h>
#include <AK/Vector.h>
namespace Unicode {
// Callback type taken by the for_each_*_segmentation_boundary() functions declared in this header.
// Each invocation is passed one size_t (a segmentation boundary, going by the function names) and
// must return an IterationDecision.
// NOTE(review): the unit of the size_t (byte, code unit or code point offset) is not visible from
// this header - confirm against the implementation.
using SegmentationCallback = Function<IterationDecision(size_t)>;
// Grapheme segmentation boundary iteration, overloaded for Utf8View, Utf16View and Utf32View.
// Only the declarations are visible in this header; the definitions live elsewhere.
// NOTE(review): presumably the callback is invoked once per boundary, and returning
// IterationDecision::Break stops the iteration early - confirm against the implementation.
void for_each_grapheme_segmentation_boundary(Utf8View const&, SegmentationCallback);
void for_each_grapheme_segmentation_boundary(Utf16View const&, SegmentationCallback);
void for_each_grapheme_segmentation_boundary(Utf32View const&, SegmentationCallback);
// Gathers every value that for_each_grapheme_segmentation_boundary() reports for `view` into a
// Vector, in the order the callback receives them.
// ViewType must be usable as the first argument of for_each_grapheme_segmentation_boundary().
template<typename ViewType>
Vector<size_t> find_grapheme_segmentation_boundaries(ViewType const& view)
{
    Vector<size_t> result;

    for_each_grapheme_segmentation_boundary(view, [&result](size_t index) -> IterationDecision {
        result.append(index);
        // Never request an early stop: the caller wants the complete list.
        return IterationDecision::Continue;
    });

    return result;
}
// Word segmentation boundary iteration, overloaded for Utf8View, Utf16View and Utf32View.
// Only the declarations are visible in this header; the definitions live elsewhere.
// NOTE(review): presumably the callback is invoked once per boundary, and returning
// IterationDecision::Break stops the iteration early - confirm against the implementation.
void for_each_word_segmentation_boundary(Utf8View const&, SegmentationCallback);
void for_each_word_segmentation_boundary(Utf16View const&, SegmentationCallback);
void for_each_word_segmentation_boundary(Utf32View const&, SegmentationCallback);
// Gathers every value that for_each_word_segmentation_boundary() reports for `view` into a
// Vector, in the order the callback receives them.
// ViewType must be usable as the first argument of for_each_word_segmentation_boundary().
template<typename ViewType>
Vector<size_t> find_word_segmentation_boundaries(ViewType const& view)
{
    Vector<size_t> result;

    for_each_word_segmentation_boundary(view, [&result](size_t index) -> IterationDecision {
        result.append(index);
        // Never request an early stop: the caller wants the complete list.
        return IterationDecision::Continue;
    });

    return result;
}
// Sentence segmentation boundary iteration, overloaded for Utf8View, Utf16View and Utf32View.
// Only the declarations are visible in this header; the definitions live elsewhere.
// NOTE(review): presumably the callback is invoked once per boundary, and returning
// IterationDecision::Break stops the iteration early - confirm against the implementation.
void for_each_sentence_segmentation_boundary(Utf8View const&, SegmentationCallback);
void for_each_sentence_segmentation_boundary(Utf16View const&, SegmentationCallback);
void for_each_sentence_segmentation_boundary(Utf32View const&, SegmentationCallback);
// Gathers every value that for_each_sentence_segmentation_boundary() reports for `view` into a
// Vector, in the order the callback receives them.
// ViewType must be usable as the first argument of for_each_sentence_segmentation_boundary().
template<typename ViewType>
Vector<size_t> find_sentence_segmentation_boundaries(ViewType const& view)
{
    Vector<size_t> result;

    for_each_sentence_segmentation_boundary(view, [&result](size_t index) -> IterationDecision {
        result.append(index);
        // Never request an early stop: the caller wants the complete list.
        return IterationDecision::Continue;
    });

    return result;
}
}