Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ext/intl/grapheme/grapheme.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,6 @@ void grapheme_close_global_iterator( void );
#define GRAPHEME_EXTRACT_TYPE_MAXCHARS 2
#define GRAPHEME_EXTRACT_TYPE_MIN GRAPHEME_EXTRACT_TYPE_COUNT
#define GRAPHEME_EXTRACT_TYPE_MAX GRAPHEME_EXTRACT_TYPE_MAXCHARS
/* Default for the optional limit argument of grapheme_limit_codepoints(). */
#define GRAPHEME_LIMIT_CODEPOINTS 32

#endif // GRAPHEME_GRAPHEME_H
65 changes: 65 additions & 0 deletions ext/intl/grapheme/grapheme_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1135,4 +1135,69 @@ U_CFUNC PHP_FUNCTION(grapheme_levenshtein)
efree(ustring1);
}

/* {{{ Check that no grapheme cluster of a UTF-8 string is longer than a
 * given number of code points.
 *
 * PHP signature:
 *   grapheme_limit_codepoints(string $string,
 *                             int $limit = GRAPHEME_LIMIT_CODEPOINTS): bool
 *
 * Returns true when every grapheme cluster in $string consists of at most
 * $limit code points, and false as soon as one cluster exceeds the limit.
 * An empty string has no clusters and therefore yields true. A limit below 1
 * cannot be met by any non-empty string, so the result is false in that case.
 *
 * Also returns false (with the intl error code and message set) when the
 * break iterator cannot be obtained or the text cannot be attached to it.
 */
U_CFUNC PHP_FUNCTION(grapheme_limit_codepoints)
{
	char *string;
	size_t string_len = 0;
	zend_long limit_codepoint = GRAPHEME_LIMIT_CODEPOINTS;
	UErrorCode status = U_ZERO_ERROR;
	unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
	UBreakIterator *bi;
	UText ut = UTEXT_INITIALIZER;
	int32_t cluster_start = 0;
	int32_t cluster_end;
	bool within_limit = true;

	ZEND_PARSE_PARAMETERS_START(1, 2)
		Z_PARAM_STRING(string, string_len)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit_codepoint)
	ZEND_PARSE_PARAMETERS_END();

	bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &status);

	if (U_FAILURE(status)) {
		/* Set global error code. */
		intl_error_set_code(nullptr, status);

		/* Set error messages. */
		intl_error_set_custom_msg(nullptr, "Error in grapheme_get_break_iterator");
		RETURN_FALSE;
	}

	/* ICU calls are no-ops once status holds a failure, so one check after
	 * both calls covers either of them failing. */
	utext_openUTF8(&ut, string, string_len, &status);
	ubrk_setUText(bi, &ut, &status);

	if (U_FAILURE(status)) {
		/* Set global error code. */
		intl_error_set_code(nullptr, status);

		/* Set error messages. */
		intl_error_set_custom_msg(nullptr, "Error opening UTF-8 text");

		/* Release what was acquired above; utext_close() tolerates a UText
		 * that was never successfully opened. */
		ubrk_close(bi);
		utext_close(&ut);
		RETURN_FALSE;
	}

	/* The UText wraps UTF-8, so the boundaries reported by ubrk_next() are
	 * byte offsets into `string`. Each pass handles the bytes of one
	 * grapheme cluster: [cluster_start, cluster_end). */
	while (within_limit && (cluster_end = ubrk_next(bi)) != UBRK_DONE) {
		zend_long codepoints = 0;
		int32_t offset = cluster_start;

		while (offset < cluster_end) {
			/* Step over exactly one code point, never past the cluster end
			 * (malformed sequences still advance by at least one byte). */
			U8_FWD_1(string, offset, cluster_end);

			/* Signed comparison on purpose: a negative limit must not wrap
			 * around to a huge unsigned value. */
			if (++codepoints > limit_codepoint) {
				within_limit = false;
				break;
			}
		}

		cluster_start = cluster_end;
	}

	ubrk_close(bi);
	utext_close(&ut);

	RETURN_BOOL(within_limit);
}
/* }}} */
7 changes: 7 additions & 0 deletions ext/intl/php_intl.stub.php
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,11 @@
* @cvalue UIDNA_ERROR_CONTEXTJ
*/
const IDNA_ERROR_CONTEXTJ = UNKNOWN;
/**
 * Default value of the $limit parameter of grapheme_limit_codepoints().
 * @var int
 * @cvalue GRAPHEME_LIMIT_CODEPOINTS
 */
const GRAPHEME_LIMIT_CODEPOINTS = UNKNOWN;

class IntlException extends Exception
{
Expand Down Expand Up @@ -445,6 +450,8 @@ function grapheme_str_split(string $string, int $length = 1): array|false {}

function grapheme_levenshtein(string $string1, string $string2, int $insertion_cost = 1, int $replacement_cost = 1, int $deletion_cost = 1, string $locale = ""): int|false {}

/**
 * Reports whether the grapheme clusters of $string stay within $limit code
 * points each; false is returned when a cluster is over the limit or when
 * the ICU break iterator or UTF-8 text could not be set up.
 */
function grapheme_limit_codepoints(string $string, int $limit = GRAPHEME_LIMIT_CODEPOINTS): bool {}

/** @param int $next */
function grapheme_extract(string $haystack, int $size, int $type = GRAPHEME_EXTR_COUNT, int $offset = 0, &$next = null): string|false {}

Expand Down
10 changes: 9 additions & 1 deletion ext/intl/php_intl_arginfo.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions ext/intl/tests/grapheme_limit_codepoints.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
--TEST--
grapheme_limit_codepoints() function test
--EXTENSIONS--
intl
--FILE--
<?php
$f = "あい👨‍👨‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦‍👦うえお";
var_dump(grapheme_limit_codepoints($f));
$f = "あいうえお👨‍👨‍👦";
var_dump(grapheme_limit_codepoints($f));
var_dump(grapheme_strlen($f));
$f = "あいうえおH̵̛͕̞̦̰̜͍̰̥̟͆̏͂̌͑ͅä̷͔̟͓̬̯̟͍̭͉͈̮͙̣̯̬͚̞̭̍̀̾͠m̴̡̧̛̝̯̹̗̹̤̲̺̟̥̈̏͊̔̑̍͆̌̀̚͝͝b̴̢̢̫̝̠̗̼̬̻̮̺̭͔̘͑̆̎̚ư̵̧̡̥̙̭̿̈̀̒̐̊͒͑r̷̡̡̲̼̖͎̫̮̜͇̬͌͘g̷̹͍͎̬͕͓͕̐̃̈́̓̆̚͝ẻ̵̡̼̬̥̹͇̭͔̯̉͛̈́̕r̸̮̖̻̮̣̗͚͖̝̂͌̾̓̀̿̔̀͋̈́͌̈́̋͜👨‍👨‍👦";
var_dump(grapheme_limit_codepoints($f));
var_dump(grapheme_strlen($f));
$f = "ཧྐྵྨླྺྼྻྂ";
var_dump(grapheme_limit_codepoints($f));
var_dump(grapheme_strlen($f));
?>
--EXPECT--
bool(false)
bool(true)
int(6)
bool(true)
int(15)
bool(true)
int(1)
Loading