numpy/_core/tests/test_strings.py

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205
import sys
import pytest

import operator
import numpy as np

from numpy.testing import assert_array_equal, assert_raises, IS_PYPY


COMPARISONS = [
    (operator.eq, np.equal, "=="),
    (operator.ne, np.not_equal, "!="),
    (operator.lt, np.less, "<"),
    (operator.le, np.less_equal, "<="),
    (operator.gt, np.greater, ">"),
    (operator.ge, np.greater_equal, ">="),
]

MAX = np.iinfo(np.int64).max

IS_PYPY_LT_7_3_16 = IS_PYPY and sys.implementation.version < (7, 3, 16)

@pytest.mark.parametrize(["op", "ufunc", "sym"], COMPARISONS)
def test_mixed_string_comparison_ufuncs_fail(op, ufunc, sym):
    arr_string = np.array(["a", "b"], dtype="S")
    arr_unicode = np.array(["a", "c"], dtype="U")

    with pytest.raises(TypeError, match="did not contain a loop"):
        ufunc(arr_string, arr_unicode)

    with pytest.raises(TypeError, match="did not contain a loop"):
        ufunc(arr_unicode, arr_string)

@pytest.mark.parametrize(["op", "ufunc", "sym"], COMPARISONS)
def test_mixed_string_comparisons_ufuncs_with_cast(op, ufunc, sym):
    arr_string = np.array(["a", "b"], dtype="S")
    arr_unicode = np.array(["a", "c"], dtype="U")

    # While there is no loop, manual casting is acceptable:
    res1 = ufunc(arr_string, arr_unicode, signature="UU->?", casting="unsafe")
    res2 = ufunc(arr_string, arr_unicode, signature="SS->?", casting="unsafe")

    expected = op(arr_string.astype("U"), arr_unicode)
    assert_array_equal(res1, expected)
    assert_array_equal(res2, expected)


@pytest.mark.parametrize(["op", "ufunc", "sym"], COMPARISONS)
@pytest.mark.parametrize("dtypes", [
        ("S2", "S2"), ("S2", "S10"),
        ("<U1", "<U1"), ("<U1", ">U1"), (">U1", ">U1"),
        ("<U1", "<U10"), ("<U1", ">U10")])
@pytest.mark.parametrize("aligned", [True, False])
def test_string_comparisons(op, ufunc, sym, dtypes, aligned):
    # ensure native byte-order for the first view to stay within unicode range
    native_dt = np.dtype(dtypes[0]).newbyteorder("=")
    arr = np.arange(2**15).view(native_dt).astype(dtypes[0])
    if not aligned:
        # Make `arr` unaligned:
        new = np.zeros(arr.nbytes + 1, dtype=np.uint8)[1:].view(dtypes[0])
        new[...] = arr
        arr = new

    arr2 = arr.astype(dtypes[1], copy=True)
    np.random.shuffle(arr2)
    arr[0] = arr2[0]  # make sure one matches

    expected = [op(d1, d2) for d1, d2 in zip(arr.tolist(), arr2.tolist())]
    assert_array_equal(op(arr, arr2), expected)
    assert_array_equal(ufunc(arr, arr2), expected)
    assert_array_equal(
        np.char.compare_chararrays(arr, arr2, sym, False), expected
    )

    expected = [op(d2, d1) for d1, d2 in zip(arr.tolist(), arr2.tolist())]
    assert_array_equal(op(arr2, arr), expected)
    assert_array_equal(ufunc(arr2, arr), expected)
    assert_array_equal(
        np.char.compare_chararrays(arr2, arr, sym, False), expected
    )


@pytest.mark.parametrize(["op", "ufunc", "sym"], COMPARISONS)
@pytest.mark.parametrize("dtypes", [
        ("S2", "S2"), ("S2", "S10"), ("<U1", "<U1"), ("<U1", ">U10")])
def test_string_comparisons_empty(op, ufunc, sym, dtypes):
    arr = np.empty((1, 0, 1, 5), dtype=dtypes[0])
    arr2 = np.empty((100, 1, 0, 1), dtype=dtypes[1])

    expected = np.empty(np.broadcast_shapes(arr.shape, arr2.shape), dtype=bool)
    assert_array_equal(op(arr, arr2), expected)
    assert_array_equal(ufunc(arr, arr2), expected)
    assert_array_equal(
        np.char.compare_chararrays(arr, arr2, sym, False), expected
    )


@pytest.mark.parametrize("str_dt", ["S", "U"])
@pytest.mark.parametrize("float_dt", np.typecodes["AllFloat"])
def test_float_to_string_cast(str_dt, float_dt):
    float_dt = np.dtype(float_dt)
    fi = np.finfo(float_dt)
    arr = np.array([np.nan, np.inf, -np.inf, fi.max, fi.min], dtype=float_dt)
    expected = ["nan", "inf", "-inf", str(fi.max), str(fi.min)]
    if float_dt.kind == "c":
        expected = [f"({r}+0j)" for r in expected]

    res = arr.astype(str_dt)
    assert_array_equal(res, np.array(expected, dtype=str_dt))


@pytest.mark.parametrize("dt", ["S", "U", "T"])
class TestMethods:

    @pytest.mark.parametrize("in1,in2,out", [
        ("", "", ""),
        ("abc", "abc", "abcabc"),
        ("12345", "12345", "1234512345"),
        ("MixedCase", "MixedCase", "MixedCaseMixedCase"),
        ("12345 \0 ", "12345 \0 ", "12345 \0 12345 \0 "),
        ("UPPER", "UPPER", "UPPERUPPER"),
        (["abc", "def"], ["hello", "world"], ["abchello", "defworld"]),
    ])
    def test_add(self, in1, in2, out, dt):
        in1 = np.array(in1, dtype=dt)
        in2 = np.array(in2, dtype=dt)
        out = np.array(out, dtype=dt)
        assert_array_equal(np.strings.add(in1, in2), out)

    @pytest.mark.parametrize("in1,in2,out", [
        ("abc", 3, "abcabcabc"),
        ("abc", 0, ""),
        ("abc", -1, ""),
        (["abc", "def"], [1, 4], ["abc", "defdefdefdef"]),
    ])
    def test_multiply(self, in1, in2, out, dt):
        in1 = np.array(in1, dtype=dt)
        out = np.array(out, dtype=dt)
        assert_array_equal(np.strings.multiply(in1, in2), out)

    def test_multiply_raises(self, dt):
        with pytest.raises(TypeError, match="unsupported type"):
            np.strings.multiply(np.array("abc", dtype=dt), 3.14)

        with pytest.raises(MemoryError):
            np.strings.multiply(np.array("abc", dtype=dt), sys.maxsize)

    @pytest.mark.parametrize("i_dt", [np.int8, np.int16, np.int32,
                                      np.int64, np.int_])
    def test_multiply_integer_dtypes(self, i_dt, dt):
        a = np.array("abc", dtype=dt)
        i = np.array(3, dtype=i_dt)
        res = np.array("abcabcabc", dtype=dt)
        assert_array_equal(np.strings.multiply(a, i), res)

    @pytest.mark.parametrize("in_,out", [
        ("", False),
        ("a", True),
        ("A", True),
        ("\n", False),
        ("abc", True),
        ("aBc123", False),
        ("abc\n", False),
        (["abc", "aBc123"], [True, False]),
    ])
    def test_isalpha(self, in_, out, dt):
        in_ = np.array(in_, dtype=dt)
        assert_array_equal(np.strings.isalpha(in_), out)

    @pytest.mark.parametrize("in_,out", [
        ('', False),
        ('a', True),
        ('A', True),
        ('\n', False),
        ('123abc456', True),
        ('a1b3c', True),
        ('aBc000 ', False),
        ('abc\n', False),
    ])
    def test_isalnum(self, in_, out, dt):
        in_ = np.array(in_, dtype=dt)
        assert_array_equal(np.strings.isalnum(in_), out)

    @pytest.mark.parametrize("in_,out", [
        ("", False),
        ("a", False),
        ("0", True),
        ("012345", True),
        ("012345a", False),
        (["a", "012345"], [False, True]),
    ])
    def test_isdigit(self, in_, out, dt):
        in_ = np.array(in_, dtype=dt)
        assert_array_equal(np.strings.isdigit(in_), out)

    @pytest.mark.parametrize("in_,out", [
        ("", False),
        ("a", False),
        ("1", False),
        (" ", True),
        ("\t", True),
        ("\r", True),
        ("\n", True),
        (" \t\r \n", True),
        (" \t\r\na", False),
        (["\t1", " \t\r \n"], [False, True])
    ])
    def test_isspace(self, in_, out, dt):
        in_ = np.array(in_, dtype=dt)
        assert_array_equal(np.strings.isspace(in_), out)

    @pytest.mark.parametrize("in_,out", [
        ('', False),
        ('a', True),
        ('A', False),
        ('\n', False),
        ('abc', True),
        ('aBc', False),
        ('abc\n', True),
    ])
    def test_islower(self, in_, out, dt):
        in_ = np.array(in_, dtype=dt)
        assert_array_equal(np.strings.islower(in_), out)

    @pytest.mark.parametrize("in_,out", [
        ('', False),
        ('a', False),
        ('A', True),
        ('\n', False),
        ('ABC', True),
        ('AbC', False),
        ('ABC\n', True),
    ])
    def test_isupper(self, in_, out, dt):
        in_ = np.array(in_, dtype=dt)
        assert_array_equal(np.strings.isupper(in_), out)

    @pytest.mark.parametrize("in_,out", [
        ('', False),
        ('a', False),
        ('A', True),
        ('\n', False),
        ('A Titlecased Line', True),
        ('A\nTitlecased Line', True),
        ('A Titlecased, Line', True),
        ('Not a capitalized String', False),
        ('Not\ta Titlecase String', False),
        ('Not--a Titlecase String', False),
        ('NOT', False),
    ])
    def test_istitle(self, in_, out, dt):
        in_ = np.array(in_, dtype=dt)
        assert_array_equal(np.strings.istitle(in_), out)

    @pytest.mark.parametrize("in_,out", [
        ("", 0),
        ("abc", 3),
        ("12345", 5),
        ("MixedCase", 9),
        ("12345 \x00 ", 8),
        ("UPPER", 5),
        (["abc", "12345 \x00 "], [3, 8]),
    ])
    def test_str_len(self, in_, out, dt):
        in_ = np.array(in_, dtype=dt)
        assert_array_equal(np.strings.str_len(in_), out)

    @pytest.mark.parametrize("a,sub,start,end,out", [
        ("abcdefghiabc", "abc", 0, None, 0),
        ("abcdefghiabc", "abc", 1, None, 9),
        ("abcdefghiabc", "def", 4, None, -1),
        ("abc", "", 0, None, 0),
        ("abc", "", 3, None, 3),
        ("abc", "", 4, None, -1),
        ("rrarrrrrrrrra", "a", 0, None, 2),
        ("rrarrrrrrrrra", "a", 4, None, 12),
        ("rrarrrrrrrrra", "a", 4, 6, -1),
        ("", "", 0, None, 0),
        ("", "", 1, 1, -1),
        ("", "", MAX, 0, -1),
        ("", "xx", 0, None, -1),
        ("", "xx", 1, 1, -1),
        ("", "xx", MAX, 0, -1),
        pytest.param(99*"a" + "b", "b", 0, None, 99,
                     id="99*a+b-b-0-None-99"),
        pytest.param(98*"a" + "ba", "ba", 0, None, 98,
                     id="98*a+ba-ba-0-None-98"),
        pytest.param(100*"a", "b", 0, None, -1,
                     id="100*a-b-0-None--1"),
        pytest.param(30000*"a" + 100*"b", 100*"b", 0, None, 30000,
                     id="30000*a+100*b-100*b-0-None-30000"),
        pytest.param(30000*"a", 100*"b", 0, None, -1,
                     id="30000*a-100*b-0-None--1"),
        pytest.param(15000*"a" + 15000*"b", 15000*"b", 0, None, 15000,
                     id="15000*a+15000*b-15000*b-0-None-15000"),
        pytest.param(15000*"a" + 15000*"b", 15000*"c", 0, None, -1,
                     id="15000*a+15000*b-15000*c-0-None--1"),
        (["abcdefghiabc", "rrarrrrrrrrra"], ["def", "arr"], [0, 3],
         None, [3, -1]),
        ("Ae¢☃€ 😊" * 2, "😊", 0, None, 6),
        ("Ae¢☃€ 😊" * 2, "😊", 7, None, 13),
    ])
    def test_find(self, a, sub, start, end, out, dt):
        if "😊" in a and dt == "S":
            pytest.skip("Bytes dtype does not support non-ascii input")
        a = np.array(a, dtype=dt)
        sub = np.array(sub, dtype=dt)
        assert_array_equal(np.strings.find(a, sub, start, end), out)

    @pytest.mark.parametrize("a,sub,start,end,out", [
        ("abcdefghiabc", "abc", 0, None, 9),
        ("abcdefghiabc", "", 0, None, 12),
        ("abcdefghiabc", "abcd", 0, None, 0),
        ("abcdefghiabc", "abcz", 0, None, -1),
        ("abc", "", 0, None, 3),
        ("abc", "", 3, None, 3),
        ("abc", "", 4, None, -1),
        ("rrarrrrrrrrra", "a", 0, None, 12),
        ("rrarrrrrrrrra", "a", 4, None, 12),
        ("rrarrrrrrrrra", "a", 4, 6, -1),
        (["abcdefghiabc", "rrarrrrrrrrra"], ["abc", "a"], [0, 0],
         None, [9, 12]),
        ("Ae¢☃€ 😊" * 2, "😊", 0, None, 13),
        ("Ae¢☃€ 😊" * 2, "😊", 0, 7, 6),
    ])
    def test_rfind(self, a, sub, start, end, out, dt):
        if "😊" in a and dt == "S":
            pytest.skip("Bytes dtype does not support non-ascii input")
        a = np.array(a, dtype=dt)
        sub = np.array(sub, dtype=dt)
        assert_array_equal(np.strings.rfind(a, sub, start, end), out)

    @pytest.mark.parametrize("a,sub,start,end,out", [
        ("aaa", "a", 0, None, 3),
        ("aaa", "b", 0, None, 0),
        ("aaa", "a", 1, None, 2),
        ("aaa", "a", 10, None, 0),
        ("aaa", "a", -1, None, 1),
        ("aaa", "a", -10, None, 3),
        ("aaa", "a", 0, 1, 1),
        ("aaa", "a", 0, 10, 3),
        ("aaa", "a", 0, -1, 2),
        ("aaa", "a", 0, -10, 0),
        ("aaa", "", 1, None, 3),
        ("aaa", "", 3, None, 1),
        ("aaa", "", 10, None, 0),
        ("aaa", "", -1, None, 2),
        ("aaa", "", -10, None, 4),
        ("aaa", "aaaa", 0, None, 0),
        pytest.param(98*"a" + "ba", "ba", 0, None, 1,
                     id="98*a+ba-ba-0-None-1"),
        pytest.param(30000*"a" + 100*"b", 100*"b", 0, None, 1,
                     id="30000*a+100*b-100*b-0-None-1"),
        pytest.param(30000*"a", 100*"b", 0, None, 0,
                     id="30000*a-100*b-0-None-0"),
        pytest.param(30000*"a" + 100*"ab", "ab", 0, None, 100,
                     id="30000*a+100*ab-ab-0-None-100"),
        pytest.param(15000*"a" + 15000*"b", 15000*"b", 0, None, 1,
                     id="15000*a+15000*b-15000*b-0-None-1"),
        pytest.param(15000*"a" + 15000*"b", 15000*"c", 0, None, 0,
                     id="15000*a+15000*b-15000*c-0-None-0"),
        ("", "", 0, None, 1),
        ("", "", 1, 1, 0),
        ("", "", MAX, 0, 0),
        ("", "xx", 0, None, 0),
        ("", "xx", 1, 1, 0),
        ("", "xx", MAX, 0, 0),
        (["aaa", ""], ["a", ""], [0, 0], None, [3, 1]),
        ("Ae¢☃€ 😊" * 100, "😊", 0, None, 100),
    ])
    def test_count(self, a, sub, start, end, out, dt):
        if "😊" in a and dt == "S":
            pytest.skip("Bytes dtype does not support non-ascii input")
        a = np.array(a, dtype=dt)
        sub = np.array(sub, dtype=dt)
        assert_array_equal(np.strings.count(a, sub, start, end), out)

    @pytest.mark.parametrize("a,prefix,start,end,out", [
        ("hello", "he", 0, None, True),
        ("hello", "hello", 0, None, True),
        ("hello", "hello world", 0, None, False),
        ("hello", "", 0, None, True),
        ("hello", "ello", 0, None, False),
        ("hello", "ello", 1, None, True),
        ("hello", "o", 4, None, True),
        ("hello", "o", 5, None, False),
        ("hello", "", 5, None, True),
        ("hello", "lo", 6, None, False),
        ("helloworld", "lowo", 3, None, True),
        ("helloworld", "lowo", 3, 7, True),
        ("helloworld", "lowo", 3, 6, False),
        ("", "", 0, 1, True),
        ("", "", 0, 0, True),
        ("", "", 1, 0, False),
        ("hello", "he", 0, -1, True),
        ("hello", "he", -53, -1, True),
        ("hello", "hello", 0, -1, False),
        ("hello", "hello world", -1, -10, False),
        ("hello", "ello", -5, None, False),
        ("hello", "ello", -4, None, True),
        ("hello", "o", -2, None, False),
        ("hello", "o", -1, None, True),
        ("hello", "", -3, -3, True),
        ("hello", "lo", -9, None, False),
        (["hello", ""], ["he", ""], [0, 0], None, [True, True]),
    ])
    def test_startswith(self, a, prefix, start, end, out, dt):
        a = np.array(a, dtype=dt)
        prefix = np.array(prefix, dtype=dt)
        assert_array_equal(np.strings.startswith(a, prefix, start, end), out)

    @pytest.mark.parametrize("a,suffix,start,end,out", [
        ("hello", "lo", 0, None, True),
        ("hello", "he", 0, None, False),
        ("hello", "", 0, None, True),
        ("hello", "hello world", 0, None, False),
        ("helloworld", "worl", 0, None, False),
        ("helloworld", "worl", 3, 9, True),
        ("helloworld", "world", 3, 12, True),
        ("helloworld", "lowo", 1, 7, True),
        ("helloworld", "lowo", 2, 7, True),
        ("helloworld", "lowo", 3, 7, True),
        ("helloworld", "lowo", 4, 7, False),
        ("helloworld", "lowo", 3, 8, False),
        ("ab", "ab", 0, 1, False),
        ("ab", "ab", 0, 0, False),
        ("", "", 0, 1, True),
        ("", "", 0, 0, True),
        ("", "", 1, 0, False),
        ("hello", "lo", -2, None, True),
        ("hello", "he", -2, None, False),
        ("hello", "", -3, -3, True),
        ("hello", "hello world", -10, -2, False),
        ("helloworld", "worl", -6, None, False),
        ("helloworld", "worl", -5, -1, True),
        ("helloworld", "worl", -5, 9, True),
        ("helloworld", "world", -7, 12, True),
        ("helloworld", "lowo", -99, -3, True),
        ("helloworld", "lowo", -8, -3, True),
        ("helloworld", "lowo", -7, -3, True),
        ("helloworld", "lowo", 3, -4, False),
        ("helloworld", "lowo", -8, -2, False),
        (["hello", "helloworld"], ["lo", "worl"], [0, -6], None,
         [True, False]),
    ])
    def test_endswith(self, a, suffix, start, end, out, dt):
        a = np.array(a, dtype=dt)
        suffix = np.array(suffix, dtype=dt)
        assert_array_equal(np.strings.endswith(a, suffix, start, end), out)

    @pytest.mark.parametrize("a,chars,out", [
        ("", None, ""),
        ("   hello   ", None, "hello   "),
        ("hello", None, "hello"),
        (" \t\n\r\f\vabc \t\n\r\f\v", None, "abc \t\n\r\f\v"),
        (["   hello   ", "hello"], None, ["hello   ", "hello"]),
        ("", "", ""),
        ("", "xyz", ""),
        ("hello", "", "hello"),
        ("xyzzyhelloxyzzy", "xyz", "helloxyzzy"),
        ("hello", "xyz", "hello"),
        ("xyxz", "xyxz", ""),
        ("xyxzx", "x", "yxzx"),
        (["xyzzyhelloxyzzy", "hello"], ["xyz", "xyz"],
         ["helloxyzzy", "hello"]),
        (["ba", "ac", "baa", "bba"], "b", ["a", "ac", "aa", "a"]),
    ])
    def test_lstrip(self, a, chars, out, dt):
        a = np.array(a, dtype=dt)
        out = np.array(out, dtype=dt)
        if chars is not None:
            chars = np.array(chars, dtype=dt)
            assert_array_equal(np.strings.lstrip(a, chars), out)
        else:
            assert_array_equal(np.strings.lstrip(a), out)

    @pytest.mark.parametrize("a,chars,out", [
        ("", None, ""),
        ("   hello   ", None, "   hello"),
        ("hello", None, "hello"),
        (" \t\n\r\f\vabc \t\n\r\f\v", None, " \t\n\r\f\vabc"),
        (["   hello   ", "hello"], None, ["   hello", "hello"]),
        ("", "", ""),
        ("", "xyz", ""),
        ("hello", "", "hello"),
        (["hello    ", "abcdefghijklmnop"], None,
         ["hello", "abcdefghijklmnop"]),
        ("xyzzyhelloxyzzy", "xyz", "xyzzyhello"),
        ("hello", "xyz", "hello"),
        ("xyxz", "xyxz", ""),
        ("    ", None, ""),
        ("xyxzx", "x", "xyxz"),
        (["xyzzyhelloxyzzy", "hello"], ["xyz", "xyz"],
         ["xyzzyhello", "hello"]),
        (["ab", "ac", "aab", "abb"], "b", ["a", "ac", "aa", "a"]),
    ])
    def test_rstrip(self, a, chars, out, dt):
        a = np.array(a, dtype=dt)
        out = np.array(out, dtype=dt)
        if chars is not None:
            chars = np.array(chars, dtype=dt)
            assert_array_equal(np.strings.rstrip(a, chars), out)
        else:
            assert_array_equal(np.strings.rstrip(a), out)

    @pytest.mark.parametrize("a,chars,out", [
        ("", None, ""),
        ("   hello   ", None, "hello"),
        ("hello", None, "hello"),
        (" \t\n\r\f\vabc \t\n\r\f\v", None, "abc"),
        (["   hello   ", "hello"], None, ["hello", "hello"]),
        ("", "", ""),
        ("", "xyz", ""),
        ("hello", "", "hello"),
        ("xyzzyhelloxyzzy", "xyz", "hello"),
        ("hello", "xyz", "hello"),
        ("xyxz", "xyxz", ""),
        ("xyxzx", "x", "yxz"),
        (["xyzzyhelloxyzzy", "hello"], ["xyz", "xyz"],
         ["hello", "hello"]),
        (["bab", "ac", "baab", "bbabb"], "b", ["a", "ac", "aa", "a"]),
    ])
    def test_strip(self, a, chars, out, dt):
        a = np.array(a, dtype=dt)
        if chars is not None:
            chars = np.array(chars, dtype=dt)
        out = np.array(out, dtype=dt)
        assert_array_equal(np.strings.strip(a, chars), out)

    @pytest.mark.parametrize("buf,old,new,count,res", [
        ("", "", "", -1, ""),
        ("", "", "A", -1, "A"),
        ("", "A", "", -1, ""),
        ("", "A", "A", -1, ""),
        ("", "", "", 100, ""),
        ("", "", "A", 100, "A"),
        ("A", "", "", -1, "A"),
        ("A", "", "*", -1, "*A*"),
        ("A", "", "*1", -1, "*1A*1"),
        ("A", "", "*-#", -1, "*-#A*-#"),
        ("AA", "", "*-", -1, "*-A*-A*-"),
        ("AA", "", "*-", -1, "*-A*-A*-"),
        ("AA", "", "*-", 4, "*-A*-A*-"),
        ("AA", "", "*-", 3, "*-A*-A*-"),
        ("AA", "", "*-", 2, "*-A*-A"),
        ("AA", "", "*-", 1, "*-AA"),
        ("AA", "", "*-", 0, "AA"),
        ("A", "A", "", -1, ""),
        ("AAA", "A", "", -1, ""),
        ("AAA", "A", "", -1, ""),
        ("AAA", "A", "", 4, ""),
        ("AAA", "A", "", 3, ""),
        ("AAA", "A", "", 2, "A"),
        ("AAA", "A", "", 1, "AA"),
        ("AAA", "A", "", 0, "AAA"),
        ("AAAAAAAAAA", "A", "", -1, ""),
        ("ABACADA", "A", "", -1, "BCD"),
        ("ABACADA", "A", "", -1, "BCD"),
        ("ABACADA", "A", "", 5, "BCD"),
        ("ABACADA", "A", "", 4, "BCD"),
        ("ABACADA", "A", "", 3, "BCDA"),
        ("ABACADA", "A", "", 2, "BCADA"),
        ("ABACADA", "A", "", 1, "BACADA"),
        ("ABACADA", "A", "", 0, "ABACADA"),
        ("ABCAD", "A", "", -1, "BCD"),
        ("ABCADAA", "A", "", -1, "BCD"),
        ("BCD", "A", "", -1, "BCD"),
        ("*************", "A", "", -1, "*************"),
        ("^"+"A"*1000+"^", "A", "", 999, "^A^"),
        ("the", "the", "", -1, ""),
        ("theater", "the", "", -1, "ater"),
        ("thethe", "the", "", -1, ""),
        ("thethethethe", "the", "", -1, ""),
        ("theatheatheathea", "the", "", -1, "aaaa"),
        ("that", "the", "", -1, "that"),
        ("thaet", "the", "", -1, "thaet"),
        ("here and there", "the", "", -1, "here and re"),
        ("here and there and there", "the", "", -1, "here and re and re"),
        ("here and there and there", "the", "", 3, "here and re and re"),
        ("here and there and there", "the", "", 2, "here and re and re"),
        ("here and there and there", "the", "", 1, "here and re and there"),
        ("here and there and there", "the", "", 0, "here and there and there"),
        ("here and there and there", "the", "", -1, "here and re and re"),
        ("abc", "the", "", -1, "abc"),
        ("abcdefg", "the", "", -1, "abcdefg"),
        ("bbobob", "bob", "", -1, "bob"),
        ("bbobobXbbobob", "bob", "", -1, "bobXbob"),
        ("aaaaaaabob", "bob", "", -1, "aaaaaaa"),
        ("aaaaaaa", "bob", "", -1, "aaaaaaa"),
        ("Who goes there?", "o", "o", -1, "Who goes there?"),
        ("Who goes there?", "o", "O", -1, "WhO gOes there?"),
        ("Who goes there?", "o", "O", -1, "WhO gOes there?"),
        ("Who goes there?", "o", "O", 3, "WhO gOes there?"),
        ("Who goes there?", "o", "O", 2, "WhO gOes there?"),
        ("Who goes there?", "o", "O", 1, "WhO goes there?"),
        ("Who goes there?", "o", "O", 0, "Who goes there?"),
        ("Who goes there?", "a", "q", -1, "Who goes there?"),
        ("Who goes there?", "W", "w", -1, "who goes there?"),
        ("WWho goes there?WW", "W", "w", -1, "wwho goes there?ww"),
        ("Who goes there?", "?", "!", -1, "Who goes there!"),
        ("Who goes there??", "?", "!", -1, "Who goes there!!"),
        ("Who goes there?", ".", "!", -1, "Who goes there?"),
        ("This is a tissue", "is", "**", -1, "Th** ** a t**sue"),
        ("This is a tissue", "is", "**", -1, "Th** ** a t**sue"),
        ("This is a tissue", "is", "**", 4, "Th** ** a t**sue"),
        ("This is a tissue", "is", "**", 3, "Th** ** a t**sue"),
        ("This is a tissue", "is", "**", 2, "Th** ** a tissue"),
        ("This is a tissue", "is", "**", 1, "Th** is a tissue"),
        ("This is a tissue", "is", "**", 0, "This is a tissue"),
        ("bobob", "bob", "cob", -1, "cobob"),
        ("bobobXbobobob", "bob", "cob", -1, "cobobXcobocob"),
        ("bobob", "bot", "bot", -1, "bobob"),
        ("Reykjavik", "k", "KK", -1, "ReyKKjaviKK"),
        ("Reykjavik", "k", "KK", -1, "ReyKKjaviKK"),
        ("Reykjavik", "k", "KK", 2, "ReyKKjaviKK"),
        ("Reykjavik", "k", "KK", 1, "ReyKKjavik"),
        ("Reykjavik", "k", "KK", 0, "Reykjavik"),
        ("A.B.C.", ".", "----", -1, "A----B----C----"),
        ("Reykjavik", "q", "KK", -1, "Reykjavik"),
        ("spam, spam, eggs and spam", "spam", "ham", -1,
            "ham, ham, eggs and ham"),
        ("spam, spam, eggs and spam", "spam", "ham", -1,
            "ham, ham, eggs and ham"),
        ("spam, spam, eggs and spam", "spam", "ham", 4,
            "ham, ham, eggs and ham"),
        ("spam, spam, eggs and spam", "spam", "ham", 3,
            "ham, ham, eggs and ham"),
        ("spam, spam, eggs and spam", "spam", "ham", 2,
            "ham, ham, eggs and spam"),
        ("spam, spam, eggs and spam", "spam", "ham", 1,
            "ham, spam, eggs and spam"),
        ("spam, spam, eggs and spam", "spam", "ham", 0,
            "spam, spam, eggs and spam"),
        ("bobobob", "bobob", "bob", -1, "bobob"),
        ("bobobobXbobobob", "bobob", "bob", -1, "bobobXbobob"),
        ("BOBOBOB", "bob", "bobby", -1, "BOBOBOB"),
        ("one!two!three!", "!", "@", 1, "one@two!three!"),
        ("one!two!three!", "!", "", -1, "onetwothree"),
        ("one!two!three!", "!", "@", 2, "one@two@three!"),
        ("one!two!three!", "!", "@", 3, "one@two@three@"),
        ("one!two!three!", "!", "@", 4, "one@two@three@"),
        ("one!two!three!", "!", "@", 0, "one!two!three!"),
        ("one!two!three!", "!", "@", -1, "one@two@three@"),
        ("one!two!three!", "x", "@", -1, "one!two!three!"),
        ("one!two!three!", "x", "@", 2, "one!two!three!"),
        ("abc", "", "-", -1, "-a-b-c-"),
        ("abc", "", "-", 3, "-a-b-c"),
        ("abc", "", "-", 0, "abc"),
        ("abc", "ab", "--", 0, "abc"),
        ("abc", "xy", "--", -1, "abc"),
        (["abbc", "abbd"], "b", "z", [1, 2], ["azbc", "azzd"]),
    ])
    def test_replace(self, buf, old, new, count, res, dt):
        if "😊" in buf and dt == "S":
            pytest.skip("Bytes dtype does not support non-ascii input")
        buf = np.array(buf, dtype=dt)
        old = np.array(old, dtype=dt)
        new = np.array(new, dtype=dt)
        res = np.array(res, dtype=dt)
        assert_array_equal(np.strings.replace(buf, old, new, count), res)

    @pytest.mark.parametrize("buf,sub,start,end,res", [
        ("abcdefghiabc", "", 0, None, 0),
        ("abcdefghiabc", "def", 0, None, 3),
        ("abcdefghiabc", "abc", 0, None, 0),
        ("abcdefghiabc", "abc", 1, None, 9),
    ])
    def test_index(self, buf, sub, start, end, res, dt):
        buf = np.array(buf, dtype=dt)
        sub = np.array(sub, dtype=dt)
        assert_array_equal(np.strings.index(buf, sub, start, end), res)

    @pytest.mark.parametrize("buf,sub,start,end", [
        ("abcdefghiabc", "hib", 0, None),
        ("abcdefghiab", "abc", 1, None),
        ("abcdefghi", "ghi", 8, None),
        ("abcdefghi", "ghi", -1, None),
        ("rrarrrrrrrrra", "a", 4, 6),
    ])
    def test_index_raises(self, buf, sub, start, end, dt):
        buf = np.array(buf, dtype=dt)
        sub = np.array(sub, dtype=dt)
        with pytest.raises(ValueError, match="substring not found"):
            np.strings.index(buf, sub, start, end)

    @pytest.mark.parametrize("buf,sub,start,end,res", [
        ("abcdefghiabc", "", 0, None, 12),
        ("abcdefghiabc", "def", 0, None, 3),
        ("abcdefghiabc", "abc", 0, None, 9),
        ("abcdefghiabc", "abc", 0, -1, 0),
    ])
    def test_rindex(self, buf, sub, start, end, res, dt):
        buf = np.array(buf, dtype=dt)
        sub = np.array(sub, dtype=dt)
        assert_array_equal(np.strings.rindex(buf, sub, start, end), res)

    @pytest.mark.parametrize("buf,sub,start,end", [
        ("abcdefghiabc", "hib", 0, None),
        ("defghiabc", "def", 1, None),
        ("defghiabc", "abc", 0, -1),
        ("abcdefghi", "ghi", 0, 8),
        ("abcdefghi", "ghi", 0, -1),
        ("rrarrrrrrrrra", "a", 4, 6),
    ])
    def test_rindex_raises(self, buf, sub, start, end, dt):
        buf = np.array(buf, dtype=dt)
        sub = np.array(sub, dtype=dt)
        with pytest.raises(ValueError, match="substring not found"):
            np.strings.rindex(buf, sub, start, end)

    @pytest.mark.parametrize("buf,tabsize,res", [
        ("abc\rab\tdef\ng\thi", 8, "abc\rab      def\ng       hi"),
        ("abc\rab\tdef\ng\thi", 4, "abc\rab  def\ng   hi"),
        ("abc\r\nab\tdef\ng\thi", 8, "abc\r\nab      def\ng       hi"),
        ("abc\r\nab\tdef\ng\thi", 4, "abc\r\nab  def\ng   hi"),
        ("abc\r\nab\r\ndef\ng\r\nhi", 4, "abc\r\nab\r\ndef\ng\r\nhi"),
        (" \ta\n\tb", 1, "  a\n b"),
    ])
    def test_expandtabs(self, buf, tabsize, res, dt):
        buf = np.array(buf, dtype=dt)
        res = np.array(res, dtype=dt)
        assert_array_equal(np.strings.expandtabs(buf, tabsize), res)

    def test_expandtabs_raises_overflow(self, dt):
        with pytest.raises(OverflowError, match="new string is too long"):
            np.strings.expandtabs(np.array("\ta\n\tb", dtype=dt), sys.maxsize)
            np.strings.expandtabs(np.array("\ta\n\tb", dtype=dt), 2**61)

    FILL_ERROR = "The fill character must be exactly one character long"

    def test_center_raises_multiple_character_fill(self, dt):
        buf = np.array("abc", dtype=dt)
        fill = np.array("**", dtype=dt)
        with pytest.raises(TypeError, match=self.FILL_ERROR):
            np.strings.center(buf, 10, fill)

    def test_ljust_raises_multiple_character_fill(self, dt):
        buf = np.array("abc", dtype=dt)
        fill = np.array("**", dtype=dt)
        with pytest.raises(TypeError, match=self.FILL_ERROR):
            np.strings.ljust(buf, 10, fill)

    def test_rjust_raises_multiple_character_fill(self, dt):
        buf = np.array("abc", dtype=dt)
        fill = np.array("**", dtype=dt)
        with pytest.raises(TypeError, match=self.FILL_ERROR):
            np.strings.rjust(buf, 10, fill)

    @pytest.mark.parametrize("buf,width,fillchar,res", [
        ('abc', 10, ' ', '   abc    '),
        ('abc', 6, ' ', ' abc  '),
        ('abc', 3, ' ', 'abc'),
        ('abc', 2, ' ', 'abc'),
        ('abc', 10, '*', '***abc****'),
    ])
    def test_center(self, buf, width, fillchar, res, dt):
        buf = np.array(buf, dtype=dt)
        fillchar = np.array(fillchar, dtype=dt)
        res = np.array(res, dtype=dt)
        assert_array_equal(np.strings.center(buf, width, fillchar), res)

    @pytest.mark.parametrize("buf,width,fillchar,res", [
        ('abc', 10, ' ', 'abc       '),
        ('abc', 6, ' ', 'abc   '),
        ('abc', 3, ' ', 'abc'),
        ('abc', 2, ' ', 'abc'),
        ('abc', 10, '*', 'abc*******'),
    ])
    def test_ljust(self, buf, width, fillchar, res, dt):
        buf = np.array(buf, dtype=dt)
        fillchar = np.array(fillchar, dtype=dt)
        res = np.array(res, dtype=dt)
        assert_array_equal(np.strings.ljust(buf, width, fillchar), res)

    @pytest.mark.parametrize("buf,width,fillchar,res", [
        ('abc', 10, ' ', '       abc'),
        ('abc', 6, ' ', '   abc'),
        ('abc', 3, ' ', 'abc'),
        ('abc', 2, ' ', 'abc'),
        ('abc', 10, '*', '*******abc'),
    ])
    def test_rjust(self, buf, width, fillchar, res, dt):
        buf = np.array(buf, dtype=dt)
        fillchar = np.array(fillchar, dtype=dt)
        res = np.array(res, dtype=dt)
        assert_array_equal(np.strings.rjust(buf, width, fillchar), res)

    @pytest.mark.parametrize("buf,width,res", [
        ('123', 2, '123'),
        ('123', 3, '123'),
        ('0123', 4, '0123'),
        ('+123', 3, '+123'),
        ('+123', 4, '+123'),
        ('+123', 5, '+0123'),
        ('+0123', 5, '+0123'),
        ('-123', 3, '-123'),
        ('-123', 4, '-123'),
        ('-0123', 5, '-0123'),
        ('000', 3, '000'),
        ('34', 1, '34'),
        ('0034', 4, '0034'),
    ])
    def test_zfill(self, buf, width, res, dt):
        buf = np.array(buf, dtype=dt)
        res = np.array(res, dtype=dt)
        assert_array_equal(np.strings.zfill(buf, width), res)

    @pytest.mark.parametrize("buf,sep,res1,res2,res3", [
        ("this is the partition method", "ti", "this is the par",
            "ti", "tion method"),
        ("http://www.python.org", "://", "http", "://", "www.python.org"),
        ("http://www.python.org", "?", "http://www.python.org", "", ""),
        ("http://www.python.org", "http://", "", "http://", "www.python.org"),
        ("http://www.python.org", "org", "http://www.python.", "org", ""),
        ("http://www.python.org", ["://", "?", "http://", "org"],
            ["http", "http://www.python.org", "", "http://www.python."],
            ["://", "", "http://", "org"],
            ["www.python.org", "", "www.python.org", ""]),
        ("mississippi", "ss", "mi", "ss", "issippi"),
        ("mississippi", "i", "m", "i", "ssissippi"),
        ("mississippi", "w", "mississippi", "", ""),
    ])
    def test_partition(self, buf, sep, res1, res2, res3, dt):
        buf = np.array(buf, dtype=dt)
        sep = np.array(sep, dtype=dt)
        res1 = np.array(res1, dtype=dt)
        res2 = np.array(res2, dtype=dt)
        res3 = np.array(res3, dtype=dt)
        act1, act2, act3 = np.strings.partition(buf, sep)
        assert_array_equal(act1, res1)
        assert_array_equal(act2, res2)
        assert_array_equal(act3, res3)
        assert_array_equal(act1 + act2 + act3, buf)

    @pytest.mark.parametrize("buf,sep,res1,res2,res3", [
        ("this is the partition method", "ti", "this is the parti",
            "ti", "on method"),
        ("http://www.python.org", "://", "http", "://", "www.python.org"),
        ("http://www.python.org", "?", "", "", "http://www.python.org"),
        ("http://www.python.org", "http://", "", "http://", "www.python.org"),
        ("http://www.python.org", "org", "http://www.python.", "org", ""),
        ("http://www.python.org", ["://", "?", "http://", "org"],
            ["http", "", "", "http://www.python."],
            ["://", "", "http://", "org"],
            ["www.python.org", "http://www.python.org", "www.python.org", ""]),
        ("mississippi", "ss", "missi", "ss", "ippi"),
        ("mississippi", "i", "mississipp", "i", ""),
        ("mississippi", "w", "", "", "mississippi"),
    ])
    def test_rpartition(self, buf, sep, res1, res2, res3, dt):
        buf = np.array(buf, dtype=dt)
        sep = np.array(sep, dtype=dt)
        res1 = np.array(res1, dtype=dt)
        res2 = np.array(res2, dtype=dt)
        res3 = np.array(res3, dtype=dt)
        act1, act2, act3 = np.strings.rpartition(buf, sep)
        assert_array_equal(act1, res1)
        assert_array_equal(act2, res2)
        assert_array_equal(act3, res3)
        assert_array_equal(act1 + act2 + act3, buf)


@pytest.mark.parametrize("dt", ["U", "T"])
class TestMethodsWithUnicode:
    @pytest.mark.parametrize("in_,out", [
        ("", False),
        ("a", False),
        ("0", True),
        ("\u2460", False),  # CIRCLED DIGIT 1
        ("\xbc", False),  # VULGAR FRACTION ONE QUARTER
        ("\u0660", True),  # ARABIC_INDIC DIGIT ZERO
        ("012345", True),
        ("012345a", False),
        (["0", "a"], [True, False]),
    ])
    def test_isdecimal_unicode(self, in_, out, dt):
        buf = np.array(in_, dtype=dt)
        assert_array_equal(np.strings.isdecimal(buf), out)

    @pytest.mark.parametrize("in_,out", [
        ("", False),
        ("a", False),
        ("0", True),
        ("\u2460", True),  # CIRCLED DIGIT 1
        ("\xbc", True),  # VULGAR FRACTION ONE QUARTER
        ("\u0660", True),  # ARABIC_INDIC DIGIT ZERO
        ("012345", True),
        ("012345a", False),
        (["0", "a"], [True, False]),
    ])
    def test_isnumeric_unicode(self, in_, out, dt):
        buf = np.array(in_, dtype=dt)
        assert_array_equal(np.strings.isnumeric(buf), out)

    @pytest.mark.parametrize("buf,old,new,count,res", [
        ("...\u043c......<", "<", "&lt;", -1, "...\u043c......&lt;"),
        ("Ae¢☃€ 😊" * 2, "A", "B", -1, "Be¢☃€ 😊Be¢☃€ 😊"),
        ("Ae¢☃€ 😊" * 2, "😊", "B", -1, "Ae¢☃€ BAe¢☃€ B"),
    ])
    def test_replace_unicode(self, buf, old, new, count, res, dt):
        buf = np.array(buf, dtype=dt)
        old = np.array(old, dtype=dt)
        new = np.array(new, dtype=dt)
        res = np.array(res, dtype=dt)
        assert_array_equal(np.strings.replace(buf, old, new, count), res)

    @pytest.mark.parametrize("in_", [
        '\U00010401',
        '\U00010427',
        '\U00010429',
        '\U0001044E',
        '\U0001D7F6',
        '\U00011066',
        '\U000104A0',
        pytest.param('\U0001F107', marks=pytest.mark.xfail(
            sys.platform == 'win32' and IS_PYPY_LT_7_3_16,
            reason="PYPY bug in Py_UNICODE_ISALNUM",
            strict=True)),
    ])
    def test_isalnum_unicode(self, in_, dt):
        in_ = np.array(in_, dtype=dt)
        assert_array_equal(np.strings.isalnum(in_), True)

    @pytest.mark.parametrize("in_,out", [
        ('\u1FFc', False),
        ('\u2167', False),
        ('\U00010401', False),
        ('\U00010427', False),
        ('\U0001F40D', False),
        ('\U0001F46F', False),
        ('\u2177', True),
        pytest.param('\U00010429', True, marks=pytest.mark.xfail(
            sys.platform == 'win32' and IS_PYPY_LT_7_3_16,
            reason="PYPY bug in Py_UNICODE_ISLOWER",
            strict=True)),
        ('\U0001044E', True),
    ])
    def test_islower_unicode(self, in_, out, dt):
        in_ = np.array(in_, dtype=dt)
        assert_array_equal(np.strings.islower(in_), out)

    @pytest.mark.parametrize("in_,out", [
        ('\u1FFc', False),
        ('\u2167', True),
        ('\U00010401', True),
        ('\U00010427', True),
        ('\U0001F40D', False),
        ('\U0001F46F', False),
        ('\u2177', False),
        pytest.param('\U00010429', False, marks=pytest.mark.xfail(
            sys.platform == 'win32' and IS_PYPY_LT_7_3_16,
            reason="PYPY bug in Py_UNICODE_ISUPPER",
            strict=True)),
        ('\U0001044E', False),
    ])
    def test_isupper_unicode(self, in_, out, dt):
        in_ = np.array(in_, dtype=dt)
        assert_array_equal(np.strings.isupper(in_), out)

    @pytest.mark.parametrize("in_,out", [
        ('\u1FFc', True),
        ('Greek \u1FFcitlecases ...', True),
        pytest.param('\U00010401\U00010429', True, marks=pytest.mark.xfail(
            sys.platform == 'win32' and IS_PYPY_LT_7_3_16,
            reason="PYPY bug in Py_UNICODE_ISISTITLE",
            strict=True)),
        ('\U00010427\U0001044E', True),
        pytest.param('\U00010429', False, marks=pytest.mark.xfail(
            sys.platform == 'win32' and IS_PYPY_LT_7_3_16,
            reason="PYPY bug in Py_UNICODE_ISISTITLE",
            strict=True)),
        ('\U0001044E', False),
        ('\U0001F40D', False),
        ('\U0001F46F', False),
    ])
    def test_istitle_unicode(self, in_, out, dt):
        in_ = np.array(in_, dtype=dt)
        assert_array_equal(np.strings.istitle(in_), out)

    @pytest.mark.parametrize("buf,sub,start,end,res", [
        ("Ae¢☃€ 😊" * 2, "😊", 0, None, 6),
        ("Ae¢☃€ 😊" * 2, "😊", 7, None, 13),
    ])
    def test_index_unicode(self, buf, sub, start, end, res, dt):
        buf = np.array(buf, dtype=dt)
        sub = np.array(sub, dtype=dt)
        assert_array_equal(np.strings.index(buf, sub, start, end), res)

    def test_index_raises_unicode(self, dt):
        with pytest.raises(ValueError, match="substring not found"):
            np.strings.index("Ae¢☃€ 😊", "😀")

    @pytest.mark.parametrize("buf,res", [
        ("Ae¢☃€ \t 😊", "Ae¢☃€    😊"),
        ("\t\U0001044E", "        \U0001044E"),
    ])
    def test_expandtabs(self, buf, res, dt):
        buf = np.array(buf, dtype=dt)
        res = np.array(res, dtype=dt)
        assert_array_equal(np.strings.expandtabs(buf), res)

    @pytest.mark.parametrize("buf,width,fillchar,res", [
        ('x', 2, '\U0001044E', 'x\U0001044E'),
        ('x', 3, '\U0001044E', '\U0001044Ex\U0001044E'),
        ('x', 4, '\U0001044E', '\U0001044Ex\U0001044E\U0001044E'),
    ])
    def test_center(self, buf, width, fillchar, res, dt):
        buf = np.array(buf, dtype=dt)
        fillchar = np.array(fillchar, dtype=dt)
        res = np.array(res, dtype=dt)
        assert_array_equal(np.strings.center(buf, width, fillchar), res)

    @pytest.mark.parametrize("buf,width,fillchar,res", [
        ('x', 2, '\U0001044E', 'x\U0001044E'),
        ('x', 3, '\U0001044E', 'x\U0001044E\U0001044E'),
        ('x', 4, '\U0001044E', 'x\U0001044E\U0001044E\U0001044E'),
    ])
    def test_ljust(self, buf, width, fillchar, res, dt):
        buf = np.array(buf, dtype=dt)
        fillchar = np.array(fillchar, dtype=dt)
        res = np.array(res, dtype=dt)
        assert_array_equal(np.strings.ljust(buf, width, fillchar), res)

    @pytest.mark.parametrize("buf,width,fillchar,res", [
        ('x', 2, '\U0001044E', '\U0001044Ex'),
        ('x', 3, '\U0001044E', '\U0001044E\U0001044Ex'),
        ('x', 4, '\U0001044E', '\U0001044E\U0001044E\U0001044Ex'),
    ])
    def test_rjust(self, buf, width, fillchar, res, dt):
        buf = np.array(buf, dtype=dt)
        fillchar = np.array(fillchar, dtype=dt)
        res = np.array(res, dtype=dt)
        assert_array_equal(np.strings.rjust(buf, width, fillchar), res)

    @pytest.mark.parametrize("buf,sep,res1,res2,res3", [
        ("āāāāĀĀĀĀ", "Ă", "āāāāĀĀĀĀ", "", ""),
        ("āāāāĂĀĀĀĀ", "Ă", "āāāā", "Ă", "ĀĀĀĀ"),
        ("āāāāĂĂĀĀĀĀ", "ĂĂ", "āāāā", "ĂĂ", "ĀĀĀĀ"),
        ("𐌁𐌁𐌁𐌁𐌀𐌀𐌀𐌀", "𐌂", "𐌁𐌁𐌁𐌁𐌀𐌀𐌀𐌀", "", ""),
        ("𐌁𐌁𐌁𐌁𐌂𐌀𐌀𐌀𐌀", "𐌂", "𐌁𐌁𐌁𐌁", "𐌂", "𐌀𐌀𐌀𐌀"),
        ("𐌁𐌁𐌁𐌁𐌂𐌂𐌀𐌀𐌀𐌀", "𐌂𐌂", "𐌁𐌁𐌁𐌁", "𐌂𐌂", "𐌀𐌀𐌀𐌀"),
        ("𐌁𐌁𐌁𐌁𐌂𐌂𐌂𐌂𐌀𐌀𐌀𐌀", "𐌂𐌂𐌂𐌂", "𐌁𐌁𐌁𐌁", "𐌂𐌂𐌂𐌂", "𐌀𐌀𐌀𐌀"),
    ])
    def test_partition(self, buf, sep, res1, res2, res3, dt):
        buf = np.array(buf, dtype=dt)
        sep = np.array(sep, dtype=dt)
        res1 = np.array(res1, dtype=dt)
        res2 = np.array(res2, dtype=dt)
        res3 = np.array(res3, dtype=dt)
        act1, act2, act3 = np.strings.partition(buf, sep)
        assert_array_equal(act1, res1)
        assert_array_equal(act2, res2)
        assert_array_equal(act3, res3)
        assert_array_equal(act1 + act2 + act3, buf)

    @pytest.mark.parametrize("buf,sep,res1,res2,res3", [
        ("āāāāĀĀĀĀ", "Ă", "", "", "āāāāĀĀĀĀ"),
        ("āāāāĂĀĀĀĀ", "Ă", "āāāā", "Ă", "ĀĀĀĀ"),
        ("āāāāĂĂĀĀĀĀ", "ĂĂ", "āāāā", "ĂĂ", "ĀĀĀĀ"),
        ("𐌁𐌁𐌁𐌁𐌀𐌀𐌀𐌀", "𐌂", "", "", "𐌁𐌁𐌁𐌁𐌀𐌀𐌀𐌀"),
        ("𐌁𐌁𐌁𐌁𐌂𐌀𐌀𐌀𐌀", "𐌂", "𐌁𐌁𐌁𐌁", "𐌂", "𐌀𐌀𐌀𐌀"),
        ("𐌁𐌁𐌁𐌁𐌂𐌂𐌀𐌀𐌀𐌀", "𐌂𐌂", "𐌁𐌁𐌁𐌁", "𐌂𐌂", "𐌀𐌀𐌀𐌀"),
    ])
    def test_rpartition(self, buf, sep, res1, res2, res3, dt):
        buf = np.array(buf, dtype=dt)
        sep = np.array(sep, dtype=dt)
        res1 = np.array(res1, dtype=dt)
        res2 = np.array(res2, dtype=dt)
        res3 = np.array(res3, dtype=dt)
        act1, act2, act3 = np.strings.rpartition(buf, sep)
        assert_array_equal(act1, res1)
        assert_array_equal(act2, res2)
        assert_array_equal(act3, res3)
        assert_array_equal(act1 + act2 + act3, buf)

    @pytest.mark.parametrize("method", ["strip", "lstrip", "rstrip"])
    @pytest.mark.parametrize(
        "source,strip",
        [
            ("λμ", "μ"),
            ("λμ", "λ"),
            ("λ"*5 + "μ"*2, "μ"),
            ("λ" * 5 + "μ" * 2, "λ"),
            ("λ" * 5 + "A" + "μ" * 2, "μλ"),
            ("λμ" * 5, "μ"),
            ("λμ" * 5, "λ"),
    ])
    def test_strip_functions_unicode(self, source, strip, method, dt):
        src_array = np.array([source], dtype=dt)

        npy_func = getattr(np.strings, method)
        py_func = getattr(str, method)

        expected = np.array([py_func(source, strip)], dtype=dt)
        actual = npy_func(src_array, strip)

        assert_array_equal(actual, expected)


class TestMixedTypeMethods:
    def test_center(self):
        buf = np.array("😊", dtype="U")
        fill = np.array("*", dtype="S")
        res = np.array("*😊*", dtype="U")
        assert_array_equal(np.strings.center(buf, 3, fill), res)

        buf = np.array("s", dtype="S")
        fill = np.array("*", dtype="U")
        res = np.array("*s*", dtype="S")
        assert_array_equal(np.strings.center(buf, 3, fill), res)

        with pytest.raises(ValueError, match="'ascii' codec can't encode"):
            buf = np.array("s", dtype="S")
            fill = np.array("😊", dtype="U")
            np.strings.center(buf, 3, fill)

    def test_ljust(self):
        buf = np.array("😊", dtype="U")
        fill = np.array("*", dtype="S")
        res = np.array("😊**", dtype="U")
        assert_array_equal(np.strings.ljust(buf, 3, fill), res)

        buf = np.array("s", dtype="S")
        fill = np.array("*", dtype="U")
        res = np.array("s**", dtype="S")
        assert_array_equal(np.strings.ljust(buf, 3, fill), res)

        with pytest.raises(ValueError, match="'ascii' codec can't encode"):
            buf = np.array("s", dtype="S")
            fill = np.array("😊", dtype="U")
            np.strings.ljust(buf, 3, fill)

    def test_rjust(self):
        buf = np.array("😊", dtype="U")
        fill = np.array("*", dtype="S")
        res = np.array("**😊", dtype="U")
        assert_array_equal(np.strings.rjust(buf, 3, fill), res)

        buf = np.array("s", dtype="S")
        fill = np.array("*", dtype="U")
        res = np.array("**s", dtype="S")
        assert_array_equal(np.strings.rjust(buf, 3, fill), res)

        with pytest.raises(ValueError, match="'ascii' codec can't encode"):
            buf = np.array("s", dtype="S")
            fill = np.array("😊", dtype="U")
            np.strings.rjust(buf, 3, fill)


class TestUnicodeOnlyMethodsRaiseWithBytes:
    def test_isdecimal_raises(self):
        in_ = np.array(b"1")
        with assert_raises(TypeError):
            np.strings.isdecimal(in_)

    def test_isnumeric_bytes(self):
        in_ = np.array(b"1")
        with assert_raises(TypeError):
            np.strings.isnumeric(in_)


def check_itemsize(n_elem, dt):
    if dt == "T":
        return np.dtype(dt).itemsize
    if dt == "S":
        return n_elem
    if dt == "U":
        return n_elem * 4

@pytest.mark.parametrize("dt", ["S", "U", "T"])
class TestReplaceOnArrays:

    def test_replace_count_and_size(self, dt):
        a = np.array(["0123456789" * i for i in range(4)], dtype=dt)
        r1 = np.strings.replace(a, "5", "ABCDE")
        assert r1.dtype.itemsize == check_itemsize(3*10 + 3*4, dt)
        r1_res = np.array(["01234ABCDE6789" * i for i in range(4)], dtype=dt)
        assert_array_equal(r1, r1_res)
        r2 = np.strings.replace(a, "5", "ABCDE", 1)
        assert r2.dtype.itemsize == check_itemsize(3*10 + 4, dt)
        r3 = np.strings.replace(a, "5", "ABCDE", 0)
        assert r3.dtype.itemsize == a.dtype.itemsize
        assert_array_equal(r3, a)
        # Negative values mean to replace all.
        r4 = np.strings.replace(a, "5", "ABCDE", -1)
        assert r4.dtype.itemsize == check_itemsize(3*10 + 3*4, dt)
        assert_array_equal(r4, r1)
        # We can do count on an element-by-element basis.
        r5 = np.strings.replace(a, "5", "ABCDE", [-1, -1, -1, 1])
        assert r5.dtype.itemsize == check_itemsize(3*10 + 4, dt)
        assert_array_equal(r5, np.array(
            ["01234ABCDE6789" * i for i in range(3)]
            + ["01234ABCDE6789" + "0123456789" * 2], dtype=dt))

    def test_replace_broadcasting(self, dt):
        a = np.array("0,0,0", dtype=dt)
        r1 = np.strings.replace(a, "0", "1", np.arange(3))
        assert r1.dtype == a.dtype
        assert_array_equal(r1, np.array(["0,0,0", "1,0,0", "1,1,0"], dtype=dt))
        r2 = np.strings.replace(a, "0", [["1"], ["2"]], np.arange(1, 4))
        assert_array_equal(r2, np.array([["1,0,0", "1,1,0", "1,1,1"],
                                         ["2,0,0", "2,2,0", "2,2,2"]],
                                        dtype=dt))
        r3 = np.strings.replace(a, ["0", "0,0", "0,0,0"], "X")
        assert_array_equal(r3, np.array(["X,X,X", "X,0", "X"], dtype=dt))
Metadata
View Raw File