From 17a5a83fb4fa9b1d8a27fd0cc9386ae898a1d75d Mon Sep 17 00:00:00 2001
From: xbe
Date: Sun, 23 Mar 2014 23:31:58 -0700
Subject: [PATCH] Implement str.rfind() and add tests for it.

find_subbytes() gains a direction argument (positive scans forward from
the start of the haystack, otherwise backward from its end) and is now
shared by find/rfind, the `in` operator, strip, replace and
partition/rpartition.

str_finder() returns -1 up front when end < start: the window length
passed to find_subbytes() is unsigned, so end - start would otherwise
wrap around to a huge value and the scan would read outside the string.
---
Reviewer note: hunk offsets and the diffstat below were adjusted for the
added end < start guard and its test; the blob ids on the "index" lines
are still those of the original patch.

 py/objstr.c                  | 94 +++++++++++++++++-------------------
 tests/basics/string_rfind.py | 24 +++++++++
 2 files changed, 68 insertions(+), 50 deletions(-)
 create mode 100644 tests/basics/string_rfind.py

diff --git a/py/objstr.c b/py/objstr.c
index b3aac25aed..00586a3b3b 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -186,19 +186,26 @@ wrong_args:
 
 // like strstr but with specified length and allows \0 bytes
 // TODO replace with something more efficient/standard
-STATIC const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) {
+STATIC const byte *find_subbytes(const byte *haystack, machine_uint_t hlen, const byte *needle, machine_uint_t nlen, machine_int_t direction) {
     if (hlen >= nlen) {
-        for (uint i = 0; i <= hlen - nlen; i++) {
-            bool found = true;
-            for (uint j = 0; j < nlen; j++) {
-                if (haystack[i + j] != needle[j]) {
-                    found = false;
-                    break;
-                }
+        machine_uint_t str_index, str_index_end;
+        if (direction > 0) {
+            str_index = 0;
+            str_index_end = hlen - nlen;
+        } else {
+            str_index = hlen - nlen;
+            str_index_end = 0;
+        }
+        for (;;) {
+            if (memcmp(&haystack[str_index], needle, nlen) == 0) {
+                //found
+                return haystack + str_index;
             }
-            if (found) {
-                return haystack + i;
+            if (str_index == str_index_end) {
+                //not found
+                break;
             }
+            str_index += direction;
         }
     }
     return NULL;
@@ -260,7 +267,7 @@ STATIC mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
             /* NOTE `a in b` is `b.__contains__(a)` */
             if (MP_OBJ_IS_STR(rhs_in)) {
                 GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
-                return MP_BOOL(find_subbytes(lhs_data, lhs_len, rhs_data, rhs_len) != NULL);
+                return MP_BOOL(find_subbytes(lhs_data, lhs_len, rhs_data, rhs_len, 1) != NULL);
             }
             break;
 
@@ -382,7 +389,7 @@ STATIC mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
     return res;
 }
 
-STATIC mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
+STATIC mp_obj_t str_finder(uint n_args, const mp_obj_t *args, machine_int_t direction) {
     assert(2 <= n_args && n_args <= 4);
     assert(MP_OBJ_IS_STR(args[0]));
     assert(MP_OBJ_IS_STR(args[1]));
@@ -399,20 +406,29 @@ STATIC mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
         end = mp_get_index(&str_type, haystack_len, args[3], true);
     }
 
-    const byte *p = find_subbytes(haystack + start, haystack_len - start, needle, needle_len);
+    if (end < start) {
+        // empty search window; end - start would wrap around as an unsigned length
+        return MP_OBJ_NEW_SMALL_INT(-1);
+    }
+
+    const byte *p = find_subbytes(haystack + start, end - start, needle, needle_len, direction);
     if (p == NULL) {
         // not found
         return MP_OBJ_NEW_SMALL_INT(-1);
     } else {
         // found
-        machine_int_t pos = p - haystack;
-        if (pos + needle_len > end) {
-            pos = -1;
-        }
-        return MP_OBJ_NEW_SMALL_INT(pos);
+        return MP_OBJ_NEW_SMALL_INT(p - haystack);
     }
 }
 
+STATIC mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
+    return str_finder(n_args, args, 1);
+}
+
+STATIC mp_obj_t str_rfind(uint n_args, const mp_obj_t *args) {
+    return str_finder(n_args, args, -1);
+}
+
 // TODO: (Much) more variety in args
 STATIC mp_obj_t str_startswith(mp_obj_t self_in, mp_obj_t arg) {
     GET_STR_DATA_LEN(self_in, str, str_len);
@@ -423,15 +439,6 @@ STATIC mp_obj_t str_startswith(mp_obj_t self_in, mp_obj_t arg) {
     return MP_BOOL(memcmp(str, prefix, prefix_len) == 0);
 }
 
-STATIC bool chr_in_str(const byte* const str, const machine_uint_t str_len, int c) {
-    for (machine_uint_t i = 0; i < str_len; i++) {
-        if (str[i] == c) {
-            return true;
-        }
-    }
-    return false;
-}
-
 STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
     assert(1 <= n_args && n_args <= 2);
     assert(MP_OBJ_IS_STR(args[0]));
@@ -456,7 +463,7 @@ STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
     bool first_good_char_pos_set = false;
     machine_uint_t last_good_char_pos = 0;
     for (machine_uint_t i = 0; i < orig_str_len; i++) {
-        if (!chr_in_str(chars_to_del, chars_to_del_len, orig_str[i])) {
+        if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) {
             last_good_char_pos = i;
             if (!first_good_char_pos_set) {
                 first_good_char_pos = i;
@@ -546,7 +553,7 @@ STATIC mp_obj_t str_replace(uint n_args, const mp_obj_t *args) {
         const byte *old_occurrence;
         const byte *offset_ptr = str;
         machine_uint_t offset_num = 0;
-        while ((old_occurrence = find_subbytes(offset_ptr, str_len - offset_num, old, old_len)) != NULL) {
+        while ((old_occurrence = find_subbytes(offset_ptr, str_len - offset_num, old, old_len, 1)) != NULL) {
             // copy from just after end of last occurrence of to-be-replaced string to right before start of next occurrence
             if (data != NULL) {
                 memcpy(data + replaced_str_index, offset_ptr, old_occurrence - offset_ptr);
@@ -646,27 +653,12 @@ STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, machine_int_t di
         result[2] = self_in;
     }
 
-    if (str_len >= sep_len) {
-        machine_uint_t str_index, str_index_end;
-        if (direction > 0) {
-            str_index = 0;
-            str_index_end = str_len - sep_len;
-        } else {
-            str_index = str_len - sep_len;
-            str_index_end = 0;
-        }
-        for (;;) {
-            if (memcmp(&str[str_index], sep, sep_len) == 0) {
-                result[0] = mp_obj_new_str(str, str_index, false);
-                result[1] = arg;
-                result[2] = mp_obj_new_str(str + str_index + sep_len, str_len - str_index - sep_len, false);
-                break;
-            }
-            if (str_index == str_index_end) {
-                break;
-            }
-            str_index += direction;
-        }
+    const byte *position_ptr = find_subbytes(str, str_len, sep, sep_len, direction);
+    if (position_ptr != NULL) {
+        machine_uint_t position = position_ptr - str;
+        result[0] = mp_obj_new_str(str, position, false);
+        result[1] = arg;
+        result[2] = mp_obj_new_str(str + position + sep_len, str_len - position - sep_len, false);
     }
 
     return mp_obj_new_tuple(3, result);
@@ -695,6 +687,7 @@ STATIC machine_int_t str_get_buffer(mp_obj_t self_in, buffer_info_t *bufinfo, in
 }
 
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find);
+STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rfind_obj, 2, 4, str_rfind);
 STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, str_split);
 STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_startswith_obj, str_startswith);
@@ -707,6 +700,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition);
 
 STATIC const mp_method_t str_type_methods[] = {
     { "find", &str_find_obj },
+    { "rfind", &str_rfind_obj },
     { "join", &str_join_obj },
     { "split", &str_split_obj },
     { "startswith", &str_startswith_obj },
diff --git a/tests/basics/string_rfind.py b/tests/basics/string_rfind.py
new file mode 100644
index 0000000000..4d0e84018f
--- /dev/null
+++ b/tests/basics/string_rfind.py
@@ -0,0 +1,24 @@
+print("hello world".rfind("ll"))
+print("hello world".rfind("ll", None))
+print("hello world".rfind("ll", 1))
+print("hello world".rfind("ll", 1, None))
+print("hello world".rfind("ll", None, None))
+print("hello world".rfind("ll", 1, -1))
+print("hello world".rfind("ll", 1, 1))
+print("hello world".rfind("ll", 1, 2))
+print("hello world".rfind("ll", 1, 3))
+print("hello world".rfind("ll", 1, 4))
+print("hello world".rfind("ll", 1, 5))
+print("hello world".rfind("ll", 5, 2))
+print("hello world".rfind("ll", -100))
+print("0000".rfind('0'))
+print("0000".rfind('0', 0))
+print("0000".rfind('0', 1))
+print("0000".rfind('0', 2))
+print("0000".rfind('0', 3))
+print("0000".rfind('0', 4))
+print("0000".rfind('0', 5))
+print("0000".rfind('-1', 3))
+print("0000".rfind('1', 3))
+print("0000".rfind('1', 4))
+print("0000".rfind('1', 5))