Skip to content

Commit 737db5b

Browse files
committed
Merge pull request #1812 from killerswan/indexing2
(core::str) Fixing index and rindex
2 parents c82a0d7 + 207bb3d commit 737db5b

File tree

7 files changed

+151
-87
lines changed

7 files changed

+151
-87
lines changed

src/cargo/cargo.rs

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -651,25 +651,27 @@ fn cmd_install(c: cargo) unsafe {
651651

652652
if str::starts_with(target, "uuid:") {
653653
let uuid = rest(target, 5u);
654-
let idx = str::index(uuid, '/' as u8);
655-
if idx != -1 {
656-
let source = str::unsafe::slice_bytes(uuid, 0u, idx as uint);
657-
uuid = str::unsafe::slice_bytes(uuid, idx as uint + 1u,
658-
str::byte_len(uuid));
659-
install_uuid_specific(c, wd, source, uuid);
660-
} else {
661-
install_uuid(c, wd, uuid);
654+
alt str::index(uuid, '/') {
655+
option::some(idx) {
656+
let source = str::slice(uuid, 0u, idx);
657+
uuid = str::slice(uuid, idx + 1u, str::char_len(uuid));
658+
install_uuid_specific(c, wd, source, uuid);
659+
}
660+
option::none {
661+
install_uuid(c, wd, uuid);
662+
}
662663
}
663664
} else {
664665
let name = target;
665-
let idx = str::index(name, '/' as u8);
666-
if idx != -1 {
667-
let source = str::unsafe::slice_bytes(name, 0u, idx as uint);
668-
name = str::unsafe::slice_bytes(name, idx as uint + 1u,
669-
str::byte_len(name));
670-
install_named_specific(c, wd, source, name);
671-
} else {
672-
install_named(c, wd, name);
666+
alt str::index(name, '/') {
667+
option::some(idx) {
668+
let source = str::slice(name, 0u, idx);
669+
name = str::slice(name, idx + 1u, str::char_len(name));
670+
install_named_specific(c, wd, source, name);
671+
}
672+
option::none {
673+
install_named(c, wd, name);
674+
}
673675
}
674676
}
675677
}

src/comp/back/link.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -109,14 +109,16 @@ mod write {
109109
// Decides what to call an intermediate file, given the name of the output
110110
// and the extension to use.
111111
fn mk_intermediate_name(output_path: str, extension: str) -> str unsafe {
112-
let dot_pos = str::index(output_path, '.' as u8);
113-
let stem;
114-
if dot_pos < 0 {
115-
stem = output_path;
116-
} else { stem = str::unsafe::slice_bytes(output_path, 0u,
117-
dot_pos as uint); }
112+
let stem = alt str::index(output_path, '.') {
113+
option::some(dot_pos) {
114+
str::slice(output_path, 0u, dot_pos)
115+
}
116+
option::none { output_path }
117+
};
118+
118119
ret stem + "." + extension;
119120
}
121+
120122
fn run_passes(sess: session, llmod: ModuleRef, output: str) {
121123
let opts = sess.opts;
122124
if opts.time_llvm_passes { llvm::LLVMRustEnableTimePasses(); }

src/comp/syntax/codemap.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -119,16 +119,13 @@ fn get_line(fm: filemap, line: int) -> str unsafe {
119119
let end: uint;
120120
if line as uint < vec::len(fm.lines) - 1u {
121121
end = fm.lines[line + 1].byte - fm.start_pos.byte;
122+
ret str::unsafe::slice_bytes(*fm.src, begin, end);
122123
} else {
123124
// If we're not done parsing the file, we're at the limit of what's
124125
// parsed. If we just slice the rest of the string, we'll print out
125126
// the remainder of the file, which is undesirable.
126-
end = str::byte_len(*fm.src);
127-
let rest = str::unsafe::slice_bytes(*fm.src, begin, end);
128-
let newline = str::index(rest, '\n' as u8);
129-
if newline != -1 { end = begin + (newline as uint); }
127+
ret str::splitn_char(*fm.src, '\n', 1u)[0];
130128
}
131-
ret str::unsafe::slice_bytes(*fm.src, begin, end);
132129
}
133130

134131
fn lookup_byte_offset(cm: codemap::codemap, chpos: uint)

src/fuzzer/fuzzer.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,9 @@ fn check_variants_T<T: copy>(
283283
}
284284
}
285285

286-
fn last_part(filename: str) -> str unsafe {
287-
let ix = str::rindex(filename, 47u8 /* '/' */);
288-
assert ix >= 0;
289-
str::unsafe::slice_bytes(filename, ix as uint + 1u, str::byte_len(filename) - 3u)
286+
fn last_part(filename: str) -> str {
287+
let ix = option::get(str::rindex(filename, '/'));
288+
str::slice(filename, ix + 1u, str::char_len(filename) - 3u)
290289
}
291290

292291
enum happiness { passed, cleanly_rejected(str), known_bug(str), failed(str), }

src/libcore/str.rs

Lines changed: 96 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -253,15 +253,12 @@ Function: pop_char
253253
Remove the final character from a string and return it.
254254
255255
Failure:
256-
257256
If the string does not contain any characters.
258257
*/
259258
fn pop_char(&s: str) -> char unsafe {
260259
let end = byte_len(s);
261-
while end > 0u && s[end - 1u] & 192u8 == tag_cont_u8 { end -= 1u; }
262-
assert (end > 0u);
263-
let ch = char_at(s, end - 1u);
264-
s = unsafe::slice_bytes(s, 0u, end - 1u);
260+
let {ch:ch, prev:end} = char_range_at_reverse(s, end);
261+
s = unsafe::slice_bytes(s, 0u, end);
265262
ret ch;
266263
}
267264

@@ -868,32 +865,50 @@ fn lines_iter(ss: str, ff: fn(&&str)) {
868865
Section: Searching
869866
*/
870867

871-
/*
872-
Function: index
868+
// Function: index
869+
//
870+
// Returns the index of the first matching char
871+
// (as option some/none)
872+
fn index(ss: str, cc: char) -> option<uint> {
873+
let bii = 0u;
874+
let cii = 0u;
875+
let len = byte_len(ss);
876+
while bii < len {
877+
let {ch, next} = char_range_at(ss, bii);
873878

874-
Returns the index of the first matching byte. Returns -1 if
875-
no match is found.
879+
// found here?
880+
if ch == cc {
881+
ret option::some(cii);
882+
}
876883

877-
FIXME: UTF-8
878-
*/
879-
fn index(s: str, c: u8) -> int {
880-
let i: int = 0;
881-
for k: u8 in s { if k == c { ret i; } i += 1; }
882-
ret -1;
883-
}
884+
cii += 1u;
885+
bii = next;
886+
}
884887

885-
/*
886-
Function: rindex
888+
// wasn't found
889+
ret option::none;
890+
}
887891

888-
Returns the index of the last matching byte. Returns -1
889-
if no match is found.
892+
// Function: rindex
893+
//
894+
// Returns the index of the last matching char
895+
// (as option some/none)
896+
fn rindex(ss: str, cc: char) -> option<uint> {
897+
let bii = byte_len(ss);
898+
let cii = char_len(ss);
899+
while bii > 0u {
900+
let {ch, prev} = char_range_at_reverse(ss, bii);
901+
cii -= 1u;
902+
bii = prev;
903+
904+
// found here?
905+
if ch == cc {
906+
ret option::some(cii);
907+
}
908+
}
890909

891-
FIXME: UTF-8
892-
*/
893-
fn rindex(s: str, c: u8) -> int {
894-
let n: int = byte_len(s) as int;
895-
while n >= 0 { if s[n] == c { ret n; } n -= 1; }
896-
ret n;
910+
// wasn't found
911+
ret option::none;
897912
}
898913

899914
/*
@@ -1233,6 +1248,25 @@ Pluck a character out of a string
12331248
*/
12341249
fn char_at(s: str, i: uint) -> char { ret char_range_at(s, i).ch; }
12351250

1251+
// Function: char_range_at_reverse
1252+
//
1253+
// Given a byte position and a str, return the previous char and its position
1254+
// This function can be used to iterate over a unicode string in reverse.
1255+
fn char_range_at_reverse(ss: str, start: uint) -> {ch: char, prev: uint} {
1256+
let prev = start;
1257+
1258+
// while there is a previous byte == 10......
1259+
while prev > 0u && ss[prev - 1u] & 192u8 == tag_cont_u8 {
1260+
prev -= 1u;
1261+
}
1262+
1263+
// now refer to the initial byte of previous char
1264+
prev -= 1u;
1265+
1266+
let ch = char_at(ss, prev);
1267+
ret {ch:ch, prev:prev};
1268+
}
1269+
12361270
/*
12371271
Function: substr_all
12381272
@@ -1442,13 +1476,42 @@ mod tests {
14421476
}
14431477

14441478
#[test]
1445-
fn test_index_and_rindex() {
1446-
assert (index("hello", 'e' as u8) == 1);
1447-
assert (index("hello", 'o' as u8) == 4);
1448-
assert (index("hello", 'z' as u8) == -1);
1449-
assert (rindex("hello", 'l' as u8) == 3);
1450-
assert (rindex("hello", 'h' as u8) == 0);
1451-
assert (rindex("hello", 'z' as u8) == -1);
1479+
fn test_index() {
1480+
assert ( index("hello", 'h') == option::some(0u));
1481+
assert ( index("hello", 'e') == option::some(1u));
1482+
assert ( index("hello", 'o') == option::some(4u));
1483+
assert ( index("hello", 'z') == option::none);
1484+
}
1485+
1486+
#[test]
1487+
fn test_rindex() {
1488+
assert (rindex("hello", 'l') == option::some(3u));
1489+
assert (rindex("hello", 'o') == option::some(4u));
1490+
assert (rindex("hello", 'h') == option::some(0u));
1491+
assert (rindex("hello", 'z') == option::none);
1492+
}
1493+
1494+
#[test]
1495+
fn test_pop_char() {
1496+
let data = "ประเทศไทย中华";
1497+
let cc = pop_char(data);
1498+
assert "ประเทศไทย中" == data;
1499+
assert '华' == cc;
1500+
}
1501+
1502+
#[test]
1503+
fn test_pop_char_2() {
1504+
let data2 = "华";
1505+
let cc2 = pop_char(data2);
1506+
assert "" == data2;
1507+
assert '华' == cc2;
1508+
}
1509+
1510+
#[test]
1511+
#[should_fail]
1512+
fn test_pop_char_fail() {
1513+
let data = "";
1514+
let _cc3 = pop_char(data);
14521515
}
14531516

14541517
#[test]

src/libstd/fs.rs

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,22 @@ A path or fragment of a filesystem path
3232
*/
3333
type path = str;
3434

35+
fn splitDirnameBasename (pp: path) -> {dirname: str, basename: str} {
36+
let ii;
37+
alt str::rindex(pp, os_fs::path_sep) {
38+
option::some(xx) { ii = xx; }
39+
option::none {
40+
alt str::rindex(pp, os_fs::alt_path_sep) {
41+
option::some(xx) { ii = xx; }
42+
option::none { ret {dirname: ".", basename: pp}; }
43+
}
44+
}
45+
}
46+
47+
ret {dirname: str::slice(pp, 0u, ii),
48+
basename: str::slice(pp, ii + 1u, str::char_len(pp))};
49+
}
50+
3551
/*
3652
Function: dirname
3753
@@ -43,13 +59,8 @@ The dirname of "/usr/share" will be "/usr", but the dirname of
4359
4460
If the path is not prefixed with a directory, then "." is returned.
4561
*/
46-
fn dirname(p: path) -> path unsafe {
47-
let i: int = str::rindex(p, os_fs::path_sep as u8);
48-
if i == -1 {
49-
i = str::rindex(p, os_fs::alt_path_sep as u8);
50-
if i == -1 { ret "."; }
51-
}
52-
ret str::unsafe::slice_bytes(p, 0u, i as uint);
62+
fn dirname(pp: path) -> path {
63+
ret splitDirnameBasename(pp).dirname;
5364
}
5465

5566
/*
@@ -63,18 +74,10 @@ path separators in the path then the returned path is identical to
6374
the provided path. If an empty path is provided or the path ends
6475
with a path separator then an empty path is returned.
6576
*/
66-
fn basename(p: path) -> path unsafe {
67-
let i: int = str::rindex(p, os_fs::path_sep as u8);
68-
if i == -1 {
69-
i = str::rindex(p, os_fs::alt_path_sep as u8);
70-
if i == -1 { ret p; }
71-
}
72-
let len = str::byte_len(p);
73-
if (i + 1) as uint >= len { ret p; }
74-
ret str::unsafe::slice_bytes(p, (i + 1) as uint, len);
77+
fn basename(pp: path) -> path {
78+
ret splitDirnameBasename(pp).basename;
7579
}
7680

77-
7881
// FIXME: Need some typestate to avoid bounds check when len(pre) == 0
7982
/*
8083
Function: connect

src/libstd/getopts.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -230,16 +230,14 @@ fn getopts(args: [str], opts: [opt]) -> result unsafe {
230230
let i_arg = option::none::<str>;
231231
if cur[1] == '-' as u8 {
232232
let tail = str::unsafe::slice_bytes(cur, 2u, curlen);
233-
let eq = str::index(tail, '=' as u8);
234-
if eq == -1 {
233+
let tail_eq = str::splitn_char(tail, '=', 1u);
234+
if vec::len(tail_eq) <= 1u {
235235
names = [long(tail)];
236236
} else {
237237
names =
238-
[long(str::unsafe::slice_bytes(tail,0u,eq as uint))];
238+
[long(tail_eq[0])];
239239
i_arg =
240-
option::some::<str>(str::unsafe::slice_bytes(tail,
241-
(eq as uint) + 1u,
242-
curlen - 2u));
240+
option::some::<str>(tail_eq[1]);
243241
}
244242
} else {
245243
let j = 1u;

0 commit comments

Comments
 (0)