Merge pull request #1812 from killerswan/indexing2

brson · brson · commit 737db5b49a47 · 2012-02-11T17:42:45.000-08:00
(core::str) Fixing index and rindex
diff --git a/src/cargo/cargo.rs b/src/cargo/cargo.rs
@@ -651,25 +651,27 @@ fn cmd_install(c: cargo) unsafe {
 
     if str::starts_with(target, "uuid:") {
         let uuid = rest(target, 5u);
-        let idx = str::index(uuid, '/' as u8);
-        if idx != -1 {
-            let source = str::unsafe::slice_bytes(uuid, 0u, idx as uint);
-            uuid = str::unsafe::slice_bytes(uuid, idx as uint + 1u,
-                                      str::byte_len(uuid));
-            install_uuid_specific(c, wd, source, uuid);
-        } else {
-            install_uuid(c, wd, uuid);
+        alt str::index(uuid, '/') {
+            option::some(idx) {
+               let source = str::slice(uuid, 0u, idx);
+               uuid = str::slice(uuid, idx + 1u, str::char_len(uuid));
+               install_uuid_specific(c, wd, source, uuid);
+            }
+            option::none {
+               install_uuid(c, wd, uuid);
+            }
         }
     } else {
         let name = target;
-        let idx = str::index(name, '/' as u8);
-        if idx != -1 {
-            let source = str::unsafe::slice_bytes(name, 0u, idx as uint);
-            name = str::unsafe::slice_bytes(name, idx as uint + 1u,
-                                      str::byte_len(name));
-            install_named_specific(c, wd, source, name);
-        } else {
-            install_named(c, wd, name);
+        alt str::index(name, '/') {
+            option::some(idx) {
+               let source = str::slice(name, 0u, idx);
+               name = str::slice(name, idx + 1u, str::char_len(name));
+               install_named_specific(c, wd, source, name);
+            }
+            option::none {
+               install_named(c, wd, name);
+            }
         }
     }
 }
diff --git a/src/comp/back/link.rs b/src/comp/back/link.rs
@@ -109,14 +109,16 @@ mod write {
     // Decides what to call an intermediate file, given the name of the output
     // and the extension to use.
     fn mk_intermediate_name(output_path: str, extension: str) -> str unsafe {
-        let dot_pos = str::index(output_path, '.' as u8);
-        let stem;
-        if dot_pos < 0 {
-            stem = output_path;
-        } else { stem = str::unsafe::slice_bytes(output_path, 0u,
-                                                 dot_pos as uint); }
+        let stem = alt str::index(output_path, '.') {
+                       option::some(dot_pos) {
+                           str::slice(output_path, 0u, dot_pos)
+                       }
+                       option::none { output_path }
+                   };
+
         ret stem + "." + extension;
     }
+
     fn run_passes(sess: session, llmod: ModuleRef, output: str) {
         let opts = sess.opts;
         if opts.time_llvm_passes { llvm::LLVMRustEnableTimePasses(); }
diff --git a/src/comp/syntax/codemap.rs b/src/comp/syntax/codemap.rs
@@ -119,16 +119,13 @@ fn get_line(fm: filemap, line: int) -> str unsafe {
     let end: uint;
     if line as uint < vec::len(fm.lines) - 1u {
         end = fm.lines[line + 1].byte - fm.start_pos.byte;
+        ret str::unsafe::slice_bytes(*fm.src, begin, end);
     } else {
         // If we're not done parsing the file, we're at the limit of what's
         // parsed. If we just slice the rest of the string, we'll print out
         // the remainder of the file, which is undesirable.
-        end = str::byte_len(*fm.src);
-        let rest = str::unsafe::slice_bytes(*fm.src, begin, end);
-        let newline = str::index(rest, '\n' as u8);
-        if newline != -1 { end = begin + (newline as uint); }
+        ret str::splitn_char(*fm.src, '\n', 1u)[0];
     }
-    ret str::unsafe::slice_bytes(*fm.src, begin, end);
 }
 
 fn lookup_byte_offset(cm: codemap::codemap, chpos: uint)
diff --git a/src/fuzzer/fuzzer.rs b/src/fuzzer/fuzzer.rs
@@ -283,10 +283,9 @@ fn check_variants_T<T: copy>(
     }
 }
 
-fn last_part(filename: str) -> str unsafe {
-  let ix = str::rindex(filename, 47u8 /* '/' */);
-  assert ix >= 0;
-  str::unsafe::slice_bytes(filename, ix as uint + 1u, str::byte_len(filename) - 3u)
+fn last_part(filename: str) -> str {
+  let ix = option::get(str::rindex(filename, '/'));
+  str::slice(filename, ix + 1u, str::char_len(filename) - 3u)
 }
 
 enum happiness { passed, cleanly_rejected(str), known_bug(str), failed(str), }
diff --git a/src/libcore/str.rs b/src/libcore/str.rs
@@ -253,15 +253,12 @@ Function: pop_char
 Remove the final character from a string and return it.
 
 Failure:
-
 If the string does not contain any characters.
 */
 fn pop_char(&s: str) -> char unsafe {
     let end = byte_len(s);
-    while end > 0u && s[end - 1u] & 192u8 == tag_cont_u8 { end -= 1u; }
-    assert (end > 0u);
-    let ch = char_at(s, end - 1u);
-    s = unsafe::slice_bytes(s, 0u, end - 1u);
+    let {ch:ch, prev:end} = char_range_at_reverse(s, end);
+    s = unsafe::slice_bytes(s, 0u, end);
     ret ch;
 }
 
@@ -868,32 +865,50 @@ fn lines_iter(ss: str, ff: fn(&&str)) {
 Section: Searching
 */
 
-/*
-Function: index
+// Function: index
+//
+// Returns the char index (not the byte offset) of the first matching char
+// (as option some/none)
+fn index(ss: str, cc: char) -> option<uint> {
+    let bii = 0u;
+    let cii = 0u;
+    let len = byte_len(ss);
+    while bii < len {
+        let {ch, next} = char_range_at(ss, bii);
 
-Returns the index of the first matching byte. Returns -1 if
-no match is found.
+        // found here?
+        if ch == cc {
+            ret option::some(cii);
+        }
 
-FIXME: UTF-8
-*/
-fn index(s: str, c: u8) -> int {
-    let i: int = 0;
-    for k: u8 in s { if k == c { ret i; } i += 1; }
-    ret -1;
-}
+        cii += 1u;
+        bii = next;
+    }
 
-/*
-Function: rindex
+    // wasn't found
+    ret option::none;
+}
 
-Returns the index of the last matching byte. Returns -1
-if no match is found.
+// Function: rindex
+//
+// Returns the char index (not the byte offset) of the last matching char
+// (as option some/none)
+fn rindex(ss: str, cc: char) -> option<uint> {
+    let bii = byte_len(ss);
+    let cii = char_len(ss);
+    while bii > 0u {
+        let {ch, prev} = char_range_at_reverse(ss, bii);
+        cii -= 1u;
+        bii = prev;
+
+        // found here?
+        if ch == cc {
+            ret option::some(cii);
+        }
+    }
 
-FIXME: UTF-8
-*/
-fn rindex(s: str, c: u8) -> int {
-    let n: int = byte_len(s) as int;
-    while n >= 0 { if s[n] == c { ret n; } n -= 1; }
-    ret n;
+    // wasn't found
+    ret option::none;
 }
 
 /*
@@ -1233,6 +1248,25 @@ Pluck a character out of a string
 */
 fn char_at(s: str, i: uint) -> char { ret char_range_at(s, i).ch; }
 
+// Function: char_range_at_reverse
+//
+// Given a byte position and a str, return the previous char and the byte
+// position where it starts; lets callers iterate a unicode str in reverse.
+fn char_range_at_reverse(ss: str, start: uint) -> {ch: char, prev: uint} {
+    let prev = start;
+
+    // while the previous byte is a UTF-8 continuation byte (10xxxxxx)
+    while prev > 0u && ss[prev - 1u] & 192u8 == tag_cont_u8 {
+        prev -= 1u;
+    }
+
+    // now refer to the initial byte of previous char
+    prev -= 1u;
+
+    let ch = char_at(ss, prev);
+    ret {ch:ch, prev:prev};
+}
+
 /*
 Function: substr_all
 
@@ -1442,13 +1476,42 @@ mod tests {
     }
 
     #[test]
-    fn test_index_and_rindex() {
-        assert (index("hello", 'e' as u8) == 1);
-        assert (index("hello", 'o' as u8) == 4);
-        assert (index("hello", 'z' as u8) == -1);
-        assert (rindex("hello", 'l' as u8) == 3);
-        assert (rindex("hello", 'h' as u8) == 0);
-        assert (rindex("hello", 'z' as u8) == -1);
+    fn test_index() {
+        assert ( index("hello", 'h') == option::some(0u));
+        assert ( index("hello", 'e') == option::some(1u));
+        assert ( index("hello", 'o') == option::some(4u));
+        assert ( index("hello", 'z') == option::none);
+    }
+
+    #[test]
+    fn test_rindex() {
+        assert (rindex("hello", 'l') == option::some(3u));
+        assert (rindex("hello", 'o') == option::some(4u));
+        assert (rindex("hello", 'h') == option::some(0u));
+        assert (rindex("hello", 'z') == option::none);
+    }
+
+    #[test]
+    fn test_pop_char() {
+        let data = "ประเทศไทย中华";
+        let cc = pop_char(data);
+        assert "ประเทศไทย中" == data;
+        assert '华' == cc;
+    }
+
+    #[test]
+    fn test_pop_char_2() {
+        let data2 = "华";
+        let cc2 = pop_char(data2);
+        assert "" == data2;
+        assert '华' == cc2;
+    }
+
+    #[test]
+    #[should_fail]
+    fn test_pop_char_fail() {
+        let data = "";
+        let _cc3 = pop_char(data);
     }
 
     #[test]
diff --git a/src/libstd/fs.rs b/src/libstd/fs.rs
@@ -32,6 +32,22 @@ A path or fragment of a filesystem path
 */
 type path = str;
 
+fn splitDirnameBasename (pp: path) -> {dirname: str, basename: str} {
+    let ii;
+    alt str::rindex(pp, os_fs::path_sep) {
+        option::some(xx) { ii = xx; }
+        option::none {
+            alt str::rindex(pp, os_fs::alt_path_sep) {
+                option::some(xx) { ii = xx; }
+                option::none { ret {dirname: ".", basename: pp}; }
+            }
+        }
+    }
+
+    ret {dirname: str::slice(pp, 0u, ii),
+         basename: str::slice(pp, ii + 1u, str::char_len(pp))};
+}
+
 /*
 Function: dirname
 
@@ -43,13 +59,8 @@ The dirname of "/usr/share" will be "/usr", but the dirname of
 
 If the path is not prefixed with a directory, then "." is returned.
 */
-fn dirname(p: path) -> path unsafe {
-    let i: int = str::rindex(p, os_fs::path_sep as u8);
-    if i == -1 {
-        i = str::rindex(p, os_fs::alt_path_sep as u8);
-        if i == -1 { ret "."; }
-    }
-    ret str::unsafe::slice_bytes(p, 0u, i as uint);
+fn dirname(pp: path) -> path {
+    ret splitDirnameBasename(pp).dirname;
 }
 
 /*
@@ -63,18 +74,10 @@ path separators in the path then the returned path is identical to
 the provided path. If an empty path is provided or the path ends
 with a path separator then an empty path is returned.
 */
-fn basename(p: path) -> path unsafe {
-    let i: int = str::rindex(p, os_fs::path_sep as u8);
-    if i == -1 {
-        i = str::rindex(p, os_fs::alt_path_sep as u8);
-        if i == -1 { ret p; }
-    }
-    let len = str::byte_len(p);
-    if (i + 1) as uint >= len { ret p; }
-    ret str::unsafe::slice_bytes(p, (i + 1) as uint, len);
+fn basename(pp: path) -> path {
+    ret splitDirnameBasename(pp).basename;
 }
 
-
 // FIXME: Need some typestate to avoid bounds check when len(pre) == 0
 /*
 Function: connect
diff --git a/src/libstd/getopts.rs b/src/libstd/getopts.rs
@@ -230,16 +230,14 @@ fn getopts(args: [str], opts: [opt]) -> result unsafe {
             let i_arg = option::none::<str>;
             if cur[1] == '-' as u8 {
                 let tail = str::unsafe::slice_bytes(cur, 2u, curlen);
-                let eq = str::index(tail, '=' as u8);
-                if eq == -1 {
+                let tail_eq = str::splitn_char(tail, '=', 1u);
+                if vec::len(tail_eq) <= 1u {
                     names = [long(tail)];
                 } else {
                     names =
-                        [long(str::unsafe::slice_bytes(tail,0u,eq as uint))];
+                        [long(tail_eq[0])];
                     i_arg =
-                        option::some::<str>(str::unsafe::slice_bytes(tail,
-                                                       (eq as uint) + 1u,
-                                                       curlen - 2u));
+                        option::some::<str>(tail_eq[1]);
                 }
             } else {
                 let j = 1u;

Original file line number	Diff line number	Diff line change
`@@ -283,10 +283,9 @@ fn check_variants_T<T: copy>(`
`283`	`283`	`}`
`284`	`284`	`}`
`285`	`285`
`286`		`-fn last_part(filename: str) -> str unsafe {`
`287`		`- let ix = str::rindex(filename, 47u8 /* '/' */);`
`288`		`- assert ix >= 0;`
`289`		`- str::unsafe::slice_bytes(filename, ix as uint + 1u, str::byte_len(filename) - 3u)`
	`286`	`+fn last_part(filename: str) -> str {`
	`287`	`+ let ix = option::get(str::rindex(filename, '/'));`
	`288`	`+ str::slice(filename, ix + 1u, str::char_len(filename) - 3u)`
`290`	`289`	`}`
`291`	`290`
`292`	`291`	`enum happiness { passed, cleanly_rejected(str), known_bug(str), failed(str), }`