From 26280d9083d00282570f5effc4f59d3375fcb139 Mon Sep 17 00:00:00 2001 From: Michael Gehring Date: Wed, 6 Jan 2016 16:34:58 +0100 Subject: [PATCH] expand, unexpand: fix build on stable --- Cargo.toml | 4 ++-- src/expand/expand.rs | 5 +---- src/unexpand/unexpand.rs | 5 +---- src/uucore/lib.rs | 1 + src/uucore/utf8.rs | 28 ++++++++++++++++++++++++++++ 5 files changed, 33 insertions(+), 10 deletions(-) create mode 100644 src/uucore/utf8.rs diff --git a/Cargo.toml b/Cargo.toml index 427c93c80..0e7179094 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,7 @@ generic = [ "dirname", "echo", "env", - "expand", # skip_on_beta + "expand", "expr", "factor", "false", @@ -79,7 +79,7 @@ generic = [ "true", "truncate", "tsort", - "unexpand", # skip_on_beta + "unexpand", "uniq", "wc", "whoami", diff --git a/src/expand/expand.rs b/src/expand/expand.rs index 3a45a6586..7cfadbeb5 100644 --- a/src/expand/expand.rs +++ b/src/expand/expand.rs @@ -1,5 +1,4 @@ #![crate_name = "uu_expand"] -#![feature(unicode)] /* * This file is part of the uutils coreutils package. 
@@ -14,7 +13,6 @@ extern crate getopts; extern crate libc; -extern crate rustc_unicode; extern crate unicode_width; #[macro_use] @@ -24,7 +22,6 @@ use std::fs::File; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; use std::iter::repeat; use std::str::from_utf8; -use rustc_unicode::str::utf8_char_width; use unicode_width::UnicodeWidthChar; static NAME: &'static str = "expand"; @@ -175,7 +172,7 @@ fn expand(options: Options) { while byte < buf.len() { let (ctype, cwidth, nbytes) = if options.uflag { - let nbytes = utf8_char_width(buf[byte]); + let nbytes = uucore::utf8::utf8_char_width(buf[byte]); if byte + nbytes > buf.len() { // don't overrun buffer because of invalid UTF-8 diff --git a/src/unexpand/unexpand.rs b/src/unexpand/unexpand.rs index 3f66ab46f..85cdd8468 100644 --- a/src/unexpand/unexpand.rs +++ b/src/unexpand/unexpand.rs @@ -1,5 +1,4 @@ #![crate_name = "uu_unexpand"] -#![feature(unicode)] /* * This file is part of the uutils coreutils package. @@ -14,7 +13,6 @@ extern crate getopts; extern crate libc; -extern crate rustc_unicode; extern crate unicode_width; #[macro_use] @@ -23,7 +21,6 @@ extern crate uucore; use std::fs::File; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Stdout, Write}; use std::str::from_utf8; -use rustc_unicode::str::utf8_char_width; use unicode_width::UnicodeWidthChar; static NAME: &'static str = "unexpand"; @@ -208,7 +205,7 @@ fn unexpand(options: Options) { } let (ctype, cwidth, nbytes) = if options.uflag { - let nbytes = utf8_char_width(buf[byte]); + let nbytes = uucore::utf8::utf8_char_width(buf[byte]); // figure out how big the next char is, if it's UTF-8 if byte + nbytes > buf.len() { diff --git a/src/uucore/lib.rs b/src/uucore/lib.rs index f5788e8d6..53a02aca2 100644 --- a/src/uucore/lib.rs +++ b/src/uucore/lib.rs @@ -7,6 +7,7 @@ mod macros; pub mod fs; pub mod parse_time; +pub mod utf8; #[cfg(unix)] pub mod c_types; #[cfg(unix)] pub mod process; diff --git a/src/uucore/utf8.rs 
b/src/uucore/utf8.rs new file mode 100644 index 000000000..a6c48b785 --- /dev/null +++ b/src/uucore/utf8.rs @@ -0,0 +1,28 @@ +/* This is taken from the rustc_unicode crate. Remove once the 'unicode' feature becomes stable */ + +// https://tools.ietf.org/html/rfc3629 +static UTF8_CHAR_WIDTH: [u8; 256] = [ +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF +0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF +4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF +]; + +/// Given a first byte, determine how many bytes are in this UTF-8 character (0 if the byte cannot start one) +#[inline] +pub fn utf8_char_width(b: u8) -> usize { + return UTF8_CHAR_WIDTH[b as usize] as usize; +} +