Skip to content

Instantly share code, notes, and snippets.

@zofrex
Created December 10, 2016 01:46
Show Gist options
  • Save zofrex/0585c455d7ea05ea63d5a1be195332b1 to your computer and use it in GitHub Desktop.
Save zofrex/0585c455d7ea05ea63d5a1be195332b1 to your computer and use it in GitHub Desktop.
Code to handle the challenging cases described in https://mortoray.com/2013/11/27/the-string-type-is-broken/
extern crate unicode_segmentation;
extern crate unicode_normalization;
use unicode_segmentation::UnicodeSegmentation;
use unicode_normalization::UnicodeNormalization;
/// Prints naive versus Unicode-aware results for a handful of tricky strings:
/// a combining-character word, a pair of emoji, a ligature, and two
/// differently encoded spellings of the same word.
fn main() {
    // "noël" written with a combining diaeresis (U+0308) after the plain 'e'.
    let noel = "noe\u{0308}l";
    println!("Input: {}", noel);
    println!("Naively reversed: {}", naive_reverse(noel));
    println!("Graphemes reversed: {}", grapheme_reverse(noel));
    println!("Naive first 3: {}", naive_first_n(noel, 3));
    println!("Graphemes first 3: {}", grapheme_first_n(noel, 3));
    println!("Byte length: {}", byte_length(noel));
    println!("Chars length: {}", char_length(noel));
    println!("Graphemes length: {}", grapheme_length(noel));

    // Two emoji, each a single code point that needs four bytes in UTF-8.
    let cats = "😸😾";
    println!("Input: {}", cats);
    println!("Length: {}", grapheme_length(cats));
    println!(
        "Substring after 1st character: {}",
        cats.graphemes(true).skip(1).collect::<String>()
    );
    println!("Reverse: {}", grapheme_reverse(cats));

    // Spelled with the single-code-point "ffl" ligature (U+FB04). A plain
    // ASCII "baffle" would uppercase trivially and demonstrate nothing; the
    // ligature must expand to the three letters "FFL".
    let baffle = "ba\u{FB04}e";
    println!("Input: {}", baffle);
    println!("Naive uppercase: {}", baffle.to_uppercase());

    // Same visible word, two encodings: combining sequence vs. precomposed ë.
    let noel_combining_chars = noel;
    let noel_precomposed = "no\u{00EB}l"; // noël
    println!("Inputs: {}, {}", noel_combining_chars, noel_precomposed);
    println!("Naive equality: {}", noel_combining_chars == noel_precomposed);
    println!(
        "Normal form equality: {}",
        nfc(noel_combining_chars) == nfc(noel_precomposed)
    );
}
/// Reverses `input` one `char` (Unicode scalar value) at a time.
///
/// Combining marks are not kept with their base character, so they end up
/// attached to whatever precedes them in the reversed output.
fn naive_reverse(input: &str) -> String {
    // The reversed text holds the same scalars, so the byte size is unchanged.
    let mut reversed = String::with_capacity(input.len());
    reversed.extend(input.chars().rev());
    reversed
}
/// Reverses `input` by extended grapheme cluster, keeping the scalars inside
/// each cluster in their original order.
fn grapheme_reverse(input: &str) -> String {
    let mut reversed = String::with_capacity(input.len());
    for cluster in input.graphemes(true).rev() {
        reversed.push_str(cluster);
    }
    reversed
}
/// Returns the first `n` `char`s (Unicode scalar values) of `input`, or all
/// of `input` when it has fewer than `n`.
///
/// The cut may fall between a base character and its combining marks.
fn naive_first_n(input: &str, n: usize) -> String {
    // The byte offset of the (n+1)-th scalar is where the prefix ends; if
    // there is no such scalar the whole string is the prefix.
    match input.char_indices().nth(n) {
        Some((end, _)) => input[..end].to_owned(),
        None => input.to_owned(),
    }
}
/// Returns the first `n` extended grapheme clusters of `input`, or all of
/// `input` when it has fewer than `n`.
fn grapheme_first_n(input: &str, n: usize) -> String {
    let mut prefix = String::new();
    for cluster in input.graphemes(true).take(n) {
        prefix.push_str(cluster);
    }
    prefix
}
/// Returns the length of `input` in bytes of its UTF-8 encoding.
fn byte_length(input: &str) -> usize {
    str::len(input)
}
/// Returns the number of `char`s (Unicode scalar values) in `input`.
fn char_length(input: &str) -> usize {
    input.chars().fold(0, |seen, _| seen + 1)
}
/// Returns the number of extended grapheme clusters in `input`.
fn grapheme_length(input: &str) -> usize {
    input.graphemes(true).fold(0, |seen, _| seen + 1)
}
/// Returns `input` converted to Unicode Normalization Form C, so that
/// differently encoded spellings of the same text compare equal.
fn nfc(input: &str) -> String {
    let mut composed = String::with_capacity(input.len());
    composed.extend(input.nfc());
    composed
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment