Revision control

Copy as Markdown

Other Tools

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#![allow(unknown_lints)]
#![warn(rust_2018_idioms)]
// (It's tempting to avoid the utf8 checks, but they're easy to get wrong, so)
#![deny(unsafe_code)]
#[cfg(feature = "serde_support")]
mod serde_support;
#[cfg(feature = "rusqlite_support")]
mod rusqlite_support;
use std::{
cmp::Ordering,
fmt,
hash::{Hash, Hasher},
ops, str,
};
#[cfg(feature = "random")]
use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine};
/// This is a type intended to be used to represent the guids used by sync. It
/// has several benefits over using a `String`:
///
/// 1. It's more explicit about what is being stored, and could prevent bugs
/// where a Guid is passed to a function expecting text.
///
/// 2. Guids are guaranteed to be immutable.
///
/// 3. It's optimized for the guids commonly used by sync. In particular, short guids
/// (including the guids which would meet `PlacesUtils.isValidGuid`) do not incur
/// any heap allocation, and are stored inline.
#[derive(Clone)]
pub struct Guid(Repr);
// The internal representation of a GUID. Most Sync GUIDs are 12 bytes,
// and contain only base64url characters; we can store them on the stack
// without a heap allocation. However, arbitrary ascii guids of up to length 64
// are possible, in which case we fall back to a heap-allocated string.
//
// This is separate only because making `Guid` an enum would expose the
// internals.
#[derive(Clone)]
enum Repr {
// see FastGuid for invariants
Fast(FastGuid),
// invariants:
// - _0.len() > MAX_FAST_GUID_LEN
Slow(String),
}
/// Invariants:
///
/// - `len <= MAX_FAST_GUID_LEN`.
/// - `data[0..len]` encodes valid utf8.
/// - `data[len..].iter().all(|&b| b == b'\0')`
///
/// Note: None of these are required for memory safety, just correctness.
#[derive(Clone)]
struct FastGuid {
len: u8,
data: [u8; MAX_FAST_GUID_LEN],
}
// This is the maximum length (experimentally determined) we can make it before
// `Repr::Fast` is larger than `Guid::Slow` on 32 bit systems. The important
// thing is really that it's not too big, and is above 12 bytes.
const MAX_FAST_GUID_LEN: usize = 14;
impl FastGuid {
#[inline]
fn from_slice(bytes: &[u8]) -> Self {
// Checked by the caller, so debug_assert is fine.
debug_assert!(
can_use_fast(bytes),
"Bug: Caller failed to check can_use_fast: {:?}",
bytes
);
let mut data = [0u8; MAX_FAST_GUID_LEN];
data[0..bytes.len()].copy_from_slice(bytes);
FastGuid {
len: bytes.len() as u8,
data,
}
}
#[inline]
fn as_str(&self) -> &str {
// Note: we only use debug_assert! to ensure valid utf8-ness, so this need
str::from_utf8(self.bytes()).expect("Invalid fast guid bytes!")
}
#[inline]
fn len(&self) -> usize {
self.len as usize
}
#[inline]
fn bytes(&self) -> &[u8] {
&self.data[0..self.len()]
}
}
// Returns:
// - true to use Repr::Fast
// - false to use Repr::Slow
#[inline]
fn can_use_fast<T: ?Sized + AsRef<[u8]>>(bytes: &T) -> bool {
let bytes = bytes.as_ref();
// This is fine as a debug_assert since we'll still panic if it's ever used
// in such a way where it would matter.
debug_assert!(str::from_utf8(bytes).is_ok());
bytes.len() <= MAX_FAST_GUID_LEN
}
impl Guid {
/// Create a guid from a `str`.
#[inline]
pub fn new(s: &str) -> Self {
Guid::from_slice(s.as_ref())
}
/// Create an empty guid. Usable as a constant.
#[inline]
pub const fn empty() -> Self {
Guid(Repr::Fast(FastGuid {
len: 0,
data: [0u8; MAX_FAST_GUID_LEN],
}))
}
/// Create a random guid (of 12 base64url characters). Requires the `random`
/// feature.
#[cfg(feature = "random")]
pub fn random() -> Self {
let bytes: [u8; 9] = rand::random();
// Note: only first 12 bytes are used, but remaining are required to
// build the FastGuid
let mut output = [0u8; MAX_FAST_GUID_LEN];
let bytes_written = URL_SAFE_NO_PAD
.encode_slice(bytes, &mut output[..12])
.expect("Output buffer too small");
debug_assert!(bytes_written == 12);
Guid(Repr::Fast(FastGuid {
len: 12,
data: output,
}))
}
/// Convert `b` into a `Guid`.
#[inline]
pub fn from_string(s: String) -> Self {
Guid::from_vec(s.into_bytes())
}
/// Convert `b` into a `Guid`.
#[inline]
pub fn from_slice(b: &[u8]) -> Self {
if can_use_fast(b) {
Guid(Repr::Fast(FastGuid::from_slice(b)))
} else {
Guid::new_slow(b.into())
}
}
/// Convert `v` to a `Guid`, consuming it.
#[inline]
pub fn from_vec(v: Vec<u8>) -> Self {
if can_use_fast(&v) {
Guid(Repr::Fast(FastGuid::from_slice(&v)))
} else {
Guid::new_slow(v)
}
}
/// Get the data backing this `Guid` as a `&[u8]`.
#[inline]
pub fn as_bytes(&self) -> &[u8] {
match &self.0 {
Repr::Fast(rep) => rep.bytes(),
Repr::Slow(rep) => rep.as_ref(),
}
}
/// Get the data backing this `Guid` as a `&str`.
#[inline]
pub fn as_str(&self) -> &str {
match &self.0 {
Repr::Fast(rep) => rep.as_str(),
Repr::Slow(rep) => rep.as_ref(),
}
}
/// Convert this `Guid` into a `String`, consuming it in the process.
#[inline]
pub fn into_string(self) -> String {
match self.0 {
Repr::Fast(rep) => rep.as_str().into(),
Repr::Slow(rep) => rep,
}
}
/// Returns true for Guids that are deemed valid by the sync server.
pub fn is_valid_for_sync_server(&self) -> bool {
!self.is_empty()
&& self.len() <= 64
&& self
.bytes()
.all(|b| (b' '..=b'~').contains(&b) && b != b',')
}
/// Returns true for Guids that are valid places guids, and false for all others.
pub fn is_valid_for_places(&self) -> bool {
self.len() == 12 && self.bytes().all(Guid::is_valid_places_byte)
}
/// Returns true if the byte `b` is a valid base64url byte.
#[inline]
pub fn is_valid_places_byte(b: u8) -> bool {
BASE64URL_BYTES[b as usize] == 1
}
#[cold]
fn new_slow(v: Vec<u8>) -> Self {
assert!(
!can_use_fast(&v),
"Could use fast for guid (len = {})",
v.len()
);
Guid(Repr::Slow(
String::from_utf8(v).expect("Invalid slow guid bytes!"),
))
}
}
// This is used to implement the places tests.
const BASE64URL_BYTES: [u8; 256] = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];
impl Ord for Guid {
fn cmp(&self, other: &Self) -> Ordering {
self.as_bytes().cmp(other.as_bytes())
}
}
impl PartialOrd for Guid {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl PartialEq for Guid {
fn eq(&self, other: &Self) -> bool {
self.as_bytes() == other.as_bytes()
}
}
impl Eq for Guid {}
impl Hash for Guid {
fn hash<H: Hasher>(&self, state: &mut H) {
self.as_bytes().hash(state);
}
}
impl<'a> From<&'a str> for Guid {
#[inline]
fn from(s: &'a str) -> Guid {
Guid::from_slice(s.as_ref())
}
}
impl<'a> From<&'a &str> for Guid {
#[inline]
fn from(s: &'a &str) -> Guid {
Guid::from_slice(s.as_ref())
}
}
impl<'a> From<&'a [u8]> for Guid {
#[inline]
fn from(s: &'a [u8]) -> Guid {
Guid::from_slice(s)
}
}
impl From<String> for Guid {
#[inline]
fn from(s: String) -> Guid {
Guid::from_string(s)
}
}
impl From<Vec<u8>> for Guid {
#[inline]
fn from(v: Vec<u8>) -> Guid {
Guid::from_vec(v)
}
}
impl From<Guid> for String {
#[inline]
fn from(guid: Guid) -> String {
guid.into_string()
}
}
impl From<Guid> for Vec<u8> {
#[inline]
fn from(guid: Guid) -> Vec<u8> {
guid.into_string().into_bytes()
}
}
impl AsRef<str> for Guid {
#[inline]
fn as_ref(&self) -> &str {
self.as_str()
}
}
impl AsRef<[u8]> for Guid {
#[inline]
fn as_ref(&self) -> &[u8] {
self.as_bytes()
}
}
impl ops::Deref for Guid {
type Target = str;
#[inline]
fn deref(&self) -> &str {
self.as_str()
}
}
// The default Debug impl is pretty unhelpful here.
impl fmt::Debug for Guid {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Guid({:?})", self.as_str())
}
}
impl fmt::Display for Guid {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self.as_str(), f)
}
}
impl std::default::Default for Guid {
/// Create a default guid by calling `Guid::empty()`
#[inline]
fn default() -> Self {
Guid::empty()
}
}
macro_rules! impl_guid_eq {
($($other: ty),+) => {$(
// This macro is used for items with and without lifetimes.
#[allow(clippy::extra_unused_lifetimes)]
impl<'a> PartialEq<$other> for Guid {
#[inline]
fn eq(&self, other: &$other) -> bool {
PartialEq::eq(AsRef::<[u8]>::as_ref(self), AsRef::<[u8]>::as_ref(other))
}
}
#[allow(clippy::extra_unused_lifetimes)]
impl<'a> PartialEq<Guid> for $other {
#[inline]
fn eq(&self, other: &Guid) -> bool {
PartialEq::eq(AsRef::<[u8]>::as_ref(self), AsRef::<[u8]>::as_ref(other))
}
}
)+}
}
// Implement direct comparison with some common types from the stdlib.
impl_guid_eq![str, &'a str, String, [u8], &'a [u8], Vec<u8>];
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_base64url_bytes() {
let mut expect = [0u8; 256];
for b in b'0'..=b'9' {
expect[b as usize] = 1;
}
for b in b'a'..=b'z' {
expect[b as usize] = 1;
}
for b in b'A'..=b'Z' {
expect[b as usize] = 1;
}
expect[b'_' as usize] = 1;
expect[b'-' as usize] = 1;
assert_eq!(&BASE64URL_BYTES[..], &expect[..]);
}
#[test]
fn test_valid_for_places() {
assert!(Guid::from("aaaabbbbcccc").is_valid_for_places());
assert!(Guid::from_slice(b"09_az-AZ_09-").is_valid_for_places());
assert!(!Guid::from("aaaabbbbccccd").is_valid_for_places()); // too long
assert!(!Guid::from("aaaabbbbccc").is_valid_for_places()); // too short
assert!(!Guid::from("aaaabbbbccc=").is_valid_for_places()); // right length, bad character
assert!(!Guid::empty().is_valid_for_places()); // empty isn't valid to insert.
}
#[test]
fn test_valid_for_sync_server() {
assert!(!Guid::empty().is_valid_for_sync_server()); // empty isn't valid remotely.
}
#[allow(clippy::cmp_owned)] // See clippy note below.
#[test]
fn test_comparison() {
assert_eq!(Guid::from("abcdabcdabcd"), "abcdabcdabcd");
assert_ne!(Guid::from("abcdabcdabcd".to_string()), "ABCDabcdabcd");
assert_eq!(Guid::from("abcdabcdabcd"), &b"abcdabcdabcd"[..]); // b"abcdabcdabcd" has type &[u8; 12]...
assert_ne!(Guid::from(&b"abcdabcdabcd"[..]), &b"ABCDabcdabcd"[..]);
assert_eq!(
Guid::from(b"abcdabcdabcd"[..].to_owned()),
"abcdabcdabcd".to_string()
);
assert_ne!(Guid::from("abcdabcdabcd"), "ABCDabcdabcd".to_string());
assert_eq!(
Guid::from("abcdabcdabcd1234"),
Vec::from(b"abcdabcdabcd1234".as_ref())
);
assert_ne!(
Guid::from("abcdabcdabcd4321"),
Vec::from(b"ABCDabcdabcd4321".as_ref())
);
// order by data instead of length
// hrmph - clippy in 1.54-nightly complains about the below:
// 'error: this creates an owned instance just for comparison'
// '... help: try: `*"aaaaaa"`'
// and suggests a change that's wrong - so we've ignored the lint above.
assert!(Guid::from("zzz") > Guid::from("aaaaaa"));
assert!(Guid::from("ThisIsASolowGuid") < Guid::from("zzz"));
assert!(Guid::from("ThisIsASolowGuid") > Guid::from("AnotherSlowGuid"));
}
#[cfg(feature = "random")]
#[test]
fn test_random() {
use std::collections::HashSet;
// Used to verify uniqueness within our sample of 1000. Could cause
// random failures, but desktop has the same test, and it's never caused
// a problem AFAIK.
let mut seen: HashSet<String> = HashSet::new();
for _ in 0..1000 {
let g = Guid::random();
assert_eq!(g.len(), 12);
assert!(g.is_valid_for_places());
let decoded = URL_SAFE_NO_PAD.decode(&g).unwrap();
assert_eq!(decoded.len(), 9);
let no_collision = seen.insert(g.clone().into_string());
assert!(no_collision, "{}", g);
}
}
}