/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Upper bound on how much of a URL gets fed to the hash. Places names this
// MAX_CHARS_TO_HASH, but the limit is counted in bytes -- which is also the
// unit Rust uses for `str` slice indices -- so it is named accordingly here.
const MAX_BYTES_TO_HASH: usize = 1_500;
/// This should be identical to the "real" `mozilla::places::HashURL` with no prefix arg
/// (see also `hash_url_prefix` for the version with one).
///
/// This returns a u64, but only the lower 48 bits should ever be set, so casting to
/// an i64 is totally safe and lossless. If the string has no ':' in it, then the
/// returned hash will be a 32 bit hash.
pub fn hash_url(spec: &str) -> u64 {
let max_len_to_hash = spec.len().min(MAX_BYTES_TO_HASH);
let str_hash = u64::from(hash_string(&spec[..max_len_to_hash]));
let str_head = &spec[..spec.len().min(50)];
// We should be using memchr -- there's almost no chance we aren't
// already pulling it in transitively and it's supposedly *way* faster.
if let Some(pos) = str_head.as_bytes().iter().position(|&b| b == b':') {
let prefix_hash = u64::from(hash_string(&spec[..pos]) & 0x0000_ffff);
(prefix_hash << 32).wrapping_add(str_hash)
} else {
str_hash
}
}
/// Selects which of the two prefix hashes [`hash_url_prefix`] should produce.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum PrefixMode {
    /// Counterpart of the `"prefix_lo"` mode of mozilla::places::HashURL.
    Lo,
    /// Counterpart of the `"prefix_hi"` mode of mozilla::places::HashURL.
    Hi,
}
/// This should be identical to the "real" `mozilla::places::HashURL` when given
/// a prefix arg. Specifically:
///
/// - `hash_url_prefix(spec, PrefixMode::Lo)` is identical to
/// - `hash_url_prefix(spec, PrefixMode::Hi)` is identical to
///
/// As with `hash_url`, it returns a u64, but only the lower 48 bits should ever be set, so
/// casting to e.g. an i64 is lossless.
pub fn hash_url_prefix(spec_prefix: &str, mode: PrefixMode) -> u64 {
let to_hash = &spec_prefix[..spec_prefix.len().min(MAX_BYTES_TO_HASH)];
// Keep 16 bits
let unshifted_hash = hash_string(to_hash) & 0x0000_ffff;
let hash = u64::from(unshifted_hash) << 32;
if mode == PrefixMode::Hi {
hash.wrapping_add(0xffff_ffffu64)
} else {
hash
}
}
// Multiplier applied by `add_u32_to_hash`; mirrors mozilla::kGoldenRatioU32.
const GOLDEN_RATIO: u32 = 0x9e37_79b9;
// Counterpart of mozilla::AddU32ToHash: folds one more 32-bit value into a
// running hash by rotating the hash left 5 bits, XOR-ing in the value, and
// multiplying (with wraparound) by `GOLDEN_RATIO`.
#[inline]
fn add_u32_to_hash(hash: u32, new_value: u32) -> u32 {
    let rotated = hash.rotate_left(5);
    let mixed = rotated ^ new_value;
    mixed.wrapping_mul(GOLDEN_RATIO)
}
/// Hashes the UTF-8 bytes of `string`, starting from 0 and feeding each byte
/// through `add_u32_to_hash` in order.
///
/// This should return identical results to `mozilla::HashString`!
#[inline]
pub fn hash_string(string: &str) -> u32 {
    let mut hash = 0u32;
    for byte in string.bytes() {
        hash = add_u32_to_hash(hash, u32::from(byte));
    }
    hash
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks both prefix modes against the 16-bit prefix hashes of common
    /// URL schemes.
    #[test]
    fn test_prefixes() {
        // These are the unique 16 bits of the prefix. You can generate these with:
        // `PlacesUtils.history.hashURL(val, "prefix_lo").toString(16).slice(0, 4)`.
        let test_values = &[
            ("http", 0x7226u16),
            ("https", 0x2b12),
            ("blob", 0x2612),
            ("data", 0x9736),
            ("chrome", 0x75fc),
            ("resource", 0x37f8),
            ("file", 0xc7c9),
            ("place", 0xf434),
        ];
        for &(prefix, top16bits) in test_values {
            let expected_lo = u64::from(top16bits) << 32;
            let expected_hi = expected_lo | 0xffff_ffffu64;
            assert_eq!(
                hash_url_prefix(prefix, PrefixMode::Lo),
                expected_lo,
                "wrong value for hash_url_prefix({:?}, PrefixMode::Lo)",
                prefix
            );
            assert_eq!(
                hash_url_prefix(prefix, PrefixMode::Hi),
                expected_hi,
                "wrong value for hash_url_prefix({:?}, PrefixMode::Hi)",
                prefix
            );
        }
    }

    /// Checks `hash_url` against full 48-bit (or 32-bit, when there is no
    /// scheme) reference hashes.
    #[test]
    fn test_hash_url() {
        // not actually a valid png, but whatever.
        let data_url = "data:image/png;base64,".to_owned() + &"iVBORw0KGgoAAA".repeat(500);
        // TODO: this table used to carry an `https://` row with expected hash
        // 0x2b12_e7bd_7fcd, but its URL literal is missing. Restore the row
        // once the URL is known; `test_hash_url_https_prefix` covers the
        // scheme in the meantime.
        let test_values = &[
            ("http://www.example.com", 0x7226_2c1a_3496u64),
            ("http://user:pass@foo:21/bar;par?b#c", 0x7226_61d2_18a7u64),
            ("place:transition=7&sort=4", 0xf434_ac2b_2dafu64),
            (
                "blob:36c6ded1-6190-45f4-8fcd-355d1b6c9f48",
                0x2612_0a43_1050u64,
            ),
            ("www.example.com", 0x8b14_9337u64), // URLs without a prefix are hashed to 32 bits
            (&data_url[..], 0x9736_d65d_86d9u64),
        ];
        for &(url_str, hash) in test_values {
            assert_eq!(hash_url(url_str), hash, "Wrong value for url {:?}", url_str);
        }
    }

    /// An `https` URL must carry the `https` prefix hash in bits 32..48 and the
    /// hash of the whole string in the low 32 bits.
    #[test]
    fn test_hash_url_https_prefix() {
        let url = "https://www.example.com/";
        let hash = hash_url(url);
        assert_eq!(hash >> 32, 0x2b12, "wrong prefix bits for {:?}", url);
        assert_eq!(
            hash & 0xffff_ffff,
            u64::from(hash_string(url)),
            "wrong low bits for {:?}",
            url
        );
        // Must fall inside the range spanned by the two prefix hashes.
        assert!(hash >= hash_url_prefix("https", PrefixMode::Lo));
        assert!(hash <= hash_url_prefix("https", PrefixMode::Hi));
    }
}