content_blocking.rs

//! Transforms filter rules into content blocking syntax used on iOS and MacOS.

use crate::filters::cosmetic::CosmeticFilter;

use crate::filters::network::{NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper};

use crate::lists::ParsedFilter;

use memchr::{memchr as find_char, memmem};

use once_cell::sync::Lazy;

use regex::Regex;

use serde::{Deserialize, Serialize};

use std::collections::HashSet;

use std::convert::{TryFrom, TryInto};

/// By default, ABP rules do not block top-level document requests. There's no way to express that

/// in content blocking format, so instead it's approximated with a rule that applies an exception

/// to any first-party requests that are document types.

///

/// This rule should be added after all other network rules.

pub fn ignore_previous_fp_documents() -> CbRule {

    let mut resource_type = HashSet::new();

    resource_type.insert(CbResourceType::Document);

    CbRule {

        trigger: CbTrigger {

            url_filter: String::from(".*"),

            resource_type: Some(resource_type),

            load_type: vec![CbLoadType::FirstParty],

            ..CbTrigger::default()

},

        action: CbAction {

            typ: CbType::IgnorePreviousRules,

            selector: None,

},

/// Rust representation of a single content blocking rule.

///

/// This can be deserialized with `serde_json` directly into the correct format.

#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]

pub struct CbRule {

    pub action: CbAction,

    pub trigger: CbTrigger,

impl CbRule {

    /// If this returns false, the rule will not compile and should not be used.

    fn is_ascii(&self) -> bool {

        self.action.selector.iter().all(|s| s.is_ascii())

            && self.trigger.url_filter.is_ascii()

            && self

                .trigger

                .if_domain

                .iter()

                .flatten()

                .all(|d| d.is_ascii())

            && self

                .trigger

                .unless_domain

                .iter()

                .flatten()

                .all(|d| d.is_ascii())

            && self

                .trigger

                .if_top_url

                .iter()

                .flatten()

                .all(|d| d.is_ascii())

            && self

                .trigger

                .unless_top_url

                .iter()

                .flatten()

                .all(|d| d.is_ascii())

/// Corresponds to the `action` field of a Safari content blocking rule.

#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]

pub struct CbAction {

    #[serde(rename = "type")]

    pub typ: CbType,

    /// Specify a string that defines a selector list. This value is required when the action type

    /// is css-display-none. If it's not, the selector field is ignored by Safari. Use CSS

    /// identifiers as the individual selector values, separated by commas. Safari and WebKit

    /// supports all of its CSS selectors for Safari content-blocking rules.

    #[serde(default, skip_serializing_if = "Option::is_none")]

    pub selector: Option<String>,

/// Corresponds to the `action.type` field of a Safari content blocking rule.

#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]

#[serde(rename_all = "kebab-case")]

pub enum CbType {

    /// Stops loading of the resource. If the resource was cached, the cache is ignored.

    Block,

    /// Strips cookies from the header before sending to the server. Only cookies otherwise

    /// acceptable to Safari's privacy policy can be blocked. Combining with ignore-previous-rules

    /// doesn't override the browser’s privacy settings.

    BlockCookies,

    /// Hides elements of the page based on a CSS selector. A selector field contains the selector

    /// list. Any matching element has its display property set to none, which hides it.

    CssDisplayNone,

    /// Ignores previously triggered actions.

    IgnorePreviousRules,

    /// Changes a URL from http to https. URLs with a specified (nondefault) port and links using

    /// other protocols are unaffected.

    MakeHttps,

/// Corresponds to possible entries in the `trigger.load_type` field of a Safari content blocking

/// rule.

#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]

#[serde(rename_all = "kebab-case")]

pub enum CbLoadType {

    FirstParty,

    ThirdParty,

/// Corresponds to possible entries in the `trigger.resource_type` field of a Safari content

/// blocking rule.

#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]

#[serde(rename_all = "kebab-case")]

pub enum CbResourceType {

    Document,

    Image,

    StyleSheet,

    Script,

    Font,

    Raw,

    SvgDocument,

    Media,

    Popup,

/// Corresponds to the `trigger` field of a Safari content blocking rule.

#[derive(Clone, Debug, Default, PartialEq, Deserialize, Serialize)]

#[serde(rename_all = "kebab-case")]

pub struct CbTrigger {

    /// Specifies a pattern to match the URL against.

    pub url_filter: String,

    #[serde(default, skip_serializing_if = "Option::is_none")]

    /// A Boolean value. The default value is false.

    pub url_filter_is_case_sensitive: Option<bool>,

    /// An array of strings matched to a URL's domain; limits action to a list of specific domains.

    /// Values must be lowercase ASCII, or punycode for non-ASCII. Add * in front to match domain

    /// and subdomains. Can't be used with unless-domain.

    #[serde(default, skip_serializing_if = "Option::is_none")]

    pub if_domain: Option<Vec<String>>,

    /// An array of strings matched to a URL's domain; acts on any site except domains in a

    /// provided list. Values must be lowercase ASCII, or punycode for non-ASCII. Add * in front to

    /// match domain and subdomains. Can't be used with if-domain.

    #[serde(default, skip_serializing_if = "Option::is_none")]

    pub unless_domain: Option<Vec<String>>,

    /// An array of strings representing the resource types (how the browser intends to use the

    /// resource) that the rule should match. If not specified, the rule matches all resource

    /// types. Valid values: document, image, style-sheet, script, font, raw (Any untyped load),

    /// svg-document, media, popup.

    #[serde(default, skip_serializing_if = "Option::is_none")]

    pub resource_type: Option<HashSet<CbResourceType>>,

    /// An array of strings that can include one of two mutually exclusive values. If not

    /// specified, the rule matches all load types. first-party is triggered only if the resource

    /// has the same scheme, domain, and port as the main page resource. third-party is triggered

    /// if the resource is not from the same domain as the main page resource.

    #[serde(default, skip_serializing_if = "Vec::is_empty")]

    pub load_type: Vec<CbLoadType>,

    /// An array of strings matched to the entire main document URL; limits the action to a

    /// specific list of URL patterns. Values must be lowercase ASCII, or punycode for non-ASCII.

    /// Can't be used with unless-top-url.

    #[serde(default, skip_serializing_if = "Option::is_none")]

    pub if_top_url: Option<Vec<String>>,

    /// An array of strings matched to the entire main document URL; acts on any site except URL

    /// patterns in provided list. Values must be lowercase ASCII, or punycode for non-ASCII. Can't

    /// be used with if-top-url.

    #[serde(default, skip_serializing_if = "Option::is_none")]

    pub unless_top_url: Option<Vec<String>>,

/// Possible failure reasons when attempting to convert an adblock rule into content filtering

/// syntax.

#[derive(Debug)]

pub enum CbRuleCreationFailure {

    /// Currently, only filter rules parsed in debug mode can be translated into equivalent content

    /// blocking syntax.

    NeedsDebugMode,

    /// Content blocking rules cannot have if-domain and unless-domain together at the same time.

    UnlessAndIfDomainTogetherUnsupported,

    /// A network filter rule with only the given content type flags was provided, and none of them

    /// are supported. If at least one supported content type is provided, no failure will occur

    /// and unsupported types will be silently dropped.

    NoSupportedNetworkOptions(NetworkFilterMask),

    /// Network rules with redirect options cannot be represented in content blocking syntax.

    NetworkRedirectUnsupported,

    /// Network rules with generichide options cannot be supported in content blocking syntax.

    NetworkGenerichideUnsupported,

    /// Network rules with badfilter options cannot be supported in content blocking syntax.

    NetworkBadFilterUnsupported,

    /// Network rules with csp options cannot be supported in content blocking syntax.

    NetworkCspUnsupported,

    /// Network rules with removeparam options cannot be supported in content blocking syntax.

    NetworkRemoveparamUnsupported,

    /// Content blocking syntax only supports a subset of regex features, namely:

    /// - Matching any character with “.”.

    /// - Matching ranges with the range syntax [a-b].

    /// - Quantifying expressions with “?”, “+” and “*”.

    /// - Groups with parenthesis.

///

    /// It may be possible to correctly convert some full-regex rules, but others use unsupported

    /// features (e.g. quantified repetition with {...}) that make conversion to content blocking

    /// syntax impossible.

    FullRegexUnsupported,

    /// `Blocker`-internal `NetworkFilter`s can be represented in optimized form, but these cannot

    /// be currently converted into content blocking syntax.

    OptimizedRulesUnsupported,

    /// Cosmetic rules with entities (e.g. google.*) rather than hostnames cannot be represented in

    /// content blocking syntax.

    CosmeticEntitiesUnsupported,

    /// Cosmetic rules with custom action specification (i.e. `:style(...)`) cannot be represented

    /// in content blocking syntax.

    CosmeticActionRulesNotSupported,

    /// Cosmetic rules with scriptlet injections (i.e. `+js(...)`) cannot be represented in content

    /// blocking syntax.

    ScriptletInjectionsNotSupported,

    /// Valid content blocking rules can only include ASCII characters.

    RuleContainsNonASCII,

    /// `from` as a `domain` alias is not currently supported in content blocking syntax.

    FromNotSupported,

    /// Content blocking rules cannot support procedural cosmetic filter operators.

    ProceduralCosmeticFiltersUnsupported,

impl TryFrom<ParsedFilter> for CbRuleEquivalent {

    type Error = CbRuleCreationFailure;

    fn try_from(v: ParsedFilter) -> Result<Self, Self::Error> {

        match v {

            ParsedFilter::Network(f) => f.try_into(),

            ParsedFilter::Cosmetic(f) => Ok(Self::SingleRule(f.try_into()?)),

/// Returns `Some(v)` when `v` holds at least one element and `None` when it is empty, so that
/// empty lists can be left out of a rule instead of being emitted as `[]`.
fn non_empty(v: Vec<String>) -> Option<Vec<String>> {
    if v.is_empty() {
        None
    } else {
        Some(v)
    }
}

/// Some adblock rules cannot be directly represented by a single content blocking rule. This enum

/// serves as an intermediate conversion step that provides extra context on why one rule turned

/// into multiple rules.

///

/// The contained rules can be accessed using `IntoIterator`.

#[allow(clippy::large_enum_variant)]

pub enum CbRuleEquivalent {

    /// In most successful cases, an ABP rule can be converted into a single content blocking rule.

    SingleRule(CbRule),

    /// If a network rule has more than one specified resource type, one of those types is

    /// `Document`, and no load type is specified, then the rule should be split into two content

    /// blocking rules: the first has all original resource types except `Document`, and the second

    /// only specifies `Document` with a third-party load type.

    SplitDocument(CbRule, CbRule),

impl IntoIterator for CbRuleEquivalent {

    type Item = CbRule;

    type IntoIter = CbRuleEquivalentIterator;

    fn into_iter(self) -> Self::IntoIter {

        match self {

            Self::SingleRule(r) => CbRuleEquivalentIterator {

                rules: [Some(r), None],

                index: 0,

},

            Self::SplitDocument(r1, r2) => CbRuleEquivalentIterator {

                rules: [Some(r1), Some(r2)],

                index: 0,

},

/// Returned by [`CbRuleEquivalent`]'s `IntoIterator` implementation.

pub struct CbRuleEquivalentIterator {

    rules: [Option<CbRule>; 2],

    index: usize,

impl Iterator for CbRuleEquivalentIterator {

    type Item = CbRule;

    fn next(&mut self) -> Option<Self::Item> {

        if self.index >= self.rules.len() {

            return None;

        let result = self.rules[self.index].take();

        self.index += 1;

        result

impl TryFrom<NetworkFilter> for CbRuleEquivalent {

    type Error = CbRuleCreationFailure;

    fn try_from(v: NetworkFilter) -> Result<Self, Self::Error> {

        static SPECIAL_CHARS: Lazy<Regex> =

            Lazy::new(|| Regex::new(r##"([.+?^${}()|\[\]\\])"##).unwrap());

        static REPLACE_WILDCARDS: Lazy<Regex> = Lazy::new(|| Regex::new(r##"\*"##).unwrap());

        static TRAILING_SEPARATOR: Lazy<Regex> = Lazy::new(|| Regex::new(r##"\^$"##).unwrap());

        if let Some(raw_line) = &v.raw_line {

            if v.is_redirect() {

                return Err(CbRuleCreationFailure::NetworkRedirectUnsupported);

            if v.mask.contains(NetworkFilterMask::GENERIC_HIDE) {

                return Err(CbRuleCreationFailure::NetworkGenerichideUnsupported);

            debug_assert!(

                !v.mask.contains(NetworkFilterMask::BAD_FILTER),

                "BAD_FILTER should be filtered out"

);

            if v.is_csp() {

                return Err(CbRuleCreationFailure::NetworkCspUnsupported);

            if v.mask.contains(NetworkFilterMask::IS_COMPLETE_REGEX) {

                return Err(CbRuleCreationFailure::FullRegexUnsupported);

            if v.is_removeparam() {

                return Err(CbRuleCreationFailure::NetworkRemoveparamUnsupported);

            let load_type = if v

                .mask

                .contains(NetworkFilterMask::THIRD_PARTY | NetworkFilterMask::FIRST_PARTY)

                vec![]

            } else if v.mask.contains(NetworkFilterMask::THIRD_PARTY) {

                vec![CbLoadType::ThirdParty]

            } else if v.mask.contains(NetworkFilterMask::FIRST_PARTY) {

                vec![CbLoadType::FirstParty]

            } else {

                vec![]

};

            let url_filter = match (v.filter, v.hostname) {

                (crate::filters::network::FilterPart::AnyOf(_), _) => {

                    return Err(CbRuleCreationFailure::OptimizedRulesUnsupported)

                (crate::filters::network::FilterPart::Simple(part), Some(hostname)) => {

                    let without_trailing_separator = TRAILING_SEPARATOR.replace_all(&part, "");

                    let escaped_special_chars =

                        SPECIAL_CHARS.replace_all(&without_trailing_separator, r##"\$1"##);

                    let with_fixed_wildcards =

                        REPLACE_WILDCARDS.replace_all(&escaped_special_chars, ".*");

                    let mut url_filter = format!(

                        "^[^:]+:(//)?([^/]+\\.)?{}",

                        SPECIAL_CHARS.replace_all(&hostname, r##"\$1"##)

);

                    if v.mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX) {

                        url_filter += ".*";

                    url_filter += &with_fixed_wildcards;

                    if v.mask.contains(NetworkFilterMask::IS_RIGHT_ANCHOR) {

                        url_filter += "$";

                    url_filter

                (crate::filters::network::FilterPart::Simple(part), None) => {

                    let without_trailing_separator = TRAILING_SEPARATOR.replace_all(&part, "");

                    let escaped_special_chars =

                        SPECIAL_CHARS.replace_all(&without_trailing_separator, r##"\$1"##);

                    let with_fixed_wildcards =

                        REPLACE_WILDCARDS.replace_all(&escaped_special_chars, ".*");

                    let mut url_filter = if v.mask.contains(NetworkFilterMask::IS_LEFT_ANCHOR) {

                        format!("^{with_fixed_wildcards}")

                    } else {

                        let scheme_part = if v

                            .mask

                            .contains(NetworkFilterMask::FROM_HTTP | NetworkFilterMask::FROM_HTTPS)

""

                        } else if v.mask.contains(NetworkFilterMask::FROM_HTTP) {

                            "^http://.*"

                        } else if v.mask.contains(NetworkFilterMask::FROM_HTTPS) {

                            "^https://.*"

                        } else if v.mask.contains(NetworkFilterMask::FROM_WEBSOCKET) {

                            "^wss?://.*"

                        } else {

                            unreachable!("Invalid scheme information");

};

                        format!("{scheme_part}{with_fixed_wildcards}")

};

                    if v.mask.contains(NetworkFilterMask::IS_RIGHT_ANCHOR) {

                        url_filter += "$";

                    url_filter

                (crate::filters::network::FilterPart::Empty, Some(hostname)) => {

                    let escaped_special_chars = SPECIAL_CHARS.replace_all(&hostname, r##"\$1"##);

                    format!("^[^:]+:(//)?([^/]+\\.)?{escaped_special_chars}")

                (crate::filters::network::FilterPart::Empty, None) => if v

                    .mask

                    .contains(NetworkFilterMask::FROM_HTTP | NetworkFilterMask::FROM_HTTPS)

                    "^https?://"

                } else if v.mask.contains(NetworkFilterMask::FROM_HTTP) {

                    "^http://"

                } else if v.mask.contains(NetworkFilterMask::FROM_HTTPS) {

                    "^https://"

                } else if v.mask.contains(NetworkFilterMask::FROM_WEBSOCKET) {

                    "^wss?://"

                } else {

                    unreachable!("Invalid scheme information");

                .to_string(),

};

            let (if_domain, unless_domain) = if v.opt_domains.is_some()

                || v.opt_not_domains.is_some()

                let mut if_domain = vec![];

                let mut unless_domain = vec![];

                // Unwraps are okay here - any rules with opt_domains or opt_not_domains must have

                // an options section delimited by a '$' character, followed by a `domain=` option.

                let opts = &raw_line[find_char(b'$', raw_line.as_bytes()).unwrap() + "$".len()..];

                let domain_start_index =

                    if let Some(index) = memmem::find(opts.as_bytes(), b"domain=") {

                        index

                    } else {

                        return Err(CbRuleCreationFailure::FromNotSupported);

};

                let domains_start = &opts[domain_start_index + "domain=".len()..];

                let domains = if let Some(comma) = find_char(b',', domains_start.as_bytes()) {

                    &domains_start[..comma]

                } else {

                    domains_start

                .split('|');

                domains.for_each(|domain| {

                    let (collection, domain) =

                        if let Some(domain_stripped) = domain.strip_prefix('~') {

                            (&mut unless_domain, domain_stripped)

                        } else {

                            (&mut if_domain, domain)

};

                    let lowercase = domain.to_lowercase();

                    let normalized_domain = if lowercase.is_ascii() {

                        lowercase

                    } else {

                        // The network filter has already parsed successfully, so this should be

                        // safe

                        idna::domain_to_ascii(&lowercase).unwrap()

};

                    collection.push(format!("*{normalized_domain}"));

});

                (non_empty(if_domain), non_empty(unless_domain))

            } else {

                (None, None)

};

            if if_domain.is_some() && unless_domain.is_some() {

                return Err(CbRuleCreationFailure::UnlessAndIfDomainTogetherUnsupported);

            let blocking_type = if v.mask.contains(NetworkFilterMask::IS_EXCEPTION) {

                CbType::IgnorePreviousRules

            } else {

                CbType::Block

};

            let resource_type = if v.mask.contains(NetworkFilterMask::FROM_NETWORK_TYPES) {

                None

            } else {

                let mut types = HashSet::new();

                let mut unsupported_flags = NetworkFilterMask::empty();

                macro_rules! push_if_flag {

                    ($flag:ident, $target:ident) => {

                        if v.mask.contains(NetworkFilterMask::$flag) {

                            types.insert(CbResourceType::$target);

};

                    ($flag:ident) => {

                        if v.mask.contains(NetworkFilterMask::$flag) {

                            unsupported_flags |= NetworkFilterMask::$flag;

};

                push_if_flag!(FROM_IMAGE, Image);

                push_if_flag!(FROM_MEDIA, Media);

                push_if_flag!(FROM_OBJECT);

                push_if_flag!(FROM_OTHER);

                push_if_flag!(FROM_PING);

                push_if_flag!(FROM_SCRIPT, Script);

                push_if_flag!(FROM_STYLESHEET, StyleSheet);

                push_if_flag!(FROM_SUBDOCUMENT, Document);

                push_if_flag!(FROM_WEBSOCKET);

                push_if_flag!(FROM_XMLHTTPREQUEST, Raw);

                push_if_flag!(FROM_FONT, Font);

                // TODO - Popup, Document when implemented

                if !unsupported_flags.is_empty() && types.is_empty() {

                    return Err(CbRuleCreationFailure::NoSupportedNetworkOptions(

                        unsupported_flags,

));

                Some(types)

};

            let url_filter_is_case_sensitive = if v.mask.contains(NetworkFilterMask::MATCH_CASE) {

                Some(true)

            } else {

                None

};

            let single_rule = CbRule {

                action: CbAction {

                    typ: blocking_type,

                    selector: None,

},

                trigger: CbTrigger {

                    url_filter,

                    load_type,

                    if_domain,

                    unless_domain,

                    resource_type,

                    url_filter_is_case_sensitive,

                    ..Default::default()

},

};

            if !single_rule.is_ascii() {

                return Err(CbRuleCreationFailure::RuleContainsNonASCII);

            if let Some(resource_types) = &single_rule.trigger.resource_type {

                if resource_types.len() > 1

                    && resource_types.contains(&CbResourceType::Document)

                    && single_rule.trigger.load_type.is_empty()

                    let mut non_doc_types = resource_types.clone();

                    non_doc_types.remove(&CbResourceType::Document);

                    let rule_clone = single_rule.clone();

                    let non_doc_rule = CbRule {

                        trigger: CbTrigger {

                            resource_type: Some(non_doc_types),

                            ..rule_clone.trigger

},

                        ..rule_clone

};

                    let mut doc_type = HashSet::new();

                    doc_type.insert(CbResourceType::Document);

                    let just_doc_rule = CbRule {

                        trigger: CbTrigger {

                            resource_type: Some(doc_type),

                            load_type: vec![CbLoadType::ThirdParty],

                            ..single_rule.trigger

},

                        ..single_rule

};

                    return Ok(Self::SplitDocument(non_doc_rule, just_doc_rule));

            Ok(Self::SingleRule(single_rule))

        } else {

            Err(CbRuleCreationFailure::NeedsDebugMode)

impl TryFrom<CosmeticFilter> for CbRule {

    type Error = CbRuleCreationFailure;

    fn try_from(v: CosmeticFilter) -> Result<Self, Self::Error> {

        use crate::filters::cosmetic::{

            CosmeticFilterLocationType as LocationType, CosmeticFilterMask,

};

        if v.action.is_some() {

            return Err(CbRuleCreationFailure::CosmeticActionRulesNotSupported);

        if v.mask.contains(CosmeticFilterMask::SCRIPT_INJECT) {

            return Err(CbRuleCreationFailure::ScriptletInjectionsNotSupported);

        if let Some(raw_line) = &v.raw_line {

            let mut hostnames_vec = vec![];

            let mut not_hostnames_vec = vec![];

            let mut any_unsupported = false;

            // Unwrap is okay here - cosmetic rules must have a '#' character

            let sharp_index = find_char(b'#', raw_line.as_bytes()).unwrap();

            CosmeticFilter::locations_before_sharp(raw_line, sharp_index).for_each(

                |(location_type, location)| match location_type {

                    LocationType::Entity | LocationType::NotEntity | LocationType::Unsupported => {

                        any_unsupported = true

                    LocationType::Hostname => {

                        if let Ok(encoded) = idna::domain_to_ascii(location) {

                            hostnames_vec.push(encoded);

                    LocationType::NotHostname => {

                        if let Ok(encoded) = idna::domain_to_ascii(location) {

                            not_hostnames_vec.push(encoded);

},

);

            if any_unsupported && hostnames_vec.is_empty() && not_hostnames_vec.is_empty() {

                return Err(CbRuleCreationFailure::CosmeticEntitiesUnsupported);

            let hostnames_vec = non_empty(hostnames_vec);

            let not_hostnames_vec = non_empty(not_hostnames_vec);

            if hostnames_vec.is_some() && not_hostnames_vec.is_some() {

                return Err(CbRuleCreationFailure::UnlessAndIfDomainTogetherUnsupported);

            let (unless_domain, if_domain) = if v.mask.contains(CosmeticFilterMask::UNHIDE) {

                (hostnames_vec, not_hostnames_vec)

            } else {

                (not_hostnames_vec, hostnames_vec)

};

            let selector = if let Some(selector) = v.plain_css_selector() {

                selector.to_string()

            } else {

                return Err(CbRuleCreationFailure::ProceduralCosmeticFiltersUnsupported);

};

            let rule = Self {

                action: CbAction {

                    typ: CbType::CssDisplayNone,

                    selector: Some(selector),

},

                trigger: CbTrigger {

                    url_filter: ".*".to_string(),

                    if_domain,

                    unless_domain,

                    ..Default::default()

},

};

            if !rule.is_ascii() {

                return Err(CbRuleCreationFailure::RuleContainsNonASCII);

            Ok(rule)

        } else {

            Err(CbRuleCreationFailure::NeedsDebugMode)

// Unit tests for this module are kept in a separate file (see the `#[path]` attribute) and are
// only compiled for test builds.
#[cfg(test)]

#[path = "../tests/unit/content_blocking.rs"]

mod unit_tests;

Source code

Revision control

Copy as Markdown

Other Tools