aboutsummaryrefslogtreecommitdiff
path: root/lib/adblock/filterrule.cpp
blob: 79ded6d23e0be89bf0783dc66e8f82497f93cef4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/*
 * This file is part of smolbote. It's copyrighted by the contributors recorded
 * in the version control history of the file, available from its original
 * location: git://neueland.iserlohn-fortress.net/smolbote.git
 * 
 * SPDX-License-Identifier: GPL-3.0
 */

#include "filterrule.h"
#include <QUrl>

/* TODO
 * - split this class into two: FilterRule that only deals with matching, and AdBlockRule, which only deals with parsing
 */

/* The AdBlock standard is an incredible mess, vaguely explained on:
 * - https://adblockplus.org/filters
 * - https://adblockplus.org/filter-cheatsheet
 */

/**
 * Build a rule from one line of a filter list.
 * The line is handed to parse(), and its result is remembered so that
 * isValid() can report it later.
 */
FilterRule::FilterRule(const QString &line)
{
    // assigned in the body (not the initializer list) because parse() writes other members
    const bool parsed = parse(line);
    valid = parsed;
}

// Nothing to release by hand; the compiler-generated destructor is sufficient.
FilterRule::~FilterRule() = default;

bool FilterRule::isValid() const
{
    return valid;
}

bool FilterRule::isException() const
{
    return exception;
}

bool FilterRule::shouldBlock(const QUrl &requestUrl) const
{
    QRegularExpressionMatch match = rule.match(requestUrl.toString());
    return match.hasMatch();
}

bool FilterRule::parse(const QString &line)
{
    // skip for comments and empty rules
    if(line.startsWith('!') || line.trimmed().isEmpty()) {
        return false;
    }

    // make a copy of the pattern so we can snap off the parts we've already parsed
    QString pattern = line;

    if(pattern.startsWith("@@")) {
        exception = true;
        pattern = pattern.mid(2);   // remove @@
    }

    rule.setPattern(createRegExpPattern(pattern));
    return true;
}

/**
 * Translate the pattern part of an AdBlock filter into a regular expression.
 *
 * Translations:
 * - "||" at the start : scheme and optional host prefix (domain anchor)
 * - "|" at the start  : ^ (start of address)
 * - "|" at the end    : $ (end of address)
 * - "*"               : .* (wildcard)
 * - "^"               : separator (end of string, '?', '/', or ":port")
 * - everything else   : matched literally; regex metacharacters are escaped
 *
 * Anchors are only recognised at the very start or end of the filter, and each
 * one is handled independently; a '|' anywhere else is a literal character.
 *
 * @param line filter pattern, without the "@@" exception marker
 * @return regular expression pattern string
 */
QString createRegExpPattern(const QString &line)
{
    QString filter = line;

    // Strip the anchors first so that the '|' characters emitted later (inside
    // the separator group) can never be mistaken for them.
    QString prefix;
    if(filter.startsWith("||")) {
        // NOTE(review): the host character class has no '-', so hyphenated
        // host names are not covered by the domain anchor - confirm intent.
        prefix = "^\\w+://([\\w,\\d,\\.]+)?";
        filter.remove(0, 2);
    } else if(filter.startsWith('|')) {
        prefix = "^";
        filter.remove(0, 1);
    }

    QString suffix;
    if(filter.endsWith('|')) {
        suffix = "$";
        filter.chop(1);
    }

    // Characters that would otherwise be interpreted by the regex engine.
    // '*' and '^' are absent on purpose: they carry AdBlock meaning.
    static const QString metaCharacters("\\.+?()[]{}$|");

    QString pattern = prefix;
    pattern.reserve(prefix.size() + filter.size() * 2 + suffix.size());

    // Single pass over the filter: every input character is translated exactly
    // once, so text produced by one translation is never rewritten by another.
    for(const QChar c : filter) {
        if(c == QLatin1Char('*')) {
            // wildcard: zero or more of any character
            pattern += ".*";
        } else if(c == QLatin1Char('^')) {
            // separator: end of string, or ?, or /, or :number
            pattern += "($|\\?|\\/|:\\d+)";
        } else {
            if(metaCharacters.contains(c)) {
                pattern += QLatin1Char('\\');
            }
            pattern += c;
        }
    }

    pattern += suffix;
    return pattern;
}