1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
#include "adblockrule.h"
bool isMatchingDomain(const QString &domain, const QString &filter)
{
// domain and filter are the same
if(domain == filter) {
return true;
}
// domain can't be matched by filter if it doesn't end with filter
// ex. example2.com isn't matched by example.com
if(!domain.endsWith(filter)) {
return false;
}
// match with subdomains
// ex. subdomain.example.com is matched by example.com
int index = domain.indexOf(filter);
// match if (domain ends with filter) && (filter has been found) and (character before filter is '.')
return index > 0 && domain[index - 1] == QLatin1Char('.');
}
// adblock format documentation
// https://adblockplus.org/filters
// QString::mid(pos, len) - Returns a string starting at the specified position index.
// QString::chop(len) - Removes n characters from the end of the string.
// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index.
// Builds a rule from one line of an Adblock Plus style filter list.
//
// Handling, in order:
//   - empty lines and comments ("!...") leave the rule untouched;
//   - element hiding rules ("##", "#@#") are ignored for now;
//   - "@@" prefix marks the rule as an exception;
//   - "/pattern/" is used as a regular expression as-is;
//   - "|text|" is an exact comparison against the whole URL;
//   - "||host^" with a plain host name is a domain match;
//   - anything containing wildcards ("*"), separators ("^") or anchors
//     ("|", "||") is translated into a regular expression;
//   - everything else is handed to `matcher` as a plain pattern, matchType is
//     not changed on that path.
// Rules that would degenerate into an empty regular expression (which matches
// every URL) are disabled instead.
AdBlockRule::AdBlockRule(const QString &filter)
{
    QString parsedLine = filter.trimmed();

    // there is no rule, or it's a comment
    if(parsedLine.isEmpty() || parsedLine.startsWith(QLatin1Char('!'))) {
        return;
    }

    // css rule - ignore for now
    if(parsedLine.contains(QLatin1String("##")) || parsedLine.contains(QLatin1String("#@#"))) {
        return;
    }

    m_isEnabled = true;

    // exception rules
    if(parsedLine.startsWith(QLatin1String("@@"))) {
        m_isException = true;
        parsedLine.remove(0, 2);
    }

    // regular expression rules
    if(parsedLine.startsWith(QLatin1Char('/')) && parsedLine.endsWith(QLatin1Char('/'))) {
        if(parsedLine.length() < 3) {
            // "/" or "//": nothing between the slashes, and an empty pattern
            // would match every URL
            m_isEnabled = false;
            return;
        }
        matchType = RegularExpressionMatch;
        regexp.setPattern(parsedLine.mid(1, parsedLine.length() - 2));
        return;
    }

    // basic filter rules: |text| has to equal the complete URL.
    // A leading "||" is a domain anchor, not a start anchor, so it is excluded here.
    if(parsedLine.startsWith(QLatin1Char('|')) && parsedLine.endsWith(QLatin1Char('|'))
            && !parsedLine.startsWith(QLatin1String("||"))) {
        matchType = StringEquals;
        match = parsedLine.mid(1, parsedLine.length() - 2);
        return;
    }

    // Basic filter rules can use wildcards, which were supported by QRegExp,
    // but were deprecated in QRegularExpression.
    // remove beginning and ending wildcards, they don't change what is matched
    if(parsedLine.startsWith(QLatin1Char('*')))
        parsedLine.remove(0, 1);
    if(parsedLine.endsWith(QLatin1Char('*')))
        parsedLine.chop(1);

    // domain rules: ||host^
    if(parsedLine.startsWith(QLatin1String("||")) && parsedLine.endsWith(QLatin1Char('^'))) {
        const QString host = parsedLine.mid(2, parsedLine.length() - 3);
        // Only a plain host name can be compared against QUrl::host(); rules with
        // a path, wildcard or inner separator go through the regexp translation below.
        const bool isPlainHost = !host.isEmpty()
                && !host.contains(QLatin1Char('*'))
                && !host.contains(QLatin1Char('^'))
                && !host.contains(QLatin1Char('/'));
        if(isPlainHost) {
            matchType = DomainMatch;
            match = host;
            return;
        }
    }

    // check for wildcards and anchors and translate to regexp
    // wildcard "*"  - any number of characters
    // separator "^" - end, ? or /
    // anchor "|"    - beginning / end of the address
    // anchor "||"   - beginning of the domain name
    const bool isAnchored = parsedLine.startsWith(QLatin1Char('|')) || parsedLine.endsWith(QLatin1Char('|'));
    if(isAnchored || parsedLine.contains(QLatin1Char('*')) || parsedLine.contains(QLatin1Char('^'))) {
        QString prefix;
        QString suffix;

        if(parsedLine.startsWith(QLatin1String("||"))) {
            // scheme, then optionally any number of subdomain labels
            prefix = QLatin1String("^[\\w\\-]+:/+(?:[^/]+\\.)?");
            parsedLine.remove(0, 2);
        } else if(parsedLine.startsWith(QLatin1Char('|'))) {
            prefix = QLatin1String("^");
            parsedLine.remove(0, 1);
        }
        if(parsedLine.endsWith(QLatin1Char('|'))) {
            suffix = QLatin1String("$");
            parsedLine.chop(1);
        }

        if(parsedLine.isEmpty()) {
            // only anchors were given; the resulting pattern would match every URL
            m_isEnabled = false;
            return;
        }

        // Escape first so that characters such as '.', '?', '+' or '|' in the
        // filter are matched literally, then turn the (now escaped) filter
        // placeholders into their regexp equivalents.
        QString pattern = QRegularExpression::escape(parsedLine);
        pattern.replace(QLatin1String("\\*"), QLatin1String(".*"));
        pattern.replace(QLatin1String("\\^"), QLatin1String("($|\\?|\\/)"));

        matchType = RegularExpressionMatch;
        regexp.setPattern(prefix + pattern + suffix);
        return;
    }

    // plain text rule
    matcher.setPattern(parsedLine);
}
// Reports the m_isEnabled flag. The constructor sets it once a line has passed
// the empty / comment / element hiding checks.
bool AdBlockRule::isEnabled() const
{
    return m_isEnabled;
}
bool AdBlockRule::shouldBlock(const QUrl &url) const
{
switch (matchType) {
case RegularExpressionMatch:
if(regexp.match(url.toString()).hasMatch())
return !m_isException;
case StringEquals:
return url.toString() == match;
case DomainMatch:
return isMatchingDomain(url.host(), match);
default:
return false;
}
}
|