1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
#include "adblockrule.h"
#include <QRegularExpression>
/**
 * Maps a single resource-type option of an Adblock Plus filter (one of the
 * comma-separated tokens after "$", e.g. "script" or "~image") to a
 * QWebEngineUrlRequestInfo resource type.
 *
 * The option name is recognised by its suffix, so a leading "~" does not
 * interfere with the lookup.
 *
 * @param option a single option token.
 * @return the resource type paired with a flag that is false when the token
 *         starts with "~" and true otherwise. For a token that is not
 *         handled, debug builds hit Q_ASSERT; release builds return
 *         ResourceTypeUnknown with the same flag, which callers cannot tell
 *         apart from the "other" option.
 */
inline std::pair<QWebEngineUrlRequestInfo::ResourceType, bool> parseOption(const QString &option)
{
    // a leading "~" inverts the option; every branch needs this, so compute it once
    const bool enabled = !option.startsWith(QLatin1Literal("~"));

    if(option.endsWith(QLatin1Literal("script"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeScript, enabled);
    }
    if(option.endsWith(QLatin1Literal("image"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeImage, enabled);
    }
    if(option.endsWith(QLatin1Literal("stylesheet"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, enabled);
    }
    if(option.endsWith(QLatin1Literal("object"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeObject, enabled);
    }
    if(option.endsWith(QLatin1Literal("xmlhttprequest"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeXhr, enabled);
    }
    if(option.endsWith(QLatin1Literal("other"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, enabled);
    }

    // unhandled option: keep the debug-build assertion, but always return a
    // defined value - Q_ASSERT expands to nothing in release builds, and
    // flowing off the end of a non-void function is undefined behaviour
    Q_ASSERT(false);
    return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, enabled);
}
// adblock format documentation
// https://adblockplus.org/filters
// QString::mid(pos, len) - Returns a string starting at the specified position index.
// QString::chop(len) - Removes n characters from the end of the string.
// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index.
AdBlockRule::AdBlockRule(const QString &filter)
{
QString parsedLine = filter.trimmed();
// there is no rule, or it"s a comment
if(parsedLine.isEmpty() || parsedLine.startsWith("!")) {
return;
}
// css rule - ignore for now
if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) {
return;
}
m_isEnabled = true;
// exception rules
if(parsedLine.startsWith(QLatin1Literal("@@"))) {
m_isException = true;
parsedLine.remove(0, 2);
}
// parse options
{
const int sepPos = parsedLine.indexOf(QLatin1Literal("$"));
if(sepPos != -1) {
const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(","));
parsedLine = parsedLine.mid(0, sepPos);
for(const QString &option : options) {
if(option.startsWith(QLatin1Literal("domain"))) {
const auto domainList = option.mid(7).split(QLatin1Literal("|"));
for(const QString &domain : domainList) {
if(domain.startsWith(QLatin1Literal("~")))
blockedDomains.append(domain.mid(1));
else
allowedDomains.append(domain);
}
} else {
auto optPair = parseOption(option);
m_resourceTypeOptions.insert(optPair.first, optPair.second);
}
}
}
}
// regular expression rules
if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
urlMatchType = RegularExpressionMatch;
regexp.setPattern(parsedLine);
return;
}
// basic filter rules
if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
urlMatchType = StringEquals;
match = parsedLine.mid(1, parsedLine.length() - 2);
return;
}
// Basic filter rules can use wildcards, which were supported by QRegExp,
// but were deprecated in QRegularExpression.
// remove beginning and ending wildcards
if(parsedLine.startsWith(QLatin1Literal("*")))
parsedLine = parsedLine.mid(1);
if(parsedLine.endsWith(QLatin1Literal("*")))
parsedLine.chop(1);
if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
urlMatchType = DomainMatch;
match = parsedLine.mid(2, parsedLine.length() - 3);
return;
}
// check for wildcards and translate to regexp
// wildcard "*" - any number of characters
// separator "^" - end, ? or /
if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) {
urlMatchType = RegularExpressionMatch;
parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*"));
parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)"));
regexp.setPattern(parsedLine);
return;
}
match = parsedLine;
}
|