aboutsummaryrefslogtreecommitdiff
path: root/staging/adblock/filterlist.cpp
diff options
context:
space:
mode:
authorAqua-sama <aqua@iserlohn-fortress.net>2020-02-10 20:58:39 +0200
committerAqua-sama <aqua@iserlohn-fortress.net>2020-05-30 11:01:35 +0300
commitd1287f43964633035938f4f4d4133bb6d9da7b3e (patch)
treed09efa4074815c20be9bd6348203fe4336dfe716 /staging/adblock/filterlist.cpp
parentFix segfault in release build (diff)
downloadsmolbote-d1287f43964633035938f4f4d4133bb6d9da7b3e.tar.xz
staging: smolblok
smolblok is a replacement for the current lib/urlfilter AdBlockPlus and hostlist format filter parser. It is a library that uses plugins to provide support for different filter formats:
- staging/adblock: AdBlockPlus parser plugin
- plugins/smolblok_hostlist: hostlist format parser plugin
Headers will be installed to include/smolbote/.
Remove lib/urlfilter.
Diffstat (limited to 'staging/adblock/filterlist.cpp')
-rw-r--r--staging/adblock/filterlist.cpp128
1 files changed, 128 insertions, 0 deletions
diff --git a/staging/adblock/filterlist.cpp b/staging/adblock/filterlist.cpp
new file mode 100644
index 0000000..1846ff6
--- /dev/null
+++ b/staging/adblock/filterlist.cpp
@@ -0,0 +1,128 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://library.iserlohn-fortress.net/aqua/smolbote.git
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#include "filterlist.h"
+#include <QByteArray>
+#include <QIODevice>
+#include <QLocale>
+
+/**
+ * Documentation:
+ *
+ * https://adblockplus.org/filter-cheatsheet
+ * https://help.eyeo.com/adblockplus/how-to-write-filters
+ *
+ * https://github.com/gorhill/uBlock/wiki/Introduction-to-basic-filtering-syntax
+ * https://github.com/gorhill/uBlock/wiki/Static-filter-syntax
+ *
+ */
+
+// Prefixes of the metadata comment lines that FilterList::parseComment()
+// looks for at the start of a line.
+const QLatin1String comment_lastModified("! Last modified: ");
+const QLatin1String comment_expires("! Expires: ");
+
+// Names from the AdblockPlus namespace are used unqualified in the
+// definitions below.
+using namespace AdblockPlus;
+
+/**
+ * Read an AdBlockPlus-style filter list from @p from, one line at a time.
+ *
+ * Nothing is read unless the device is readable and opened in text mode.
+ * Each non-empty line is dispatched as follows:
+ *  - lines starting with '!' go to parseComment()
+ *  - lines containing "##" or "#@#" (element hiding rules and their
+ *    exceptions) are skipped
+ *  - everything else goes to parseRule()
+ *
+ * @param from device to read the list from
+ */
+FilterList::FilterList(QIODevice &from)
+{
+    if(!from.isReadable() || !from.isTextModeEnabled()) {
+        return;
+    }
+
+    while(from.bytesAvailable() > 0) {
+        // Read the whole line: a fixed cap would cut long rules in two and the
+        // remainder would then be parsed as a separate, bogus rule.
+        const QByteArray raw = from.readLine();
+        if(raw.isEmpty()) {
+            // nothing could be read although bytes were reported available;
+            // stop instead of spinning on the same state
+            break;
+        }
+
+        // readLine() keeps the trailing '\n'; strip it (and any surrounding
+        // whitespace) so the parsers see only the line content
+        const QByteArray line = raw.trimmed();
+        if(line.isEmpty()) {
+            continue;
+        }
+
+        if(line.startsWith('!')) {
+            parseComment(line);
+
+        } else if(line.contains("##") || line.contains("#@#")) {
+            // ## is element hiding rule
+            // #@# is element hiding exception rule
+
+        } else {
+            // NOTE(review): the Rule returned by parseRule() is discarded here;
+            // confirm who is supposed to own that allocation.
+            parseRule(line);
+        }
+    }
+}
+
+/**
+ * Extract list metadata from a comment line.
+ *
+ * Two comment forms are recognised:
+ *  - "! Last modified: dd MMM yyyy HH:mm UTC" sets lastModified and resets
+ *    expires to the same instant
+ *  - "! Expires: N day(s)" or "! Expires: N hour(s)" moves expires forward by
+ *    that amount
+ *
+ * Any other comment is ignored. The expiry offset is applied to the current
+ * value of expires, so it only yields a meaningful date when the
+ * "Last modified" line has been seen first.
+ *
+ * @param line comment line, with or without its trailing newline
+ */
+void FilterList::parseComment(const QString &line)
+{
+    if(line.startsWith(comment_lastModified)) {
+        // drop the trailing newline/whitespace, otherwise the format never matches
+        const QString timestamp = line.mid(comment_lastModified.size()).trimmed();
+
+        // The month abbreviation in list headers is English; parse with the C
+        // locale so the result does not depend on the system locale.
+        lastModified = QLocale::c().toDateTime(timestamp, "dd MMM yyyy HH:mm 'UTC'");
+        // the timestamp is declared as UTC in the text, so store it as such
+        lastModified.setTimeSpec(Qt::UTC);
+        expires = lastModified;
+
+    } else if(line.startsWith(comment_expires)) {
+        // accept both singular and plural units ("1 day", "4 days", "12 hours")
+        const QRegularExpression time_re("(?:(\\d+) days?)|(?:(\\d+) hours?)");
+        const auto match = time_re.match(line);
+        if(match.hasMatch()) {
+            // only one alternative captures; the other is empty and converts to 0
+            expires = expires.addDays(match.captured(1).toInt());
+            expires = expires.addSecs(match.captured(2).toInt() * 60 * 60);
+        }
+    }
+}
+
+/**
+ * Parse a single network filter line into a rule object.
+ *
+ * Supported syntax:
+ *  - leading "@@" marks an exception rule
+ *  - a trailing "$options" part is handed to Options::parseAbp()
+ *  - "||domain^"  domain match
+ *  - "|url|"      URL equals
+ *  - "|prefix"    URL starts with
+ *  - "suffix|"    URL ends with
+ *  - "/regex/"    regular expression
+ *  - anything containing '*' is converted from a wildcard to a regex
+ *  - anything else is a plain "contains" match
+ *
+ * @param line one line of the filter list, with or without trailing newline
+ * @return a rule allocated with new, or nullptr when the options cannot be
+ *         parsed or the pattern is empty
+ */
+Rule *FilterList::parseRule(const QByteArray &line)
+{
+    // Lines coming from QIODevice::readLine() still carry their '\n'; without
+    // trimming, none of the endsWith() anchor checks below can ever match.
+    QString pattern = line.trimmed();
+    Options opt;
+
+    if(pattern.startsWith(QLatin1String("@@"))) {
+        pattern.remove(0, 2);
+        opt.exception = true;
+    }
+
+    // parse options
+    // A complete /regex/ literal may contain '$' as an anchor, so it is not
+    // searched for an options separator.
+    const bool isRegexLiteral = pattern.length() > 1 && pattern.startsWith("/") && pattern.endsWith("/");
+    if(!isRegexLiteral && pattern.contains('$')) {
+        // options follow the last '$'; earlier ones belong to the pattern
+        const auto optionsAt = pattern.lastIndexOf(QLatin1Char('$'));
+        const auto options = pattern.mid(optionsAt + 1);
+        pattern.truncate(optionsAt);
+
+        if(!opt.parseAbp(&options)) {
+            return nullptr;
+        }
+    }
+
+    if(pattern.startsWith("||") && pattern.endsWith("^")) {
+        // domain match
+        pattern = pattern.mid(2, pattern.length() - 3);
+        if(pattern.isEmpty()) {
+            return nullptr;
+        }
+        return new MatcherRule(pattern, opt, MatcherRule::DomainMatch);
+
+    } else if(pattern.startsWith("|") && pattern.endsWith("|")) {
+        // string equals
+        pattern = pattern.mid(1, pattern.length() - 2);
+        if(pattern.isEmpty()) {
+            // a bare "|" or "||" carries no URL to compare against
+            return nullptr;
+        }
+        return new MatcherRule(pattern, opt, MatcherRule::UrlEquals);
+
+    } else if(pattern.startsWith("|")) {
+        // string starts with
+        pattern = pattern.mid(1);
+        if(pattern.isEmpty()) {
+            return nullptr;
+        }
+        return new MatcherRule(pattern, opt, MatcherRule::UrlStartsWith);
+
+    } else if(pattern.endsWith("|")) {
+        // string ends with
+        pattern.chop(1);
+        if(pattern.isEmpty()) {
+            return nullptr;
+        }
+        return new MatcherRule(pattern, opt, MatcherRule::UrlEndsWith);
+
+    } else if(pattern.startsWith("/") && pattern.endsWith("/")) {
+        // regular expression
+        pattern = pattern.mid(1, pattern.length() - 2);
+        if(pattern.isEmpty()) {
+            // a bare "/" or "//" has no expression between the delimiters
+            return nullptr;
+        }
+        return new RegexRule(pattern, opt);
+
+    } else if(!pattern.isEmpty()) {
+        if(pattern.contains('*')) {
+            // wildcard pattern
+            // NOTE(review): Qt documents the result of
+            // wildcardToRegularExpression() as fully anchored - confirm that
+            // whole-URL matching is what RegexRule is meant to do here.
+            pattern = QRegularExpression::wildcardToRegularExpression(pattern);
+            return new RegexRule(pattern, opt);
+        } else {
+            // contains pattern
+            return new MatcherRule(pattern, opt);
+        }
+    }
+
+    return nullptr;
+}
+
+/**
+ * Placeholder: @p info is not inspected and the result is always false.
+ *
+ * @param info request information (currently unused)
+ * @return false
+ */
+bool FilterList::filter(QWebEngineUrlRequestInfo &info) const
+{
+    static_cast<void>(info); // intentionally unused
+
+    constexpr bool result = false;
+    return result;
+}