From e2b1cc628b304e3f153abc17fb350aa781e26b36 Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Mon, 18 Dec 2017 14:32:45 +0100 Subject: Basic adblock FilterRule --- lib/adblock/filterrule.cpp | 102 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 lib/adblock/filterrule.cpp (limited to 'lib/adblock/filterrule.cpp') diff --git a/lib/adblock/filterrule.cpp b/lib/adblock/filterrule.cpp new file mode 100644 index 0000000..147e5fd --- /dev/null +++ b/lib/adblock/filterrule.cpp @@ -0,0 +1,102 @@ +/******************************************************************************* + ** + ** smolbote: yet another qute browser + ** Copyright (C) 2017 Xian Nox + ** + ** This program is free software: you can redistribute it and/or modify + ** it under the terms of the GNU General Public License as published by + ** the Free Software Foundation, either version 3 of the License, or + ** (at your option) any later version. + ** + ** This program is distributed in the hope that it will be useful, + ** but WITHOUT ANY WARRANTY; without even the implied warranty of + ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + ** GNU General Public License for more details. + ** + ** You should have received a copy of the GNU General Public License + ** along with this program. If not, see <http://www.gnu.org/licenses/>.
+ ** + ******************************************************************************/ + +#include "filterrule.h" +#include + +/* TODO + * - split this class into two: FilterRule that only deals with matching, and AdBlockRule, which only deals with parsing + */ + +/* The AdBlock standard is an incredible mess, vaguely explaiend on: + * - https://adblockplus.org/filters + * - https://adblockplus.org/filter-cheatsheet + */ + +FilterRule::FilterRule(const QString &line) +{ + valid = parse(line); +} + +FilterRule::~FilterRule() +{ +} + +bool FilterRule::isValid() const +{ + return valid; +} + +bool FilterRule::isException() const +{ + return exception; +} + +bool FilterRule::shouldBlock(const QUrl &requestUrl) const +{ + QRegularExpressionMatch match = rule.match(requestUrl.toString()); + return match.hasMatch(); +} + +bool FilterRule::parse(const QString &line) +{ + // skip for comments and empty rules + if(line.startsWith('!') || line.trimmed().isEmpty()) { + return false; + } + + // make a copy of the pattern so we can snap off the parts we've already parsed + QString pattern = line; + + if(pattern.startsWith("@@")) { + exception = true; + pattern = pattern.mid(2); // remove @@ + } + + rule.setPattern(createRegExpPattern(pattern)); + return true; +} + +QString createRegExpPattern(const QString &line) +{ + QString pattern = line; + + // replace . (any character) with \. (a dot) + pattern.replace('.', "\\."); + + // translate adblock special characters into regex + // replace wildcard (*) with '.*' (zero or more of any element) + pattern.replace('*', ".*"); + + // replace separator (^) with '($|\?|\/)' (end of string, or ?, or /, or :number) + pattern.replace('^', "($|\\?|\\/|:\\d+)"); + + // replace || with ^\w+://([\w,\d,\.]+)? 
+ pattern.replace("||", "^\\w+://([\\w,\\d,\\.]+)?"); + + if(pattern.startsWith('|') && pattern.endsWith('|')) { + // replace | at start with ^ (start of string) + pattern.replace(0, 1, '^'); + // replace | at end with $ (end of string) + pattern.replace('|', '$'); + } + + return pattern; +} -- cgit v1.2.1