From a4631fb9ec2541f99e1aba05cf1e7d2b5ebecb98 Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Thu, 19 Aug 2010 02:26:40 +0200 Subject: Add a special matcher for ad block filters for host name Quite a few rules of ad block are just matching domains. Those are of the form: ||trolltech.com^$options This patch add a new class to deal with this kind of filter, AdBlockHostMatcher. Matching a host address is much faster (O(1)) than going through the entire list of rules. --- src/CMakeLists.txt | 1 + src/adblock/adblockhostmatcher.cpp | 55 ++++++++++++++++++++++++++++++++++++++ src/adblock/adblockhostmatcher.h | 51 +++++++++++++++++++++++++++++++++++ src/adblock/adblockmanager.cpp | 24 ++++++++++++++++- src/adblock/adblockmanager.h | 3 +++ 5 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 src/adblock/adblockhostmatcher.cpp create mode 100644 src/adblock/adblockhostmatcher.h (limited to 'src') diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dd90fc10..e873052b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -50,6 +50,7 @@ SET( rekonq_KDEINIT_SRCS bookmarks/bookmarksproxy.cpp bookmarks/bookmarkcontextmenu.cpp #---------------------------------------- + adblock/adblockhostmatcher.cpp adblock/adblockmanager.cpp adblock/adblocknetworkreply.cpp adblock/adblockrule.cpp diff --git a/src/adblock/adblockhostmatcher.cpp b/src/adblock/adblockhostmatcher.cpp new file mode 100644 index 00000000..b11dab2c --- /dev/null +++ b/src/adblock/adblockhostmatcher.cpp @@ -0,0 +1,55 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +* +* ============================================================ */ + +// Self Includes +#include "adblockhostmatcher.h" + +// Rekonq Includes +#include "rekonq_defines.h" + +bool AdBlockHostMatcher::tryAddFilter(const QString &filter) +{ + if (filter.startsWith(QL1S("||"))) { + QString domain = filter.mid(2); + + const int indexOfFirstSeparator = domain.indexOf(QL1C('^')); + if (indexOfFirstSeparator < 0) + return false; + + const int indexOfLastDollar = domain.lastIndexOf(QL1C('$')); + if (indexOfLastDollar >= 0 && indexOfLastDollar != indexOfFirstSeparator + 1) + return false; + + domain = domain.left(indexOfFirstSeparator); + if (domain.contains(QL1C('/')) || domain.contains(QL1C('*'))) + return false; + + domain = domain.toLower(); + m_hostList.insert(domain); + m_hostList.insert(QL1S("www.") + domain); + return true; + } + return false; +} diff --git a/src/adblock/adblockhostmatcher.h b/src/adblock/adblockhostmatcher.h new file mode 100644 index 00000000..0a15bd4e --- /dev/null +++ b/src/adblock/adblockhostmatcher.h @@ -0,0 +1,51 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +* +* ============================================================ */ + +#ifndef ADBLOCKHOSTMATCHER_H +#define ADBLOCKHOSTMATCHER_H + +#include +#include + +class AdBlockHostMatcher +{ +public: + // Try to add an adblock filter to this host matcher. + // If the filter is not an hostname, the filter is not added + // and the method return false; + bool tryAddFilter(const QString &filter); + + bool match(const QString &host) const + { + return m_hostList.contains(host.toLower()); + } + + void clear() { m_hostList.clear(); } + +private: + QSet m_hostList; +}; + +#endif // ADBLOCKHOSTMATCHER_H diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp index e0a109f0..600dc5ce 100644 --- a/src/adblock/adblockmanager.cpp +++ b/src/adblock/adblockmanager.cpp @@ -67,6 +67,8 @@ void AdBlockManager::loadSettings(bool checkUpdateDate) _index = 0; _buffer.clear(); + _hostWhiteList.clear(); + _hostBlackList.clear(); _whiteList.clear(); _blackList.clear(); _hideList.clear(); @@ -133,7 +135,10 @@ void AdBlockManager::loadRules(const QStringList &rules) // white rules if (stringRule.startsWith(QL1S("@@"))) { - AdBlockRule rule(stringRule.mid(2)); + const QString filter = stringRule.mid(2); + if (_hostWhiteList.tryAddFilter(filter)) + continue; + AdBlockRule rule(filter); _whiteList << rule; continue; } @@ -145,6 +150,8 @@ void AdBlockManager::loadRules(const QStringList &rules) continue; } + if (_hostBlackList.tryAddFilter(stringRule)) + continue; AdBlockRule rule(stringRule); _blackList << rule; } @@ -164,8 +171,16 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa // We compute a lowercase version of the URL so each rule does not // have to do it. const QString urlStringLowerCase = urlString.toLower(); + const QString host = request.url().host(); // check white rules before :) + + if (_hostWhiteList.match(host)) { + kDebug() << "****ADBLOCK: WHITE RULE (@@) Matched by host matcher: ***********"; + kDebug() << "UrlString: " << urlString; + return 0; + } + foreach(const AdBlockRule &filter, _whiteList) { if (filter.match(urlString, urlStringLowerCase)) @@ -177,6 +192,13 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa } // then check the black ones :( + if (_hostBlackList.match(host)) { + kDebug() << "****ADBLOCK: BLACK RULE Matched by host matcher: ***********"; + kDebug() << "UrlString: " << urlString; + AdBlockNetworkReply *reply = new AdBlockNetworkReply(request, urlString, this); + return reply; + } + foreach(const AdBlockRule &filter, _blackList) { if (filter.match(urlString, urlStringLowerCase)) diff --git a/src/adblock/adblockmanager.h b/src/adblock/adblockmanager.h index eae761e0..69548994 100644 --- a/src/adblock/adblockmanager.h +++ b/src/adblock/adblockmanager.h @@ -108,6 +108,7 @@ #include "rekonq_defines.h" // Local Includes +#include "adblockhostmatcher.h" #include "adblockrule.h" // KDE Includes @@ -155,6 +156,8 @@ private: bool _isAdblockEnabled; bool _isHideAdsEnabled; + AdBlockHostMatcher _hostBlackList; + AdBlockHostMatcher _hostWhiteList; AdBlockRuleList _blackList; AdBlockRuleList _whiteList; QStringList _hideList; -- cgit v1.2.1