diff options
-rw-r--r-- | src/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/adblock/adblockhostmatcher.cpp | 55 | ||||
-rw-r--r-- | src/adblock/adblockhostmatcher.h | 51 | ||||
-rw-r--r-- | src/adblock/adblockmanager.cpp | 38 | ||||
-rw-r--r-- | src/adblock/adblockmanager.h | 3 | ||||
-rw-r--r-- | src/adblock/adblockrule.h | 5 | ||||
-rw-r--r-- | src/adblock/adblockrulefallbackimpl.cpp | 2 | ||||
-rw-r--r-- | src/adblock/adblockrulefallbackimpl.h | 2 | ||||
-rw-r--r-- | src/adblock/adblockruleimpl.h | 2 | ||||
-rw-r--r-- | src/adblock/adblockruletextmatchimpl.cpp | 11 | ||||
-rw-r--r-- | src/adblock/adblockruletextmatchimpl.h | 2 |
11 files changed, 159 insertions, 13 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0130c2d4..151906b9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -52,6 +52,7 @@ SET( rekonq_KDEINIT_SRCS bookmarks/bookmarkstoolbar.cpp bookmarks/bookmarkowner.cpp #---------------------------------------- + adblock/adblockhostmatcher.cpp adblock/adblockmanager.cpp adblock/adblocknetworkreply.cpp adblock/adblockrule.cpp diff --git a/src/adblock/adblockhostmatcher.cpp b/src/adblock/adblockhostmatcher.cpp new file mode 100644 index 00000000..b11dab2c --- /dev/null +++ b/src/adblock/adblockhostmatcher.cpp @@ -0,0 +1,55 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com> +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* ============================================================ */ + +// Self Includes +#include "adblockhostmatcher.h" + +// Rekonq Includes +#include "rekonq_defines.h" + +bool AdBlockHostMatcher::tryAddFilter(const QString &filter) +{ + if (filter.startsWith(QL1S("||"))) { + QString domain = filter.mid(2); + + const int indexOfFirstSeparator = domain.indexOf(QL1C('^')); + if (indexOfFirstSeparator < 0) + return false; + + const int indexOfLastDollar = domain.lastIndexOf(QL1C('$')); + if (indexOfLastDollar >= 0 && indexOfLastDollar != indexOfFirstSeparator + 1) + return false; + + domain = domain.left(indexOfFirstSeparator); + if (domain.contains(QL1C('/')) || domain.contains(QL1C('*'))) + return false; + + domain = domain.toLower(); + m_hostList.insert(domain); + m_hostList.insert(QL1S("www.") + domain); + return true; + } + return false; +} diff --git a/src/adblock/adblockhostmatcher.h b/src/adblock/adblockhostmatcher.h new file mode 100644 index 00000000..0a15bd4e --- /dev/null +++ b/src/adblock/adblockhostmatcher.h @@ -0,0 +1,51 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com> +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* ============================================================ */ + +#ifndef ADBLOCKHOSTMATCHER_H +#define ADBLOCKHOSTMATCHER_H + +#include <QSet> +#include <QString> + +class AdBlockHostMatcher +{ +public: + // Try to add an adblock filter to this host matcher. + // If the filter is not an hostname, the filter is not added + // and the method return false; + bool tryAddFilter(const QString &filter); + + bool match(const QString &host) const + { + return m_hostList.contains(host.toLower()); + } + + void clear() { m_hostList.clear(); } + +private: + QSet<QString> m_hostList; +}; + +#endif // ADBLOCKHOSTMATCHER_H diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp index 644ecff5..8deb0bcd 100644 --- a/src/adblock/adblockmanager.cpp +++ b/src/adblock/adblockmanager.cpp @@ -67,6 +67,8 @@ void AdBlockManager::loadSettings(bool checkUpdateDate) _index = 0; _buffer.clear(); + _hostWhiteList.clear(); + _hostBlackList.clear(); _whiteList.clear(); _blackList.clear(); _hideList.clear(); @@ -133,7 +135,10 @@ void AdBlockManager::loadRules(const QStringList &rules) // white rules if (stringRule.startsWith(QL1S("@@"))) { - AdBlockRule rule(stringRule.mid(2)); + const QString filter = stringRule.mid(2); + if (_hostWhiteList.tryAddFilter(filter)) + continue; + AdBlockRule rule(filter); _whiteList << rule; continue; } @@ -145,6 +150,12 @@ void AdBlockManager::loadRules(const QStringList &rules) continue; } + // TODO implement domain-specific hiding + if (stringRule.contains(QL1S("##"))) + continue; + + if (_hostBlackList.tryAddFilter(stringRule)) + continue; AdBlockRule rule(stringRule); _blackList << rule; } @@ -161,11 +172,22 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa return 0; QString urlString = request.url().toString(); + // We compute a lowercase version of the URL so each rule does not + // have to do it. + const QString urlStringLowerCase = urlString.toLower(); + const QString host = request.url().host(); // check white rules before :) + + if (_hostWhiteList.match(host)) { + kDebug() << "****ADBLOCK: WHITE RULE (@@) Matched by host matcher: ***********"; + kDebug() << "UrlString: " << urlString; + return 0; + } + foreach(const AdBlockRule &filter, _whiteList) { - if (filter.match(urlString)) + if (filter.match(urlString, urlStringLowerCase)) { kDebug() << "****ADBLOCK: WHITE RULE (@@) Matched: ***********"; kDebug() << "UrlString: " << urlString; @@ -174,9 +196,16 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa } // then check the black ones :( + if (_hostBlackList.match(host)) { + kDebug() << "****ADBLOCK: BLACK RULE Matched by host matcher: ***********"; + kDebug() << "UrlString: " << urlString; + AdBlockNetworkReply *reply = new AdBlockNetworkReply(request, urlString, this); + return reply; + } + foreach(const AdBlockRule &filter, _blackList) { - if (filter.match(urlString)) + if (filter.match(urlString, urlStringLowerCase)) { kDebug() << "****ADBLOCK: BLACK RULE Matched: ***********"; kDebug() << "UrlString: " << urlString; @@ -185,7 +214,8 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa QWebElementCollection elements = document.findAll("*"); foreach(QWebElement el, elements) { - if (filter.match(el.attribute("src"))) + const QString srcAttribute = el.attribute("src"); + if (filter.match(srcAttribute, srcAttribute.toLower())) { kDebug() << "MATCHES ATTRIBUTE!!!!!"; el.setStyleProperty(QL1S("visibility"), QL1S("hidden")); diff --git a/src/adblock/adblockmanager.h b/src/adblock/adblockmanager.h index eae761e0..69548994 100644 --- a/src/adblock/adblockmanager.h +++ b/src/adblock/adblockmanager.h @@ -108,6 +108,7 @@ #include "rekonq_defines.h" // Local Includes +#include "adblockhostmatcher.h" #include "adblockrule.h" // KDE Includes @@ -155,6 +156,8 @@ private: bool _isAdblockEnabled; bool _isHideAdsEnabled; + AdBlockHostMatcher _hostBlackList; + AdBlockHostMatcher _hostWhiteList; AdBlockRuleList _blackList; AdBlockRuleList _whiteList; QStringList _hideList; diff --git a/src/adblock/adblockrule.h b/src/adblock/adblockrule.h index 04409688..ef7b2f5f 100644 --- a/src/adblock/adblockrule.h +++ b/src/adblock/adblockrule.h @@ -70,9 +70,10 @@ class AdBlockRule public: AdBlockRule(const QString &filter); - bool match(const QString &encodedUrl) const + bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const { - return m_implementation->match(encodedUrl); + Q_ASSERT(encodedUrl.toLower() == encodedUrlLowerCase); + return m_implementation->match(encodedUrl, encodedUrlLowerCase); } private: diff --git a/src/adblock/adblockrulefallbackimpl.cpp b/src/adblock/adblockrulefallbackimpl.cpp index decb895d..988f2895 100644 --- a/src/adblock/adblockrulefallbackimpl.cpp +++ b/src/adblock/adblockrulefallbackimpl.cpp @@ -61,7 +61,7 @@ AdBlockRuleFallbackImpl::AdBlockRuleFallbackImpl(const QString &filter) m_regExp.setPattern(parsedLine); } -bool AdBlockRuleFallbackImpl::match(const QString &encodedUrl) const +bool AdBlockRuleFallbackImpl::match(const QString &encodedUrl, const QString &) const { return m_regExp.indexIn(encodedUrl) != -1; } diff --git a/src/adblock/adblockrulefallbackimpl.h b/src/adblock/adblockrulefallbackimpl.h index 4e7ca555..ed0f6dc6 100644 --- a/src/adblock/adblockrulefallbackimpl.h +++ b/src/adblock/adblockrulefallbackimpl.h @@ -36,7 +36,7 @@ class AdBlockRuleFallbackImpl : public AdBlockRuleImpl { public: AdBlockRuleFallbackImpl(const QString &filter); - bool match(const QString &encodedUrl) const; + bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const; private: QString convertPatternToRegExp(const QString &wildcardPattern); diff --git a/src/adblock/adblockruleimpl.h b/src/adblock/adblockruleimpl.h index da367aeb..db5cec30 100644 --- a/src/adblock/adblockruleimpl.h +++ b/src/adblock/adblockruleimpl.h @@ -33,7 +33,7 @@ class AdBlockRuleImpl public: AdBlockRuleImpl(const QString &) {} virtual ~AdBlockRuleImpl() {} - virtual bool match(const QString &encodedUrl) const = 0; + virtual bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const = 0; }; #endif // ADBLOCKRULEIMPL_H diff --git a/src/adblock/adblockruletextmatchimpl.cpp b/src/adblock/adblockruletextmatchimpl.cpp index 7c02ea37..892d78e0 100644 --- a/src/adblock/adblockruletextmatchimpl.cpp +++ b/src/adblock/adblockruletextmatchimpl.cpp @@ -34,13 +34,18 @@ AdBlockRuleTextMatchImpl::AdBlockRuleTextMatchImpl(const QString &filter) { Q_ASSERT(AdBlockRuleTextMatchImpl::isTextMatchFilter(filter)); - m_textToMatch = filter; + m_textToMatch = filter.toLower(); m_textToMatch.remove(QL1C('*')); } -bool AdBlockRuleTextMatchImpl::match(const QString &encodedUrl) const +bool AdBlockRuleTextMatchImpl::match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const { - return encodedUrl.contains(m_textToMatch, Qt::CaseInsensitive); + Q_UNUSED(encodedUrl); + // Case sensitive compare is faster, but would be incorrect with encodedUrl since + // we do want case insensitive. + // What we do is work on a lowercase version of m_textToMatch, and compare to the lowercase + // version of encodedUrl. + return encodedUrlLowerCase.contains(m_textToMatch, Qt::CaseSensitive); } bool AdBlockRuleTextMatchImpl::isTextMatchFilter(const QString &filter) diff --git a/src/adblock/adblockruletextmatchimpl.h b/src/adblock/adblockruletextmatchimpl.h index f0e78be0..28b0656c 100644 --- a/src/adblock/adblockruletextmatchimpl.h +++ b/src/adblock/adblockruletextmatchimpl.h @@ -36,7 +36,7 @@ class AdBlockRuleTextMatchImpl : public AdBlockRuleImpl { public: AdBlockRuleTextMatchImpl(const QString &filter); - bool match(const QString &encodedUrl) const; + bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const; static bool isTextMatchFilter(const QString &filter); |