diff options
| -rw-r--r-- | src/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/adblock/adblockhostmatcher.cpp | 55 | ||||
| -rw-r--r-- | src/adblock/adblockhostmatcher.h | 51 | ||||
| -rw-r--r-- | src/adblock/adblockmanager.cpp | 38 | ||||
| -rw-r--r-- | src/adblock/adblockmanager.h | 3 | ||||
| -rw-r--r-- | src/adblock/adblockrule.h | 5 | ||||
| -rw-r--r-- | src/adblock/adblockrulefallbackimpl.cpp | 2 | ||||
| -rw-r--r-- | src/adblock/adblockrulefallbackimpl.h | 2 | ||||
| -rw-r--r-- | src/adblock/adblockruleimpl.h | 2 | ||||
| -rw-r--r-- | src/adblock/adblockruletextmatchimpl.cpp | 11 | ||||
| -rw-r--r-- | src/adblock/adblockruletextmatchimpl.h | 2 | 
11 files changed, 159 insertions, 13 deletions
| diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0130c2d4..151906b9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -52,6 +52,7 @@ SET( rekonq_KDEINIT_SRCS      bookmarks/bookmarkstoolbar.cpp      bookmarks/bookmarkowner.cpp      #---------------------------------------- +    adblock/adblockhostmatcher.cpp      adblock/adblockmanager.cpp      adblock/adblocknetworkreply.cpp      adblock/adblockrule.cpp diff --git a/src/adblock/adblockhostmatcher.cpp b/src/adblock/adblockhostmatcher.cpp new file mode 100644 index 00000000..b11dab2c --- /dev/null +++ b/src/adblock/adblockhostmatcher.cpp @@ -0,0 +1,55 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com> +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program.  If not, see <http://www.gnu.org/licenses/>. +* +* ============================================================ */ + +// Self Includes +#include "adblockhostmatcher.h" + +// Rekonq Includes +#include "rekonq_defines.h" + +bool AdBlockHostMatcher::tryAddFilter(const QString &filter) +{ +    if (filter.startsWith(QL1S("||"))) { +        QString domain = filter.mid(2); + +        const int indexOfFirstSeparator = domain.indexOf(QL1C('^')); +        if (indexOfFirstSeparator < 0) +            return false; + +        const int indexOfLastDollar = domain.lastIndexOf(QL1C('$')); +        if (indexOfLastDollar >= 0 && indexOfLastDollar != indexOfFirstSeparator + 1) +            return false; + +        domain = domain.left(indexOfFirstSeparator); +        if (domain.contains(QL1C('/')) || domain.contains(QL1C('*'))) +            return false; + +        domain = domain.toLower(); +        m_hostList.insert(domain); +        m_hostList.insert(QL1S("www.") + domain); +        return true; +    } +    return false; +} diff --git a/src/adblock/adblockhostmatcher.h b/src/adblock/adblockhostmatcher.h new file mode 100644 index 00000000..0a15bd4e --- /dev/null +++ b/src/adblock/adblockhostmatcher.h @@ -0,0 +1,51 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com> +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program.  If not, see <http://www.gnu.org/licenses/>. +* +* ============================================================ */ + +#ifndef ADBLOCKHOSTMATCHER_H +#define ADBLOCKHOSTMATCHER_H + +#include <QSet> +#include <QString> + +class AdBlockHostMatcher +{ +public: +    // Try to add an adblock filter to this host matcher. +    // If the filter is not an hostname, the filter is not added +    // and the method return false; +    bool tryAddFilter(const QString &filter); + +    bool match(const QString &host) const +    { +        return m_hostList.contains(host.toLower()); +    } + +    void clear() { m_hostList.clear(); } + +private: +    QSet<QString> m_hostList; +}; + +#endif // ADBLOCKHOSTMATCHER_H diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp index 644ecff5..8deb0bcd 100644 --- a/src/adblock/adblockmanager.cpp +++ b/src/adblock/adblockmanager.cpp @@ -67,6 +67,8 @@ void AdBlockManager::loadSettings(bool checkUpdateDate)      _index = 0;      _buffer.clear(); +    _hostWhiteList.clear(); +    _hostBlackList.clear();      _whiteList.clear();      _blackList.clear();      _hideList.clear(); @@ -133,7 +135,10 @@ void AdBlockManager::loadRules(const QStringList &rules)          // white rules          if (stringRule.startsWith(QL1S("@@")))          { -            AdBlockRule rule(stringRule.mid(2)); +            const QString filter = stringRule.mid(2); +            if (_hostWhiteList.tryAddFilter(filter)) +                continue; +            AdBlockRule rule(filter);              _whiteList << rule;              continue;          } @@ -145,6 +150,12 @@ void AdBlockManager::loadRules(const QStringList &rules)              continue;          } +        // TODO implement domain-specific hiding +        if (stringRule.contains(QL1S("##"))) +            continue; + +        if (_hostBlackList.tryAddFilter(stringRule)) +            continue;          AdBlockRule rule(stringRule);          _blackList << rule;      } @@ -161,11 +172,22 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa          return 0;      QString urlString = request.url().toString(); +    // We compute a lowercase version of the URL so each rule does not +    // have to do it. +    const QString urlStringLowerCase = urlString.toLower(); +    const QString host = request.url().host();      // check white rules before :) + +    if (_hostWhiteList.match(host)) { +        kDebug() << "****ADBLOCK: WHITE RULE (@@) Matched by host matcher: ***********"; +        kDebug() << "UrlString:  " << urlString; +        return 0; +    } +      foreach(const AdBlockRule &filter, _whiteList)      { -        if (filter.match(urlString)) +        if (filter.match(urlString, urlStringLowerCase))          {              kDebug() << "****ADBLOCK: WHITE RULE (@@) Matched: ***********";              kDebug() << "UrlString:  " << urlString; @@ -174,9 +196,16 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa      }      // then check the black ones :( +    if (_hostBlackList.match(host)) { +        kDebug() << "****ADBLOCK: BLACK RULE Matched by host matcher: ***********"; +        kDebug() << "UrlString:  " << urlString; +        AdBlockNetworkReply *reply = new AdBlockNetworkReply(request, urlString, this); +        return reply; +    } +      foreach(const AdBlockRule &filter, _blackList)      { -        if (filter.match(urlString)) +        if (filter.match(urlString, urlStringLowerCase))          {              kDebug() << "****ADBLOCK: BLACK RULE Matched: ***********";              kDebug() << "UrlString:  " << urlString; @@ -185,7 +214,8 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa              QWebElementCollection elements = document.findAll("*");              foreach(QWebElement el, elements)              { -                if (filter.match(el.attribute("src"))) +                const QString srcAttribute = el.attribute("src"); +                if (filter.match(srcAttribute, srcAttribute.toLower()))                  {                      kDebug() << "MATCHES ATTRIBUTE!!!!!";                      el.setStyleProperty(QL1S("visibility"), QL1S("hidden")); diff --git a/src/adblock/adblockmanager.h b/src/adblock/adblockmanager.h index eae761e0..69548994 100644 --- a/src/adblock/adblockmanager.h +++ b/src/adblock/adblockmanager.h @@ -108,6 +108,7 @@  #include "rekonq_defines.h"  // Local Includes +#include "adblockhostmatcher.h"  #include "adblockrule.h"  // KDE Includes @@ -155,6 +156,8 @@ private:      bool _isAdblockEnabled;      bool _isHideAdsEnabled; +    AdBlockHostMatcher _hostBlackList; +    AdBlockHostMatcher _hostWhiteList;      AdBlockRuleList _blackList;      AdBlockRuleList _whiteList;      QStringList _hideList; diff --git a/src/adblock/adblockrule.h b/src/adblock/adblockrule.h index 04409688..ef7b2f5f 100644 --- a/src/adblock/adblockrule.h +++ b/src/adblock/adblockrule.h @@ -70,9 +70,10 @@ class AdBlockRule  public:      AdBlockRule(const QString &filter); -    bool match(const QString &encodedUrl) const +    bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const      { -        return m_implementation->match(encodedUrl); +        Q_ASSERT(encodedUrl.toLower() == encodedUrlLowerCase); +        return m_implementation->match(encodedUrl, encodedUrlLowerCase);      }  private: diff --git a/src/adblock/adblockrulefallbackimpl.cpp b/src/adblock/adblockrulefallbackimpl.cpp index decb895d..988f2895 100644 --- a/src/adblock/adblockrulefallbackimpl.cpp +++ b/src/adblock/adblockrulefallbackimpl.cpp @@ -61,7 +61,7 @@ AdBlockRuleFallbackImpl::AdBlockRuleFallbackImpl(const QString &filter)      m_regExp.setPattern(parsedLine);  } -bool AdBlockRuleFallbackImpl::match(const QString &encodedUrl) const +bool AdBlockRuleFallbackImpl::match(const QString &encodedUrl, const QString &) const  {      return m_regExp.indexIn(encodedUrl) != -1;  } diff --git a/src/adblock/adblockrulefallbackimpl.h b/src/adblock/adblockrulefallbackimpl.h index 4e7ca555..ed0f6dc6 100644 --- a/src/adblock/adblockrulefallbackimpl.h +++ b/src/adblock/adblockrulefallbackimpl.h @@ -36,7 +36,7 @@ class AdBlockRuleFallbackImpl : public AdBlockRuleImpl  {  public:      AdBlockRuleFallbackImpl(const QString &filter); -    bool match(const QString &encodedUrl) const; +    bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const;  private:      QString convertPatternToRegExp(const QString &wildcardPattern); diff --git a/src/adblock/adblockruleimpl.h b/src/adblock/adblockruleimpl.h index da367aeb..db5cec30 100644 --- a/src/adblock/adblockruleimpl.h +++ b/src/adblock/adblockruleimpl.h @@ -33,7 +33,7 @@ class AdBlockRuleImpl  public:      AdBlockRuleImpl(const QString &) {}      virtual ~AdBlockRuleImpl() {} -    virtual bool match(const QString &encodedUrl) const = 0; +    virtual bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const = 0;  };  #endif // ADBLOCKRULEIMPL_H diff --git a/src/adblock/adblockruletextmatchimpl.cpp b/src/adblock/adblockruletextmatchimpl.cpp index 7c02ea37..892d78e0 100644 --- a/src/adblock/adblockruletextmatchimpl.cpp +++ b/src/adblock/adblockruletextmatchimpl.cpp @@ -34,13 +34,18 @@ AdBlockRuleTextMatchImpl::AdBlockRuleTextMatchImpl(const QString &filter)  {      Q_ASSERT(AdBlockRuleTextMatchImpl::isTextMatchFilter(filter)); -    m_textToMatch = filter; +    m_textToMatch = filter.toLower();      m_textToMatch.remove(QL1C('*'));  } -bool AdBlockRuleTextMatchImpl::match(const QString &encodedUrl) const +bool AdBlockRuleTextMatchImpl::match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const  { -    return encodedUrl.contains(m_textToMatch, Qt::CaseInsensitive); +    Q_UNUSED(encodedUrl); +    // Case sensitive compare is faster, but would be incorrect with encodedUrl since +    // we do want case insensitive. +    // What we do is work on a lowercase version of m_textToMatch, and compare to the lowercase +    // version of encodedUrl. +    return encodedUrlLowerCase.contains(m_textToMatch, Qt::CaseSensitive);  }  bool AdBlockRuleTextMatchImpl::isTextMatchFilter(const QString &filter) diff --git a/src/adblock/adblockruletextmatchimpl.h b/src/adblock/adblockruletextmatchimpl.h index f0e78be0..28b0656c 100644 --- a/src/adblock/adblockruletextmatchimpl.h +++ b/src/adblock/adblockruletextmatchimpl.h @@ -36,7 +36,7 @@ class AdBlockRuleTextMatchImpl : public AdBlockRuleImpl  {  public:      AdBlockRuleTextMatchImpl(const QString &filter); -    bool match(const QString &encodedUrl) const; +    bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const;      static bool isTextMatchFilter(const QString &filter); | 
