From 12f8323ae8e5e469fb0b662f29bd8fedadb85faa Mon Sep 17 00:00:00 2001
From: Benjamin Poulain
Date: Sat, 21 Aug 2010 21:40:59 +0200
Subject: Add support for domain option in the ad block module

Some ads were not filtered because they were incorrectly matched by the
whitelist. This is because we ignore options, including the domain
restrictions.

For example, the white filter:
@@||pagead2.googlesyndication.com/pagead/show_ads.js$domain=kde.org
would match any page regardless of the domain restriction. So no ads from
pagead2.googlesyndication.com were filtered.

This patch adds support for "domain" options in the fallback rules (the
other AdBlockRuleImpl rejects options). The domain of the frame making the
request is now compared to the option to take the right decision.

Excluded domains (written "~example.com" in a filter) are stored without
their leading '~', so that they can be compared directly with the host of
the frame that originated the request.

This patch requires Qt 4.7 with a recent QtWebKit 2.0.
---
 src/adblock/adblockmanager.cpp           |  6 ++---
 src/adblock/adblockrule.h                |  5 ++--
 src/adblock/adblockrulefallbackimpl.cpp  | 42 +++++++++++++++++++++++++++++---
 src/adblock/adblockrulefallbackimpl.h    |  5 +++-
 src/adblock/adblockruleimpl.h            |  3 ++-
 src/adblock/adblockruletextmatchimpl.cpp |  3 ++-
 src/adblock/adblockruletextmatchimpl.h   |  2 +-
 7 files changed, 54 insertions(+), 12 deletions(-)

(limited to 'src/adblock')

diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp
index 8deb0bcd..f7c61cdb 100644
--- a/src/adblock/adblockmanager.cpp
+++ b/src/adblock/adblockmanager.cpp
@@ -187,7 +187,7 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa
 
     foreach(const AdBlockRule &filter, _whiteList)
     {
-        if (filter.match(urlString, urlStringLowerCase))
+        if (filter.match(request, urlString, urlStringLowerCase))
         {
             kDebug() << "****ADBLOCK: WHITE RULE (@@) Matched: ***********";
             kDebug() << "UrlString: " << urlString;
@@ -205,7 +205,7 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa
 
     foreach(const AdBlockRule &filter, _blackList)
     {
-        if (filter.match(urlString, urlStringLowerCase))
+        if (filter.match(request, urlString, urlStringLowerCase))
         {
             kDebug() << "****ADBLOCK: BLACK RULE Matched: ***********";
             kDebug() << "UrlString: " << urlString;
@@ -215,7 +215,7 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa
             foreach(QWebElement el, elements)
             {
                 const QString srcAttribute = el.attribute("src");
-                if (filter.match(srcAttribute, srcAttribute.toLower()))
+                if (filter.match(request, srcAttribute, srcAttribute.toLower()))
                 {
                     kDebug() << "MATCHES ATTRIBUTE!!!!!";
                     el.setStyleProperty(QL1S("visibility"), QL1S("hidden"));
diff --git a/src/adblock/adblockrule.h b/src/adblock/adblockrule.h
index ef7b2f5f..701a9fe1 100644
--- a/src/adblock/adblockrule.h
+++ b/src/adblock/adblockrule.h
@@ -63,6 +63,7 @@
 #include
 
 // Forward Includes
+class QNetworkRequest;
 class QString;
 
 class AdBlockRule
@@ -70,10 +71,10 @@ class AdBlockRule
 public:
     AdBlockRule(const QString &filter);
 
-    bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const
+    bool match(const QNetworkRequest &request, const QString &encodedUrl, const QString &encodedUrlLowerCase) const
     {
         Q_ASSERT(encodedUrl.toLower() == encodedUrlLowerCase);
-        return m_implementation->match(encodedUrl, encodedUrlLowerCase);
+        return m_implementation->match(request, encodedUrl, encodedUrlLowerCase);
     }
 
 private:
diff --git a/src/adblock/adblockrulefallbackimpl.cpp b/src/adblock/adblockrulefallbackimpl.cpp
index 988f2895..959050da 100644
--- a/src/adblock/adblockrulefallbackimpl.cpp
+++ b/src/adblock/adblockrulefallbackimpl.cpp
@@ -30,8 +30,11 @@
 #include "rekonq_defines.h"
 
 // Qt Includes
+#include
+#include
 #include
 
+
 static inline bool isRegExpFilter(const QString &filter)
 {
     return filter.startsWith(QL1C('/')) && filter.endsWith(QL1C('/'));
@@ -48,9 +51,24 @@ AdBlockRuleFallbackImpl::AdBlockRuleFallbackImpl(const QString &filter)
     const int optionsNumber = parsedLine.lastIndexOf(QL1C('$'));
     if (optionsNumber >= 0 && !isRegExpFilter(parsedLine)) {
         const QStringList options(parsedLine.mid(optionsNumber + 1).split(QL1C(',')));
+        parsedLine = parsedLine.left(optionsNumber);
+
         if (options.contains(QL1S("match-case")))
             m_regExp.setCaseSensitivity(Qt::CaseSensitive);
-        parsedLine = parsedLine.left(optionsNumber);
+
+        foreach (const QString &option, options) {
+            // Domain restricted filter
+            const QString domainKeyword(QL1S("domain="));
+            if (option.startsWith(domainKeyword)) {
+                QStringList domainList = option.mid(domainKeyword.length()).split(QL1C('|'));
+                foreach (const QString &domain, domainList) {
+                    if (domain.startsWith(QL1C('~')))
+                        m_whiteDomains.insert(domain.mid(1).toLower());
+                    else
+                        m_blackDomains.insert(domain.toLower());
+                }
+            }
+        }
     }
 
     if (isRegExpFilter(parsedLine))
@@ -61,9 +79,27 @@ AdBlockRuleFallbackImpl::AdBlockRuleFallbackImpl(const QString &filter)
     m_regExp.setPattern(parsedLine);
 }
 
-bool AdBlockRuleFallbackImpl::match(const QString &encodedUrl, const QString &) const
+bool AdBlockRuleFallbackImpl::match(const QNetworkRequest &request, const QString &encodedUrl, const QString &) const
 {
-    return m_regExp.indexIn(encodedUrl) != -1;
+    const bool regexpMatch = m_regExp.indexIn(encodedUrl) != -1;
+
+    if (regexpMatch && (!m_whiteDomains.isEmpty() || !m_blackDomains.isEmpty())) {
+        Q_ASSERT(qobject_cast<QWebFrame *>(request.originatingObject()));
+        const QWebFrame *const origin = static_cast<const QWebFrame *>(request.originatingObject());
+
+        const QString originDomain = origin->url().host();
+
+        if (!m_whiteDomains.isEmpty()) {
+            // White domains come from '~domain' exclusions: the rule applies everywhere except on the listed domains.
+            if (m_whiteDomains.contains(originDomain))
+                return false;
+            return true;
+        } else if (m_blackDomains.contains(originDomain)) {
+            return true;
+        }
+        return false;
+    }
+    return regexpMatch;
 }
 
 QString AdBlockRuleFallbackImpl::convertPatternToRegExp(const QString &wildcardPattern)
diff --git a/src/adblock/adblockrulefallbackimpl.h b/src/adblock/adblockrulefallbackimpl.h
index ed0f6dc6..8cb02e10 100644
--- a/src/adblock/adblockrulefallbackimpl.h
+++ b/src/adblock/adblockrulefallbackimpl.h
@@ -31,17 +31,20 @@
 // Qt Includes
 #include
 #include
+#include
 
 class AdBlockRuleFallbackImpl : public AdBlockRuleImpl
 {
 public:
     AdBlockRuleFallbackImpl(const QString &filter);
-    bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const;
+    bool match(const QNetworkRequest &request, const QString &encodedUrl, const QString &encodedUrlLowerCase) const;
 
 private:
     QString convertPatternToRegExp(const QString &wildcardPattern);
 
     QRegExp m_regExp;
+    QSet<QString> m_whiteDomains;
+    QSet<QString> m_blackDomains;
 };
 
 #endif // ADBLOCKRULEFALLBACKIMPL_H
diff --git a/src/adblock/adblockruleimpl.h b/src/adblock/adblockruleimpl.h
index db5cec30..7702217b 100644
--- a/src/adblock/adblockruleimpl.h
+++ b/src/adblock/adblockruleimpl.h
@@ -27,13 +27,14 @@
 #define ADBLOCKRULEIMPL_H
 
 class QString;
+class QNetworkRequest;
 
 class AdBlockRuleImpl
 {
 public:
     AdBlockRuleImpl(const QString &) {}
     virtual ~AdBlockRuleImpl() {}
-    virtual bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const = 0;
+    virtual bool match(const QNetworkRequest &request, const QString &encodedUrl, const QString &encodedUrlLowerCase) const = 0;
 };
 
 #endif // ADBLOCKRULEIMPL_H
diff --git a/src/adblock/adblockruletextmatchimpl.cpp b/src/adblock/adblockruletextmatchimpl.cpp
index 892d78e0..2d534a3a 100644
--- a/src/adblock/adblockruletextmatchimpl.cpp
+++ b/src/adblock/adblockruletextmatchimpl.cpp
@@ -38,9 +38,10 @@ AdBlockRuleTextMatchImpl::AdBlockRuleTextMatchImpl(const QString &filter)
     m_textToMatch.remove(QL1C('*'));
 }
 
-bool AdBlockRuleTextMatchImpl::match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const
+bool AdBlockRuleTextMatchImpl::match(const QNetworkRequest &request, const QString &encodedUrl, const QString &encodedUrlLowerCase) const
 {
     Q_UNUSED(encodedUrl);
+    Q_UNUSED(request);
     // Case sensitive compare is faster, but would be incorrect with encodedUrl since
     // we do want case insensitive.
     // What we do is work on a lowercase version of m_textToMatch, and compare to the lowercase
diff --git a/src/adblock/adblockruletextmatchimpl.h b/src/adblock/adblockruletextmatchimpl.h
index 28b0656c..b2c6e239 100644
--- a/src/adblock/adblockruletextmatchimpl.h
+++ b/src/adblock/adblockruletextmatchimpl.h
@@ -36,7 +36,7 @@ class AdBlockRuleTextMatchImpl : public AdBlockRuleImpl
 {
 public:
     AdBlockRuleTextMatchImpl(const QString &filter);
-    bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const;
+    bool match(const QNetworkRequest &request, const QString &encodedUrl, const QString &encodedUrlLowerCase) const;
 
     static bool isTextMatchFilter(const QString &filter);
 
-- 
cgit v1.2.1