From e5815346cd4870640051856abed0887717aa0bf7 Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Wed, 18 Aug 2010 23:41:05 +0200 Subject: Improve the performance of AdBlockRuleTextMatchImpl Comparing unicode string without case sensitive is rather expensive because each codepoint must be converted, which is non trivial for unicode. This patch introduce a new argument of ::match() taking the encoded url in lowercase. This way, the conversion can be done only once for a lot of rules. --- src/adblock/adblockmanager.cpp | 10 +++++++--- src/adblock/adblockrule.h | 5 +++-- src/adblock/adblockrulefallbackimpl.cpp | 2 +- src/adblock/adblockrulefallbackimpl.h | 2 +- src/adblock/adblockruleimpl.h | 2 +- src/adblock/adblockruletextmatchimpl.cpp | 11 ++++++++--- src/adblock/adblockruletextmatchimpl.h | 2 +- 7 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp index 644ecff5..e0a109f0 100644 --- a/src/adblock/adblockmanager.cpp +++ b/src/adblock/adblockmanager.cpp @@ -161,11 +161,14 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa return 0; QString urlString = request.url().toString(); + // We compute a lowercase version of the URL so each rule does not + // have to do it. + const QString urlStringLowerCase = urlString.toLower(); // check white rules before :) foreach(const AdBlockRule &filter, _whiteList) { - if (filter.match(urlString)) + if (filter.match(urlString, urlStringLowerCase)) { kDebug() << "****ADBLOCK: WHITE RULE (@@) Matched: ***********"; kDebug() << "UrlString: " << urlString; @@ -176,7 +179,7 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa // then check the black ones :( foreach(const AdBlockRule &filter, _blackList) { - if (filter.match(urlString)) + if (filter.match(urlString, urlStringLowerCase)) { kDebug() << "****ADBLOCK: BLACK RULE Matched: ***********"; kDebug() << "UrlString: " << urlString; @@ -185,7 +188,8 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa QWebElementCollection elements = document.findAll("*"); foreach(QWebElement el, elements) { - if (filter.match(el.attribute("src"))) + const QString srcAttribute = el.attribute("src"); + if (filter.match(srcAttribute, srcAttribute.toLower())) { kDebug() << "MATCHES ATTRIBUTE!!!!!"; el.setStyleProperty(QL1S("visibility"), QL1S("hidden")); diff --git a/src/adblock/adblockrule.h b/src/adblock/adblockrule.h index 04409688..ef7b2f5f 100644 --- a/src/adblock/adblockrule.h +++ b/src/adblock/adblockrule.h @@ -70,9 +70,10 @@ class AdBlockRule public: AdBlockRule(const QString &filter); - bool match(const QString &encodedUrl) const + bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const { - return m_implementation->match(encodedUrl); + Q_ASSERT(encodedUrl.toLower() == encodedUrlLowerCase); + return m_implementation->match(encodedUrl, encodedUrlLowerCase); } private: diff --git a/src/adblock/adblockrulefallbackimpl.cpp b/src/adblock/adblockrulefallbackimpl.cpp index decb895d..988f2895 100644 --- a/src/adblock/adblockrulefallbackimpl.cpp +++ b/src/adblock/adblockrulefallbackimpl.cpp @@ -61,7 +61,7 @@ AdBlockRuleFallbackImpl::AdBlockRuleFallbackImpl(const QString &filter) m_regExp.setPattern(parsedLine); } -bool AdBlockRuleFallbackImpl::match(const QString &encodedUrl) const +bool AdBlockRuleFallbackImpl::match(const QString &encodedUrl, const QString &) const { return m_regExp.indexIn(encodedUrl) != -1; } diff --git a/src/adblock/adblockrulefallbackimpl.h b/src/adblock/adblockrulefallbackimpl.h index 4e7ca555..ed0f6dc6 100644 --- a/src/adblock/adblockrulefallbackimpl.h +++ b/src/adblock/adblockrulefallbackimpl.h @@ -36,7 +36,7 @@ class AdBlockRuleFallbackImpl : public AdBlockRuleImpl { public: AdBlockRuleFallbackImpl(const QString &filter); - bool match(const QString &encodedUrl) const; + bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const; private: QString convertPatternToRegExp(const QString &wildcardPattern); diff --git a/src/adblock/adblockruleimpl.h b/src/adblock/adblockruleimpl.h index da367aeb..db5cec30 100644 --- a/src/adblock/adblockruleimpl.h +++ b/src/adblock/adblockruleimpl.h @@ -33,7 +33,7 @@ class AdBlockRuleImpl public: AdBlockRuleImpl(const QString &) {} virtual ~AdBlockRuleImpl() {} - virtual bool match(const QString &encodedUrl) const = 0; + virtual bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const = 0; }; #endif // ADBLOCKRULEIMPL_H diff --git a/src/adblock/adblockruletextmatchimpl.cpp b/src/adblock/adblockruletextmatchimpl.cpp index 7c02ea37..892d78e0 100644 --- a/src/adblock/adblockruletextmatchimpl.cpp +++ b/src/adblock/adblockruletextmatchimpl.cpp @@ -34,13 +34,18 @@ AdBlockRuleTextMatchImpl::AdBlockRuleTextMatchImpl(const QString &filter) { Q_ASSERT(AdBlockRuleTextMatchImpl::isTextMatchFilter(filter)); - m_textToMatch = filter; + m_textToMatch = filter.toLower(); m_textToMatch.remove(QL1C('*')); } -bool AdBlockRuleTextMatchImpl::match(const QString &encodedUrl) const +bool AdBlockRuleTextMatchImpl::match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const { - return encodedUrl.contains(m_textToMatch, Qt::CaseInsensitive); + Q_UNUSED(encodedUrl); + // Case sensitive compare is faster, but would be incorrect with encodedUrl since + // we do want case insensitive. + // What we do is work on a lowercase version of m_textToMatch, and compare to the lowercase + // version of encodedUrl. + return encodedUrlLowerCase.contains(m_textToMatch, Qt::CaseSensitive); } bool AdBlockRuleTextMatchImpl::isTextMatchFilter(const QString &filter) diff --git a/src/adblock/adblockruletextmatchimpl.h b/src/adblock/adblockruletextmatchimpl.h index f0e78be0..28b0656c 100644 --- a/src/adblock/adblockruletextmatchimpl.h +++ b/src/adblock/adblockruletextmatchimpl.h @@ -36,7 +36,7 @@ class AdBlockRuleTextMatchImpl : public AdBlockRuleImpl { public: AdBlockRuleTextMatchImpl(const QString &filter); - bool match(const QString &encodedUrl) const; + bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const; static bool isTextMatchFilter(const QString &filter); -- cgit v1.2.1