summaryrefslogtreecommitdiff
path: root/src/adblock
diff options
context:
space:
mode:
authorBenjamin Poulain <benjamin.poulain@nokia.com>2010-08-18 23:41:05 +0200
committerBenjamin Poulain <benjamin.poulain@nokia.com>2010-08-19 03:18:29 +0200
commite5815346cd4870640051856abed0887717aa0bf7 (patch)
tree7ff7d8a66d1febfcc793dbe67c9d3d182ab05977 /src/adblock
parentMerge commit 'refs/merge-requests/178' of git://gitorious.org/rekonq/mainline... (diff)
downloadrekonq-e5815346cd4870640051856abed0887717aa0bf7.tar.xz
Improve the performance of AdBlockRuleTextMatchImpl
Comparing unicode string without case sensitive is rather expensive because each codepoint must be converted, which is non trivial for unicode. This patch introduce a new argument of ::match() taking the encoded url in lowercase. This way, the conversion can be done only once for a lot of rules.
Diffstat (limited to 'src/adblock')
-rw-r--r--src/adblock/adblockmanager.cpp10
-rw-r--r--src/adblock/adblockrule.h5
-rw-r--r--src/adblock/adblockrulefallbackimpl.cpp2
-rw-r--r--src/adblock/adblockrulefallbackimpl.h2
-rw-r--r--src/adblock/adblockruleimpl.h2
-rw-r--r--src/adblock/adblockruletextmatchimpl.cpp11
-rw-r--r--src/adblock/adblockruletextmatchimpl.h2
7 files changed, 22 insertions, 12 deletions
diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp
index 644ecff5..e0a109f0 100644
--- a/src/adblock/adblockmanager.cpp
+++ b/src/adblock/adblockmanager.cpp
@@ -161,11 +161,14 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa
return 0;
QString urlString = request.url().toString();
+ // We compute a lowercase version of the URL so each rule does not
+ // have to do it.
+ const QString urlStringLowerCase = urlString.toLower();
// check white rules before :)
foreach(const AdBlockRule &filter, _whiteList)
{
- if (filter.match(urlString))
+ if (filter.match(urlString, urlStringLowerCase))
{
kDebug() << "****ADBLOCK: WHITE RULE (@@) Matched: ***********";
kDebug() << "UrlString: " << urlString;
@@ -176,7 +179,7 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa
// then check the black ones :(
foreach(const AdBlockRule &filter, _blackList)
{
- if (filter.match(urlString))
+ if (filter.match(urlString, urlStringLowerCase))
{
kDebug() << "****ADBLOCK: BLACK RULE Matched: ***********";
kDebug() << "UrlString: " << urlString;
@@ -185,7 +188,8 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa
QWebElementCollection elements = document.findAll("*");
foreach(QWebElement el, elements)
{
- if (filter.match(el.attribute("src")))
+ const QString srcAttribute = el.attribute("src");
+ if (filter.match(srcAttribute, srcAttribute.toLower()))
{
kDebug() << "MATCHES ATTRIBUTE!!!!!";
el.setStyleProperty(QL1S("visibility"), QL1S("hidden"));
diff --git a/src/adblock/adblockrule.h b/src/adblock/adblockrule.h
index 04409688..ef7b2f5f 100644
--- a/src/adblock/adblockrule.h
+++ b/src/adblock/adblockrule.h
@@ -70,9 +70,10 @@ class AdBlockRule
public:
AdBlockRule(const QString &filter);
- bool match(const QString &encodedUrl) const
+ bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const
{
- return m_implementation->match(encodedUrl);
+ Q_ASSERT(encodedUrl.toLower() == encodedUrlLowerCase);
+ return m_implementation->match(encodedUrl, encodedUrlLowerCase);
}
private:
diff --git a/src/adblock/adblockrulefallbackimpl.cpp b/src/adblock/adblockrulefallbackimpl.cpp
index decb895d..988f2895 100644
--- a/src/adblock/adblockrulefallbackimpl.cpp
+++ b/src/adblock/adblockrulefallbackimpl.cpp
@@ -61,7 +61,7 @@ AdBlockRuleFallbackImpl::AdBlockRuleFallbackImpl(const QString &filter)
m_regExp.setPattern(parsedLine);
}
-bool AdBlockRuleFallbackImpl::match(const QString &encodedUrl) const
+bool AdBlockRuleFallbackImpl::match(const QString &encodedUrl, const QString &) const
{
return m_regExp.indexIn(encodedUrl) != -1;
}
diff --git a/src/adblock/adblockrulefallbackimpl.h b/src/adblock/adblockrulefallbackimpl.h
index 4e7ca555..ed0f6dc6 100644
--- a/src/adblock/adblockrulefallbackimpl.h
+++ b/src/adblock/adblockrulefallbackimpl.h
@@ -36,7 +36,7 @@ class AdBlockRuleFallbackImpl : public AdBlockRuleImpl
{
public:
AdBlockRuleFallbackImpl(const QString &filter);
- bool match(const QString &encodedUrl) const;
+ bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const;
private:
QString convertPatternToRegExp(const QString &wildcardPattern);
diff --git a/src/adblock/adblockruleimpl.h b/src/adblock/adblockruleimpl.h
index da367aeb..db5cec30 100644
--- a/src/adblock/adblockruleimpl.h
+++ b/src/adblock/adblockruleimpl.h
@@ -33,7 +33,7 @@ class AdBlockRuleImpl
public:
AdBlockRuleImpl(const QString &) {}
virtual ~AdBlockRuleImpl() {}
- virtual bool match(const QString &encodedUrl) const = 0;
+ virtual bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const = 0;
};
#endif // ADBLOCKRULEIMPL_H
diff --git a/src/adblock/adblockruletextmatchimpl.cpp b/src/adblock/adblockruletextmatchimpl.cpp
index 7c02ea37..892d78e0 100644
--- a/src/adblock/adblockruletextmatchimpl.cpp
+++ b/src/adblock/adblockruletextmatchimpl.cpp
@@ -34,13 +34,18 @@ AdBlockRuleTextMatchImpl::AdBlockRuleTextMatchImpl(const QString &filter)
{
Q_ASSERT(AdBlockRuleTextMatchImpl::isTextMatchFilter(filter));
- m_textToMatch = filter;
+ m_textToMatch = filter.toLower();
m_textToMatch.remove(QL1C('*'));
}
-bool AdBlockRuleTextMatchImpl::match(const QString &encodedUrl) const
+bool AdBlockRuleTextMatchImpl::match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const
{
- return encodedUrl.contains(m_textToMatch, Qt::CaseInsensitive);
+ Q_UNUSED(encodedUrl);
+ // Case sensitive compare is faster, but would be incorrect with encodedUrl since
+ // we do want case insensitive.
+ // What we do is work on a lowercase version of m_textToMatch, and compare to the lowercase
+ // version of encodedUrl.
+ return encodedUrlLowerCase.contains(m_textToMatch, Qt::CaseSensitive);
}
bool AdBlockRuleTextMatchImpl::isTextMatchFilter(const QString &filter)
diff --git a/src/adblock/adblockruletextmatchimpl.h b/src/adblock/adblockruletextmatchimpl.h
index f0e78be0..28b0656c 100644
--- a/src/adblock/adblockruletextmatchimpl.h
+++ b/src/adblock/adblockruletextmatchimpl.h
@@ -36,7 +36,7 @@ class AdBlockRuleTextMatchImpl : public AdBlockRuleImpl
{
public:
AdBlockRuleTextMatchImpl(const QString &filter);
- bool match(const QString &encodedUrl) const;
+ bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const;
static bool isTextMatchFilter(const QString &filter);