summaryrefslogtreecommitdiff
path: root/src/adblock
diff options
context:
space:
mode:
authorAndrea Diamantini <adjam7@gmail.com>2010-01-14 11:37:45 +0100
committerAndrea Diamantini <adjam7@gmail.com>2010-01-14 11:37:45 +0100
commit1c99aeb13ff4304d45183e81c70c99d33e854d59 (patch)
treede0809c3cf65726d99a58283765b5de97bdabd02 /src/adblock
parentBetter AdBlocking things :) (diff)
downloadrekonq-1c99aeb13ff4304d45183e81c70c99d33e854d59.tar.xz
Cleaning again (the morning after) the adblock thing.. :)
Removed the "options" part. Will be rewritten in rekonq 0.5 together with the UI..
Diffstat (limited to 'src/adblock')
-rw-r--r--src/adblock/adblockmanager.cpp35
-rw-r--r--src/adblock/adblockmanager.h18
-rw-r--r--src/adblock/adblockrule.cpp127
3 files changed, 101 insertions, 79 deletions
diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp
index 5290d561..c2a42f0b 100644
--- a/src/adblock/adblockmanager.cpp
+++ b/src/adblock/adblockmanager.cpp
@@ -83,24 +83,23 @@ void AdBlockManager::loadSettings()
if (name.startsWith(QLatin1String("Filter")))
{
- if(url.startsWith("!"))
- {
- continue;
- }
-
- if(url.startsWith("@@"))
- {
- AdBlockRule rule( url.mid(2) );
- _whiteList << rule;
- continue;
- }
-
- if(url.startsWith("##"))
- {
- _hideList << url.mid(2);
- }
- else
- {
+ if(!url.startsWith("!"))
+ {
+ // white rules
+ if(url.startsWith("@@"))
+ {
+ AdBlockRule rule( url.mid(2) );
+ _whiteList << rule;
+ continue;
+ }
+
+ // hide (CSS) rules
+ if(url.startsWith("##"))
+ {
+ _hideList << url.mid(2);
+ continue;
+ }
+
AdBlockRule rule( url );
_blackList << rule;
}
diff --git a/src/adblock/adblockmanager.h b/src/adblock/adblockmanager.h
index 45c3fa18..c07a9492 100644
--- a/src/adblock/adblockmanager.h
+++ b/src/adblock/adblockmanager.h
@@ -31,35 +31,35 @@
// NOTE: AdBlockPlus Filters (fast) summary
//
-// # Basic Filter rules
+// ### Basic Filter rules
//
// RULE = http://example.com/ads/*
// this should block every link whose URL contains http://example.com/ads/
//
-// # Exception rules
+// ### Exception rules (@@)
//
// RULE = @@advice*
//
// this will save every site, even those matched by other rules, containing words
// that start with "advice". Wildcards and regular expressions are allowed here.
//
-// # Beginning/end matching rules
+// ### Beginning/end matching rules (||)
//
-// RULE=|http://badsite.com
+// RULE=||http://badsite.com
//
// will stop all links starting with http://badsite.com
//
-// RULE=*swf|
+// RULE=*swf||
//
// will stop all links to direct flash contents
//
-// # Comments
+// ### Comments (!)
//
// RULE=!azz..
//
// Every rule starting with a ! is commented out and should not be checked
//
-// # Filter Options
+// ### Filter Options
//
// You can also specify a number of options to modify the behavior of a filter.
// You list these options separated with commas after a dollar sign ($) at the end of the filter
@@ -85,13 +85,13 @@
//
// RULE=*/ads/*~$script,match-case
//
-// # Regular expressions
+// ### Regular expressions
//
// They usually allow to check for (a lot of) sites, using just one rule, but be careful:
// BASIC FILTERS ARE PROCESSED FASTER THAN REGULAR EXPRESSIONS (In ADP! In rekonq, I don't know...)
//
//
-// ##### ELEMENT HIDING
+// ### ELEMENT HIDING (##)
//
// This is quite different from usual adblock (but, for me, more powerful!). Sometimes you will find advertisements
// that can’t be blocked because they are embedded as text in the web page itself.
diff --git a/src/adblock/adblockrule.cpp b/src/adblock/adblockrule.cpp
index ea3e17de..c6fe47c9 100644
--- a/src/adblock/adblockrule.cpp
+++ b/src/adblock/adblockrule.cpp
@@ -58,6 +58,9 @@
#include <QRegExp>
#include <QUrl>
+#define QL1S(x) QLatin1String(x)
+#define QL1C(x) QLatin1Char(x)
+
AdBlockRule::AdBlockRule(const QString &filter)
{
@@ -65,31 +68,29 @@ AdBlockRule::AdBlockRule(const QString &filter)
QString parsedLine = filter;
- if (parsedLine.startsWith(QLatin1Char('/')))
+ if ( parsedLine.startsWith( QL1C('/') ) && parsedLine.endsWith( QL1C('/') ) )
{
- if (parsedLine.endsWith(QLatin1Char('/')))
- {
- parsedLine = parsedLine.mid(1);
- parsedLine = parsedLine.left(parsedLine.size() - 1);
- isRegExpRule = true;
- }
+ parsedLine = parsedLine.mid(1);
+ parsedLine = parsedLine.left(parsedLine.size() - 1);
+ isRegExpRule = true;
}
- int options = parsedLine.indexOf(QLatin1String("$"), 0);
+ int options = parsedLine.indexOf( QL1C('$'), 0);
if (options >= 0)
{
- m_options = parsedLine.mid(options + 1).split(QLatin1Char(','));
+ m_options = parsedLine.mid(options + 1).split(QL1C(','));
parsedLine = parsedLine.left(options);
}
if(!isRegExpRule)
parsedLine = convertPatternToRegExp(parsedLine);
+
m_regExp = QRegExp(parsedLine, Qt::CaseInsensitive, QRegExp::RegExp2);
- if (m_options.contains(QLatin1String("match-case")))
+ if (m_options.contains( QL1S("match-case") ))
{
m_regExp.setCaseSensitivity(Qt::CaseSensitive);
- m_options.removeOne(QLatin1String("match-case"));
+ m_options.removeOne( QL1S("match-case") );
}
}
@@ -101,34 +102,36 @@ bool AdBlockRule::match(const QString &encodedUrl) const
{
bool matched = m_regExp.indexIn(encodedUrl) != -1;
- if (matched && !m_options.isEmpty())
- {
- // we only support domain right now
- if (m_options.count() == 1)
- {
- foreach (const QString &option, m_options)
- {
- if (option.startsWith(QLatin1String("domain=")))
- {
- QUrl url = QUrl::fromEncoded(encodedUrl.toUtf8());
- QString host = url.host();
- QStringList domainOptions = option.mid(7).split(QLatin1Char('|'));
- foreach (QString domainOption, domainOptions)
- {
- bool negate = domainOption.at(0) == QLatin1Char('~');
- if (negate)
- domainOption = domainOption.mid(1);
- bool hostMatched = domainOption == host;
- if (hostMatched && !negate)
- return true;
- if (!hostMatched && negate)
- return true;
- }
- }
- }
- }
- return false;
- }
+// TODO: Reimplement this in rekonq 0.5 :)
+//
+// if (matched && !m_options.isEmpty())
+// {
+// // we only support domain right now
+// if (m_options.count() == 1)
+// {
+// foreach (const QString &option, m_options)
+// {
+// if (option.startsWith( QL1S("domain=") ))
+// {
+// QUrl url = QUrl::fromEncoded(encodedUrl.toUtf8());
+// QString host = url.host();
+// QStringList domainOptions = option.mid(7).split( QL1C('|') );
+// foreach (QString domainOption, domainOptions)
+// {
+// bool negate = domainOption.at(0) == QL1C('~');
+// if (negate)
+// domainOption = domainOption.mid(1);
+// bool hostMatched = domainOption == host;
+// if (hostMatched && !negate)
+// return true;
+// if (!hostMatched && negate)
+// return true;
+// }
+// }
+// }
+// }
+// return false;
+// }
return matched;
}
@@ -137,17 +140,37 @@ bool AdBlockRule::match(const QString &encodedUrl) const
QString AdBlockRule::convertPatternToRegExp(const QString &wildcardPattern)
{
QString pattern = wildcardPattern;
- return pattern.replace(QRegExp(QLatin1String("\\*+")), QLatin1String("*")) // remove multiple wildcards
- .replace(QRegExp(QLatin1String("\\^\\|$")), QLatin1String("^")) // remove anchors following separator placeholder
- .replace(QRegExp(QLatin1String("^(\\*)")), QLatin1String("")) // remove leading wildcards
- .replace(QRegExp(QLatin1String("(\\*)$")), QLatin1String("")) // remove trailing wildcards
- .replace(QRegExp(QLatin1String("(\\W)")), QLatin1String("\\\\1")) // escape special symbols
- .replace(QRegExp(QLatin1String("^\\\\\\|\\\\\\|")),
- QLatin1String("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?")) // process extended anchor at expression start
- .replace(QRegExp(QLatin1String("\\\\\\^")),
- QLatin1String("(?:[^\\w\\d\\-.%]|$)")) // process separator placeholders
- .replace(QRegExp(QLatin1String("^\\\\\\|")), QLatin1String("^")) // process anchor at expression start
- .replace(QRegExp(QLatin1String("\\\\\\|$")), QLatin1String("$")) // process anchor at expression end
- .replace(QRegExp(QLatin1String("\\\\\\*")), QLatin1String(".*")) // replace wildcards by .*
- ;
+
+ // remove multiple wildcards
+ pattern.replace(QRegExp( QL1S("\\*+") ), QL1S("*") );
+
+ // remove anchors following separator placeholder
+ pattern.replace(QRegExp( QL1S("\\^\\|$") ), QL1S("^") );
+
+ // remove leading wildcards
+ pattern.replace(QRegExp( QL1S("^(\\*)") ), QL1S("") );
+
+ // remove trailing wildcards
+ pattern.replace(QRegExp( QL1S("(\\*)$") ), QL1S("") );
+
+ // escape special symbols
+ pattern.replace(QRegExp( QL1S("(\\W)") ), QL1S("\\\\1") );
+
+ // process extended anchor at expression start
+ pattern.replace(QRegExp( QL1S("^\\\\\\|\\\\\\|") ), QL1S("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?") );
+
+ // process separator placeholders
+ pattern.replace(QRegExp( QL1S("\\\\\\^") ), QL1S("(?:[^\\w\\d\\-.%]|$)") );
+
+ // process anchor at expression start
+ pattern.replace(QRegExp( QL1S("^\\\\\\|") ), QL1S("^") );
+
+ // process anchor at expression end
+ pattern.replace(QRegExp( QL1S("\\\\\\|$") ), QL1S("$") );
+
+ // replace wildcards by .*
+ pattern.replace(QRegExp( QL1S("\\\\\\*") ), QL1S(".*") );
+
+ // Finally, return...
+ return pattern;
}