From 1c99aeb13ff4304d45183e81c70c99d33e854d59 Mon Sep 17 00:00:00 2001 From: Andrea Diamantini Date: Thu, 14 Jan 2010 11:37:45 +0100 Subject: Cleaning again (the morning after) the adblock thing.. :) Removed the "options" part. Will be rewritten in rekonq 0.5 together with the UI.. --- src/adblock/adblockmanager.cpp | 35 ++++++------ src/adblock/adblockmanager.h | 18 +++--- src/adblock/adblockrule.cpp | 127 ++++++++++++++++++++++++----------------- 3 files changed, 101 insertions(+), 79 deletions(-) (limited to 'src/adblock') diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp index 5290d561..c2a42f0b 100644 --- a/src/adblock/adblockmanager.cpp +++ b/src/adblock/adblockmanager.cpp @@ -83,24 +83,23 @@ void AdBlockManager::loadSettings() if (name.startsWith(QLatin1String("Filter"))) { - if(url.startsWith("!")) - { - continue; - } - - if(url.startsWith("@@")) - { - AdBlockRule rule( url.mid(2) ); - _whiteList << rule; - continue; - } - - if(url.startsWith("##")) - { - _hideList << url.mid(2); - } - else - { + if(!url.startsWith("!")) + { + // white rules + if(url.startsWith("@@")) + { + AdBlockRule rule( url.mid(2) ); + _whiteList << rule; + continue; + } + + // hide (CSS) rules + if(url.startsWith("##")) + { + _hideList << url.mid(2); + continue; + } + AdBlockRule rule( url ); _blackList << rule; } diff --git a/src/adblock/adblockmanager.h b/src/adblock/adblockmanager.h index 45c3fa18..c07a9492 100644 --- a/src/adblock/adblockmanager.h +++ b/src/adblock/adblockmanager.h @@ -31,35 +31,35 @@ // NOTE: AdBlockPlus Filters (fast) summary // -// # Basic Filter rules +// ### Basic Filter rules // // RULE = http://example.com/ads/* // this should block every link containing all things from that link // -// # Exception rules +// ### Exception rules (@@) // // RULE = @@advice* // // this will save every site, also that matched by other rules, cointaining words // that starts with "advice". Wildcards && regular expression allowed here. // -// # Beginning/end matching rules +// ### Beginning/end matching rules (||) // -// RULE=|http://badsite.com +// RULE=||http://badsite.com // // will stop all links starting with http://badsite.com // -// RULE=*swf| +// RULE=*swf|| // // will stop all links to direct flash contents // -// # Comments +// ### Comments (!) // // RULE=!azz.. // // Every rule starting with a ! is commented out and should not be checked // -// # Filter Options +// ### Filter Options // // You can also specify a number of options to modify the behavior of a filter. // You list these options separated with commas after a dollar sign ($) at the end of the filter @@ -85,13 +85,13 @@ // // RULE=*/ads/*~$script,match-case // -// # Regular expressions +// ### Regular expressions // // They usually allow to check for (a lot of) sites, using just one rule, but be careful: // BASIC FILTERS ARE PROCESSED FASTER THAN REGULAR EXPRESSIONS (In ADP! In rekonq, I don't know...) // // -// ##### ELEMENT HIDING +// ### ELEMENT HIDING (##) // // This is quite different from usual adblock (but, for me, more powerful!). Sometimes you will find advertisements // that can’t be blocked because they are embedded as text in the web page itself. diff --git a/src/adblock/adblockrule.cpp b/src/adblock/adblockrule.cpp index ea3e17de..c6fe47c9 100644 --- a/src/adblock/adblockrule.cpp +++ b/src/adblock/adblockrule.cpp @@ -58,6 +58,9 @@ #include #include +#define QL1S(x) QLatin1String(x) +#define QL1C(x) QLatin1Char(x) + AdBlockRule::AdBlockRule(const QString &filter) { @@ -65,31 +68,29 @@ AdBlockRule::AdBlockRule(const QString &filter) QString parsedLine = filter; - if (parsedLine.startsWith(QLatin1Char('/'))) + if ( parsedLine.startsWith( QL1C('/') ) && parsedLine.endsWith( QL1C('/') ) ) { - if (parsedLine.endsWith(QLatin1Char('/'))) - { - parsedLine = parsedLine.mid(1); - parsedLine = parsedLine.left(parsedLine.size() - 1); - isRegExpRule = true; - } + parsedLine = parsedLine.mid(1); + parsedLine = parsedLine.left(parsedLine.size() - 1); + isRegExpRule = true; } - int options = parsedLine.indexOf(QLatin1String("$"), 0); + int options = parsedLine.indexOf( QL1C('$'), 0); if (options >= 0) { - m_options = parsedLine.mid(options + 1).split(QLatin1Char(',')); + m_options = parsedLine.mid(options + 1).split(QL1C(',')); parsedLine = parsedLine.left(options); } if(!isRegExpRule) parsedLine = convertPatternToRegExp(parsedLine); + m_regExp = QRegExp(parsedLine, Qt::CaseInsensitive, QRegExp::RegExp2); - if (m_options.contains(QLatin1String("match-case"))) + if (m_options.contains( QL1S("match-case") )) { m_regExp.setCaseSensitivity(Qt::CaseSensitive); - m_options.removeOne(QLatin1String("match-case")); + m_options.removeOne( QL1S("match-case") ); } } @@ -101,34 +102,36 @@ bool AdBlockRule::match(const QString &encodedUrl) const { bool matched = m_regExp.indexIn(encodedUrl) != -1; - if (matched && !m_options.isEmpty()) - { - // we only support domain right now - if (m_options.count() == 1) - { - foreach (const QString &option, m_options) - { - if (option.startsWith(QLatin1String("domain="))) - { - QUrl url = QUrl::fromEncoded(encodedUrl.toUtf8()); - QString host = url.host(); - QStringList domainOptions = option.mid(7).split(QLatin1Char('|')); - foreach (QString domainOption, domainOptions) - { - bool negate = domainOption.at(0) == QLatin1Char('~'); - if (negate) - domainOption = domainOption.mid(1); - bool hostMatched = domainOption == host; - if (hostMatched && !negate) - return true; - if (!hostMatched && negate) - return true; - } - } - } - } - return false; - } +// TODO: Reimplement this in rekonq 0.5 :) +// +// if (matched && !m_options.isEmpty()) +// { +// // we only support domain right now +// if (m_options.count() == 1) +// { +// foreach (const QString &option, m_options) +// { +// if (option.startsWith( QL1S("domain=") )) +// { +// QUrl url = QUrl::fromEncoded(encodedUrl.toUtf8()); +// QString host = url.host(); +// QStringList domainOptions = option.mid(7).split( QL1C('|') ); +// foreach (QString domainOption, domainOptions) +// { +// bool negate = domainOption.at(0) == QL1C('~'); +// if (negate) +// domainOption = domainOption.mid(1); +// bool hostMatched = domainOption == host; +// if (hostMatched && !negate) +// return true; +// if (!hostMatched && negate) +// return true; +// } +// } +// } +// } +// return false; +// } return matched; } @@ -137,17 +140,37 @@ bool AdBlockRule::match(const QString &encodedUrl) const QString AdBlockRule::convertPatternToRegExp(const QString &wildcardPattern) { QString pattern = wildcardPattern; - return pattern.replace(QRegExp(QLatin1String("\\*+")), QLatin1String("*")) // remove multiple wildcards - .replace(QRegExp(QLatin1String("\\^\\|$")), QLatin1String("^")) // remove anchors following separator placeholder - .replace(QRegExp(QLatin1String("^(\\*)")), QLatin1String("")) // remove leading wildcards - .replace(QRegExp(QLatin1String("(\\*)$")), QLatin1String("")) // remove trailing wildcards - .replace(QRegExp(QLatin1String("(\\W)")), QLatin1String("\\\\1")) // escape special symbols - .replace(QRegExp(QLatin1String("^\\\\\\|\\\\\\|")), - QLatin1String("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?")) // process extended anchor at expression start - .replace(QRegExp(QLatin1String("\\\\\\^")), - QLatin1String("(?:[^\\w\\d\\-.%]|$)")) // process separator placeholders - .replace(QRegExp(QLatin1String("^\\\\\\|")), QLatin1String("^")) // process anchor at expression start - .replace(QRegExp(QLatin1String("\\\\\\|$")), QLatin1String("$")) // process anchor at expression end - .replace(QRegExp(QLatin1String("\\\\\\*")), QLatin1String(".*")) // replace wildcards by .* - ; + + // remove multiple wildcards + pattern.replace(QRegExp( QL1S("\\*+") ), QL1S("*") ); + + // remove anchors following separator placeholder + pattern.replace(QRegExp( QL1S("\\^\\|$") ), QL1S("^") ); + + // remove leading wildcards + pattern.replace(QRegExp( QL1S("^(\\*)") ), QL1S("") ); + + // remove trailing wildcards + pattern.replace(QRegExp( QL1S("(\\*)$") ), QL1S("") ); + + // escape special symbols + pattern.replace(QRegExp( QL1S("(\\W)") ), QL1S("\\\\1") ); + + // process extended anchor at expression start + pattern.replace(QRegExp( QL1S("^\\\\\\|\\\\\\|") ), QL1S("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?") ); + + // process separator placeholders + pattern.replace(QRegExp( QL1S("\\\\\\^") ), QL1S("(?:[^\\w\\d\\-.%]|$)") ); + + // process anchor at expression start + pattern.replace(QRegExp( QL1S("^\\\\\\|") ), QL1S("^") ); + + // process anchor at expression end + pattern.replace(QRegExp( QL1S("\\\\\\|$") ), QL1S("$") ); + + // replace wildcards by .* + pattern.replace(QRegExp( QL1S("\\\\\\*") ), QL1S(".*") ); + + // Finally, return... + return pattern; } -- cgit v1.2.1