diff options
| author | Andrea Diamantini <adjam7@gmail.com> | 2010-01-14 11:38:56 +0100 | 
|---|---|---|
| committer | Andrea Diamantini <adjam7@gmail.com> | 2010-01-14 11:38:56 +0100 | 
| commit | 8d62283b4f44861ebaf21fb78d506b55d1336e3e (patch) | |
| tree | f266a3f3a8cbc58ee0a1f0b4ad48faba0e251ec3 | |
| parent | rekonq 0.3.30 (diff) | |
| parent | Cleaning again (the morning after) the adblock thing.. :) (diff) | |
| download | rekonq-8d62283b4f44861ebaf21fb78d506b55d1336e3e.tar.xz | |
Merge branch 'AdFixes'
| -rw-r--r-- | src/adblock/adblockmanager.cpp | 35 | ||||
| -rw-r--r-- | src/adblock/adblockmanager.h | 18 | ||||
| -rw-r--r-- | src/adblock/adblockrule.cpp | 127 | 
3 files changed, 101 insertions, 79 deletions
| diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp index 5290d561..c2a42f0b 100644 --- a/src/adblock/adblockmanager.cpp +++ b/src/adblock/adblockmanager.cpp @@ -83,24 +83,23 @@ void AdBlockManager::loadSettings()              if (name.startsWith(QLatin1String("Filter")))              { -                if(url.startsWith("!")) -                { -                    continue; -                } -                 -                if(url.startsWith("@@")) -                { -                    AdBlockRule rule( url.mid(2) ); -                    _whiteList << rule; -                    continue; -                } -                 -                if(url.startsWith("##")) -                { -                    _hideList << url.mid(2); -                } -                else -                { +                if(!url.startsWith("!")) +                {            +                    // white rules +                    if(url.startsWith("@@")) +                    { +                        AdBlockRule rule( url.mid(2) ); +                        _whiteList << rule; +                        continue; +                    } +                     +                    // hide (CSS) rules +                    if(url.startsWith("##")) +                    { +                        _hideList << url.mid(2); +                        continue; +                    } +                      AdBlockRule rule( url );                      _blackList << rule;                  } diff --git a/src/adblock/adblockmanager.h b/src/adblock/adblockmanager.h index 45c3fa18..c07a9492 100644 --- a/src/adblock/adblockmanager.h +++ b/src/adblock/adblockmanager.h @@ -31,35 +31,35 @@  // NOTE: AdBlockPlus Filters (fast) summary  //  -// # Basic Filter rules +// ### Basic Filter rules  //   // RULE = http://example.com/ads/*   // this should block every link containing all things from that link  //  -// # Exception rules +// ### Exception rules (@@)  //   // RULE = @@advice*  //   // this will save every site, also that matched by other rules, cointaining words  // that starts with "advice". Wildcards && regular expression allowed here.  //  -// # Beginning/end matching rules +// ### Beginning/end matching rules (||)  //  -// RULE=|http://badsite.com +// RULE=||http://badsite.com  //   // will stop all links starting with http://badsite.com  //  -// RULE=*swf| +// RULE=*swf||  //   // will stop all links to direct flash contents  //  -// # Comments +// ### Comments (!)  //   // RULE=!azz..  //   // Every rule starting with a ! is commented out and should not be checked  //  -// # Filter Options +// ### Filter Options  //   // You can also specify a number of options to modify the behavior of a filter.   // You list these options separated with commas after a dollar sign ($) at the end of the filter @@ -85,13 +85,13 @@  //   // RULE=*/ads/*~$script,match-case  //  -// # Regular expressions +// ### Regular expressions  //   // They usually allow to check for (a lot of) sites, using just one rule, but be careful:  // BASIC FILTERS ARE PROCESSED FASTER THAN REGULAR EXPRESSIONS (In ADP! In rekonq, I don't know...)  //   //  -// ##### ELEMENT HIDING +// ### ELEMENT HIDING (##)  //   // This is quite different from usual adblock (but, for me, more powerful!). Sometimes you will find advertisements   // that can’t be blocked because they are embedded as text in the web page itself. diff --git a/src/adblock/adblockrule.cpp b/src/adblock/adblockrule.cpp index ea3e17de..c6fe47c9 100644 --- a/src/adblock/adblockrule.cpp +++ b/src/adblock/adblockrule.cpp @@ -58,6 +58,9 @@  #include <QRegExp>  #include <QUrl> +#define QL1S(x) QLatin1String(x) +#define QL1C(x) QLatin1Char(x) +  AdBlockRule::AdBlockRule(const QString &filter)  { @@ -65,31 +68,29 @@ AdBlockRule::AdBlockRule(const QString &filter)      QString parsedLine = filter; -    if (parsedLine.startsWith(QLatin1Char('/')))  +    if ( parsedLine.startsWith( QL1C('/') ) && parsedLine.endsWith( QL1C('/') ) )       { -        if (parsedLine.endsWith(QLatin1Char('/')))  -        { -            parsedLine = parsedLine.mid(1); -            parsedLine = parsedLine.left(parsedLine.size() - 1); -            isRegExpRule = true; -        } +        parsedLine = parsedLine.mid(1); +        parsedLine = parsedLine.left(parsedLine.size() - 1); +        isRegExpRule = true;      } -    int options = parsedLine.indexOf(QLatin1String("$"), 0); +    int options = parsedLine.indexOf( QL1C('$'), 0);      if (options >= 0)       { -        m_options = parsedLine.mid(options + 1).split(QLatin1Char(',')); +        m_options = parsedLine.mid(options + 1).split(QL1C(','));          parsedLine = parsedLine.left(options);      }      if(!isRegExpRule)          parsedLine = convertPatternToRegExp(parsedLine); +          m_regExp = QRegExp(parsedLine, Qt::CaseInsensitive, QRegExp::RegExp2); -    if (m_options.contains(QLatin1String("match-case")))  +    if (m_options.contains( QL1S("match-case") ))       {          m_regExp.setCaseSensitivity(Qt::CaseSensitive); -        m_options.removeOne(QLatin1String("match-case")); +        m_options.removeOne( QL1S("match-case") );      }  } @@ -101,34 +102,36 @@ bool AdBlockRule::match(const QString &encodedUrl) const  {      bool matched = m_regExp.indexIn(encodedUrl) != -1; -    if (matched && !m_options.isEmpty())  -    { -        // we only support domain right now -        if (m_options.count() == 1) -        { -            foreach (const QString &option, m_options)  -            { -                if (option.startsWith(QLatin1String("domain=")))  -                { -                    QUrl url = QUrl::fromEncoded(encodedUrl.toUtf8()); -                    QString host = url.host(); -                    QStringList domainOptions = option.mid(7).split(QLatin1Char('|')); -                    foreach (QString domainOption, domainOptions)  -                    { -                        bool negate = domainOption.at(0) == QLatin1Char('~'); -                        if (negate) -                            domainOption = domainOption.mid(1); -                        bool hostMatched = domainOption == host; -                        if (hostMatched && !negate) -                            return true; -                        if (!hostMatched && negate) -                            return true; -                    } -                } -            } -        } -        return false; -    } +// TODO: Reimplement this in rekonq 0.5 :) +// +//     if (matched && !m_options.isEmpty())  +//     { +//         // we only support domain right now +//         if (m_options.count() == 1) +//         { +//             foreach (const QString &option, m_options)  +//             { +//                 if (option.startsWith( QL1S("domain=") ))  +//                 { +//                     QUrl url = QUrl::fromEncoded(encodedUrl.toUtf8()); +//                     QString host = url.host(); +//                     QStringList domainOptions = option.mid(7).split( QL1C('|') ); +//                     foreach (QString domainOption, domainOptions)  +//                     { +//                         bool negate = domainOption.at(0) == QL1C('~'); +//                         if (negate) +//                             domainOption = domainOption.mid(1); +//                         bool hostMatched = domainOption == host; +//                         if (hostMatched && !negate) +//                             return true; +//                         if (!hostMatched && negate) +//                             return true; +//                     } +//                 } +//             } +//         } +//         return false; +//     }      return matched;  } @@ -137,17 +140,37 @@ bool AdBlockRule::match(const QString &encodedUrl) const  QString AdBlockRule::convertPatternToRegExp(const QString &wildcardPattern)  {      QString pattern = wildcardPattern; -    return pattern.replace(QRegExp(QLatin1String("\\*+")), QLatin1String("*"))   // remove multiple wildcards -        .replace(QRegExp(QLatin1String("\\^\\|$")), QLatin1String("^"))        // remove anchors following separator placeholder -        .replace(QRegExp(QLatin1String("^(\\*)")), QLatin1String(""))          // remove leading wildcards -        .replace(QRegExp(QLatin1String("(\\*)$")), QLatin1String(""))          // remove trailing wildcards -        .replace(QRegExp(QLatin1String("(\\W)")), QLatin1String("\\\\1"))      // escape special symbols -        .replace(QRegExp(QLatin1String("^\\\\\\|\\\\\\|")), -                 QLatin1String("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"))       // process extended anchor at expression start -        .replace(QRegExp(QLatin1String("\\\\\\^")), -                 QLatin1String("(?:[^\\w\\d\\-.%]|$)"))                        // process separator placeholders -        .replace(QRegExp(QLatin1String("^\\\\\\|")), QLatin1String("^"))       // process anchor at expression start -        .replace(QRegExp(QLatin1String("\\\\\\|$")), QLatin1String("$"))       // process anchor at expression end -        .replace(QRegExp(QLatin1String("\\\\\\*")), QLatin1String(".*"))       // replace wildcards by .* -        ; +     +    // remove multiple wildcards +    pattern.replace(QRegExp( QL1S("\\*+") ), QL1S("*") ); +     +    // remove anchors following separator placeholder +    pattern.replace(QRegExp( QL1S("\\^\\|$") ), QL1S("^") ); +     +    // remove leading wildcards +    pattern.replace(QRegExp( QL1S("^(\\*)") ), QL1S("") ); +     +    // remove trailing wildcards +    pattern.replace(QRegExp( QL1S("(\\*)$") ), QL1S("") ); +     +    // escape special symbols +    pattern.replace(QRegExp( QL1S("(\\W)") ), QL1S("\\\\1") ); +     +    // process extended anchor at expression start +    pattern.replace(QRegExp( QL1S("^\\\\\\|\\\\\\|") ), QL1S("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?") ); +     +    // process separator placeholders +    pattern.replace(QRegExp( QL1S("\\\\\\^") ), QL1S("(?:[^\\w\\d\\-.%]|$)") ); +     +    // process anchor at expression start +    pattern.replace(QRegExp( QL1S("^\\\\\\|") ), QL1S("^") ); +     +    // process anchor at expression end +    pattern.replace(QRegExp( QL1S("\\\\\\|$") ), QL1S("$") ); +     +    // replace wildcards by .* +    pattern.replace(QRegExp( QL1S("\\\\\\*") ), QL1S(".*") ); + +    // Finally, return... +    return pattern;  } | 
