diff options
| author | Aqua-sama <aqua@iserlohn-fortress.net> | 2018-07-09 20:19:30 +0200 | 
|---|---|---|
| committer | Aqua-sama <aqua@iserlohn-fortress.net> | 2018-07-09 20:19:30 +0200 | 
| commit | 9808d18fa6cd19400f08897014a9948f168927df (patch) | |
| tree | b4224f737ec01391e7d443ad142c94ec4c0bb17f /lib/web/urlfilter | |
| parent | Remove PageMenu (diff) | |
| download | smolbote-9808d18fa6cd19400f08897014a9948f168927df.tar.xz | |
AdBlock rule testing
Diffstat (limited to 'lib/web/urlfilter')
| -rw-r--r-- | lib/web/urlfilter/adblockrule.cpp | 120 | ||||
| -rw-r--r-- | lib/web/urlfilter/adblockrule.h | 43 | 
2 files changed, 163 insertions, 0 deletions
| diff --git a/lib/web/urlfilter/adblockrule.cpp b/lib/web/urlfilter/adblockrule.cpp new file mode 100644 index 0000000..d707783 --- /dev/null +++ b/lib/web/urlfilter/adblockrule.cpp @@ -0,0 +1,120 @@ +#include "adblockrule.h" + +bool isMatchingDomain(const QString &domain, const QString &filter) +{ +    // domain and filter are the same +    if(domain == filter) { +        return true; +    } + +    // domain can't be matched by filter if it doesn't end with filter +    // ex. example2.com isn't matched by example.com +    if(!domain.endsWith(filter)) { +        return false; +    } + +    // match with subdomains +    // ex. subdomain.example.com is matched by example.com +    int index = domain.indexOf(filter); + +    // match if (domain ends with filter) && (filter has been found) and (character before filter is '.') +    return index > 0 && domain[index - 1] == QLatin1Char('.'); +} + +// adblock format documentation +// https://adblockplus.org/filters + +// QString::mid(pos, len) - Returns a string starting at the specified position index. +// QString::chop(len) - Removes n characters from the end of the string. +// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index. + +AdBlockRule::AdBlockRule(const QString &filter) +{ +    QString parsedLine = filter.trimmed(); + +    // there is no rule, or it"s a comment +    if(parsedLine.isEmpty() || parsedLine.startsWith("!")) { +        return; +    } + +    // css rule - ignore for now +    if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) { +        return; +    } + +    m_isEnabled = true; + +    // exception rules +    if(parsedLine.startsWith(QLatin1Literal("@@"))) { +        m_isException = true; +        parsedLine.remove(0, 2); +    } + +    // regular expression rules +    if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { +        parsedLine = parsedLine.mid(1, parsedLine.length() - 2); + +        matchType = RegularExpressionMatch; +        regexp.setPattern(parsedLine); +        return; +    } + +    // basic filter rules +    if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { +        matchType = StringEquals; +        match = parsedLine.mid(1, parsedLine.length() - 2); +        return; +    } + +    // Basic filter rules can use wildcards, which were supported by QRegExp, +    // but were deprecated in QRegularExpression. + +    // remove beginning and ending wildcards +    if(parsedLine.startsWith(QLatin1Literal("*"))) +        parsedLine = parsedLine.mid(1); + +    if(parsedLine.endsWith(QLatin1Literal("*"))) +        parsedLine.chop(1); + +    if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { +        matchType = DomainMatch; +        match = parsedLine.mid(2, parsedLine.length() - 3); +        return; +    } + +    // check for wildcards and translate to regexp +    // wildcard "*" - any number of characters +    // separator "^" - end, ? or / +    if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { +        matchType = RegularExpressionMatch; +        parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); +        parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); +        regexp.setPattern(parsedLine); +        return; +    } + +    matcher.setPattern(parsedLine); +} + +bool AdBlockRule::isEnabled() const +{ +    return m_isEnabled; +} + +bool AdBlockRule::shouldBlock(const QUrl &url) const +{ +    switch (matchType) { +    case RegularExpressionMatch: +        if(regexp.match(url.toString()).hasMatch()) +            return !m_isException; + +    case StringEquals: +        return url.toString() == match; + +    case DomainMatch: +        return isMatchingDomain(url.host(), match); + +    default: +        return false; +    } +} diff --git a/lib/web/urlfilter/adblockrule.h b/lib/web/urlfilter/adblockrule.h new file mode 100644 index 0000000..e1cabae --- /dev/null +++ b/lib/web/urlfilter/adblockrule.h @@ -0,0 +1,43 @@ +#ifndef ADBLOCKRULE_H +#define ADBLOCKRULE_H + +#include <QObject> +#include <QString> +#include <QRegularExpression> +#include <QUrl> +#include <QWebEngineUrlRequestInfo> + +class AdBlockRule +{ +public: + +    enum MatchType { +        InvalidMatch, +        RegularExpressionMatch, +        StringContains, +        StringStartsWith, +        StringEndsWith, +        StringEquals, +        DomainMatch +    }; + +    AdBlockRule(const QString &filter); + +    bool isEnabled() const; +    bool shouldBlock(const QUrl &url) const; + +private: +    Q_DISABLE_COPY(AdBlockRule) + +    bool m_isEnabled = false; +    bool m_isException = false; + +    MatchType matchType = InvalidMatch; +    QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_resourceTypeOptions; + +    QString match; +    QRegularExpression regexp; +    QStringMatcher matcher; +}; + +#endif // ADBLOCKRULE_H | 
