Fix Australia News Corp (Googlebot mobile)

This commit is contained in:
magnolia1234 2024-02-29 16:50:19 +01:00
parent cee1bb280e
commit 76ab6d8ae0
6 changed files with 19 additions and 61 deletions

View file

@ -66,9 +66,9 @@ var remove_cookies = [];
var remove_cookies_select_hold, remove_cookies_select_drop;
// Set User-Agent
var use_google_bot, use_bing_bot, use_facebook_bot, use_semrush_bot, use_useragent_custom, use_useragent_custom_obj;
var use_google_bot, use_bing_bot, use_facebook_bot, use_useragent_custom, use_useragent_custom_obj;
// Set Referer
var use_drudgereport_referer, use_facebook_referer, use_google_referer, use_twitter_referer, use_referer_custom, use_referer_custom_obj;
var use_facebook_referer, use_google_referer, use_twitter_referer, use_referer_custom, use_referer_custom_obj;
// Set random IP-address
var random_ip = {};
var use_random_ip = [];
@ -117,10 +117,8 @@ function initSetRules() {
use_google_bot = [];
use_bing_bot = [];
use_facebook_bot = [];
use_semrush_bot = [];
use_useragent_custom = [];
use_useragent_custom_obj = {};
use_drudgereport_referer = [];
use_facebook_referer = [];
use_google_referer = [];
use_twitter_referer = [];
@ -157,8 +155,6 @@ const userAgentMobileB = "Chrome/115.0.5790.171 Mobile Safari/537.36 (compatible
const userAgentDesktopF = 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)';
const userAgentDesktopS = "Mozilla/5.0 (compatible; SemrushBot; +http://www.semrush.com/bot.html)";
var enabledSites = [];
var disabledSites = [];
var optionSites = {};
@ -210,6 +206,7 @@ function check_sites_updated(sites_updated_json, optin_update = false) {
var ext_path = 'https://gitlab.com/magnolia1234/bypass-paywalls-' + url_loc + '-clean/-/raw/master/';
var sites_updated_json = 'sites_updated.json';
var sites_updated_json_online = ext_path + sites_updated_json;
var self_hosted = !!(manifestData.update_url || (manifestData.browser_specific_settings && manifestData.browser_specific_settings.gecko.update_url));
function clear_sites_updated() {
ext_api.storage.local.set({
@ -286,10 +283,6 @@ function addRules(domain, rule) {
if (!use_facebook_bot.includes(domain))
use_facebook_bot.push(domain);
break;
case 'semrushbot':
if (!use_semrush_bot.includes(domain))
use_semrush_bot.push(domain);
break;
}
} else if (rule.useragent_custom) {
if (!use_useragent_custom.includes(domain)) {
@ -299,10 +292,6 @@ function addRules(domain, rule) {
}
if (rule.referer) {
switch (rule.referer) {
case 'drudgereport':
if (!use_drudgereport_referer.includes(domain))
use_drudgereport_referer.push(domain);
break;
case 'facebook':
if (!use_facebook_referer.includes(domain))
use_facebook_referer.push(domain);
@ -461,7 +450,7 @@ function set_rules(sites, sites_updated, sites_custom) {
blockedJsInlineDomains = Object.keys(blockedJsInline);
disableJavascriptInline();
use_random_ip = Object.keys(random_ip);
change_headers = use_google_bot.concat(use_bing_bot, use_facebook_bot, use_semrush_bot, use_useragent_custom, use_drudgereport_referer, use_facebook_referer, use_google_referer, use_twitter_referer, use_referer_custom, use_random_ip);
change_headers = use_google_bot.concat(use_bing_bot, use_facebook_bot, use_useragent_custom, use_facebook_referer, use_google_referer, use_twitter_referer, use_referer_custom, use_random_ip);
}
// add grouped sites to enabledSites/disabledSites (and exclude sites)
@ -793,7 +782,7 @@ ext_api.webRequest.onBeforeRequest.addListener(function (details) {
);
// Australia News Corp: redirect subscribe pages to AMP
var au_news_corp_no_amp_fix = [];
var au_news_corp_no_amp_fix = ['ntnews.com.au'];
var au_news_corp_subscr = au_news_corp_domains.filter(domain => !au_news_corp_no_amp_fix.includes(domain)).map(domain => '*://www.' + domain + '/subscribe/*');
ext_api.webRequest.onBeforeRequest.addListener(function (details) {
if (!isSiteEnabled(details) || details.url.includes('/digitalprinteditions') || !(details.url.includes('dest=') && details.url.split('dest=')[1].split('&')[0])) {
@ -1090,13 +1079,12 @@ if (matchUrlDomain(change_headers, details.url) && !ignore_types.includes(detail
var googlebotEnabled = matchUrlDomain(use_google_bot, details.url) &&
!(matchUrlDomain(es_grupo_vocento_domains, details.url) && mobile) &&
!(matchUrlDomain(['economictimes.com', 'economictimes.indiatimes.com'], details.url) && !details.url.split(/\?|#/)[0].endsWith('.cms')) &&
!(matchUrlDomain(au_news_corp_domains, details.url) && (details.url.includes('?amp') || !mobile || (!matchUrlDomain(au_news_corp_no_amp_fix, details.url) && enabledSites.includes('#options_disable_gb_au_news_corp')))) &&
!(matchUrlDomain(au_news_corp_domains, details.url) && (details.url.includes('?amp') || (!matchUrlDomain(au_news_corp_no_amp_fix, details.url) && enabledSites.includes('#options_disable_gb_au_news_corp')))) &&
!(matchUrlDomain('nytimes.com', details.url) && details.url.includes('.nytimes.com/live/')) &&
!(matchUrlDomain('uol.com.br', details.url) && !matchUrlDomain('folha.uol.com.br', details.url)) &&
!(matchUrlDomain('www.wsj.com', details.url));
var bingbotEnabled = matchUrlDomain(use_bing_bot, details.url);
var facebookbotEnabled = matchUrlDomain(use_facebook_bot, details.url);
var semrushbotEnabled = matchUrlDomain(use_semrush_bot, details.url);
var useragent_customEnabled = matchUrlDomain(use_useragent_custom, details.url);
// if referer exists, set it
@ -1104,8 +1092,6 @@ if (matchUrlDomain(change_headers, details.url) && !ignore_types.includes(detail
if (requestHeader.name === 'Referer') {
if (googlebotEnabled || matchUrlDomain(use_google_referer, details.url)) {
requestHeader.value = 'https://www.google.com/';
} else if (matchUrlDomain(use_drudgereport_referer, details.url)) {
requestHeader.value = 'https://www.drudgereport.com/';
} else if (matchUrlDomain(use_facebook_referer, details.url)) {
requestHeader.value = 'https://www.facebook.com/';
} else if (matchUrlDomain(use_twitter_referer, details.url)) {
@ -1116,7 +1102,7 @@ if (matchUrlDomain(change_headers, details.url) && !ignore_types.includes(detail
setReferer = true;
}
if (requestHeader.name === 'User-Agent') {
useUserAgentMobile = requestHeader.value.toLowerCase().includes("mobile") && !matchUrlDomain(['telerama.fr', 'theatlantic.com'], details.url);
useUserAgentMobile = (requestHeader.value.toLowerCase().includes("mobile") || matchUrlDomain(au_news_corp_domains, details.url)) && !matchUrlDomain(['telerama.fr', 'theatlantic.com'], details.url);
}
return requestHeader;
});
@ -1128,11 +1114,6 @@ if (matchUrlDomain(change_headers, details.url) && !ignore_types.includes(detail
name: 'Referer',
value: 'https://www.google.com/'
});
} else if (matchUrlDomain(use_drudgereport_referer, details.url)) {
requestHeaders.push({
name: 'Referer',
value: 'https://www.drudgereport.com/'
});
} else if (matchUrlDomain(use_facebook_referer, details.url)) {
requestHeaders.push({
name: 'Referer',
@ -1179,14 +1160,6 @@ if (matchUrlDomain(change_headers, details.url) && !ignore_types.includes(detail
})
}
// override User-Agent to use Semrushbot
else if (semrushbotEnabled) {
requestHeaders.push({
"name": "User-Agent",
"value": userAgentDesktopS
})
}
// override User-Agent with a custom one
else if (domain = useragent_customEnabled) {
requestHeaders.push({
@ -1401,6 +1374,8 @@ function site_switch() {
let defaultSite_title = isDefaultSite ? Object.keys(defaultSites).find(key => defaultSites[key].domain === isDefaultSite) : '';
let isCustomSite = matchUrlDomain(customSites_domains, currentUrl);
let customSite_title = isCustomSite ? Object.keys(customSites).find(key => customSites[key].domain === isCustomSite || (customSites[key].group && customSites[key].group.split(',').includes(isCustomSite))) : '';
if (isCustomSite && customSite_title && customSites[customSite_title].domain !== isCustomSite)
isCustomSite = customSites[customSite_title].domain;
let isCustomFlexSite = matchUrlDomain(custom_flex_domains, currentUrl);
let isCustomFlexGroupSite = isCustomFlexSite ? Object.keys(custom_flex).find(key => custom_flex[key].includes(isCustomFlexSite)) : '';
let customFlexSite_title = isCustomFlexGroupSite ? Object.keys(defaultSites).find(key => defaultSites[key].domain === isCustomFlexGroupSite) : '';

View file

@ -7,6 +7,7 @@ Add Newsquest Media Group (UK; opt-in to custom sites)
Remove BusinessTimes.com.sg (fix obsolete)
Remove Dn.se (fix obsolete)
Remove LeFigaro.fr (fix obsolete)
Fix Australia News Corp (Googlebot mobile)
Fix Haaretz Group (Outbrain bot)
Fix Project Syndicate (no article)
Fix Times of India (epaper)

View file

@ -999,6 +999,8 @@ else if (matchDomain('freiepresse.de')) {
}
getGoogleWebcache(url, 'div.article-teaser', '', 'article');
}
let ads = document.querySelectorAll('div.rgt-content');
hideDOMElement(...ads);
}
else if (matchDomain('freitag.de')) {

View file

@ -51,5 +51,5 @@
"webRequestBlocking",
"*://*/*"
],
"version": "3.5.7.4"
"version": "3.5.7.5"
}

View file

@ -839,5 +839,5 @@
"*://archive.vn/*",
"*://webcache.googleusercontent.com/*"
],
"version": "3.5.7.4"
"version": "3.5.7.5"
}

View file

@ -146,19 +146,7 @@ var defaultSites = {
block_regex: /cdn\.ampproject\.org\/v\d\/amp-subscriptions-.+\.js/,
useragent: "googlebot",
exception: [{
domain: "cairnspost.com.au",
allow_cookies: 1,
block_regex: /cdn\.ampproject\.org\/v\d\/amp-subscriptions-.+\.js/
}, {
domain: "geelongadvertiser.com.au",
allow_cookies: 1,
block_regex: /cdn\.ampproject\.org\/v\d\/amp-subscriptions-.+\.js/
}, {
domain: "theaustralian.com.au",
allow_cookies: 1,
block_regex: /cdn\.ampproject\.org\/v\d\/amp-subscriptions-.+\.js/
}, {
domain: "townsvillebulletin.com.au",
domain: ["cairnspost.com.au", "geelongadvertiser.com.au", "theaustralian.com.au", "townsvillebulletin.com.au"],
allow_cookies: 1,
block_regex: /cdn\.ampproject\.org\/v\d\/amp-subscriptions-.+\.js/
}
@ -1048,16 +1036,8 @@ var defaultSites = {
block_regex: /(scripts\.repubblica\.it\/pw\/pw\.js|cdn\.ampproject\.org\/v\d\/amp-(access|user-notification)-.+\.js)/,
useragent: "googlebot",
exception: [{
domain: "huffingtonpost.it",
block_js_inline: /\.huffingtonpost\.it\/.+\/news\//,
remove_cookies_select_drop: ["blaize_session"]
}, {
domain: "lastampa.it",
block_js_inline: /\.lastampa\.it\/.+\/news\//,
remove_cookies_select_drop: ["blaize_session"]
}, {
domain: "repubblica.it",
block_js_inline: /\.repubblica\.it\/.+\/news\//,
domain: ["huffingtonpost.it", "lastampa.it", "repubblica.it"],
block_js_inline: /\.it\/.+\/news\//,
remove_cookies_select_drop: ["blaize_session"]
}
]
@ -2626,7 +2606,7 @@ var defaultSites = {
domain: "wsj.com",
allow_cookies: 1,
block_regex: /(cdn\.cxense\.com\/|cdn\.ampproject\.org\/v\d\/amp-subscriptions-.+\.js)/,
referer: "drudgereport",
referer_custom: "https://www.drudgereport.com/",
useragent: "googlebot",
cs_dompurify: 1
},
@ -3049,4 +3029,4 @@ var fr_groupe_ebra_nofix_domains = ['bienpublic.com', 'dna.fr', 'estrepublicain.
var fr_indigo_nofix_domains = ['africaintelligence.com', 'africaintelligence.fr', 'glitz.paris', 'intelligenceonline.com', 'intelligenceonline.fr', 'lalettre.fr'];
var it_gedi_nofix_domains = ['gelocal.it', 'limesonline.com'];
var nl_mediahuis_region_nofix_domains = ['gooieneemlander.nl', 'haarlemsdagblad.nl', 'ijmuidercourant.nl', 'leidschdagblad.nl', 'noordhollandsdagblad.nl'];
var nofix_sites = ['11freunde.de', 'aamulehti.fi', 'abendblatt.de', 'aftenposten.no', 'aftonbladet.se', 'allgaeuer-zeitung.de', 'arkansasonline.com', 'asahi.com', 'asiatimes.com', 'autosport.com', 'aviationweek.com', 'badische-zeitung.de', 'bhaskar.com', 'bloomberglaw.com', 'bloombergtax.com', 'bnef.com', 'bnn.de', 'borsen.dk', 'businessinsider.de', 'businessinsider.jp', 'businesslive.co.za', 'businesstimes.com.sg', 'caixin.com', 'caixinglobal.com', 'caravanmagazine.in', 'catalyst-journal.com', 'chegg.com', 'codesports.com.au', 'compactmag.com', 'courrierinternational.com', 'coursehero.com', 'deutsche-wirtschafts-nachrichten.de', 'die-glocke.de', 'dn.no', 'dn.se', 'elordenmundial.com', 'entrepreneur.com', 'epw.in', 'ewmagazine.nl', 'falter.at', 'finance.si', 'franc-tireur.fr', 'ftchinese.com', 'ftchineselive.com', 'gamestar.de', 'geo.de', 'golem.de', 'gp.se', 'gva.be', 'handelsblatt.com', 'hbrarabic.com', 'hbrchina.org', 'hbrfrance.fr', 'heise.de', 'hs.fi', 'ilsole24ore.com', 'information.dk', 'investors.com', 'iltalehti.fi', 'jacobin.com', 'jeuneafrique.com', 'jungefreiheit.de', 'kleinezeitung.at', 'krone.at', 'laverita.info', 'lavie.fr', 'lavozdegalicia.es', 'law360.co.uk', 'law360.com', 'le1hebdo.fr', 'leconomiste.com', 'lefigaro.fr', 'lefilmfrancais.com', 'lemonde.fr', 'lequipe.fr', 'lesjours.fr', 'letemps.ch', 'liberation.fr', 'libertiesjournal.com', 'main-echo.de', 'mainpost.de', 'manager-magazin.de', 'medianama.com', 'mediapart.fr', 'milanofinanza.it', 'mittelbayerische.de', 'monde-diplomatique.fr', 'mondediplo.com', 'money.it', 'moneycontrol.com', 'moodys.com', 'morningstar.com', 'motorsport.com', 'moz.de', 'nachrichten.at', 'nationaljournal.com', 'nature.com', 'nbr.co.nz', 'newcriterion.com', 'news24.com', 'newslaundry.com', 'nn.de', 'nwzonline.de', 'observador.pt', 'on3.com', 'ouest-france.fr', 'philonomist.com', 'pnp.de', 'politicopro.com', 'politiken.dk', 'pressreader.com', 'publico.pt', 'quillette.com', 'rbc.ru', 'republic.ru', 'rheinpfalz.de', 
'risk.net', 'rnz.de', 'saechsische.de', 'sciencedirect.com', 'springer.com', 'statnews.com', 'stern.de', 'stimme.de', 'streetinsider.com', 'substack.com', 'suedkurier.de', 'swp.de', 'taxation.co.uk', 'taxjournal.com', 'techcrunch.com', 'the-ken.com', 'theinformation.com', 'theinitium.com', 'themorningcontext.com', 'theparisreview.org', 'thestar.com.my', 'thewirechina.com', 'timeslive.co.za', 'weltwoche.ch', 'weltwoche.de', 'wissenschaft.de', 'worldpoliticsreview.com', 'wz.de', 'zaobao.com.sg'].concat(de_funke_medien_nofix_domains, de_rp_aachen_medien_nofix_domains, de_westfalen_medien_nofix_domains, fr_be_groupe_rossel_nofix_domains, fr_groupe_ebra_nofix_domains, fr_indigo_nofix_domains, it_gedi_nofix_domains, nl_mediahuis_region_nofix_domains);
var nofix_sites = ['11freunde.de', 'aamulehti.fi', 'abendblatt.de', 'aftenposten.no', 'aftonbladet.se', 'allgaeuer-zeitung.de', 'arkansasonline.com', 'asahi.com', 'asiatimes.com', 'autosport.com', 'aviationweek.com', 'badische-zeitung.de', 'bhaskar.com', 'bloomberglaw.com', 'bloombergtax.com', 'bnef.com', 'bnn.de', 'borsen.dk', 'businessinsider.de', 'businessinsider.jp', 'businesslive.co.za', 'businesstimes.com.sg', 'caixin.com', 'caixinglobal.com', 'caravanmagazine.in', 'catalyst-journal.com', 'chegg.com', 'codesports.com.au', 'compactmag.com', 'courrierinternational.com', 'coursehero.com', 'deutsche-wirtschafts-nachrichten.de', 'die-glocke.de', 'dn.no', 'dn.se', 'elordenmundial.com', 'entrepreneur.com', 'epw.in', 'ewmagazine.nl', 'falter.at', 'finance.si', 'franc-tireur.fr', 'ftchinese.com', 'ftchineselive.com', 'gamestar.de', 'geo.de', 'golem.de', 'gp.se', 'gva.be', 'handelsblatt.com', 'hbrarabic.com', 'hbrchina.org', 'hbrfrance.fr', 'heise.de', 'hs.fi', 'ilsole24ore.com', 'information.dk', 'investors.com', 'iltalehti.fi', 'jacobin.com', 'jeuneafrique.com', 'jungefreiheit.de', 'kleinezeitung.at', 'krone.at', 'laverita.info', 'lavie.fr', 'lavozdegalicia.es', 'law360.co.uk', 'law360.com', 'le1hebdo.fr', 'leconomiste.com', 'lefigaro.fr', 'lefilmfrancais.com', 'lemonde.fr', 'lequipe.fr', 'lesjours.fr', 'letemps.ch', 'liberation.fr', 'libertiesjournal.com', 'lr-online.de', 'main-echo.de', 'mainpost.de', 'manager-magazin.de', 'medianama.com', 'mediapart.fr', 'milanofinanza.it', 'mittelbayerische.de', 'monde-diplomatique.fr', 'mondediplo.com', 'money.it', 'moneycontrol.com', 'moodys.com', 'morningstar.com', 'motorsport.com', 'moz.de', 'nachrichten.at', 'nationaljournal.com', 'nature.com', 'nbr.co.nz', 'newcriterion.com', 'news24.com', 'newslaundry.com', 'nn.de', 'nwzonline.de', 'observador.pt', 'on3.com', 'ouest-france.fr', 'philonomist.com', 'pnp.de', 'politicopro.com', 'politiken.dk', 'pressreader.com', 'publico.pt', 'quillette.com', 'rbc.ru', 'republic.ru', 
'rheinpfalz.de', 'risk.net', 'rnz.de', 'saechsische.de', 'sciencedirect.com', 'springer.com', 'statnews.com', 'stern.de', 'stimme.de', 'streetinsider.com', 'substack.com', 'suedkurier.de', 'swp.de', 'taxation.co.uk', 'taxjournal.com', 'techcrunch.com', 'the-ken.com', 'theinformation.com', 'theinitium.com', 'themorningcontext.com', 'theparisreview.org', 'thestar.com.my', 'thewirechina.com', 'timeslive.co.za', 'weltwoche.ch', 'weltwoche.de', 'wissenschaft.de', 'worldpoliticsreview.com', 'wz.de', 'zaobao.com.sg'].concat(de_funke_medien_nofix_domains, de_rp_aachen_medien_nofix_domains, de_westfalen_medien_nofix_domains, fr_be_groupe_rossel_nofix_domains, fr_groupe_ebra_nofix_domains, fr_indigo_nofix_domains, it_gedi_nofix_domains, nl_mediahuis_region_nofix_domains);