Fix-update Haaretz/TheMarker (Bingbot)

This commit is contained in:
magnolia1234 2020-11-02 18:52:40 +01:00
parent 23cd31a60b
commit 1a390c73e2
3 changed files with 23 additions and 7 deletions

View file

@ -141,8 +141,6 @@ var use_google_bot_default = [
'editorialedomani.it', 'editorialedomani.it',
'eurekareport.com.au', 'eurekareport.com.au',
'ft.com', 'ft.com',
'haaretz.co.il',
'haaretz.com',
'handelsblatt.com', 'handelsblatt.com',
'intelligentinvestor.com.au', 'intelligentinvestor.com.au',
'lesoir.be', 'lesoir.be',
@ -152,7 +150,6 @@ var use_google_bot_default = [
'quora.com', 'quora.com',
'republic.ru', 'republic.ru',
'seekingalpha.com', 'seekingalpha.com',
'themarker.com',
'thetimes.co.uk', 'thetimes.co.uk',
'washingtonpost.com', 'washingtonpost.com',
'wiwo.de', 'wiwo.de',
@ -163,6 +160,13 @@ var use_google_bot_default = [
var use_google_bot_custom = []; var use_google_bot_custom = [];
var use_google_bot = use_google_bot_default.concat(use_google_bot_custom); var use_google_bot = use_google_bot_default.concat(use_google_bot_custom);
// Override User-Agent with Bingbot
// Domains listed here are matched against the request URL (matchUrlDomain)
// in the onBeforeSendHeaders listener, which then adds a Bingbot
// User-Agent header (userAgentDesktopB / userAgentMobileB) to the request.
var use_bing_bot = [
'haaretz.co.il',
'haaretz.com',
'themarker.com',
];
// block paywall-scripts individually // block paywall-scripts individually
var blockedRegexes = { var blockedRegexes = {
'adweek.com': /.+\.lightboxcdn\.com\/.+/, 'adweek.com': /.+\.lightboxcdn\.com\/.+/,
@ -269,8 +273,11 @@ const au_news_corp_domains = ['adelaidenow.com.au', 'cairnspost.com.au', 'courie
const au_prov_news_domains = ['news-mail.com.au', 'frasercoastchronicle.com.au', 'gladstoneobserver.com.au', 'dailyexaminer.com.au', 'dailymercury.com.au', 'themorningbulletin.com.au', 'sunshinecoastdaily.com.au', 'gympietimes.com.au', 'northernstar.com.au', 'qt.com.au', 'thechronicle.com.au', 'warwickdailynews.com.au']; const au_prov_news_domains = ['news-mail.com.au', 'frasercoastchronicle.com.au', 'gladstoneobserver.com.au', 'dailyexaminer.com.au', 'dailymercury.com.au', 'themorningbulletin.com.au', 'sunshinecoastdaily.com.au', 'gympietimes.com.au', 'northernstar.com.au', 'qt.com.au', 'thechronicle.com.au', 'warwickdailynews.com.au'];
const nymag_domains = ['grubstreet.com', 'thecut.com', 'vulture.com']; const nymag_domains = ['grubstreet.com', 'thecut.com', 'vulture.com'];
const userAgentDesktop = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" const userAgentDesktopG = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
const userAgentMobile = "Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible ; Googlebot/2.1 ; +http://www.google.com/bot.html)" const userAgentMobileG = "Chrome/80.0.3987.92 Mobile Safari/537.36 (compatible ; Googlebot/2.1 ; +http://www.google.com/bot.html)"
// Bingbot User-Agent strings, used for domains in use_bing_bot.
// The listener picks the mobile or desktop variant based on useUserAgentMobile.
const userAgentDesktopB = "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
const userAgentMobileB = "Chrome/80.0.3987.92 Mobile Safari/537.36 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
var enabledSites = []; var enabledSites = [];
var disabledSites = []; var disabledSites = [];
@ -695,7 +702,7 @@ ext_api.webRequest.onBeforeSendHeaders.addListener(function(details) {
if (matchUrlDomain(use_google_bot, details.url)) { if (matchUrlDomain(use_google_bot, details.url)) {
requestHeaders.push({ requestHeaders.push({
"name": "User-Agent", "name": "User-Agent",
"value": useUserAgentMobile ? userAgentMobile : userAgentDesktop "value": useUserAgentMobile ? userAgentMobileG : userAgentDesktopG
}) })
requestHeaders.push({ requestHeaders.push({
"name": "X-Forwarded-For", "name": "X-Forwarded-For",
@ -703,6 +710,14 @@ ext_api.webRequest.onBeforeSendHeaders.addListener(function(details) {
}) })
} }
// override User-Agent to use Bingbot
if (matchUrlDomain(use_bing_bot, details.url)) {
requestHeaders.push({
"name": "User-Agent",
"value": useUserAgentMobile ? userAgentMobileB : userAgentDesktopB
})
}
// remove cookies before page load // remove cookies before page load
if (!matchUrlDomain(allow_cookies, details.url)) { if (!matchUrlDomain(allow_cookies, details.url)) {
requestHeaders = requestHeaders.map(function(requestHeader) { requestHeaders = requestHeaders.map(function(requestHeader) {

View file

@ -2,6 +2,7 @@
Changelog Bypass Paywalls Clean - Firefox Changelog Bypass Paywalls Clean - Firefox
Post-release Post-release
Fix-update Haaretz/TheMarker (Bingbot)
Fix-update Telegraph.co.uk (overlay) Fix-update Telegraph.co.uk (overlay)
Fix updateBadge (grouped sites) Fix updateBadge (grouped sites)

View file

@ -339,5 +339,5 @@
"webRequest", "webRequest",
"webRequestBlocking" "webRequestBlocking"
], ],
"version": "1.9.3.1" "version": "1.9.3.2"
} }