From f4f11d5cd4ded5f356a07e37e68b3dc738bb37d9 Mon Sep 17 00:00:00 2001 From: Holger <3876469+HolgerAusB@users.noreply.github.com> Date: Sun, 6 Aug 2023 13:27:11 +0200 Subject: [PATCH 01/13] Update unherd.com.txt (#1177) --- unherd.com.txt | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/unherd.com.txt b/unherd.com.txt index a7e2898c..a2e3f412 100644 --- a/unherd.com.txt +++ b/unherd.com.txt @@ -1,19 +1,28 @@ -body: //article +body: //article | //div[@class='thepostinner'] strip_id_or_class: ref-ar strip: //div[@class='code-block code-block-4'] strip: //div[@class="metabox"]/parent::div strip_id_or_class: article-image mobile strip: //div[contains(@class, 'author-side')] -strip://div[@class="comment-break"]/parent::div + +## strip footer / comments +strip://div[@class="comment-break"] +strip://div[@class="uhcomments"] strip://div[@class="featured_caption"]/a +## strip header with category+time +strip: //div[@class='time']/self::div | //div[@class='time']/preceding-sibling::* | //div[@class='time']/following-sibling::h2[1] + find_string: style="background-image: url(' replace_string: > replace_string: .jpg"> +find_string: .jpeg');"> +replace_string: .jpeg"> + find_string: .png');"> replace_string: .png"> @@ -24,3 +33,4 @@ prune: no tidy: no test_url: https://unherd.com/2019/09/labours-brexit-shambles/ +test_url: https://unherd.com/thepost/could-britains-green-debate-become-the-new-brexit/ From 8e9efc52b68be96bc37ba0a1be986cf1f01c0340 Mon Sep 17 00:00:00 2001 From: Holger <3876469+HolgerAusB@users.noreply.github.com> Date: Sat, 12 Aug 2023 10:18:21 +0200 Subject: [PATCH 02/13] Update tagesspiegel.de.txt (#1180) --- tagesspiegel.de.txt | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tagesspiegel.de.txt b/tagesspiegel.de.txt index 81c86dc2..e99ffc74 100644 --- a/tagesspiegel.de.txt +++ b/tagesspiegel.de.txt @@ -1,19 +1,12 @@ -# Title -title: //div[@class='ts-title'] - -# Set author -author: //a[@rel='author'] - -# Set date -date: //time[@class='ts-time'] - # Fetch full multipage articles single_page_link: //a[contains(@class, 'ts-one-page')] # Content is here -body: //article[@class='ts-article'] +body: //article # General cleanup +strip: //nav +strip: //button strip: //iframe strip_id_or_class: hcf-hidden strip_id_or_class: ts-user-quote @@ -21,8 +14,16 @@ strip_id_or_class: ts-abo-link strip_id_or_class: ts-homepage-link strip_id_or_class: ts-recommendation strip_id_or_class: newsletter +strip: //div[@element-type='embedWrapper'] + +# strip related-articles, pubDate, footer +strip: //aside +strip: //time/parent::p +strip: //a[@data-gtm-class='article-home-link']/parent::p/self::* | //a[@data-gtm-class='article-home-link']/parent::p/following-sibling::* # Fix pictures and captions +strip: //picture/source +strip: //img/@srcset replace_string():
replace_string(): From 6ad490d8cb9fa9695ae6630bfec9435d31c37368 Mon Sep 17 00:00:00 2001 From: Holger <3876469+HolgerAusB@users.noreply.github.com> Date: Sat, 12 Aug 2023 10:19:02 +0200 Subject: [PATCH 03/13] Update hessen.de.txt (#1179) --- hessen.de.txt | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/hessen.de.txt b/hessen.de.txt index 6836f943..5c8045e4 100644 --- a/hessen.de.txt +++ b/hessen.de.txt @@ -1,29 +1,28 @@ -# author: HolgerAusB version 2023-01-04 +# author: HolgerAusB version 2023-08-11 # # weekly newsletter of the State of Hesse (Germany) -# -# There html style is very messy. This also makes my -# cleanup very upgly, more kinda pragmatic - body: //div[@class='outer'] +date: //div[@class='sidebar']/div/div/span +author: 'Hessische Landesregierung' -# strip title 'Newsletter nnn' from text -strip: //td/h2[contains(@style, 'color: #16191f')] - -# strip some info boxes -strip: //td/h2[contains(text(), 'Der nächste Newsletter')] -strip: //td/h2[contains(text(), 'erscheint am')] -strip: //table[contains(@style, 'background-color:#eef2ee;')] +# strip header +strip: (//div[@class='sidebar'])[1] +strip: //h2[contains(text(), 'Newsletter')]/ancestor::table[1] -strip: //div[@class='sidebar'] +# strip footer modules +strip_id_or_class: topic-module +strip_id_or_class: contact-module +strip_id_or_class: footer # more cleanups -strip_id_or_class: paragraph--type--cp-box-tile +strip: //img[contains(@src, '/mail/icon_link')] +strip: //table[@class="link-tile"]/@height +strip: //table[@class="link-tile"]/@style strip_id_or_class: spacer -strip_id_or_class: footer prune: no +tidy: no test_url: https://www.hessen.de/newsletter-feed test_url: https://hessen.de/node/10963/newsletter-preview From 191e5acfcdc6c78c59de26f80bead24f628e0e5c Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Sat, 12 Aug 2023 18:19:48 +1000 Subject: [PATCH 04/13] abc.net.au: update to new format (#1178) Signed-off-by: Olivier Mehani --- abc.net.au.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/abc.net.au.txt b/abc.net.au.txt index 22b3a0f4..13eda7a6 100644 --- a/abc.net.au.txt +++ b/abc.net.au.txt @@ -16,3 +16,8 @@ tidy: no test_url: http://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892 test_url: http://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054?section=business + +title: //h1 +body: //div[@id="body"] +test_url: https://www.abc.net.au/news/2023-06-16/fact-check-jacinta-nampijinpa-price-secret-documents-niaa/102485040 +test_contains: widespread but bogus claim about a list of secret "demands" linked to the Voice referendum From 070cd722f459bfc19fc45115d1a7436cddd7c4cc Mon Sep 17 00:00:00 2001 From: Simon Alberny Date: Sat, 12 Aug 2023 10:21:15 +0200 Subject: [PATCH 05/13] Create photopills.com.txt (#1176) --- photopills.com.txt | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 photopills.com.txt diff --git a/photopills.com.txt b/photopills.com.txt new file mode 100644 index 00000000..b4427ee7 --- /dev/null +++ b/photopills.com.txt @@ -0,0 +1,8 @@ +title: //h2 +body: //article[contains(@class, 'post')] +author: //p[@class="author"]//a + +strip: //p[@class="tags"] +strip: //div[@class="freebie"] + +test_url: https://www.photopills.com/articles/depth-of-field-guide From 6a63c68e22661af6ccd634dd9dd4fea1216d06e8 Mon Sep 17 00:00:00 2001 From: Holger <3876469+HolgerAusB@users.noreply.github.com> Date: Sun, 13 Aug 2023 23:15:31 +0200 Subject: [PATCH 06/13] Update abc.net.au.txt (#1181) --- abc.net.au.txt | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/abc.net.au.txt b/abc.net.au.txt index 13eda7a6..eb3f7ef4 100644 --- a/abc.net.au.txt +++ b/abc.net.au.txt @@ -1,23 +1,16 @@ -title: //div[@class='article section']//h1 -author: //div[@class="byline"]/a -date: //span[@class="timestamp"] -body: //div[@class="page section"] +body: //article/div[@data-component="ArticleWeb"] -strip: //a[@class="inline-caption"] -strip: //p[@class="ticker section noprint"] -strip: //p[@class="topics"] -strip: //h1 -strip: //div[@class="byline"] -strip: //p[@class="published"] -strip: //div[contains(@class,"featured-scroller")] -strip_id_or_class: footer +strip: //aside +strip: //button +strip: //header/div[contains(@class, 'Headline_meta')] +strip: //div[contains(@class, 'ArticleWeb_publishedDate')] +strip: //div[contains(@class, 'ArticleWeb_shareBottom')]/self::div | //div[contains(@class, 'ArticleWeb_shareBottom')]/following-sibling::* +strip: //div[contains(@aria-label, 'media') and contains(@aria-label, 'embed')] -tidy: no +prune: no -test_url: http://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892 -test_url: http://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054?section=business +test_url: https://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054 +test_url: https://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892 -title: //h1 -body: //div[@id="body"] test_url: https://www.abc.net.au/news/2023-06-16/fact-check-jacinta-nampijinpa-price-secret-documents-niaa/102485040 test_contains: widespread but bogus claim about a list of secret "demands" linked to the Voice referendum From c502b849a19ee19e3977c1a9ed93d3a4e73ccd68 Mon Sep 17 00:00:00 2001 From: Holger <3876469+HolgerAusB@users.noreply.github.com> Date: Sun, 20 Aug 2023 12:54:45 +0200 Subject: [PATCH 07/13] Create readingthechinadream.com.txt (#1187) --- readingthechinadream.com.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 readingthechinadream.com.txt diff --git a/readingthechinadream.com.txt b/readingthechinadream.com.txt new file mode 100644 index 00000000..65b4f322 --- /dev/null +++ b/readingthechinadream.com.txt @@ -0,0 +1,13 @@ +body: //div[@class='wsite-content'] + +strip: //h2[@class='wsite-content-title'] +strip_id_or_class: commentArea +strip: //div[contains(@class, 'wsite-search-element')] + +# a blank og:image was inserted (for FTR) from the meta-tag which cause a high space +find_string: Date: Sun, 20 Aug 2023 12:55:11 +0200 Subject: [PATCH 08/13] Create legrandcontinent.eu.txt (#1186) --- legrandcontinent.eu.txt | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 legrandcontinent.eu.txt diff --git a/legrandcontinent.eu.txt b/legrandcontinent.eu.txt new file mode 100644 index 00000000..6294adbe --- /dev/null +++ b/legrandcontinent.eu.txt @@ -0,0 +1,26 @@ +body: (//article[contains(concat(' ',normalize-space(@class),' '),' post ')])[1] +author: //meta[@name='author']/@content + +# strip (parts) of header, footer, boxes +strip: //header/h1 | //header/div[1] +strip: //article/div[contains(@class, '-mt-3 text-center')] +strip: //dt[contains(text(), 'Auteur')]/ancestor::div[1] +strip: //a[contains(@class, 'share')]/parent::div +strip: //section[contains(concat(' ',normalize-space(@class),' '),' dive ')] +strip_id_or_class: 'comment-block hidden' +strip_id_or_class: modal_notes +strip_id_or_class: wider-side +strip_id_or_class: special-side + +# iframes are oversized +strip: //iframe/@width +strip: //iframe/@height + +# for right-to-left languages +find_string: class="has-text-align-right" +replace_string: dir="rtl" + +prune: no + +test_url: https://legrandcontinent.eu/fr/2023/08/03/jai-eu-des-doutes-deux-conversations-avec-robert-oppenheimer/ +test_url: https://legrandcontinent.eu/fr/2023/08/17/la-force-de-la-poesie-contre-la-guerre-une-conversation-et-trois-poemes-inedits-de-mostafa-hazara/ From fe1bbc3e777378e8f52ba6169f5fa5a1a23ea118 Mon Sep 17 00:00:00 2001 From: Holger <3876469+HolgerAusB@users.noreply.github.com> Date: Sun, 20 Aug 2023 12:55:29 +0200 Subject: [PATCH 09/13] Create abendzeitung-muenchen.de.txt (#1185) --- abendzeitung-muenchen.de.txt | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 abendzeitung-muenchen.de.txt diff --git a/abendzeitung-muenchen.de.txt b/abendzeitung-muenchen.de.txt new file mode 100644 index 00000000..df9ed449 --- /dev/null +++ b/abendzeitung-muenchen.de.txt @@ -0,0 +1,22 @@ +body: //article + +strip_id_or_class: teaserbox +strip_id_or_class: aufmacherbox +strip_id_or_class: mehrzumthema +strip_id_or_class: artdetail_time +strip_id_or_class: artdetail_social +strip_id_or_class: artdetail_desc + +strip: //footer + +# needed for wallabag: +strip: //picture/source + +# strip additional text within figure after caption +# wallabag/f43.me shows here double captions +# see second image at +# https://www.abendzeitung-muenchen.de/muenchen/wahlkampf-in-bayern-soeder-und-aiwanger-ringen-um-die-bierzelt-herrschaft-art-921576 +strip: //figure/figcaption/following-sibling::text() | //figure/figcaption/following-sibling::p + +test_url: https://www.abendzeitung-muenchen.de/muenchen/mvg-fast-alle-fahrschein-automaten-in-muenchen-kommen-weg-art-921634 +test_url: https://www.abendzeitung-muenchen.de/muenchen/wahlkampf-in-bayern-soeder-und-aiwanger-ringen-um-die-bierzelt-herrschaft-art-921576 From 217fab75ed30473b7b22c1131587230e4cc3f884 Mon Sep 17 00:00:00 2001 From: Holger <3876469+HolgerAusB@users.noreply.github.com> Date: Sun, 20 Aug 2023 12:57:53 +0200 Subject: [PATCH 10/13] Add files via upload (#1184) * reflect slight design-changes of the ippen.media engine * now supporting author and date selectors * better image handling * should now better work also with wallabag --- 24auto.de.txt | 26 ++++++++++++++++++++------ 24garten.de.txt | 26 ++++++++++++++++++++------ 24hamburg.de.txt | 26 ++++++++++++++++++++------ 24rhein.de.txt | 26 ++++++++++++++++++++------ 24vita.de.txt | 26 ++++++++++++++++++++------ az-online.de.txt | 26 ++++++++++++++++++++------ bgland24.de.txt | 26 ++++++++++++++++++++------ buzzfeed.de.txt | 26 ++++++++++++++++++++------ bw24.de.txt | 26 ++++++++++++++++++++------ chiemgau24.de.txt | 26 ++++++++++++++++++++------ come-on.de.txt | 26 ++++++++++++++++++++------ costanachrichten.com.txt | 26 ++++++++++++++++++++------ dasgelbeblatt.de.txt | 26 ++++++++++++++++++++------ deichstube.de.txt | 26 ++++++++++++++++++++------ echo24.de.txt | 26 ++++++++++++++++++++------ extratipp.com.txt | 26 ++++++++++++++++++++------ fehmarn24.de.txt | 26 ++++++++++++++++++++------ fnp.de.txt | 26 ++++++++++++++++++++------ fr.de.txt | 26 ++++++++++++++++++++------ fuldaerzeitung.de.txt | 26 ++++++++++++++++++++------ giessener-allgemeine.de.txt | 26 ++++++++++++++++++++------ hallo-muenchen.de.txt | 26 ++++++++++++++++++++------ hanauer.de.txt | 26 ++++++++++++++++++++------ heidelberg24.de.txt | 26 ++++++++++++++++++++------ hersfelder-zeitung.de.txt | 26 ++++++++++++++++++++------ hna.de.txt | 26 ++++++++++++++++++++------ in-muenchen.de.txt | 26 ++++++++++++++++++++------ ingame.de.txt | 26 ++++++++++++++++++++------ innsalzach24.de.txt | 26 ++++++++++++++++++++------ kreis-anzeiger.de.txt | 26 ++++++++++++++++++++------ kreisbote.de.txt | 26 ++++++++++++++++++++------ kreiszeitung.de.txt | 26 ++++++++++++++++++++------ kurierverlag.de.txt | 26 ++++++++++++++++++++------ leinetal24.de.txt | 26 ++++++++++++++++++++------ lokalo24.de.txt | 26 ++++++++++++++++++++------ ludwigshafen24.de.txt | 26 ++++++++++++++++++++------ mangfall24.de.txt | 26 ++++++++++++++++++++------ mannheim24.de.txt | 26 ++++++++++++++++++++------ meine-anzeigenzeitung.de.txt | 26 ++++++++++++++++++++------ merkur.de.txt | 26 ++++++++++++++++++++------ news.bayern.txt | 26 ++++++++++++++++++++------ oktoberfest.bayern.txt | 26 ++++++++++++++++++++------ op-online.de.txt | 26 ++++++++++++++++++++------ ovb-online.de.txt | 26 ++++++++++++++++++++------ rga.de.txt | 26 ++++++++++++++++++++------ rosenheim24.de.txt | 26 ++++++++++++++++++++------ ruhr24.de.txt | 26 ++++++++++++++++++++------ sauerlandkurier.de.txt | 26 ++++++++++++++++++++------ soester-anzeiger.de.txt | 26 ++++++++++++++++++++------ solinger-tageblatt.de.txt | 26 ++++++++++++++++++++------ tz.de.txt | 26 ++++++++++++++++++++------ wa.de.txt | 26 ++++++++++++++++++++------ wasserburg24.de.txt | 26 ++++++++++++++++++++------ werra-rundschau.de.txt | 26 ++++++++++++++++++++------ wetterauer-zeitung.de.txt | 26 ++++++++++++++++++++------ wlz-online.de.txt | 26 ++++++++++++++++++++------ 56 files changed, 1120 insertions(+), 336 deletions(-) diff --git a/24auto.de.txt b/24auto.de.txt index bff8bd35..20f08f35 100644 --- a/24auto.de.txt +++ b/24auto.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.24auto.de/news/adac-panne-elektro-auto-starterbatterie-akku-wachstum-verbrenner-tuev-wartung-muenchen-starthilfe-91426845.html diff --git a/24garten.de.txt b/24garten.de.txt index e8f74e62..a07235ed 100644 --- a/24garten.de.txt +++ b/24garten.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.24garten.de/mein-garten/basilikum-trocknen-einlegen-haltbarkeit-ernte-kraeuter-pesto-einfrieren-rosmarin-muenchen-zr-91424291.html diff --git a/24hamburg.de.txt b/24hamburg.de.txt index e659fb16..8d632ba9 100644 --- a/24hamburg.de.txt +++ b/24hamburg.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.24hamburg.de/hamburg/neue-corona-regeln-im-hvv-ab-sofort-entfaellt-3g-91422638.html diff --git a/24rhein.de.txt b/24rhein.de.txt index e566efb3..eedf2295 100644 --- a/24rhein.de.txt +++ b/24rhein.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.24rhein.de/koeln/kalk/koeln-videoueberwachung-kameras-kalk-orte-ueberblick-polizei-ausbau-91388746.html diff --git a/24vita.de.txt b/24vita.de.txt index aa22c996..89f4ea1c 100644 --- a/24vita.de.txt +++ b/24vita.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.24vita.de/verbraucher/allesesser-vegetarier-veganer-deutschland-daab-fleisch-herstellung-speiseplan-dge-bonn-91417014.html diff --git a/az-online.de.txt b/az-online.de.txt index 3647e374..e9f67999 100644 --- a/az-online.de.txt +++ b/az-online.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.az-online.de/uelzen/stadt-uelzen/mehr-gaeste-in-der-uelzener-stadthalle-als-erlaubt-91282522.html diff --git a/bgland24.de.txt b/bgland24.de.txt index 8008dd75..70dc19fb 100644 --- a/bgland24.de.txt +++ b/bgland24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.bgland24.de/bgland/region-berchtesgaden/bischofswiesen-ort28409/schoenau-am-koenigssee-wieder-grosses-interesse-bei-fit-durch-unser-gmoa-2022-91285772.html diff --git a/buzzfeed.de.txt b/buzzfeed.de.txt index 5c9ae5d3..c3605102 100644 --- a/buzzfeed.de.txt +++ b/buzzfeed.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.buzzfeed.de/buzz/19-buecher-die-leute-einfach-nicht-zu-ende-lesen-konnten-91400849.html diff --git a/bw24.de.txt b/bw24.de.txt index 827a5616..9fd50253 100644 --- a/bw24.de.txt +++ b/bw24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.bw24.de/baden-wuerttemberg/wolf-baden-wuerttemberg-sichtung-zollernalbkreis-spaziergaengerin-raubtier-nachweis-91297558.html diff --git a/chiemgau24.de.txt b/chiemgau24.de.txt index 2291d1b0..a4991f9f 100644 --- a/chiemgau24.de.txt +++ b/chiemgau24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.chiemgau24.de/chiemgau/chiemsee/gstadt-am-chiemsee-ort118608/gstadt-tourist-info-leiterin-berichtet-von-guter-auslastung-bei-gaestezahlen-trotz-corona-91288990.html diff --git a/come-on.de.txt b/come-on.de.txt index 84e8e716..10e08f1a 100644 --- a/come-on.de.txt +++ b/come-on.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.come-on.de/kreis-mk/corona-mk-zahlen-omikron-heute-inzidenz-tote-todesfall-news-aktuell-luedenscheid-iserlohn-ticker-91403642.html diff --git a/costanachrichten.com.txt b/costanachrichten.com.txt index ba3c2f1a..57762a51 100644 --- a/costanachrichten.com.txt +++ b/costanachrichten.com.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.costanachrichten.com/spanien/politik-wirtschaft/spanien-westsahara-pedro-sanchez-unabhaengigkeit-uno-marokko-fluechtlinge-ukraine-91422126.html diff --git a/dasgelbeblatt.de.txt b/dasgelbeblatt.de.txt index 80b66361..150ecb96 100644 --- a/dasgelbeblatt.de.txt +++ b/dasgelbeblatt.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.dasgelbeblatt.de/lokales/bad-toelz-wolfratshausen/landkreis-bad-toelz-wolfratshausen-prozent-mehr-gewaltstraftaten-in-2021-91426756.html diff --git a/deichstube.de.txt b/deichstube.de.txt index f5eb5754..4420a763 100644 --- a/deichstube.de.txt +++ b/deichstube.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.deichstube.de/news/werder-bremen-trotzt-problemen-was-der-sieg-gegen-sv-darmstadt-98-alles-aussagt-trainer-ole-werner-2-bundesliga-aufstieg-zr-91423406.html diff --git a/echo24.de.txt b/echo24.de.txt index 7f58f0a0..157b23f2 100644 --- a/echo24.de.txt +++ b/echo24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.echo24.de/baden-wuerttemberg/corona-verordnung-baden-wuerttemberg-regeln-kretschmann-maskenpflicht-lockerungen-zr-91419449.html diff --git a/extratipp.com.txt b/extratipp.com.txt index 87aa05fc..d47055ac 100644 --- a/extratipp.com.txt +++ b/extratipp.com.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.extratipp.com/tv/dsds/quoten-desaster-bei-dsds-setzt-rtl-die-castingshow-mit-florian-silbereisen-ab-91270865.html?trafficsource=idTopBox diff --git a/fehmarn24.de.txt b/fehmarn24.de.txt index 03f41c53..b64b50a7 100644 --- a/fehmarn24.de.txt +++ b/fehmarn24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.fehmarn24.de/heiligenhafen/es-bleibt-das-grosse-ziel-die-innenstadt-von-heiligenhafen-soll-in-den-kommenden-jahren-attraktiver-werden-91421037.html diff --git a/fnp.de.txt b/fnp.de.txt index d0bf7a3f..81802343 100644 --- a/fnp.de.txt +++ b/fnp.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.fnp.de/frankfurt/frankfurt-sachsenhausen-der-adlhochplatz-bekommt-einen-neuen-namen-91287869.html diff --git a/fr.de.txt b/fr.de.txt index 903bc6dd..267c3f47 100644 --- a/fr.de.txt +++ b/fr.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.fr.de/frankfurt/die-nfl-kommt-nach-frankfurt-91329620.html diff --git a/fuldaerzeitung.de.txt b/fuldaerzeitung.de.txt index cf568ba4..73bc0642 100644 --- a/fuldaerzeitung.de.txt +++ b/fuldaerzeitung.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.fuldaerzeitung.de/fulda/corona-fulda-rki-inzidenz-neuinfektionen-impfung-novavax-klinikum-91368571.html diff --git a/giessener-allgemeine.de.txt b/giessener-allgemeine.de.txt index 6f94ecfb..015294b6 100644 --- a/giessener-allgemeine.de.txt +++ b/giessener-allgemeine.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.giessener-allgemeine.de/giessen/zwei-kuenstlerpositionen-vereint-91421463.html diff --git a/hallo-muenchen.de.txt b/hallo-muenchen.de.txt index 47774f72..7c801d42 100644 --- a/hallo-muenchen.de.txt +++ b/hallo-muenchen.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.hallo-muenchen.de/muenchen/mitte/muenchen-sitzsteine-stachus-brunnen-corona-abstand-polizei-treffpunkt-maerz-mai-91420320.html diff --git a/hanauer.de.txt b/hanauer.de.txt index bc3549c8..94d5289f 100644 --- a/hanauer.de.txt +++ b/hanauer.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.hanauer.de/hanau/hanauer-gericht-sieht-nackte-tatsachen-nach-misslungener-polizeiaktion-91420845.html diff --git a/heidelberg24.de.txt b/heidelberg24.de.txt index efe5b51a..96d062b7 100644 --- a/heidelberg24.de.txt +++ b/heidelberg24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.heidelberg24.de/heidelberg/schlaegerei-heidelberg-altstadt-opfer-untere-strasse-polizei-einsatz-91425471.html diff --git a/hersfelder-zeitung.de.txt b/hersfelder-zeitung.de.txt index 386f106b..b6f66cdd 100644 --- a/hersfelder-zeitung.de.txt +++ b/hersfelder-zeitung.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.hersfelder-zeitung.de/lokales/philippsthal-heringen/philippsthal-ort473874/alpaka-hengst-cosmo-tot-aufgefunden-91425061.html diff --git a/hna.de.txt b/hna.de.txt index 20ee7027..8c0a447f 100644 --- a/hna.de.txt +++ b/hna.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.hna.de/kassel/eigentuemer-sollen-sich-ruesten-91290102.html diff --git a/in-muenchen.de.txt b/in-muenchen.de.txt index 5cdeb936..d2198972 100644 --- a/in-muenchen.de.txt +++ b/in-muenchen.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.in-muenchen.de/theater/75-jahre-kleines-spiel-marionettentheater-fuer-erwachsene-in-muenchen-91228706.html diff --git a/ingame.de.txt b/ingame.de.txt index 03efd948..cb918ccf 100644 --- a/ingame.de.txt +++ b/ingame.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.ingame.de/news/the-witcher-4-enthuellt-teaser-cd-projekt-red-release-offen-fans-konsolen-warschau-91426909.html diff --git a/innsalzach24.de.txt b/innsalzach24.de.txt index 83dd6498..2785f7ce 100644 --- a/innsalzach24.de.txt +++ b/innsalzach24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.innsalzach24.de/innsalzach/holzland/toeging-am-inn-ort61987/toeging-inn-nistkaesten-fuer-mauersegler-am-rathausturm-91426965.html diff --git a/kreis-anzeiger.de.txt b/kreis-anzeiger.de.txt index 94f53488..03dd3193 100644 --- a/kreis-anzeiger.de.txt +++ b/kreis-anzeiger.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.kreis-anzeiger.de/lokales/wetteraukreis/mit-hoher-sicherheit-ein-wolf-91287739.html diff --git a/kreisbote.de.txt b/kreisbote.de.txt index 29b9b0b5..c255153a 100644 --- a/kreisbote.de.txt +++ b/kreisbote.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.kreisbote.de/lokales/landsberg/meldeverzuege-am-wochenende-90848517.html?trafficsource=idTopBox diff --git a/kreiszeitung.de.txt b/kreiszeitung.de.txt index 0845ed3f..fa461f64 100644 --- a/kreiszeitung.de.txt +++ b/kreiszeitung.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.kreiszeitung.de/lokales/bremen/bremen-wie-sich-osterholz-tenever-veraendert-hat-91304118.html diff --git a/kurierverlag.de.txt b/kurierverlag.de.txt index eb85abaa..b615fda2 100644 --- a/kurierverlag.de.txt +++ b/kurierverlag.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.kurierverlag.de/bayern/razzien-gegen-hasskriminalitaet-17-beschuldigte-in-bayern-zr-91426730.html diff --git a/leinetal24.de.txt b/leinetal24.de.txt index 68b87121..c813ffec 100644 --- a/leinetal24.de.txt +++ b/leinetal24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.leinetal24.de/lokales/hildesheim/polizeiinspektion-hildesheim-informiert-ueber-die-aktuelle-kriminalstatistik-91420438.html diff --git a/lokalo24.de.txt b/lokalo24.de.txt index df89a972..df455259 100644 --- a/lokalo24.de.txt +++ b/lokalo24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.lokalo24.de/lokales/fulda/ab-montag-neue-besucherregelungen-am-klinikum-fulda-91424837.html diff --git a/ludwigshafen24.de.txt b/ludwigshafen24.de.txt index e400e95d..5cf16b6f 100644 --- a/ludwigshafen24.de.txt +++ b/ludwigshafen24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.ludwigshafen24.de/ludwigshafen/abfahrt-heinigstrasse-pruefung-monitoring-bruecke-hochstrasse-nord-auto-ludwigshafen-verkehr-91264126.html diff --git a/mangfall24.de.txt b/mangfall24.de.txt index d691409d..95209c24 100644 --- a/mangfall24.de.txt +++ b/mangfall24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.mangfall24.de/bayern/hozkirchen-boff-im-foolskino-91427134.html diff --git a/mannheim24.de.txt b/mannheim24.de.txt index a51ee105..19bc70a8 100644 --- a/mannheim24.de.txt +++ b/mannheim24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.mannheim24.de/mannheim/quadrate-fressgasse-fussgaengerzone-verkehrsversuch-facebook-kommentare-mannheim-91412006.html diff --git a/meine-anzeigenzeitung.de.txt b/meine-anzeigenzeitung.de.txt index 20ce136e..f499be79 100644 --- a/meine-anzeigenzeitung.de.txt +++ b/meine-anzeigenzeitung.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.meine-anzeigenzeitung.de/bayern/corona-inzidenz-in-bayern-steigt-weiter-zr-91426750.html diff --git a/merkur.de.txt b/merkur.de.txt index 78415e8c..8ba2b8d5 100644 --- a/merkur.de.txt +++ b/merkur.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.merkur.de/politik/ukraine-krieg-russland-putin-hyperschall-raketen-kinschal-video-twitter-fake-experten-91427019.html diff --git a/news.bayern.txt b/news.bayern.txt index a4a3453d..52cda94c 100644 --- a/news.bayern.txt +++ b/news.bayern.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.news.bayern/nullsupermarkt-hamsterkaeufe-oel-mehl-nuernberg-zettel-notes-of-germany-frust-ukraine-mkr-91426735.html diff --git a/oktoberfest.bayern.txt b/oktoberfest.bayern.txt index ff66a3ed..05e070fc 100644 --- a/oktoberfest.bayern.txt +++ b/oktoberfest.bayern.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.oktoberfest.bayern/wiesn/oktoberfest-2020-coronavirus-riesenrad-chef-ueber-wiesn-absage-ein-trauerspiel-zr-13717682.html diff --git a/op-online.de.txt b/op-online.de.txt index bbdd8f18..228b2f2f 100644 --- a/op-online.de.txt +++ b/op-online.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.op-online.de/region/neu-isenburg/einige-strassenbauarbeiten-in-neu-isenburg-91412589.html diff --git a/ovb-online.de.txt b/ovb-online.de.txt index 7fe4112d..a36fe885 100644 --- a/ovb-online.de.txt +++ b/ovb-online.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.ovb-online.de/rosenheim/chiemgau/prien-am-chiemsee-kampfjet-tornado-im-tiefflug-ueber-dem-chiemsee-angst-war-natuerlich-gleich-da-91414102.html diff --git a/rga.de.txt b/rga.de.txt index bb165256..f7a3bd5f 100644 --- a/rga.de.txt +++ b/rga.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.rga.de/lokales/remscheid/corona-in-remscheid-maskenpflicht-im-rathaus-bleibt-91406940.html diff --git a/rosenheim24.de.txt b/rosenheim24.de.txt index b8c3848c..77435929 100644 --- a/rosenheim24.de.txt +++ b/rosenheim24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.rosenheim24.de/rosenheim/rosenheim-land/rosenheim-corona-zahlen-explodieren-viele-corona-tote-patienten-91420512.html diff --git a/ruhr24.de.txt b/ruhr24.de.txt index b1baf5be..1fa5595e 100644 --- a/ruhr24.de.txt +++ b/ruhr24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.ruhr24.de/dortmund/wolfssichtung-deutschland-ruhrgebiet-wolfswelpe-maerz-2022-video-wolf-dortmund-nrw-eving-woelfe-91410695.html diff --git a/sauerlandkurier.de.txt b/sauerlandkurier.de.txt index 3c1f17a7..637fe5d9 100644 --- a/sauerlandkurier.de.txt +++ b/sauerlandkurier.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.sauerlandkurier.de/hochsauerlandkreis/corona-hsk-inzidenz-zahlen-test-impfung-schulen-news-heute-22-03-2022-aktuell-arnsberg-91424574.html diff --git a/soester-anzeiger.de.txt b/soester-anzeiger.de.txt index 4b912a99..b6dc1c51 100644 --- a/soester-anzeiger.de.txt +++ b/soester-anzeiger.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.soester-anzeiger.de/lokales/kreis-soest/corona-virus-kreis-soest-inzidenz-booster-omikron-nrw-heute-dienstag-22-03-2022-daten-aktuell-regeln-zahlen-impfen-news-ticker-tot-impf-stoff-91372172.html diff --git a/solinger-tageblatt.de.txt b/solinger-tageblatt.de.txt index 70ce3c0e..67601559 100644 --- a/solinger-tageblatt.de.txt +++ b/solinger-tageblatt.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.solinger-tageblatt.de/solingen/corona-in-solingen-vermehrt-vierte-impfung-in-arztpraxen-91406963.html diff --git a/tz.de.txt b/tz.de.txt index f2b06038..490de78c 100644 --- a/tz.de.txt +++ b/tz.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.tz.de/muenchen/stadt/meunchen-sirenen-wegen-krieg-in-der-ukraine-neue-debatte-um-sirenen-in-muenchen-wie-warnt-die-stadt-im-ernstfall-zr-91423000.html diff --git a/wa.de.txt b/wa.de.txt index 04a04897..eda6926a 100644 --- a/wa.de.txt +++ b/wa.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.wa.de/hamm/stadt-hamm-setzt-wollring-gegen-eichenprozessionsspinner-ein-91414576.html diff --git a/wasserburg24.de.txt b/wasserburg24.de.txt index 2470e32d..2801c20e 100644 --- a/wasserburg24.de.txt +++ b/wasserburg24.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.wasserburg24.de/bayern/landkreis-berchtesgadener-land/berchtesgaden-naturschuetzer-stellen-sich-gegen-fussweg-bei-baustelle-91427485.html diff --git a/werra-rundschau.de.txt b/werra-rundschau.de.txt index a6157514..d3ba6963 100644 --- a/werra-rundschau.de.txt +++ b/werra-rundschau.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.werra-rundschau.de/eschwege/corona-im-werra-meissner-kreis-sieben-tage-inzidenz-liegt-bei-1565-91404363.html diff --git a/wetterauer-zeitung.de.txt b/wetterauer-zeitung.de.txt index 1d3c2a79..7cfac856 100644 --- a/wetterauer-zeitung.de.txt +++ b/wetterauer-zeitung.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.wetterauer-zeitung.de/wetterau/mahnwache-fuers-impfen-91318413.html diff --git a/wlz-online.de.txt b/wlz-online.de.txt index 74a385fe..f8280a42 100644 --- a/wlz-online.de.txt +++ b/wlz-online.de.txt @@ -1,12 +1,27 @@ -# Author: HolgerAusB | Version: 2022-03-22 +# Author: HolgerAusB | Version: 2023-08-17 # # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # +# @ippen.media site #========================== -# strip author box and social media box -# these boxes sometimes prevented main picure to show up +body: //article +author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') +author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] +date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime + +strip_id_or_class: id-Article-dateActionboxCombo +strip_id_or_class: id-Article-kicker +strip_id_or_class: id-Article-headline +strip_id_or_class: id-AuthorList +strip_id_or_class: id-StoryElement-inArticleReco +strip_id_or_class: id-Comments +strip_id_or_class: id-Story-timestamp +strip_id_or_class: id-Story-authors +strip_id_or_class: id-Story-interactionBar +strip: //a[@title='Bilderzoom'] + strip_id_or_class: idjs-simpletab-nav-item strip_id_or_class: idjs-simpletab-content-close strip_id_or_class: id-AuthorList @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert strip_id_or_class: id-Article-advert--ad3 strip_id_or_class: id-Article-advert -tidy: yes -prune: yes - +tidy: no +prune: no test_url: https://www.wlz-online.de/frankenberg/corona-impfpflicht-einrichtungen-in-waldeck-frankenberg-muessen-mitarbeiter-dem-kreis-melden-91426968.html From e70a92008cb30271563fca0000ce1947aab558b7 Mon Sep 17 00:00:00 2001 From: Holger <3876469+HolgerAusB@users.noreply.github.com> Date: Sun, 20 Aug 2023 12:58:25 +0200 Subject: [PATCH 11/13] Create geschichtedergegenwart.ch.txt (#1183) --- geschichtedergegenwart.ch.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 geschichtedergegenwart.ch.txt diff --git a/geschichtedergegenwart.ch.txt b/geschichtedergegenwart.ch.txt new file mode 100644 index 00000000..3e5b980e --- /dev/null +++ b/geschichtedergegenwart.ch.txt @@ -0,0 +1,6 @@ +http_header(User-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0 + +body: //article + +test_url: https://geschichtedergegenwart.ch/natuerlicher-oder-kuenstlicher-staat-nation-und-imperium-im-russischen-staatsdenken/ +test_url: https://geschichtedergegenwart.ch/die-schweizerische-neutralitaet-ein-anachronismus/ From bd5e5a477dd78ae91f65d4f239f36554ef37af20 Mon Sep 17 00:00:00 2001 From: vrachnis Date: Sun, 20 Aug 2023 11:59:00 +0100 Subject: [PATCH 12/13] Create jpmens.net.txt (#1182) * Create jpmens.net.txt * Update jpmens.net.txt --- jpmens.net.txt | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 jpmens.net.txt diff --git a/jpmens.net.txt b/jpmens.net.txt new file mode 100644 index 00000000..aba1a5a6 --- /dev/null +++ b/jpmens.net.txt @@ -0,0 +1,9 @@ +body: //div[@class='post'] +date: //div[@class='info']/a +author: //meta[@name="author"]/@content +title: //div[@class="title"]/h1/a + +strip_id_or_class: postfoot +prune: no + +test_url: https://jpmens.net/2023/08/14/using-events-to-drive-ansible/ From 19c129069552e7f70f3f7466befd4fc42efc9209 Mon Sep 17 00:00:00 2001 From: Holger <3876469+HolgerAusB@users.noreply.github.com> Date: Mon, 21 Aug 2023 07:14:52 +0200 Subject: [PATCH 13/13] Ippen.media (#1188) --- rga.de.txt => fingerprint.ippen.media.txt | 7 +++- solinger-tageblatt.de.txt | 47 ----------------------- 2 files changed, 6 insertions(+), 48 deletions(-) rename rga.de.txt => fingerprint.ippen.media.txt (84%) delete mode 100644 solinger-tageblatt.de.txt diff --git a/rga.de.txt b/fingerprint.ippen.media.txt similarity index 84% rename from rga.de.txt rename to fingerprint.ippen.media.txt index f7a3bd5f..025b76d0 100644 --- a/rga.de.txt +++ b/fingerprint.ippen.media.txt @@ -1,5 +1,8 @@ # Author: HolgerAusB | Version: 2023-08-17 # +# This works for any ippen.media website if +# fingerprinting for ippen.media is activated +# # to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. # https://www.example.com/hessen/rssfeed.rdf # @@ -44,4 +47,6 @@ strip_id_or_class: id-Article-advert tidy: no prune: no -test_url: https://www.rga.de/lokales/remscheid/corona-in-remscheid-maskenpflicht-im-rathaus-bleibt-91406940.html + +test_url: https://www.fr.de/frankfurt/die-nfl-kommt-nach-frankfurt-91329620.html +test_url: https://www.merkur.de/politik/ukraine-krieg-russland-putin-hyperschall-raketen-kinschal-video-twitter-fake-experten-91427019.html diff --git a/solinger-tageblatt.de.txt b/solinger-tageblatt.de.txt deleted file mode 100644 index 67601559..00000000 --- a/solinger-tageblatt.de.txt +++ /dev/null @@ -1,47 +0,0 @@ -# Author: HolgerAusB | Version: 2023-08-17 -# -# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g. -# https://www.example.com/hessen/rssfeed.rdf -# -# @ippen.media site -#========================== - -body: //article -author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:') -author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')] -date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime - -strip_id_or_class: id-Article-dateActionboxCombo -strip_id_or_class: id-Article-kicker -strip_id_or_class: id-Article-headline -strip_id_or_class: id-AuthorList -strip_id_or_class: id-StoryElement-inArticleReco -strip_id_or_class: id-Comments -strip_id_or_class: id-Story-timestamp -strip_id_or_class: id-Story-authors -strip_id_or_class: id-Story-interactionBar -strip: //a[@title='Bilderzoom'] - -strip_id_or_class: idjs-simpletab-nav-item -strip_id_or_class: idjs-simpletab-content-close -strip_id_or_class: id-AuthorList -strip_id_or_class: id-Article-content-item-copyright -strip_id_or_class: id-DonaldBreadcrumb -strip_id_or_class: id-StoryElement-timestamp -strip_id_or_class: id-StoryElement-authors -strip_id_or_class: id-StoryElement-interactionBar -strip_id_or_class: id-StoryElement-image-caption -strip_id_or_class: id-Mediabox-info-el -strip_id_or_class: id-Recommendation -strip_id_or_class: id-js-relatedStory - -# strip adverts -strip_id_or_class: id-Article-content-item.id-Article-advert.id-Article-advert--ad3 -strip_id_or_class: id-Article-advert.id-Article-content-item -strip_id_or_class: id-Article-content-item.id-Article-advert -strip_id_or_class: id-Article-advert--ad3 -strip_id_or_class: id-Article-advert - -tidy: no -prune: no -test_url: https://www.solinger-tageblatt.de/solingen/corona-in-solingen-vermehrt-vierte-impfung-in-arztpraxen-91406963.html