From f4f11d5cd4ded5f356a07e37e68b3dc738bb37d9 Mon Sep 17 00:00:00 2001
From: Holger <3876469+HolgerAusB@users.noreply.github.com>
Date: Sun, 6 Aug 2023 13:27:11 +0200
Subject: [PATCH 01/13] Update unherd.com.txt (#1177)
---
unherd.com.txt | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/unherd.com.txt b/unherd.com.txt
index a7e2898c..a2e3f412 100644
--- a/unherd.com.txt
+++ b/unherd.com.txt
@@ -1,19 +1,28 @@
-body: //article
+body: //article | //div[@class='thepostinner']
strip_id_or_class: ref-ar
strip: //div[@class='code-block code-block-4']
strip: //div[@class="metabox"]/parent::div
strip_id_or_class: article-image mobile
strip: //div[contains(@class, 'author-side')]
-strip://div[@class="comment-break"]/parent::div
+
+## strip footer / comments
+strip://div[@class="comment-break"]
+strip://div[@class="uhcomments"]
strip://div[@class="featured_caption"]/a
+## strip header with category+time
+strip: //div[@class='time']/self::div | //div[@class='time']/preceding-sibling::* | //div[@class='time']/following-sibling::h2[1]
+
find_string: style="background-image: url('
replace_string: >
replace_string: .jpg">
+find_string: .jpeg');">
+replace_string: .jpeg">
+
find_string: .png');">
replace_string: .png">
@@ -24,3 +33,4 @@ prune: no
tidy: no
test_url: https://unherd.com/2019/09/labours-brexit-shambles/
+test_url: https://unherd.com/thepost/could-britains-green-debate-become-the-new-brexit/
From 8e9efc52b68be96bc37ba0a1be986cf1f01c0340 Mon Sep 17 00:00:00 2001
From: Holger <3876469+HolgerAusB@users.noreply.github.com>
Date: Sat, 12 Aug 2023 10:18:21 +0200
Subject: [PATCH 02/13] Update tagesspiegel.de.txt (#1180)
---
tagesspiegel.de.txt | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/tagesspiegel.de.txt b/tagesspiegel.de.txt
index 81c86dc2..e99ffc74 100644
--- a/tagesspiegel.de.txt
+++ b/tagesspiegel.de.txt
@@ -1,19 +1,12 @@
-# Title
-title: //div[@class='ts-title']
-
-# Set author
-author: //a[@rel='author']
-
-# Set date
-date: //time[@class='ts-time']
-
# Fetch full multipage articles
single_page_link: //a[contains(@class, 'ts-one-page')]
# Content is here
-body: //article[@class='ts-article']
+body: //article
# General cleanup
+strip: //nav
+strip: //button
strip: //iframe
strip_id_or_class: hcf-hidden
strip_id_or_class: ts-user-quote
@@ -21,8 +14,16 @@ strip_id_or_class: ts-abo-link
strip_id_or_class: ts-homepage-link
strip_id_or_class: ts-recommendation
strip_id_or_class: newsletter
+strip: //div[@element-type='embedWrapper']
+
+# strip related-articles, pubDate, footer
+strip: //aside
+strip: //time/parent::p
+strip: //a[@data-gtm-class='article-home-link']/parent::p/self::* | //a[@data-gtm-class='article-home-link']/parent::p/following-sibling::*
# Fix pictures and captions
+strip: //picture/source
+strip: //img/@srcset
replace_string():
replace_string():
From 6ad490d8cb9fa9695ae6630bfec9435d31c37368 Mon Sep 17 00:00:00 2001
From: Holger <3876469+HolgerAusB@users.noreply.github.com>
Date: Sat, 12 Aug 2023 10:19:02 +0200
Subject: [PATCH 03/13] Update hessen.de.txt (#1179)
---
hessen.de.txt | 29 ++++++++++++++---------------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/hessen.de.txt b/hessen.de.txt
index 6836f943..5c8045e4 100644
--- a/hessen.de.txt
+++ b/hessen.de.txt
@@ -1,29 +1,28 @@
-# author: HolgerAusB version 2023-01-04
+# author: HolgerAusB version 2023-08-11
#
# weekly newsletter of the State of Hesse (Germany)
-#
-# There html style is very messy. This also makes my
-# cleanup very upgly, more kinda pragmatic
-
body: //div[@class='outer']
+date: //div[@class='sidebar']/div/div/span
+author: 'Hessische Landesregierung'
-# strip title 'Newsletter nnn' from text
-strip: //td/h2[contains(@style, 'color: #16191f')]
-
-# strip some info boxes
-strip: //td/h2[contains(text(), 'Der nächste Newsletter')]
-strip: //td/h2[contains(text(), 'erscheint am')]
-strip: //table[contains(@style, 'background-color:#eef2ee;')]
+# strip header
+strip: (//div[@class='sidebar'])[1]
+strip: //h2[contains(text(), 'Newsletter')]/ancestor::table[1]
-strip: //div[@class='sidebar']
+# strip footer modules
+strip_id_or_class: topic-module
+strip_id_or_class: contact-module
+strip_id_or_class: footer
# more cleanups
-strip_id_or_class: paragraph--type--cp-box-tile
+strip: //img[contains(@src, '/mail/icon_link')]
+strip: //table[@class="link-tile"]/@height
+strip: //table[@class="link-tile"]/@style
strip_id_or_class: spacer
-strip_id_or_class: footer
prune: no
+tidy: no
test_url: https://www.hessen.de/newsletter-feed
test_url: https://hessen.de/node/10963/newsletter-preview
From 191e5acfcdc6c78c59de26f80bead24f628e0e5c Mon Sep 17 00:00:00 2001
From: Olivier Mehani
Date: Sat, 12 Aug 2023 18:19:48 +1000
Subject: [PATCH 04/13] abc.net.au: update to new format (#1178)
Signed-off-by: Olivier Mehani
---
abc.net.au.txt | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/abc.net.au.txt b/abc.net.au.txt
index 22b3a0f4..13eda7a6 100644
--- a/abc.net.au.txt
+++ b/abc.net.au.txt
@@ -16,3 +16,8 @@ tidy: no
test_url: http://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892
test_url: http://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054?section=business
+
+title: //h1
+body: //div[@id="body"]
+test_url: https://www.abc.net.au/news/2023-06-16/fact-check-jacinta-nampijinpa-price-secret-documents-niaa/102485040
+test_contains: widespread but bogus claim about a list of secret "demands" linked to the Voice referendum
From 070cd722f459bfc19fc45115d1a7436cddd7c4cc Mon Sep 17 00:00:00 2001
From: Simon Alberny
Date: Sat, 12 Aug 2023 10:21:15 +0200
Subject: [PATCH 05/13] Create photopills.com.txt (#1176)
---
photopills.com.txt | 8 ++++++++
1 file changed, 8 insertions(+)
create mode 100644 photopills.com.txt
diff --git a/photopills.com.txt b/photopills.com.txt
new file mode 100644
index 00000000..b4427ee7
--- /dev/null
+++ b/photopills.com.txt
@@ -0,0 +1,8 @@
+title: //h2
+body: //article[contains(@class, 'post')]
+author: //p[@class="author"]//a
+
+strip: //p[@class="tags"]
+strip: //div[@class="freebie"]
+
+test_url: https://www.photopills.com/articles/depth-of-field-guide
From 6a63c68e22661af6ccd634dd9dd4fea1216d06e8 Mon Sep 17 00:00:00 2001
From: Holger <3876469+HolgerAusB@users.noreply.github.com>
Date: Sun, 13 Aug 2023 23:15:31 +0200
Subject: [PATCH 06/13] Update abc.net.au.txt (#1181)
---
abc.net.au.txt | 27 ++++++++++-----------------
1 file changed, 10 insertions(+), 17 deletions(-)
diff --git a/abc.net.au.txt b/abc.net.au.txt
index 13eda7a6..eb3f7ef4 100644
--- a/abc.net.au.txt
+++ b/abc.net.au.txt
@@ -1,23 +1,16 @@
-title: //div[@class='article section']//h1
-author: //div[@class="byline"]/a
-date: //span[@class="timestamp"]
-body: //div[@class="page section"]
+body: //article/div[@data-component="ArticleWeb"]
-strip: //a[@class="inline-caption"]
-strip: //p[@class="ticker section noprint"]
-strip: //p[@class="topics"]
-strip: //h1
-strip: //div[@class="byline"]
-strip: //p[@class="published"]
-strip: //div[contains(@class,"featured-scroller")]
-strip_id_or_class: footer
+strip: //aside
+strip: //button
+strip: //header/div[contains(@class, 'Headline_meta')]
+strip: //div[contains(@class, 'ArticleWeb_publishedDate')]
+strip: //div[contains(@class, 'ArticleWeb_shareBottom')]/self::div | //div[contains(@class, 'ArticleWeb_shareBottom')]/following-sibling::*
+strip: //div[contains(@aria-label, 'media') and contains(@aria-label, 'embed')]
-tidy: no
+prune: no
-test_url: http://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892
-test_url: http://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054?section=business
+test_url: https://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054
+test_url: https://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892
-title: //h1
-body: //div[@id="body"]
test_url: https://www.abc.net.au/news/2023-06-16/fact-check-jacinta-nampijinpa-price-secret-documents-niaa/102485040
test_contains: widespread but bogus claim about a list of secret "demands" linked to the Voice referendum
From c502b849a19ee19e3977c1a9ed93d3a4e73ccd68 Mon Sep 17 00:00:00 2001
From: Holger <3876469+HolgerAusB@users.noreply.github.com>
Date: Sun, 20 Aug 2023 12:54:45 +0200
Subject: [PATCH 07/13] Create readingthechinadream.com.txt (#1187)
---
readingthechinadream.com.txt | 13 +++++++++++++
1 file changed, 13 insertions(+)
create mode 100644 readingthechinadream.com.txt
diff --git a/readingthechinadream.com.txt b/readingthechinadream.com.txt
new file mode 100644
index 00000000..65b4f322
--- /dev/null
+++ b/readingthechinadream.com.txt
@@ -0,0 +1,13 @@
+body: //div[@class='wsite-content']
+
+strip: //h2[@class='wsite-content-title']
+strip_id_or_class: commentArea
+strip: //div[contains(@class, 'wsite-search-element')]
+
+# a blank og:image was inserted (for FTR) from the meta tag, which causes a tall empty space
+find_string:
Date: Sun, 20 Aug 2023 12:55:11 +0200
Subject: [PATCH 08/13] Create legrandcontinent.eu.txt (#1186)
---
legrandcontinent.eu.txt | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
create mode 100644 legrandcontinent.eu.txt
diff --git a/legrandcontinent.eu.txt b/legrandcontinent.eu.txt
new file mode 100644
index 00000000..6294adbe
--- /dev/null
+++ b/legrandcontinent.eu.txt
@@ -0,0 +1,26 @@
+body: (//article[contains(concat(' ',normalize-space(@class),' '),' post ')])[1]
+author: //meta[@name='author']/@content
+
+# strip parts of the header, footer and boxes
+strip: //header/h1 | //header/div[1]
+strip: //article/div[contains(@class, '-mt-3 text-center')]
+strip: //dt[contains(text(), 'Auteur')]/ancestor::div[1]
+strip: //a[contains(@class, 'share')]/parent::div
+strip: //section[contains(concat(' ',normalize-space(@class),' '),' dive ')]
+strip_id_or_class: 'comment-block hidden'
+strip_id_or_class: modal_notes
+strip_id_or_class: wider-side
+strip_id_or_class: special-side
+
+# iframes are oversized
+strip: //iframe/@width
+strip: //iframe/@height
+
+# for right-to-left languages
+find_string: class="has-text-align-right"
+replace_string: dir="rtl"
+
+prune: no
+
+test_url: https://legrandcontinent.eu/fr/2023/08/03/jai-eu-des-doutes-deux-conversations-avec-robert-oppenheimer/
+test_url: https://legrandcontinent.eu/fr/2023/08/17/la-force-de-la-poesie-contre-la-guerre-une-conversation-et-trois-poemes-inedits-de-mostafa-hazara/
From fe1bbc3e777378e8f52ba6169f5fa5a1a23ea118 Mon Sep 17 00:00:00 2001
From: Holger <3876469+HolgerAusB@users.noreply.github.com>
Date: Sun, 20 Aug 2023 12:55:29 +0200
Subject: [PATCH 09/13] Create abendzeitung-muenchen.de.txt (#1185)
---
abendzeitung-muenchen.de.txt | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
create mode 100644 abendzeitung-muenchen.de.txt
diff --git a/abendzeitung-muenchen.de.txt b/abendzeitung-muenchen.de.txt
new file mode 100644
index 00000000..df9ed449
--- /dev/null
+++ b/abendzeitung-muenchen.de.txt
@@ -0,0 +1,22 @@
+body: //article
+
+strip_id_or_class: teaserbox
+strip_id_or_class: aufmacherbox
+strip_id_or_class: mehrzumthema
+strip_id_or_class: artdetail_time
+strip_id_or_class: artdetail_social
+strip_id_or_class: artdetail_desc
+
+strip: //footer
+
+# needed for wallabag:
+strip: //picture/source
+
+# strip additional text within figure after caption
+# wallabag/f43.me shows double captions here
+# see second image at
+# https://www.abendzeitung-muenchen.de/muenchen/wahlkampf-in-bayern-soeder-und-aiwanger-ringen-um-die-bierzelt-herrschaft-art-921576
+strip: //figure/figcaption/following-sibling::text() | //figure/figcaption/following-sibling::p
+
+test_url: https://www.abendzeitung-muenchen.de/muenchen/mvg-fast-alle-fahrschein-automaten-in-muenchen-kommen-weg-art-921634
+test_url: https://www.abendzeitung-muenchen.de/muenchen/wahlkampf-in-bayern-soeder-und-aiwanger-ringen-um-die-bierzelt-herrschaft-art-921576
From 217fab75ed30473b7b22c1131587230e4cc3f884 Mon Sep 17 00:00:00 2001
From: Holger <3876469+HolgerAusB@users.noreply.github.com>
Date: Sun, 20 Aug 2023 12:57:53 +0200
Subject: [PATCH 10/13] Add files via upload (#1184)
* reflect slight design-changes of the ippen.media engine
* now supporting author and date selectors
* better image handling
* should now also work better with wallabag
---
24auto.de.txt | 26 ++++++++++++++++++++------
24garten.de.txt | 26 ++++++++++++++++++++------
24hamburg.de.txt | 26 ++++++++++++++++++++------
24rhein.de.txt | 26 ++++++++++++++++++++------
24vita.de.txt | 26 ++++++++++++++++++++------
az-online.de.txt | 26 ++++++++++++++++++++------
bgland24.de.txt | 26 ++++++++++++++++++++------
buzzfeed.de.txt | 26 ++++++++++++++++++++------
bw24.de.txt | 26 ++++++++++++++++++++------
chiemgau24.de.txt | 26 ++++++++++++++++++++------
come-on.de.txt | 26 ++++++++++++++++++++------
costanachrichten.com.txt | 26 ++++++++++++++++++++------
dasgelbeblatt.de.txt | 26 ++++++++++++++++++++------
deichstube.de.txt | 26 ++++++++++++++++++++------
echo24.de.txt | 26 ++++++++++++++++++++------
extratipp.com.txt | 26 ++++++++++++++++++++------
fehmarn24.de.txt | 26 ++++++++++++++++++++------
fnp.de.txt | 26 ++++++++++++++++++++------
fr.de.txt | 26 ++++++++++++++++++++------
fuldaerzeitung.de.txt | 26 ++++++++++++++++++++------
giessener-allgemeine.de.txt | 26 ++++++++++++++++++++------
hallo-muenchen.de.txt | 26 ++++++++++++++++++++------
hanauer.de.txt | 26 ++++++++++++++++++++------
heidelberg24.de.txt | 26 ++++++++++++++++++++------
hersfelder-zeitung.de.txt | 26 ++++++++++++++++++++------
hna.de.txt | 26 ++++++++++++++++++++------
in-muenchen.de.txt | 26 ++++++++++++++++++++------
ingame.de.txt | 26 ++++++++++++++++++++------
innsalzach24.de.txt | 26 ++++++++++++++++++++------
kreis-anzeiger.de.txt | 26 ++++++++++++++++++++------
kreisbote.de.txt | 26 ++++++++++++++++++++------
kreiszeitung.de.txt | 26 ++++++++++++++++++++------
kurierverlag.de.txt | 26 ++++++++++++++++++++------
leinetal24.de.txt | 26 ++++++++++++++++++++------
lokalo24.de.txt | 26 ++++++++++++++++++++------
ludwigshafen24.de.txt | 26 ++++++++++++++++++++------
mangfall24.de.txt | 26 ++++++++++++++++++++------
mannheim24.de.txt | 26 ++++++++++++++++++++------
meine-anzeigenzeitung.de.txt | 26 ++++++++++++++++++++------
merkur.de.txt | 26 ++++++++++++++++++++------
news.bayern.txt | 26 ++++++++++++++++++++------
oktoberfest.bayern.txt | 26 ++++++++++++++++++++------
op-online.de.txt | 26 ++++++++++++++++++++------
ovb-online.de.txt | 26 ++++++++++++++++++++------
rga.de.txt | 26 ++++++++++++++++++++------
rosenheim24.de.txt | 26 ++++++++++++++++++++------
ruhr24.de.txt | 26 ++++++++++++++++++++------
sauerlandkurier.de.txt | 26 ++++++++++++++++++++------
soester-anzeiger.de.txt | 26 ++++++++++++++++++++------
solinger-tageblatt.de.txt | 26 ++++++++++++++++++++------
tz.de.txt | 26 ++++++++++++++++++++------
wa.de.txt | 26 ++++++++++++++++++++------
wasserburg24.de.txt | 26 ++++++++++++++++++++------
werra-rundschau.de.txt | 26 ++++++++++++++++++++------
wetterauer-zeitung.de.txt | 26 ++++++++++++++++++++------
wlz-online.de.txt | 26 ++++++++++++++++++++------
56 files changed, 1120 insertions(+), 336 deletions(-)
diff --git a/24auto.de.txt b/24auto.de.txt
index bff8bd35..20f08f35 100644
--- a/24auto.de.txt
+++ b/24auto.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.24auto.de/news/adac-panne-elektro-auto-starterbatterie-akku-wachstum-verbrenner-tuev-wartung-muenchen-starthilfe-91426845.html
diff --git a/24garten.de.txt b/24garten.de.txt
index e8f74e62..a07235ed 100644
--- a/24garten.de.txt
+++ b/24garten.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.24garten.de/mein-garten/basilikum-trocknen-einlegen-haltbarkeit-ernte-kraeuter-pesto-einfrieren-rosmarin-muenchen-zr-91424291.html
diff --git a/24hamburg.de.txt b/24hamburg.de.txt
index e659fb16..8d632ba9 100644
--- a/24hamburg.de.txt
+++ b/24hamburg.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.24hamburg.de/hamburg/neue-corona-regeln-im-hvv-ab-sofort-entfaellt-3g-91422638.html
diff --git a/24rhein.de.txt b/24rhein.de.txt
index e566efb3..eedf2295 100644
--- a/24rhein.de.txt
+++ b/24rhein.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.24rhein.de/koeln/kalk/koeln-videoueberwachung-kameras-kalk-orte-ueberblick-polizei-ausbau-91388746.html
diff --git a/24vita.de.txt b/24vita.de.txt
index aa22c996..89f4ea1c 100644
--- a/24vita.de.txt
+++ b/24vita.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.24vita.de/verbraucher/allesesser-vegetarier-veganer-deutschland-daab-fleisch-herstellung-speiseplan-dge-bonn-91417014.html
diff --git a/az-online.de.txt b/az-online.de.txt
index 3647e374..e9f67999 100644
--- a/az-online.de.txt
+++ b/az-online.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.az-online.de/uelzen/stadt-uelzen/mehr-gaeste-in-der-uelzener-stadthalle-als-erlaubt-91282522.html
diff --git a/bgland24.de.txt b/bgland24.de.txt
index 8008dd75..70dc19fb 100644
--- a/bgland24.de.txt
+++ b/bgland24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.bgland24.de/bgland/region-berchtesgaden/bischofswiesen-ort28409/schoenau-am-koenigssee-wieder-grosses-interesse-bei-fit-durch-unser-gmoa-2022-91285772.html
diff --git a/buzzfeed.de.txt b/buzzfeed.de.txt
index 5c9ae5d3..c3605102 100644
--- a/buzzfeed.de.txt
+++ b/buzzfeed.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.buzzfeed.de/buzz/19-buecher-die-leute-einfach-nicht-zu-ende-lesen-konnten-91400849.html
diff --git a/bw24.de.txt b/bw24.de.txt
index 827a5616..9fd50253 100644
--- a/bw24.de.txt
+++ b/bw24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.bw24.de/baden-wuerttemberg/wolf-baden-wuerttemberg-sichtung-zollernalbkreis-spaziergaengerin-raubtier-nachweis-91297558.html
diff --git a/chiemgau24.de.txt b/chiemgau24.de.txt
index 2291d1b0..a4991f9f 100644
--- a/chiemgau24.de.txt
+++ b/chiemgau24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.chiemgau24.de/chiemgau/chiemsee/gstadt-am-chiemsee-ort118608/gstadt-tourist-info-leiterin-berichtet-von-guter-auslastung-bei-gaestezahlen-trotz-corona-91288990.html
diff --git a/come-on.de.txt b/come-on.de.txt
index 84e8e716..10e08f1a 100644
--- a/come-on.de.txt
+++ b/come-on.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.come-on.de/kreis-mk/corona-mk-zahlen-omikron-heute-inzidenz-tote-todesfall-news-aktuell-luedenscheid-iserlohn-ticker-91403642.html
diff --git a/costanachrichten.com.txt b/costanachrichten.com.txt
index ba3c2f1a..57762a51 100644
--- a/costanachrichten.com.txt
+++ b/costanachrichten.com.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.costanachrichten.com/spanien/politik-wirtschaft/spanien-westsahara-pedro-sanchez-unabhaengigkeit-uno-marokko-fluechtlinge-ukraine-91422126.html
diff --git a/dasgelbeblatt.de.txt b/dasgelbeblatt.de.txt
index 80b66361..150ecb96 100644
--- a/dasgelbeblatt.de.txt
+++ b/dasgelbeblatt.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.dasgelbeblatt.de/lokales/bad-toelz-wolfratshausen/landkreis-bad-toelz-wolfratshausen-prozent-mehr-gewaltstraftaten-in-2021-91426756.html
diff --git a/deichstube.de.txt b/deichstube.de.txt
index f5eb5754..4420a763 100644
--- a/deichstube.de.txt
+++ b/deichstube.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.deichstube.de/news/werder-bremen-trotzt-problemen-was-der-sieg-gegen-sv-darmstadt-98-alles-aussagt-trainer-ole-werner-2-bundesliga-aufstieg-zr-91423406.html
diff --git a/echo24.de.txt b/echo24.de.txt
index 7f58f0a0..157b23f2 100644
--- a/echo24.de.txt
+++ b/echo24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.echo24.de/baden-wuerttemberg/corona-verordnung-baden-wuerttemberg-regeln-kretschmann-maskenpflicht-lockerungen-zr-91419449.html
diff --git a/extratipp.com.txt b/extratipp.com.txt
index 87aa05fc..d47055ac 100644
--- a/extratipp.com.txt
+++ b/extratipp.com.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.extratipp.com/tv/dsds/quoten-desaster-bei-dsds-setzt-rtl-die-castingshow-mit-florian-silbereisen-ab-91270865.html?trafficsource=idTopBox
diff --git a/fehmarn24.de.txt b/fehmarn24.de.txt
index 03f41c53..b64b50a7 100644
--- a/fehmarn24.de.txt
+++ b/fehmarn24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.fehmarn24.de/heiligenhafen/es-bleibt-das-grosse-ziel-die-innenstadt-von-heiligenhafen-soll-in-den-kommenden-jahren-attraktiver-werden-91421037.html
diff --git a/fnp.de.txt b/fnp.de.txt
index d0bf7a3f..81802343 100644
--- a/fnp.de.txt
+++ b/fnp.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.fnp.de/frankfurt/frankfurt-sachsenhausen-der-adlhochplatz-bekommt-einen-neuen-namen-91287869.html
diff --git a/fr.de.txt b/fr.de.txt
index 903bc6dd..267c3f47 100644
--- a/fr.de.txt
+++ b/fr.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.fr.de/frankfurt/die-nfl-kommt-nach-frankfurt-91329620.html
diff --git a/fuldaerzeitung.de.txt b/fuldaerzeitung.de.txt
index cf568ba4..73bc0642 100644
--- a/fuldaerzeitung.de.txt
+++ b/fuldaerzeitung.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.fuldaerzeitung.de/fulda/corona-fulda-rki-inzidenz-neuinfektionen-impfung-novavax-klinikum-91368571.html
diff --git a/giessener-allgemeine.de.txt b/giessener-allgemeine.de.txt
index 6f94ecfb..015294b6 100644
--- a/giessener-allgemeine.de.txt
+++ b/giessener-allgemeine.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.giessener-allgemeine.de/giessen/zwei-kuenstlerpositionen-vereint-91421463.html
diff --git a/hallo-muenchen.de.txt b/hallo-muenchen.de.txt
index 47774f72..7c801d42 100644
--- a/hallo-muenchen.de.txt
+++ b/hallo-muenchen.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.hallo-muenchen.de/muenchen/mitte/muenchen-sitzsteine-stachus-brunnen-corona-abstand-polizei-treffpunkt-maerz-mai-91420320.html
diff --git a/hanauer.de.txt b/hanauer.de.txt
index bc3549c8..94d5289f 100644
--- a/hanauer.de.txt
+++ b/hanauer.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.hanauer.de/hanau/hanauer-gericht-sieht-nackte-tatsachen-nach-misslungener-polizeiaktion-91420845.html
diff --git a/heidelberg24.de.txt b/heidelberg24.de.txt
index efe5b51a..96d062b7 100644
--- a/heidelberg24.de.txt
+++ b/heidelberg24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.heidelberg24.de/heidelberg/schlaegerei-heidelberg-altstadt-opfer-untere-strasse-polizei-einsatz-91425471.html
diff --git a/hersfelder-zeitung.de.txt b/hersfelder-zeitung.de.txt
index 386f106b..b6f66cdd 100644
--- a/hersfelder-zeitung.de.txt
+++ b/hersfelder-zeitung.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.hersfelder-zeitung.de/lokales/philippsthal-heringen/philippsthal-ort473874/alpaka-hengst-cosmo-tot-aufgefunden-91425061.html
diff --git a/hna.de.txt b/hna.de.txt
index 20ee7027..8c0a447f 100644
--- a/hna.de.txt
+++ b/hna.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.hna.de/kassel/eigentuemer-sollen-sich-ruesten-91290102.html
diff --git a/in-muenchen.de.txt b/in-muenchen.de.txt
index 5cdeb936..d2198972 100644
--- a/in-muenchen.de.txt
+++ b/in-muenchen.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.in-muenchen.de/theater/75-jahre-kleines-spiel-marionettentheater-fuer-erwachsene-in-muenchen-91228706.html
diff --git a/ingame.de.txt b/ingame.de.txt
index 03efd948..cb918ccf 100644
--- a/ingame.de.txt
+++ b/ingame.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.ingame.de/news/the-witcher-4-enthuellt-teaser-cd-projekt-red-release-offen-fans-konsolen-warschau-91426909.html
diff --git a/innsalzach24.de.txt b/innsalzach24.de.txt
index 83dd6498..2785f7ce 100644
--- a/innsalzach24.de.txt
+++ b/innsalzach24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.innsalzach24.de/innsalzach/holzland/toeging-am-inn-ort61987/toeging-inn-nistkaesten-fuer-mauersegler-am-rathausturm-91426965.html
diff --git a/kreis-anzeiger.de.txt b/kreis-anzeiger.de.txt
index 94f53488..03dd3193 100644
--- a/kreis-anzeiger.de.txt
+++ b/kreis-anzeiger.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.kreis-anzeiger.de/lokales/wetteraukreis/mit-hoher-sicherheit-ein-wolf-91287739.html
diff --git a/kreisbote.de.txt b/kreisbote.de.txt
index 29b9b0b5..c255153a 100644
--- a/kreisbote.de.txt
+++ b/kreisbote.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.kreisbote.de/lokales/landsberg/meldeverzuege-am-wochenende-90848517.html?trafficsource=idTopBox
diff --git a/kreiszeitung.de.txt b/kreiszeitung.de.txt
index 0845ed3f..fa461f64 100644
--- a/kreiszeitung.de.txt
+++ b/kreiszeitung.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.kreiszeitung.de/lokales/bremen/bremen-wie-sich-osterholz-tenever-veraendert-hat-91304118.html
diff --git a/kurierverlag.de.txt b/kurierverlag.de.txt
index eb85abaa..b615fda2 100644
--- a/kurierverlag.de.txt
+++ b/kurierverlag.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.kurierverlag.de/bayern/razzien-gegen-hasskriminalitaet-17-beschuldigte-in-bayern-zr-91426730.html
diff --git a/leinetal24.de.txt b/leinetal24.de.txt
index 68b87121..c813ffec 100644
--- a/leinetal24.de.txt
+++ b/leinetal24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.leinetal24.de/lokales/hildesheim/polizeiinspektion-hildesheim-informiert-ueber-die-aktuelle-kriminalstatistik-91420438.html
diff --git a/lokalo24.de.txt b/lokalo24.de.txt
index df89a972..df455259 100644
--- a/lokalo24.de.txt
+++ b/lokalo24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.lokalo24.de/lokales/fulda/ab-montag-neue-besucherregelungen-am-klinikum-fulda-91424837.html
diff --git a/ludwigshafen24.de.txt b/ludwigshafen24.de.txt
index e400e95d..5cf16b6f 100644
--- a/ludwigshafen24.de.txt
+++ b/ludwigshafen24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.ludwigshafen24.de/ludwigshafen/abfahrt-heinigstrasse-pruefung-monitoring-bruecke-hochstrasse-nord-auto-ludwigshafen-verkehr-91264126.html
diff --git a/mangfall24.de.txt b/mangfall24.de.txt
index d691409d..95209c24 100644
--- a/mangfall24.de.txt
+++ b/mangfall24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.mangfall24.de/bayern/hozkirchen-boff-im-foolskino-91427134.html
diff --git a/mannheim24.de.txt b/mannheim24.de.txt
index a51ee105..19bc70a8 100644
--- a/mannheim24.de.txt
+++ b/mannheim24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.mannheim24.de/mannheim/quadrate-fressgasse-fussgaengerzone-verkehrsversuch-facebook-kommentare-mannheim-91412006.html
diff --git a/meine-anzeigenzeitung.de.txt b/meine-anzeigenzeitung.de.txt
index 20ce136e..f499be79 100644
--- a/meine-anzeigenzeitung.de.txt
+++ b/meine-anzeigenzeitung.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.meine-anzeigenzeitung.de/bayern/corona-inzidenz-in-bayern-steigt-weiter-zr-91426750.html
diff --git a/merkur.de.txt b/merkur.de.txt
index 78415e8c..8ba2b8d5 100644
--- a/merkur.de.txt
+++ b/merkur.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.merkur.de/politik/ukraine-krieg-russland-putin-hyperschall-raketen-kinschal-video-twitter-fake-experten-91427019.html
diff --git a/news.bayern.txt b/news.bayern.txt
index a4a3453d..52cda94c 100644
--- a/news.bayern.txt
+++ b/news.bayern.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.news.bayern/nullsupermarkt-hamsterkaeufe-oel-mehl-nuernberg-zettel-notes-of-germany-frust-ukraine-mkr-91426735.html
diff --git a/oktoberfest.bayern.txt b/oktoberfest.bayern.txt
index ff66a3ed..05e070fc 100644
--- a/oktoberfest.bayern.txt
+++ b/oktoberfest.bayern.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.oktoberfest.bayern/wiesn/oktoberfest-2020-coronavirus-riesenrad-chef-ueber-wiesn-absage-ein-trauerspiel-zr-13717682.html
diff --git a/op-online.de.txt b/op-online.de.txt
index bbdd8f18..228b2f2f 100644
--- a/op-online.de.txt
+++ b/op-online.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.op-online.de/region/neu-isenburg/einige-strassenbauarbeiten-in-neu-isenburg-91412589.html
diff --git a/ovb-online.de.txt b/ovb-online.de.txt
index 7fe4112d..a36fe885 100644
--- a/ovb-online.de.txt
+++ b/ovb-online.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.ovb-online.de/rosenheim/chiemgau/prien-am-chiemsee-kampfjet-tornado-im-tiefflug-ueber-dem-chiemsee-angst-war-natuerlich-gleich-da-91414102.html
diff --git a/rga.de.txt b/rga.de.txt
index bb165256..f7a3bd5f 100644
--- a/rga.de.txt
+++ b/rga.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.rga.de/lokales/remscheid/corona-in-remscheid-maskenpflicht-im-rathaus-bleibt-91406940.html
diff --git a/rosenheim24.de.txt b/rosenheim24.de.txt
index b8c3848c..77435929 100644
--- a/rosenheim24.de.txt
+++ b/rosenheim24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.rosenheim24.de/rosenheim/rosenheim-land/rosenheim-corona-zahlen-explodieren-viele-corona-tote-patienten-91420512.html
diff --git a/ruhr24.de.txt b/ruhr24.de.txt
index b1baf5be..1fa5595e 100644
--- a/ruhr24.de.txt
+++ b/ruhr24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.ruhr24.de/dortmund/wolfssichtung-deutschland-ruhrgebiet-wolfswelpe-maerz-2022-video-wolf-dortmund-nrw-eving-woelfe-91410695.html
diff --git a/sauerlandkurier.de.txt b/sauerlandkurier.de.txt
index 3c1f17a7..637fe5d9 100644
--- a/sauerlandkurier.de.txt
+++ b/sauerlandkurier.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.sauerlandkurier.de/hochsauerlandkreis/corona-hsk-inzidenz-zahlen-test-impfung-schulen-news-heute-22-03-2022-aktuell-arnsberg-91424574.html
diff --git a/soester-anzeiger.de.txt b/soester-anzeiger.de.txt
index 4b912a99..b6dc1c51 100644
--- a/soester-anzeiger.de.txt
+++ b/soester-anzeiger.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.soester-anzeiger.de/lokales/kreis-soest/corona-virus-kreis-soest-inzidenz-booster-omikron-nrw-heute-dienstag-22-03-2022-daten-aktuell-regeln-zahlen-impfen-news-ticker-tot-impf-stoff-91372172.html
diff --git a/solinger-tageblatt.de.txt b/solinger-tageblatt.de.txt
index 70ce3c0e..67601559 100644
--- a/solinger-tageblatt.de.txt
+++ b/solinger-tageblatt.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.solinger-tageblatt.de/solingen/corona-in-solingen-vermehrt-vierte-impfung-in-arztpraxen-91406963.html
diff --git a/tz.de.txt b/tz.de.txt
index f2b06038..490de78c 100644
--- a/tz.de.txt
+++ b/tz.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.tz.de/muenchen/stadt/meunchen-sirenen-wegen-krieg-in-der-ukraine-neue-debatte-um-sirenen-in-muenchen-wie-warnt-die-stadt-im-ernstfall-zr-91423000.html
diff --git a/wa.de.txt b/wa.de.txt
index 04a04897..eda6926a 100644
--- a/wa.de.txt
+++ b/wa.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.wa.de/hamm/stadt-hamm-setzt-wollring-gegen-eichenprozessionsspinner-ein-91414576.html
diff --git a/wasserburg24.de.txt b/wasserburg24.de.txt
index 2470e32d..2801c20e 100644
--- a/wasserburg24.de.txt
+++ b/wasserburg24.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.wasserburg24.de/bayern/landkreis-berchtesgadener-land/berchtesgaden-naturschuetzer-stellen-sich-gegen-fussweg-bei-baustelle-91427485.html
diff --git a/werra-rundschau.de.txt b/werra-rundschau.de.txt
index a6157514..d3ba6963 100644
--- a/werra-rundschau.de.txt
+++ b/werra-rundschau.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.werra-rundschau.de/eschwege/corona-im-werra-meissner-kreis-sieben-tage-inzidenz-liegt-bei-1565-91404363.html
diff --git a/wetterauer-zeitung.de.txt b/wetterauer-zeitung.de.txt
index 1d3c2a79..7cfac856 100644
--- a/wetterauer-zeitung.de.txt
+++ b/wetterauer-zeitung.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.wetterauer-zeitung.de/wetterau/mahnwache-fuers-impfen-91318413.html
diff --git a/wlz-online.de.txt b/wlz-online.de.txt
index 74a385fe..f8280a42 100644
--- a/wlz-online.de.txt
+++ b/wlz-online.de.txt
@@ -1,12 +1,27 @@
-# Author: HolgerAusB | Version: 2022-03-22
+# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
+# @ippen.media site
#==========================
-# strip author box and social media box
-# these boxes sometimes prevented main picure to show up
+body: //article
+author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
+author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
+date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
+
+strip_id_or_class: id-Article-dateActionboxCombo
+strip_id_or_class: id-Article-kicker
+strip_id_or_class: id-Article-headline
+strip_id_or_class: id-AuthorList
+strip_id_or_class: id-StoryElement-inArticleReco
+strip_id_or_class: id-Comments
+strip_id_or_class: id-Story-timestamp
+strip_id_or_class: id-Story-authors
+strip_id_or_class: id-Story-interactionBar
+strip: //a[@title='Bilderzoom']
+
strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
@@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert
-tidy: yes
-prune: yes
-
+tidy: no
+prune: no
test_url: https://www.wlz-online.de/frankenberg/corona-impfpflicht-einrichtungen-in-waldeck-frankenberg-muessen-mitarbeiter-dem-kreis-melden-91426968.html
From e70a92008cb30271563fca0000ce1947aab558b7 Mon Sep 17 00:00:00 2001
From: Holger <3876469+HolgerAusB@users.noreply.github.com>
Date: Sun, 20 Aug 2023 12:58:25 +0200
Subject: [PATCH 11/13] Create geschichtedergegenwart.ch.txt (#1183)
---
geschichtedergegenwart.ch.txt | 6 ++++++
1 file changed, 6 insertions(+)
create mode 100644 geschichtedergegenwart.ch.txt
diff --git a/geschichtedergegenwart.ch.txt b/geschichtedergegenwart.ch.txt
new file mode 100644
index 00000000..3e5b980e
--- /dev/null
+++ b/geschichtedergegenwart.ch.txt
@@ -0,0 +1,6 @@
+http_header(User-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0
+
+body: //article
+
+test_url: https://geschichtedergegenwart.ch/natuerlicher-oder-kuenstlicher-staat-nation-und-imperium-im-russischen-staatsdenken/
+test_url: https://geschichtedergegenwart.ch/die-schweizerische-neutralitaet-ein-anachronismus/
From bd5e5a477dd78ae91f65d4f239f36554ef37af20 Mon Sep 17 00:00:00 2001
From: vrachnis
Date: Sun, 20 Aug 2023 11:59:00 +0100
Subject: [PATCH 12/13] Create jpmens.net.txt (#1182)
* Create jpmens.net.txt
* Update jpmens.net.txt
---
jpmens.net.txt | 9 +++++++++
1 file changed, 9 insertions(+)
create mode 100644 jpmens.net.txt
diff --git a/jpmens.net.txt b/jpmens.net.txt
new file mode 100644
index 00000000..aba1a5a6
--- /dev/null
+++ b/jpmens.net.txt
@@ -0,0 +1,9 @@
+body: //div[@class='post']
+date: //div[@class='info']/a
+author: //meta[@name="author"]/@content
+title: //div[@class="title"]/h1/a
+
+strip_id_or_class: postfoot
+prune: no
+
+test_url: https://jpmens.net/2023/08/14/using-events-to-drive-ansible/
From 19c129069552e7f70f3f7466befd4fc42efc9209 Mon Sep 17 00:00:00 2001
From: Holger <3876469+HolgerAusB@users.noreply.github.com>
Date: Mon, 21 Aug 2023 07:14:52 +0200
Subject: [PATCH 13/13] Ippen.media (#1188)
---
rga.de.txt => fingerprint.ippen.media.txt | 7 +++-
solinger-tageblatt.de.txt | 47 -----------------------
2 files changed, 6 insertions(+), 48 deletions(-)
rename rga.de.txt => fingerprint.ippen.media.txt (84%)
delete mode 100644 solinger-tageblatt.de.txt
diff --git a/rga.de.txt b/fingerprint.ippen.media.txt
similarity index 84%
rename from rga.de.txt
rename to fingerprint.ippen.media.txt
index f7a3bd5f..025b76d0 100644
--- a/rga.de.txt
+++ b/fingerprint.ippen.media.txt
@@ -1,5 +1,8 @@
# Author: HolgerAusB | Version: 2023-08-17
#
+# This works for any ippen.media website if
+# fingerprinting for ippen.media is activated
+#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
@@ -44,4 +47,6 @@ strip_id_or_class: id-Article-advert
tidy: no
prune: no
-test_url: https://www.rga.de/lokales/remscheid/corona-in-remscheid-maskenpflicht-im-rathaus-bleibt-91406940.html
+
+test_url: https://www.fr.de/frankfurt/die-nfl-kommt-nach-frankfurt-91329620.html
+test_url: https://www.merkur.de/politik/ukraine-krieg-russland-putin-hyperschall-raketen-kinschal-video-twitter-fake-experten-91427019.html
diff --git a/solinger-tageblatt.de.txt b/solinger-tageblatt.de.txt
deleted file mode 100644
index 67601559..00000000
--- a/solinger-tageblatt.de.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-# Author: HolgerAusB | Version: 2023-08-17
-#
-# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
-# https://www.example.com/hessen/rssfeed.rdf
-#
-# @ippen.media site
-#==========================
-
-body: //article
-author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
-author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
-date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime
-
-strip_id_or_class: id-Article-dateActionboxCombo
-strip_id_or_class: id-Article-kicker
-strip_id_or_class: id-Article-headline
-strip_id_or_class: id-AuthorList
-strip_id_or_class: id-StoryElement-inArticleReco
-strip_id_or_class: id-Comments
-strip_id_or_class: id-Story-timestamp
-strip_id_or_class: id-Story-authors
-strip_id_or_class: id-Story-interactionBar
-strip: //a[@title='Bilderzoom']
-
-strip_id_or_class: idjs-simpletab-nav-item
-strip_id_or_class: idjs-simpletab-content-close
-strip_id_or_class: id-AuthorList
-strip_id_or_class: id-Article-content-item-copyright
-strip_id_or_class: id-DonaldBreadcrumb
-strip_id_or_class: id-StoryElement-timestamp
-strip_id_or_class: id-StoryElement-authors
-strip_id_or_class: id-StoryElement-interactionBar
-strip_id_or_class: id-StoryElement-image-caption
-strip_id_or_class: id-Mediabox-info-el
-strip_id_or_class: id-Recommendation
-strip_id_or_class: id-js-relatedStory
-
-# strip adverts
-strip_id_or_class: id-Article-content-item.id-Article-advert.id-Article-advert--ad3
-strip_id_or_class: id-Article-advert.id-Article-content-item
-strip_id_or_class: id-Article-content-item.id-Article-advert
-strip_id_or_class: id-Article-advert--ad3
-strip_id_or_class: id-Article-advert
-
-tidy: no
-prune: no
-test_url: https://www.solinger-tageblatt.de/solingen/corona-in-solingen-vermehrt-vierte-impfung-in-arztpraxen-91406963.html