Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
GitHub Actions Bot committed Aug 21, 2023
2 parents e9f1439 + 19c1290 commit e543537
Show file tree
Hide file tree
Showing 67 changed files with 1,266 additions and 432 deletions.
26 changes: 20 additions & 6 deletions 24auto.de.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
# Author: HolgerAusB | Version: 2022-03-22
# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
# @ippen.media site
#==========================

# strip author box and social media box
# these boxes sometimes prevented the main picture from showing up
body: //article
author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime

strip_id_or_class: id-Article-dateActionboxCombo
strip_id_or_class: id-Article-kicker
strip_id_or_class: id-Article-headline
strip_id_or_class: id-AuthorList
strip_id_or_class: id-StoryElement-inArticleReco
strip_id_or_class: id-Comments
strip_id_or_class: id-Story-timestamp
strip_id_or_class: id-Story-authors
strip_id_or_class: id-Story-interactionBar
strip: //a[@title='Bilderzoom']

strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
Expand All @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert

tidy: yes
prune: yes

tidy: no
prune: no
test_url: https://www.24auto.de/news/adac-panne-elektro-auto-starterbatterie-akku-wachstum-verbrenner-tuev-wartung-muenchen-starthilfe-91426845.html
26 changes: 20 additions & 6 deletions 24garten.de.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
# Author: HolgerAusB | Version: 2022-03-22
# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
# @ippen.media site
#==========================

# strip author box and social media box
# these boxes sometimes prevented the main picture from showing up
body: //article
author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime

strip_id_or_class: id-Article-dateActionboxCombo
strip_id_or_class: id-Article-kicker
strip_id_or_class: id-Article-headline
strip_id_or_class: id-AuthorList
strip_id_or_class: id-StoryElement-inArticleReco
strip_id_or_class: id-Comments
strip_id_or_class: id-Story-timestamp
strip_id_or_class: id-Story-authors
strip_id_or_class: id-Story-interactionBar
strip: //a[@title='Bilderzoom']

strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
Expand All @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert

tidy: yes
prune: yes

tidy: no
prune: no
test_url: https://www.24garten.de/mein-garten/basilikum-trocknen-einlegen-haltbarkeit-ernte-kraeuter-pesto-einfrieren-rosmarin-muenchen-zr-91424291.html
26 changes: 20 additions & 6 deletions 24hamburg.de.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
# Author: HolgerAusB | Version: 2022-03-22
# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
# @ippen.media site
#==========================

# strip author box and social media box
# these boxes sometimes prevented the main picture from showing up
body: //article
author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime

strip_id_or_class: id-Article-dateActionboxCombo
strip_id_or_class: id-Article-kicker
strip_id_or_class: id-Article-headline
strip_id_or_class: id-AuthorList
strip_id_or_class: id-StoryElement-inArticleReco
strip_id_or_class: id-Comments
strip_id_or_class: id-Story-timestamp
strip_id_or_class: id-Story-authors
strip_id_or_class: id-Story-interactionBar
strip: //a[@title='Bilderzoom']

strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
Expand All @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert

tidy: yes
prune: yes

tidy: no
prune: no
test_url: https://www.24hamburg.de/hamburg/neue-corona-regeln-im-hvv-ab-sofort-entfaellt-3g-91422638.html
26 changes: 20 additions & 6 deletions 24rhein.de.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
# Author: HolgerAusB | Version: 2022-03-22
# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
# @ippen.media site
#==========================

# strip author box and social media box
# these boxes sometimes prevented the main picture from showing up
body: //article
author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime

strip_id_or_class: id-Article-dateActionboxCombo
strip_id_or_class: id-Article-kicker
strip_id_or_class: id-Article-headline
strip_id_or_class: id-AuthorList
strip_id_or_class: id-StoryElement-inArticleReco
strip_id_or_class: id-Comments
strip_id_or_class: id-Story-timestamp
strip_id_or_class: id-Story-authors
strip_id_or_class: id-Story-interactionBar
strip: //a[@title='Bilderzoom']

strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
Expand All @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert

tidy: yes
prune: yes

tidy: no
prune: no
test_url: https://www.24rhein.de/koeln/kalk/koeln-videoueberwachung-kameras-kalk-orte-ueberblick-polizei-ausbau-91388746.html
26 changes: 20 additions & 6 deletions 24vita.de.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
# Author: HolgerAusB | Version: 2022-03-22
# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
# @ippen.media site
#==========================

# strip author box and social media box
# these boxes sometimes prevented the main picture from showing up
body: //article
author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime

strip_id_or_class: id-Article-dateActionboxCombo
strip_id_or_class: id-Article-kicker
strip_id_or_class: id-Article-headline
strip_id_or_class: id-AuthorList
strip_id_or_class: id-StoryElement-inArticleReco
strip_id_or_class: id-Comments
strip_id_or_class: id-Story-timestamp
strip_id_or_class: id-Story-authors
strip_id_or_class: id-Story-interactionBar
strip: //a[@title='Bilderzoom']

strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
Expand All @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert

tidy: yes
prune: yes

tidy: no
prune: no
test_url: https://www.24vita.de/verbraucher/allesesser-vegetarier-veganer-deutschland-daab-fleisch-herstellung-speiseplan-dge-bonn-91417014.html
28 changes: 13 additions & 15 deletions abc.net.au.txt
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
title: //div[@class='article section']//h1
author: //div[@class="byline"]/a
date: //span[@class="timestamp"]
body: //div[@class="page section"]
body: //article/div[@data-component="ArticleWeb"]

strip: //a[@class="inline-caption"]
strip: //p[@class="ticker section noprint"]
strip: //p[@class="topics"]
strip: //h1
strip: //div[@class="byline"]
strip: //p[@class="published"]
strip: //div[contains(@class,"featured-scroller")]
strip_id_or_class: footer
strip: //aside
strip: //button
strip: //header/div[contains(@class, 'Headline_meta')]
strip: //div[contains(@class, 'ArticleWeb_publishedDate')]
strip: //div[contains(@class, 'ArticleWeb_shareBottom')]/self::div | //div[contains(@class, 'ArticleWeb_shareBottom')]/following-sibling::*
strip: //div[contains(@aria-label, 'media') and contains(@aria-label, 'embed')]

tidy: no
prune: no

test_url: http://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892
test_url: http://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054?section=business
test_url: https://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054
test_url: https://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892

test_url: https://www.abc.net.au/news/2023-06-16/fact-check-jacinta-nampijinpa-price-secret-documents-niaa/102485040
test_contains: widespread but bogus claim about a list of secret "demands" linked to the Voice referendum
22 changes: 22 additions & 0 deletions abendzeitung-muenchen.de.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
body: //article

strip_id_or_class: teaserbox
strip_id_or_class: aufmacherbox
strip_id_or_class: mehrzumthema
strip_id_or_class: artdetail_time
strip_id_or_class: artdetail_social
strip_id_or_class: artdetail_desc

strip: //footer

# needed for wallabag:
strip: //picture/source

# strip additional text within figure after caption
# wallabag/f43.me shows double captions here
# see second image at
# https://www.abendzeitung-muenchen.de/muenchen/wahlkampf-in-bayern-soeder-und-aiwanger-ringen-um-die-bierzelt-herrschaft-art-921576
strip: //figure/figcaption/following-sibling::text() | //figure/figcaption/following-sibling::p

test_url: https://www.abendzeitung-muenchen.de/muenchen/mvg-fast-alle-fahrschein-automaten-in-muenchen-kommen-weg-art-921634
test_url: https://www.abendzeitung-muenchen.de/muenchen/wahlkampf-in-bayern-soeder-und-aiwanger-ringen-um-die-bierzelt-herrschaft-art-921576
26 changes: 20 additions & 6 deletions az-online.de.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
# Author: HolgerAusB | Version: 2022-03-22
# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
# @ippen.media site
#==========================

# strip author box and social media box
# these boxes sometimes prevented the main picture from showing up
body: //article
author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime

strip_id_or_class: id-Article-dateActionboxCombo
strip_id_or_class: id-Article-kicker
strip_id_or_class: id-Article-headline
strip_id_or_class: id-AuthorList
strip_id_or_class: id-StoryElement-inArticleReco
strip_id_or_class: id-Comments
strip_id_or_class: id-Story-timestamp
strip_id_or_class: id-Story-authors
strip_id_or_class: id-Story-interactionBar
strip: //a[@title='Bilderzoom']

strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
Expand All @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert

tidy: yes
prune: yes

tidy: no
prune: no
test_url: https://www.az-online.de/uelzen/stadt-uelzen/mehr-gaeste-in-der-uelzener-stadthalle-als-erlaubt-91282522.html
26 changes: 20 additions & 6 deletions bgland24.de.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
# Author: HolgerAusB | Version: 2022-03-22
# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
# @ippen.media site
#==========================

# strip author box and social media box
# these boxes sometimes prevented the main picture from showing up
body: //article
author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime

strip_id_or_class: id-Article-dateActionboxCombo
strip_id_or_class: id-Article-kicker
strip_id_or_class: id-Article-headline
strip_id_or_class: id-AuthorList
strip_id_or_class: id-StoryElement-inArticleReco
strip_id_or_class: id-Comments
strip_id_or_class: id-Story-timestamp
strip_id_or_class: id-Story-authors
strip_id_or_class: id-Story-interactionBar
strip: //a[@title='Bilderzoom']

strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
Expand All @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert

tidy: yes
prune: yes

tidy: no
prune: no
test_url: https://www.bgland24.de/bgland/region-berchtesgaden/bischofswiesen-ort28409/schoenau-am-koenigssee-wieder-grosses-interesse-bei-fit-durch-unser-gmoa-2022-91285772.html
26 changes: 20 additions & 6 deletions buzzfeed.de.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
# Author: HolgerAusB | Version: 2022-03-22
# Author: HolgerAusB | Version: 2023-08-17
#
# to get a source-feed, try to add 'rssfeed.rdf' to the category-URL e.g.
# https://www.example.com/hessen/rssfeed.rdf
#
# @ippen.media site
#==========================

# strip author box and social media box
# these boxes sometimes prevented the main picture from showing up
body: //article
author: substring-after(//p[contains(@class, 'id-Story-authors')] , 'on:')
author: //div[contains(@class, 'id-AuthorList')]/descendant::*[contains(@class, 'id-Link')]
date: //p[contains(@class, 'id-Story-timestamp')]/descendant::time/@datetime

strip_id_or_class: id-Article-dateActionboxCombo
strip_id_or_class: id-Article-kicker
strip_id_or_class: id-Article-headline
strip_id_or_class: id-AuthorList
strip_id_or_class: id-StoryElement-inArticleReco
strip_id_or_class: id-Comments
strip_id_or_class: id-Story-timestamp
strip_id_or_class: id-Story-authors
strip_id_or_class: id-Story-interactionBar
strip: //a[@title='Bilderzoom']

strip_id_or_class: idjs-simpletab-nav-item
strip_id_or_class: idjs-simpletab-content-close
strip_id_or_class: id-AuthorList
Expand All @@ -27,7 +42,6 @@ strip_id_or_class: id-Article-content-item.id-Article-advert
strip_id_or_class: id-Article-advert--ad3
strip_id_or_class: id-Article-advert

tidy: yes
prune: yes

tidy: no
prune: no
test_url: https://www.buzzfeed.de/buzz/19-buecher-die-leute-einfach-nicht-zu-ende-lesen-konnten-91400849.html
Loading

0 comments on commit e543537

Please sign in to comment.