Skip to content

Commit

Permalink
Adiciona validação e extração de URLs válidas em URIPipe
Browse files Browse the repository at this point in the history
Adiciona validação e extração de URLs válidas em URIPipe
  • Loading branch information
robertatakenaka authored Jan 6, 2025
2 parents b5ad50f + 7068b98 commit da844a2
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 10 deletions.
27 changes: 17 additions & 10 deletions articlemeta/export_sci.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,17 +197,24 @@ def precond(data):
if not raw.link:
raise plumber.UnmetPrecondition()

@classmethod
def extract_url(cls, text):
    """Return the first well-formed http(s) URL found in ``text``.

    The citation link field may contain junk around the real address
    (for example ``"http:// http://www.scielo.br"``), so the text is
    searched rather than matched as a whole.

    A candidate is accepted when it has an ``http://`` or ``https://``
    scheme followed by a host ending in a dot plus at least two letters.
    An optional ``:port`` and an optional path, query string or fragment
    are kept as part of the result. Matching is case-insensitive.

    Args:
        text: Raw string that may contain a URL. ``None`` or an empty
            string is tolerated.

    Returns:
        The matched URL as a string, or ``None`` when nothing matches.
    """
    if not text:
        # Nothing to search; avoids a TypeError from ``re.search(None)``.
        return None

    pattern = (
        # scheme, optional "www.", host labels and an alphabetic TLD
        r'https?://(?:www\.)?[^\s/]+\.[a-z]{2,}'
        # optional port, so "host:8080/path" is not cut at the colon
        r'(?::\d+)?'
        # optional path, or a query/fragment directly after the host
        r'(?:/\S*|[?#]\S+)?'
    )
    # Scheme and host are case-insensitive, so "HTTP://WWW.SCIELO.BR" is valid.
    found = re.search(pattern, text, flags=re.IGNORECASE)
    return found.group(0) if found else None

@plumber.precondition(precond)
def transform(self, data):
raw, xml = data

elem_citation = xml.find('./element-citation')

elem = ET.Element('ext-link')
elem.set('ext-link-type', 'uri')
elem.set('href', raw.link)
elem.text = raw.link
elem_citation.append(elem)
if link := self.extract_url(raw.link):
elem = ET.Element('ext-link')
elem.set('ext-link-type', 'uri')
elem.set('href', link)
elem.text = link
elem_citation.append(elem)

try:
access_date = raw.access_date
Expand Down Expand Up @@ -1057,13 +1064,13 @@ def transform(self, data):
for citation in raw.citations:
ref = cit.deploy(citation)[1]
extlinks = ref.xpath("element-citation/ext-link[@ext-link-type='uri']")
try:
for extlink in extlinks:
for extlink in extlinks:
try:
url_parts = list(urlsplit(extlink.attrib["href"]))
url_parts[2] = quote(url_parts[2])
extlink.attrib["href"] = urlunsplit(url_parts)
except ValueError as e:
pass
except ValueError as e:
pass
reflist.append(ref)

return data
Expand Down
17 changes: 17 additions & 0 deletions tests/test_export_sci.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,23 @@ def test_xml_citation_url_pipe(self):
expected = xml.find('./element-citation/ext-link').text

self.assertEqual(u'http://www.scielo.br', expected)

def test_xml_citation_with_broken_url_pipe(self):
    """URIPipe emits only the valid URL when the link field has junk in it."""
    # The v37 value starts with a dangling "http:// " before the real URL.
    record = {
        'article': {},
        'title': {},
        'citations': [{'v37': [{'_': 'http:// http://www.scielo.br'}]}],
    }
    citation = Article(record).citations[0]

    # Minimal <ref><element-citation/></ref> tree for the pipe to fill in.
    ref_node = ET.Element('ref')
    ref_node.append(ET.Element('element-citation'))

    _, xml = self._xmlcitation.URIPipe().transform([citation, ref_node])

    link_text = xml.find('./element-citation/ext-link').text
    self.assertEqual(u'http://www.scielo.br', link_text)

def test_xml_citation_persongrouppipe_create_author_collab(self):
node = self._xmlcitation.PersonGroupPipe()._create_author(
Expand Down

0 comments on commit da844a2

Please sign in to comment.