-
Notifications
You must be signed in to change notification settings - Fork 0
/
12.py
42 lines (36 loc) · 1.71 KB
/
12.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# children in beautiful soup
from bs4 import BeautifulSoup as bs
# Sample document used by every example below.  The literal begins with a
# newline right after the opening quotes, so the parsed soup's first
# top-level node is a "\n" text node and the <html> tag sits at index 1.
html_doc = """
<html><head><title>Tutorials Point</title></head>
<body>
<p class="title"><b>The Biggest Online Tutorials Library, It's all Free</b></p>
<p class="prog">Top 5 most used Programming Languages are:
<a href="https://www.tutorialspoint.com/java/java_overview.htm" class="prog" id="link1">Java</a>,
<a href="https://www.tutorialspoint.com/cprogramming/index.htm" class="prog" id="link2">C</a>,
<a href="https://www.tutorialspoint.com/python/index.htm" class="prog" id="link3">Python</a>,
<a href="https://www.tutorialspoint.com/javascript/javascript_overview.htm" class="prog" id="link4">JavaScript</a> and
<a href="https://www.tutorialspoint.com/ruby/index.htm" class="prog" id="link5">C</a>;
as per online survey.</p>
<p class="prog">Programming Languages</p>
"""
soup = bs(html_doc, "html.parser")

# --- Children of a tag ------------------------------------------------------
# `.contents` is a list holding a tag's direct children.
h_tag = soup.head
print(h_tag, "<--h tag")
children = h_tag.contents
print(children, "<-- all children of head")

# <head> has a single child here (<title>); its own children are shown next.
child_1 = children[0]
print(child_1, "<--first child")
print(child_1.contents, "<--children of 1 st child")

# --- Children of the soup object itself -------------------------------------
# `soup.contents` holds the top-level nodes of the whole document.  Index 0 is
# the leading "\n" text node (see the note on html_doc), so index 1 is <html>.
print(soup.contents[1].name, "<--1 st child of the soup")

# --- `.children`: iterating instead of indexing -----------------------------
# `.contents` gives a list, while `.children` gives an iterator over the same
# direct children.  Materialize it once and reuse the list for both prints
# rather than walking the iterator twice.
print('\n', '\n')
top_level_nodes = list(soup.children)
print(top_level_nodes)
print(len(top_level_nodes))

# --- Strings are leaves -----------------------------------------------------
# A string can't have any children.  Per the original author's note, accessing
# `text.contents` fails with:
#     'NavigableString' object has no attribute 'contents'
text = soup.body.p.string