# audience.py
# Collects title, description, tags, category and view/like/comment counts for
# the first 10 items of a YouTube channel's uploads playlist and saves them to
# a CSV file.
import os

import nltk
import pandas as pd
import requests
# Fetch the VADER lexicon needed by SentimentIntensityAnalyzer below.
# nltk.download() is invoked on every run of the script.
nltk.download('vader_lexicon')

# SECURITY: the API key used to be committed in this file. It is now read from
# the YOUTUBE_API_KEY environment variable; the previously committed key should
# be treated as leaked and rotated.
# NOTE(review): when the variable is unset the key is an empty string, so the
# API requests made with it are expected to be rejected — confirm this is the
# desired failure mode.
API_KEY = os.environ.get('YOUTUBE_API_KEY', '')
CHANNEL_ID = 'UCAov2BBv1ZJav0c_yHEciAw'  # Replace with your actual channel ID

# Initialize VADER sentiment analyzer
from nltk.sentiment.vader import SentimentIntensityAnalyzer

sia = SentimentIntensityAnalyzer()
# Extra Hinglish vocabulary for VADER, as (entry, valence score) pairs.
# Positive scores mark favourable entries, negative scores unfavourable ones.
# NOTE(review): some entries are multi-word phrases; VADER appears to look up
# one token at a time, so those entries may never match — confirm.
hinglish_lexicon = dict([
    ("bhot", 2.0),           # very good
    ("acha", 1.5),           # good
    ("kya baat hai", 1.5),   # nice
    ("mast", 2.0),           # awesome
    ("sahi hai", 1.0),       # okay
    ("bura", -2.0),          # bad
    ("ganda", -2.5),         # dirty
    ("waste", -1.5),         # useless
    ("nahi", -1.0),          # no
    ("pathetic", -3.0),      # very bad
])

# Merge the custom scores into the analyzer's lexicon; an entry already
# present under the same key is overwritten.
sia.lexicon.update(hinglish_lexicon.items())
# Function to get the Uploads Playlist ID
def get_uploads_playlist_id(channel_id, api_key):
    """Return the ID of a channel's uploads playlist, or None on failure.

    Args:
        channel_id: ID of the YouTube channel to look up.
        api_key: YouTube Data API v3 key.

    Returns:
        The uploads playlist ID, or None when the request fails, the API
        answers with a non-200 status, or the response lists no channel.
    """
    url = 'https://www.googleapis.com/youtube/v3/channels'
    # Passing the query via ``params`` lets requests URL-encode the values.
    params = {'part': 'contentDetails', 'id': channel_id, 'key': api_key}
    try:
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException:
        print("Failed to get Uploads Playlist ID")
        return None

    if response.status_code != 200:
        print("Failed to get Uploads Playlist ID")
        return None

    # A 200 response can still carry no matching channel; avoid indexing
    # into a missing or empty ``items`` list.
    items = response.json().get('items') or []
    if not items:
        print("Failed to get Uploads Playlist ID")
        return None
    return items[0]['contentDetails']['relatedPlaylists']['uploads']
# Function to get the first 10 video IDs from the Uploads Playlist
def get_first_10_video_ids(playlist_id, api_key):
    """Return up to 10 video IDs from the given playlist.

    Args:
        playlist_id: ID of the playlist to read (the uploads playlist).
        api_key: YouTube Data API v3 key.

    Returns:
        A list of at most 10 video ID strings, in the order the API returned
        them. The list is empty when the request fails or the API answers
        with a non-200 status.
    """
    url = 'https://www.googleapis.com/youtube/v3/playlistItems'
    # Passing the query via ``params`` lets requests URL-encode the values.
    params = {
        'part': 'contentDetails',
        'playlistId': playlist_id,
        'maxResults': 10,
        'key': api_key,
    }
    try:
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException:
        print("Failed to get video IDs")
        return []

    if response.status_code != 200:
        print("Failed to get video IDs")
        return []

    items = response.json().get('items', [])
    video_ids = [item['contentDetails']['videoId'] for item in items]
    return video_ids[:10]  # Ensure only the first 10 videos are considered
# Function to get video details and perform analysis
def analyze_videos(video_ids, api_key):
    """Collect snippet and statistics data for each video ID.

    Args:
        video_ids: Iterable of YouTube video IDs.
        api_key: YouTube Data API v3 key.

    Returns:
        A list with one dict per video that could be fetched, with the keys
        'Video Title', 'Description', 'Tags', 'Category', 'View Count',
        'Like Count' and 'Comment Count'. Videos whose details cannot be
        fetched are reported on stdout and skipped.
    """
    url = 'https://www.googleapis.com/youtube/v3/videos'
    data = []
    # Category ID -> name for lookups that succeeded, so the same category is
    # not requested again for every video. 'Unknown' results are not cached,
    # which keeps the per-video retry the uncached code performed.
    category_names = {}

    for video_id in video_ids:
        # Fetch video details
        params = {'id': video_id, 'key': api_key, 'part': 'snippet,statistics'}
        try:
            # A timeout keeps a stalled connection from hanging the script.
            video_response = requests.get(url, params=params, timeout=10)
        except requests.RequestException:
            print(f"Failed to get details for video {video_id}")
            continue
        if video_response.status_code != 200:
            print(f"Failed to get details for video {video_id}")
            continue

        # A 200 response can carry an empty ``items`` list; skip such videos
        # instead of raising IndexError.
        items = video_response.json().get('items') or []
        if not items:
            print(f"Failed to get details for video {video_id}")
            continue

        snippet = items[0]['snippet']
        statistics = items[0].get('statistics', {})

        # Map the category ID to its name, reusing earlier lookups.
        category_id = snippet['categoryId']
        category_name = category_names.get(category_id)
        if category_name is None:
            category_name = get_category_name(category_id, api_key)
            if category_name != 'Unknown':
                category_names[category_id] = category_name

        data.append({
            'Video Title': snippet['title'],
            'Description': snippet['description'],
            # Join tags for better readability; tags are optional.
            'Tags': ', '.join(snippet.get('tags', [])),
            'Category': category_name,
            # Individual counters can be absent from ``statistics`` (for
            # example likeCount when like counts are hidden), so every
            # counter defaults to 0 rather than raising KeyError.
            'View Count': int(statistics.get('viewCount', '0')),
            'Like Count': int(statistics.get('likeCount', '0')),
            'Comment Count': int(statistics.get('commentCount', '0')),
        })
    return data
# Function to get video category name from category ID
def get_category_name(category_id, api_key):
    """Return the title of a video category, or 'Unknown' on failure.

    Args:
        category_id: YouTube video category ID.
        api_key: YouTube Data API v3 key.

    Returns:
        The category title, or 'Unknown' when the request fails, the API
        answers with a non-200 status, or no category is returned.
    """
    url = 'https://www.googleapis.com/youtube/v3/videoCategories'
    # Passing the query via ``params`` lets requests URL-encode the values.
    params = {'part': 'snippet', 'id': category_id, 'key': api_key}
    try:
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException:
        return 'Unknown'

    if response.status_code != 200:
        return 'Unknown'

    # ``.get`` covers both a missing ``items`` key and an empty list.
    items = response.json().get('items')
    if not items:
        return 'Unknown'
    return items[0]['snippet']['title']
# Function to get video comments
def get_video_comments(video_id, api_key):
    """Return the text of every top-level comment on a video.

    Pages through the commentThreads endpoint 100 threads at a time until no
    ``nextPageToken`` is returned.

    Args:
        video_id: ID of the YouTube video.
        api_key: YouTube Data API v3 key.

    Returns:
        A list of ``textDisplay`` strings, one per top-level comment. If a
        page cannot be fetched, a message is printed and the comments
        gathered so far are returned.
    """
    url = 'https://www.googleapis.com/youtube/v3/commentThreads'
    # Passing the query via ``params`` lets requests URL-encode the values,
    # including the page token added for follow-up pages.
    params = {
        'part': 'snippet',
        'videoId': video_id,
        'maxResults': 100,
        'key': api_key,
    }
    comments = []
    while True:
        try:
            # A timeout keeps a stalled connection from hanging the script.
            response = requests.get(url, params=params, timeout=10)
        except requests.RequestException:
            print(f"Failed to get comments for video {video_id}")
            break
        if response.status_code != 200:
            print(f"Failed to get comments for video {video_id}")
            break

        data = response.json()
        comments.extend(
            item['snippet']['topLevelComment']['snippet']['textDisplay']
            for item in data.get('items', [])
        )

        # Check if there's a next page
        next_page_token = data.get('nextPageToken')
        if not next_page_token:
            break
        params['pageToken'] = next_page_token
    return comments
# Step 1: resolve the channel's uploads playlist.
uploads_playlist_id = get_uploads_playlist_id(CHANNEL_ID, API_KEY)

# The remaining steps run only when the playlist lookup returned an ID.
if uploads_playlist_id:
    # Step 2: take the first 10 video IDs from that playlist.
    video_ids = get_first_10_video_ids(uploads_playlist_id, API_KEY)

    # Step 3: gather the per-video details.
    video_data = analyze_videos(video_ids, API_KEY)

    # Step 4: tabulate the list of dictionaries, show it, and write it to
    # a CSV file without the index column.
    df = pd.DataFrame(data=video_data)
    print(df)
    df.to_csv(path_or_buf='video_analysis_first_10.csv', index=False)