Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to fix current bugs in the project, and adds an example #17

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions examples/example1.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@
gs.results_per_page = 50
results = gs.get_results()
for res in results:
print res.title.encode('utf8')
print res.desc.encode('utf8')
print res.url.encode('utf8')
if res.title is not None:
print res.title.encode('utf8')
if res.desc is not None:
print res.desc.encode('utf8')
if res.url is not None:
print res.url.encode('utf8')
print
except SearchError, e:
print "Search failed: %s" % e
Expand Down
34 changes: 34 additions & 0 deletions examples/example4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/python
#
# Justin Vieira ([email protected])
# http://www.rancorsoft.com -- Let's Rock Together.
#
# This program does a Google search for "super test results" and returns
# all results.
#

from xgoogle.search import GoogleSearch, SearchError
from threading import Thread
from random import randint
import time

try:
gs = GoogleSearch("super test results")
gs.results_per_page = 50
displayedResults = 0
results = gs.get_results()
while displayedResults < gs.num_results:
for res in results:
if res.title is not None:
print res.title.encode('utf8')
if res.desc is not None:
print res.desc.encode('utf8')
if res.url is not None:
print res.url.encode('utf8')
displayedResults += gs.results_per_page
print
time.sleep(randint(15,60))
results = gs.get_results()
except SearchError, e:
print "Search failed: %s" % e

12 changes: 7 additions & 5 deletions xgoogle/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class GoogleSearch(object):
SEARCH_URL_1 = "http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&num=%(num)d&btnG=Google+Search"
NEXT_PAGE_1 = "http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&num=%(num)d&start=%(start)d"

def __init__(self, query, random_agent=False, debug=False, lang="en", tld="com", re_search_strings=None):
def __init__(self, query, random_agent=True, debug=False, lang="en", tld="com", re_search_strings=None):
self.query = query
self.debug = debug
self.browser = Browser(debug=debug)
Expand Down Expand Up @@ -237,8 +237,8 @@ def _extract_results(self, soup):
def _extract_result(self, result):
    """Build a SearchResult from one parsed result node.

    Results with a missing title, URL, or description are still
    returned (fields may be None); callers are responsible for
    checking each field before use.
    """
    title, url = self._extract_title_url(result)
    desc = self._extract_description(result)
    # The old guard that returned None on any missing field was
    # deliberately dropped so partial results are not silently lost;
    # the commented-out remnant of it is removed rather than kept as
    # dead code.
    return SearchResult(title, url, desc)

def _extract_title_url(self, result):
Expand Down Expand Up @@ -322,7 +322,8 @@ def _extract_info(self, soup):
return {'from': int(matches.group(1)), 'to': int(matches.group(2)), 'total': int(matches.group(3))}

def _extract_results(self, soup):
results = soup.findAll('p', {'class': 'g'})
#results = soup.findAll('p', {'class': 'g'})
results = soup.findAll('li','g')
ret_res = []
for result in results:
eres = self._extract_result(result)
Expand Down Expand Up @@ -352,7 +353,8 @@ def _extract_title_url(self, result):
return title, url

def _extract_description(self, result):
desc_td = result.findNext('td')
#desc_td = result.findNext('td')
desc_div = result.find('span', 'st')
if not desc_td:
self._maybe_raise(ParseError, "Description tag in Google search result was not found", result)
return None
Expand Down