Continuing from the "Scraping webpages with Python and QWebElement" post, here is the second in this series, where a real search finally happens.
#!/usr/bin/python
# These lines will get us the modules we need.
import pprint
import sys

from PyQt4.QtCore import QUrl, SIGNAL
from PyQt4.QtGui import QApplication
from PyQt4.QtWebKit import QWebPage, QWebView
class Scrape(QApplication):
    """Qt application that submits a Google search and prints the result items.

    The flow is driven by the web view's ``loadFinished`` signal: the first
    page load triggers ``searchForm``, which fills in and submits the search
    form; the following page load triggers ``searchResults``, which prints
    every ``li.g`` element and leaves the event loop.
    """

    def __init__(self):
        # PyQt picks the QApplication constructor overload from the arguments
        # it receives. We have nothing to pass, so hand it a single None.
        super(Scrape, self).__init__(None)
        # Create a QWebView instance and store it.
        self.webView = QWebView()
        # Run searchForm when the view's loadFinished signal fires.
        self.webView.loadFinished.connect(self.searchForm)

    def load(self, url):
        """Start loading ``url`` (a string) into the stored QWebView."""
        # QWebView.load() needs a QUrl instance rather than a plain string.
        self.webView.load(QUrl(url))

    def _documentElement(self):
        """Return the root QWebElement of the currently loaded frame."""
        return self.webView.page().currentFrame().documentElement()

    def _fail(self, message):
        """Report ``message`` on stderr and leave the event loop with code 1."""
        sys.stderr.write(message + '\n')
        self.exit(1)

    def searchForm(self, ok=True):
        """Fill in the search box and click the search button.

        ``ok`` is the success flag delivered by ``loadFinished``; it defaults
        to True so the method can still be called without arguments.
        """
        if not ok:
            self._fail('Loading the search page failed.')
            return
        # The load is finished, so grab the root document element. It is a
        # QWebElement instance, which allows easier DOM interaction.
        documentElement = self._documentElement()
        # Find the search input element and the submit button up front.
        inputSearch = documentElement.findFirst('input[title="Google Search"]')
        searchButton = documentElement.findFirst('input[name=btnG]')
        # findFirst() returns a null element when nothing matches. Clicking a
        # null element does nothing, so no further loadFinished would arrive
        # and the event loop would never end; bail out instead.
        if inputSearch.isNull() or searchButton.isNull():
            self._fail('Could not find the search form on the page.')
            return
        inputSearch.setAttribute('value', 'drupal')
        # Disconnect ourselves from the signal.
        self.webView.loadFinished.disconnect(self.searchForm)
        # And connect the next function.
        self.webView.loadFinished.connect(self.searchResults)
        searchButton.evaluateJavaScript('this.click()')

    def searchResults(self, ok=True):
        """Print every ``li.g`` element of the loaded page, then quit.

        ``ok`` is the success flag delivered by ``loadFinished``; it defaults
        to True so the method can still be called without arguments.
        """
        if not ok:
            self._fail('Loading the search results failed.')
            return
        # As above, start from the root document element and collect all
        # g-classed list items.
        results = self._documentElement().findAll('li.g')
        # Change the resulting QWebElementCollection into a list so we can
        # easily iterate over it.
        for e in results.toList():
            # Just print the results. The parenthesised single-argument form
            # prints the same thing under the Python 2 print statement.
            print(e.toOuterXml().toAscii())
        # We are inside a Qt application and need to terminate that properly.
        self.exit()
# Instantiate our class.
my_scrape = Scrape()
# Load the Google homepage.
my_scrape.load('http://google.com/ncr')
# Start the Qt event loop and pass its return code on as the process exit
# status, so a caller can tell whether the run ended normally.
sys.exit(my_scrape.exec_())
Update: Daniel Wehner has written a real-world script based on this post to log in to Cisco authentication.
Commenting on this Story is closed.



![Popular open source software is more secure than unpopular open source software, because insecure software becomes unpopular fast. [That doesn't happen for proprietary software.]](../sites/all/themes/drupal4hu/images/bg-center/bg-center_4.png)














