# Original listing metadata (from the source viewer): 142 lines, 4.4 KiB, Python.
"""Fetch one URL and write its content to stdout.

This version adds a Request object.
"""
|
|
|
|
import sys
|
|
import urllib.parse
|
|
from http.client import BadStatusLine
|
|
|
|
from asyncio import *
|
|
|
|
|
|
class Request:
    """A single HTTP/1.1 GET request for one URL.

    The constructor splits the URL into the pieces needed to open a
    connection and build the request line.  ``connect()`` opens the
    stream, ``send_request()`` writes the request line and headers, and
    ``get_response()`` returns a ``Response`` whose headers are already
    parsed.

    Attributes set by ``__init__``:
        url, verbose: the constructor arguments.
        parts: result of ``urllib.parse.urlparse(url)``.
        scheme, netloc, hostname, query: taken from ``parts``.
        ssl: True when the scheme is ``https``.
        port: explicit port from the URL, else 443 (https) or 80 (http).
        path: URL path, ``'/'`` when the URL has none.
        full_path: ``path`` plus ``'?' + query`` when a query is present.
        http_version, method: fixed to ``'HTTP/1.1'`` and ``'GET'``.
        headers: list of ``(name, value)`` tuples sent with the request.
        reader, writer: stream pair, ``None`` until ``connect()`` succeeds.
    """

    def __init__(self, url: str, verbose: bool = True) -> None:
        """Parse *url* and prepare the request.

        Raises:
            ValueError: if the URL scheme is not ``http`` or ``https``.
        """
        self.url = url
        self.verbose = verbose
        self.parts = urllib.parse.urlparse(self.url)
        self.scheme = self.parts.scheme
        # Raise explicitly instead of using ``assert``: assertions are
        # stripped under ``python -O`` and the bad scheme would slip through.
        if self.scheme not in ('http', 'https'):
            raise ValueError('unsupported URL scheme: %r' % (url,))
        self.ssl = self.parts.scheme == 'https'
        self.netloc = self.parts.netloc
        self.hostname = self.parts.hostname
        self.port = self.parts.port or (443 if self.ssl else 80)
        self.path = (self.parts.path or '/')
        self.query = self.parts.query
        if self.query:
            self.full_path = '%s?%s' % (self.path, self.query)
        else:
            self.full_path = self.path
        self.http_version = 'HTTP/1.1'
        self.method = 'GET'
        self.headers = []
        self.reader = None
        self.writer = None

    def _log(self, *args) -> None:
        """Print *args* to stderr when verbose output is enabled."""
        if self.verbose:
            print(*args, file=sys.stderr)

    async def connect(self) -> None:
        """Open the connection and store the stream pair on the instance."""
        self._log('* Connecting to %s:%s using %s' %
                  (self.hostname, self.port, 'ssl' if self.ssl else 'tcp'))
        self.reader, self.writer = await open_connection(self.hostname,
                                                         self.port,
                                                         ssl=self.ssl)
        self._log('* Connected to %s' %
                  (self.writer.get_extra_info('peername'),))

    def putline(self, line: str) -> None:
        """Write *line* to the connection, latin-1 encoded, CRLF-terminated."""
        self.writer.write(line.encode('latin-1') + b'\r\n')

    async def send_request(self) -> None:
        """Write the request line, the headers and the terminating blank line.

        A ``Host`` header built from the URL's netloc is inserted at the
        front of ``self.headers`` unless one is already present (compared
        case-insensitively).
        """
        request = '%s %s %s' % (self.method, self.full_path, self.http_version)
        self._log('>', request)
        self.putline(request)
        if not any(key.lower() == 'host' for key, _ in self.headers):
            self.headers.insert(0, ('Host', self.netloc))
        for key, value in self.headers:
            line = '%s: %s' % (key, value)
            self._log('>', line)
            self.putline(line)
        # An empty line marks the end of the header section.
        self.putline('')

    async def get_response(self) -> 'Response':
        """Return a ``Response`` for this connection with its headers read."""
        response = Response(self.reader, self.verbose)
        await response.read_headers()
        return response
|
|
|
|
|
|
class Response:
    """An HTTP response read from a stream reader.

    ``read_headers()`` parses the status line and headers; ``read()``
    then returns the body as bytes.

    Attributes:
        reader: the stream reader passed to the constructor.
        verbose: when true, received lines are echoed to stderr.
        http_version: e.g. ``'HTTP/1.1'`` (``None`` until headers are read).
        status: integer status code, e.g. ``200``.
        reason: reason phrase, e.g. ``'Ok'``; empty if the server sent none.
        headers: list of ``(name, value)`` tuples in arrival order.
    """

    def __init__(self, reader, verbose: bool = True) -> None:
        self.reader = reader
        self.verbose = verbose
        self.http_version = None  # 'HTTP/1.1'
        self.status = None  # 200
        self.reason = None  # 'Ok'
        self.headers = []  # [('Content-Type', 'text/html')]

    def _log(self, *args) -> None:
        """Print *args* to stderr when verbose output is enabled."""
        if self.verbose:
            print(*args, file=sys.stderr)

    async def getline(self) -> str:
        """Read one line, decode it as latin-1 and strip trailing whitespace.

        Returns an empty string at end of stream.
        """
        raw = await self.reader.readline()
        return raw.decode('latin-1').rstrip()

    async def read_headers(self) -> None:
        """Parse the status line and the header section.

        Raises:
            BadStatusLine: if the status line has fewer than two fields.
            ValueError: if the status code is not an integer, or a header
                line contains no colon.
        """
        status_line = await self.getline()
        self._log('<', status_line)
        status_parts = status_line.split(None, 2)
        if len(status_parts) == 2:
            # The reason phrase is optional ("HTTP/1.1 200").
            status_parts.append('')
        if len(status_parts) != 3:
            raise BadStatusLine(status_line)
        self.http_version, status, self.reason = status_parts
        self.status = int(status)
        while True:
            header_line = await self.getline()
            if not header_line:
                break
            self._log('<', header_line)
            if header_line[0] in ' \t' and self.headers:
                # Continuation line: leading whitespace means this line
                # extends the value of the previous header.
                key, value = self.headers[-1]
                folded = '%s %s' % (value, header_line.strip())
                self.headers[-1] = (key, folded.strip())
                continue
            key, value = header_line.split(':', 1)
            self.headers.append((key, value.strip()))
        self._log()

    def _is_chunked(self) -> bool:
        """Return True when the last listed transfer coding is ``chunked``."""
        codings = ','.join(value for key, value in self.headers
                           if key.lower() == 'transfer-encoding')
        return codings.split(',')[-1].strip().lower() == 'chunked'

    async def _read_chunked(self) -> bytes:
        """Decode a chunked body and return the joined chunk data.

        Stops at the zero-size chunk (consuming any trailer lines) or at
        end of stream, whichever comes first.
        """
        blocks = []
        while True:
            size_line = await self.reader.readline()
            if not size_line:
                # Stream ended without a terminating chunk; keep what arrived.
                break
            # Chunk extensions after ';' are ignored.
            size_field = size_line.decode('latin-1').split(';', 1)[0].strip()
            size = int(size_field, 16)
            if size == 0:
                # Skip optional trailer fields up to the closing blank line.
                while (await self.reader.readline()).strip():
                    pass
                break
            blocks.append(await self.reader.readexactly(size))
            # Each chunk's data is followed by a CRLF.
            await self.reader.readline()
        return b''.join(blocks)

    async def read(self) -> bytes:
        """Read and return the response body.

        The body length is chosen in this order: chunked transfer coding,
        then ``Content-Length``, then everything up to end of stream.
        """
        if self._is_chunked():
            return await self._read_chunked()
        nbytes = None
        for key, value in self.headers:
            if key.lower() == 'content-length':
                nbytes = int(value)
                break
        if nbytes is None:
            return await self.reader.read()
        return await self.reader.readexactly(nbytes)
|
|
|
|
|
|
async def fetch(url, verbose=True):
    """Fetch *url* with a GET request and return the response body as bytes.

    Args:
        url: an ``http`` or ``https`` URL.
        verbose: when true, progress and headers are echoed to stderr.

    The connection is closed before returning, whether or not the fetch
    succeeded.
    """
    request = Request(url, verbose)
    try:
        await request.connect()
        await request.send_request()
        response = await request.get_response()
        return await response.read()
    finally:
        # ``writer`` is still None if connect() failed before opening a stream.
        if request.writer is not None:
            request.writer.close()
|
|
|
|
|
|
def main():
    """Command-line entry point: fetch the URL argument and write it to stdout.

    Usage: ``prog [-v] URL``.  The first argument other than ``-v`` is the
    URL; ``-v`` anywhere on the command line enables verbose output on
    stderr.  Exits with a usage message when no URL is given.
    """
    verbose = '-v' in sys.argv
    urls = [arg for arg in sys.argv[1:] if arg != '-v']
    if not urls:
        sys.exit('usage: %s [-v] URL' % sys.argv[0])
    # Create the loop explicitly: calling get_event_loop() with no running
    # loop is deprecated and fails on recent Python versions.
    loop = new_event_loop()
    set_event_loop(loop)
    try:
        body = loop.run_until_complete(fetch(urls[0], verbose))
    finally:
        loop.close()
    sys.stdout.buffer.write(body)
|
|
|
|
|
|
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
|