Skip to content

Instantly share code, notes, and snippets.

@Dmitri-Sintsov
Created August 16, 2015 19:37
Show Gist options
  • Save Dmitri-Sintsov/7f100e147fb4d10837ed to your computer and use it in GitHub Desktop.
Save Dmitri-Sintsov/7f100e147fb4d10837ed to your computer and use it in GitHub Desktop.
Simple proxy server in Python with HTML text substitution.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import lxml.html
from lxml.etree import tostring as etree_tostring
import re
import SocketServer
import SimpleHTTPServer
import shutil
import StringIO
import urllib2
# Address the proxy server binds to (the loopback interface).
HOST = '127.0.0.1'
# TCP port the proxy server binds to.
PORT = 8232
class Proxy(SimpleHTTPServer.SimpleHTTPRequestHandler):
    """Request handler that mirrors one upstream site and rewrites its HTML.

    Every GET (and the inherited HEAD) is answered by fetching
    ``UPSTREAM + self.path`` with urllib2.  When the upstream reply is
    UTF-8 encoded HTML, a trade mark sign is appended to every six-letter
    word of the document text before the reply is relayed to the client.

    The body is fetched and rewritten *before* any header is sent so that
    the Content-Length given to the client matches the bytes that follow.

    Attributes set while a request is handled:
        read_handle: upstream response object, or None if the fetch failed.
        remote_headers: dict mapping upstream header names to their values.
        content: StringIO with the (possibly rewritten) body, or None.
    """

    # Site whose pages are served through the proxy.
    UPSTREAM = 'http://habrahabr.ru'

    # Elements whose text is code rather than prose; rewriting it would
    # corrupt scripts and style sheets ("margin", "border", ... have six
    # letters).
    RAW_TEXT_TAGS = ('script', 'style')

    # Upstream headers that are not relayed: hop-by-hop headers describe the
    # upstream connection only, and Content-Length is recomputed because the
    # body may have been rewritten.
    DROPPED_HEADERS = frozenset([
        'connection',
        'content-length',
        'keep-alive',
        'proxy-authenticate',
        'proxy-authorization',
        'te',
        'trailer',
        'transfer-encoding',
        'upgrade',
    ])

    # Defaults so that error responses produced before any upstream fetch
    # (e.g. 501 for an unsupported method) do not hit missing attributes.
    read_handle = None
    content = None
    _relay_headers = ()

    def send_error(self, code, message=None):
        """Send a bare error status: no upstream headers and no body.

        Args:
            code: HTTP status code to send.
            message: optional reason phrase; the default phrase for
                ``code`` is used when it is None.
        """
        self._relay_headers = ()
        self.remote_headers = {}
        self.content = None
        self.send_response(code, message)
        self.end_headers()

    def send_head(self):
        """Fetch the upstream resource, rewrite it and send the headers.

        On return ``self.content`` holds the body to relay, or None when the
        upstream server could not be reached (a 502 has been sent then).
        Always returns None, so the inherited ``do_HEAD`` sends headers only.
        """
        print(self.requestline)
        self.read_handle = None
        self.content = None
        self.remote_headers = {}
        self._relay_headers = ()

        status = 200
        try:
            self.read_handle = urllib2.build_opener().open(
                self.UPSTREAM + self.path)
        except urllib2.HTTPError as error:
            # An HTTPError is itself a readable response; relay the upstream
            # error page together with its status code.
            self.read_handle = error
            status = error.code
        except urllib2.URLError:
            # Upstream unreachable (DNS failure, refused connection, ...).
            self.send_error(502)
            return None

        body = StringIO.StringIO()
        try:
            info = self.read_handle.info()
            self._relay_headers = self._parse_header_lines(
                getattr(info, 'headers', None) or [])
            shutil.copyfileobj(self.read_handle, body)
        finally:
            self.read_handle.close()

        self.remote_headers = dict(self._relay_headers)
        self.content = body
        if self._is_utf8_html(self.remote_headers):
            self.process_html()

        self.send_response(status)
        self.end_headers()
        return None

    def end_headers(self):
        """Relay the upstream headers, add Content-Length, end the headers."""
        for name, value in self._relay_headers:
            if name.lower() not in self.DROPPED_HEADERS:
                self.send_header(name, value)
        if self.content is not None:
            self.send_header(
                'Content-Length', str(len(self.content.getvalue())))
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    @staticmethod
    def _parse_header_lines(lines):
        """Split raw ``Name: value`` lines into a list of (name, value).

        Lines without a colon and folded continuation lines (those starting
        with whitespace) are skipped rather than raising an error.
        """
        pairs = []
        for line in lines:
            if line[:1] in (' ', '\t'):
                continue
            name, colon, value = line.strip('\r\n').partition(':')
            name = name.strip()
            if colon and name:
                pairs.append((name, value.strip()))
        return pairs

    @staticmethod
    def _is_utf8_html(headers):
        """Tell whether ``headers`` describe an uncompressed UTF-8 HTML body.

        Header names are compared case-insensitively; whitespace, letter case
        and quotes in the Content-Type value are ignored.
        """
        lowered = dict((name.lower(), value) for name, value in headers.items())
        content_type = ''.join(lowered.get('content-type', '').split())
        content_type = content_type.replace('"', '').lower()
        encoding = lowered.get('content-encoding', 'identity').strip().lower()
        return (content_type == 'text/html;charset=utf-8'
                and encoding == 'identity')

    def process_text(self, text):
        """Return ``text`` with a trade mark sign after each six-letter word.

        A word is a run of exactly six Unicode letters (no digits, no
        underscores) delimited by word boundaries.
        """
        # u'\u2122' is the trade mark sign.
        return re.sub(
            u'(^|\\b)([^\\W\\d_]{6})($|\\b)',
            u'\\1\\2\u2122\\3',
            text,
            flags=re.UNICODE
        )

    def process_html(self):
        """Rewrite the text of the HTML held in ``self.content`` in place.

        ``self.content`` is replaced by a new StringIO containing the
        document serialised as UTF-8.  An empty body is left untouched.
        """
        raw = self.content.getvalue()
        if not raw.strip():
            # The lxml parser rejects empty documents.
            return
        html = lxml.html.fromstring(raw)
        for element in html.iter(tag='*'):
            if (element.tag not in self.RAW_TEXT_TAGS
                    and element.text is not None):
                element.text = self.process_text(element.text)
            # The tail is the text *after* the closing tag, so it is ordinary
            # document text even when the element itself is a script/style.
            if element.tail is not None:
                element.tail = self.process_text(element.tail)
        self.content = StringIO.StringIO(etree_tostring(
            html, method='html', encoding='utf-8', standalone=True
        ))

    def do_GET(self):
        """Answer a GET request with the (possibly rewritten) upstream body."""
        self.send_head()
        if self.content is not None:
            self.content.seek(0)
            shutil.copyfileobj(self.content, self.wfile)
# Start the proxy on HOST:PORT with Proxy as the request handler.
httpd = SocketServer.ForkingTCPServer((HOST, PORT), Proxy)
print("Serving at {0}:{1}".format(HOST, PORT))
try:
    httpd.serve_forever()
except KeyboardInterrupt:
    # Ctrl-C is the normal way to stop the script; exit without a traceback.
    pass
finally:
    # Release the listening socket so the port can be reused right away.
    httpd.server_close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment