#! /usr/bin/env python

"""Child process of the http plugin, implementing the Requester interface.

XXX There are several problems with this plugin.  Here's a list.

- It's very easy for the KP to forget to Close() the Requester
  instance it has created.  This will cause the child process to hang
  around forever, since ILU doesn't do garbage collection for us.  And
  I don't see how it could easily, really.  Note that this is
  different from zombie processes.  Those are correctly taken care of
  by pluginlib.spawnserver().  But our lingering processes are fully
  functional server processes.  In theory their SBH may even be
  published somewhere else, but in practice it normally isn't.

- HTTP error codes returned by the server are translated into
  FileAPI.FileIOError exceptions.  The problem with this is that the
  KP has to parse the error message string to get the numeric error
  code.  Moreover, the headers that came along with the error code
  are lost.  All this makes using the GetWithInfo interface less
  pleasant than using urllib.urlopen() directly.

- There's almost a gaping security hole caused by urllib's support
  for pseudo URLs of the form file:/path.  Fortunately, urlparse
  doesn't return a hostname for these, so the access check fails.
  Still, other URL types may not be so lucky...

- I'm not too convinced that the rest of the access checking is
  completely secure.  But then neither is the security in the rest of
  the current release...

- It really should be called "URL" instead of "HTTP", since it
  supports all URL schemes supported by urllib.  I decided not to
  rename it since there are so many other problems with it, and since
  the renaming would affect at least 4 files plus the directory
  name...

"""

import sys
import os
import getopt
import string
import urllib
import urlparse

from koe.interfaces.stubs import HTTPAPI__skel
from koe.interfaces.stubs import FileAPI
from koe.common import FileImpl
from koe.common import ilutools
from koe.common import access
from koe.common import pathhack


class MyURLopener(urllib.FancyURLopener):

    # Revert default error behavior to that of the base class,
    # while still keeping the fancy 301/302 redirect handling.
    http_error_default = urllib.URLopener.http_error_default

    # Remove 401 error handling in favor of default handling,
    # because we don't want the plugin to ask for passwords.
    http_error_401 = http_error_default


class MyFileWrapper(FileImpl.FileWrapper):

    def __init__(self, fp):
        self._fp = fp

    def open(self, mode):
        pass


class Requester(HTTPAPI__skel.Requester):

    def __init__(self):
        self.http = MyURLopener()

    def Get(self, url):
        return self.get(url, 0)[0]

    def GetWithInfo(self, url):
        return self.get(url, 1)

    def get(self, url, doheaders):
        # Check the URL's host against the access configuration before
        # opening it.  The FQDN requirement also rejects pseudo URLs such
        # as file:/path, for which urlparse returns no hostname.
        path = pathhack.kos_root() + '/config/access.http.conf'
        p = access.LimitParser(filename=path)
        host = urlparse.urlparse(url)[1]
        user_passwd, host = urllib.splituser(host)
        host, port = urllib.splitport(host)
        if '.' not in host:
            raise FileAPI.FileIOError, \
                  "host must be fully qualified domain name: %s" % `host`
        if not p.is_allowed_to('get', host, port):
            raise FileAPI.FileIOError, \
                  "Permission denied %s" % str((host, port))
        try:
            fp = self.http.open(url)
        except IOError, msg:
            raise FileAPI.FileIOError, str(msg), sys.exc_traceback
        headers = []
        if doheaders:
            info = fp.info()
            for key in info.keys():
                headers.append([key, info[key]])
            headers.append(['.url', fp.geturl()])
        return MyFileWrapper(fp), headers

    def Close(self):
        ilutools.ExitMainLoop()

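# A minimal KP-side usage sketch, showing why Close() matters (first XXX
# item in the docstring above).  This is only a sketch: bind_requester()
# is a hypothetical helper -- how a KP actually turns this child's SBH
# into a Requester surrogate depends on pluginlib/ILU and isn't shown here.
#
#     req = bind_requester(sbh)           # hypothetical binding step
#     try:
#         fp, headers = req.GetWithInfo('http://www.example.com/')
#         # headers is a list of [name, value] pairs plus a final
#         # ['.url', <actual url>] entry; fp wraps the open urllib stream.
#     finally:
#         req.Close()   # without this, the child process lingers forever
#
# HTTP-level errors surface as FileAPI.FileIOError, with the numeric code
# embedded in the message string (second XXX item above).
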
def main():
    # The parent passes -f <fd>; this child writes its SBH (string binding
    # handle) to that descriptor so the spawning side can bind to the new
    # Requester, then serves requests until Close() is called.
    fd = -1
    opts, args = getopt.getopt(sys.argv[1:], 'f:')
    for opt, val in opts:
        if opt == '-f':
            fd = string.atoi(val)
    req = Requester()
    sbh = req.IluSBH()
    try:
        fp = os.fdopen(fd, 'w')
        fp.write(sbh)
        fp.close()
    except (IOError, os.error):
        # Missing or bad -f descriptor, or the parent's end went away.
        print "failed to start"
        sys.exit(1)
    # RunMainLoop() returns only after Close() calls ExitMainLoop().
    ilutools.RunMainLoop()
    sys.exit(1)


if __name__ == "__main__":
    main()