#!/usr/bin/perl -w # browse zipfiles (in this same directory) like a filesystem via http # # (c) Heiko Hellweg (hellweg@snark.de) 2002 # current version should be findable at http://www.snark.de/tools/ # # License: do to it whatever you want: use, modify, redistribute, hate it... # no warranties that it does or does not perform in any predictable way. # ####################### # $Id: zipcat.cgi,v 1.5 2002/03/07 09:48:31 hh Exp $ ###################### use HTTP::Date ; ###################### # you may want to configure the following parameters to suit your needs # # location ot the unzip binary # (tested only with 'UnZip 5.42 of 14 January 2001, by Info-ZIP') my $unzip = '/usr/bin/unzip' ; # # should a specific file be served when requesting a directory? these are # tried in ascending order (and should be used case insensiteve by unzip). my @indexfiles = ("index.html", "index.htm") ; # # map fileExtensions to mimeTypes (use lowercase here) my $mimeMap = { htm => 'text/html', html => 'text/html', txt => 'text/plain', gif => 'image/gif', jpg => 'image/jpeg', jpeg => 'image/jpeg', png => 'image/png', doc => 'application/msword', ps => 'application/postscript', rtf => 'application/rtf', xml => 'text/xml', dtd => 'text/xml', css => 'text/css' } ; # # end of config section ###################### # debugging: this way it works on the commandline also if(!$ENV{'SCRIPT_NAME'} ) { $ENV{'SCRIPT_NAME'} = $0 ; $ENV{'SCRIPT_FILENAME'} = $0 ; $ENV{'PATH_INFO'} = shift || '' ; } if($ENV{SCRIPT_NAME} =~ /\/nph/) { # running in nph-mode? print("HTTP/1.0 200 OK\n") ; } if($ENV{'PATH_INFO'} && $ENV{'PATH_INFO'} ne "/") { if($ENV{'PATH_INFO'} eq '/showsource/nph-zipcat.cgi') { # don't run zipcat but rather show the source showSource($0) ; exit(0) ; } # try to serve the content of a zipfile my $path = unURL($ENV{'PATH_INFO'}) ; my $queryString = "" ; # will be ignored completely if($path =~ s/(\?.*)$//) { $queryString = $1 ; } # does it look sane? (better safe than sorry) if($path !~ /^[A-Za-z0-9\.\-\_\ \/]+$/ || $path =~ /\.\./) { print("ContentType: text/plain\n\n $path looks illegal to me\n") ; exit(0) ; } if($path =~ s!^/([^\/]+)/!/!) { my $zipfile = $1 ; my $base = $ENV{'SCRIPT_FILENAME'} ; $base =~ s/\/[^\/]+$/\/$zipfile/ ; print("ContentType: text/plain\n\n no way!\n") unless (-r $base) ; if($path =~ /\/$/) { # ending in a / ? browseDir($base, $path) ; } else { catFile($base, $path) ; } exit(0) ; } else { print("ContentType: text/plain\n\n") ; print("huh? what? where?\n") ; # foreach(sort keys %ENV) { print "$_ => $ENV{$_}\n"; } exit(0) ; } } else { # no path-info show, which zipfiles are available in the current dir my $path = $ENV{'SCRIPT_FILENAME'} ; $path =~ s/\/[^\/]+$// ; my $curdir = $path ; $curdir =~ s/.+\/// ; print("Content-Type: text/html\n\n\n") ; print("zipfiles in $curdir/\n") ; print("

zipfiles in $curdir/

\n") ; exit(0) ; } sub urlify { # URL encoding my ($str) = (@_) ; $str =~ s/([^a-zA-Z0-9\-\/\.])/sprintf("%%%0.2X", ord($1))/ge ; return $str ; } sub unURL { # undo URL encoding my ($str) = (@_) ; $str =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/ge; return $str ; } sub htmlify { # html-quote &, <, > my ($str) = (@_) ; $str =~ s/\&/\&\;/g ; $str =~ s/\/\>\;/g ; return $str ; } sub catFile { # output a specific file from within the zipfile my ($base, $path) = (@_) ; my $ext = $path ; $ext =~ s/^.+\.([^\.]+)$/$1/ ; $ext =~ tr/A-Z/a-z/ ; my $mimeType = $mimeMap->{$ext} || "application/octetstream" ; $path =~ s!^/!! ; # try to find size and last modified info @info = split(/\s+/, (split("\n", `$unzip -C -l $base $path`))[3]) ; my $time = time2str(str2time("$info[2] $info[3]")) || 0 ; my $size = $info[1] || 0 ; print("Content-Type: $mimeType\n") ; print("Last-Modified: $time\n") if $time ; print("Content-Length: $size\n") if $size ; print("Connection: close\n\n") ; system("$unzip -C -p $base $path") ; } sub browseDir { my ($base, $path) = (@_) ; my $searchfor = $path ; $searchfor =~ s/^\///; my $notfound = "" ; foreach(@indexfiles) { # look if indexfile is present in the dir my $index = (split("\n", `$unzip -C -l $base $searchfor$_`))[3] ; if($index !~ /--------/) { # index exists catFile($base, "$path$_") ; return ; } else { $notfound .= "\nno index file $searchfor$_" ; } } # so there is no index - browse! my $name = "$base" ; $name =~ s/.+\/// ; print("Content-Type: text/html\n\n") ; print("index of $path within $name\n") ; print("\n") ; print("

index of $path within $name

\n") ;
    print("parent$notfound\n") ;
    my $cmd = "$unzip -l $base '$searchfor*' |" ;
    my $matchpat = "$searchfor" . "[^/]+/?" ;
    my $zippar = $base ;
    $zippar =~ s/.+\/// ;

    my %knownElems = () ;

    if(open(RH, $cmd)) {
	while() {
	    chomp ;
	    if(/^(\s+\d+)\s+([\d\-]+\s+[\d\:]+)\s+(.+)/) {
		my $size = $1 ;
		my $timestamp = $2 ;
		my $filename = $3 ;
		if($filename =~ /^$searchfor([^\/]+\/?)$/) { 
                    # directory, not too deep?
		    print("$size $timestamp $1\n") ;
		    $knownElems{$1}++ ;
		}
		elsif($filename =~ /^$searchfor([^\/]+\/).+$/) { 
		    # a file within a deeper subdir - do we know the subdir?
		    if(! $knownElemss{$1}) {
			print("$size $timestamp $1\n") ;
			$knownElemss{$1}++ ;
		    }
		}
		else {
#		    print("$filename does not match\n") ;
		}
	    }
	    elsif(/^\s*\d+\s+\d+\s+files\s*$/ || /Archive:\s+/) {
		# wrong summary line or Archive-line containing real filename
		# - just drop it
#		print("wrong summary $_\n") ;
	    }
	    else {
		print "$_\n" ; # plain text
	    }
	}
	close(RH)
    }
    print("
\n") ; } sub showSource { my ($showfile) = (@_) ; my @stat = stat($showfile) ; print("Content-Type: text/plain\n") ; print("Content-Length: $stat[7]\n") ; print("Last-Modified: $stat[9]\n") ; print("\n") ; if(open(RH, $showfile)) { while() { print ; } close(RH) ; } else { print("ERROR! can not show you the source... ???\n") ; } } ###################### # $Log: zipcat.cgi,v $ # Revision 1.5 2002/03/07 09:48:31 hh # support mutliple index filenames, tried one after another # for directory requests; support zipfiles in which directories # don't neccessarily have their own line in unzip -l # # Revision 1.4 2002/03/04 12:37:14 hh # don't show the absolute path of the zipfiles anymore # # Revision 1.3 2002/03/04 11:02:33 hh # added special PATH_INFO/command 'showsource' # # Revision 1.2 2002/03/04 10:51:22 hh # cleaned up debug output. # only serve 'clean' filenames from within the zipfile # (A-Za-z0-9._ /-), so we won't have to be frightened # of somebody slipping shell characters into the # unzip commandline... # # Revision 1.1 2002/03/01 13:27:34 hh # seems to work # ######################