/*
 * mod_unzip -- transparently allows access to files within a ZIP archive
 *
 * Copyright (C) 2005 Oliver Baltzer <oliver@lowgra.de>
 *
 * Compile: 
 *          1. Set UNZIP_EXEC to the appropriate path to your unzip binary.
 *          2. apxs2 -i -c mod_unzip.c -lmagic
 *
 * Install: 
 *          1. In your httpd.conf or an appropriate unzip.load in
 *             mods-available/ type:
 *          
 *             LoadModule unzip_module /usr/lib/apache2/modules/mod_unzip.so
 *             AddType application/zip .zip
 *          
 *          2. restart webserver
 *  
 * License: BSD
 */

#include <httpd.h>
#include <http_config.h>
#include <http_protocol.h>
#include <ap_config.h>

#include <string.h>
#include <libgen.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <magic.h>
#include <fcntl.h>

/* Absolute path of the unzip binary that is exec'ed for every request. */
#define UNZIP_EXEC "/usr/bin/unzip"

/* Handler name the request must carry for this module to serve it, and the
 * number of characters of it that are compared (the length of ZIP_MIME
 * without the terminating NUL). */
#define ZIP_MIME "application/zip"
#define ZIP_MIME_LEN 15

/* Size of the buffers used to read the output of unzip from the pipe. */
#define BUF_SIZE 4096
/* Size of the buffer holding one entry name parsed from the unzip listing. */
#define FILENAME_SIZE 256

/* printf-style templates for the generated directory listing page.
 * HTML_HEADER takes the archive name and the path inside the archive, twice
 * (title and heading); HTML_LISTING takes the link target and the link text.
 * HTML_EMPTY_LISTING is not referenced anywhere in this file. */
#define HTML_HEADER "<html><head><title>%s:%s</title></head>" \
    "<body><h1>Directory listing: %s:%s</h1><a href=\"../\">Up</a><br/><br/>\n"
#define HTML_LISTING "<a href=\"%s\">%s</a><br/>\n"
#define HTML_EMPTY_LISTING "empty directory\n"
#define HTML_FOOTER "</body></html>\n"

/* libmagic handle used for MIME type detection; it stays NULL until
 * unzip_get_file() opens it on first use and is then kept for later calls. */
static magic_t magic_cookie = NULL;

/*
 * Emits one HTML link for an archive entry if that entry is a direct child
 * of the directory being listed.
 *
 * req       request the listing line is written to
 * filename  entry name as reported by unzip (fn_len characters)
 * fn_len    length of filename
 * path      directory inside the archive that is being listed
 * path_len  length of path
 *
 * Nothing is written for the directory itself, for entries outside of path
 * and for entries that live in a sub-directory of path.
 */
void process_listing(request_rec* req, 
                     char* filename, const int fn_len, 
                     char* path, const int path_len)
{
    char base[256];
    int base_len;
    int i;
    const char* href;
    const char* label;

    /* only names that are strictly longer than path and start with it can
     * be entries of the directory */
    if(fn_len <= path_len || strncmp(filename, path, path_len) != 0)
        return;

    /* stop at the first / behind the prefix or at the end of the string */
    i = path_len;
    while(i < fn_len && filename[i] != '/')
        i++;

    /* There are two cases that belong to the current directory:
     * 1. a directory entry, whose only / is the last character
     * 2. a file entry, which does not contain any further /
     * A / anywhere before the last character means the entry lives in a
     * sub-directory and is not listed here. */
    if(i < fn_len - 1)
        return;

    /* Copy the part behind the prefix. The length is clamped and the
     * terminator is written explicitly, because strncpy() leaves the
     * buffer unterminated when the source fills it completely. */
    base_len = fn_len - path_len;
    if(base_len > (int)sizeof(base) - 1)
        base_len = (int)sizeof(base) - 1;
    memcpy(base, &filename[path_len], (size_t)base_len);
    base[base_len] = '\0';

    /* Entry names come from the archive and are not trusted: URI-escape
     * the link target and HTML-escape everything that ends up in markup. */
    href = ap_escape_html(req->pool, ap_os_escape_path(req->pool, base, 0));
    label = ap_escape_html(req->pool, base);
    ap_rprintf(req, HTML_LISTING, href, label);
}

/*
 * Tells whether an archive entry name lies below a given directory path.
 *
 * Compares the first path_len characters of filename and path and returns
 * 1 if they are equal, 0 otherwise.
 */
int path_exists(const char* filename, const char* path, int path_len)
{
    if(strncmp(filename, path, path_len) != 0)
        return 0;

    return 1;
}

/*
 * Writes an HTML listing of one directory inside the ZIP archive named by
 * req->filename.
 *
 * req   request to answer; its content type is set to text/html
 * path  directory inside the archive, without a leading slash ("" lists
 *       the root of the archive)
 *
 * The archive is listed by running UNZIP_EXEC with the options "-qq -l"
 * (passed through the UNZIP environment variable) and parsing its output.
 *
 * Returns OK on success, HTTP_INTERNAL_SERVER_ERROR if unzip could not be
 * run, its output could not be read or it did not exit with status 0, and
 * HTTP_NOT_FOUND if no entry of the archive starts with path.
 *
 * NOTE: the page header and any matching entries have already been written
 * to the response by the time one of the error codes is returned.
 */
int unzip_get_listing(request_rec* req, char* path)
{
    int pipefd[2];
    
    char buffer[BUF_SIZE];
    char filename[FILENAME_SIZE];
    
    pid_t cpid;
    int cstatus = 0;
    int gaps = 0;
    int gap = 1;
    int i;
    ssize_t num;
    int path_len = strlen(path);
    int fn_len = 0;
    int too_long = 0;
    int found = 0;
    
    char* archive = req->filename;
    char* barchive = basename(archive);
    const char* html_archive;
    const char* html_path;
    
    if(pipe(pipefd) == -1)
        return HTTP_INTERNAL_SERVER_ERROR;
    
    cpid = fork();
    if(cpid == -1)
    {
        /* no child was created, so nobody else will close the pipe */
        close(pipefd[0]);
        close(pipefd[1]);
        return HTTP_INTERNAL_SERVER_ERROR;
    }
    
    if(cpid == 0)
    {
        /* this is the child process */
        close(pipefd[0]);
        
        /* reconnect pipe with STDOUT */
        if(pipefd[1] != STDOUT_FILENO)
        {
            if(dup2(pipefd[1], STDOUT_FILENO) != STDOUT_FILENO)
                /* unable to connect with STDOUT */
                _exit(EXIT_FAILURE);
            close(pipefd[1]);
        }
        setenv("UNZIP", "-qq -l", 1);
        execl(UNZIP_EXEC, "-l", archive, (char*)NULL);
        /* execl() only returns if unzip could not be run. Leave with
         * _exit() so that no exit handlers or buffers inherited from the
         * server process are run or flushed in the child. */
        _exit(EXIT_FAILURE);
    }
    
    /* close write pipe of parent */
    close(pipefd[1]);
    
    /* Print the HTML header. The path comes from the request URL and the
     * archive name from the file system, so both are escaped. */
    req->content_type = "text/html";
    html_archive = ap_escape_html(req->pool, barchive);
    html_path = ap_escape_html(req->pool, path);
    ap_rprintf(req, HTML_HEADER, html_archive, html_path, 
               html_archive, html_path); 
    
    /* zero filename */
    memset(filename, 0, FILENAME_SIZE);
    
    /* Every line of the unzip output consists of blank separated columns,
     * of which the 4th one is the name of the entry and runs to the end of
     * the line. "gap" is 1 while we are inside of blanks (or at the start
     * of a line), "gaps" counts the columns started so far. */
    while((num = read(pipefd[0], buffer, sizeof(char) * BUF_SIZE)) > 0)
    {
        for(i = 0; i < num; i++)
        {
            if(buffer[i] == '\n')
            {
                /* If we reached the end of the line and had 4 gaps, we
                 * should have the filename of a file. */
                if(gaps == 4)
                {
                    /* the directory exists as soon as one entry starts
                     * with its path */
                    if(!found)
                        found = path_exists(filename, path, path_len);
                    /* a name that did not fit into the buffer would give
                     * a wrong link, so it is left out of the listing */
                    if(!too_long)
                        process_listing(req, filename, fn_len, 
                                        path, path_len);
                }
                
                /* reset filename */
                memset(filename, 0, FILENAME_SIZE); 
                fn_len = 0;
                too_long = 0;
                
                /* Start the next line in the same state as the first one,
                 * so that a line without leading blanks still counts its
                 * first column. */
                gaps = 0;
                gap = 1;
            }
            /* Count gaps as long as we have not reached 4. The 4th is
             * the last gap, everything that follows belongs to the
             * filename. */
            else if((buffer[i] == ' ' || buffer[i] == '\t') 
                    && gaps < 4) 
                /* we just started a new gap */
                gap = 1;
            else if(gap == 1)
            {
                /* just finished a gap */
                gap = 0;
                gaps++;
            }
            
            /* collect filename if we are in 4th gap */
            if(gap == 0 && gaps == 4)
            {
                /* always keep room for the terminating NUL; the names are
                 * controlled by whoever created the archive */
                if(fn_len < FILENAME_SIZE - 1)
                    filename[fn_len++] = buffer[i];
                else
                    too_long = 1;
            }
        }
    }
    close(pipefd[0]);
    
    /* always collect the child, even if reading failed */
    if(waitpid(cpid, &cstatus, 0) == -1)
        return HTTP_INTERNAL_SERVER_ERROR;
    
    /* a read error, or unzip being killed or exiting with a status != 0,
     * means something went wrong */
    if(num < 0 || !WIFEXITED(cstatus) || WEXITSTATUS(cstatus) != 0)
        return HTTP_INTERNAL_SERVER_ERROR;
    
    /* if there was no item at all, the directory does not exist */
    if(!found)
        return HTTP_NOT_FOUND;
    
    /* print HTML footer */
    ap_rprintf(req, HTML_FOOTER);
    
    return OK;
}

/*
 * Sends one member of the ZIP archive named by req->filename as the
 * response body.
 *
 * req       request to answer
 * filename  name of the member inside the archive, without a leading slash
 *
 * The member is extracted by running UNZIP_EXEC with the options "-qq -p"
 * (passed through the UNZIP environment variable) and copying its output
 * to the client. The content type is guessed with libmagic from the first
 * chunk of data; "text/plain" is used if libmagic is not available or has
 * no answer.
 *
 * Returns OK on success, HTTP_INTERNAL_SERVER_ERROR if unzip could not be
 * started or its output could not be read, and HTTP_NOT_FOUND if unzip did
 * not exit with status 0.
 */
int unzip_get_file(request_rec* req, const char* filename)
{
    int pipefd[2];
    char buffer[BUF_SIZE];
    pid_t cpid;
    int cstatus = 0;
    int type_set = 0;
    ssize_t num;
    char* archive = req->filename;
    
    if(magic_cookie == NULL)
    {
        /* Only keep a handle that is fully usable. If opening or loading
         * the database fails the handle stays NULL and the next request
         * tries again. */
        magic_t cookie = magic_open(MAGIC_MIME | MAGIC_RAW | MAGIC_ERROR);
        if(cookie != NULL && magic_load(cookie, NULL) != 0)
        {
            magic_close(cookie);
            cookie = NULL;
        }
        magic_cookie = cookie;
    }
    
    if(pipe(pipefd) == -1)
        return HTTP_INTERNAL_SERVER_ERROR;
    
    cpid = fork();
    if(cpid == -1)
    {
        /* no child was created, so nobody else will close the pipe */
        close(pipefd[0]);
        close(pipefd[1]);
        return HTTP_INTERNAL_SERVER_ERROR;
    }
    
    if(cpid == 0)
    {
        /* this is the child process */
        close(pipefd[0]);
        
        /* reconnect pipe with STDOUT */
        if(pipefd[1] != STDOUT_FILENO)
        {
            if(dup2(pipefd[1], STDOUT_FILENO) != STDOUT_FILENO)
                /* unable to connect with STDOUT */
                _exit(EXIT_FAILURE);
            close(pipefd[1]);
        }
        setenv("UNZIP", "-qq -p", 1);
        execl(UNZIP_EXEC, "-p", archive, filename, (char*)NULL);
        /* execl() only returns if unzip could not be run. Leave with
         * _exit() so that no exit handlers or buffers inherited from the
         * server process are run or flushed in the child. */
        _exit(EXIT_FAILURE);
    }
    
    /* close write pipe of parent */
    close(pipefd[1]);
    while((num = read(pipefd[0], buffer, sizeof(char) * BUF_SIZE)) > 0)
    {
        /* determine MIME type from the first chunk of data */
        if(!type_set)
        {
            const char* detected = NULL;
            char* copy = NULL;
            
            if(magic_cookie != NULL)
                detected = magic_buffer(magic_cookie, buffer, (size_t)num);
            
            if(detected != NULL && detected[0] != '\0')
            {
                /* The string belongs to the libmagic handle, which is
                 * shared between requests, so the request gets its own
                 * copy from its pool. */
                size_t len = strlen(detected) + 1;
                copy = apr_palloc(req->pool, len);
                if(copy != NULL)
                    memcpy(copy, detected, len);
            }
            
            if(copy != NULL)
                req->content_type = copy;
            else
                req->content_type = "text/plain";
            type_set = 1;
        }
        ap_rwrite(buffer, num, req); 
    }
    close(pipefd[0]);
    
    /* always collect the child, even if reading failed */
    if(waitpid(cpid, &cstatus, 0) == -1 || num < 0)
        return HTTP_INTERNAL_SERVER_ERROR;
    
    /* unzip being killed or exiting with a status != 0 is reported as a
     * missing file */
    if(!WIFEXITED(cstatus) || WEXITSTATUS(cstatus) != 0)
        return HTTP_NOT_FOUND;
    
    return OK;
}

/*
 * Content handler: serves directory listings and members of ZIP archives.
 *
 * Requests whose handler name does not start with ZIP_MIME are declined.
 * A request for the archive itself (no path info) is redirected to the
 * same URI with a trailing slash, which is the root of the archive. Path
 * info that ends in a slash produces a directory listing, everything else
 * is looked up as a file inside the archive.
 *
 * Returns DECLINED, HTTP_MOVED_PERMANENTLY or the result of
 * unzip_get_listing() / unzip_get_file().
 */
static int unzip_handler(request_rec* req)
{
    char* path;
    
    /* check if we actually should handle this request, our MIME type has
     * to be application/zip; requests without any handler are not ours */
    if(req->handler == NULL 
       || strncmp(req->handler, ZIP_MIME, ZIP_MIME_LEN) != 0)
        /* do not handle the request */
        return DECLINED;
    
    /* get the path after the file */
    if(!req->path_info || !(*req->path_info))
    {
        /* If the path info does not exist, redirect to root. The new
         * location is built in the request pool: apr_table_setn() keeps
         * the pointer it is given instead of copying the string, so the
         * memory has to stay valid after this function returned. */
        size_t uri_len = strlen(req->uri);
        char* location = apr_palloc(req->pool, uri_len + 2);
        
        memcpy(location, req->uri, uri_len);
        location[uri_len] = '/';
        location[uri_len + 1] = '\0';
        apr_table_setn(req->headers_out, "Location", location);
        return HTTP_MOVED_PERMANENTLY;
    }
    
    /* the path info is not empty and starts with a slash, which is
     * skipped when the path is handed on */
    path = req->path_info;
    
    /* check if we are looking up a directory or an actual file */
    if(path[strlen(path) - 1] == '/')
        /* do directory listing */
        return unzip_get_listing(req, &path[1]);
    
    return unzip_get_file(req, &path[1]);
}

/*
 * Hook registration callback of the module: installs unzip_handler as a
 * content handler with APR_HOOK_MIDDLE ordering. The pool argument is not
 * used.
 */
static void unzip_register_hooks(apr_pool_t* p)
{
    (void)p;

    ap_hook_handler(unzip_handler,
                    NULL,
                    NULL,
                    APR_HOOK_MIDDLE);
}

/* Module description picked up by "LoadModule unzip_module ...". The module
 * has no configuration of its own; it only registers its hooks. */
module AP_MODULE_DECLARE_DATA unzip_module = {
    STANDARD20_MODULE_STUFF,
    NULL, /* no per-dir configuration */
    NULL, /* no per-dir merging */
    NULL, /* no per-server configuration */
    NULL, /* no per-server merging */
    NULL, /* no configuration commands */
    unzip_register_hooks /* register all other call-backs */
};

