/*
*
* Paros and its related class files.
* 
* Paros is an HTTP/HTTPS proxy for assessing web application security.
* Copyright (C) 2003-2004 Chinotec Technologies Company
* 
* This program is free software; you can redistribute it and/or
* modify it under the terms of the Clarified Artistic License
* as published by the Free Software Foundation.
* 
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* Clarified Artistic License for more details.
* 
* You should have received a copy of the Clarified Artistic License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/

package org.parosproxy.paros.core.spider;

import java.io.IOException;
import java.util.List;

import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.parosproxy.paros.network.HttpHeader;
import org.parosproxy.paros.network.HttpMalformedHeaderException;
import org.parosproxy.paros.network.HttpMessage;
import org.parosproxy.paros.network.HttpMethodHelper;
import org.parosproxy.paros.network.HttpResponseHeader;
import org.parosproxy.paros.network.HttpStatusCode;


/**
 * Worker thread of the spider.
 *
 * Each thread repeatedly takes the next not-yet-visited item from the queue
 * shared with the parent {@link Spider}, fetches its response if the message
 * does not carry one yet, hands the HTML to a {@link Collector} and records
 * the message as visited.  The thread ends when it is asked to stop or when
 * it finds the queue empty.
 */
public class SpiderThread extends Thread {

    /** File suffixes (without the leading dot) of resources that are never queued for crawling. */
    private static final String[] NEGLECT_SUFFIXES = {"gif", "jpg", "bmp", "mp3", "arj", "doc", "swf", "pdf", "mpg", "wmv", "zip"};

    /** Non-text responses whose declared content length exceeds this value are not processed. */
    private static final int MAX_NON_TEXT_CONTENT_LENGTH = 200000;

    /** Source of the per-thread id; incremented by the constructor without synchronization. */
    private static int threadCount = 1;

    private Spider parent = null;
    // Written through setStop() and read by the running thread: volatile
    // guarantees the running thread sees the request to stop.
    private volatile boolean stop = false;
    private List queue = null;
    private HttpMessageQueue visitedLink = null;
    private HttpMethodHelper helper = new HttpMethodHelper();
    // Written by the running thread and read through isCompleted().
    private volatile boolean completed = false;
    private Collector collector = null;
    private int threadId = 0;

    /**
     * Create a daemon worker bound to the queue and visited-link list of the given spider.
     * @param parent the spider owning this thread.
     */
    SpiderThread(Spider parent) {
        this.parent = parent;
        queue = parent.getQueue();
        visitedLink = parent.getVisitedLink();
        collector = new Collector(this);
        this.setDaemon(true);
        this.setPriority(Thread.NORM_PRIORITY - 1);
        this.threadId = threadCount++;
    }

    /**
     * @return Returns the stop.
     */
    boolean isStop() {
        return stop;
    }

    /**
     * @param stop The stop to set.
     */
    void setStop(boolean stop) {
        this.stop = stop;
    }

    /**
     * Crawl queued items until stopped or until the queue is found empty, then
     * notify the parent so it can check whether all threads have completed.
     * The thread is only flagged as completed when the queue is empty on exit.
     */
    public void run() {

        while (!isStop() && !queue.isEmpty()) {
            try {
                QueueItem item = null;

                // take the first queued item which has not been visited yet
                synchronized (queue) {
                    while (!queue.isEmpty()) {
                        QueueItem candidate = (QueueItem) queue.remove(0);
                        if (!visitedLink.contains(candidate.getMessage())) {
                            item = candidate;
                            break;
                        }
                    }
                }

                if (item != null) {
                    parent.SpiderProgress(item);
                    crawl(item.getMessage(), item.getDepth());
                }

            } catch (Exception e) {
                // a failing item must not terminate the worker
                e.printStackTrace();
            }
        }

        if (queue.isEmpty()) {
            completed = true;
        }

        parent.checkIfAllThreadCompleted();
    }

    /**
     * Send the request of the message and store the response in the message.
     * Redirects are followed and the conditional request headers
     * (If-Modified-Since, If-None-Match) are removed before sending.
     * @param msg the message whose request is sent and whose response is filled in.
     * @return true if the response was stored in the message; false if the
     *         response is to be neglected (see isNeglectResponse).
     * @throws HttpException
     * @throws IOException
     * @throws HttpMalformedHeaderException
     */
    private boolean readMsgResponse(HttpMessage msg) throws HttpException, IOException, HttpMalformedHeaderException {
        HttpMethod method = null;
        try {
            method = helper.createRequestMethod(msg.getRequestHeader(), msg.getRequestBody());
            method.setFollowRedirects(true);
            method.removeRequestHeader(HttpHeader.IF_MODIFIED_SINCE);
            method.removeRequestHeader(HttpHeader.IF_NONE_MATCH);
            parent.getHttpSender().executeMethod(method);

            HttpResponseHeader resHeader = HttpMethodHelper.getHttpResponseHeader(method);

            // Clear the Transfer-Encoding header (originally noted as removing
            // "Transfer-Encoding: chunked"); presumably the body read below is
            // already de-chunked - TODO confirm.
            resHeader.setHeader(HttpHeader.TRANSFER_ENCODING, null);
            if (isNeglectResponse(resHeader)) {
                return false;
            }

            msg.setResponseHeader(resHeader);
            msg.getResponseBody().append(method.getResponseBody());
            return true;
        } finally {
            // method is still null if creating the request failed; releasing
            // unconditionally would hide that failure behind a NullPointerException
            if (method != null) {
                method.releaseConnection();
            }
        }
    }

    /**
     * Process one message: fetch the response if the message has none (or only
     * a "not modified" one), report it to the parent, collect the links of the
     * page and record the message as visited.  Non-successful responses are
     * skipped.  Exceptions are printed and not propagated.
     * @param msg the message to crawl.
     * @param depth the depth of the message, passed on to the collector.
     */
    private void crawl(HttpMessage msg, int depth) {

        try {
            boolean needFetch = msg.getResponseHeader().isEmpty()
                    || msg.getResponseHeader().getStatusCode() == HttpStatusCode.NOT_MODIFIED;
            if (needFetch && !readMsgResponse(msg)) {
                return;
            }

            if (!HttpStatusCode.isSuccess(msg.getResponseHeader().getStatusCode())) {
                return;
            }
            parent.readURI(msg.cloneAll());

            Html html = new Html(msg.getRequestHeader().getURI(), msg.getResponseBody().toString());
            collector.collect(html, depth);

            // no more response processing needed.  remove from msg to save memory
            msg.setResponseHeader(new HttpResponseHeader());
            msg.getResponseBody().setBody("");
            parent.getVisitedLink().add(msg);

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Check if the request of the message is to be neglected.  It is neglected if:
     * - the URI has no scheme or host, or the scheme is not HTTP/HTTPS
     * - the URI is neither in the seed scope nor is its host in the session scope
     * - the path ends with an irrelevant file suffix (eg gif, jpg), in any case
     * - the message was visited before
     * - an equal message is already in the queue
     * This method only queries; it does not mark the message as visited.
     * @param msg the message to check.
     * @return true if the message should not be queued.
     * @throws URIException
     */
    private boolean isNeglect(HttpMessage msg) throws URIException {

        URI uri = msg.getRequestHeader().getURI();

        // check correct protocol
        String scheme = uri.getScheme();
        if (scheme == null || (!scheme.equalsIgnoreCase("HTTP") && !scheme.equalsIgnoreCase("HTTPS"))) {
            return true;
        }

        // compare if in seed's domain or inside session domain scope
        String host = uri.getHost();
        if (host == null) {
            return true;
        }
        String hostName = host.toUpperCase();
        if (!parent.isSeedScope(uri) && !parent.getSpiderParam().isInScope(hostName)) {
            return true;
        }

        // check if suffix relevant
        if (hasNeglectedSuffix(uri.getPath())) {
            return true;
        }

        // check if link visited before
        if (visitedLink.contains(msg)) {
            return true;
        }

        // neglect if link already in queue
        synchronized (queue) {
            for (int i = 0; i < queue.size(); i++) {
                QueueItem item = (QueueItem) queue.get(i);
                if (item.getMessage().equals(msg)) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Check if the path ends with "." followed by one of NEGLECT_SUFFIXES.
     * The comparison ignores case and does not depend on the default locale.
     * @param path the URI path, may be null.
     * @return true if the path ends with a neglected suffix; false otherwise or if path is null.
     */
    private static boolean hasNeglectedSuffix(String path) {
        if (path == null) {
            return false;
        }
        for (int i = 0; i < NEGLECT_SUFFIXES.length; i++) {
            String suffix = "." + NEGLECT_SUFFIXES[i];
            // a negative offset (path shorter than suffix) makes regionMatches return false
            if (path.regionMatches(true, path.length() - suffix.length(), suffix, 0, suffix.length())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Build a URI from a link resolved against a base URI.
     * Not referenced within this class at present.
     * @param base the URI the link is resolved against.
     * @param link the link, passed to the URI constructor as already escaped.
     * @return the resulting URI.
     * @throws URIException
     */
    private URI buildURI(URI base, String link) throws URIException {
        return new URI(base, link, true);
    }

    /**
     * Handle a message found while crawling: set its referer and, unless it is
     * to be neglected, queue it one level deeper and report it to the parent.
     * @param msg the message found.
     * @param referer value for the Referer request header.
     * @param currentDepth depth of the page on which the message was found.
     * @throws URIException
     */
    void foundURI(HttpMessage msg, String referer, int currentDepth) throws URIException {
        msg.getRequestHeader().setHeader(HttpHeader.REFERER, referer);
        if (!isNeglect(msg)) {
            parent.addQueue(msg, currentDepth + 1);
            parent.foundURI(msg);
        }
    }

    /**
     * Check if a response is to be neglected: not a success status, an image,
     * or non-text content with a content length above MAX_NON_TEXT_CONTENT_LENGTH.
     * Text responses are always accepted regardless of length.
     * @param resHeader the response header to check.
     * @return true if the response should not be processed.
     */
    private boolean isNeglectResponse(HttpResponseHeader resHeader) {

        if (!HttpStatusCode.isSuccess(resHeader.getStatusCode())) {
            return true;
        }

        if (resHeader.isImage()) {
            return true;
        }

        if (resHeader.isText()) {
            return false;
        }

        // do not process - not html file
        return resHeader.getContentLength() > MAX_NON_TEXT_CONTENT_LENGTH;
    }

    /**
     * @return Returns the completed.
     */
    public boolean isCompleted() {
        return completed;
    }
}
