package edu.unika.aifb.rdf.rdfcrawler;

import java.net.*;

/**
 * This class represents a data structure to store a single
 * URL with full status information - crawling depth,
 * referrer=parent URL, processing status (see below)
 * and exceptions encountered while crawling to the given URL.
 */
public class URLStruct {

    /**
     * The URL exactly as it was handed to the constructor. Kept so that
     * the record can still be reported when the string cannot be
     * converted to a java.net.URL.
     */
    private String rawurl;

    /** The parent (referring) URL exactly as handed to the constructor; may be null. */
    private String p_rawurl;


    /**
     * URL represented by this URLStruct.
     * Stays null until {@link #assertURLIsOK()} has parsed it successfully.
     */
    private URL url;

    /**
     * "Parent URL", i.e. the URL which referred to the given URL.
     * It is null for top URLs, and also until {@link #assertURLIsOK()}
     * has parsed it successfully.
     */
    private URL p_url;

    /** depth of crawling */
    private int depth;

    /** current status of this URL - one of the four status codes below */
    private int status;

    /**
     * Exception encountered when processing/crawling this URL.
     * Only meaningful when status is {@link #ERROR}; set via {@link #setException(Exception)}.
     */
    private Exception ex;

    /** Status code: the URL has not been processed yet (initial state). */
    public static final int NOT_PROCESSED=0;
    /** Status code: the URL is currently being processed. */
    public static final int BEING_PROCESSED=1;
    /** Status code: the URL has been processed. */
    public static final int PROCESSED=2;
    /** Status code: an error occurred for this URL. */
    public static final int ERROR=3;

    /**
     * Returns a human-readable name for the current status.
     *
     * @return "not processed", "being processed", "processed" or "error";
     *         the empty string if the status is none of the known codes
     */
    public String printStatus() {
        switch (status) {
        case NOT_PROCESSED: return "not processed";
        case BEING_PROCESSED: return "being processed";
        case PROCESSED: return "processed";
        case ERROR: return "error";
        default: return "";
        }
    }


    /**
     * Constructor to make URL records with all the crawling information.
     * The strings are only stored here; they are parsed into java.net.URL
     * objects later by {@link #assertURLIsOK()}.
     *
     * @param url   the URL string of this record
     * @param p_url the URL string of the referring page, or null for a top URL
     * @param depth the crawling depth of this URL
     */
    public URLStruct(String url, String p_url, int depth) {
        // store the raw strings
        this.rawurl = url;
        this.p_rawurl = p_url;
        this.depth = depth;
        // The parameters shadow the fields of the same name, so the fields
        // must be addressed through "this" to actually be reset.
        this.url = null;
        this.p_url = null;
        // set the status to unvisited, assuming there is no error
        this.status = NOT_PROCESSED;
    }

    /**
     * Parses the stored raw strings into {@code url} and {@code p_url}.
     * The parent is parsed first; if it is malformed the exception is
     * thrown before the URL itself is parsed. If only the URL itself is
     * malformed, the parent URL has already been set when the exception
     * is thrown.
     *
     * @throws MalformedURLException if the parent URL string (when present)
     *         or the URL string cannot be converted to a java.net.URL
     */
    public void assertURLIsOK() throws MalformedURLException {
        // if p_rawurl is not null, it is usually a correct URL
        if (p_rawurl != null) {
            p_url = new URL(p_rawurl);
        }
        // this may really throw an exception
        url = new URL(rawurl);
    }

    /**
     * Writeout of this data structure in XML format: the opening part.
     * Produces an {@code <rdf:Description>} start tag carrying the URL,
     * parent, depth and status as attributes, preceded by an
     * {@code <a:successor>} start tag when a parsed parent URL exists.
     * The matching end tags are produced by {@link #closeString()}.
     *
     * @return the opening XML fragment, terminated by a newline
     */
    public String openString() {
        String result = "";
        if (getParentURL() != null) {
            result += "<a:successor>\n";
        }
        // prefer the parsed form, fall back to the raw string
        if (url != null) {
            result += "<rdf:Description about='" + DocInstance.encode(url.toString());
        }
        else {
            result += "<rdf:Description about='" + DocInstance.encode(rawurl);
        }
        if (p_url != null) {
            result += "' a:parent='" + DocInstance.encode(p_url.toString());
        }
        else {
            // NOTE(review): p_rawurl is null for top URLs; this relies on
            // DocInstance.encode accepting null - confirm.
            result += "' a:parent='" + DocInstance.encode(p_rawurl);
        }
        result += "' a:depth='" + depth;
        result += "' a:status='" + printStatus();
        // The status can be set to ERROR without setException ever being
        // called, so guard against a missing exception instead of failing
        // with a NullPointerException while serializing.
        if (status == ERROR && ex != null) {
            result += "' a:exception='" + DocInstance.encode(ex.toString());
        }
        result += "'>\n";
        return result;
    }

    /**
     * Writeout of this data structure in XML format: the closing part.
     * Closes the elements opened by {@link #openString()}.
     *
     * @return the closing XML fragment, terminated by a newline
     */
    public String closeString() {
        String result = "</rdf:Description>\n";
        if (getParentURL() != null) {
            result += "</a:successor>\n";
        }
        return result;
    }


    /** Return the depth of this URL */
    public int getDepth() {
        return depth;
    }

    /**
     * Return the URL as a string.
     *
     * @return the string form of the parsed URL if it has been parsed,
     *         otherwise the raw string given to the constructor
     */
    public String getURL() {
        if (url == null) return rawurl;
        else return url.toString();
    }

    /**
     * Return the parent URL as a string.
     *
     * @return the string form of the parsed parent URL, or null if there
     *         is no parsed parent URL
     */
    public String getParentURL() {
        if (p_url == null) return null;
        else return p_url.toString();
    }

    /** Return status as a number (one of the status code constants) */
    public int getStatus() {
        return status;
    }

    /**
     * Return host from the url.
     *
     * @return the host of the parsed URL, or null if the URL has not been parsed
     */
    public String getHost() {
        if (url == null) return null;
        return url.getHost();
    }

    /**
     * Set the processing status.
     *
     * @param status the new status; expected to be one of the status code constants
     */
    public void setStatus(int status) {
        this.status = status;
    }

    /**
     * Store the exception encountered while processing this URL.
     *
     * @param e the exception to remember
     */
    public void setException(Exception e) {
        ex = e;
    }

    /**
     * In case URL ends with a file, return file's extension (in lowercase);
     * empty string otherwise.
     *
     * @return the text after the last '.' of the URL path, lower-cased,
     *         or "" if the URL has not been parsed, the path has no '.',
     *         or the last '.' belongs to a directory name rather than to
     *         the final path segment
     * @throws Exception declared for compatibility with existing callers
     */
    public String getExtension() throws Exception {
        if (url == null) return "";
        // getPath() rather than getFile(): getFile() also contains the
        // query string, so "a.html?v=1.2" would yield "2" as extension.
        String pathstr = url.getPath();
        int s = pathstr.lastIndexOf('.');
        if (s < 0) return "";
        String result = pathstr.substring(s+1);
        // a '/' after the dot means the dot was inside a directory name
        if (result.indexOf('/') >= 0) return "";
        // locale-independent lowercasing, so that e.g. "GIF" always
        // becomes "gif" regardless of the default locale
        return result.toLowerCase(java.util.Locale.ROOT);
    }
}
