package edu.unika.aifb.rdf.rdfcrawler;

import java.util.*;
import java.io.*;

/**
 * This is a top-level class responsible for
 * the mapping of URIs to filepaths, streams or
 * symbolic strings.
 * For better performance we may want to
 * store all files the RDF Crawler encounters
 * on the file system. Thus repeated usage of
 * RDF Crawler for similar input data may give
 * a significant performance boost.
 * This is essential, if we are building
 * different "cross-sections" of RDF models
 * from the same data.
 *
 * In most cases we want to cache ontologies,
 * so that the interpretation of data found in RDF
 * markup does not depend on whether we succeed
 * in downloading, say, the DAML schema from the
 * network in every particular case.
 */
public class Cache {
    /** Maps the value returned by URLStruct.getURL() to a local file path. */
    private TreeMap cachemap;

    /** URIs of the schemas that make up the optional default cache. */
    private static final String[] uris = {"http://www.w3.org/1999/02/22-rdf-syntax-ns",
                             "http://www.w3.org/2000/01/rdf-schema",
                             "http://www.daml.org/2000/10/daml-ont",
                             "http://purl.org/dc/elements/1.1/",
                             "http://www.semanticweb.org/ontologies/swrc-onto-2000-09-10.daml"};

    /** Local file paths for the default cache; files[i] belongs to uris[i]. */
    private static final String[] files = {"c:\\Temp\\rdf\\schemas\\rdf-syntax-ns.rdf",
                              "c:\\Temp\\rdf\\schemas\\rdf-schema.rdf",
                              "c:\\Temp\\rdf\\schemas\\daml-ont.rdf",
                              "c:\\Temp\\rdf\\schemas\\dc-elements-1.1.rdf",
                              "c:\\Temp\\rdf\\schemas\\swrc-onto-2000-09-10.daml"};

    /** Compile-time switch: when true, the constructor preloads the default entries. */
    private final boolean use_default_cache = false;

    /**
     * Make either an empty cache or a cache with 5 default items,
     * depending on the use_default_cache switch.
     */
    public Cache() {
        cachemap = new TreeMap();
        if (use_default_cache) {
            for (int i = 0; i < uris.length; i++) {
                cachemap.put(uris[i], files[i]);
            }
        }
    }

    /**
     * Look up an URI in the cache.
     *
     * @param url the URL whose getURL() value is used as the key
     * @return the file path stored for that key, or null if there is none
     */
    public String lookup(URLStruct url) {
        return (String)cachemap.get(url.getURL());
    }

    /**
     * Insert an URI-filename pair in the cache, replacing any
     * previous entry for the same URI.
     *
     * @param url the URL whose getURL() value is used as the key
     * @param filepath the local file path to associate with it
     */
    public void insert(URLStruct url,String filepath) {
        cachemap.put(url.getURL(),filepath);
    }

    /**
     * Reads the contents of the file cached for the given URL into a
     * StringBuffer and returns it. If the URL is not in the cache yet,
     * it is downloaded through NetRetrieve.download and the resulting
     * path is remembered for later calls.
     *
     * @param url the URL whose contents are wanted
     * @return the complete file contents, decoded with the platform default charset
     * @throws FileNotFoundException if no local file path could be obtained
     *         or the file cannot be opened
     * @throws IOException if reading or closing the file fails
     */
    public StringBuffer readAsString(URLStruct url)
        throws FileNotFoundException, IOException {

        // if file not available, download it
        String filepath = lookup(url);
        if (filepath == null) {
            filepath = NetRetrieve.download(url);
            if (filepath == null) {
                // Do not remember a missing path, and fail with a declared
                // exception instead of an NPE from FileInputStream.
                throw new FileNotFoundException(
                    "No local file available for " + url.getURL());
            }
            insert(url,filepath);
        }

        // StringBuffer where we want to store the file contents
        StringBuffer fstring = new StringBuffer();

        // Read in the file as byte stream
        FileInputStream fis = new FileInputStream(filepath);
        try {
            // InputStreamReader converts bytes into Unicode characters;
            // so far we ignore encoding (platform default is used).
            // BufferedReader adds buffering for better performance.
            BufferedReader br = new BufferedReader(new InputStreamReader(fis));

            char[] buf = new char[4096];
            int len;
            while ((len = br.read(buf, 0, buf.length)) != -1) {
                fstring.append(buf, 0, len);
            }
        } finally {
            // Closing the underlying stream releases the file handle even
            // when reading fails part-way through.
            fis.close();
        }

        return fstring;
    }

    /**
     * Prints a table of cache mappings: a "size=N" line followed by one
     * "key TAB value" line per entry, in the key order of the map.
     *
     * @return the textual table
     */
    public String toString() {
        // A buffer avoids re-copying the whole result on every entry.
        StringBuffer result = new StringBuffer();
        result.append("size=").append(cachemap.size()).append("\n");
        Iterator it = cachemap.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry entry = (Map.Entry)it.next();
            result.append((String)entry.getKey())
                  .append("\t")
                  .append((String)entry.getValue())
                  .append("\n");
        }
        return result.toString();
    }
}
