/*
 * Decompiled with CFR 0.152.
 */
package net.sf.jabref.gui.importer.fetcher;

import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.swing.JPanel;
import net.sf.jabref.Globals;
import net.sf.jabref.gui.importer.ImportInspectionDialog;
import net.sf.jabref.gui.importer.fetcher.EntryFetcher;
import net.sf.jabref.logic.formatter.bibtexfields.NormalizeNamesFormatter;
import net.sf.jabref.logic.help.HelpFile;
import net.sf.jabref.logic.importer.ImportInspector;
import net.sf.jabref.logic.importer.OutputPrinter;
import net.sf.jabref.logic.net.URLDownload;
import net.sf.jabref.model.entry.BibEntry;
import net.sf.jabref.model.entry.IdGenerator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Entry fetcher that runs a search against CiteSeerX and converts every hit
 * into a {@link BibEntry} by scraping the HTML of the result and detail pages.
 *
 * <p>{@link #stopFetching()} may be invoked while {@link #processQuery} is
 * running in order to cancel the download loop; the cancel flag is therefore
 * declared {@code volatile}.
 */
public class CiteSeerXFetcher implements EntryFetcher {
    /** Upper bound on the number of search result pages that are downloaded. */
    private static final int MAX_PAGES_TO_LOAD = 8;
    /** Placeholder that is substituted in {@link #SEARCH_URL} and {@link #BASE_PATTERN}. */
    private static final String QUERY_MARKER = "___QUERY___";
    /** Prefix prepended to the relative links found on the search result page. */
    private static final String URL_START = "http://citeseer.ist.psu.edu";
    private static final String SEARCH_URL = URL_START + "/search?q=" + QUERY_MARKER + "&submit=Search&sort=rlv&t=doc";

    // All capture groups are reluctant so that a second closing delimiter on
    // the same line cannot be swallowed into the captured value.
    private static final Pattern CITE_LINK_PATTERN = Pattern.compile("<a class=\"remove doc_details\" href=\"(.*?)\">");

    /** Set by {@link #stopFetching()}; volatile so the fetching thread sees the update. */
    private volatile boolean stopFetching;

    /** Template for the {@code <meta>} tags the detail page is scraped for. */
    private static final String BASE_PATTERN = "<meta name=\"" + QUERY_MARKER + "\" content=\"(.*?)\" />";
    private static final Pattern TITLE_PATTERN = Pattern.compile(BASE_PATTERN.replace(QUERY_MARKER, "citation_title"));
    private static final Pattern AUTHOR_PATTERN = Pattern.compile(BASE_PATTERN.replace(QUERY_MARKER, "citation_authors"));
    private static final Pattern YEAR_PATTERN = Pattern.compile(BASE_PATTERN.replace(QUERY_MARKER, "citation_year"));
    private static final Pattern ABSTRACT_PATTERN = Pattern.compile("<h3>Abstract</h3>\\s*<p>(.*?)</p>");

    private static final Log LOGGER = LogFactory.getLog(CiteSeerXFetcher.class);

    /**
     * Searches for {@code query} and adds every entry that could be parsed to
     * {@code inspector}.
     *
     * @param query     the search terms; URL-encoded before being sent
     * @param inspector receives the fetched entries
     * @param status    not used by this fetcher
     * @return {@code true} if fetching finished (or was cancelled) without an
     *         I/O error, {@code false} if an {@link IOException} occurred
     */
    @Override
    public boolean processQuery(String query, ImportInspector inspector, OutputPrinter status) {
        stopFetching = false;
        try {
            for (String citationUrl : getCitations(query)) {
                if (stopFetching) {
                    break;
                }
                BibEntry entry = getSingleCitation(citationUrl);
                if (entry != null) {
                    inspector.addEntry(entry);
                }
            }
            return true;
        } catch (IOException e) {
            LOGGER.error("Error while fetching from " + getTitle(), e);
            // Only the dialog implementation can display the error; guard the
            // cast so other inspectors do not trigger a ClassCastException.
            if (inspector instanceof ImportInspectionDialog) {
                ((ImportInspectionDialog) inspector).showErrorMessage(getTitle(), e.getLocalizedMessage());
            }
            return false;
        }
    }

    @Override
    public String getTitle() {
        return "CiteSeerX";
    }

    @Override
    public HelpFile getHelpPage() {
        return HelpFile.FETCHER_CITESEERX;
    }

    /**
     * @return {@code null}; this fetcher has no options panel
     */
    @Override
    public JPanel getOptionsPanel() {
        return null;
    }

    /** Requests that a running {@link #processQuery} stops as soon as possible. */
    @Override
    public void stopFetching() {
        stopFetching = true;
    }

    /**
     * Collects the detail-page URLs of the search hits for {@code query},
     * following "next page" links for at most {@link #MAX_PAGES_TO_LOAD} pages
     * or until fetching is cancelled.
     *
     * @param query the raw search terms
     * @return absolute URLs of the detail pages, in the order they were found
     * @throws IOException if a search page cannot be downloaded
     */
    private List<String> getCitations(String query) throws IOException {
        List<String> ids = new ArrayList<>();
        String urlQuery = SEARCH_URL.replace(QUERY_MARKER, URLEncoder.encode(query, StandardCharsets.UTF_8.name()));
        int count = 1;
        String nextPage;
        while (((nextPage = getCitationsFromUrl(urlQuery, ids)) != null) && (count < MAX_PAGES_TO_LOAD)) {
            urlQuery = nextPage;
            count++;
            if (stopFetching) {
                break;
            }
        }
        return ids;
    }

    /**
     * Downloads one search result page and appends the absolute URL of every
     * hit on it to {@code ids}.
     *
     * @param urlQuery URL of the search result page
     * @param ids      list the discovered detail-page URLs are appended to
     * @return the URL of the next result page; currently always {@code null},
     *         so only the first page is ever loaded
     * @throws IOException if the page cannot be downloaded
     */
    private static String getCitationsFromUrl(String urlQuery, List<String> ids) throws IOException {
        // NOTE(review): this page is decoded with the default encoding from the
        // preferences while detail pages are decoded as UTF-8 - confirm whether
        // the asymmetry is intentional.
        String cont = new URLDownload(urlQuery).downloadToString(Globals.prefs.getDefaultEncoding());
        Matcher m = CITE_LINK_PATTERN.matcher(cont);
        while (m.find()) {
            ids.add(URL_START + m.group(1));
        }
        return null;
    }

    /**
     * Downloads a detail page and builds an entry from the title, authors,
     * year and abstract found in it.
     *
     * @param urlString absolute URL of the detail page
     * @return the parsed entry, or {@code null} if the page has no title tag
     * @throws IOException if the page cannot be downloaded
     */
    private static BibEntry getSingleCitation(String urlString) throws IOException {
        String cont = new URLDownload(urlString).downloadToString(StandardCharsets.UTF_8);

        Matcher m = TITLE_PATTERN.matcher(cont);
        if (!m.find()) {
            // Without a title the page is not treated as a citation.
            return null;
        }
        BibEntry entry = new BibEntry(IdGenerator.next());
        entry.setField("title", m.group(1));

        m = AUTHOR_PATTERN.matcher(cont);
        if (m.find()) {
            String authors = m.group(1);
            entry.setField("author", new NormalizeNamesFormatter().format(authors));
        }

        m = YEAR_PATTERN.matcher(cont);
        if (m.find()) {
            entry.setField("year", m.group(1));
        }

        m = ABSTRACT_PATTERN.matcher(cont);
        if (m.find()) {
            entry.setField("abstract", m.group(1));
        }
        return entry;
    }
}

