001/*
002 * Copyright 2006 - 2013
003 *     Stefan Balev     <stefan.balev@graphstream-project.org>
004 *     Julien Baudry    <julien.baudry@graphstream-project.org>
005 *     Antoine Dutot    <antoine.dutot@graphstream-project.org>
006 *     Yoann Pigné      <yoann.pigne@graphstream-project.org>
007 *     Guilhelm Savin   <guilhelm.savin@graphstream-project.org>
008 * 
009 * This file is part of GraphStream <http://graphstream-project.org>.
010 * 
011 * GraphStream is a library whose purpose is to handle static or dynamic
012 * graph, create them from scratch, file or any source and display them.
013 * 
014 * This program is free software distributed under the terms of two licenses, the
015 * CeCILL-C license that fits European law, and the GNU Lesser General Public
016 * License. You can  use, modify and/ or redistribute the software under the terms
017 * of the CeCILL-C license as circulated by CEA, CNRS and INRIA at the following
018 * URL <http://www.cecill.info> or under the terms of the GNU LGPL as published by
019 * the Free Software Foundation, either version 3 of the License, or (at your
020 * option) any later version.
021 * 
022 * This program is distributed in the hope that it will be useful, but WITHOUT ANY
023 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
024 * PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
025 * 
026 * You should have received a copy of the GNU Lesser General Public License
027 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
028 * 
029 * The fact that you are presently reading this means that you have had
030 * knowledge of the CeCILL-C and LGPL licenses and that you accept their terms.
031 */
032package org.graphstream.algorithm.generator;
033
034import java.net.URISyntaxException;
035
036public class WikipediaGenerator extends URLGenerator {
037        public static final String SPECIAL_URLS = "^https://%s[.]wikipedia[.]org/wiki/(Wikipedia|File|Special|Category|Talk|Portal|Help|Template|Template_talk):.*$";
038
039        public static enum Lang {
040                EN("en.wikipedia.org", "Main_Page",
041                                "Wikipedia|File|Special|Category|Talk|Portal|Help|Template|Template_talk"), FR(
042                                "fr.wikipedia.org", "Wikipédia:Accueil_Principal",
043                                "Wikipédia|Aide|Spécial|Catégorie|Portail|Discussion|Special")
044
045                ;
046
047                final String host;
048                final String mainPage;
049                final String specialFiles;
050
051                Lang(String host, String mainPage, String special) {
052                        this.host = host;
053                        this.mainPage = mainPage;
054                        this.specialFiles = special;
055                }
056        }
057
058        protected final Lang lang;
059
060        public WikipediaGenerator(String... articles) {
061                this(Lang.EN, articles);
062        }
063
064        public WikipediaGenerator(Lang lang, String... articles) {
065                this.lang = lang;
066
067                setDirected(true);
068                setMode(Mode.PATH);
069
070                addHostFilter(lang.host);
071
072                declineMatchingURL("^https?://" + lang.host + "/wiki/index.php.*");
073                declineMatchingURL("^https?://" + lang.host + "/wiki/" + lang.mainPage);
074                declineMatchingURL("^https?://" + lang.host + "/wiki/[\\w_]+:.*$");
075
076                acceptOnlyMatchingURL("^https?://" + lang.host + "/wiki/.*$");
077
078                if (articles != null)
079                        for (int i = 0; i < articles.length; i++)
080                                addArticle(articles[i]);
081        }
082
083        public void addArticle(String name) {
084                addURL("https://" + lang.host + "/wiki/" + name);
085        }
086
087        @Override
088        protected String getNodeLabel(String url) throws URISyntaxException {
089                return url.substring(url.indexOf("/wiki/") + 6);
090        }
091}