001/* 002 * Copyright 2006 - 2013 003 * Stefan Balev <stefan.balev@graphstream-project.org> 004 * Julien Baudry <julien.baudry@graphstream-project.org> 005 * Antoine Dutot <antoine.dutot@graphstream-project.org> 006 * Yoann Pigné <yoann.pigne@graphstream-project.org> 007 * Guilhelm Savin <guilhelm.savin@graphstream-project.org> 008 * 009 * This file is part of GraphStream <http://graphstream-project.org>. 010 * 011 * GraphStream is a library whose purpose is to handle static or dynamic 012 * graph, create them from scratch, file or any source and display them. 013 * 014 * This program is free software distributed under the terms of two licenses, the 015 * CeCILL-C license that fits European law, and the GNU Lesser General Public 016 * License. You can use, modify and/ or redistribute the software under the terms 017 * of the CeCILL-C license as circulated by CEA, CNRS and INRIA at the following 018 * URL <http://www.cecill.info> or under the terms of the GNU LGPL as published by 019 * the Free Software Foundation, either version 3 of the License, or (at your 020 * option) any later version. 021 * 022 * This program is distributed in the hope that it will be useful, but WITHOUT ANY 023 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 024 * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 025 * 026 * You should have received a copy of the GNU Lesser General Public License 027 * along with this program. If not, see <http://www.gnu.org/licenses/>. 028 * 029 * The fact that you are presently reading this means that you have had 030 * knowledge of the CeCILL-C and LGPL licenses and that you accept their terms. 031 */ 032package org.graphstream.algorithm.generator; 033 034import java.net.URISyntaxException; 035 036public class WikipediaGenerator extends URLGenerator { 037 public static final String SPECIAL_URLS = "^https://%s[.]wikipedia[.]org/wiki/(Wikipedia|File|Special|Category|Talk|Portal|Help|Template|Template_talk):.*$"; 038 039 public static enum Lang { 040 EN("en.wikipedia.org", "Main_Page", 041 "Wikipedia|File|Special|Category|Talk|Portal|Help|Template|Template_talk"), FR( 042 "fr.wikipedia.org", "Wikipédia:Accueil_Principal", 043 "Wikipédia|Aide|Spécial|Catégorie|Portail|Discussion|Special") 044 045 ; 046 047 final String host; 048 final String mainPage; 049 final String specialFiles; 050 051 Lang(String host, String mainPage, String special) { 052 this.host = host; 053 this.mainPage = mainPage; 054 this.specialFiles = special; 055 } 056 } 057 058 protected final Lang lang; 059 060 public WikipediaGenerator(String... articles) { 061 this(Lang.EN, articles); 062 } 063 064 public WikipediaGenerator(Lang lang, String... articles) { 065 this.lang = lang; 066 067 setDirected(true); 068 setMode(Mode.PATH); 069 070 addHostFilter(lang.host); 071 072 declineMatchingURL("^https?://" + lang.host + "/wiki/index.php.*"); 073 declineMatchingURL("^https?://" + lang.host + "/wiki/" + lang.mainPage); 074 declineMatchingURL("^https?://" + lang.host + "/wiki/[\\w_]+:.*$"); 075 076 acceptOnlyMatchingURL("^https?://" + lang.host + "/wiki/.*$"); 077 078 if (articles != null) 079 for (int i = 0; i < articles.length; i++) 080 addArticle(articles[i]); 081 } 082 083 public void addArticle(String name) { 084 addURL("https://" + lang.host + "/wiki/" + name); 085 } 086 087 @Override 088 protected String getNodeLabel(String url) throws URISyntaxException { 089 return url.substring(url.indexOf("/wiki/") + 6); 090 } 091}