/*
 * ====================================================================
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation. For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */
package org.apache.http.conn.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.annotation.Contract;
import org.apache.http.annotation.ThreadingBehavior;

/**
 * Parses the list from <a href="http://publicsuffix.org/">publicsuffix.org</a>
 * into {@link PublicSuffixList} instances.
 * <p>
 * The parser keeps no state between calls; every invocation works only on
 * the reader passed to it.
 *
 * @since 4.4
 */
@Contract(threading = ThreadingBehavior.IMMUTABLE)
public final class PublicSuffixListParser {

    public PublicSuffixListParser() {
    }

    /**
     * Parses the public suffix list format.
     * <p>
     * Empty lines and lines starting with {@code //} are skipped. Every other
     * line is read up to its first whitespace character; a leading dot is
     * dropped and a leading {@code !} marks the rule as an exception.
     * All rules end up in a single list of type {@link DomainType#UNKNOWN},
     * regardless of any section markers present in the input.
     * <p>
     * When creating the reader from the file, make sure to use the correct encoding
     * (the original list is in UTF-8).
     *
     * @param reader the data reader. The caller is responsible for closing the reader.
     * @return the parsed rules and exceptions
     * @throws java.io.IOException on error while reading from list
     */
    public PublicSuffixList parse(final Reader reader) throws IOException {
        final List<String> rules = new ArrayList<String>();
        final List<String> exceptions = new ArrayList<String>();
        final BufferedReader r = new BufferedReader(reader);

        String line;
        while ((line = r.readLine()) != null) {
            if (line.isEmpty()) {
                continue;
            }
            if (line.startsWith("//")) {
                continue; //entire lines can also be commented using //
            }
            final String rule = truncateAtWhitespace(line);
            if (rule.isEmpty()) {
                continue; // nothing but whitespace before the first token
            }
            final String name = stripLeadingDot(rule);
            // An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
            if (name.startsWith("!")) {
                exceptions.add(name.substring(1));
            } else {
                rules.add(name);
            }
        }
        return new PublicSuffixList(DomainType.UNKNOWN, rules, exceptions);
    }

    /**
     * Parses the public suffix list format by domain type (currently supported ICANN and PRIVATE).
     * <p>
     * A section starts at a comment line containing {@code ===BEGIN ICANN DOMAINS===}
     * or {@code ===BEGIN PRIVATE DOMAINS===} and ends at a comment line containing
     * either {@code ===END ICANN DOMAINS===} or {@code ===END PRIVATE DOMAINS===}.
     * Rule lines outside of a section are ignored. A section is added to the result
     * only when its end marker is reached and it contained at least one
     * non-exception rule.
     * <p>
     * When creating the reader from the file, make sure to use the correct encoding
     * (the original list is in UTF-8).
     *
     * @param reader the data reader. The caller is responsible for closing the reader.
     * @return one list per completed section, in the order the sections end in the input
     * @throws java.io.IOException on error while reading from list
     *
     * @since 4.5
     */
    public List<PublicSuffixList> parseByType(final Reader reader) throws IOException {
        final List<PublicSuffixList> result = new ArrayList<PublicSuffixList>(2);

        final BufferedReader r = new BufferedReader(reader);

        // State of the section currently being read; all three are reset at its end marker.
        DomainType domainType = null;
        List<String> rules = null;
        List<String> exceptions = null;
        String line;
        while ((line = r.readLine()) != null) {
            if (line.isEmpty()) {
                continue;
            }
            if (line.startsWith("//")) {

                if (domainType == null) {
                    if (line.contains("===BEGIN ICANN DOMAINS===")) {
                        domainType = DomainType.ICANN;
                    } else if (line.contains("===BEGIN PRIVATE DOMAINS===")) {
                        domainType = DomainType.PRIVATE;
                    }
                } else {
                    if (line.contains("===END ICANN DOMAINS===") || line.contains("===END PRIVATE DOMAINS===")) {
                        if (rules != null) {
                            // exceptions is still null here if the section had no exception rules
                            result.add(new PublicSuffixList(domainType, rules, exceptions));
                        }
                        domainType = null;
                        rules = null;
                        exceptions = null;
                    }
                }

                continue; //entire lines can also be commented using //
            }
            if (domainType == null) {
                continue; // rule outside of any BEGIN/END section
            }

            final String rule = truncateAtWhitespace(line);
            if (rule.isEmpty()) {
                continue; // nothing but whitespace before the first token
            }
            final String name = stripLeadingDot(rule);
            // An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
            if (name.startsWith("!")) {
                if (exceptions == null) {
                    exceptions = new ArrayList<String>();
                }
                exceptions.add(name.substring(1));
            } else {
                if (rules == null) {
                    rules = new ArrayList<String>();
                }
                rules.add(name);
            }
        }
        return result;
    }

    /**
     * Returns the part of the line preceding its first whitespace character,
     * or the line itself if it contains none. The list format defines that
     * each line is only read up to the first whitespace.
     */
    private static String truncateAtWhitespace(final String line) {
        for (int i = 0; i < line.length(); i++) {
            if (Character.isWhitespace(line.charAt(i))) {
                return line.substring(0, i);
            }
        }
        return line;
    }

    /**
     * Removes a single leading dot from the rule, if present
     * (a leading dot is optional).
     */
    private static String stripLeadingDot(final String rule) {
        return rule.startsWith(".") ? rule.substring(1) : rule;
    }

}