001/*
002 * ====================================================================
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *   http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing,
014 * software distributed under the License is distributed on an
015 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
016 * KIND, either express or implied.  See the License for the
017 * specific language governing permissions and limitations
018 * under the License.
019 * ====================================================================
020 *
021 * This software consists of voluntary contributions made by many
022 * individuals on behalf of the Apache Software Foundation.  For more
023 * information on the Apache Software Foundation, please see
024 * <http://www.apache.org/>.
025 *
026 */
027package org.apache.http.conn.util;
028
029import java.io.BufferedReader;
030import java.io.IOException;
031import java.io.Reader;
032import java.util.ArrayList;
033import java.util.List;
034
035import org.apache.http.annotation.Contract;
036import org.apache.http.annotation.ThreadingBehavior;
037
038/**
039 * Parses the list from <a href="http://publicsuffix.org/">publicsuffix.org</a>
040 * and configures a PublicSuffixFilter.
041 *
042 * @since 4.4
043 */
044@Contract(threading = ThreadingBehavior.IMMUTABLE)
045public final class PublicSuffixListParser {
046
047    public PublicSuffixListParser() {
048    }
049
050    /**
051     * Parses the public suffix list format.
052     * <p>
053     * When creating the reader from the file, make sure to use the correct encoding
054     * (the original list is in UTF-8).
055     *
056     * @param reader the data reader. The caller is responsible for closing the reader.
057     * @throws java.io.IOException on error while reading from list
058     */
059    public PublicSuffixList parse(final Reader reader) throws IOException {
060        final List<String> rules = new ArrayList<String>();
061        final List<String> exceptions = new ArrayList<String>();
062        final BufferedReader r = new BufferedReader(reader);
063
064        String line;
065        while ((line = r.readLine()) != null) {
066            if (line.isEmpty()) {
067                continue;
068            }
069            if (line.startsWith("//")) {
070                continue; //entire lines can also be commented using //
071            }
072            if (line.startsWith(".")) {
073                line = line.substring(1); // A leading dot is optional
074            }
075            // An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
076            final boolean isException = line.startsWith("!");
077            if (isException) {
078                line = line.substring(1);
079            }
080
081            if (isException) {
082                exceptions.add(line);
083            } else {
084                rules.add(line);
085            }
086        }
087        return new PublicSuffixList(DomainType.UNKNOWN, rules, exceptions);
088    }
089
090    /**
091     * Parses the public suffix list format by domain type (currently supported ICANN and PRIVATE).
092     * <p>
093     * When creating the reader from the file, make sure to use the correct encoding
094     * (the original list is in UTF-8).
095     *
096     * @param reader the data reader. The caller is responsible for closing the reader.
097     * @throws java.io.IOException on error while reading from list
098     *
099     * @since 4.5
100     */
101    public List<PublicSuffixList> parseByType(final Reader reader) throws IOException {
102        final List<PublicSuffixList> result = new ArrayList<PublicSuffixList>(2);
103
104        final BufferedReader r = new BufferedReader(reader);
105        final StringBuilder sb = new StringBuilder(256);
106
107        DomainType domainType = null;
108        List<String> rules = null;
109        List<String> exceptions = null;
110        String line;
111        while ((line = r.readLine()) != null) {
112            if (line.isEmpty()) {
113                continue;
114            }
115            if (line.startsWith("//")) {
116
117                if (domainType == null) {
118                    if (line.contains("===BEGIN ICANN DOMAINS===")) {
119                        domainType = DomainType.ICANN;
120                    } else if (line.contains("===BEGIN PRIVATE DOMAINS===")) {
121                        domainType = DomainType.PRIVATE;
122                    }
123                } else {
124                    if (line.contains("===END ICANN DOMAINS===") || line.contains("===END PRIVATE DOMAINS===")) {
125                        if (rules != null) {
126                            result.add(new PublicSuffixList(domainType, rules, exceptions));
127                        }
128                        domainType = null;
129                        rules = null;
130                        exceptions = null;
131                    }
132                }
133
134                continue; //entire lines can also be commented using //
135            }
136            if (domainType == null) {
137                continue;
138            }
139
140            if (line.startsWith(".")) {
141                line = line.substring(1); // A leading dot is optional
142            }
143            // An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
144            final boolean isException = line.startsWith("!");
145            if (isException) {
146                line = line.substring(1);
147            }
148
149            if (isException) {
150                if (exceptions == null) {
151                    exceptions = new ArrayList<String>();
152                }
153                exceptions.add(line);
154            } else {
155                if (rules == null) {
156                    rules = new ArrayList<String>();
157                }
158                rules.add(line);
159            }
160        }
161        return result;
162    }
163
164}