001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.output;
018
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.input.XmlStreamReader;
031
032/**
033 * Character stream that handles all the necessary Voodoo to figure out the
034 * charset encoding of the XML document written to the stream.
035 *
036 * @see XmlStreamReader
037 * @since 2.0
038 */
039public class XmlStreamWriter extends Writer {
040    private static final int BUFFER_SIZE = 4096;
041
042    private final OutputStream out;
043
044    private final String defaultEncoding;
045
046    private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);
047
048    private Writer writer;
049
050    private String encoding;
051
052    /**
053     * Constructs a new XML stream writer for the specified output stream
054     * with a default encoding of UTF-8.
055     *
056     * @param out The output stream
057     */
058    public XmlStreamWriter(final OutputStream out) {
059        this(out, null);
060    }
061
062    /**
063     * Constructs a new XML stream writer for the specified output stream
064     * with the specified default encoding.
065     *
066     * @param out The output stream
067     * @param defaultEncoding The default encoding if not encoding could be detected
068     */
069    public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
070        this.out = out;
071        this.defaultEncoding = defaultEncoding != null ? defaultEncoding : "UTF-8";
072    }
073
074    /**
075     * Constructs a new XML stream writer for the specified file
076     * with a default encoding of UTF-8.
077     *
078     * @param file The file to write to
079     * @throws FileNotFoundException if there is an error creating or
080     * opening the file
081     */
082    public XmlStreamWriter(final File file) throws FileNotFoundException {
083        this(file, null);
084    }
085
086    /**
087     * Constructs a new XML stream writer for the specified file
088     * with the specified default encoding.
089     *
090     * @param file The file to write to
091     * @param defaultEncoding The default encoding if not encoding could be detected
092     * @throws FileNotFoundException if there is an error creating or
093     * opening the file
094     */
095    public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
096        this(new FileOutputStream(file), defaultEncoding);
097    }
098
099    /**
100     * Returns the detected encoding.
101     *
102     * @return the detected encoding
103     */
104    public String getEncoding() {
105        return encoding;
106    }
107
108    /**
109     * Returns the default encoding.
110     *
111     * @return the default encoding
112     */
113    public String getDefaultEncoding() {
114        return defaultEncoding;
115    }
116
117    /**
118     * Closes the underlying writer.
119     *
120     * @throws IOException if an error occurs closing the underlying writer
121     */
122    @Override
123    public void close() throws IOException {
124        if (writer == null) {
125            encoding = defaultEncoding;
126            writer = new OutputStreamWriter(out, encoding);
127            writer.write(xmlPrologWriter.toString());
128        }
129        writer.close();
130    }
131
132    /**
133     * Flushes the underlying writer.
134     *
135     * @throws IOException if an error occurs flushing the underlying writer
136     */
137    @Override
138    public void flush() throws IOException {
139        if (writer != null) {
140            writer.flush();
141        }
142    }
143
144    /**
145     * Detects the encoding.
146     *
147     * @param cbuf the buffer to write the characters from
148     * @param off The start offset
149     * @param len The number of characters to write
150     * @throws IOException if an error occurs detecting the encoding
151     */
152    private void detectEncoding(final char[] cbuf, final int off, final int len)
153            throws IOException {
154        int size = len;
155        final StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
156        if (xmlProlog.length() + len > BUFFER_SIZE) {
157            size = BUFFER_SIZE - xmlProlog.length();
158        }
159        xmlPrologWriter.write(cbuf, off, size);
160
161        // try to determine encoding
162        if (xmlProlog.length() >= 5) {
163            if (xmlProlog.substring(0, 5).equals("<?xml")) {
164                // try to extract encoding from XML prolog
165                final int xmlPrologEnd = xmlProlog.indexOf("?>");
166                if (xmlPrologEnd > 0) {
167                    // ok, full XML prolog written: let's extract encoding
168                    final Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0,
169                            xmlPrologEnd));
170                    if (m.find()) {
171                        encoding = m.group(1).toUpperCase();
172                        encoding = encoding.substring(1, encoding.length() - 1);
173                    } else {
174                        // no encoding found in XML prolog: using default
175                        // encoding
176                        encoding = defaultEncoding;
177                    }
178                } else {
179                    if (xmlProlog.length() >= BUFFER_SIZE) {
180                        // no encoding found in first characters: using default
181                        // encoding
182                        encoding = defaultEncoding;
183                    }
184                }
185            } else {
186                // no XML prolog: using default encoding
187                encoding = defaultEncoding;
188            }
189            if (encoding != null) {
190                // encoding has been chosen: let's do it
191                xmlPrologWriter = null;
192                writer = new OutputStreamWriter(out, encoding);
193                writer.write(xmlProlog.toString());
194                if (len > size) {
195                    writer.write(cbuf, off + size, len - size);
196                }
197            }
198        }
199    }
200
201    /**
202     * Writes the characters to the underlying writer, detecting encoding.
203     *
204     * @param cbuf the buffer to write the characters from
205     * @param off The start offset
206     * @param len The number of characters to write
207     * @throws IOException if an error occurs detecting the encoding
208     */
209    @Override
210    public void write(final char[] cbuf, final int off, final int len) throws IOException {
211        if (xmlPrologWriter != null) {
212            detectEncoding(cbuf, off, len);
213        } else {
214            writer.write(cbuf, off, len);
215        }
216    }
217
218    static final Pattern ENCODING_PATTERN = XmlStreamReader.ENCODING_PATTERN;
219}