View Javadoc

1   /*
2    * Copyright 2005,2009 Ivan SZKIBA
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.ini4j.spi;
17  
18  import java.io.IOException;
19  import java.io.InputStream;
20  import java.io.InputStreamReader;
21  import java.io.PushbackInputStream;
22  import java.io.Reader;
23  
24  import java.nio.charset.Charset;
25  
/**
 * Reader that selects its charset from a leading Unicode byte order mark (BOM).
 *
 * <p>On first use, up to {@link #BOM_SIZE} bytes are read ahead from the wrapped stream.
 * If they start with the BOM of a charset supported by the running JVM, that charset is
 * used and the BOM bytes are dropped. Otherwise the default charset supplied to the
 * constructor is used and every read-ahead byte is pushed back.
 */
class UnicodeInputStreamReader extends Reader
{
    /** Length in bytes of the longest mark in {@link Bom}; also the push-back capacity. */
    private static final int BOM_SIZE = 4;

    /** Known byte order marks together with the charset each one announces. */
    private enum Bom
    {
        // Declaration order is the lookup order. The 4-byte UTF-32LE mark begins with
        // the 2-byte UTF-16LE mark, so the UTF-32 entries have to be tried first.
        UTF32BE("UTF-32BE", new byte[] { (byte) 0x00, (byte) 0x00, (byte) 0xFE, (byte) 0xFF }),
        UTF32LE("UTF-32LE", new byte[] { (byte) 0xFF, (byte) 0xFE, (byte) 0x00, (byte) 0x00 }),
        UTF16BE("UTF-16BE", new byte[] { (byte) 0xFE, (byte) 0xFF }),
        UTF16LE("UTF-16LE", new byte[] { (byte) 0xFF, (byte) 0xFE }),
        UTF8("UTF-8", new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF });

        /** The mark itself. */
        private final byte[] _bytes;

        /** Charset announced by the mark, or {@code null} when the JVM lacks it. */
        private final Charset _charset;

        @SuppressWarnings("PMD.ArrayIsStoredDirectly")
        private Bom(String charsetName, byte[] bytes)
        {
            Charset charset;

            try
            {
                charset = Charset.forName(charsetName);
            }
            catch (IllegalArgumentException x)
            {
                // Illegal or unsupported charset name: keep the constant, but mark it
                // unsupported so that find() never returns it.
                charset = null;
            }

            _charset = charset;
            _bytes = bytes;
        }

        /**
         * Looks up the first supported mark that prefixes the given bytes.
         *
         * @param data buffer holding the read-ahead bytes
         * @param length number of valid bytes at the start of {@code data}
         * @return the matching mark, or {@code null} if none matches
         */
        private static Bom find(byte[] data, int length)
        {
            for (Bom bom : values())
            {
                if (bom.supported() && bom.match(data, length))
                {
                    return bom;
                }
            }

            return null;
        }

        /**
         * Tells whether the first {@code length} bytes of {@code data} start with this mark.
         *
         * @param data buffer holding the read-ahead bytes
         * @param length number of valid bytes at the start of {@code data}
         * @return {@code true} if this mark is a prefix of the valid bytes
         */
        private boolean match(byte[] data, int length)
        {
            // Bytes past 'length' were never read from the stream; they must not
            // take part in the comparison.
            if (length < _bytes.length)
            {
                return false;
            }

            for (int i = 0; i < _bytes.length; i++)
            {
                if (data[i] != _bytes[i])
                {
                    return false;
                }
            }

            return true;
        }

        /** Tells whether the announced charset could be resolved on this JVM. */
        private boolean supported()
        {
            return _charset != null;
        }
    }

    /** Charset used when the stream does not start with a supported mark. */
    private final Charset _defaultEncoding;

    /** Decoding reader, created by {@link #init()} on first use. */
    private InputStreamReader _reader;

    /** Wrapped stream; the push-back buffer holds the read-ahead bytes. */
    private final PushbackInputStream _stream;

    /**
     * Creates a reader over the given stream. Nothing is read until the first
     * {@link #read(char[], int, int)} call.
     *
     * @param in stream to decode
     * @param defaultEnc charset to use when no supported mark is found
     */
    UnicodeInputStreamReader(InputStream in, Charset defaultEnc)
    {
        _stream = new PushbackInputStream(in, BOM_SIZE);
        _defaultEncoding = defaultEnc;
    }

    /**
     * Closes the wrapped stream. If nothing has been read yet, the stream is closed
     * directly, without performing the mark detection.
     *
     * @throws IOException if closing the stream fails
     */
    @Override
    public void close() throws IOException
    {
        if (_reader == null)
        {
            _stream.close();
        }
        else
        {
            _reader.close();
        }
    }

    /**
     * Reads decoded characters, detecting the charset on the first call.
     *
     * @param cbuf destination buffer
     * @param off offset in {@code cbuf} at which to start storing characters
     * @param len maximum number of characters to read
     * @return the number of characters read, or -1 at end of stream
     * @throws IOException if reading from the wrapped stream fails
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException
    {
        init();

        return _reader.read(cbuf, off, len);
    }

    /**
     * Reads ahead up to four bytes and checks them for a byte order mark. Only the
     * bytes of a recognized mark are skipped; every other byte that was read ahead
     * is pushed back to the stream. Does nothing once the decoder exists.
     *
     * @throws IOException if reading from the wrapped stream fails
     */
    protected void init() throws IOException
    {
        if (_reader != null)
        {
            return;
        }

        byte[] data = new byte[BOM_SIZE];
        int count = readAhead(data);
        Bom bom = Bom.find(data, count);
        Charset encoding;
        int skip;

        if (bom == null)
        {
            encoding = _defaultEncoding;
            skip = 0;
        }
        else
        {
            encoding = bom._charset;
            skip = bom._bytes.length;
        }

        // Push back only bytes that really came from the stream; the stream may
        // have ended before the look-ahead buffer was full.
        if (count > skip)
        {
            _stream.unread(data, skip, count - skip);
        }

        _reader = new InputStreamReader(_stream, encoding);
    }

    /**
     * Fills the buffer from the wrapped stream, repeating the read until the buffer
     * is full or the stream ends. A single read may return fewer bytes than requested
     * even when more follow, which would hide a mark split across reads.
     *
     * @param buffer destination for the read-ahead bytes
     * @return number of bytes stored in {@code buffer}; 0 for an empty stream
     * @throws IOException if reading from the wrapped stream fails
     */
    private int readAhead(byte[] buffer) throws IOException
    {
        int total = 0;

        while (total < buffer.length)
        {
            int n = _stream.read(buffer, total, buffer.length - total);

            if (n < 0)
            {
                break;
            }

            total += n;
        }

        return total;
    }
}