use inline byte-swapping function for MSVC
[flac.git] / src / plugin_common / tags.c
1 /* plugin_common - Routines common to several plugins
2  * Copyright (C) 2002,2003,2004,2005,2006,2007  Josh Coalson
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18
19 #if HAVE_CONFIG_H
20 #  include <config.h>
21 #endif
22
23 #include <stdio.h>
24 #include <string.h>
25 #include <stdlib.h>
26
27 #include "tags.h"
28 #include "FLAC/assert.h"
29 #include "FLAC/metadata.h"
30
31
32 static __inline unsigned local__wide_strlen(const FLAC__uint16 *s)
33 {
34         unsigned n = 0;
35         while(*s++)
36                 n++;
37         return n;
38 }
39
40 /*
41  * also disallows non-shortest-form encodings, c.f.
42  *   http://www.unicode.org/versions/corrigendum1.html
43  * and a more clear explanation at the end of this section:
44  *   http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
45  */
46 static __inline unsigned local__utf8len(const FLAC__byte *utf8)
47 {
48         FLAC__ASSERT(0 != utf8);
49         if ((utf8[0] & 0x80) == 0) {
50                 return 1;
51         }
52         else if ((utf8[0] & 0xE0) == 0xC0 && (utf8[1] & 0xC0) == 0x80) {
53                 if ((utf8[0] & 0xFE) == 0xC0) /* overlong sequence check */
54                         return 0;
55                 return 2;
56         }
57         else if ((utf8[0] & 0xF0) == 0xE0 && (utf8[1] & 0xC0) == 0x80 && (utf8[2] & 0xC0) == 0x80) {
58                 if (utf8[0] == 0xE0 && (utf8[1] & 0xE0) == 0x80) /* overlong sequence check */
59                         return 0;
60                 /* illegal surrogates check (U+D800...U+DFFF and U+FFFE...U+FFFF) */
61                 if (utf8[0] == 0xED && (utf8[1] & 0xE0) == 0xA0) /* D800-DFFF */
62                         return 0;
63                 if (utf8[0] == 0xEF && utf8[1] == 0xBF && (utf8[2] & 0xFE) == 0xBE) /* FFFE-FFFF */
64                         return 0;
65                 return 3;
66         }
67         else if ((utf8[0] & 0xF8) == 0xF0 && (utf8[1] & 0xC0) == 0x80 && (utf8[2] & 0xC0) == 0x80 && (utf8[3] & 0xC0) == 0x80) {
68                 if (utf8[0] == 0xF0 && (utf8[1] & 0xF0) == 0x80) /* overlong sequence check */
69                         return 0;
70                 return 4;
71         }
72         else if ((utf8[0] & 0xFC) == 0xF8 && (utf8[1] & 0xC0) == 0x80 && (utf8[2] & 0xC0) == 0x80 && (utf8[3] & 0xC0) == 0x80 && (utf8[4] & 0xC0) == 0x80) {
73                 if (utf8[0] == 0xF8 && (utf8[1] & 0xF8) == 0x80) /* overlong sequence check */
74                         return 0;
75                 return 5;
76         }
77         else if ((utf8[0] & 0xFE) == 0xFC && (utf8[1] & 0xC0) == 0x80 && (utf8[2] & 0xC0) == 0x80 && (utf8[3] & 0xC0) == 0x80 && (utf8[4] & 0xC0) == 0x80 && (utf8[5] & 0xC0) == 0x80) {
78                 if (utf8[0] == 0xFC && (utf8[1] & 0xFC) == 0x80) /* overlong sequence check */
79                         return 0;
80                 return 6;
81         }
82         else {
83                 return 0;
84         }
85 }
86
87
88 static __inline unsigned local__utf8_to_ucs2(const FLAC__byte *utf8, FLAC__uint16 *ucs2)
89 {
90         const unsigned len = local__utf8len(utf8);
91
92         FLAC__ASSERT(0 != ucs2);
93
94         if (len == 1)
95                 *ucs2 = *utf8;
96         else if (len == 2)
97                 *ucs2 = (*utf8 & 0x3F)<<6 | (*(utf8+1) & 0x3F);
98         else if (len == 3)
99                 *ucs2 = (*utf8 & 0x1F)<<12 | (*(utf8+1) & 0x3F)<<6 | (*(utf8+2) & 0x3F);
100         else
101                 *ucs2 = '?';
102
103         return len;
104 }
105
106 static FLAC__uint16 *local__convert_utf8_to_ucs2(const char *src, unsigned length)
107 {
108         FLAC__uint16 *out;
109         unsigned chars = 0;
110
111         FLAC__ASSERT(0 != src);
112
113         /* calculate length */
114         {
115                 const unsigned char *s, *end;
116                 for (s=(const unsigned char *)src, end=s+length; s<end; chars++) {
117                         const unsigned n = local__utf8len(s);
118                         if (n == 0)
119                                 return 0;
120                         s += n;
121                 }
122                 FLAC__ASSERT(s == end);
123         }
124
125         /* allocate */
126         out = (FLAC__uint16*)malloc(chars * sizeof(FLAC__uint16));
127         if (0 == out) {
128                 FLAC__ASSERT(0);
129                 return 0;
130         }
131
132         /* convert */
133         {
134                 const unsigned char *s = (const unsigned char *)src;
135                 FLAC__uint16 *u = out;
136                 for ( ; chars; chars--)
137                         s += local__utf8_to_ucs2(s, u++);
138         }
139
140         return out;
141 }
142
143 static __inline unsigned local__ucs2len(FLAC__uint16 ucs2)
144 {
145         if (ucs2 < 0x0080)
146                 return 1;
147         else if (ucs2 < 0x0800)
148                 return 2;
149         else
150                 return 3;
151 }
152
153 static __inline unsigned local__ucs2_to_utf8(FLAC__uint16 ucs2, FLAC__byte *utf8)
154 {
155         if (ucs2 < 0x080) {
156                 utf8[0] = (FLAC__byte)ucs2;
157                 return 1;
158         }
159         else if (ucs2 < 0x800) {
160                 utf8[0] = 0xc0 | (ucs2 >> 6);
161                 utf8[1] = 0x80 | (ucs2 & 0x3f);
162                 return 2;
163         }
164         else {
165                 utf8[0] = 0xe0 | (ucs2 >> 12);
166                 utf8[1] = 0x80 | ((ucs2 >> 6) & 0x3f);
167                 utf8[2] = 0x80 | (ucs2 & 0x3f);
168                 return 3;
169         }
170 }
171
172 static char *local__convert_ucs2_to_utf8(const FLAC__uint16 *src, unsigned length)
173 {
174         char *out;
175         unsigned len = 0;
176
177         FLAC__ASSERT(0 != src);
178
179         /* calculate length */
180         {
181                 unsigned i;
182                 for (i = 0; i < length; i++)
183                         len += local__ucs2len(src[i]);
184         }
185
186         /* allocate */
187         out = (char*)malloc(len * sizeof(char));
188         if (0 == out)
189                 return 0;
190
191         /* convert */
192         {
193                 unsigned char *u = (unsigned char *)out;
194                 for ( ; *src; src++)
195                         u += local__ucs2_to_utf8(*src, u);
196                 local__ucs2_to_utf8(*src, u);
197         }
198
199         return out;
200 }
201
202
203 FLAC__bool FLAC_plugin__tags_get(const char *filename, FLAC__StreamMetadata **tags)
204 {
205         if(!FLAC__metadata_get_tags(filename, tags))
206                 if(0 == (*tags = FLAC__metadata_object_new(FLAC__METADATA_TYPE_VORBIS_COMMENT)))
207                         return false;
208         return true;
209 }
210
211 FLAC__bool FLAC_plugin__tags_set(const char *filename, const FLAC__StreamMetadata *tags)
212 {
213         FLAC__Metadata_Chain *chain;
214         FLAC__Metadata_Iterator *iterator;
215         FLAC__StreamMetadata *block;
216         FLAC__bool got_vorbis_comments = false;
217         FLAC__bool ok;
218
219         if(0 == (chain = FLAC__metadata_chain_new()))
220                 return false;
221
222         if(!FLAC__metadata_chain_read(chain, filename)) {
223                 FLAC__metadata_chain_delete(chain);
224                 return false;
225         }
226
227         if(0 == (iterator = FLAC__metadata_iterator_new())) {
228                 FLAC__metadata_chain_delete(chain);
229                 return false;
230         }
231
232         FLAC__metadata_iterator_init(iterator, chain);
233
234         do {
235                 if(FLAC__metadata_iterator_get_block_type(iterator) == FLAC__METADATA_TYPE_VORBIS_COMMENT)
236                         got_vorbis_comments = true;
237         } while(!got_vorbis_comments && FLAC__metadata_iterator_next(iterator));
238
239         if(0 == (block = FLAC__metadata_object_clone(tags))) {
240                 FLAC__metadata_chain_delete(chain);
241                 FLAC__metadata_iterator_delete(iterator);
242                 return false;
243         }
244
245         if(got_vorbis_comments)
246                 ok = FLAC__metadata_iterator_set_block(iterator, block);
247         else
248                 ok = FLAC__metadata_iterator_insert_block_after(iterator, block);
249
250         FLAC__metadata_iterator_delete(iterator);
251
252         if(ok) {
253                 FLAC__metadata_chain_sort_padding(chain);
254                 ok = FLAC__metadata_chain_write(chain, /*use_padding=*/true, /*preserve_file_stats=*/true);
255         }
256
257         FLAC__metadata_chain_delete(chain);
258
259         return ok;
260 }
261
262 void FLAC_plugin__tags_destroy(FLAC__StreamMetadata **tags)
263 {
264         FLAC__metadata_object_delete(*tags);
265         *tags = 0;
266 }
267
268 const char *FLAC_plugin__tags_get_tag_utf8(const FLAC__StreamMetadata *tags, const char *name)
269 {
270         const int i = FLAC__metadata_object_vorbiscomment_find_entry_from(tags, /*offset=*/0, name);
271         return (i < 0? 0 : strchr((const char *)tags->data.vorbis_comment.comments[i].entry, '=')+1);
272 }
273
274 FLAC__uint16 *FLAC_plugin__tags_get_tag_ucs2(const FLAC__StreamMetadata *tags, const char *name)
275 {
276         const char *utf8 = FLAC_plugin__tags_get_tag_utf8(tags, name);
277         if(0 == utf8)
278                 return 0;
279         return local__convert_utf8_to_ucs2(utf8, strlen(utf8)+1); /* +1 for terminating null */
280 }
281
282 int FLAC_plugin__tags_delete_tag(FLAC__StreamMetadata *tags, const char *name)
283 {
284         return FLAC__metadata_object_vorbiscomment_remove_entries_matching(tags, name);
285 }
286
287 int FLAC_plugin__tags_delete_all(FLAC__StreamMetadata *tags)
288 {
289         int n = (int)tags->data.vorbis_comment.num_comments;
290         if(n > 0) {
291                 if(!FLAC__metadata_object_vorbiscomment_resize_comments(tags, 0))
292                         n = -1;
293         }
294         return n;
295 }
296
297 FLAC__bool FLAC_plugin__tags_add_tag_utf8(FLAC__StreamMetadata *tags, const char *name, const char *value, const char *separator)
298 {
299         int i;
300
301         FLAC__ASSERT(0 != tags);
302         FLAC__ASSERT(0 != name);
303         FLAC__ASSERT(0 != value);
304
305         if(separator && (i = FLAC__metadata_object_vorbiscomment_find_entry_from(tags, /*offset=*/0, name)) >= 0) {
306                 FLAC__StreamMetadata_VorbisComment_Entry *entry = tags->data.vorbis_comment.comments+i;
307                 const size_t value_len = strlen(value);
308                 const size_t separator_len = strlen(separator);
309                 FLAC__byte *new_entry;
310                 if(0 == (new_entry = (FLAC__byte*)realloc(entry->entry, entry->length + value_len + separator_len + 1)))
311                         return false;
312                 memcpy(new_entry+entry->length, separator, separator_len);
313                 entry->length += separator_len;
314                 memcpy(new_entry+entry->length, value, value_len);
315                 entry->length += value_len;
316                 new_entry[entry->length] = '\0';
317                 entry->entry = new_entry;
318         }
319         else {
320                 FLAC__StreamMetadata_VorbisComment_Entry entry;
321                 if(!FLAC__metadata_object_vorbiscomment_entry_from_name_value_pair(&entry, name, value))
322                         return false;
323                 FLAC__metadata_object_vorbiscomment_append_comment(tags, entry, /*copy=*/false);
324         }
325         return true;
326 }
327
328 FLAC__bool FLAC_plugin__tags_set_tag_ucs2(FLAC__StreamMetadata *tags, const char *name, const FLAC__uint16 *value, FLAC__bool replace_all)
329 {
330         FLAC__StreamMetadata_VorbisComment_Entry entry;
331
332         FLAC__ASSERT(0 != tags);
333         FLAC__ASSERT(0 != name);
334         FLAC__ASSERT(0 != value);
335
336         {
337                 char *utf8 = local__convert_ucs2_to_utf8(value, local__wide_strlen(value)+1); /* +1 for the terminating null */
338                 if(0 == utf8)
339                         return false;
340                 if(!FLAC__metadata_object_vorbiscomment_entry_from_name_value_pair(&entry, name, utf8)) {
341                         free(utf8);
342                         return false;
343                 }
344                 free(utf8);
345         }
346         if(!FLAC__metadata_object_vorbiscomment_replace_comment(tags, entry, replace_all, /*copy=*/false))
347                 return false;
348         return true;
349 }