src/test_streams/ : Pull out write_simple_wavex_header() for reuse.
[flac.git] / src / plugin_common / tags.c
1 /* plugin_common - Routines common to several plugins
2  * Copyright (C) 2002-2009  Josh Coalson
3  * Copyright (C) 2011-2014  Xiph.Org Foundation
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18  */
19
20 #ifdef HAVE_CONFIG_H
21 #  include <config.h>
22 #endif
23
24 #include <stdio.h>
25 #include <string.h>
26 #include <stdlib.h>
27
28 #include "tags.h"
29 #include "FLAC/assert.h"
30 #include "FLAC/metadata.h"
31 #include "share/alloc.h"
32
33 #ifndef FLaC__INLINE
34 #define FLaC__INLINE
35 #endif
36
37
38 static FLaC__INLINE size_t local__wide_strlen(const FLAC__uint16 *s)
39 {
40         size_t n = 0;
41         while(*s++)
42                 n++;
43         return n;
44 }
45
46 /*
47  * also disallows non-shortest-form encodings, c.f.
48  *   http://www.unicode.org/versions/corrigendum1.html
49  * and a more clear explanation at the end of this section:
50  *   http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
51  */
52 static size_t local__utf8len(const FLAC__byte *utf8)
53 {
54         FLAC__ASSERT(0 != utf8);
55         if ((utf8[0] & 0x80) == 0) {
56                 return 1;
57         }
58         else if ((utf8[0] & 0xE0) == 0xC0 && (utf8[1] & 0xC0) == 0x80) {
59                 if ((utf8[0] & 0xFE) == 0xC0) /* overlong sequence check */
60                         return 0;
61                 return 2;
62         }
63         else if ((utf8[0] & 0xF0) == 0xE0 && (utf8[1] & 0xC0) == 0x80 && (utf8[2] & 0xC0) == 0x80) {
64                 if (utf8[0] == 0xE0 && (utf8[1] & 0xE0) == 0x80) /* overlong sequence check */
65                         return 0;
66                 /* illegal surrogates check (U+D800...U+DFFF and U+FFFE...U+FFFF) */
67                 if (utf8[0] == 0xED && (utf8[1] & 0xE0) == 0xA0) /* D800-DFFF */
68                         return 0;
69                 if (utf8[0] == 0xEF && utf8[1] == 0xBF && (utf8[2] & 0xFE) == 0xBE) /* FFFE-FFFF */
70                         return 0;
71                 return 3;
72         }
73         else if ((utf8[0] & 0xF8) == 0xF0 && (utf8[1] & 0xC0) == 0x80 && (utf8[2] & 0xC0) == 0x80 && (utf8[3] & 0xC0) == 0x80) {
74                 if (utf8[0] == 0xF0 && (utf8[1] & 0xF0) == 0x80) /* overlong sequence check */
75                         return 0;
76                 return 4;
77         }
78         else if ((utf8[0] & 0xFC) == 0xF8 && (utf8[1] & 0xC0) == 0x80 && (utf8[2] & 0xC0) == 0x80 && (utf8[3] & 0xC0) == 0x80 && (utf8[4] & 0xC0) == 0x80) {
79                 if (utf8[0] == 0xF8 && (utf8[1] & 0xF8) == 0x80) /* overlong sequence check */
80                         return 0;
81                 return 5;
82         }
83         else if ((utf8[0] & 0xFE) == 0xFC && (utf8[1] & 0xC0) == 0x80 && (utf8[2] & 0xC0) == 0x80 && (utf8[3] & 0xC0) == 0x80 && (utf8[4] & 0xC0) == 0x80 && (utf8[5] & 0xC0) == 0x80) {
84                 if (utf8[0] == 0xFC && (utf8[1] & 0xFC) == 0x80) /* overlong sequence check */
85                         return 0;
86                 return 6;
87         }
88         else {
89                 return 0;
90         }
91 }
92
93
94 static size_t local__utf8_to_ucs2(const FLAC__byte *utf8, FLAC__uint16 *ucs2)
95 {
96         const size_t len = local__utf8len(utf8);
97
98         FLAC__ASSERT(0 != ucs2);
99
100         if (len == 1)
101                 *ucs2 = *utf8;
102         else if (len == 2)
103                 *ucs2 = (*utf8 & 0x3F)<<6 | (*(utf8+1) & 0x3F);
104         else if (len == 3)
105                 *ucs2 = (*utf8 & 0x1F)<<12 | (*(utf8+1) & 0x3F)<<6 | (*(utf8+2) & 0x3F);
106         else
107                 *ucs2 = '?';
108
109         return len;
110 }
111
112 static FLAC__uint16 *local__convert_utf8_to_ucs2(const char *src, unsigned length)
113 {
114         FLAC__uint16 *out;
115         size_t chars = 0;
116
117         FLAC__ASSERT(0 != src);
118
119         /* calculate length */
120         {
121                 const unsigned char *s, *end;
122                 for (s=(const unsigned char *)src, end=s+length; s<end; chars++) {
123                         const unsigned n = local__utf8len(s);
124                         if (n == 0)
125                                 return 0;
126                         s += n;
127                 }
128                 FLAC__ASSERT(s == end);
129         }
130
131         /* allocate */
132         out = safe_malloc_mul_2op_(chars, /*times*/sizeof(FLAC__uint16));
133         if (0 == out) {
134                 FLAC__ASSERT(0);
135                 return 0;
136         }
137
138         /* convert */
139         {
140                 const unsigned char *s = (const unsigned char *)src;
141                 FLAC__uint16 *u = out;
142                 for ( ; chars; chars--)
143                         s += local__utf8_to_ucs2(s, u++);
144         }
145
146         return out;
147 }
148
149 static FLaC__INLINE size_t local__ucs2len(FLAC__uint16 ucs2)
150 {
151         if (ucs2 < 0x0080)
152                 return 1;
153         else if (ucs2 < 0x0800)
154                 return 2;
155         else
156                 return 3;
157 }
158
159 static size_t local__ucs2_to_utf8(FLAC__uint16 ucs2, FLAC__byte *utf8)
160 {
161         if (ucs2 < 0x080) {
162                 utf8[0] = (FLAC__byte)ucs2;
163                 return 1;
164         }
165         else if (ucs2 < 0x800) {
166                 utf8[0] = 0xc0 | (ucs2 >> 6);
167                 utf8[1] = 0x80 | (ucs2 & 0x3f);
168                 return 2;
169         }
170         else {
171                 utf8[0] = 0xe0 | (ucs2 >> 12);
172                 utf8[1] = 0x80 | ((ucs2 >> 6) & 0x3f);
173                 utf8[2] = 0x80 | (ucs2 & 0x3f);
174                 return 3;
175         }
176 }
177
178 static char *local__convert_ucs2_to_utf8(const FLAC__uint16 *src, unsigned length)
179 {
180         char *out;
181         size_t len = 0, n;
182
183         FLAC__ASSERT(0 != src);
184
185         /* calculate length */
186         {
187                 unsigned i;
188                 for (i = 0; i < length; i++) {
189                         n = local__ucs2len(src[i]);
190                         if(len + n < len) /* overflow check */
191                                 return 0;
192                         len += n;
193                 }
194         }
195
196         /* allocate */
197         out = safe_malloc_mul_2op_(len, /*times*/sizeof(char));
198         if (0 == out)
199                 return 0;
200
201         /* convert */
202         {
203                 unsigned char *u = (unsigned char *)out;
204                 for ( ; *src; src++)
205                         u += local__ucs2_to_utf8(*src, u);
206                 local__ucs2_to_utf8(*src, u);
207         }
208
209         return out;
210 }
211
212
213 FLAC__bool FLAC_plugin__tags_get(const char *filename, FLAC__StreamMetadata **tags)
214 {
215         if(!FLAC__metadata_get_tags(filename, tags))
216                 if(0 == (*tags = FLAC__metadata_object_new(FLAC__METADATA_TYPE_VORBIS_COMMENT)))
217                         return false;
218         return true;
219 }
220
221 FLAC__bool FLAC_plugin__tags_set(const char *filename, const FLAC__StreamMetadata *tags)
222 {
223         FLAC__Metadata_Chain *chain;
224         FLAC__Metadata_Iterator *iterator;
225         FLAC__StreamMetadata *block;
226         FLAC__bool got_vorbis_comments = false;
227         FLAC__bool ok;
228
229         if(0 == (chain = FLAC__metadata_chain_new()))
230                 return false;
231
232         if(!FLAC__metadata_chain_read(chain, filename)) {
233                 FLAC__metadata_chain_delete(chain);
234                 return false;
235         }
236
237         if(0 == (iterator = FLAC__metadata_iterator_new())) {
238                 FLAC__metadata_chain_delete(chain);
239                 return false;
240         }
241
242         FLAC__metadata_iterator_init(iterator, chain);
243
244         do {
245                 if(FLAC__metadata_iterator_get_block_type(iterator) == FLAC__METADATA_TYPE_VORBIS_COMMENT)
246                         got_vorbis_comments = true;
247         } while(!got_vorbis_comments && FLAC__metadata_iterator_next(iterator));
248
249         if(0 == (block = FLAC__metadata_object_clone(tags))) {
250                 FLAC__metadata_chain_delete(chain);
251                 FLAC__metadata_iterator_delete(iterator);
252                 return false;
253         }
254
255         if(got_vorbis_comments)
256                 ok = FLAC__metadata_iterator_set_block(iterator, block);
257         else
258                 ok = FLAC__metadata_iterator_insert_block_after(iterator, block);
259
260         FLAC__metadata_iterator_delete(iterator);
261
262         if(ok) {
263                 FLAC__metadata_chain_sort_padding(chain);
264                 ok = FLAC__metadata_chain_write(chain, /*use_padding=*/true, /*preserve_file_stats=*/true);
265         }
266
267         FLAC__metadata_chain_delete(chain);
268
269         return ok;
270 }
271
272 void FLAC_plugin__tags_destroy(FLAC__StreamMetadata **tags)
273 {
274         FLAC__metadata_object_delete(*tags);
275         *tags = 0;
276 }
277
278 const char *FLAC_plugin__tags_get_tag_utf8(const FLAC__StreamMetadata *tags, const char *name)
279 {
280         const int i = FLAC__metadata_object_vorbiscomment_find_entry_from(tags, /*offset=*/0, name);
281         return (i < 0? 0 : strchr((const char *)tags->data.vorbis_comment.comments[i].entry, '=')+1);
282 }
283
284 FLAC__uint16 *FLAC_plugin__tags_get_tag_ucs2(const FLAC__StreamMetadata *tags, const char *name)
285 {
286         const char *utf8 = FLAC_plugin__tags_get_tag_utf8(tags, name);
287         if(0 == utf8)
288                 return 0;
289         return local__convert_utf8_to_ucs2(utf8, strlen(utf8)+1); /* +1 for terminating null */
290 }
291
292 int FLAC_plugin__tags_delete_tag(FLAC__StreamMetadata *tags, const char *name)
293 {
294         return FLAC__metadata_object_vorbiscomment_remove_entries_matching(tags, name);
295 }
296
297 int FLAC_plugin__tags_delete_all(FLAC__StreamMetadata *tags)
298 {
299         int n = (int)tags->data.vorbis_comment.num_comments;
300         if(n > 0) {
301                 if(!FLAC__metadata_object_vorbiscomment_resize_comments(tags, 0))
302                         n = -1;
303         }
304         return n;
305 }
306
307 FLAC__bool FLAC_plugin__tags_add_tag_utf8(FLAC__StreamMetadata *tags, const char *name, const char *value, const char *separator)
308 {
309         int i;
310
311         FLAC__ASSERT(0 != tags);
312         FLAC__ASSERT(0 != name);
313         FLAC__ASSERT(0 != value);
314
315         if(separator && (i = FLAC__metadata_object_vorbiscomment_find_entry_from(tags, /*offset=*/0, name)) >= 0) {
316                 FLAC__StreamMetadata_VorbisComment_Entry *entry = tags->data.vorbis_comment.comments+i;
317                 const size_t value_len = strlen(value);
318                 const size_t separator_len = strlen(separator);
319                 FLAC__byte *new_entry;
320                 if(0 == (new_entry = safe_realloc_add_4op_(entry->entry, entry->length, /*+*/value_len, /*+*/separator_len, /*+*/1)))
321                         return false;
322                 memcpy(new_entry+entry->length, separator, separator_len);
323                 entry->length += separator_len;
324                 memcpy(new_entry+entry->length, value, value_len);
325                 entry->length += value_len;
326                 new_entry[entry->length] = '\0';
327                 entry->entry = new_entry;
328         }
329         else {
330                 FLAC__StreamMetadata_VorbisComment_Entry entry;
331                 if(!FLAC__metadata_object_vorbiscomment_entry_from_name_value_pair(&entry, name, value))
332                         return false;
333                 FLAC__metadata_object_vorbiscomment_append_comment(tags, entry, /*copy=*/false);
334         }
335         return true;
336 }
337
338 FLAC__bool FLAC_plugin__tags_set_tag_ucs2(FLAC__StreamMetadata *tags, const char *name, const FLAC__uint16 *value, FLAC__bool replace_all)
339 {
340         FLAC__StreamMetadata_VorbisComment_Entry entry;
341
342         FLAC__ASSERT(0 != tags);
343         FLAC__ASSERT(0 != name);
344         FLAC__ASSERT(0 != value);
345
346         {
347                 char *utf8 = local__convert_ucs2_to_utf8(value, local__wide_strlen(value)+1); /* +1 for the terminating null */
348                 if(0 == utf8)
349                         return false;
350                 if(!FLAC__metadata_object_vorbiscomment_entry_from_name_value_pair(&entry, name, utf8)) {
351                         free(utf8);
352                         return false;
353                 }
354                 free(utf8);
355         }
356         if(!FLAC__metadata_object_vorbiscomment_replace_comment(tags, entry, replace_all, /*copy=*/false))
357                 return false;
358         return true;
359 }