OLD | NEW |
1 <?xml version="1.0" encoding="utf-8"?> | 1 <?xml version="1.0" encoding="utf-8"?> |
2 <!DOCTYPE rfc SYSTEM 'rfc2629.dtd' [ | 2 <!DOCTYPE rfc SYSTEM 'rfc2629.dtd' [ |
3 <!ENTITY rfc2119 PUBLIC '' 'https://xml2rfc.tools.ietf.org/tools/xml2rfc/public/
rfc/bibxml/reference.RFC.2119.xml'> | 3 <!ENTITY rfc2119 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.
RFC.2119.xml'> |
4 <!ENTITY rfc3533 PUBLIC '' 'https://xml2rfc.tools.ietf.org/tools/xml2rfc/public/
rfc/bibxml/reference.RFC.3533.xml'> | 4 <!ENTITY rfc3533 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.
RFC.3533.xml'> |
5 <!ENTITY rfc3629 PUBLIC '' 'https://xml2rfc.tools.ietf.org/tools/xml2rfc/public/
rfc/bibxml/reference.RFC.3629.xml'> | 5 <!ENTITY rfc3629 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.
RFC.3629.xml'> |
6 <!ENTITY rfc4732 PUBLIC '' 'https://xml2rfc.tools.ietf.org/tools/xml2rfc/public/
rfc/bibxml/reference.RFC.4732.xml'> | 6 <!ENTITY rfc4732 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.
RFC.4732.xml'> |
7 <!ENTITY rfc5334 PUBLIC '' 'https://xml2rfc.tools.ietf.org/tools/xml2rfc/public/
rfc/bibxml/reference.RFC.5334.xml'> | 7 <!ENTITY rfc5334 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.
RFC.5334.xml'> |
8 <!ENTITY rfc6381 PUBLIC '' 'https://xml2rfc.tools.ietf.org/tools/xml2rfc/public/
rfc/bibxml/reference.RFC.6381.xml'> | 8 <!ENTITY rfc6381 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.
RFC.6381.xml'> |
9 <!ENTITY rfc6716 PUBLIC '' 'https://xml2rfc.tools.ietf.org/tools/xml2rfc/public/
rfc/bibxml/reference.RFC.6716.xml'> | 9 <!ENTITY rfc6716 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.
RFC.6716.xml'> |
| 10 <!ENTITY rfc6982 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.
RFC.6982.xml'> |
10 ]> | 11 ]> |
11 <?rfc toc="yes" symrefs="yes" ?> | 12 <?rfc toc="yes" symrefs="yes" ?> |
12 | 13 |
13 <rfc ipr="trust200902" category="std" docName="draft-ietf-codec-oggopus-01"> | 14 <rfc ipr="trust200902" category="std" docName="draft-ietf-codec-oggopus-06"> |
14 | 15 |
15 <front> | 16 <front> |
16 <title abbrev="Ogg Opus">Ogg Encapsulation for the Opus Audio Codec</title> | 17 <title abbrev="Ogg Opus">Ogg Encapsulation for the Opus Audio Codec</title> |
17 <author initials="T.B." surname="Terriberry" fullname="Timothy B. Terriberry"> | 18 <author initials="T.B." surname="Terriberry" fullname="Timothy B. Terriberry"> |
18 <organization>Mozilla Corporation</organization> | 19 <organization>Mozilla Corporation</organization> |
19 <address> | 20 <address> |
20 <postal> | 21 <postal> |
21 <street>650 Castro Street</street> | 22 <street>650 Castro Street</street> |
22 <city>Mountain View</city> | 23 <city>Mountain View</city> |
23 <region>CA</region> | 24 <region>CA</region> |
(...skipping 23 matching lines...) |
47 <author initials="R." surname="Giles" fullname="Ralph Giles"> | 48 <author initials="R." surname="Giles" fullname="Ralph Giles"> |
48 <organization>Mozilla Corporation</organization> | 49 <organization>Mozilla Corporation</organization> |
49 <address> | 50 <address> |
50 <postal> | 51 <postal> |
51 <street>163 West Hastings Street</street> | 52 <street>163 West Hastings Street</street> |
52 <city>Vancouver</city> | 53 <city>Vancouver</city> |
53 <region>BC</region> | 54 <region>BC</region> |
54 <code>V6B 1H5</code> | 55 <code>V6B 1H5</code> |
55 <country>Canada</country> | 56 <country>Canada</country> |
56 </postal> | 57 </postal> |
57 <phone>+1 604 778 1540</phone> | 58 <phone>+1 778 785 1540</phone> |
58 <email>giles@xiph.org</email> | 59 <email>giles@xiph.org</email> |
59 </address> | 60 </address> |
60 </author> | 61 </author> |
61 | 62 |
62 <date day="24" month="May" year="2013"/> | 63 <date day="18" month="October" year="2014"/> |
63 <area>RAI</area> | 64 <area>RAI</area> |
64 <workgroup>codec</workgroup> | 65 <workgroup>codec</workgroup> |
65 | 66 |
66 <abstract> | 67 <abstract> |
67 <t> | 68 <t> |
68 This document defines the Ogg encapsulation for the Opus interactive speech and | 69 This document defines the Ogg encapsulation for the Opus interactive speech and |
69 audio codec. | 70 audio codec. |
70 This allows data encoded in the Opus format to be stored in an Ogg logical | 71 This allows data encoded in the Opus format to be stored in an Ogg logical |
71 bitstream. | 72 bitstream. |
72 Ogg encapsulation provides Opus with a long-term storage format supporting | 73 Ogg encapsulation provides Opus with a long-term storage format supporting |
(...skipping 20 matching lines...) |
93 <t> | 94 <t> |
94 Ogg bitstreams are made up of a series of 'pages', each of which contains data | 95 Ogg bitstreams are made up of a series of 'pages', each of which contains data |
95 from one or more 'packets'. | 96 from one or more 'packets'. |
96 Pages are the fundamental unit of multiplexing in an Ogg stream. | 97 Pages are the fundamental unit of multiplexing in an Ogg stream. |
97 Each page is associated with a particular logical stream and contains a capture | 98 Each page is associated with a particular logical stream and contains a capture |
98 pattern and checksum, flags to mark the beginning and end of the logical | 99 pattern and checksum, flags to mark the beginning and end of the logical |
99 stream, and a 'granule position' that represents an absolute position in the | 100 stream, and a 'granule position' that represents an absolute position in the |
100 stream, to aid seeking. | 101 stream, to aid seeking. |
101 A single page can contain up to 65,025 octets of packet data from up to 255 | 102 A single page can contain up to 65,025 octets of packet data from up to 255 |
102 different packets. | 103 different packets. |
103 Packets may be split arbitrarily across pages, and continued from one page to | 104 Packets can be split arbitrarily across pages, and continued from one page to |
104 the next (allowing packets much larger than would fit on a single page). | 105 the next (allowing packets much larger than would fit on a single page). |
105 Each page contains 'lacing values' that indicate how the data is partitioned | 106 Each page contains 'lacing values' that indicate how the data is partitioned |
106 into packets, allowing a demuxer to recover the packet boundaries without | 107 into packets, allowing a demuxer to recover the packet boundaries without |
107 examining the encoded data. | 108 examining the encoded data. |
108 A packet is said to 'complete' on a page when the page contains the final | 109 A packet is said to 'complete' on a page when the page contains the final |
109 lacing value corresponding to that packet. | 110 lacing value corresponding to that packet. |
110 </t> | 111 </t> |
111 <t> | 112 <t> |
112 This encapsulation defines the required contents of the packet data, including | 113 This encapsulation defines the contents of the packet data, including |
113 the necessary headers, the organization of those packets into a logical | 114 the necessary headers, the organization of those packets into a logical |
114 stream, and the interpretation of the codec-specific granule position field. | 115 stream, and the interpretation of the codec-specific granule position field. |
115 It does not attempt to describe or specify the existing Ogg container format. | 116 It does not attempt to describe or specify the existing Ogg container format. |
116 Readers unfamiliar with the basic concepts mentioned above are encouraged to | 117 Readers unfamiliar with the basic concepts mentioned above are encouraged to |
117 review the details in <xref target="RFC3533"/>. | 118 review the details in <xref target="RFC3533"/>. |
118 </t> | 119 </t> |
119 | 120 |
120 </section> | 121 </section> |
121 | 122 |
122 <section anchor="terminology" title="Terminology"> | 123 <section anchor="terminology" title="Terminology"> |
123 <t> | 124 <t> |
124 The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", | 125 The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", |
125 "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be | 126 "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and "OPTIONAL" in this |
126 interpreted as described in <xref target="RFC2119"/>. | 127 document are to be interpreted as described in <xref target="RFC2119"/>. |
127 </t> | 128 </t> |
128 | 129 |
129 <t> | 130 <t> |
130 Implementations that fail to satisfy one or more "MUST" requirements are | 131 Implementations that fail to satisfy one or more "MUST" requirements are |
131 considered non-compliant. | 132 considered non-compliant. |
132 Implementations that satisfy all "MUST" requirements, but fail to satisfy one | 133 Implementations that satisfy all "MUST" requirements, but fail to satisfy one |
133 or more "SHOULD" requirements are said to be "conditionally compliant". | 134 or more "SHOULD" requirements are said to be "conditionally compliant". |
134 All other implementations are "unconditionally compliant". | 135 All other implementations are "unconditionally compliant". |
135 </t> | 136 </t> |
136 | 137 |
137 </section> | 138 </section> |
138 | 139 |
139 <section anchor="packet_organization" title="Packet Organization"> | 140 <section anchor="packet_organization" title="Packet Organization"> |
140 <t> | 141 <t> |
141 An Opus stream is organized as follows. | 142 An Ogg Opus stream is organized as follows. |
142 </t> | 143 </t> |
143 <t> | 144 <t> |
144 There are two mandatory header packets. | 145 There are two mandatory header packets. |
145 The granule position of the pages on which these packets complete MUST be zero. | 146 The granule position of the pages on which these packets complete MUST be zero. |
146 </t> | 147 </t> |
147 <t> | 148 <t> |
148 The first packet in the logical Ogg bitstream MUST contain the identification | 149 The first packet in the logical Ogg bitstream MUST contain the identification |
149 (ID) header, which uniquely identifies a stream as Opus audio. | 150 (ID) header, which uniquely identifies a stream as Opus audio. |
150 The format of this header is defined in <xref target="id_header"/>. | 151 The format of this header is defined in <xref target="id_header"/>. |
151 It MUST be placed alone (without any other packet data) on the first page of | 152 It MUST be placed alone (without any other packet data) on the first page of |
152 the logical Ogg bitstream, and must complete on that page. | 153 the logical Ogg bitstream, and MUST complete on that page. |
153 This page MUST have its 'beginning of stream' flag set. | 154 This page MUST have its 'beginning of stream' flag set. |
154 </t> | 155 </t> |
155 <t> | 156 <t> |
156 The second packet in the logical Ogg bitstream MUST contain the comment header, | 157 The second packet in the logical Ogg bitstream MUST contain the comment header, |
157 which contains user-supplied metadata. | 158 which contains user-supplied metadata. |
158 The format of this header is defined in <xref target="comment_header"/>. | 159 The format of this header is defined in <xref target="comment_header"/>. |
159 It MAY span one or more pages, beginning on the second page of the logical | 160 It MAY span one or more pages, beginning on the second page of the logical |
160 stream. | 161 stream. |
161 However many pages it spans, the comment header packet MUST finish the page on | 162 However many pages it spans, the comment header packet MUST finish the page on |
162 which it completes. | 163 which it completes. |
163 </t> | 164 </t> |
164 <t> | 165 <t> |
165 All subsequent pages are audio data pages, and the Ogg packets they contain are | 166 All subsequent pages are audio data pages, and the Ogg packets they contain are |
166 audio data packets. | 167 audio data packets. |
167 Each audio data packet contains one Opus packet for each of N different | 168 Each audio data packet contains one Opus packet for each of N different |
168 streams, where N is typically one for mono or stereo, but may be greater than | 169 streams, where N is typically one for mono or stereo, but MAY be greater than |
169 one for, e.g., multichannel audio. | 170 one for multichannel audio. |
170 The value N is specified in the ID header (see | 171 The value N is specified in the ID header (see |
171 <xref target="channel_mapping"/>), and is fixed over the entire length of the | 172 <xref target="channel_mapping"/>), and is fixed over the entire length of the |
172 logical Ogg bitstream. | 173 logical Ogg bitstream. |
173 </t> | 174 </t> |
174 <t> | 175 <t> |
175 The first N-1 Opus packets, if any, are packed one after another into the Ogg | 176 The first N-1 Opus packets, if any, are packed one after another into the Ogg |
176 packet, using the self-delimiting framing from Appendix B of | 177 packet, using the self-delimiting framing from Appendix B of |
177 <xref target="RFC6716"/>. | 178 <xref target="RFC6716"/>. |
178 The remaining Opus packet is packed at the end of the Ogg packet using the | 179 The remaining Opus packet is packed at the end of the Ogg packet using the |
179 regular, undelimited framing from Section 3 of <xref target="RFC6716"/>. | 180 regular, undelimited framing from Section 3 of <xref target="RFC6716"/>. |
180 All of the Opus packets in a single Ogg packet MUST be constrained to have the | 181 All of the Opus packets in a single Ogg packet MUST be constrained to have the |
181 same duration. | 182 same duration. |
182 The duration and coding modes of each Opus packet are contained in the | |
183 TOC (table of contents) sequence in the first few bytes. | |
184 A decoder SHOULD treat any Opus packet whose duration is different from that of | 183 A decoder SHOULD treat any Opus packet whose duration is different from that of |
185 the first Opus packet in an Ogg packet as if it were an Opus packet with an | 184 the first Opus packet in an Ogg packet as if it were a malformed Opus packet |
186 illegal TOC sequence. | 185 with an invalid TOC sequence. |
| 186 </t> |
| 187 <t> |
| 188 The coding mode (SILK, Hybrid, or CELT), audio bandwidth, channel count, |
| 189 duration (frame size), and number of frames per packet, are indicated in the |
| 190 TOC (table of contents) sequence at the beginning of each Opus packet, as |
| 191 described in Section 3.1 of <xref target="RFC6716"/>. |
| 192 The combination of mode, audio bandwidth, and frame size is referred to as |
| 193 the configuration of an Opus packet. |
187 </t> | 194 </t> |
188 <t> | 195 <t> |
189 The first audio data page SHOULD NOT have the 'continued packet' flag set | 196 The first audio data page SHOULD NOT have the 'continued packet' flag set |
190 (which would indicate the first audio data packet is continued from a previous | 197 (which would indicate the first audio data packet is continued from a previous |
191 page). | 198 page). |
192 Packets MUST be placed into Ogg pages in order until the end of stream. | 199 Packets MUST be placed into Ogg pages in order until the end of stream. |
193 Audio packets MAY span page boundaries. | 200 Audio packets MAY span page boundaries. |
194 A decoder MUST treat a zero-octet audio data packet as if it were an Opus | 201 A decoder MUST treat a zero-octet audio data packet as if it were a malformed |
195 packet with an illegal TOC sequence. | 202 Opus packet as described in Section 3.4 of <xref target="RFC6716"/>. |
| 203 </t> |
| 204 <t> |
196 The last page SHOULD have the 'end of stream' flag set, but implementations | 205 The last page SHOULD have the 'end of stream' flag set, but implementations |
197 should be prepared to deal with truncated streams that do not have a page | 206 need to be prepared to deal with truncated streams that do not have a page |
198 marked 'end of stream'. | 207 marked 'end of stream'. |
199 The final packet on the last page SHOULD NOT be a continued packet, i.e., the | 208 The final packet on the last page SHOULD NOT be a continued packet, i.e., the |
200 final lacing value should be less than 255. | 209 final lacing value SHOULD be less than 255. |
201 There MUST NOT be any more pages in an Opus logical bitstream after a page | 210 There MUST NOT be any more pages in an Opus logical bitstream after a page |
202 marked 'end of stream'. | 211 marked 'end of stream'. |
203 </t> | 212 </t> |
204 </section> | 213 </section> |
205 | 214 |
206 <section anchor="granpos" title="Granule Position"> | 215 <section anchor="granpos" title="Granule Position"> |
207 <t> | 216 <t> |
208 The granule position of an audio data page encodes the total number of PCM | 217 The granule position of an audio data page encodes the total number of PCM |
209 samples in the stream up to and including the last fully-decodable sample from | 218 samples in the stream up to and including the last fully-decodable sample from |
210 the last packet completed on that page. | 219 the last packet completed on that page. |
211 A page that is entirely spanned by a single packet (that completes on a | 220 A page that is entirely spanned by a single packet (that completes on a |
212 subsequent page) has no granule position, and the granule position field MUST | 221 subsequent page) has no granule position, and the granule position field MUST |
213 be set to the special value '-1' in two's complement. | 222 be set to the special value '-1' in two's complement. |
214 </t> | 223 </t> |
215 | 224 |
216 <t> | 225 <t> |
217 The granule position of an audio data page is in units of PCM audio samples at | 226 The granule position of an audio data page is in units of PCM audio samples at |
218 a fixed rate of 48 kHz (per channel; a stereo stream's granule position | 227 a fixed rate of 48 kHz (per channel; a stereo stream's granule position |
219 does not increment at twice the speed of a mono stream). | 228 does not increment at twice the speed of a mono stream). |
220 It is possible to run an Opus decoder at other sampling rates, but the value | 229 It is possible to run an Opus decoder at other sampling rates, but the value |
221 in the granule position field always counts samples assuming a 48 kHz | 230 in the granule position field always counts samples assuming a 48 kHz |
222 decoding rate, and the rest of this specification makes the same assumption. | 231 decoding rate, and the rest of this specification makes the same assumption. |
223 </t> | 232 </t> |
224 | 233 |
225 <t> | 234 <t> |
226 The duration of an Opus packet may be any multiple of 2.5 ms, up to a | 235 The duration of an Opus packet can be any multiple of 2.5 ms, up to a |
227 maximum of 120 ms. | 236 maximum of 120 ms. |
228 This duration is encoded in the TOC sequence at the beginning of each packet. | 237 This duration is encoded in the TOC sequence at the beginning of each packet. |
229 The number of samples returned by a decoder corresponds to this duration | 238 The number of samples returned by a decoder corresponds to this duration |
230 exactly, even for the first few packets. | 239 exactly, even for the first few packets. |
231 For example, a 20 ms packet fed to a decoder running at 48 kHz will | 240 For example, a 20 ms packet fed to a decoder running at 48 kHz will |
232 always return 960 samples. | 241 always return 960 samples. |
233 A demuxer can parse the TOC sequence at the beginning of each Ogg packet to | 242 A demuxer can parse the TOC sequence at the beginning of each Ogg packet to |
234 work backwards or forwards from a packet with a known granule position (i.e., | 243 work backwards or forwards from a packet with a known granule position (i.e., |
235 the last packet completed on some page) in order to assign granule positions | 244 the last packet completed on some page) in order to assign granule positions |
236 to every packet, or even every individual sample. | 245 to every packet, or even every individual sample. |
237 The one exception is the last page in the stream, as described below. | 246 The one exception is the last page in the stream, as described below. |
238 </t> | 247 </t> |
239 | 248 |
240 <t> | 249 <t> |
241 All other pages with completed packets after the first MUST have a granule | 250 All other pages with completed packets after the first MUST have a granule |
242 position equal to the number of samples contained in packets that complete on | 251 position equal to the number of samples contained in packets that complete on |
243 that page plus the granule position of the most recent page with completed | 252 that page plus the granule position of the most recent page with completed |
244 packets. | 253 packets. |
245 This guarantees that a demuxer can assign individual packets the same granule | 254 This guarantees that a demuxer can assign individual packets the same granule |
246 position when working forwards as when working backwards. | 255 position when working forwards as when working backwards. |
247 For this to work, there cannot be any gaps. | 256 For this to work, there cannot be any gaps. |
248 In order to support capturing a stream that uses discontinuous transmission | |
249 (DTX), an encoder SHOULD emit packets that explicitly request the use of | |
250 Packet Loss Concealment (PLC) (i.e., with a frame length of 0, as defined in | |
251 Section 3.2.1 of <xref target="RFC6716"/>) in place of the packets that were | |
252 not transmitted. | |
253 </t> | 257 </t> |
254 | 258 |
| 259 <section anchor="gap-repair" title="Repairing Gaps in Real-time Streams"> |
| 260 <t> |
| 261 In order to support capturing a real-time stream that has lost or not |
| 262 transmitted packets, a muxer SHOULD emit packets that explicitly request the |
| 263 use of Packet Loss Concealment (PLC) in place of the missing packets. |
| 264 Only gaps that are a multiple of 2.5 ms are repairable, as these are the |
| 265 only durations that can be created by packet loss or discontinuous |
| 266 transmission. |
| 267 Muxers need not handle other gap sizes. |
| 268 Creating the necessary packets involves synthesizing a TOC byte (defined in |
| 269 Section 3.1 of <xref target="RFC6716"/>)—and whatever |
| 270 additional internal framing is needed—to indicate the packet duration |
| 271 for each stream. |
| 272 The actual length of each missing Opus frame inside the packet is zero bytes, |
| 273 as defined in Section 3.2.1 of <xref target="RFC6716"/>. |
| 274 </t> |
| 275 |
| 276 <t> |
| 277 Zero-byte frames MAY be packed into packets using any of codes 0, 1, |
| 278 2, or 3. |
| 279 When successive frames have the same configuration, the higher code packings |
| 280 reduce overhead. |
| 281 Likewise, if the TOC configuration matches, the muxer MAY further combine the |
| 282 empty frames with previous or subsequent non-zero-length frames (using |
| 283 code 2 or VBR code 3). |
| 284 </t> |
| 285 |
| 286 <t> |
| 287 <xref target="RFC6716"/> does not impose any requirements on the PLC, but this |
| 288 section outlines choices that are expected to have a positive influence on |
| 289 most PLC implementations, including the reference implementation. |
| 290 Synthesized TOC sequences SHOULD maintain the same mode, audio bandwidth, |
| 291 channel count, and frame size as the previous packet (if any). |
| 292 This is the simplest and usually the most well-tested case for the PLC to |
| 293 handle and it covers all losses that do not include a configuration switch, |
| 294 as defined in Section 4.5 of <xref target="RFC6716"/>. |
| 295 </t> |
| 296 |
| 297 <t> |
| 298 When a previous packet is available, keeping the audio bandwidth and channel |
| 299 count the same allows the PLC to provide maximum continuity in the concealment |
| 300 data it generates. |
| 301 However, if the size of the gap is not a multiple of the most recent frame |
| 302 size, then the frame size will have to change for at least some frames. |
| 303 Such changes SHOULD be delayed as long as possible to simplify |
| 304 things for PLC implementations. |
| 305 </t> |
| 306 |
| 307 <t> |
| 308 As an example, a 95 ms gap could be encoded as nineteen 5 ms frames |
| 309 in two bytes with a single CBR code 3 packet. |
| 310 If the previous frame size was 20 ms, using four 20 ms frames |
| 311 followed by three 5 ms frames requires 4 bytes (plus an extra byte |
| 312 of Ogg lacing overhead), but allows the PLC to use its well-tested steady |
| 313 state behavior for as long as possible. |
| 314 The total bitrate of the latter approach, including Ogg overhead, is about |
| 315 0.4 kbps, so the impact on file size is minimal. |
| 316 </t> |
| 317 |
| 318 <t> |
| 319 Changing modes is discouraged, since this causes some decoder implementations |
| 320 to reset their PLC state. |
| 321 However, SILK and Hybrid mode frames cannot fill gaps that are not a multiple |
| 322 of 10 ms. |
| 323 If switching to CELT mode is needed to match the gap size, a muxer SHOULD do |
| 324 so at the end of the gap to allow the PLC to function for as long as possible. |
| 325 </t> |
| 326 |
| 327 <t> |
| 328 In the example above, if the previous frame was a 20 ms SILK mode frame, |
| 329 the better solution is to synthesize a packet describing four 20 ms SILK |
| 330 frames, followed by a packet with a single 10 ms SILK |
| 331 frame, and finally a packet with a 5 ms CELT frame, to fill the 95 ms |
| 332 gap. |
| 333 This also requires four bytes to describe the synthesized packet data (two |
| 334 bytes for a CBR code 3 and one byte each for two code 0 packets) but three |
| 335 bytes of Ogg lacing overhead are needed to mark the packet boundaries. |
| 336 At 0.6 kbps, this is still a minimal bitrate impact over a naive, low quality |
| 337 solution. |
| 338 </t> |
| 339 |
| 340 <t> |
| 341 Since medium-band audio is an option only in the SILK mode, wideband frames |
| 342 SHOULD be generated if switching from that configuration to CELT mode, to |
| 343 ensure that any PLC implementation which does try to migrate state between |
| 344 the modes will be able to preserve all of the available audio bandwidth. |
| 345 </t> |
| 346 |
| 347 </section> |
| 348 |
255 <section anchor="preskip" title="Pre-skip"> | 349 <section anchor="preskip" title="Pre-skip"> |
256 <t> | 350 <t> |
257 There is some amount of latency introduced during the decoding process, to | 351 There is some amount of latency introduced during the decoding process, to |
258 allow for overlap in the MDCT modes, stereo mixing in the LP modes, and | 352 allow for overlap in the CELT mode, stereo mixing in the SILK mode, and |
259 resampling, and the encoder will introduce even more latency (though the exact | 353 resampling. |
260 amount is not specified). | 354 The encoder might have introduced additional latency through its own resampling |
| 355 and analysis (though the exact amount is not specified). |
261 Therefore, the first few samples produced by the decoder do not correspond to | 356 Therefore, the first few samples produced by the decoder do not correspond to |
262 real input audio, but are instead composed of padding inserted by the encoder | 357 real input audio, but are instead composed of padding inserted by the encoder |
263 to compensate for this latency. | 358 to compensate for this latency. |
264 These samples need to be stored and decoded, as Opus is an asymptotically | 359 These samples need to be stored and decoded, as Opus is an asymptotically |
265 convergent predictive codec, meaning the decoded contents of each frame depend | 360 convergent predictive codec, meaning the decoded contents of each frame depend |
266 on the recent history of decoder inputs. | 361 on the recent history of decoder inputs. |
267 However, a decoder will want to skip these samples after decoding them. | 362 However, a decoder will want to skip these samples after decoding them. |
268 </t> | 363 </t> |
269 | 364 |
270 <t> | 365 <t> |
271 A 'pre-skip' field in the ID header (see <xref target="id_header"/>) signals | 366 A 'pre-skip' field in the ID header (see <xref target="id_header"/>) signals |
272 the number of samples which SHOULD be skipped (decoded but discarded) at the | 367 the number of samples which SHOULD be skipped (decoded but discarded) at the |
273 beginning of the stream. | 368 beginning of the stream. |
274 This provides sufficient history to the decoder so that it has already | 369 This amount need not be a multiple of 2.5 ms, MAY be smaller than a single |
275 converged before the stream's output begins. | 370 packet, or MAY span the contents of several packets. |
276 It may also be used to perform sample-accurate cropping of existing encoded | 371 These samples are not valid audio, and SHOULD NOT be played. |
277 streams. | |
278 This amount need not be a multiple of 2.5 ms, may be smaller than a single | |
279 packet, or may span the contents of several packets. | |
280 </t> | 372 </t> |
| 373 |
| 374 <t> |
| 375 For example, if the first Opus frame uses the CELT mode, it will always |
| 376 produce 120 samples of windowed overlap-add data. |
| 377 However, the overlap data is initially all zeros (since there is no prior |
| 378 frame), meaning this cannot, in general, accurately represent the original |
| 379 audio. |
| 380 The SILK mode requires additional delay to account for its analysis and |
| 381 resampling latency. |
| 382 The encoder delays the original audio to avoid this problem. |
| 383 </t> |
| 384 |
| 385 <t> |
| 386 The pre-skip field MAY also be used to perform sample-accurate cropping of |
| 387 already encoded streams. |
| 388 In this case, a value of at least 3840 samples (80 ms) provides |
| 389 sufficient history to the decoder that it will have converged |
| 390 before the stream's output begins. |
| 391 </t> |
| 392 |
281 </section> | 393 </section> |
282 | 394 |
283 <section anchor="pcm_sample_position" title="PCM Sample Position"> | 395 <section anchor="pcm_sample_position" title="PCM Sample Position"> |
284 <t> | 396 <t> |
| 397 <figure align="center"> |
| 398 <preamble> |
285 The PCM sample position is determined from the granule position using the | 399 The PCM sample position is determined from the granule position using the |
286 formula | 400 formula |
287 <figure align="center"> | 401 </preamble> |
288 <artwork align="center"><![CDATA[ | 402 <artwork align="center"><![CDATA[ |
289 'PCM sample position' = 'granule position' - 'pre-skip' . | 403 'PCM sample position' = 'granule position' - 'pre-skip' . |
290 ]]></artwork> | 404 ]]></artwork> |
291 </figure> | 405 </figure> |
292 </t> | 406 </t> |
293 | 407 |
294 <t> | 408 <t> |
295 For example, if the granule position of the first audio data page is 59,971, | 409 For example, if the granule position of the first audio data page is 59,971, |
296 and the pre-skip is 11,971, then the PCM sample position of the last decoded | 410 and the pre-skip is 11,971, then the PCM sample position of the last decoded |
297 sample from that page is 48,000. | 411 sample from that page is 48,000. |
| 412 <figure align="center"> |
| 413 <preamble> |
298 This can be converted into a playback time using the formula | 414 This can be converted into a playback time using the formula |
299 <figure align="center"> | 415 </preamble> |
300 <artwork align="center"><![CDATA[ | 416 <artwork align="center"><![CDATA[ |
301 'PCM sample position' | 417 'PCM sample position' |
302 'playback time' = --------------------- . | 418 'playback time' = --------------------- . |
303 48000.0 | 419 48000.0 |
304 ]]></artwork> | 420 ]]></artwork> |
305 </figure> | 421 </figure> |
306 </t> | 422 </t> |
307 | 423 |
308 <t> | 424 <t> |
309 The initial PCM sample position before any samples are played is normally '0'. | 425 The initial PCM sample position before any samples are played is normally '0'. |
310 In this case, the PCM sample position of the first audio sample to be played | 426 In this case, the PCM sample position of the first audio sample to be played |
311 starts at '1', because it marks the time on the clock | 427 starts at '1', because it marks the time on the clock |
312 <spanx style="emph">after</spanx> that sample has been played, and a stream | 428 <spanx style="emph">after</spanx> that sample has been played, and a stream |
313 that is exactly one second long has a final PCM sample position of '48000', | 429 that is exactly one second long has a final PCM sample position of '48000', |
314 as in the example here. | 430 as in the example here. |
315 </t> | 431 </t> |
316 | 432 |
317 <t> | 433 <t> |
318 Vorbis streams use a granule position smaller than the number of audio samples | 434 Vorbis streams use a granule position smaller than the number of audio samples |
319 contained in the first audio data page to indicate that some of those samples | 435 contained in the first audio data page to indicate that some of those samples |
320 must be trimmed from the output (see <xref target="vorbis-trim"/>). | 436 are trimmed from the output (see <xref target="vorbis-trim"/>). |
321 However, to do so, Vorbis requires that the first audio data page contains | 437 However, to do so, Vorbis requires that the first audio data page contains |
322 exactly two packets, in order to allow the decoder to perform PCM position | 438 exactly two packets, in order to allow the decoder to perform PCM position |
323 adjustments before needing to return any PCM data. | 439 adjustments before needing to return any PCM data. |
324 Opus uses the pre-skip mechanism for this purpose instead, since the encoder | 440 Opus uses the pre-skip mechanism for this purpose instead, since the encoder |
325 may introduce more than a single packet's worth of latency, and since very | 441 MAY introduce more than a single packet's worth of latency, and since very |
326 large packets in streams with a very large number of channels might not fit | 442 large packets in streams with a very large number of channels might not fit |
327 on a single page. | 443 on a single page. |
328 </t> | 444 </t> |
329 </section> | 445 </section> |
330 | 446 |
331 <section anchor="end_trimming" title="End Trimming"> | 447 <section anchor="end_trimming" title="End Trimming"> |
332 <t> | 448 <t> |
333 The page with the 'end of stream' flag set MAY have a granule position that | 449 The page with the 'end of stream' flag set MAY have a granule position that |
334 indicates the page contains less audio data than would normally be returned by | 450 indicates the page contains less audio data than would normally be returned by |
335 decoding up through the final packet. | 451 decoding up through the final packet. |
(...skipping 13 matching lines...) Expand all Loading... |
349 title="Restrictions on the Initial Granule Position"> | 465 title="Restrictions on the Initial Granule Position"> |
350 <t> | 466 <t> |
351 The granule position of the first audio data page with a completed packet MAY | 467 The granule position of the first audio data page with a completed packet MAY |
352 be larger than the number of samples contained in packets that complete on | 468 be larger than the number of samples contained in packets that complete on |
353 that page, however it MUST NOT be smaller, unless that page has the 'end of | 469 that page, however it MUST NOT be smaller, unless that page has the 'end of |
354 stream' flag set. | 470 stream' flag set. |
355 Allowing a granule position larger than the number of samples allows the | 471 Allowing a granule position larger than the number of samples allows the |
356 beginning of a stream to be cropped or a live stream to be joined without | 472 beginning of a stream to be cropped or a live stream to be joined without |
357 rewriting the granule position of all the remaining pages. | 473 rewriting the granule position of all the remaining pages. |
358 This means that the PCM sample position just before the first sample to be | 474 This means that the PCM sample position just before the first sample to be |
359 played may be larger than '0'. | 475 played MAY be larger than '0'. |
360 Synchronization when multiplexing with other logical streams still uses the PCM | 476 Synchronization when multiplexing with other logical streams still uses the PCM |
361 sample position relative to '0' to compute sample times. | 477 sample position relative to '0' to compute sample times. |
362 This does not affect the behavior of pre-skip: exactly 'pre-skip' samples | 478 This does not affect the behavior of pre-skip: exactly 'pre-skip' samples |
363 should be skipped from the beginning of the decoded output, even if the | 479 SHOULD be skipped from the beginning of the decoded output, even if the |
364 initial PCM sample position is greater than zero. | 480 initial PCM sample position is greater than zero. |
365 </t> | 481 </t> |
366 | 482 |
367 <t> | 483 <t> |
368 On the other hand, a granule position that is smaller than the number of | 484 On the other hand, a granule position that is smaller than the number of |
369 decoded samples prevents a demuxer from working backwards to assign each | 485 decoded samples prevents a demuxer from working backwards to assign each |
370 packet or each individual sample a valid granule position, since granule | 486 packet or each individual sample a valid granule position, since granule |
371 positions must be non-negative. | 487 positions are non-negative. |
372 A decoder MUST reject as invalid any stream where the granule position is | 488 A decoder MUST reject as invalid any stream where the granule position is |
373 smaller than the number of samples contained in packets that complete on the | 489 smaller than the number of samples contained in packets that complete on the |
374 first audio data page with a completed packet, unless that page has the 'end | 490 first audio data page with a completed packet, unless that page has the 'end |
375 of stream' flag set. | 491 of stream' flag set. |
376 It MAY defer this action until it decodes the last packet completed on that | 492 It MAY defer this action until it decodes the last packet completed on that |
377 page. | 493 page. |
378 </t> | 494 </t> |
379 | 495 |
380 <t> | 496 <t> |
381 If that page has the 'end of stream' flag set, a demuxer MUST reject as invalid | 497 If that page has the 'end of stream' flag set, a demuxer MUST reject as invalid |
382 any stream where its granule position is smaller than the 'pre-skip' amount. | 498 any stream where its granule position is smaller than the 'pre-skip' amount. |
383 This would indicate that more samples should be skipped from the initial | 499 This would indicate that there are more samples to be skipped from the initial |
384 decoded output than exist in the stream. | 500 decoded output than exist in the stream. |
385 If the granule position is smaller than the number of decoded samples produced | 501 If the granule position is smaller than the number of decoded samples produced |
386 by the packets that complete on that page, then a demuxer MUST use an initial | 502 by the packets that complete on that page, then a demuxer MUST use an initial |
387 granule position of '0', and can work forwards from '0' to timestamp | 503 granule position of '0', and can work forwards from '0' to timestamp |
388 individual packets. | 504 individual packets. |
389 If the granule position is larger than the number of decoded samples available, | 505 If the granule position is larger than the number of decoded samples available, |
390 then the demuxer MUST still work backwards as described above, even if the | 506 then the demuxer MUST still work backwards as described above, even if the |
391 'end of stream' flag is set, to determine the initial granule position, and | 507 'end of stream' flag is set, to determine the initial granule position, and |
392 thus the initial PCM sample position. | 508 thus the initial PCM sample position. |
393 Both of these will be greater than '0' in this case. | 509 Both of these will be greater than '0' in this case. |
(...skipping 13 matching lines...) Expand all Loading... |
407 <t> | 523 <t> |
408 When seeking within an Ogg Opus stream, the decoder SHOULD start decoding (and | 524 When seeking within an Ogg Opus stream, the decoder SHOULD start decoding (and |
409 discarding the output) at least 3840 samples (80 ms) prior to the | 525 discarding the output) at least 3840 samples (80 ms) prior to the |
410 seek target in order to ensure that the output audio is correct by the time it | 526 seek target in order to ensure that the output audio is correct by the time it |
411 reaches the seek target. | 527 reaches the seek target. |
412 This 'pre-roll' is separate from, and unrelated to, the 'pre-skip' used at the | 528 This 'pre-roll' is separate from, and unrelated to, the 'pre-skip' used at the |
413 beginning of the stream. | 529 beginning of the stream. |
414 If the point 80 ms prior to the seek target comes before the initial PCM | 530 If the point 80 ms prior to the seek target comes before the initial PCM |
415 sample position, the decoder SHOULD start decoding from the beginning of the | 531 sample position, the decoder SHOULD start decoding from the beginning of the |
416 stream, applying pre-skip as normal, regardless of whether the pre-skip is | 532 stream, applying pre-skip as normal, regardless of whether the pre-skip is |
417 larger or smaller than 80 ms, and then continue to discard the samples | 533 larger or smaller than 80 ms, and then continue to discard samples |
418 required to reach the seek target (if any). | 534 to reach the seek target (if any). |
419 </t> | 535 </t> |
420 </section> | 536 </section> |
421 | 537 |
422 </section> | 538 </section> |
423 | 539 |
424 <section anchor="headers" title="Header Packets"> | 540 <section anchor="headers" title="Header Packets"> |
425 <t> | 541 <t> |
426 An Opus stream contains exactly two mandatory header packets: | 542 An Opus stream contains exactly two mandatory header packets: |
427 an identification header and a comment header. | 543 an identification header and a comment header. |
428 </t> | 544 </t> |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
511 least 3,840 samples (80 ms) is RECOMMENDED to ensure complete | 627 least 3,840 samples (80 ms) is RECOMMENDED to ensure complete |
512 convergence in the decoder. | 628 convergence in the decoder. |
513 <vspace blankLines="1"/> | 629 <vspace blankLines="1"/> |
514 </t> | 630 </t> |
515 <t><spanx style="strong">Input Sample Rate</spanx> (32 bits, unsigned, little | 631 <t><spanx style="strong">Input Sample Rate</spanx> (32 bits, unsigned, little |
516 endian): | 632 endian): |
517 <vspace blankLines="1"/> | 633 <vspace blankLines="1"/> |
518 This field is <spanx style="emph">not</spanx> the sample rate to use for | 634 This field is <spanx style="emph">not</spanx> the sample rate to use for |
519 playback of the encoded data. | 635 playback of the encoded data. |
520 <vspace blankLines="1"/> | 636 <vspace blankLines="1"/> |
521 Opus has a handful of coding modes, with internal audio bandwidths of 4, 6, 8, | 637 Opus can switch between internal audio bandwidths of 4, 6, 8, 12, and |
522 12, and 20 kHz. | 638 20 kHz. |
523 Each packet in the stream may have a different audio bandwidth. | 639 Each packet in the stream can have a different audio bandwidth. |
524 Regardless of the audio bandwidth, the reference decoder supports decoding any | 640 Regardless of the audio bandwidth, the reference decoder supports decoding any |
525 stream at a sample rate of 8, 12, 16, 24, or 48 kHz. | 641 stream at a sample rate of 8, 12, 16, 24, or 48 kHz. |
526 The original sample rate of the encoder input is not preserved by the lossy | 642 The original sample rate of the encoder input is not preserved by the lossy |
527 compression. | 643 compression. |
528 <vspace blankLines="1"/> | 644 <vspace blankLines="1"/> |
529 An Ogg Opus player SHOULD select the playback sample rate according to the | 645 An Ogg Opus player SHOULD select the playback sample rate according to the |
530 following procedure: | 646 following procedure: |
531 <list style="numbers"> | 647 <list style="numbers"> |
532 <t>If the hardware supports 48 kHz playback, decode at 48 kHz.</t> | 648 <t>If the hardware supports 48 kHz playback, decode at 48 kHz.</t> |
533 <t>Otherwise, if the hardware's highest available sample rate is a supported | 649 <t>Otherwise, if the hardware's highest available sample rate is a supported |
534 rate, decode at this sample rate.</t> | 650 rate, decode at this sample rate.</t> |
535 <t>Otherwise, if the hardware's highest available sample rate is less than | 651 <t>Otherwise, if the hardware's highest available sample rate is less than |
536 48 kHz, decode at the highest supported rate above this and resample.</t> | 652 48 kHz, decode at the next highest supported rate above this and |
| 653 resample.</t> |
537 <t>Otherwise, decode at 48 kHz and resample.</t> | 654 <t>Otherwise, decode at 48 kHz and resample.</t> |
538 </list> | 655 </list> |
539 However, the 'Input Sample Rate' field allows the encoder to pass the sample | 656 However, the 'Input Sample Rate' field allows the encoder to pass the sample |
540 rate of the original input stream as metadata. | 657 rate of the original input stream as metadata. |
541 This may be useful when the user requires the output sample rate to match the | 658 This is useful when the user requires the output sample rate to match the |
542 input sample rate. | 659 input sample rate. |
543 For example, a non-player decoder writing PCM format samples to disk might | 660 For example, a non-player decoder writing PCM format samples to disk might |
544 choose to resample the output audio back to the original input sample rate to | 661 choose to resample the output audio back to the original input sample rate to |
545 reduce surprise to the user, who might reasonably expect to get back a file | 662 reduce surprise to the user, who might reasonably expect to get back a file |
546 with the same sample rate as the one they fed to the encoder. | 663 with the same sample rate as the one they fed to the encoder. |
547 <vspace blankLines="1"/> | 664 <vspace blankLines="1"/> |
548 A value of zero indicates 'unspecified'. | 665 A value of zero indicates 'unspecified'. |
549 Encoders SHOULD write the actual input sample rate or zero, but decoder | 666 Encoders SHOULD write the actual input sample rate or zero, but decoder |
550 implementations which do something with this field SHOULD take care to behave | 667 implementations which do something with this field SHOULD take care to behave |
551 sanely if given crazy values (e.g., do not actually upsample the output to | 668 sanely if given crazy values (e.g., do not actually upsample the output to |
552 10 MHz if requested). | 669 10 MHz if requested). |
553 <vspace blankLines="1"/> | 670 <vspace blankLines="1"/> |
554 </t> | 671 </t> |
555 <t><spanx style="strong">Output Gain</spanx> (16 bits, signed, little | 672 <t><spanx style="strong">Output Gain</spanx> (16 bits, signed, little |
556 endian): | 673 endian): |
557 <vspace blankLines="1"/> | 674 <vspace blankLines="1"/> |
558 This is a gain to be applied by the decoder. | 675 This is a gain to be applied by the decoder. |
559 It is 20*log10 of the factor to scale the decoder output by to achieve the | 676 It is 20*log10 of the factor to scale the decoder output by to achieve the |
560 desired playback volume, stored in a 16-bit, signed, two's complement | 677 desired playback volume, stored in a 16-bit, signed, two's complement |
561 fixed-point value with 8 fractional bits (i.e., Q7.8). | 678 fixed-point value with 8 fractional bits (i.e., Q7.8). |
| 679 <figure align="center"> |
| 680 <preamble> |
562 To apply the gain, a decoder could use | 681 To apply the gain, a decoder could use |
563 <figure align="center"> | 682 </preamble> |
564 <artwork align="center"><![CDATA[ | 683 <artwork align="center"><![CDATA[ |
565 sample *= pow(10, output_gain/(20.0*256)) , | 684 sample *= pow(10, output_gain/(20.0*256)) , |
566 ]]></artwork> | 685 ]]></artwork> |
| 686 <postamble> |
| 687 where output_gain is the raw 16-bit value from the header. |
| 688 </postamble> |
567 </figure> | 689 </figure> |
568 where output_gain is the raw 16-bit value from the header. | |
569 <vspace blankLines="1"/> | 690 <vspace blankLines="1"/> |
570 Virtually all players and media frameworks should apply it by default. | 691 Virtually all players and media frameworks SHOULD apply it by default. |
571 If a player chooses to apply any volume adjustment or gain modification, such | 692 If a player chooses to apply any volume adjustment or gain modification, such |
572 as the R128_TRACK_GAIN (see <xref target="comment_header"/>) or a user-facing | 693 as the R128_TRACK_GAIN (see <xref target="comment_header"/>), the adjustment |
573 volume knob, the adjustment MUST be applied in addition to this output gain in | 694 MUST be applied in addition to this output gain in order to achieve playback |
574 order to achieve playback at the desired volume. | 695 at the normalized volume. |
575 <vspace blankLines="1"/> | 696 <vspace blankLines="1"/> |
576 An encoder SHOULD set this field to zero, and instead apply any gain prior to | 697 An encoder SHOULD set this field to zero, and instead apply any gain prior to |
577 encoding, when this is possible and does not conflict with the user's wishes. | 698 encoding, when this is possible and does not conflict with the user's wishes. |
578 The output gain should only be nonzero when the gain is adjusted after | 699 A nonzero output gain indicates the gain was adjusted after encoding, or that |
579 encoding, or when the user wishes to adjust the gain for playback while | 700 a user wished to adjust the gain for playback while preserving the ability |
580 preserving the ability to recover the original signal amplitude. | 701 to recover the original signal amplitude. |
581 <vspace blankLines="1"/> | 702 <vspace blankLines="1"/> |
582 Although the output gain has enormous range (+/- 128 dB, enough to amplify | 703 Although the output gain has enormous range (+/- 128 dB, enough to amplify |
583 inaudible sounds to the threshold of physical pain), most applications can | 704 inaudible sounds to the threshold of physical pain), most applications can |
584 only reasonably use a small portion of this range around zero. | 705 only reasonably use a small portion of this range around zero. |
585 The large range serves in part to ensure that gain can always be losslessly | 706 The large range serves in part to ensure that gain can always be losslessly |
586 transferred between OpusHead and R128_TRACK_GAIN (see below) without | 707 transferred between OpusHead and R128 gain tags (see below) without |
587 saturating. | 708 saturating. |
588 <vspace blankLines="1"/> | 709 <vspace blankLines="1"/> |
589 </t> | 710 </t> |
590 <t><spanx style="strong">Channel Mapping Family</spanx> (8 bits, | 711 <t><spanx style="strong">Channel Mapping Family</spanx> (8 bits, |
591 unsigned): | 712 unsigned): |
592 <vspace blankLines="1"/> | 713 <vspace blankLines="1"/> |
593 This octet indicates the order and semantic meaning of the various channels | 714 This octet indicates the order and semantic meaning of the output channels. |
594 encoded in each Ogg packet. | |
595 <vspace blankLines="1"/> | 715 <vspace blankLines="1"/> |
596 Each possible value of this octet indicates a mapping family, which defines a | 716 Each possible value of this octet indicates a mapping family, which defines a |
597 set of allowed channel counts, and the ordered set of channel names for each | 717 set of allowed channel counts, and the ordered set of channel names for each |
598 allowed channel count. | 718 allowed channel count. |
599 The details are described in <xref target="channel_mapping"/>. | 719 The details are described in <xref target="channel_mapping"/>. |
600 </t> | 720 </t> |
601 <t><spanx style="strong">Channel Mapping Table</spanx>: | 721 <t><spanx style="strong">Channel Mapping Table</spanx>: |
602 This table defines the mapping from encoded streams to output channels. | 722 This table defines the mapping from encoded streams to output channels. |
603 It is omitted when the channel mapping family is 0, but REQUIRED otherwise. | 723 It is omitted when the channel mapping family is 0, but REQUIRED otherwise. |
604 Its contents are specified in <xref target="channel_mapping"/>. | 724 Its contents are specified in <xref target="channel_mapping"/>. |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
644 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 764 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
645 ]]></artwork> | 765 ]]></artwork> |
646 </figure> | 766 </figure> |
647 | 767 |
648 <t> | 768 <t> |
649 The fields in the channel mapping table have the following meaning: | 769 The fields in the channel mapping table have the following meaning: |
650 <list style="numbers" counter="8"> | 770 <list style="numbers" counter="8"> |
651 <t><spanx style="strong">Stream Count</spanx> 'N' (8 bits, unsigned): | 771 <t><spanx style="strong">Stream Count</spanx> 'N' (8 bits, unsigned): |
652 <vspace blankLines="1"/> | 772 <vspace blankLines="1"/> |
653 This is the total number of streams encoded in each Ogg packet. | 773 This is the total number of streams encoded in each Ogg packet. |
654 This value is required to correctly parse the packed Opus packets inside an | 774 This value is necessary to correctly parse the packed Opus packets inside an |
655 Ogg packet, as described in <xref target="packet_organization"/>. | 775 Ogg packet, as described in <xref target="packet_organization"/>. |
656 This value MUST NOT be zero, as without at least one Opus packet with a valid | 776 This value MUST NOT be zero, as without at least one Opus packet with a valid |
657 TOC sequence, a demuxer cannot recover the duration of an Ogg packet. | 777 TOC sequence, a demuxer cannot recover the duration of an Ogg packet. |
658 <vspace blankLines="1"/> | 778 <vspace blankLines="1"/> |
659 For channel mapping family 0, this value defaults to 1, and is not coded. | 779 For channel mapping family 0, this value defaults to 1, and is not coded. |
660 <vspace blankLines="1"/> | 780 <vspace blankLines="1"/> |
661 </t> | 781 </t> |
662 <t><spanx style="strong">Coupled Stream Count</spanx> 'M' (8 bits, unsigned): | 782 <t><spanx style="strong">Coupled Stream Count</spanx> 'M' (8 bits, unsigned): |
663 This is the number of streams whose decoders should be configured to produce | 783 This is the number of streams whose decoders are to be configured to produce |
664 two channels. | 784 two channels. |
665 This MUST be no larger than the total number of streams, N. | 785 This MUST be no larger than the total number of streams, N. |
666 <vspace blankLines="1"/> | 786 <vspace blankLines="1"/> |
667 Each packet in an Opus stream has an internal channel count of 1 or 2, which | 787 Each packet in an Opus stream has an internal channel count of 1 or 2, which |
668 can change from packet to packet. | 788 can change from packet to packet. |
669 This is selected by the encoder depending on the bitrate and the audio being | 789 This is selected by the encoder depending on the bitrate and the audio being |
670 encoded. | 790 encoded. |
671 The original channel count of the encoder input is not preserved by the lossy | 791 The original channel count of the encoder input is not preserved by the lossy |
672 compression. | 792 compression. |
673 <vspace blankLines="1"/> | 793 <vspace blankLines="1"/> |
674 Regardless of the internal channel count, any Opus stream can be decoded as | 794 Regardless of the internal channel count, any Opus stream can be decoded as |
675 mono (a single channel) or stereo (two channels) by appropriate initialization | 795 mono (a single channel) or stereo (two channels) by appropriate initialization |
676 of the decoder. | 796 of the decoder. |
677 The 'coupled stream count' field indicates that the first M Opus decoders are | 797 The 'coupled stream count' field indicates that the first M Opus decoders are |
678 to be initialized in stereo mode, and the remaining N-M decoders are to be | 798 to be initialized for stereo output, and the remaining N-M decoders are to be |
679 initialized in mono mode. | 799 initialized for mono only. |
680 The total number of decoded channels, (M+N), MUST be no larger than 255, as | 800 The total number of decoded channels, (M+N), MUST be no larger than 255, as |
681 there is no way to index more channels than that in the channel mapping. | 801 there is no way to index more channels than that in the channel mapping. |
682 <vspace blankLines="1"/> | 802 <vspace blankLines="1"/> |
683 For channel mapping family 0, this value defaults to C-1 (i.e., 0 for mono | 803 For channel mapping family 0, this value defaults to C-1 (i.e., 0 for mono |
684 and 1 for stereo), and is not coded. | 804 and 1 for stereo), and is not coded. |
685 <vspace blankLines="1"/> | 805 <vspace blankLines="1"/> |
686 </t> | 806 </t> |
687 <t><spanx style="strong">Channel Mapping</spanx> (8*C bits): | 807 <t><spanx style="strong">Channel Mapping</spanx> (8*C bits): |
688 This contains one octet per output channel, indicating which decoded channel | 808 This contains one octet per output channel, indicating which decoded channel |
689 should be used for each one. | 809 is to be used for each one. |
690 Let 'index' be the value of this octet for a particular output channel. | 810 Let 'index' be the value of this octet for a particular output channel. |
691 This value MUST either be smaller than (M+N), or be the special value 255. | 811 This value MUST either be smaller than (M+N), or be the special value 255. |
692 If 'index' is less than 2*M, the output MUST be taken from decoding stream | 812 If 'index' is less than 2*M, the output MUST be taken from decoding stream |
693 ('index'/2) as stereo and selecting the left channel if 'index' is even, and | 813 ('index'/2) as stereo and selecting the left channel if 'index' is even, and |
694 the right channel if 'index' is odd. | 814 the right channel if 'index' is odd. |
695 If 'index' is 2*M or larger, the output MUST be taken from decoding stream | 815 If 'index' is 2*M or larger, but less than 255, the output MUST be taken from |
696 ('index'-M) as mono. | 816 decoding stream ('index'-M) as mono. |
697 If 'index' is 255, the corresponding output channel MUST contain pure silence. | 817 If 'index' is 255, the corresponding output channel MUST contain pure silence. |
698 <vspace blankLines="1"/> | 818 <vspace blankLines="1"/> |
699 The number of output channels, C, is not constrained to match the number of | 819 The number of output channels, C, is not constrained to match the number of |
700 decoded channels (M+N). | 820 decoded channels (M+N). |
701 A single index value MAY appear multiple times, i.e., the same decoded channel | 821 A single index value MAY appear multiple times, i.e., the same decoded channel |
702 might be mapped to multiple output channels. | 822 might be mapped to multiple output channels. |
703 Some decoded channels might not be assigned to any output channel, as well. | 823 Some decoded channels might not be assigned to any output channel, as well. |
704 <vspace blankLines="1"/> | 824 <vspace blankLines="1"/> |
705 For channel mapping family 0, the first index defaults to 0, and if C==2, | 825 For channel mapping family 0, the first index defaults to 0, and if C==2, |
706 the second index defaults to 1. | 826 the second index defaults to 1. |
707 Neither index is coded. | 827 Neither index is coded. |
708 </t> | 828 </t> |
709 </list> | 829 </list> |
710 </t> | 830 </t> |
711 | 831 |
712 <t> | 832 <t> |
713 After producing the output channels, the channel mapping family determines the | 833 After producing the output channels, the channel mapping family determines the |
714 semantic meaning of each one. | 834 semantic meaning of each one. |
715 Currently there are three defined mapping families, although more may be added. | 835 There are three defined mapping families in this specification. |
716 </t> | 836 </t> |
717 | 837 |
718 <section anchor="channel_mapping_0" title="Channel Mapping Family 0"> | 838 <section anchor="channel_mapping_0" title="Channel Mapping Family 0"> |
719 <t> | 839 <t> |
720 Allowed numbers of channels: 1 or 2. | 840 Allowed numbers of channels: 1 or 2. |
721 RTP mapping. | 841 RTP mapping. |
722 </t> | 842 </t> |
723 <t> | 843 <t> |
724 <list style="symbols"> | 844 <list style="symbols"> |
725 <t>1 channel: monophonic (mono).</t> | 845 <t>1 channel: monophonic (mono).</t> |
726 <t>2 channels: stereo (left, right).</t> | 846 <t>2 channels: stereo (left, right).</t> |
727 </list> | 847 </list> |
728 <spanx style="strong">Special mapping</spanx>: This channel mapping value also | 848 <spanx style="strong">Special mapping</spanx>: This channel mapping value also |
729 indicates that the contents consist of a single Opus stream that is stereo if | 849 indicates that the contents consist of a single Opus stream that is stereo if |
730 and only if C==2, with stream index 0 mapped to output channel 0 (mono, or | 850 and only if C==2, with stream index 0 mapped to output channel 0 (mono, or |
731 left channel) and stream index 1 mapped to output channel 1 (right channel) | 851 left channel) and stream index 1 mapped to output channel 1 (right channel) |
732 if stereo. | 852 if stereo. |
733 When the 'channel mapping family' octet has this value, the channel mapping | 853 When the 'channel mapping family' octet has this value, the channel mapping |
734 table MUST be omitted from the ID header packet. | 854 table MUST be omitted from the ID header packet. |
735 </t> | 855 </t> |
736 </section> | 856 </section> |
737 | 857 |
738 <section anchor="channel_mapping_1" title="Channel Mapping Family 1"> | 858 <section anchor="channel_mapping_1" title="Channel Mapping Family 1"> |
739 <t> | 859 <t> |
740 Allowed numbers of channels: 1...8. | 860 Allowed numbers of channels: 1...8. |
741 Vorbis channel order. | 861 Vorbis channel order. |
742 </t> | 862 </t> |
743 <t> | 863 <t> |
744 Each channel is assigned to a speaker location in a conventional surround | 864 Each channel is assigned to a speaker location in a conventional surround |
745 configuration. | 865 arrangement. |
746 Specific locations depend on the number of channels, and are given below | 866 Specific locations depend on the number of channels, and are given below |
747 in order of the corresponding channel indices. | 867 in order of the corresponding channel indices. |
748 <list style="symbols"> | 868 <list style="symbols"> |
749 <t>1 channel: monophonic (mono).</t> | 869 <t>1 channel: monophonic (mono).</t> |
750 <t>2 channels: stereo (left, right).</t> | 870 <t>2 channels: stereo (left, right).</t> |
751 <t>3 channels: linear surround (left, center, right).</t> | 871 <t>3 channels: linear surround (left, center, right).</t> |
752 <t>4 channels: quadraphonic (front left, front right, rear left
, rear right).</t> | 872 <t>4 channels: quadraphonic (front left, front right, rear left
, rear right).</t> |
753 <t>5 channels: 5.0 surround (front left, front center, front ri
ght, rear left, rear right).</t> | 873 <t>5 channels: 5.0 surround (front left, front center, front ri
ght, rear left, rear right).</t> |
754 <t>6 channels: 5.1 surround (front left, front center, front ri
ght, rear left, rear right, LFE).</t> | 874 <t>6 channels: 5.1 surround (front left, front center, front ri
ght, rear left, rear right, LFE).</t> |
755 <t>7 channels: 6.1 surround (front left, front center, front ri
ght, side left, side right, rear center, LFE).</t> | 875 <t>7 channels: 6.1 surround (front left, front center, front ri
ght, side left, side right, rear center, LFE).</t> |
756 <t>8 channels: 7.1 surround (front left, front center, front ri
ght, side left, side right, rear left, rear right, LFE).</t> | 876 <t>8 channels: 7.1 surround (front left, front center, front ri
ght, side left, side right, rear left, rear right, LFE).</t> |
757 </list> | 877 </list> |
758 This set of surround configurations and speaker location orderings is the same | 878 </t> |
759 as the one used by the Vorbis codec <xref target="vorbis-mapping"/>. | 879 <t> |
| 880 This set of surround options and speaker location orderings is the same |
| 881 as those used by the Vorbis codec <xref target="vorbis-mapping"/>. |
760 The ordering is different from the one used by the | 882 The ordering is different from the one used by the |
761 WAVE <xref target="wave-multichannel"/> and | 883 WAVE <xref target="wave-multichannel"/> and |
762 FLAC <xref target="flac"/> formats, | 884 FLAC <xref target="flac"/> formats, |
763 so correct ordering requires permutation of the output channels when encoding | 885 so correct ordering requires permutation of the output channels when decoding |
764 from or decoding to those formats. | 886 to or encoding from those formats. |
765 'LFE' here refers to a Low Frequency Effects channel, often mapped to a subwoofer | 887 'LFE' here refers to a Low Frequency Effects channel, often mapped to a subwoofer |
766 with no particular spacial position. | 888 with no particular spatial position. |
767 Implementations SHOULD identify 'side' or 'rear' speaker locations with | 889 Implementations SHOULD identify 'side' or 'rear' speaker locations with |
768 'surround' and 'back' as appropriate when interfacing with audio formats | 890 'surround' and 'back' as appropriate when interfacing with audio formats |
769 or systems which prefer that terminology. | 891 or systems which prefer that terminology. |
770 Speaker configurations other than those described here are not supported. | |
771 </t> | 892 </t> |
772 </section> | 893 </section> |
773 | 894 |
774 <section anchor="channel_mapping_255" | 895 <section anchor="channel_mapping_255" |
775 title="Channel Mapping Family 255"> | 896 title="Channel Mapping Family 255"> |
776 <t> | 897 <t> |
777 Allowed numbers of channels: 1...255. | 898 Allowed numbers of channels: 1...255. |
778 No defined channel meaning. | 899 No defined channel meaning. |
779 </t> | 900 </t> |
780 <t> | 901 <t> |
(...skipping 23 matching lines...) Expand all Loading... |
804 Players SHOULD perform channel mixing to increase or reduce the number of | 925 Players SHOULD perform channel mixing to increase or reduce the number of |
805 channels as needed. | 926 channels as needed. |
806 </t> | 927 </t> |
807 | 928 |
808 <t> | 929 <t> |
809 Implementations MAY use the following matrices to implement downmixing from | 930 Implementations MAY use the following matrices to implement downmixing from |
810 multichannel files using <xref target="channel_mapping_1">Channel Mapping | 931 multichannel files using <xref target="channel_mapping_1">Channel Mapping |
811 Family 1</xref>, which are known to give acceptable results for stereo. | 932 Family 1</xref>, which are known to give acceptable results for stereo. |
812 Matrices for 3 and 4 channels are normalized so each coefficient row sums | 933 Matrices for 3 and 4 channels are normalized so each coefficient row sums |
813 to 1 to avoid clipping. | 934 to 1 to avoid clipping. |
814 For 5 or more channels they are normalized to 2 as a compromize between | 935 For 5 or more channels they are normalized to 2 as a compromise between |
815 clipping and dynamic range reduction. | 936 clipping and dynamic range reduction. |
816 </t> | 937 </t> |
817 <t> | 938 <t> |
818 In these matrices the front left and front right channels are generally | 939 In these matrices the front left and front right channels are generally |
819 passed through directly. | 940 passed through directly. |
820 When a surround channel is split between both the left and right stereo | 941 When a surround channel is split between both the left and right stereo |
821 channels, coefficients are chosen so their squares sum to 1, which | 942 channels, coefficients are chosen so their squares sum to 1, which |
822 helps preserve the perceived intensity. | 943 helps preserve the perceived intensity. |
823 Rear channels are mixed more diffusely or attenuated to maintain focus | 944 Rear channels are mixed more diffusely or attenuated to maintain focus |
824 on the front channels. | 945 on the front channels. |
825 </t> | 946 </t> |
826 | 947 |
827 <figure anchor="downmix-matrix-3" | 948 <figure anchor="downmix-matrix-3" |
828 title="Stereo downmix matrix for the linear surround channel mapping" | 949 title="Stereo downmix matrix for the linear surround channel mapping" |
829 align="center"> | 950 align="center"> |
830 <artwork align="center"><![CDATA[ | 951 <artwork align="center"><![CDATA[ |
831 Left output = ( 0.585786 * left + 0.414214 * center ) | 952 L output = ( 0.585786 * left + 0.414214 * center ) |
832 Right output = ( 0.414214 * center + 0.585786 * right ) | 953 R output = ( 0.414214 * center + 0.585786 * right ) |
833 ]]></artwork> | 954 ]]></artwork> |
834 <postamble> | 955 <postamble> |
835 Exact coefficient values are 1 and 1/sqrt(2), multiplied by | 956 Exact coefficient values are 1 and 1/sqrt(2), multiplied by |
836 1/(1 + 1/sqrt(2)) for normalization. | 957 1/(1 + 1/sqrt(2)) for normalization. |
837 </postamble> | 958 </postamble> |
838 </figure> | 959 </figure> |
839 | 960 |
840 <figure anchor="downmix-matrix-4" | 961 <figure anchor="downmix-matrix-4" |
841 title="Stereo downmix matrix for the quadraphonic channel mapping" | 962 title="Stereo downmix matrix for the quadraphonic channel mapping" |
842 align="center"> | 963 align="center"> |
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
958 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 1079 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
959 | User Comment #1 String Length | | 1080 | User Comment #1 String Length | |
960 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 1081 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
961 : : | 1082 : : |
962 ]]></artwork> | 1083 ]]></artwork> |
963 </figure> | 1084 </figure> |
964 | 1085 |
965 <t> | 1086 <t> |
966 The comment header consists of a 64-bit magic signature, followed by data in | 1087 The comment header consists of a 64-bit magic signature, followed by data in |
967 the same format as the <xref target="vorbis-comment"/> header used in Ogg | 1088 the same format as the <xref target="vorbis-comment"/> header used in Ogg |
968 Vorbis (without the final "framing bit"), Ogg Theora, and Speex. | 1089 Vorbis, except (like Ogg Theora and Speex) the final "framing bit" specified |
| 1090 in the Vorbis spec is not present. |
969 <list style="numbers"> | 1091 <list style="numbers"> |
970 <t><spanx style="strong">Magic Signature</spanx>: | 1092 <t><spanx style="strong">Magic Signature</spanx>: |
971 <vspace blankLines="1"/> | 1093 <vspace blankLines="1"/> |
972 This is an 8-octet (64-bit) field that allows codec identification and is | 1094 This is an 8-octet (64-bit) field that allows codec identification and is |
973 human-readable. | 1095 human-readable. |
974 It contains, in order, the magic numbers: | 1096 It contains, in order, the magic numbers: |
975 <list style="empty"> | 1097 <list style="empty"> |
976 <t>0x4F 'O'</t> | 1098 <t>0x4F 'O'</t> |
977 <t>0x70 'p'</t> | 1099 <t>0x70 'p'</t> |
978 <t>0x75 'u'</t> | 1100 <t>0x75 'u'</t> |
(...skipping 12 matching lines...) Expand all Loading... |
991 <vspace blankLines="1"/> | 1113 <vspace blankLines="1"/> |
992 This field gives the length of the following vendor string, in octets. | 1114 This field gives the length of the following vendor string, in octets. |
993 It MUST NOT indicate that the vendor string is longer than the rest of the | 1115 It MUST NOT indicate that the vendor string is longer than the rest of the |
994 packet. | 1116 packet. |
995 <vspace blankLines="1"/> | 1117 <vspace blankLines="1"/> |
996 </t> | 1118 </t> |
997 <t><spanx style="strong">Vendor String</spanx> (variable length, UTF-8 vector): | 1119 <t><spanx style="strong">Vendor String</spanx> (variable length, UTF-8 vector): |
998 <vspace blankLines="1"/> | 1120 <vspace blankLines="1"/> |
999 This is a simple human-readable tag for vendor information, encoded as a UTF-8 | 1121 This is a simple human-readable tag for vendor information, encoded as a UTF-8 |
1000 string <xref target="RFC3629"/>. | 1122 string <xref target="RFC3629"/>. |
1001 No terminating null octet is required. | 1123 No terminating null octet is necessary. |
1002 <vspace blankLines="1"/> | 1124 <vspace blankLines="1"/> |
1003 This tag is intended to identify the codec encoder and encapsulation | 1125 This tag is intended to identify the codec encoder and encapsulation |
1004 implementations, for tracing differences in technical behavior. | 1126 implementations, for tracing differences in technical behavior. |
1005 User-facing encoding applications can use the 'ENCODER' user comment tag | 1127 User-facing encoding applications can use the 'ENCODER' user comment tag |
1006 to identify themselves. | 1128 to identify themselves. |
1007 <vspace blankLines="1"/> | 1129 <vspace blankLines="1"/> |
1008 </t> | 1130 </t> |
1009 <t><spanx style="strong">User Comment List Length</spanx> (32 bits, unsigned, | 1131 <t><spanx style="strong">User Comment List Length</spanx> (32 bits, unsigned, |
1010 little endian): | 1132 little endian): |
1011 <vspace blankLines="1"/> | 1133 <vspace blankLines="1"/> |
(...skipping 22 matching lines...) Expand all Loading... |
1034 </t> | 1156 </t> |
1035 </list> | 1157 </list> |
1036 </t> | 1158 </t> |
1037 | 1159 |
1038 <t> | 1160 <t> |
1039 The vendor string length and user comment list length are REQUIRED, and | 1161 The vendor string length and user comment list length are REQUIRED, and |
1040 implementations SHOULD reject comment headers that do not contain enough data | 1162 implementations SHOULD reject comment headers that do not contain enough data |
1041 for these fields, or that do not contain enough data for the corresponding | 1163 for these fields, or that do not contain enough data for the corresponding |
1042 vendor string or user comments they describe. | 1164 vendor string or user comments they describe. |
1043 Making this check before allocating the associated memory to contain the data | 1165 Making this check before allocating the associated memory to contain the data |
1044 may help prevent a possible Denial-of-Service (DoS) attack from small comment | 1166 helps prevent a possible Denial-of-Service (DoS) attack from small comment |
1045 headers that claim to contain strings longer than the entire packet or more | 1167 headers that claim to contain strings longer than the entire packet or more |
1046 user comments than could possibly fit in the packet. | 1168 user comments than could possibly fit in the packet. |
1047 </t> | 1169 </t> |
1048 | 1170 |
1049 <t> | 1171 <t> |
| 1172 Immediately following the user comment list, the comment header MAY |
| 1173 contain zero-padding or other binary data which is not specified here. |
| 1174 If the least-significant bit of the first byte of this data is 1, then editors |
| 1175 SHOULD preserve the contents of this data when updating the tags, but if this |
| 1176 bit is 0, all such data MAY be treated as padding, and truncated or discarded |
| 1177 as desired. |
| 1178 </t> |
| 1179 |
| 1180 <section anchor="comment_format" title="Tag Definitions"> |
| 1181 <t> |
1050 The user comment strings follow the NAME=value format described by | 1182 The user comment strings follow the NAME=value format described by |
1051 <xref target="vorbis-comment"/> with the same recommended tag names. | 1183 <xref target="vorbis-comment"/> with the same recommended tag names: |
1052 One new comment tag is introduced for Ogg Opus: | 1184 ARTIST, TITLE, DATE, ALBUM, and so on. |
| 1185 </t> |
| 1186 <t> |
| 1187 Two new comment tags are introduced here: |
| 1188 </t> |
| 1189 |
1053 <figure align="center"> | 1190 <figure align="center"> |
| 1191 <preamble>An optional gain for track normalization</preamble> |
1054 <artwork align="left"><![CDATA[ | 1192 <artwork align="left"><![CDATA[ |
1055 R128_TRACK_GAIN=-573 | 1193 R128_TRACK_GAIN=-573 |
1056 ]]></artwork> | 1194 ]]></artwork> |
1057 </figure> | 1195 <postamble> |
1058 representing the volume shift needed to normalize the track's volume. | 1196 representing the volume shift needed to normalize the track's volume |
| 1197 during isolated playback, in random shuffle, and so on. |
1059 The gain is a Q7.8 fixed point number in dB, as in the ID header's 'output | 1198 The gain is a Q7.8 fixed point number in dB, as in the ID header's 'output |
1060 gain' field. | 1199 gain' field. |
| 1200 </postamble> |
| 1201 </figure> |
| 1202 <t> |
1061 This tag is similar to the REPLAYGAIN_TRACK_GAIN tag in | 1203 This tag is similar to the REPLAYGAIN_TRACK_GAIN tag in |
1062 Vorbis <xref target="replay-gain"/>, except that the normal volume | 1204 Vorbis <xref target="replay-gain"/>, except that the normal volume |
1063 reference is the <xref target="EBU-R128"/> standard. | 1205 reference is the <xref target="EBU-R128"/> standard. |
1064 </t> | 1206 </t> |
| 1207 <figure align="center"> |
| 1208 <preamble>An optional gain for album normalization</preamble> |
| 1209 <artwork align="left"><![CDATA[ |
| 1210 R128_ALBUM_GAIN=111 |
| 1211 ]]></artwork> |
| 1212 <postamble> |
| 1213 representing the volume shift needed to normalize the overall volume when |
| 1214 played as part of a particular collection of tracks. |
| 1215 The gain is also a Q7.8 fixed point number in dB, as in the ID header's |
| 1216 'output gain' field. |
| 1217 </postamble> |
| 1218 </figure> |
1065 <t> | 1219 <t> |
1066 An Ogg Opus file MUST NOT have more than one such tag, and if present its | 1220 An Ogg Opus stream MUST NOT have more than one of each tag, and if present |
1067 value MUST be an integer from -32768 to 32767, inclusive, represented in | 1221 their values MUST be an integer from -32768 to 32767, inclusive, |
1068 ASCII with no whitespace. | 1222 represented in ASCII as a base 10 number with no whitespace. |
1069 If present, it MUST correctly represent the R128 normalization gain relative | 1223 A leading '+' or '-' character is valid. |
1070 to the 'output gain' field specified in the ID header. | 1224 Leading zeros are also permitted, but the value MUST be represented by |
1071 If a player chooses to make use of the R128_TRACK_GAIN tag, it MUST be | 1225 no more than 6 characters. |
1072 applied <spanx style="emph">in addition</spanx> to the 'output gain' value. | 1226 Other non-digit characters MUST NOT be present. |
1073 If an encoder wishes to use R128 normalization, and the output gain is not | 1227 </t> |
1074 otherwise constrained or specified, the encoder SHOULD write the R128 gain | 1228 <t> |
1075 into the 'output gain' field and store a tag containing "R128_TRACK_GAIN=0". | 1229 If present, R128_TRACK_GAIN and R128_ALBUM_GAIN MUST correctly represent |
1076 That is, it should assume that by default tools will respect the 'output gain' | 1230 the R128 normalization gain relative to the 'output gain' field specified |
| 1231 in the ID header. |
| 1232 If a player chooses to make use of the R128_TRACK_GAIN tag or the |
| 1233 R128_ALBUM_GAIN tag, it MUST apply those gains |
| 1234 <spanx style="emph">in addition</spanx> to the 'output gain' value. |
| 1235 If a tool modifies the ID header's 'output gain' field, it MUST also update or |
| 1236 remove the R128_TRACK_GAIN and R128_ALBUM_GAIN comment tags if present. |
| 1237 An encoder SHOULD assume that by default tools will respect the 'output gain' |
1077 field, and not the comment tag. | 1238 field, and not the comment tag. |
1078 If a tool modifies the ID header's 'output gain' field, it MUST also update or | |
1079 remove the R128_TRACK_GAIN comment tag. | |
1080 </t> | 1239 </t> |
1081 <t> | 1240 <t> |
1082 To avoid confusion with multiple normalization schemes, an Opus comment header | 1241 To avoid confusion with multiple normalization schemes, an Opus comment header |
1083 SHOULD NOT contain any of the REPLAYGAIN_TRACK_GAIN, REPLAYGAIN_TRACK_PEAK, | 1242 SHOULD NOT contain any of the REPLAYGAIN_TRACK_GAIN, REPLAYGAIN_TRACK_PEAK, |
1084 REPLAYGAIN_ALBUM_GAIN, or REPLAYGAIN_ALBUM_PEAK tags. | 1243 REPLAYGAIN_ALBUM_GAIN, or REPLAYGAIN_ALBUM_PEAK tags. |
| 1244 <xref target="EBU-R128"/> normalization is preferred to the earlier |
| 1245 REPLAYGAIN schemes because of its clear definition and adoption by industry. |
| 1246 Peak normalizations are difficult to calculate reliably for lossy codecs |
| 1247 because of variation in excursion heights due to decoder differences. |
| 1248 In the authors' investigations they were not applied consistently or broadly |
| 1249 enough to merit inclusion here. |
1085 </t> | 1250 </t> |
1086 <t> | 1251 </section> <!-- end comment_format --> |
1087 There is no Opus comment tag corresponding to REPLAYGAIN_ALBUM_GAIN. | 1252 </section> <!-- end comment_header --> |
1088 That information should instead be stored in the ID header's 'output gain' | |
1089 field. | |
1090 </t> | |
1091 </section> | |
1092 | 1253 |
1093 </section> | 1254 </section> <!-- end headers --> |
1094 | 1255 |
1095 <section anchor="packet_size_limits" title="Packet Size Limits"> | 1256 <section anchor="packet_size_limits" title="Packet Size Limits"> |
1096 <t> | 1257 <t> |
1097 Technically valid Opus packets can be arbitrarily large due to the padding | 1258 Technically, valid Opus packets can be arbitrarily large due to the padding |
1098 format, although the amount of non-padding data they can contain is bounded. | 1259 format, although the amount of non-padding data they can contain is bounded. |
1099 These packets might be spread over a similarly enormous number of Ogg pages. | 1260 These packets might be spread over a similarly enormous number of Ogg pages. |
1100 Encoders SHOULD use no more padding than required to make a variable bitrate | 1261 Encoders SHOULD use no more padding than is necessary to make a variable |
1101 (VBR) stream constant bitrate (CBR). | 1262 bitrate (VBR) stream constant bitrate (CBR). |
1102 Decoders SHOULD avoid attempting to allocate excessive amounts of memory when | 1263 Decoders SHOULD avoid attempting to allocate excessive amounts of memory when |
1103 presented with a very large packet. | 1264 presented with a very large packet. |
1104 The presence of an extremely large packet in the stream could indicate a | 1265 The presence of an extremely large packet in the stream could indicate a |
1105 memory exhaustion attack or stream corruption. | 1266 memory exhaustion attack or stream corruption. |
1106 Decoders SHOULD reject a packet that is too large to process, and display a | 1267 Decoders SHOULD reject a packet that is too large to process, and display a |
1107 warning message. | 1268 warning message. |
1108 </t> | 1269 </t> |
1109 <t> | 1270 <t> |
1110 In an Ogg Opus stream, the largest possible valid packet that does not use | 1271 In an Ogg Opus stream, the largest possible valid packet that does not use |
1111 padding has a size of (61,298*N - 2) octets, or about 60 kB per | 1272 padding has a size of (61,298*N - 2) octets, or about 60 kB per |
1112 Opus stream. | 1273 Opus stream. |
1113 With 255 streams, this is 15,630,988 octets (14.9 MB) and can | 1274 With 255 streams, this is 15,630,988 octets (14.9 MB) and can |
1114 span up to 61,298 Ogg pages, all but one of which will have a granule | 1275 span up to 61,298 Ogg pages, all but one of which will have a granule |
1115 position of -1. | 1276 position of -1. |
1116 This is of course a very extreme packet, consisting of 255 streams, each | 1277 This is of course a very extreme packet, consisting of 255 streams, each |
1117 containing 120 ms of audio encoded as 2.5 ms frames, each frame | 1278 containing 120 ms of audio encoded as 2.5 ms frames, each frame |
1118 using the maximum possible number of octets (1275) and stored in the least | 1279 using the maximum possible number of octets (1275) and stored in the least |
1119 efficient manner allowed (a VBR code 3 Opus packet). | 1280 efficient manner allowed (a VBR code 3 Opus packet). |
1120 Even in such a packet, most of the data will be zeros as 2.5 ms frames | 1281 Even in such a packet, most of the data will be zeros as 2.5 ms frames |
1121 cannot actually use all 1275 octets. | 1282 cannot actually use all 1275 octets. |
1122 The largest packet consisting of entirely useful data is | 1283 The largest packet consisting of entirely useful data is |
1123 (15,326*N - 2) octets, or about 15 kB per stream. | 1284 (15,326*N - 2) octets, or about 15 kB per stream. |
1124 This corresponds to 120 ms of audio encoded as 10 ms frames in either | 1285 This corresponds to 120 ms of audio encoded as 10 ms frames in either |
1125 LP or Hybrid mode, but at a data rate of over 1 Mbps, which makes little | 1286 SILK or Hybrid mode, but at a data rate of over 1 Mbps, which makes little |
1126 sense for the quality achieved. | 1287 sense for the quality achieved. |
1127 A more reasonable limit is (7,664*N - 2) octets, or about 7.5 kB | 1288 A more reasonable limit is (7,664*N - 2) octets, or about 7.5 kB |
1128 per stream. | 1289 per stream. |
1129 This corresponds to 120 ms of audio encoded as 20 ms stereo MDCT-mode | 1290 This corresponds to 120 ms of audio encoded as 20 ms stereo CELT mode |
1130 frames, with a total bitrate just under 511 kbps (not counting the Ogg | 1291 frames, with a total bitrate just under 511 kbps (not counting the Ogg |
1131 encapsulation overhead). | 1292 encapsulation overhead). |
1132 With N=8, the maximum number of channels currently defined by mapping | 1293 With N=8, the maximum number of channels currently defined by mapping |
1133 family 1, this gives a maximum packet size of 61,310 octets, or just | 1294 family 1, this gives a maximum packet size of 61,310 octets, or just |
1134 under 60 kB. | 1295 under 60 kB. |
1135 This is still quite conservative, as it assumes each output channel is taken | 1296 This is still quite conservative, as it assumes each output channel is taken |
1136 from one decoded channel of a stereo packet. | 1297 from one decoded channel of a stereo packet. |
1137 An implementation could reasonably choose any of these numbers for its internal | 1298 An implementation could reasonably choose any of these numbers for its internal |
1138 limits. | 1299 limits. |
1139 </t> | 1300 </t> |
1140 </section> | 1301 </section> |
1141 | 1302 |
1142 <section anchor="encoder" title="Encoder Guidelines"> | 1303 <section anchor="encoder" title="Encoder Guidelines"> |
1143 <t> | 1304 <t> |
1144 When encoding Opus files, Ogg encoders should take into account the | 1305 When encoding Opus streams, Ogg muxers SHOULD take into account the |
1145 algorithmic delay of the Opus encoder. | 1306 algorithmic delay of the Opus encoder. |
1146 </t> | 1307 </t> |
1147 <figure align="center"> | 1308 <figure align="center"> |
1148 <preamble> | 1309 <preamble> |
1149 In encoders derived from the reference implementation, the number of | 1310 In encoders derived from the reference implementation, the number of |
1150 samples can be queried with: | 1311 samples can be queried with: |
1151 </preamble> | 1312 </preamble> |
1152 <artwork align="center"><![CDATA[ | 1313 <artwork align="center"><![CDATA[ |
1153 opus_encoder_ctl(encoder_state, OPUS_GET_LOOKAHEAD, &samples_delay); | 1314 opus_encoder_ctl(encoder_state, OPUS_GET_LOOKAHEAD(&delay_samples)); |
1154 ]]></artwork> | 1315 ]]></artwork> |
1155 </figure> | 1316 </figure> |
1156 <t> | 1317 <t> |
1157 To achieve good quality in the very first samples of a stream, the Ogg encoder | 1318 To achieve good quality in the very first samples of a stream, the Ogg encoder |
1158 MAY use LPC extrapolation to generate at least 120 extra samples | 1319 MAY use linear predictive coding (LPC) extrapolation |
1159 (extra_samples) at the beginning to avoid the Opus encoder having to encode | 1320 <xref target="linear-prediction"/> to generate at least 120 extra samples at |
1160 a discontinuous signal. | 1321 the beginning to avoid the Opus encoder having to encode a discontinuous |
1161 For an input file containing length samples, the Ogg encoder SHOULD set the | 1322 signal. |
1162 preskip header flag to samples_delay+extra_samples, encode at least | 1323 For an input file containing 'length' samples, the Ogg encoder SHOULD set the |
1163 length+samples_delay+extra_samples samples, and set the granulepos of the last | 1324 pre-skip header value to delay_samples+extra_samples, encode at least |
1164 page to length+samples_delay+extra_samples. | 1325 length+delay_samples+extra_samples samples, and set the granulepos of the last |
| 1326 page to length+delay_samples+extra_samples. |
1165 This ensures that the encoded file has the same duration as the original, with | 1327 This ensures that the encoded file has the same duration as the original, with |
1166 no time offset. The best way to pad the end of the stream is to also use LPC | 1328 no time offset. The best way to pad the end of the stream is to also use LPC |
1167 extrapolation, but zero-padding is also acceptable. | 1329 extrapolation, but zero-padding is also acceptable. |
1168 </t> | 1330 </t> |
1169 | 1331 |
1170 <section anchor="lpc" title="LPC Extrapolation"> | 1332 <section anchor="lpc" title="LPC Extrapolation"> |
1171 <t> | 1333 <t> |
1172 The first step in LPC extrapolation is to compute linear prediction | 1334 The first step in LPC extrapolation is to compute linear prediction |
1173 coefficients. | 1335 coefficients. <xref target="lpc-sample"/> |
1174 When extending the end of the signal, order-N (typically with N ranging from 8 | 1336 When extending the end of the signal, order-N (typically with N ranging from 8 |
1175 to 40) LPC analysis is performed on a window near the end of the signal. | 1337 to 40) LPC analysis is performed on a window near the end of the signal. |
1176 The last N samples are used as memory to an infinite impulse response (IIR) | 1338 The last N samples are used as memory to an infinite impulse response (IIR) |
1177 filter. | 1339 filter. |
1178 </t> | 1340 </t> |
1179 <figure align="center"> | 1341 <figure align="center"> |
1180 <preamble> | 1342 <preamble> |
1181 The filter is then applied on a zero input to extrapolate the end of the signal. | 1343 The filter is then applied on a zero input to extrapolate the end of the signal. |
1182 Let a(k) be the kth LPC coefficient and x(n) be the nth sample of the signal, | 1344 Let a(k) be the kth LPC coefficient and x(n) be the nth sample of the signal, |
1183 each new sample past the end of the signal is computed as: | 1345 each new sample past the end of the signal is computed as: |
(...skipping 14 matching lines...) Expand all Loading... |
1198 When extending the beginning of the signal, it is best to apply a "fade in" to | 1360 When extending the beginning of the signal, it is best to apply a "fade in" to |
1199 the extrapolated signal, e.g. by multiplying it by a half-Hanning window | 1361 the extrapolated signal, e.g. by multiplying it by a half-Hanning window |
1200 <xref target="hanning"/>. | 1362 <xref target="hanning"/>. |
1201 </t> | 1363 </t> |
1202 | 1364 |
1203 </section> | 1365 </section> |
1204 | 1366 |
1205 <section anchor="continuous_chaining" title="Continuous Chaining"> | 1367 <section anchor="continuous_chaining" title="Continuous Chaining"> |
1206 <t> | 1368 <t> |
1207 In some applications, such as Internet radio, it is desirable to cut a long | 1369 In some applications, such as Internet radio, it is desirable to cut a long |
1208 streams into smaller chains, e.g. so the comment header can be updated. | 1370 stream into smaller chains, e.g. so the comment header can be updated. |
1209 This can be done simply by separating the input streams into segments and | 1371 This can be done simply by separating the input streams into segments and |
1210 encoding each segment independently. | 1372 encoding each segment independently. |
1211 The drawback of this approach is that it creates a small discontinuity | 1373 The drawback of this approach is that it creates a small discontinuity |
1212 at the boundary due to the lossy nature of Opus. | 1374 at the boundary due to the lossy nature of Opus. |
1213 An encoder MAY avoid this discontinuity by using the following procedure: | 1375 An encoder MAY avoid this discontinuity by using the following procedure: |
1214 <list style="numbers"> | 1376 <list style="numbers"> |
1215 <t>Encode the last frame of the first segment as an independent frame by | 1377 <t>Encode the last frame of the first segment as an independent frame by |
1216 turning off all forms of inter-frame prediction. | 1378 turning off all forms of inter-frame prediction. |
1217 De-emphasis is allowed.</t> | 1379 De-emphasis is allowed.</t> |
1218 <t>Set the granulepos of the last page to a point near the end of the last | 1380 <t>Set the granulepos of the last page to a point near the end of the last |
1219 frame.</t> | 1381 frame.</t> |
1220 <t>Begin the second segment with a copy of the last frame of the first | 1382 <t>Begin the second segment with a copy of the last frame of the first |
1221 segment.</t> | 1383 segment.</t> |
1222 <t>Set the preskip flag of the second stream in such a way as to properly | 1384 <t>Set the pre-skip value of the second stream in such a way as to properly |
1223 join the two streams.</t> | 1385 join the two streams.</t> |
1224 <t>Continue the encoding process normally from there, without any reset to | 1386 <t>Continue the encoding process normally from there, without any reset to |
1225 the encoder.</t> | 1387 the encoder.</t> |
1226 </list> | 1388 </list> |
1227 </t> | 1389 </t> |
| 1390 <figure align="center"> |
| 1391 <preamble> |
| 1392 In encoders derived from the reference implementation, inter-frame prediction |
| 1393 can be turned off by calling: |
| 1394 </preamble> |
| 1395 <artwork align="center"><![CDATA[ |
| 1396 opus_encoder_ctl(encoder_state, OPUS_SET_PREDICTION_DISABLED(1)); |
| 1397 ]]></artwork> |
| 1398 <postamble> |
| 1399 For best results, this implementation requires that prediction be explicitly |
| 1400 enabled again before resuming normal encoding, even after a reset. |
| 1401 </postamble> |
| 1402 </figure> |
| 1403 |
1228 </section> | 1404 </section> |
1229 | 1405 |
1230 </section> | 1406 </section> |
1231 | 1407 |
1232 <section anchor="implementation" title="Implementation Status"> | 1408 <section anchor="implementation" title="Implementation Status"> |
1233 <t> | 1409 <t> |
1234 A brief summary of major implementations of this draft is available | 1410 A brief summary of major implementations of this draft is available |
1235 at <eref target="https://wiki.xiph.org/OggOpusImplementation"/>, | 1411 at <eref target="https://wiki.xiph.org/OggOpusImplementation"/>, |
1236 along with their status. | 1412 along with their status. |
1237 </t> | 1413 </t> |
1238 <t> | 1414 <t> |
1239 [Note to RFC Editor: please remove this entire section before | 1415 [Note to RFC Editor: please remove this entire section before |
1240 final publication per <xref target="draft-sheffer-running-code"/>.] | 1416 final publication per <xref target="RFC6982"/>.] |
1241 </t> | 1417 </t> |
1242 </section> | 1418 </section> |
1243 | 1419 |
1244 <section anchor="security" title="Security Considerations"> | 1420 <section anchor="security" title="Security Considerations"> |
1245 <t> | 1421 <t> |
1246 Implementations of the Opus codec need to take appropriate security | 1422 Implementations of the Opus codec need to take appropriate security |
1247 considerations into account, as outlined in <xref target="RFC4732"/>. | 1423 considerations into account, as outlined in <xref target="RFC4732"/>. |
1248 This is just as much a problem for the container as it is for the codec itself. | 1424 This is just as much a problem for the container as it is for the codec itself. |
1249 It is extremely important for the decoder to be robust against malicious | 1425 It is extremely important for the decoder to be robust against malicious |
1250 payloads. | 1426 payloads. |
1251 Malicious payloads must not cause the decoder to overrun its allocated memory | 1427 Malicious payloads MUST NOT cause the decoder to overrun its allocated memory |
1252 or to take an excessive amount of resources to decode. | 1428 or to take an excessive amount of resources to decode. |
1253 Although problems in encoders are typically rarer, the same applies to the | 1429 Although problems in encoders are typically rarer, the same applies to the |
1254 encoder. | 1430 encoder. |
1255 Malicious audio streams must not cause the encoder to misbehave because this | 1431 Malicious audio streams MUST NOT cause the encoder to misbehave because this |
1256 would allow an attacker to attack transcoding gateways. | 1432 would allow an attacker to attack transcoding gateways. |
1257 </t> | 1433 </t> |
1258 | 1434 |
1259 <t> | 1435 <t> |
1260 Like most other container formats, Ogg Opus files should not be used with | 1436 Like most other container formats, Ogg Opus streams SHOULD NOT be used with |
1261 insecure ciphers or cipher modes that are vulnerable to known-plaintext | 1437 insecure ciphers or cipher modes that are vulnerable to known-plaintext |
1262 attacks. | 1438 attacks. |
1263 Elements such as the Ogg page capture pattern and the magic signatures in the | 1439 Elements such as the Ogg page capture pattern and the magic signatures in the |
1264 ID header and the comment header all have easily predictable values, in | 1440 ID header and the comment header all have easily predictable values, in |
1265 addition to various elements of the codec data itself. | 1441 addition to various elements of the codec data itself. |
1266 </t> | 1442 </t> |
1267 </section> | 1443 </section> |
1268 | 1444 |
1269 <section anchor="content_type" title="Content Type"> | 1445 <section anchor="content_type" title="Content Type"> |
1270 <t> | 1446 <t> |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1329 </middle> | 1505 </middle> |
1330 <back> | 1506 <back> |
1331 <references title="Normative References"> | 1507 <references title="Normative References"> |
1332 &rfc2119; | 1508 &rfc2119; |
1333 &rfc3533; | 1509 &rfc3533; |
1334 &rfc3629; | 1510 &rfc3629; |
1335 &rfc5334; | 1511 &rfc5334; |
1336 &rfc6381; | 1512 &rfc6381; |
1337 &rfc6716; | 1513 &rfc6716; |
1338 | 1514 |
1339 <reference anchor="EBU-R128" target="http://tech.ebu.ch/loudness"> | 1515 <reference anchor="EBU-R128" target="https://tech.ebu.ch/loudness"> |
1340 <front> | 1516 <front> |
1341 <title>"Loudness Recommendation EBU R128</title> | 1517 <title>Loudness Recommendation EBU R128</title> |
1342 <author fullname="EBU Technical Committee"/> | 1518 <author> |
1343 <date month="August" year="2011"/> | 1519 <organization>EBU Technical Committee</organization> |
| 1520 </author> |
| 1521 <date month="August" year="2011"/> |
1344 </front> | 1522 </front> |
1345 </reference> | 1523 </reference> |
1346 | 1524 |
1347 <reference anchor="vorbis-comment" | 1525 <reference anchor="vorbis-comment" |
1348 target="http://www.xiph.org/vorbis/doc/v-comment.html"> | 1526 target="https://www.xiph.org/vorbis/doc/v-comment.html"> |
1349 <front> | 1527 <front> |
1350 <title>Ogg Vorbis I Format Specification: Comment Field and Header | 1528 <title>Ogg Vorbis I Format Specification: Comment Field and Header |
1351 Specification</title> | 1529 Specification</title> |
1352 <author initials="C." surname="Montgomery" | 1530 <author initials="C." surname="Montgomery" |
1353 fullname="Christopher &quot;Monty&quot; Montgomery"/> | 1531 fullname="Christopher &quot;Monty&quot; Montgomery"/> |
1354 <date month="July" year="2002"/> | 1532 <date month="July" year="2002"/> |
1355 </front> | 1533 </front> |
1356 </reference> | 1534 </reference> |
1357 | 1535 |
1358 </references> | 1536 </references> |
1359 | 1537 |
1360 <references title="Informative References"> | 1538 <references title="Informative References"> |
1361 | 1539 |
1362 <!--?rfc include="http://xml.resource.org/public/rfc/bibxml/reference.RFC.3550.xml"?--> | 1540 <!--?rfc include="http://xml.resource.org/public/rfc/bibxml/reference.RFC.3550.xml"?--> |
1363 &rfc4732; | 1541 &rfc4732; |
1364 | 1542 &rfc6982; |
1365 <reference anchor="draft-sheffer-running-code" | |
1366 target="https://tools.ietf.org/html/draft-sheffer-running-code-05#section-2"> | |
1367 <front> | |
1368 <title>Improving "Rough Consensus" with Running Code</title> | |
1369 <author initials="Y." surname="Sheffer" fullname="Yaron Sheffer"/> | |
1370 <author initials="A." surname="Farrel" fullname="Adrian Farrel"/> | |
1371 <date month="May" year="2013"/> | |
1372 </front> | |
1373 </reference> | |
1374 | 1543 |
1375 <reference anchor="flac" | 1544 <reference anchor="flac" |
1376 target="https://xiph.org/flac/format.html"> | 1545 target="https://xiph.org/flac/format.html"> |
1377 <front> | 1546 <front> |
1378 <title>FLAC - Free Lossless Audio Codec Format Description</title> | 1547 <title>FLAC - Free Lossless Audio Codec Format Description</title> |
1379 <author initials="J." surname="Coalson" fullname="Josh Coalson"/> | 1548 <author initials="J." surname="Coalson" fullname="Josh Coalson"/> |
1380 <date month="January" year="2008"/> | 1549 <date month="January" year="2008"/> |
1381 </front> | 1550 </front> |
1382 </reference> | 1551 </reference> |
1383 | 1552 |
1384 <reference anchor="hanning" | 1553 <reference anchor="hanning" |
1385 target="http://en.wikipedia.org/wiki/Hamming_function#Hann_.28Hanning.29_window"> | 1554 target="https://en.wikipedia.org/wiki/Hamming_function#Hann_.28Hanning.29_window"> |
1386 <front> | 1555 <front> |
1387 <title>"Hann window</title> | 1556 <title>Hann window</title> |
1388 <author fullname="Wikipedia"/> | 1557 <author> |
| 1558 <organization>Wikipedia</organization> |
| 1559 </author> |
1389 <date month="May" year="2013"/> | 1560 <date month="May" year="2013"/> |
1390 </front> | 1561 </front> |
1391 </reference> | 1562 </reference> |
1392 | 1563 |
| 1564 <reference anchor="linear-prediction" |
| 1565 target="https://en.wikipedia.org/wiki/Linear_predictive_coding"> |
| 1566 <front> |
| 1567 <title>Linear Predictive Coding</title> |
| 1568 <author> |
| 1569 <organization>Wikipedia</organization> |
| 1570 </author> |
| 1571 <date month="January" year="2014"/> |
| 1572 </front> |
| 1573 </reference> |
| 1574 |
| 1575 <reference anchor="lpc-sample" |
| 1576 target="https://svn.xiph.org/trunk/vorbis/lib/lpc.c"> |
| 1577 <front> |
| 1578 <title>Autocorrelation LPC coeff generation algorithm |
| 1579 (Vorbis source code)</title> |
| 1580 <author initials="J." surname="Degener" fullname="Jutta Degener"/> |
| 1581 <author initials="C." surname="Bormann" fullname="Carsten Bormann"/> |
| 1582 <date month="November" year="1994"/> |
| 1583 </front> |
| 1584 </reference> |
| 1585 |
| 1586 |
1393 <reference anchor="replay-gain" | 1587 <reference anchor="replay-gain" |
1394 target="http://wiki.xiph.org/VorbisComment#Replay_Gain"> | 1588 target="https://wiki.xiph.org/VorbisComment#Replay_Gain"> |
1395 <front> | 1589 <front> |
1396 <title>VorbisComment: Replay Gain</title> | 1590 <title>VorbisComment: Replay Gain</title> |
1397 <author initials="C." surname="Parker" fullname="Conrad Parker"/> | 1591 <author initials="C." surname="Parker" fullname="Conrad Parker"/> |
1398 <author initials="M." surname="Leese" fullname="Martin Leese"/> | 1592 <author initials="M." surname="Leese" fullname="Martin Leese"/> |
1399 <date month="June" year="2009"/> | 1593 <date month="June" year="2009"/> |
1400 </front> | 1594 </front> |
1401 </reference> | 1595 </reference> |
1402 | 1596 |
1403 <reference anchor="seeking" | 1597 <reference anchor="seeking" |
1404 target="http://wiki.xiph.org/Seeking"> | 1598 target="https://wiki.xiph.org/Seeking"> |
1405 <front> | 1599 <front> |
1406 <title>Granulepos Encoding and How Seeking Really Works</title> | 1600 <title>Granulepos Encoding and How Seeking Really Works</title> |
1407 <author initials="S." surname="Pfeiffer" fullname="Silvia Pfeiffer"/> | 1601 <author initials="S." surname="Pfeiffer" fullname="Silvia Pfeiffer"/> |
1408 <author initials="C." surname="Parker" fullname="Conrad Parker"/> | 1602 <author initials="C." surname="Parker" fullname="Conrad Parker"/> |
1409 <author initials="G." surname="Maxwell" fullname="Greg Maxwell"/> | 1603 <author initials="G." surname="Maxwell" fullname="Greg Maxwell"/> |
1410 <date month="May" year="2012"/> | 1604 <date month="May" year="2012"/> |
1411 </front> | 1605 </front> |
1412 </reference> | 1606 </reference> |
1413 | 1607 |
1414 <reference anchor="vorbis-mapping" | 1608 <reference anchor="vorbis-mapping" |
1415 target="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9"> | 1609 target="https://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9"> |
1416 <front> | 1610 <front> |
1417 <title>The Vorbis I Specification, Section 4.3.9 Output Channel Order</title> | 1611 <title>The Vorbis I Specification, Section 4.3.9 Output Channel Order</title> |
1418 <author initials="C." surname="Montgomery" | 1612 <author initials="C." surname="Montgomery" |
1419 fullname="Christopher &quot;Monty&quot; Montgomery"/> | 1613 fullname="Christopher &quot;Monty&quot; Montgomery"/> |
1420 <date month="January" year="2010"/> | 1614 <date month="January" year="2010"/> |
1421 </front> | 1615 </front> |
1422 </reference> | 1616 </reference> |
1423 | 1617 |
1424 <reference anchor="vorbis-trim" | 1618 <reference anchor="vorbis-trim" |
1425 target="http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-130000A.2"> | 1619 target="https://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-130000A.2"> |
1426 <front> | 1620 <front> |
1427 <title>The Vorbis I Specification, Appendix A: Embedding Vorbis | 1621 <title>The Vorbis I Specification, Appendix A: Embedding Vorbis |
1428 into an Ogg stream</title> | 1622 into an Ogg stream</title> |
1429 <author initials="C." surname="Montgomery" | 1623 <author initials="C." surname="Montgomery" |
1430 fullname="Christopher &quot;Monty&quot; Montgomery"/> | 1624 fullname="Christopher &quot;Monty&quot; Montgomery"/> |
1431 <date month="November" year="2008"/> | 1625 <date month="November" year="2008"/> |
1432 </front> | 1626 </front> |
1433 </reference> | 1627 </reference> |
1434 | 1628 |
1435 <reference anchor="wave-multichannel" | 1629 <reference anchor="wave-multichannel" |
1436 target="http://msdn.microsoft.com/en-us/windows/hardware/gg463006.aspx"> | 1630 target="http://msdn.microsoft.com/en-us/windows/hardware/gg463006.aspx"> |
1437 <front> | 1631 <front> |
1438 <title>Multiple Channel Audio Data and WAVE Files</title> | 1632 <title>Multiple Channel Audio Data and WAVE Files</title> |
1439 <author fullname="Microsoft Corporation"/> | 1633 <author> |
| 1634 <organization>Microsoft Corporation</organization> |
| 1635 </author> |
1440 <date month="March" year="2007"/> | 1636 <date month="March" year="2007"/> |
1441 </front> | 1637 </front> |
1442 </reference> | 1638 </reference> |
1443 | 1639 |
1444 </references> | 1640 </references> |
1445 | 1641 |
1446 </back> | 1642 </back> |
1447 </rfc> | 1643 </rfc> |
OLD | NEW |