MP4音频解码信息(转帖加注释)

http://blog.csdn.net/linzhiji/article/details/5840031

注释：

1。3gp和MP4中的AAC的私有数据保存在esds的0x05标签的数据，

结构为 05 + 长度 + 内容。

将长度赋值给 extradatasize

将内容赋值给 extradata

长度的计算函数在ffmpeg中的static int mp4_read_descr_len(ByteIOContext *pb)

2。avc/h264的extradatasize和extradata信息在avcc atom中，将avcc atom去掉type和长度（8个字节）后的长度赋予extradatasize，内容赋值给extradata。

MP4文件格式分为头部和数据两部分，头部是由许多被称作Atom的结构单元嵌套或排列而成，数据部分则完全为实际数据不包含元信息，因此具体解码时音视频帧的位置和大小都要在头部获取。详细内容见以下链接：
http://wqyuwss.52rd.net
这里总结下音频解码信息获取的一些经验，当然详细内容需要查看quick time file format的文档。
MP4的音频解码信息保存在如下嵌套的Atom中：{moov{trak{mdia{minf{smhd, stbl{stsd}}}}}}（smhd与stbl同为minf的子Atom，带有smhd的trak即为音频轨）
stsd可能包括多个音频信息的描述，结构如下：

/* One sound sample description entry as stored inside the stsd atom. */
typedef struct stsdtable {
    unsigned int size;      // atom size
    char format[4];         // audio codec format (four-character code)
    int res1;
    int ref;
    short version;          // version
    short pad1;
    int pad2;
    short channels;         // number of channels
    short bitspersample;
    short compress_id;
    short res2;
    short samplerate1;      // sample rate
    short samplerate2;
    // { the following four fields are present only if (version == 1)
    int sampleperpacket;
    int bytesperpacket;
    int bytesperframe;
    int bytespersample;
    // }
} stsdtable;

其中format对应音频编码格式：
PCM_S32BE, in32
PCM_S32LE, in32
PCM_S24BE, in24
PCM_S24LE, in24
PCM_S16BE, twos // 16 bits //
PCM_S16LE, sowt //
PCM_S16LE, lpcm
PCM_F32BE, fl32
PCM_F64BE, fl64
PCM_S8,     sowt
PCM_U8,     raw // 8 bits unsigned
PCM_U8,     NONE // uncompressed
PCM_MULAW, ulaw //
PCM_ALAW,   alaw //
ADPCM_IMA_QT, ima4 // IMA-4 ADPCM //
MACE3,      MAC3 // Macintosh Audio Compression and Expansion 3:1 ///
MACE6,      MAC6 // Macintosh Audio Compression and Expansion 6:1 //
MP3,        .mp3 // MPEG layer 3; sample files at http://www.3ivx.com/showcase.html use this tag //
MP3,        0x6D730055 // MPEG layer 3 //
OGG_VORBIS, OggS //// sample files at http://heroinewarrior.com/xmovie.php3 use this tag //
AAC,        mp4a // MPEG-4 AAC //
AC3,        ac-3 // ETSI TS 102 366 Annex F //
AMR_NB,     samr // AMR-NB 3gp //
AMR_WB,     sawb // AMR-WB 3gp//
GSM,        agsm
ALAC,       alac // Apple Lossless //
QCELP,      Qclp
QCELP,      sqcp // ISO Media fourcc //
QDM2,       QDM2 // QDM2 //
DVAUDIO,    vdva
DVAUDIO,    dvca
WMAV2,      WMA2
这个获取比较简单，下面是解码私有数据的获取：
这些解码私有数据也保存在Atom中，通常在上面结构体的后面，有esds、frma、mp4a、wave。AAC的私有数据保存在esds的0x05标签的数据，QDM2的则是”wave”Atom的数据部分(以下按顺序分析)：
   4字节长度
   4字节 “esds” or “m4ds” 标志
   4字节版本标识

   1字节 ES描述类型标签 0x03
   –3字节扩展描述类型标签可能没有
   1字节描述类型长度
   2字节 ES ID
   1字节流优先级

   1字节解码配置描述类型标签 0x04
   –3字节扩展描述类型标签可能没有
   1字节描述类型长度
   1字节描述对象ID
   1字节
   3字节
   4字节
   4字节

   1字节解码特定信息(DecoderSpecificInfo)描述类型标签 0x05
   –3字节扩展描述类型标签可能没有
   1字节长度

   1字节 0x06
   0x06不再分析
下面是一个例子：
长度标签
00015218h: 00 00 00 10 73 6D 68 64 00 00 00 00 00 00 00 00 ; ….smhd……..
00015228h: 00 00 00 24 64 69 6E 66 00 00 00 1C 64 72 65 66 ; …$dinf….dref
00015238h: 00 00 00 00 00 00 00 01 00 00 00 0C 75 72 6C 20 ; …………url
00015248h: 00 00 00 01 00 02 C0 97 73 74 62 6C 00 00 00 5B ; ……罈stbl…[
00015258h: 73 74 73 64 00 00 00 00 00 00 00 01 00 00 00 4B ; stsd………..K
00015268h: 6D 70 34 61 00 00 00 00 00 00 00 01 00 00 00 00 ; mp4a…………
00015278h: 00 00 00 00 00 01 00 10 00 00 00 00 7D 00 00 00 ; …………}…
00015288h: 00 00 00 27 65 73 64 73 00 00 00 00 03 19 00 00 ; …’esds……..
00015298h: 00 04 11 40 15 00 00 D2 00 00 BB 88 00 00 7D 00 ; …@…?.粓..}.
000152a8h: 05 02 12 88 06 01 02                            ; …?..

0x12 0x88即私有数据(对应ffmpeg中AVCodecContext.extradata)
下面是mp4音频部分分析的代码：

//MP4Analyze.h

/*
 * Byte type used for raw atom data.
 * The definition used to share a line with the "//MP4Analyze.h" comment and
 * was therefore commented out. It is restored as a typedef instead of a
 * macro so that system headers included afterwards, which may declare their
 * own uint8_t, are not textually rewritten.
 */
typedef unsigned char uint8_t;

/******atom tag*******/

/*
 * Four-character atom type codes. Each array holds the four code bytes
 * followed by the terminating NUL of the string literal; only the first
 * four bytes are compared against file data.
 */
uint8_t moov[] = "moov";
uint8_t trak[] = "trak";
uint8_t mdia[] = "mdia";
uint8_t minf[] = "minf";
uint8_t stbl[] = "stbl";
uint8_t stsd[] = "stsd";
uint8_t stsc[] = "stsc";
uint8_t stsz[] = "stsz";
uint8_t stco[] = "stco";
uint8_t ftyp[] = "ftyp";
uint8_t mdat[] = "mdat";
/*
 * Generic atom record. get_audio_info() overlays the first four fields on
 * the start of an stsd atom, so their order and widths must match the file
 * layout; values read that way are in network byte order.
 */
typedef struct Atom
{
    unsigned int size;            /* total atom size, header included */
    uint8_t tag[4];               /* four-character atom type */
    int ver_flag;                 /* version/flags word */
    unsigned int num_of_entries;  /* entry count of a table atom */
    unsigned int pos;             /* not used by the code in this file */
    uint8_t *data;                /* not used by the code in this file */
} Atom;

/****audio format****/

/*
 * Four-character codes of audio sample formats and of the atoms that carry
 * decoder-specific data. Only the first four bytes of each array are
 * meaningful when comparing against file data.
 */

/* MP3, numeric tag 0x6D730055 */
uint8_t kmp3[] = {0x6D,0x73,0x00,0x55};

/* MP3, ".mp3" tag */
uint8_t fmp3[] = ".mp3";

/* 8-bit unsigned PCM ("raw" padded with a trailing space) */
uint8_t raw[] = "raw ";

uint8_t wave[] = "wave";

uint8_t mp4a[] = "mp4a";

/* encrypted to ISO/IEC 14496-12 or 3GPP standards */
uint8_t enca[] = "enca";

/*
 * AMR narrow-band. The variable name is kept for existing users, but the
 * value is the real four-character code "samr" (the old value "smar"
 * could never match a file).
 */
uint8_t smar[] = "samr";

/* AMR wide-band */
uint8_t sawb[] = "sawb";

/* alternative marker accepted in place of "esds" */
uint8_t m4ds[] = "m4ds";

uint8_t esds[] = "esds";

/*
 * Format atom found next to "esds"/"wave". The variable name is kept, but
 * the value is the real atom type "frma" (the old value was "fram").
 */
uint8_t fram[] = "frma";
/*** We may not need these ***/

/*
 * Pack four byte values into one 32-bit tag with `a` in the least
 * significant byte and `d` in the most significant one.
 * Every argument is parenthesized so that expressions such as `x | y` can
 * be passed safely, and `d` is converted to unsigned before the shift so
 * that values >= 0x80 do not overflow a signed int.
 */
#define MKTAG(a,b,c,d) ((a) | ((b) << 8) | ((c) << 16) | ((unsigned int)(d) << 24))

/* Pairs a codec identifier with the tag (see MKTAG) that denotes it. */
typedef struct AVCodecTag {
    int id;             /* codec identifier */
    unsigned int tag;   /* packed four-character code */
} AVCodecTag;

/*
 * Layout of one sound sample description entry inside the stsd atom.
 * get_audio_info() overlays this struct on raw file bytes, so the field
 * order and widths must not change, and every multi-byte field holds the
 * value in network byte order until converted with ntohl()/ntohs().
 */
typedef struct stsdtable
{
    unsigned int size;      /* size of this entry in bytes */
    char format[4];         /* audio format code, not NUL-terminated */
    int res1;
    int ref;
    short version;          /* entry version */
    short pad1;
    int pad2;
    short channels;         /* number of channels */
    short bitspersample;
    short compress_id;
    short res2;
    short samplerate1;      /* printed as the part before the '.' of the rate */
    short samplerate2;      /* printed as the part after the '.' of the rate */

    /* The four fields below are present in the file only if version == 1. */
    int sampleperpacket;
    int bytesperpacket;
    int bytesperframe;
    int bytespersample;
} stsdtable;
/***** result is stored here ******/

/*
 * Per-sample record produced by get_packet_offset(), which stores one of
 * these in its result map for every sample, keyed by the sample's offset.
 */
typedef struct sampletable
{
    unsigned int size;      /* sample size in bytes */
    unsigned int id_of_sd;  /* third field of the stsc entry covering the sample */
} sampletable;

//MP4Analyze.cpp


#include "MP4Analyze.h"

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#ifdef WIN32
#include <winsock2.h> 
#pragma comment(lib, "Ws2_32.lib")
#pragma warning (disable:4786)
#endif

#ifdef __GNUG__
#include <netinet/in.h>
#endif

using namespace std;
/**

*** mp4存在宽度为8字节的wide atom tag，需要注意，这里暂未考虑

**/
/*

* check if a mov/mp4/3gp type

*/

int check_format(uint8_t *data, int size)

{

    if(strncmp((char*)moov,(char*)(data+4),4)==0 ||

        strncmp((char*)ftyp,(char*)(data+4),4)==0 ||strncmp((char*)mdat,(char*)(data+4),4)==0 )

        return 0;

    return -1;

}
/*
 * Read `size` consecutive bytes starting at `data` as one unsigned
 * integer, most significant byte first. Returns 0 when size is not
 * positive.
 */
unsigned int get_size(const uint8_t *data,int size)
{
    unsigned int value = 0;
    const uint8_t *cursor = data;
    int remaining = size;

    while (remaining > 0)
    {
        value = (value << 8) + *cursor;
        ++cursor;
        --remaining;
    }
    return value;
}

/* if found,return the offset from the data[0]*/

int seek_tag(uint8_t tag[],uint8_t *data, unsigned int size1,uint8_t **pos,unsigned int *size2)

{

    if(data == NULL || size1 == 0)

        return -1;

    unsigned int tag_size = get_size(data,4);

    if(tag_size >size1 + 8)

        return -1;

    unsigned int tmp = 0;

    while(strncmp((char*)data+4,(char*)tag,4) != 0)

    {

    //    printf("%s/n",data+4);



        if(tag_size==0)

            return -1;

        if(tag_size < size1 + 8)

        {

            data += tag_size;

            tmp += tag_size;

        }

        else

            return -1;

        tag_size = get_size(data,4);

    }

    printf("find :%c%c%c%c/n",tag[0],tag[1],tag[2],tag[3]);

    if(tmp + tag_size > size1 )

     printf("warning: the atom may be not complete!/n");

    *pos = data+8;

    *size2 = tag_size -8;

    return tmp;

}

/*** elementary stream descriptor analyse ***/

/*

unsigned int codec_get_tag(const AVCodecTag *tags, int id)

{

    while (tags->id != CODEC_ID_NONE) {

        if (tags->id == id)

            return tags->tag;

        tags++;

    }

    return 0;

}

/* may not need analyse

int esds_analyze(uint8_t *data, unsigned int size)

{

    return 0;

}

*/
/*version == 2 ??? refer to ffmpeg source mov.c line 943

if (format == MKTAG('l','p','c','m'))

        st->codec->codec_id = mov_get_lpcm_codec_id(st->codec->bits_per_coded_sample, flags);

*/

vector<stsdtable>& get_audio_info(uint8_t *data, unsigned int size, vector<stsdtable>& stable)//stsd



{

    uint8_t * datapos = data;

    Atom *stsd_audio =(Atom *)data;

    int tmp_size = 16;
    printf("size : %u/n",ntohl(stsd_audio->size));

    printf("num_entr: %u/n",ntohl(stsd_audio->num_of_entries));
    for(int i=0; i < ntohl(stsd_audio->num_of_entries); ++i)

    {

        if(tmp_size > size)//注意



            return stable;

        datapos += tmp_size;

        stsdtable * audio_entry = (stsdtable *)(datapos);

        stable.push_back(*audio_entry);//这里存入的是网络序的数据，使用时需进行转换



        tmp_size += ntohl(audio_entry->size);
     /***************/

        printf("--tablesize: %d/n",ntohl(audio_entry->size));

        printf("--format : %s/n",audio_entry->format);

        printf("--version : %d/n",ntohs(audio_entry->version));

        printf("--channels: %d/n",ntohs(audio_entry->channels));

        printf("--bitpersam: %d/n",ntohs(audio_entry->bitspersample));

        printf("--IDcompress: %d/n",ntohs(audio_entry->compress_id));    

        printf("--samplerate: %d.%d/n",ntohs(audio_entry->samplerate1),ntohs(audio_entry->samplerate2));

        /**************/
     tmp_size = sizeof(stsdtable);

        if(ntohs(audio_entry->version)==0)

        {

            tmp_size -= 16;

        }

        datapos += tmp_size;

        //if(ntohs(audio_entry->compress_id)==-2)//此处尚需考证



        if(ntohl(audio_entry->size) > sizeof(stsdtable))

        {

            printf("----atom size:%d/n",get_size(datapos,4));

            printf("----atom name:%c%c%c%c/n",datapos[4],datapos[5],datapos[6],datapos[7]);

            if(strncmp((char*)datapos,(char*)esds,4)==0)

            {

                //handle esds



            }

        }

    }

    return stable;

}

map<unsigned int,sampletable> & get_packet_offset(uint8_t *STBL[], map<unsigned int,sampletable>& table)

{

    //table.insert(pair<long,sampletable>(1,sample));



    unsigned int num_sam_to_chunk = get_size(STBL[0]-4,4);//stsc



    unsigned int num_sample = get_size(STBL[1]-4,4);//stsz



    unsigned int num_chunk = get_size(STBL[2]-4,4);//stco



    unsigned int chunk_index = 0;

    unsigned int next_chunk_index = 0;

    uint8_t *cur_sam_to_chunk = STBL[0];

    uint8_t *cur_sam_size = STBL[1];

    uint8_t *cur_chunk_offset = STBL[2];

    sampletable sample;

    printf("number of stsc entries:%d /nnumber of sample size:%d /nnumber of chunk offset:%d/n",num_sam_to_chunk,num_sample,num_chunk);

    for(unsigned int i = 0; i < num_sam_to_chunk; ++i)//对所有的entries



    {

        chunk_index = get_size(cur_sam_to_chunk,4);

        next_chunk_index = get_size(cur_sam_to_chunk+12,4);

        sample.id_of_sd = get_size(cur_sam_to_chunk+8,4);

        if(i == num_sam_to_chunk -1)//最后一个



        {

            next_chunk_index = num_chunk+1;

        } 

        printf("chunk_index:(%d---%d)/n",chunk_index,next_chunk_index);

        for(unsigned int k=chunk_index; k < next_chunk_index; ++k)//当前chunk序号到下一个chunk序号之间的chunk



        {//处理所有重复的chunk



            printf("chunk_index:%d sample num:%d/n",chunk_index,get_size(cur_sam_to_chunk+4,4));

            unsigned int offset = get_size(cur_chunk_offset+(chunk_index-1)*4,4);

            for(unsigned int j=0; j < get_size(cur_sam_to_chunk+4,4); ++j)//chunk内地sample数目



            {//处理该chunk中的sample



                sample.size = get_size(cur_sam_size,4);    

                printf("--sample offset:%d %x size:%d/n",offset,offset,sample.size);

                table.insert(pair<unsigned int,sampletable>(offset,sample));

                offset = offset + sample.size;

                cur_sam_size += 4;

            }

            system("pause");

            chunk_index++;

        }

        cur_sam_to_chunk += 12;

    }

    return table;

}
int seek_audio_atom( uint8_t *data1, unsigned int size1)

{

    uint8_t tag[] = "mdiaminfsmhd";

    uint8_t *datapos;

    unsigned int tag_size;

    uint8_t *data;

    unsigned int size;

    int offset_of_atom = 0;

    if((offset_of_atom = seek_tag(moov, data1, size1, &data, &size)) == -1)

        return -1;

    if(offset_of_atom + size >size1)

    { //some handles



        printf("moov atom is not complete,need more data");

    }

    data1 = data;

    size1 = size;

    uint8_t *nexttrak = data;

    unsigned int traksize = size;

    int i=0;

    while(1)

    {

        printf("-----/n");

        if(seek_tag(trak, nexttrak, traksize, &datapos, &tag_size) != -1)

        {

            nexttrak = datapos + tag_size;

            if(size1 < (nexttrak - data1))

                return -1;

            traksize = size1 - (nexttrak - data1);

            data = datapos;

            size = tag_size;

        } 

        else

        {

            return -1;

        }

        i=0;

        while(i<3)

        {

            if(seek_tag(tag+i*4, data, size, &datapos, &tag_size) != -1)

            {

                if(i==2)

                 break;

                data = datapos;

                size = tag_size;

                ++i;

            }

            else

            {

                break;

            }

        }

        if(strncmp("smhd",(char*)(datapos-4),4) == 0)

        {

            if(seek_tag(stbl, data, size, &datapos, &tag_size)!= -1)

            {

                printf("—find audio stbl—!/n");

                data = datapos;

                size = tag_size;
                if(seek_tag(stsd, data, size, &datapos, &tag_size) != -1)

                {

                    vector<stsdtable> stable; //音频信息



                    get_audio_info(datapos-8, tag_size,stable);

                }
                uint8_t *STBL[3] ={NULL,NULL,NULL};//



                uint8_t *datapos1;

                unsigned int tag_size1;//



                if(seek_tag(stsc, data, size, &datapos1, &tag_size1) != -1)

                {

                    STBL[0] = datapos1 + 8;

                }

                uint8_t *datapos2;

                unsigned int tag_size2;

                if(seek_tag(stsz, data, size, &datapos2, &tag_size2) != -1)

                {

                    STBL[1] = datapos2 + 12;

                }

                uint8_t *datapos3;

                unsigned int tag_size3;

                if(seek_tag(stco, data, size, &datapos3, &tag_size3) != -1)

                {

                    STBL[2] = datapos3 + 8;

                }

                if(STBL[0] && STBL[1] && STBL[2] )

                {

                    map<unsigned int,sampletable> postable;//音频帧信息



                    get_packet_offset(STBL,postable);

                }

            }

            return 0;

        }

    }

    return -1;

}

int main(char arg, char *argv[])

{

    FILE *mp4;

    cout<<"please input the file name :"<<endl;

    string filename;

    cin>>filename;

    mp4 = fopen(filename.c_str(),"rb");

    uint8_t buffer[300000];

    fread(buffer,1,300000,mp4);
    seek_audio_atom((uint8_t*)buffer,300000);

fclose(mp4); return 0; }

MP4音频解码信息(转帖加注释)

Published by

风君子

发表回复取消回复

最新文章

标签

书签

Published by

风君子

发表回复 取消回复

最新文章

标签

书签

发表回复取消回复