SkeyeRTSPLive高效转码之SkeyeVideoDecoder采用Intel集成显卡高效硬件解码解决方案(附源码) (1)

时间:2021-10-24 00:33:42

在我之前写的一篇文章《SkeyeRTSPLive传统视频监控互联网+实现利器解决方案》中提到RTSP转RTMP的转流过程,简化流程就是通过SkeyeRTSPClient拉RTSP流,获取音视频编码数据,然后再通过SkeyeRTMPPusher推出去,流程非常简单;然而在实际开发过程中,我们发现其实这个过程并没有想象中那么简单;首先,RTSP协议支持多种音视频编码格式,如音频支持AAC、G711、G726等,视频支持H264、H265、MJPEG、MPEG等各种格式,而SkeyeRTMP推流只支持H264(已扩展支持H265)格式,这时,音频我们可以通过SkeyeAACEncoder将音频转码成AAC格式,而视频我们可以通过SkeyeVideoDecoder解码成原始数据,然后再通过SkeyeVideoEncoder将原始数据转码成RTMP推送指定的格式。本文我们将重点讲述SkeyeVideoDecoder基于Intel硬解码库的硬解码流程。

SkeyeVideoDecoder基于Intel硬解码库SkeyeIntelHardDecoder

SkeyeIntelHardDecoder库是基于Intel主板集成显卡的硬件解码程序,内部完成解码并采用D3D进行显示,其解码效率比ffmpeg软件解码至少提高5-6倍;

1. 接口声明如下:
// Intel Media Hardware Codec SDK Interface [8/17/2016 SwordTwelve]

#ifndef INTELHARDCODEC_INTERFACE_H
#define INTELHARDCODEC_INTERFACE_H

#ifdef __cplusplus

// Packs four character codes into one int FourCC value:
// A occupies the lowest byte, D the highest.
#define HARDCODEC_MAKEFOURCC(A,B,C,D) \
    (static_cast<int>(A) \
     + (static_cast<int>(B) << 8) \
     + (static_cast<int>(C) << 16) \
     + (static_cast<int>(D) << 24))

/* FourCC identifiers for the frame formats handled by the hardware codec. */
enum tagINTELHARDCODEC_FORMAT
{
    HARDCODEC_FOURCC_NV12       = HARDCODEC_MAKEFOURCC('N','V','1','2'), /* native format */
    HARDCODEC_FOURCC_YV12       = HARDCODEC_MAKEFOURCC('Y','V','1','2'),
    HARDCODEC_FOURCC_YUY2       = HARDCODEC_MAKEFOURCC('Y','U','Y','2'),
    HARDCODEC_FOURCC_RGB3       = HARDCODEC_MAKEFOURCC('R','G','B','3'), /* RGB24 */
    HARDCODEC_FOURCC_RGB4       = HARDCODEC_MAKEFOURCC('R','G','B','4'), /* RGB32 */
    HARDCODEC_FOURCC_P8         = 41,                                    /* D3DFMT_P8 */
    HARDCODEC_FOURCC_P8_TEXTURE = HARDCODEC_MAKEFOURCC('P','8','M','B')
};
typedef enum tagINTELHARDCODEC_FORMAT INTELHARDCODEC_FORMAT;


// Abstract decoder interface exported by the SkeyeIntelHardDecoder DLL.
// Instances are obtained from Create_SkeyeIntelHardDecoder() and handed back
// to Release_SkeyeIntelHardDecoder().
class SkeyeIntelHardDecoder_Interface
{
public:
    // Initialises the decoder for the window hWnd.
    //   bDxv2Show    - enables on-screen rendering of decoded frames.
    //   bShowToScale - forwarded to the renderer (presumably keeps the aspect
    //                  ratio; confirm against the renderer implementation).
    //   mode         - 1 (default) prefers a hardware session; any other value
    //                  requests a software session.
    // Returns 0 on success, a non-zero status otherwise.
    virtual int WINAPI Init(HWND hWnd, bool bDxv2Show, bool bShowToScale, int mode = 1) = 0;

    // Feeds len bytes of encoded data from pData to the decoder.
    //   outFormat - receives the format of the decoded frame.
    //   pYUVData  - caller-provided buffer receiving the decoded picture.
    virtual int WINAPI Decode(unsigned char * pData, int len, OUT INTELHARDCODEC_FORMAT& outFormat, OUT unsigned char * pYUVData) = 0;

    // Shuts the decoder down and frees what Init()/Decode() acquired.
    virtual void WINAPI Close() = 0;
};

// Pointer type through which callers hold a decoder interface instance.
typedef SkeyeIntelHardDecoder_Interface* LPIntelHardDecoder;

LPIntelHardDecoder APIENTRY Create_SkeyeIntelHardDecoder();// creates a decoder and returns its interface pointer
void APIENTRY Release_SkeyeIntelHardDecoder(LPIntelHardDecoder lpHardDecoder);// destroys a decoder obtained from Create_SkeyeIntelHardDecoder

#endif//__cplusplus
#endif//INTELHARDCODEC_INTERFACE_H
2. SkeyeIntelHardDecoder解码库调用流程
  • 第一步,初始化解码器 首先,检查设备是否支持DXVA2,代码如下所示:
// Probes whether DXVA2 hardware decoding can be used with the window `hwnd`.
//
// The probe creates a windowed Direct3D 9 HAL device on adapter 0, resets and
// clears it, and finally creates a DXVA2 device manager. It returns true only
// when every step succeeds. All COM objects created along the way are released
// before returning, on success and on every failure path.
bool isSupportDxva2(HWND hwnd)
{
    const mfxU32 adapterNum = 0;

    IDirect3D9* pD3D9 = Direct3DCreate9(D3D_SDK_VERSION);
    if (!pD3D9)
        return false;

    // Windowed presentation parameters sized to the primary screen.
    D3DPRESENT_PARAMETERS d3dpp;
    ZeroMemory(&d3dpp, sizeof(d3dpp));
    d3dpp.Windowed = true;
    d3dpp.hDeviceWindow = hwnd;
    d3dpp.Flags = D3DPRESENTFLAG_VIDEO | D3DPRESENTFLAG_LOCKABLE_BACKBUFFER;
    d3dpp.FullScreen_RefreshRateInHz = D3DPRESENT_RATE_DEFAULT;
    d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_ONE;
    d3dpp.BackBufferCount = 1;
    d3dpp.BackBufferFormat = D3DFMT_X8R8G8B8;
    d3dpp.BackBufferWidth = GetSystemMetrics(SM_CXSCREEN);
    d3dpp.BackBufferHeight = GetSystemMetrics(SM_CYSCREEN);
    d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD; // D3DSWAPEFFECT_OVERLAY

    IDirect3DDevice9* pDevice = NULL;
    IDirect3DDeviceManager9* pDeviceManager = NULL;
    UINT resetToken = 0;

    // Each step runs only if the previous one succeeded, so `hr` ends up
    // holding the first failure (or the last success).
    HRESULT hr = pD3D9->CreateDevice(
        adapterNum,
        D3DDEVTYPE_HAL,
        hwnd,
        D3DCREATE_SOFTWARE_VERTEXPROCESSING | D3DCREATE_MULTITHREADED | D3DCREATE_FPU_PRESERVE,
        &d3dpp,
        &pDevice);
    if (!FAILED(hr))
        hr = pDevice->Reset(&d3dpp);
    if (!FAILED(hr))
        hr = pDevice->Clear(0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0, 0, 0), 1.0f, 0);
    if (!FAILED(hr))
        hr = DXVA2CreateDirect3DDeviceManager9(&resetToken, &pDeviceManager);

    // Single cleanup path: release whatever was created, in reverse order.
    if (pDeviceManager)
        pDeviceManager->Release();
    if (pDevice)
        pDevice->Release();
    pD3D9->Release();

    return !FAILED(hr);
}

然后,初始化解码器,代码如下:

// Prepares the decoder for the window `hWnd`.
//
// Returns -1 when DXVA2 is not available, otherwise the Media SDK status of
// the last initialisation step (MFX_ERR_NONE on success).
//   bDxv2Show    - stored; later decides whether decoded frames are rendered.
//   bShowToScale - stored; later forwarded to the renderer.
//   mode         - 1: try a hardware session first and fall back to software;
//                  any other value: software session only.
int Init(HWND hWnd, bool bDxv2Show, bool bShowToScale, int mode )
{
    // Refuse to initialise when the DXVA2 probe fails.
    if (!isSupportDxva2(hWnd))
    {
        return -1;
    }

    m_hWnd = hWnd;
    m_bDxv2Show = bDxv2Show;
    m_bShowToScale = bShowToScale;
    ::GetClientRect(m_hWnd, &m_WndRect);

    m_pD3dRender = new CDecodeD3DRender();
    m_pD3dRender->Init(hWnd);

    // Requested API version, given as {minor, major} = {0, 1}.
    mfxVersion version = {0, 1};
    mfxStatus sts = MFX_ERR_NONE;

    if (1 == mode)
    {
        // Implementation fallback chain: any hardware -> default hardware -> software.
        sts = m_mfxSession.Init(MFX_IMPL_HARDWARE_ANY, &version);
        if (MFX_ERR_NONE != sts)
            sts = m_mfxSession.Init(MFX_IMPL_HARDWARE, &version);
        if (MFX_ERR_NONE != sts)
            sts = m_mfxSession.Init(MFX_IMPL_SOFTWARE, &version);
    }
    else
    {
        sts = m_mfxSession.Init(MFX_IMPL_SOFTWARE, &version);
    }
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Decoder object bound to the session created above.
    m_pmfxDEC = new MFXVideoDECODE(m_mfxSession);
    MSDK_CHECK_POINTER(m_pmfxDEC, MFX_ERR_MEMORY_ALLOC);

    // Input is H.264 (AVC); frames are requested in NV12 with an async depth of 1.
    m_mfxVideoParams.mfx.CodecId = MFX_CODEC_AVC;
    m_mfxVideoParams.AsyncDepth = 1;
    m_mfxVideoParams.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12; // alternative: MFX_FOURCC_YV12

    // Flag selecting D3D memory for the frame allocator.
    m_bd3dAlloc = true;

    // Start from a zeroed bitstream descriptor and give it a 2 MiB buffer.
    memset(&m_mfxBS, 0, sizeof(m_mfxBS));
    sts = InitMfxBitstream(&m_mfxBS, 1024*1024*2);
    return sts;
}
  • 第二步,调用解码函数 解码器是异步工作的,所以在解码函数调用后,我们需要从缓冲区内把数据取出进行D3D显示或者回调到上层显示,代码如下所示:
// Appends one chunk of encoded H.264 data to the internal bitstream buffer,
// submits it to the decoder once, and delivers the decoded frame if one is
// available.
//
//   p_data    - encoded bytes to append; may be NULL only when len <= 0.
//   len       - number of bytes in p_data; values <= 0 append nothing.
//   p_yuvData - optional output buffer. When non-NULL it receives the frame as
//               tightly packed NV12 (m_nSrcWidth * m_nSrcHeight luma bytes
//               followed by half as many interleaved chroma bytes). The caller
//               must convert to I420 itself if it needs planar chroma.
//
// Returns MFX_ERR_NONE when a frame was produced. Other values are returned
// unchanged so the caller can react, e.g. MFX_ERR_MORE_DATA (feed more input)
// or MFX_WRN_DEVICE_BUSY (retry the call later). MFX_ERR_NULL_PTR and
// MFX_ERR_NOT_ENOUGH_BUFFER report invalid or oversized input.
mfxStatus CIntelMediaDecode::h264DecPacket(unsigned char * p_data, int len, unsigned char* p_yuvData)
{
    // Initialised so the check after DecodeFrameAsync never reads garbage when
    // the call fails without writing a sync point.
    mfxSyncPoint syncp = NULL;
    mfxStatus sts = MFX_ERR_NONE;
    mfxU16 nIndex = 0; // index of free surface

    if (len > 0)
    {
        if (NULL == p_data)
            return MFX_ERR_NULL_PTR;

        const unsigned int nNewBytes = static_cast<unsigned int>(len);

        // A chunk larger than the whole buffer can never fit; reject it
        // instead of writing past the end of m_mfxBS.Data.
        if (nNewBytes > m_mfxBS.MaxLength)
            return MFX_ERR_NOT_ENOUGH_BUFFER;

        // Not enough room behind the pending data: drop the pending data so
        // that the new chunk fits.
        if (nNewBytes > (m_mfxBS.MaxLength - m_mfxBS.DataLength))
        {
            m_mfxBS.DataOffset = 0;
            m_mfxBS.DataLength = 0;
        }

        // Move unconsumed bytes to the front, then append the new chunk.
        memmove(m_mfxBS.Data, m_mfxBS.Data + m_mfxBS.DataOffset, m_mfxBS.DataLength);
        m_mfxBS.DataOffset = 0;

        memcpy(m_mfxBS.Data + m_mfxBS.DataLength, p_data, nNewBytes);
        m_mfxBS.DataLength += nNewBytes;
    }

    // Too little data to be worth submitting.
    if (m_mfxBS.DataLength < 5)
    {
        m_sPrevState = MFX_ERR_MORE_DATA;
        return MFX_ERR_MORE_DATA;
    }

    // A new working surface is looked up only when the previous call produced
    // a frame or asked for another surface.
    // NOTE(review): in every other case surface 0 is used because nIndex is a
    // local that starts at 0 on each call - confirm this is intended.
    if (MFX_ERR_MORE_SURFACE == m_sPrevState || MFX_ERR_NONE == m_sPrevState)
    {
        nIndex = GetFreeSurfaceIndex(m_pmfxSurfaces, m_mfxResponse.NumFrameActual);
        if (MSDK_INVALID_SURF_IDX == nIndex)
        {
            return MFX_ERR_MEMORY_ALLOC;
        }
    }

    sts = m_pmfxDEC->DecodeFrameAsync(&m_mfxBS, &(m_pmfxSurfaces[nIndex]), &m_pmfxOutSurface, &syncp);
    if (MFX_ERR_DEVICE_LOST == sts || MFX_ERR_DEVICE_FAILED == sts)
        return sts;
    m_sPrevState = sts;

    // Ignore warnings if output is available.
    if (MFX_ERR_NONE < sts && syncp)
    {
        sts = MFX_ERR_NONE;
    }

    // Decoding is asynchronous: wait until the output surface is ready.
    if (MFX_ERR_NONE == sts)
    {
        sts = m_mfxSession.SyncOperation(syncp, MSDK_DEC_WAIT_INTERVAL);
    }

    if (MFX_ERR_NONE == sts)
    {
        // With an external allocator the surface must be locked before its
        // data pointers are read.
        if (m_bExternalAlloc)
        {
            sts = m_pMFXAllocator->Lock(m_pMFXAllocator->pthis, m_pmfxOutSurface->Data.MemId, &(m_pmfxOutSurface->Data));
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
        }

        ++m_nFrameIndex;

        if (p_yuvData)
        {
            // Copy row by row: surface rows are Data.Pitch bytes apart, which
            // may exceed the visible width.
            const int nPitch = m_pmfxOutSurface->Data.Pitch;
            unsigned char* pDst = p_yuvData;

            const unsigned char* pSrc = m_pmfxOutSurface->Data.Y;
            for (int row = 0; row < m_nSrcHeight; ++row)
            {
                memcpy(pDst, pSrc, m_nSrcWidth);
                pDst += m_nSrcWidth;
                pSrc += nPitch;
            }

            // Interleaved chroma plane: half as many rows, same row width.
            pSrc = m_pmfxOutSurface->Data.UV;
            for (int row = 0; row < m_nSrcHeight / 2; ++row)
            {
                memcpy(pDst, pSrc, m_nSrcWidth);
                pDst += m_nSrcWidth;
                pSrc += nPitch;
            }
        }

        if (m_bExternalAlloc)
        {
            sts = m_pMFXAllocator->Unlock(m_pMFXAllocator->pthis, m_pmfxOutSurface->Data.MemId, &(m_pmfxOutSurface->Data));
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
        }

        // Optional on-screen rendering into the current client area.
        if (m_bDxv2Show)
        {
            ::GetClientRect(m_hWnd, &m_WndRect);
            sts = m_pD3dRender->RenderFrame(m_pmfxOutSurface, m_pMFXAllocator,
                m_WndRect, m_DisplayTitle, m_bEndedDrag, m_bShowToScale, &m_TargetRect);
            // A NULL_PTR result from the renderer is not treated as a decode failure.
            if (sts == MFX_ERR_NULL_PTR)
                sts = MFX_ERR_NONE;
        }
    }

    return sts;
}
  • 第三步,关闭解码器
// Shuts the decoder down.
//
// The bitstream buffer, the decoder object and the renderer are always torn
// down. Frames, allocator, session and the snapshot buffer are released only
// when m_bIntelSystemInitFinish is set; the flag is cleared in that case.
void Close()
{
    WipeMfxBitstream(&m_mfxBS);
    MSDK_SAFE_DELETE(m_pmfxDEC);
    MSDK_SAFE_DELETE(m_pD3dRender);

    if (m_bIntelSystemInitFinish == true)
    {
        m_bIntelSystemInitFinish = false;

        DeleteFrames();

        // An allocator used externally by MediaSDK must outlive the decoder,
        // so it is deleted only after the decoder above is gone.
        DeleteAllocator();

        m_mfxSession.Close();

        if (m_pSnapShotBuf != NULL)
        {
            free(m_pSnapShotBuf);
            m_pSnapShotBuf = NULL;
        }
    }
}

至此,我们已经完成了对Intel集显解码器的封装,通过阅读[SkeyeRTSPPlayer]的代码可以对该库的调用流程有一个系统的认识;此外,目前集成的Intel集显解码库是比较旧的版本,尚不支持H265解码,有兴趣的朋友可以下载最新的Intel集显编解码库Demo自行封装解码库,相信现在的版本会更加易用和高效。

有任何技术问题,欢迎大家和我进行技术交流: 295222688@qq.com

大家也可以加入SkeyePlayer流媒体播放器 QQ群进行讨论: 102644504