H264RateControl.c 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296
  1. /*------------------------------------------------------------------------------
  2. -- --
  3. -- This software is confidential and proprietary and may be used --
  4. -- only as expressly authorized by a licensing agreement from --
  5. -- --
  6. -- Hantro Products Oy. --
  7. -- --
  8. -- (C) COPYRIGHT 2006 HANTRO PRODUCTS OY --
  9. -- ALL RIGHTS RESERVED --
  10. -- --
  11. -- The entire notice above must be reproduced --
  12. -- on all copies and should not be removed. --
  13. -- --
  14. --------------------------------------------------------------------------------
  15. --
  16. -- Description : Rate control
  17. --
  18. ------------------------------------------------------------------------------*/
  19. /*
  20. * Using only one leaky bucket (multiple buckets are supported by the standard).
  21. * Constant bit rate (CBR) operation, i.e. leaky bucket drain rate equals
  22. * average rate of the stream, is enabled if RC_CBR_HRD = 1. Frame skipping and
  23. * filler data are minimum requirements for the CBR conformance.
  24. *
  25. * Constant HRD parameters:
  26. * low_delay_hrd_flag = 0, assumes constant delay mode.
  27. * cpb_cnt_minus1 = 0, only one leaky bucket used.
  28. * (cbr_flag[0] = RC_CBR_HRD, CBR mode.)
  29. */
  30. #include "H264RateControl.h"
  31. #include "H264Slice.h"
  32. #ifdef ROI_SUPPORT
  33. #include "H264RoiModel.h"
  34. #endif
  35. /*------------------------------------------------------------------------------
  36. Module defines
  37. ------------------------------------------------------------------------------*/
  38. /* Define this if strict bitrate control is needed, each window has a strict
  39. * bit budget. Otherwise a moving window is used for smoother quality.
  40. #define RC_WINDOW_STRICT*/
  #ifdef TRACE_RC
  #include <stdio.h>
  FILE *fpRcTrc = NULL;
  /* Select debug output: fpRcTrc or stdout */
  #define DBGOUTPUT fpRcTrc
  /* Select debug level: 0 = minimum, 2 = maximum */
  #define DBG_LEVEL 2
  /* Emit a trace message when level `l` is enabled. `str` is a complete,
   * parenthesised fprintf() argument list, e.g. (DBGOUTPUT, "x %i\n", x).
   * The do/while(0) wrapper makes the expansion one statement, so an `else`
   * following `if (c) DBG(...);` cannot attach to the macro's internal `if`. */
  #define DBG(l, str) do { if ((l) <= DBG_LEVEL) fprintf str; } while (0)
  #else
  /* Tracing disabled: DBG() expands to nothing. */
  #define DBG(l, str)
  #endif
  #define INITIAL_BUFFER_FULLNESS 60 /* Decoder buffer fullness in percent */
  #define MIN_PIC_SIZE 50 /* Percent of picPerPic */
  /* Clamp val to the closed range [min, max]; arguments are evaluated more
   * than once, so they must be free of side effects. */
  #define CLIP3(min, max, val) ((val) < (min) ? (min) : ((val) > (max) ? (max) : (val)))
  /* a / b with half of b added in the direction of a's sign before dividing;
   * yields a unchanged when b is zero. SIGN() is not defined here and is
   * expected to come from an included header. */
  #define DIV(a, b) ((b) ? ((a) + (SIGN(a) * (b)) / 2) / (b) : (a))
  #define DSCY 32 /* n * 32 */
  #define I32_MAX 2147483647 /* 2 ^ 31 - 1 */
  #define QP_DELTA 2
  #define QP_DELTA_LIMIT 10
  #define INTRA_QP_DELTA (0)
  #define WORD_CNT_MAX 65535
  62. /*------------------------------------------------------------------------------
  63. Local structures
  64. ------------------------------------------------------------------------------*/
  65. /* q_step values scaled up by 4 and evenly rounded */
  66. static const i32 q_step[53] = { 3, 3, 3, 4, 4, 5, 5, 6, 7, 7, 8, 9, 10, 11,
  67. 13, 14, 16, 18, 20, 23, 25, 28, 32, 36, 40, 45, 51, 57, 64, 72, 80, 90,
  68. 101, 114, 128, 144, 160, 180, 203, 228, 256, 288, 320, 360, 405, 456,
  69. 513, 577, 640, 720, 810, 896};
  70. /*------------------------------------------------------------------------------
  71. Local function prototypes
  72. ------------------------------------------------------------------------------*/
  73. static i32 InitialQp(i32 bits, i32 pels);
  74. static void MbQuant(h264RateControl_s * rc);
  75. static void LinearModel(h264RateControl_s * rc);
  76. static void AdaptiveModel(h264RateControl_s * rc);
  77. static void SourceParameter(h264RateControl_s * rc, i32 nonZeroCnt);
  78. static void PicSkip(h264RateControl_s * rc);
  79. static void PicQuantLimit(h264RateControl_s * rc);
  80. static i32 VirtualBuffer(h264VirtualBuffer_s *vb, i32 timeInc, true_e hrd);
  81. static void PicQuant(h264RateControl_s * rc);
  82. static i32 avg_rc_error(linReg_s *p);
  83. static void update_rc_error(linReg_s *p, i32 bits);
  84. static i32 gop_avg_qp(h264RateControl_s *rc);
  85. static i32 new_pic_quant(linReg_s *p, i32 bits, true_e useQpDeltaLimit);
  86. static i32 get_avg_bits(linReg_s *p, i32 n);
  87. static void update_tables(linReg_s *p, i32 qp, i32 bits);
  88. static void update_model(linReg_s *p);
  89. static i32 lin_sy(i32 *qp, i32 *r, i32 n);
  90. static i32 lin_sx(i32 *qp, i32 n);
  91. static i32 lin_sxy(i32 *qp, i32 *r, i32 n);
  92. static i32 lin_nsxx(i32 *qp, i32 n);
  93. /*------------------------------------------------------------------------------
  94. H264InitRc() Initialize rate control.
  95. ------------------------------------------------------------------------------*/
  96. bool_e H264InitRc(h264RateControl_s * rc)
  97. {
  98. h264VirtualBuffer_s *vb = &rc->virtualBuffer;
  99. if((rc->qpMax > 51))
  100. {
  101. return ENCHW_NOK;
  102. }
  103. /* QP -1: Initial QP estimation done by RC */
  104. if (rc->qpHdr == -1) {
  105. i32 tmp = H264Calculate(vb->bitRate, rc->outRateDenom, rc->outRateNum);
  106. rc->qpHdr = InitialQp(tmp, rc->mbPerPic*16*16);
  107. PicQuantLimit(rc);
  108. }
  109. if((rc->qpHdr > rc->qpMax) || (rc->qpHdr < rc->qpMin))
  110. {
  111. return ENCHW_NOK;
  112. }
  113. rc->mbQpAdjustment = MIN(7, MAX(-8, rc->mbQpAdjustment));
  114. /* HRD needs frame RC and macroblock RC*/
  115. if (rc->hrd == ENCHW_YES) {
  116. rc->picRc = ENCHW_YES;
  117. rc->mbRc = ENCHW_YES;
  118. }
  119. /* Macroblock RC needs frame RC */
  120. if (rc->mbRc == ENCHW_YES)
  121. rc->picRc = ENCHW_YES;
  122. /* mbQpAdjustment disables macroblock RC */
  123. if (rc->mbQpAdjustment && rc->mbRc)
  124. rc->mbRc = ENCHW_NO;
  125. rc->coeffCntMax = rc->mbPerPic * 24 * 16;
  126. rc->frameCoded = ENCHW_YES;
  127. rc->sliceTypeCur = ISLICE;
  128. rc->sliceTypePrev = PSLICE;
  129. rc->qpHdrPrev = rc->qpHdr;
  130. rc->fixedQp = rc->qpHdr;
  131. vb->bitPerPic = H264Calculate(vb->bitRate, rc->outRateDenom, rc->outRateNum);
  132. /* Check points are smootly distributed except last one */
  133. {
  134. h264QpCtrl_s *qpCtrl = &rc->qpCtrl;
  135. qpCtrl->checkPoints = MIN(rc->mbRows - 1, CHECK_POINTS_MAX);
  136. if(rc->mbRc == ENCHW_YES)
  137. {
  138. qpCtrl->checkPointDistance =
  139. rc->mbPerPic / (qpCtrl->checkPoints + 1);
  140. }
  141. else
  142. {
  143. qpCtrl->checkPointDistance = 0;
  144. }
  145. }
  146. #if defined(TRACE_RC) && (DBGOUTPUT == fpRcTrc)
  147. if (!fpRcTrc) fpRcTrc = fopen("rc.trc", "wt");
  148. #endif
  149. /* new rate control algorithm */
  150. update_rc_error(&rc->rError, 0x7fffffff);
  151. update_rc_error(&rc->intraError, 0x7fffffff);
  152. rc->frameCnt = 0;
  153. rc->linReg.pos = 0;
  154. rc->linReg.len = 0;
  155. rc->linReg.a1 = 0;
  156. rc->linReg.a2 = 0;
  157. rc->linReg.qs[0] = q_step[51];
  158. rc->linReg.bits[0] = 0;
  159. rc->linReg.qp_prev = rc->qpHdr;
  160. rc->intra.pos = 0;
  161. rc->intra.len = 0;
  162. rc->intra.a1 = 0;
  163. rc->intra.a2 = 0;
  164. rc->intra.qs[0] = q_step[51];
  165. rc->intra.bits[0] = 0;
  166. rc->intra.qp_prev = rc->qpHdr;
  167. rc->gopQpSum = 0;
  168. rc->gopQpDiv = 0;
  169. /* API parameter is named gopLen but the actual usage is rate controlling
  170. * window in frames. RC tries to match the target bitrate inside the
  171. * window. Each window can contain multiple GOPs and the RC adapts to the
  172. * intra rate by calculating intraInterval. */
  173. rc->windowLen = rc->gopLen;
  174. vb->windowRem = rc->gopLen;
  175. rc->intraIntervalCtr = rc->intraInterval = rc->gopLen;
  176. rc->targetPicSize = 0;
  177. rc->frameBitCnt = 0;
  178. DBG(0, (DBGOUTPUT, "\nInitRc: picRc\t\t%i hrd\t%i picSkip\t%i\n",
  179. rc->picRc, rc->hrd, rc->picSkip));
  180. DBG(0, (DBGOUTPUT, " mbRc\t\t\t%i qpHdr\t%i Min,Max\t%i,%i\n",
  181. rc->mbRc, rc->qpHdr, rc->qpMin, rc->qpMax));
  182. DBG(0, (DBGOUTPUT, " checkPointDistance\t%i\n",
  183. rc->qpCtrl.checkPointDistance));
  184. DBG(0, (DBGOUTPUT, " CPBsize\t%i\n BitRate\t%i\n BitPerPic\t%i\n",
  185. vb->bufferSize, vb->bitRate, vb->bitPerPic));
  186. rc->sei.hrd = rc->hrd;
  187. if(rc->hrd)
  188. {
  189. vb->bucketFullness =
  190. H264Calculate(vb->bufferSize, INITIAL_BUFFER_FULLNESS, 100);
  191. rc->gDelaySum = H264Calculate(90000, vb->bufferSize, vb->bitRate);
  192. rc->gInitialDelay = H264Calculate(90000, vb->bucketFullness, vb->bitRate);
  193. rc->gInitialDoffs = rc->gDelaySum - rc->gInitialDelay;
  194. vb->bucketFullness = vb->bufferSize - vb->bucketFullness;
  195. /* Because is the first frame. Avoids if clauses in VirtualBuffer() */
  196. vb->bucketFullness += vb->bitPerPic;
  197. #ifdef TRACE_RC
  198. rc->gBufferMin = vb->bufferSize;
  199. rc->gBufferMax = 0;
  200. #endif
  201. rc->sei.icrd = (u32)rc->gInitialDelay;
  202. rc->sei.icrdo = (u32)rc->gInitialDoffs;
  203. DBG(1, (DBGOUTPUT, "\n InitialDelay\t%i\n Offset\t\t%i\n",
  204. rc->gInitialDelay, rc->gInitialDoffs));
  205. }
  206. return ENCHW_OK;
  207. }
  208. /*------------------------------------------------------------------------------
  209. InitialQp() Returns sequence initial quantization parameter.
  210. ------------------------------------------------------------------------------*/
  211. static i32 InitialQp(i32 bits, i32 pels)
  212. {
  213. const i32 qp_tbl[2][9] = {
  214. {27, 44, 72, 119, 192, 314, 453, 653, 0x7FFFFFFF},
  215. /*{26, 38, 59, 96, 173, 305, 545, 0x7FFFFFFF},*/
  216. {49, 45, 41, 37, 33, 29, 25, 21, 17}};
  217. const i32 upscale = 1000;
  218. i32 i = -1;
  219. /* prevents overflow, QP would anyway be 17 with this high bitrate
  220. for all resolutions under and including 1920x1088 */
  221. if (bits > 1000000)
  222. return 17;
  223. /* Make room for multiplication */
  224. pels >>= 8;
  225. bits >>= 2;
  226. /* Adjust the bits value for the current resolution */
  227. bits *= pels + 250;
  228. ASSERT(pels > 0);
  229. ASSERT(bits > 0);
  230. bits /= 350 + (3 * pels) / 4;
  231. bits = H264Calculate(bits, upscale, pels << 6);
  232. while (qp_tbl[0][++i] < bits);
  233. return qp_tbl[1][i];
  234. }
  235. /*------------------------------------------------------------------------------
  236. H264FillerRc
  237. Stream watermarking. Insert filler NAL unit of certain size after each
  238. Nth frame.
  239. ------------------------------------------------------------------------------*/
  240. u32 H264FillerRc(h264RateControl_s * rc, u32 frameCnt)
  241. {
  242. const u8 filler[] = { 0, 9, 0, 9, 9, 9, 0, 2, 2, 0 };
  243. u32 idx;
  244. if(rc->fillerIdx == (u32) (-1))
  245. {
  246. rc->fillerIdx = sizeof(filler) / sizeof(*filler) - 1;
  247. }
  248. idx = rc->fillerIdx;
  249. if(frameCnt != 0 && ((frameCnt % 128) == 0))
  250. {
  251. idx++;
  252. }
  253. idx %= sizeof(filler) / sizeof(*filler);
  254. if(idx != rc->fillerIdx)
  255. {
  256. rc->fillerIdx = idx;
  257. return filler[idx] + 1;
  258. }
  259. return 0;
  260. }
  261. /*------------------------------------------------------------------------------
  262. VirtualBuffer() Return difference of target and real buffer fullness.
  263. Virtual buffer and real bit count grow until one second. After one second
  264. output bit rate per second is removed from virtualBitCnt and realBitCnt. Bit
  265. drifting has been taken care.
  266. If the leaky bucket in VBR mode becomes empty (e.g. underflow), those R * T_e
  267. bits are lost and must be decremented from virtualBitCnt. (NOTE: Drift
  268. calculation will mess virtualBitCnt up, so the loss is added to realBitCnt)
  269. ------------------------------------------------------------------------------*/
  270. static i32 VirtualBuffer(h264VirtualBuffer_s *vb, i32 timeInc, true_e hrd)
  271. {
  272. i32 drift, target, bitPerPic = vb->bitPerPic;
  273. if (hrd) {
  274. #if RC_CBR_HRD
  275. /* In CBR mode, bucket _must not_ underflow. Insert filler when
  276. * needed. */
  277. vb->bucketFullness -= bitPerPic;
  278. #else
  279. if (vb->bucketFullness >= bitPerPic) {
  280. vb->bucketFullness -= bitPerPic;
  281. } else {
  282. vb->realBitCnt += (bitPerPic - vb->bucketFullness);
  283. vb->bucketFullness = 0;
  284. }
  285. #endif
  286. }
  287. /* Saturate realBitCnt, this is to prevent overflows caused by much greater
  288. bitrate setting than is really possible to reach */
  289. if (vb->realBitCnt > 0x1FFFFFFF)
  290. vb->realBitCnt = 0x1FFFFFFF;
  291. if (vb->realBitCnt < -0x1FFFFFFF)
  292. vb->realBitCnt = -0x1FFFFFFF;
  293. vb->picTimeInc += timeInc;
  294. vb->virtualBitCnt += H264Calculate(vb->bitRate, timeInc, vb->timeScale);
  295. target = vb->virtualBitCnt - vb->realBitCnt;
  296. /* Saturate target, prevents rc going totally out of control.
  297. This situation should never happen. */
  298. if (target > 0x1FFFFFFF)
  299. target = 0x1FFFFFFF;
  300. if (target < -0x1FFFFFFF)
  301. target = -0x1FFFFFFF;
  302. /* picTimeInc must be in range of [0, timeScale) */
  303. while (vb->picTimeInc >= vb->timeScale) {
  304. vb->picTimeInc -= vb->timeScale;
  305. vb->virtualBitCnt -= vb->bitRate;
  306. vb->realBitCnt -= vb->bitRate;
  307. }
  308. drift = H264Calculate(vb->bitRate, vb->picTimeInc, vb->timeScale);
  309. drift -= vb->virtualBitCnt;
  310. vb->virtualBitCnt += drift;
  311. DBG(1, (DBGOUTPUT, "virtualBitCnt:\t\t%6i realBitCnt: %i ",
  312. vb->virtualBitCnt, vb->realBitCnt));
  313. DBG(1, (DBGOUTPUT, "target: %i timeInc: %i\n", target, timeInc));
  314. return target;
  315. }
  316. /*------------------------------------------------------------------------------
  317. H264AfterPicRc() Update source model, bit rate target error and linear
  318. regression model for frame QP calculation. If HRD enabled, check leaky bucket
  319. status and return RC_OVERFLOW if coded frame must be skipped. Otherwise
  320. returns number of required filler payload bytes.
  321. ------------------------------------------------------------------------------*/
  322. i32 H264AfterPicRc(h264RateControl_s * rc, u32 nonZeroCnt, u32 byteCnt,
  323. u32 qpSum)
  324. {
  325. h264VirtualBuffer_s *vb = &rc->virtualBuffer;
  326. i32 bitPerPic = rc->virtualBuffer.bitPerPic;
  327. i32 tmp, stat, bitCnt = (i32)byteCnt * 8;
  328. (void) bitPerPic;
  329. rc->qpSum = (i32)qpSum;
  330. rc->frameBitCnt = bitCnt;
  331. rc->nonZeroCnt = nonZeroCnt;
  332. rc->gopBitCnt += bitCnt;
  333. rc->frameCnt++;
  334. if (rc->targetPicSize) {
  335. tmp = ((bitCnt - rc->targetPicSize) * 100) /
  336. rc->targetPicSize;
  337. } else {
  338. tmp = -1;
  339. }
  340. DBG(0, (DBGOUTPUT, "\nAFTER PIC RC:\n"));
  341. DBG(0, (DBGOUTPUT, "BitCnt %3d BitErr/avg %3d%% ", bitCnt,
  342. ((bitCnt - bitPerPic) * 100) / (bitPerPic+1)));
  343. DBG(1, (DBGOUTPUT, "BitErr/target %3i%% qpHdr %2i avgQp %4i\n",
  344. tmp, rc->qpHdr, rc->qpSum / rc->mbPerPic));
  345. /* Calculate the source parameter only for INTER frames */
  346. if (rc->sliceTypeCur != ISLICE && rc->sliceTypeCur != ISLICES)
  347. SourceParameter(rc, rc->nonZeroCnt);
  348. /* Store the error between target and actual frame size */
  349. if (rc->sliceTypeCur != ISLICE && rc->sliceTypeCur != ISLICES) {
  350. /* Saturate the error to avoid inter frames with
  351. * mostly intra MBs to affect too much */
  352. update_rc_error(&rc->rError,
  353. MIN(bitCnt - rc->targetPicSize, 2*rc->targetPicSize));
  354. } else {
  355. update_rc_error(&rc->intraError, bitCnt - rc->targetPicSize);
  356. }
  357. /* Update number of bits used for residual, inter or intra */
  358. if (rc->sliceTypeCur != ISLICE && rc->sliceTypeCur != ISLICES) {
  359. update_tables(&rc->linReg, rc->qpHdrPrev,
  360. H264Calculate(bitCnt, 256, rc->mbPerPic));
  361. update_model(&rc->linReg);
  362. } else {
  363. update_tables(&rc->intra, rc->qpHdrPrev,
  364. H264Calculate(bitCnt, 256, rc->mbPerPic));
  365. update_model(&rc->intra);
  366. }
  367. /* Post-frame skip if HRD buffer overflow */
  368. if ((rc->hrd == ENCHW_YES) && (bitCnt > (vb->bufferSize - vb->bucketFullness))) {
  369. DBG(1, (DBGOUTPUT, "Be: %7i ", vb->bucketFullness));
  370. DBG(1, (DBGOUTPUT, "fillerBits %5i ", 0));
  371. DBG(1, (DBGOUTPUT, "bitCnt %d spaceLeft %d ",
  372. bitCnt, (vb->bufferSize - vb->bucketFullness)));
  373. DBG(1, (DBGOUTPUT, "bufSize %d bucketFullness %d bitPerPic %d\n",
  374. vb->bufferSize, vb->bucketFullness, bitPerPic));
  375. DBG(0, (DBGOUTPUT, "HRD overflow, frame discard\n"));
  376. rc->frameCoded = ENCHW_NO;
  377. return H264RC_OVERFLOW;
  378. } else {
  379. vb->bucketFullness += bitCnt;
  380. vb->realBitCnt += bitCnt;
  381. }
  382. DBG(1, (DBGOUTPUT, "plot\t%4i\t%4i\t%8i\t%8i\t%8i\t%8i\t%8i\n",
  383. rc->frameCnt, rc->qpHdr, rc->targetPicSize, bitCnt,
  384. bitPerPic, rc->gopAvgBitCnt, vb->realBitCnt-vb->virtualBitCnt));
  385. if (rc->hrd == ENCHW_NO) {
  386. return 0;
  387. }
  388. tmp = 0;
  389. #if RC_CBR_HRD
  390. /* Bits needed to prevent bucket underflow */
  391. tmp = bitPerPic - vb->bucketFullness;
  392. if (tmp > 0) {
  393. tmp = (tmp + 7) / 8;
  394. vb->bucketFullness += tmp * 8;
  395. vb->realBitCnt += tmp * 8;
  396. } else {
  397. tmp = 0;
  398. }
  399. #endif
  400. /* Update Buffering Info */
  401. stat = vb->bufferSize - vb->bucketFullness;
  402. rc->gInitialDelay = H264Calculate(90000, stat, vb->bitRate);
  403. rc->gInitialDoffs = rc->gDelaySum - rc->gInitialDelay;
  404. rc->sei.icrd = (u32)rc->gInitialDelay;
  405. rc->sei.icrdo = (u32)rc->gInitialDoffs;
  406. DBG(1, (DBGOUTPUT, "initialDelay: %5i ", rc->gInitialDelay));
  407. DBG(1, (DBGOUTPUT, "initialDoffs: %5i\n", rc->gInitialDoffs));
  408. DBG(1, (DBGOUTPUT, "Be: %7i ", vb->bucketFullness));
  409. DBG(1, (DBGOUTPUT, "fillerBits %5i\n", tmp * 8));
  410. #ifdef TRACE_RC
  411. if (vb->bucketFullness < rc->gBufferMin) {
  412. rc->gBufferMin = vb->bucketFullness;
  413. }
  414. if (vb->bucketFullness > rc->gBufferMax) {
  415. rc->gBufferMax = vb->bucketFullness;
  416. }
  417. DBG(1, (DBGOUTPUT, "\nLeaky Bucket Min: %i (%d%%) Max: %i (%d%%)\n",
  418. rc->gBufferMin, rc->gBufferMin*100/vb->bufferSize,
  419. rc->gBufferMax, rc->gBufferMax*100/vb->bufferSize));
  420. #endif
  421. return tmp;
  422. }
  423. /*------------------------------------------------------------------------------
  424. H264BeforePicRc() Update virtual buffer, and calculate picInitQp for current
  425. picture , and coded status.
  426. ------------------------------------------------------------------------------*/
  427. void H264BeforePicRc(h264RateControl_s * rc, u32 timeInc, u32 sliceType)
  428. {
  429. h264VirtualBuffer_s *vb = &rc->virtualBuffer;
  430. i32 i, rcWindow, intraBits = 0, tmp = 0;
  431. rc->frameCoded = ENCHW_YES;
  432. rc->sliceTypeCur = sliceType;
  433. DBG(0, (DBGOUTPUT, "\nBEFORE PIC RC:\n"));
  434. DBG(0, (DBGOUTPUT, "Frame type current\t%2i\n", sliceType));
  435. tmp = VirtualBuffer(&rc->virtualBuffer, (i32) timeInc, rc->hrd);
  436. for(i = 0; i < CHECK_POINTS_MAX; i++) {
  437. rc->qpCtrl.wordCntTarget[i] = 0;
  438. }
  439. if (vb->windowRem == 0) {
  440. vb->windowRem = rc->windowLen-1;
  441. /* New bitrate window, reset error counters */
  442. update_rc_error(&rc->rError, 0x7fffffff);
  443. /* Don't reset intra error in case of intra-only, it would cause step. */
  444. if (rc->sliceTypeCur != rc->sliceTypePrev)
  445. update_rc_error(&rc->intraError, 0x7fffffff);
  446. } else {
  447. vb->windowRem--;
  448. }
  449. /* Calculate target size for this picture. Adjust the target bitPerPic
  450. * with the cumulated error between target and actual bitrates (tmp).
  451. * Also take into account the bits used by intra frame starting the GOP. */
  452. if (rc->sliceTypeCur != ISLICE && rc->sliceTypeCur != ISLICES &&
  453. rc->intraInterval > 1) {
  454. /* GOP bits that are used by intra frame. Amount of bits
  455. * "stolen" by intra from each inter frame in the GOP. */
  456. intraBits = vb->bitPerPic*rc->intraInterval*get_avg_bits(&rc->gop, 10)/100;
  457. intraBits -= vb->bitPerPic;
  458. intraBits /= (rc->intraInterval-1);
  459. intraBits = MAX(0, intraBits);
  460. }
  461. /* Compensate for intra "stealing" bits from inters. */
  462. tmp += intraBits*(rc->intraInterval-rc->intraIntervalCtr);
  463. #ifdef RC_WINDOW_STRICT
  464. /* In the end of window don't be too strict with matching the error
  465. * otherwise the end of window tends to twist QP. */
  466. rcWindow = MAX(MAX(3, rc->windowLen/8), vb->windowRem);
  467. #else
  468. /* Actually we can be fairly easy with this one, let's make it
  469. * a moving window to smoothen the changes. */
  470. rcWindow = MAX(1, rc->windowLen);
  471. #endif
  472. rc->targetPicSize = vb->bitPerPic - intraBits + DIV(tmp, rcWindow);
  473. /* Don't let the target go negative because it won't make any difference
  474. * and it will confuse RC because it can never be reached. */
  475. rc->targetPicSize = MAX(0, rc->targetPicSize);
  476. DBG(1, (DBGOUTPUT, "intraBits: %7i\tintraRatio: %3i%%\n",
  477. intraBits, get_avg_bits(&rc->gop, 10)));
  478. DBG(1, (DBGOUTPUT, "WndRem: %4i ", vb->windowRem));
  479. if (rc->sliceTypeCur == ISLICE || rc->sliceTypeCur == ISLICES) {
  480. DBG(1, (DBGOUTPUT, "Rd: %6d ", avg_rc_error(&rc->intraError)));
  481. } else {
  482. DBG(1, (DBGOUTPUT, "Rd: %6d ", avg_rc_error(&rc->rError)));
  483. }
  484. DBG(1, (DBGOUTPUT, "Tr: %7d\n", rc->targetPicSize));
  485. if(rc->picSkip)
  486. PicSkip(rc);
  487. /* determine initial quantization parameter for current picture */
  488. PicQuant(rc);
  489. /* quantization parameter user defined limitations */
  490. PicQuantLimit(rc);
  491. /* Store the start QP, before ROI adjustment */
  492. rc->qpHdrPrev = rc->qpHdr;
  493. if(rc->sliceTypeCur == ISLICE || rc->sliceTypeCur == ISLICES)
  494. {
  495. if(rc->fixedIntraQp)
  496. rc->qpHdr = rc->fixedIntraQp;
  497. else if (rc->sliceTypePrev != ISLICE && rc->sliceTypePrev != ISLICES)
  498. rc->qpHdr += rc->intraQpDelta;
  499. /* quantization parameter user defined limitations still apply */
  500. PicQuantLimit(rc);
  501. if (rc->intraIntervalCtr > 1)
  502. rc->intraInterval = rc->intraIntervalCtr;
  503. rc->intraIntervalCtr = 1;
  504. }
  505. else
  506. {
  507. /* trace the QP over GOP, excluding Intra QP */
  508. rc->gopQpSum += rc->qpHdr;
  509. rc->gopQpDiv++;
  510. rc->intraIntervalCtr++;
  511. /* Check that interval is repeating */
  512. if (rc->intraIntervalCtr > rc->intraInterval)
  513. rc->intraInterval = rc->intraIntervalCtr;
  514. }
  515. /* mb rate control (check point rate control) */
  516. #ifdef ROI_SUPPORT
  517. if (rc->roiRc)
  518. {
  519. H264EncRoiModel(rc);
  520. PicQuantLimit(rc);
  521. }
  522. else
  523. #endif
  524. if (rc->mbRc)
  525. {
  526. MbQuant(rc);
  527. }
  528. /* reset counters */
  529. rc->qpSum = 0;
  530. rc->qpLastCoded = rc->qpHdr;
  531. rc->qpTarget = rc->qpHdr;
  532. rc->nonZeroCnt = 0;
  533. rc->sliceTypePrev = rc->sliceTypeCur;
  534. DBG(0, (DBGOUTPUT, "Frame type current\t%i\n",rc->sliceTypeCur));
  535. DBG(0, (DBGOUTPUT, "Frame coded\t\t%2i\n", rc->frameCoded));
  536. DBG(0, (DBGOUTPUT, "Frame qpHdr\t\t%2i\n", rc->qpHdr));
  537. for(i = 0; i < CHECK_POINTS_MAX; i++) {
  538. DBG(1, (DBGOUTPUT, "CP %i mbNum %4i wTarg %5i\n", i,
  539. (rc->qpCtrl.checkPointDistance * (i + 1)),
  540. rc->qpCtrl.wordCntTarget[i]*32));
  541. }
  542. rc->sei.crd += timeInc;
  543. rc->sei.dod = 0;
  544. }
  545. /*------------------------------------------------------------------------------
  546. MbQuant()
  547. ------------------------------------------------------------------------------*/
  548. void MbQuant(h264RateControl_s * rc)
  549. {
  550. i32 nonZeroTarget;
  551. /* Disable Mb Rc for Intra Slices, because coeffTarget will be wrong */
  552. if(rc->sliceTypeCur == ISLICE || rc->sliceTypeCur == ISLICES ||
  553. rc->srcPrm == 0)
  554. {
  555. return;
  556. }
  557. /* Required zero cnt */
  558. nonZeroTarget = H264Calculate(rc->targetPicSize, 256, rc->srcPrm);
  559. nonZeroTarget = MIN(rc->coeffCntMax, MAX(0, nonZeroTarget));
  560. nonZeroTarget = MIN(0x7FFFFFFFU / 1024U, (u32)nonZeroTarget);
  561. rc->virtualBuffer.nonZeroTarget = nonZeroTarget;
  562. /* Use linear model when previous frame can't be used for prediction */
  563. if ((rc->sliceTypeCur != rc->sliceTypePrev) || (rc->nonZeroCnt == 0))
  564. {
  565. LinearModel(rc);
  566. }
  567. else
  568. {
  569. AdaptiveModel(rc);
  570. }
  571. }
  572. /*------------------------------------------------------------------------------
  573. LinearModel()
  574. ------------------------------------------------------------------------------*/
  575. void LinearModel(h264RateControl_s * rc)
  576. {
  577. const i32 sscale = 256;
  578. h264QpCtrl_s *qc = &rc->qpCtrl;
  579. i32 scaler;
  580. i32 i;
  581. i32 tmp, nonZeroTarget = rc->virtualBuffer.nonZeroTarget;
  582. ASSERT(nonZeroTarget < (0x7FFFFFFF / sscale));
  583. if(nonZeroTarget > 0)
  584. {
  585. scaler = H264Calculate(nonZeroTarget, sscale, (i32) rc->mbPerPic);
  586. }
  587. else
  588. {
  589. return;
  590. }
  591. DBG(1, (DBGOUTPUT, " Linear Target: %8d prevCnt:\t %6d Scaler:\t %6d\n",
  592. nonZeroTarget, rc->nonZeroCnt, scaler / sscale));
  593. for(i = 0; i < rc->qpCtrl.checkPoints; i++)
  594. {
  595. tmp = (scaler * (qc->checkPointDistance * (i + 1) + 1)) / sscale;
  596. tmp = MIN(WORD_CNT_MAX, tmp / 32 + 1);
  597. if (tmp < 0) tmp = WORD_CNT_MAX; /* Detect overflow */
  598. qc->wordCntTarget[i] = tmp; /* div32 for regs */
  599. }
  600. /* calculate nz count for avg. bits per frame */
  601. tmp = H264Calculate(rc->virtualBuffer.bitPerPic, 256, rc->srcPrm);
  602. DBG(1, (DBGOUTPUT, "Error Limit:\t %8d SrcPrm:\t %6d\n",
  603. tmp, rc->srcPrm / 256));
  604. qc->wordError[0] = -tmp * 3;
  605. qc->qpChange[0] = -3;
  606. qc->wordError[1] = -tmp * 2;
  607. qc->qpChange[1] = -2;
  608. qc->wordError[2] = -tmp * 1;
  609. qc->qpChange[2] = -1;
  610. qc->wordError[3] = tmp * 1;
  611. qc->qpChange[3] = 0;
  612. qc->wordError[4] = tmp * 2;
  613. qc->qpChange[4] = 1;
  614. qc->wordError[5] = tmp * 3;
  615. qc->qpChange[5] = 2;
  616. qc->wordError[6] = tmp * 4;
  617. qc->qpChange[6] = 3;
  618. for(i = 0; i < CTRL_LEVELS; i++)
  619. {
  620. tmp = qc->wordError[i];
  621. tmp = CLIP3(-32768, 32767, tmp / 4);
  622. qc->wordError[i] = tmp;
  623. }
  624. }
  625. /*------------------------------------------------------------------------------
  626. AdaptiveModel()
  627. ------------------------------------------------------------------------------*/
  628. void AdaptiveModel(h264RateControl_s * rc)
  629. {
  630. const i32 sscale = 256;
  631. h264QpCtrl_s *qc = &rc->qpCtrl;
  632. i32 i;
  633. i32 tmp, nonZeroTarget = rc->virtualBuffer.nonZeroTarget;
  634. i32 scaler;
  635. ASSERT(nonZeroTarget < (0x7FFFFFFF / sscale));
  636. if((nonZeroTarget > 0) && (rc->nonZeroCnt > 0))
  637. {
  638. scaler = H264Calculate(nonZeroTarget, sscale, rc->nonZeroCnt);
  639. }
  640. else
  641. {
  642. return;
  643. }
  644. DBG(1, (DBGOUTPUT, "Adaptive Target: %8d prevCnt:\t %6d Scaler:\t %6d\n",
  645. nonZeroTarget, rc->nonZeroCnt, scaler / sscale));
  646. for(i = 0; i < rc->qpCtrl.checkPoints; i++)
  647. {
  648. tmp = (i32) (qc->wordCntPrev[i] * scaler) / sscale;
  649. tmp = MIN(WORD_CNT_MAX, tmp / 32 + 1);
  650. if (tmp < 0) tmp = WORD_CNT_MAX; /* Detect overflow */
  651. qc->wordCntTarget[i] = tmp; /* div32 for regs */
  652. DBG(2, (DBGOUTPUT, " CP %i wordCntPrev %6i wordCntTarget_div32 %6i\n",
  653. i, qc->wordCntPrev[i], qc->wordCntTarget[i]));
  654. }
  655. /* Qp change table */
  656. /* calculate nz count for avg. bits per frame */
  657. tmp = H264Calculate(rc->virtualBuffer.bitPerPic, 256, (rc->srcPrm * 3));
  658. DBG(1, (DBGOUTPUT, "Error Limit:\t %8d SrcPrm:\t %6d\n",
  659. tmp, rc->srcPrm / 256));
  660. qc->wordError[0] = -tmp * 3;
  661. qc->qpChange[0] = -3;
  662. qc->wordError[1] = -tmp * 2;
  663. qc->qpChange[1] = -2;
  664. qc->wordError[2] = -tmp * 1;
  665. qc->qpChange[2] = -1;
  666. qc->wordError[3] = tmp * 1;
  667. qc->qpChange[3] = 0;
  668. qc->wordError[4] = tmp * 2;
  669. qc->qpChange[4] = 1;
  670. qc->wordError[5] = tmp * 3;
  671. qc->qpChange[5] = 2;
  672. qc->wordError[6] = tmp * 4;
  673. qc->qpChange[6] = 3;
  674. for(i = 0; i < CTRL_LEVELS; i++)
  675. {
  676. tmp = qc->wordError[i];
  677. tmp = CLIP3(-32768, 32767, tmp / 4);
  678. qc->wordError[i] = tmp;
  679. }
  680. }
  681. /*------------------------------------------------------------------------------
  682. SourceParameter() Source parameter of last coded frame. Parameters
  683. has been scaled up by factor 256.
  684. ------------------------------------------------------------------------------*/
  685. void SourceParameter(h264RateControl_s * rc, i32 nonZeroCnt)
  686. {
  687. ASSERT(rc->qpSum <= 51 * rc->mbPerPic);
  688. ASSERT(nonZeroCnt <= rc->coeffCntMax);
  689. ASSERT(nonZeroCnt >= 0 && rc->coeffCntMax >= 0);
  690. /* AVOID division by zero */
  691. if(nonZeroCnt == 0)
  692. {
  693. nonZeroCnt = 1;
  694. }
  695. rc->srcPrm = H264Calculate(rc->frameBitCnt, 256, nonZeroCnt);
  696. DBG(1, (DBGOUTPUT, "nonZeroCnt %6i, srcPrm %i\n",
  697. nonZeroCnt, rc->srcPrm/256));
  698. }
  699. /*------------------------------------------------------------------------------
  700. PicSkip() Decrease framerate if not enough bits available.
  701. ------------------------------------------------------------------------------*/
  702. void PicSkip(h264RateControl_s * rc)
  703. {
  704. h264VirtualBuffer_s *vb = &rc->virtualBuffer;
  705. i32 bitAvailable = vb->virtualBitCnt - vb->realBitCnt;
  706. i32 skipIncLimit = -vb->bitPerPic / 3;
  707. i32 skipDecLimit = vb->bitPerPic / 3;
  708. /* When frameRc is enabled, skipFrameTarget is not allowed to be > 1
  709. * This makes sure that not too many frames is skipped and lets
  710. * the frameRc adjust QP instead of skipping many frames */
  711. if(((rc->picRc == ENCHW_NO) || (vb->skipFrameTarget == 0)) &&
  712. (bitAvailable < skipIncLimit))
  713. {
  714. vb->skipFrameTarget++;
  715. }
  716. if((bitAvailable > skipDecLimit) && vb->skipFrameTarget > 0)
  717. {
  718. vb->skipFrameTarget--;
  719. }
  720. if(vb->skippedFrames < vb->skipFrameTarget)
  721. {
  722. vb->skippedFrames++;
  723. rc->frameCoded = ENCHW_NO;
  724. }
  725. else
  726. {
  727. vb->skippedFrames = 0;
  728. }
  729. }
  730. /*------------------------------------------------------------------------------
  731. PicQuant() Calculate quantization parameter for next frame. In the beginning
  732. of window use previous GOP average QP and otherwise find new QP
  733. using the target size and previous frames QPs and bit counts.
  734. ------------------------------------------------------------------------------*/
  735. void PicQuant(h264RateControl_s * rc)
  736. {
  737. i32 normBits, targetBits;
  738. true_e useQpDeltaLimit = ENCHW_YES;
  739. if(rc->picRc != ENCHW_YES)
  740. {
  741. rc->qpHdr = rc->fixedQp;
  742. DBG(1, (DBGOUTPUT, "R/cx: xxxx QP: xx xx D: xxxx newQP: xx\n"));
  743. return;
  744. }
  745. /* If HRD is enabled we must make sure this frame fits in buffer */
  746. if (rc->hrd == ENCHW_YES)
  747. {
  748. i32 bitsAvailable =
  749. (rc->virtualBuffer.bufferSize - rc->virtualBuffer.bucketFullness);
  750. /* If the previous frame didn't fit the buffer we don't limit QP change */
  751. if (rc->frameBitCnt > bitsAvailable) {
  752. useQpDeltaLimit = ENCHW_NO;
  753. }
  754. }
  755. /* determine initial quantization parameter for current picture */
  756. if (rc->sliceTypeCur == ISLICE || rc->sliceTypeCur == ISLICES) {
  757. /* Default intra QP == prev GOP average */
  758. rc->qpHdr = gop_avg_qp(rc);
  759. /* If all frames are intra we calculate new QP
  760. * for intra the same way as for inter */
  761. if (rc->sliceTypePrev == ISLICE || rc->sliceTypePrev == ISLICES) {
  762. targetBits = rc->targetPicSize - avg_rc_error(&rc->intraError);
  763. normBits = H264Calculate(targetBits, 256, rc->mbPerPic);
  764. rc->qpHdr = new_pic_quant(&rc->intra, normBits, useQpDeltaLimit);
  765. } else {
  766. DBG(1, (DBGOUTPUT, "R/cx: xxxx QP: xx xx D: xxxx newQP: xx\n"));
  767. }
  768. } else {
  769. /* Calculate new QP by matching to previous inter frames R-Q curve */
  770. targetBits = rc->targetPicSize - avg_rc_error(&rc->rError);
  771. normBits = H264Calculate(targetBits, 256, rc->mbPerPic);
  772. rc->qpHdr = new_pic_quant(&rc->linReg, normBits, useQpDeltaLimit);
  773. }
  774. }
  775. /*------------------------------------------------------------------------------
  776. PicQuantLimit()
  777. ------------------------------------------------------------------------------*/
  778. void PicQuantLimit(h264RateControl_s * rc)
  779. {
  780. rc->qpHdr = MIN(rc->qpMax, MAX(rc->qpMin, rc->qpHdr));
  781. }
  /*------------------------------------------------------------------------------
    H264Calculate()  Avoid overflow and calculate a good enough result of
    a * b / c.

    - Returns 0 when a or b is zero.
    - When a * b fits in i32 and c is non-zero the exact quotient is
      returned (truncated toward zero). The one quotient that does not fit,
      -2^31 / -1, saturates to 0x7FFFFFFF.
    - Otherwise the magnitudes are processed as unsigned values: the larger
      factor is shifted up as far as it goes, divided by c, multiplied by
      the smaller factor and shifted back. The result is approximate.
    - Division by zero, or a quotient too large for the shifting scheme,
      returns 0x7FFFFFFF with the sign of the exact result.

    The range check is done BEFORE multiplying. Probing with
    (a * b / b) == a depends on signed overflow, which is undefined and may
    be folded to "always true" by an optimising compiler.
  ------------------------------------------------------------------------------*/
  i32 H264Calculate(i32 a, i32 b, i32 c)
  {
      const u32 maxMag = 0x7FFFFFFF;  /* largest magnitude of a positive i32 */
      u32 ua, ub, uc;
      u32 limit;
      u32 left = 32;
      u32 right = 0;
      u32 shift;
      i32 sign = 1;

      if(a == 0 || b == 0)
      {
          return 0;
      }

      /* Magnitudes as unsigned values; well defined even for -2^31 */
      ua = (a < 0) ? ((u32)0 - (u32)a) : (u32)a;
      ub = (b < 0) ? ((u32)0 - (u32)b) : (u32)b;
      uc = (c < 0) ? ((u32)0 - (u32)c) : (u32)c;

      /* A negative product may reach magnitude 2^31, a positive one 2^31-1 */
      limit = ((a < 0) != (b < 0)) ? (maxMag + 1) : maxMag;

      if(c != 0 && ua <= limit / ub)
      {
          i32 product = a * b;    /* cannot overflow, checked above */

          if(c == -1 && product < -0x7FFFFFFF)
          {
              return 0x7FFFFFFF;  /* -2^31 / -1 does not fit in i32 */
          }
          return product / c;
      }

      if(a < 0)
      {
          sign = -sign;
      }
      if(b < 0)
      {
          sign = -sign;
      }
      if(c < 0)
      {
          sign = -sign;
      }

      if(uc == 0)
      {
          return 0x7FFFFFFF * sign;
      }

      /* The shifting below needs factors of at most 31 bits; a factor of
       * -2^31 is approximated by 2^31-1 */
      if(ua > maxMag)
      {
          ua = maxMag;
      }
      if(ub > maxMag)
      {
          ub = maxMag;
      }

      /* Keep the larger factor in ua */
      if(ub > ua)
      {
          u32 swap = ub;

          ub = ua;
          ua = swap;
      }

      /* Largest left shift of ua that loses no bits... */
      for(--left; ((ua << left) >> left) != ua; --left);
      left--; /* ...minus one, because we want signed accuracy */

      /* Right shift needed to bring ub down to the size of uc */
      while((ub >> right) > uc)
      {
          right++;
      }

      if(right > left)
      {
          return 0x7FFFFFFF * sign;
      }

      shift = left - right;
      return (i32)(((ua << shift) / uc * ub) >> shift) * sign;
  }
  843. /*------------------------------------------------------------------------------
  844. avg_rc_error() PI(D)-control for rate prediction error.
  845. ------------------------------------------------------------------------------*/
  846. static i32 avg_rc_error(linReg_s *p)
  847. {
  848. return DIV(p->bits[2] * 4 + p->bits[1] * 6 + p->bits[0] * 0, 100);
  849. }
  850. /*------------------------------------------------------------------------------
  851. update_overhead() Update PI(D)-control values
  852. ------------------------------------------------------------------------------*/
  853. static void update_rc_error(linReg_s *p, i32 bits)
  854. {
  855. p->len = 3;
  856. if (bits == (i32)0x7fffffff) {
  857. /* RESET */
  858. p->bits[0] = 0;
  859. p->bits[1] = 0;
  860. p->bits[2] = 0;
  861. return;
  862. }
  863. p->bits[0] = bits - p->bits[2]; /* Derivative */
  864. if ((bits > 0) && (bits + p->bits[1] > p->bits[1]))
  865. p->bits[1] = bits + p->bits[1]; /* Integral */
  866. if ((bits < 0) && (bits + p->bits[1] < p->bits[1]))
  867. p->bits[1] = bits + p->bits[1]; /* Integral */
  868. p->bits[2] = bits; /* Proportional */
  869. DBG(1, (DBGOUTPUT, "P %6d I %7d D %7d\n", p->bits[2], p->bits[1], p->bits[0]));
  870. }
  871. /*------------------------------------------------------------------------------
  872. gop_avg_qp() Average quantization parameter of P frames since previous I.
  873. ------------------------------------------------------------------------------*/
  874. i32 gop_avg_qp(h264RateControl_s *rc)
  875. {
  876. i32 tmp = rc->qpHdrPrev;
  877. if (rc->gopQpSum && rc->gopQpDiv) {
  878. tmp = DIV(rc->gopQpSum, rc->gopQpDiv);
  879. }
  880. /* Average bit count per frame for previous GOP (intra + inter) */
  881. rc->gopAvgBitCnt = DIV(rc->gopBitCnt, (rc->gopQpDiv+1));
  882. /* Ratio of intra_frame_bits/all_gop_bits % for previous GOP */
  883. if (rc->gopBitCnt) {
  884. i32 gopIntraBitRatio =
  885. H264Calculate(get_avg_bits(&rc->intra,1), rc->mbPerPic, 256) * 100;
  886. gopIntraBitRatio = DIV(gopIntraBitRatio, rc->gopBitCnt);
  887. /* GOP bit count must be > intra bit count, so ratio must be < 100 */
  888. gopIntraBitRatio = MIN(99, gopIntraBitRatio);
  889. update_tables(&rc->gop, tmp, gopIntraBitRatio);
  890. }
  891. rc->gopQpSum = 0;
  892. rc->gopQpDiv = 0;
  893. rc->gopBitCnt = 0;
  894. return tmp;
  895. }
  896. /*------------------------------------------------------------------------------
  897. new_pic_quant() Calculate new quantization parameter from the 2nd degree R-Q
  898. equation. Further adjust Qp for "smoother" visual quality.
  899. ------------------------------------------------------------------------------*/
  900. static i32 new_pic_quant(linReg_s *p, i32 bits, true_e useQpDeltaLimit)
  901. {
  902. i32 tmp, qp_best = p->qp_prev, qp = p->qp_prev, diff;
  903. i32 diff_prev = 0, qp_prev = 0, diff_best = 0x7FFFFFFF;
  904. DBG(1, (DBGOUTPUT, "R/cx:%6d ",bits));
  905. if (p->a1 == 0 && p->a2 == 0) {
  906. DBG(1, (DBGOUTPUT, " QP: xx xx D: ==== newQP: %2d\n", qp));
  907. return qp;
  908. }
  909. if (bits <= 0) {
  910. if (useQpDeltaLimit)
  911. qp = MIN(51, MAX(0, qp + QP_DELTA));
  912. else
  913. qp = MIN(51, MAX(0, qp + 10));
  914. DBG(1, (DBGOUTPUT, " QP: xx xx D: ---- newQP: %2d\n", qp));
  915. return qp;
  916. }
  917. do {
  918. tmp = DIV(p->a1, q_step[qp]);
  919. tmp += DIV(p->a2, q_step[qp] * q_step[qp]);
  920. diff = ABS(tmp - bits);
  921. if (diff < diff_best) {
  922. if (diff_best == 0x7FFFFFFF) {
  923. diff_prev = diff;
  924. qp_prev = qp;
  925. } else {
  926. diff_prev = diff_best;
  927. qp_prev = qp_best;
  928. }
  929. diff_best = diff;
  930. qp_best = qp;
  931. if ((tmp - bits) <= 0) {
  932. if (qp < 1) {
  933. break;
  934. }
  935. qp--;
  936. } else {
  937. if (qp > 50) {
  938. break;
  939. }
  940. qp++;
  941. }
  942. } else {
  943. break;
  944. }
  945. } while ((qp >= 0) && (qp <= 51));
  946. qp = qp_best;
  947. DBG(1,(DBGOUTPUT, " QP: %2d %2d D: %5d", qp, qp_prev, diff_prev - diff_best));
  948. /* One unit change in Qp changes rate about 12% ca. 1/8. If the
  949. * difference is less than half use the one closer to previous value */
  950. if (ABS(diff_prev - diff_best) <= ABS(bits) / 16) {
  951. qp = qp_prev;
  952. }
  953. DBG(1, (DBGOUTPUT, " newQP: %2d\n", qp));
  954. /* Limit Qp change for smoother visual quality */
  955. if (useQpDeltaLimit) {
  956. tmp = qp - p->qp_prev;
  957. if (tmp > QP_DELTA) {
  958. qp = p->qp_prev + QP_DELTA;
  959. /* When QP is totally wrong, allow faster QP increase */
  960. if (tmp > QP_DELTA_LIMIT)
  961. qp = p->qp_prev + QP_DELTA*2;
  962. } else if (tmp < -QP_DELTA) {
  963. qp = p->qp_prev - QP_DELTA;
  964. }
  965. }
  966. return qp;
  967. }
  968. /*------------------------------------------------------------------------------
  969. get_avg_bits()
  970. ------------------------------------------------------------------------------*/
  971. static i32 get_avg_bits(linReg_s *p, i32 n)
  972. {
  973. i32 i;
  974. i32 sum = 0;
  975. i32 pos = p->pos;
  976. if (!p->len) return 0;
  977. if (n == -1 || n > p->len)
  978. n = p->len;
  979. i = n;
  980. while (i--) {
  981. if (pos) pos--;
  982. else pos = p->len-1;
  983. sum += p->bits[pos];
  984. if (sum < 0) {
  985. return I32_MAX / (n-i);
  986. }
  987. }
  988. return DIV(sum, n);
  989. }
  990. /*------------------------------------------------------------------------------
  991. update_tables() only statistics of PSLICE, please.
  992. ------------------------------------------------------------------------------*/
  993. static void update_tables(linReg_s *p, i32 qp, i32 bits)
  994. {
  995. const i32 clen = RC_TABLE_LENGTH;
  996. i32 tmp = p->pos;
  997. p->qp_prev = qp;
  998. p->qs[tmp] = q_step[qp];
  999. p->bits[tmp] = bits;
  1000. if (++p->pos >= clen) {
  1001. p->pos = 0;
  1002. }
  1003. if (p->len < clen) {
  1004. p->len++;
  1005. }
  1006. }
  1007. /*------------------------------------------------------------------------------
  1008. update_model() Update model parameter by Linear Regression.
  1009. ------------------------------------------------------------------------------*/
  1010. static void update_model(linReg_s *p)
  1011. {
  1012. i32 *qs = p->qs, *r = p->bits, n = p->len;
  1013. i32 i, a1, a2, sx = lin_sx(qs, n), sy = lin_sy(qs, r, n);
  1014. for (i = 0; i < n; i++) {
  1015. DBG(2, (DBGOUTPUT, "model: qs %i r %i\n",qs[i], r[i]));
  1016. }
  1017. a1 = lin_sxy(qs, r, n);
  1018. a1 = a1 < I32_MAX / n ? a1 * n : I32_MAX;
  1019. if (sy == 0) {
  1020. a1 = 0;
  1021. } else {
  1022. a1 -= sx < I32_MAX / sy ? sx * sy : I32_MAX;
  1023. }
  1024. a2 = (lin_nsxx(qs, n) - (sx * sx));
  1025. if (a2 == 0) {
  1026. if (p->a1 == 0) {
  1027. /* If encountered in the beginning */
  1028. a1 = 0;
  1029. } else {
  1030. a1 = (p->a1 * 2) / 3;
  1031. }
  1032. } else {
  1033. a1 = H264Calculate(a1, DSCY, a2);
  1034. }
  1035. /* Value of a1 shouldn't be excessive (small) */
  1036. a1 = MAX(a1, -262144);
  1037. a1 = MIN(a1, 262143);
  1038. ASSERT(ABS(a1) * sx >= 0);
  1039. ASSERT(sx * DSCY >= 0);
  1040. a2 = DIV(sy * DSCY, n) - DIV(a1 * sx, n);
  1041. DBG(2, (DBGOUTPUT, "model: a2:%9d a1:%8d\n", a2, a1));
  1042. if (p->len > 0) {
  1043. p->a1 = a1;
  1044. p->a2 = a2;
  1045. }
  1046. }
  1047. /*------------------------------------------------------------------------------
  1048. lin_sy() calculate value of Sy for n points.
  1049. ------------------------------------------------------------------------------*/
  1050. static i32 lin_sy(i32 *qp, i32 *r, i32 n)
  1051. {
  1052. i32 sum = 0;
  1053. while (n--) {
  1054. sum += qp[n] * qp[n] * r[n];
  1055. if (sum < 0) {
  1056. return I32_MAX / DSCY;
  1057. }
  1058. }
  1059. return DIV(sum, DSCY);
  1060. }
  1061. /*------------------------------------------------------------------------------
  1062. lin_sx() calculate value of Sx for n points.
  1063. ------------------------------------------------------------------------------*/
  1064. static i32 lin_sx(i32 *qp, i32 n)
  1065. {
  1066. i32 tmp = 0;
  1067. while (n--) {
  1068. ASSERT(qp[n]);
  1069. tmp += qp[n];
  1070. }
  1071. return tmp;
  1072. }
  1073. /*------------------------------------------------------------------------------
  1074. lin_sxy() calculate value of Sxy for n points.
  1075. ------------------------------------------------------------------------------*/
  1076. static i32 lin_sxy(i32 *qp, i32 *r, i32 n)
  1077. {
  1078. i32 tmp, sum = 0;
  1079. while (n--) {
  1080. tmp = qp[n] * qp[n] * qp[n];
  1081. if (tmp > r[n]) {
  1082. sum += DIV(tmp, DSCY) * r[n];
  1083. } else {
  1084. sum += tmp * DIV(r[n], DSCY);
  1085. }
  1086. if (sum < 0) {
  1087. return I32_MAX;
  1088. }
  1089. }
  1090. return sum;
  1091. }
  1092. /*------------------------------------------------------------------------------
  1093. lin_nsxx() calculate value of n * Sxy for n points.
  1094. ------------------------------------------------------------------------------*/
  1095. static i32 lin_nsxx(i32 *qp, i32 n)
  1096. {
  1097. i32 tmp = 0, sum = 0, d = n ;
  1098. while (n--) {
  1099. tmp = qp[n];
  1100. tmp *= tmp;
  1101. sum += d * tmp;
  1102. }
  1103. return sum;
  1104. }