From 10303515843fcdc353e3ff550fb108ac245dc54a Mon Sep 17 00:00:00 2001 From: erankor Date: Sat, 30 Dec 2023 19:00:49 +0200 Subject: [PATCH 1/9] mkv fixes and laces support when reading frames, need to read a bit extra in order to have the headers of the frame following the last frame of the segment (in order to calculate the duration of the last frame) in addition to reading a few bytes extra, need to ignore overflow when parsing the cluster/simple-block elements of the next-segment frame, since usually this frame is not read in full. --- vod/common.h | 1 + vod/mkv/ebml.c | 34 +++-- vod/mkv/ebml.h | 2 + vod/mkv/mkv_format.c | 357 +++++++++++++++++++++++++++++++++++-------- 4 files changed, 319 insertions(+), 75 deletions(-) diff --git a/vod/common.h b/vod/common.h index 0d598b90..e06eb992 100644 --- a/vod/common.h +++ b/vod/common.h @@ -98,6 +98,7 @@ void vod_log_error(vod_uint_t level, vod_log_t *log, int err, #define VOD_INT64_LEN NGX_INT64_LEN #define VOD_INT32_LEN NGX_INT32_LEN +#define VOD_MAX_UINT32_VALUE NGX_MAX_UINT32_VALUE #define VOD_MAX_SIZE_T_VALUE NGX_MAX_SIZE_T_VALUE #define VOD_MAX_OFF_T_VALUE NGX_MAX_OFF_T_VALUE diff --git a/vod/mkv/ebml.c b/vod/mkv/ebml.c index e1970249..447f8c6b 100644 --- a/vod/mkv/ebml.c +++ b/vod/mkv/ebml.c @@ -97,9 +97,10 @@ ebml_read_num(ebml_context_t* context, uint64_t* result, size_t max_size, int re } static vod_status_t -ebml_read_size(ebml_context_t* context, uint64_t* result) +ebml_read_size(ebml_context_t* context, uint64_t* result, bool_t truncate) { vod_status_t rc; + uint64_t left; rc = ebml_read_num(context, result, 8, 1); if (rc < 0) @@ -109,19 +110,28 @@ ebml_read_size(ebml_context_t* context, uint64_t* result) return rc; } + left = context->end_pos - context->cur_pos; if (is_unknown_size(*result, rc)) { - *result = context->end_pos - context->cur_pos; + *result = left; + return VOD_OK; } - else if (*result > (uint64_t)(context->end_pos - context->cur_pos)) + + if (*result <= left) { - 
vod_log_error(VOD_LOG_ERR, context->request_context->log, 0, - "ebml_read_size: size %uL greater than the remaining stream bytes %uL", - *result, (uint64_t)(context->end_pos - context->cur_pos)); - return VOD_BAD_DATA; + return VOD_OK; } - return VOD_OK; + if (truncate) + { + *result = left; + return VOD_OK; + } + + vod_log_error(VOD_LOG_ERR, context->request_context->log, 0, + "ebml_read_size: size %uL greater than the remaining stream bytes %uL", + *result, left); + return VOD_BAD_DATA; } static vod_status_t @@ -194,9 +204,10 @@ ebml_parse_element(ebml_context_t* context, ebml_spec_t* spec, void* dest) uint64_t size; void* cur_dest; vod_status_t rc; + ebml_type_t type; // size - rc = ebml_read_size(context, &size); + rc = ebml_read_size(context, &size, spec->type & EBML_TRUNCATE_SIZE); if (rc != VOD_OK) { vod_log_debug1(VOD_LOG_DEBUG_LEVEL, context->request_context->log, 0, @@ -210,7 +221,8 @@ ebml_parse_element(ebml_context_t* context, ebml_spec_t* spec, void* dest) return VOD_OK; } - max_size = ebml_max_sizes[spec->type]; + type = spec->type & ~EBML_TRUNCATE_SIZE; + max_size = ebml_max_sizes[type]; if (max_size && size > max_size) { vod_log_error(VOD_LOG_ERR, context->request_context->log, 0, @@ -220,7 +232,7 @@ ebml_parse_element(ebml_context_t* context, ebml_spec_t* spec, void* dest) cur_dest = (u_char*)dest + spec->offset; - switch (spec->type) + switch (type) { case EBML_UINT: rc = ebml_read_uint(context, size, cur_dest); diff --git a/vod/mkv/ebml.h b/vod/mkv/ebml.h index 907d9ad4..3e0cf05a 100644 --- a/vod/mkv/ebml.h +++ b/vod/mkv/ebml.h @@ -7,6 +7,8 @@ #define ebml_read_id(context, id) ebml_read_num(context, id, 4, 0) #define is_unknown_size(num, num_bytes) ((num) + 1 == 1ULL << (7 * (num_bytes))) +#define EBML_TRUNCATE_SIZE 0x80 + // typedefs typedef enum { EBML_NONE, diff --git a/vod/mkv/mkv_format.c b/vod/mkv/mkv_format.c index 19cbe40f..987d3403 100644 --- a/vod/mkv/mkv_format.c +++ b/vod/mkv/mkv_format.c @@ -9,6 +9,24 @@ #define BITRATE_ESTIMATE_SEC 
(5) #define FRAMES_PER_PART (160) // about 4K #define MAX_GOP_FRAMES (600) // 10 sec GOP in 60 fps +#define MAX_LACES (256) // the count is stored in one byte + +/* when reading the frames, need to read some extra in order to have the headers + of the first frame of the next cluster - + + field max size + MKV_ID_CLUSTER (4) + size (8) + MKV_ID_CLUSTERTIMECODE (1) + size (1) + value (8) + MKV_SIMPLEBLOCK (1) + size (8) + track number (8) + timecode (2) + flags (1) +*/ +#define READ_FRAMES_EXTRA_SIZE (42) // prototypes static vod_status_t mkv_parse_seek_entry(ebml_context_t* context, ebml_spec_t* spec, void* dst); @@ -144,23 +162,23 @@ static ebml_spec_t mkv_spec_index[] = { // cluster static ebml_spec_t mkv_spec_cluster_fields[] = { { MKV_ID_CLUSTERTIMECODE, EBML_UINT, offsetof(mkv_cluster_t, timecode), NULL }, - { MKV_ID_SIMPLEBLOCK, EBML_CUSTOM, 0, mkv_parse_frame }, + { MKV_ID_SIMPLEBLOCK, EBML_CUSTOM | EBML_TRUNCATE_SIZE, 0, mkv_parse_frame }, { 0, EBML_NONE, 0, NULL } }; static ebml_spec_t mkv_spec_cluster[] = { - { MKV_ID_CLUSTER, EBML_MASTER, 0, mkv_spec_cluster_fields }, + { MKV_ID_CLUSTER, EBML_MASTER | EBML_TRUNCATE_SIZE, 0, mkv_spec_cluster_fields }, { 0, EBML_NONE, 0, NULL } }; static ebml_spec_t mkv_spec_bitrate_estimate_cluster_fields[] = { { MKV_ID_CLUSTERTIMECODE, EBML_UINT, offsetof(mkv_cluster_t, timecode), NULL }, - { MKV_ID_SIMPLEBLOCK, EBML_CUSTOM, 0, mkv_parse_frame_estimate_bitrate }, + { MKV_ID_SIMPLEBLOCK, EBML_CUSTOM | EBML_TRUNCATE_SIZE, 0, mkv_parse_frame_estimate_bitrate }, { 0, EBML_NONE, 0, NULL } }; static ebml_spec_t mkv_spec_bitrate_estimate_cluster[] = { - { MKV_ID_CLUSTER, EBML_MASTER, 0, mkv_spec_bitrate_estimate_cluster_fields }, + { MKV_ID_CLUSTER, EBML_MASTER | EBML_TRUNCATE_SIZE, 0, mkv_spec_bitrate_estimate_cluster_fields }, { 0, EBML_NONE, 0, NULL } }; @@ -228,10 +246,16 @@ typedef struct { typedef struct { input_frame_t* frame; + frame_list_part_t* part; + uint32_t laces; +} mkv_laced_frame_t; + +typedef struct { + 
mkv_laced_frame_t frame; uint64_t timecode; input_frame_t* unsorted_frame; uint64_t unsorted_timecode; -} frame_timecode_t; +} mkv_frame_timecode_t; typedef struct { uint64_t track_number; @@ -245,7 +269,7 @@ typedef struct { uint64_t total_frames_duration; uint64_t first_timecode; - vod_array_t gop_frames; // array of frame_timecode_t + vod_array_t gop_frames; // array of mkv_frame_timecode_t int32_t min_pts_delay; } mkv_frame_parse_track_context_t; @@ -811,7 +835,7 @@ mkv_metadata_parse( if (metadata->base.tracks.nelts > MAX_TRACK_COUNT) { vod_log_error(VOD_LOG_ERR, request_context->log, 0, - "mkv_metadata_parse: track count exceeded the limit of %i", (ngx_int_t)MAX_TRACK_COUNT); + "mkv_metadata_parse: track count exceeded the limit of %i", (vod_int_t)MAX_TRACK_COUNT); return VOD_BAD_REQUEST; } @@ -998,7 +1022,7 @@ mkv_get_read_frames_request( return VOD_BAD_DATA; } - read_req->read_size = index.cluster_pos - read_req->read_offset; + read_req->read_size = index.cluster_pos + READ_FRAMES_EXTRA_SIZE - read_req->read_offset; read_req->read_offset += metadata->base_layout.position_reference; return VOD_AGAIN; @@ -1007,10 +1031,10 @@ mkv_get_read_frames_request( static void mkv_sort_gop_frames(vod_array_t* gop_frames) { - frame_timecode_t* frames = gop_frames->elts; - frame_timecode_t* frame1; - frame_timecode_t* frame2; - input_frame_t* temp_frame; + mkv_frame_timecode_t* frames = gop_frames->elts; + mkv_frame_timecode_t* frame1; + mkv_frame_timecode_t* frame2; + mkv_laced_frame_t temp_frame; uint64_t temp_timecode; vod_uint_t index1; vod_uint_t index2; @@ -1054,11 +1078,49 @@ mkv_sort_gop_frames(vod_array_t* gop_frames) } } +static void +mkv_update_laces_duration(mkv_laced_frame_t* laced_frame, uint32_t duration) +{ + frame_list_part_t* part; + input_frame_t* frame; + uint32_t target_duration; + uint32_t prev_duration; + uint32_t laces; + uint32_t i; + + prev_duration = 0; + laces = laced_frame->laces; + + part = laced_frame->part; + frame = laced_frame->frame; + 
for (i = 0; i < laces; i++) + { + if (frame >= part->last_frame) + { + if (part->next == NULL) + { + // unexpected + break; + } + + part = part->next; + frame = part->first_frame; + } + + target_duration = duration * (i + 1) / laces; + frame->duration = target_duration - prev_duration; + + frame++; + prev_duration = target_duration; + } +} + static void mkv_update_frame_timestamps(mkv_frame_parse_track_context_t* context) { - frame_timecode_t* cur_frame; - frame_timecode_t* last_frame; + mkv_frame_timecode_t* cur_frame; + mkv_frame_timecode_t* last_frame; + uint32_t duration; int32_t pts_delay; // sort the frames @@ -1070,7 +1132,7 @@ mkv_update_frame_timestamps(mkv_frame_parse_track_context_t* context) cur_frame = context->gop_frames.elts; last_frame = cur_frame + (context->gop_frames.nelts - 1); - if (cur_frame->frame != NULL) + if (cur_frame->frame.frame != NULL) { // this gop is included in the parsed frames, calculate the pts delay and duration for (; cur_frame < last_frame; cur_frame++) @@ -1083,7 +1145,12 @@ mkv_update_frame_timestamps(mkv_frame_parse_track_context_t* context) } cur_frame->unsorted_frame->pts_delay = pts_delay; - cur_frame->frame->duration = cur_frame[1].timecode - cur_frame[0].timecode; + + duration = cur_frame[1].timecode - cur_frame[0].timecode; + + mkv_update_laces_duration(&cur_frame->frame, duration); + + context->total_frames_duration += duration; } } else @@ -1231,6 +1298,144 @@ mkv_parse_frames_estimate_bitrate( return VOD_OK; } +static vod_status_t +mkv_parse_laces(ebml_context_t* context, uint8_t flags, uint32_t* lace_sizes) +{ + vod_status_t rc; + size_t total; + uint64_t num; + uint32_t laces; + uint32_t size; + uint32_t i; + int64_t delta; + uint8_t lace_type; + u_char cur; + + lace_type = (flags & 0x06) >> 1; + if (lace_type == 0) + { + // no lacing + lace_sizes[0] = context->end_pos - context->cur_pos; + return 1; + } + + // get number of laces + if (context->cur_pos >= context->end_pos) + { + vod_log_error(VOD_LOG_ERR, 
context->request_context->log, 0, + "mkv_parse_laces: overflow while reading number of laces"); + return VOD_BAD_DATA; + } + + laces = *context->cur_pos + 1; + context->cur_pos++; + + switch (lace_type) + { + case 0x1: // xiph + vod_memzero(lace_sizes, (laces - 1) * sizeof(lace_sizes[0])); + + total = 0; + for (i = 0; i < laces - 1; i++) + { + do + { + if (context->cur_pos >= context->end_pos) + { + vod_log_error(VOD_LOG_ERR, context->request_context->log, 0, + "mkv_parse_laces: overflow while reading xiph lace size"); + return VOD_BAD_DATA; + } + + cur = *context->cur_pos; + context->cur_pos++; + + lace_sizes[i] += cur; + } while (cur == 0xff); + + total += lace_sizes[i]; + } + break; + + case 0x2: // fixed size + size = context->end_pos - context->cur_pos; + if (size % laces != 0) + { + vod_log_error(VOD_LOG_ERR, context->request_context->log, 0, + "mkv_parse_laces: invalid fixed size lace, size=%uD, laces=%uD", size, laces); + return VOD_BAD_DATA; + } + + size /= laces; + for (i = 0; i < laces; i++) + { + lace_sizes[i] = size; + } + return laces; + + case 0x3: // EBML + rc = ebml_read_num(context, &num, 4, 1); + if (rc < 0) + { + vod_log_debug1(VOD_LOG_DEBUG_LEVEL, context->request_context->log, 0, + "mkv_parse_laces: ebml_read_num(initial lace size) failed %i", rc); + return rc; + } + + if (num > VOD_MAX_UINT32_VALUE) + { + vod_log_error(VOD_LOG_ERR, context->request_context->log, 0, + "mkv_parse_laces: invalid ebml lace size %uL", num); + return VOD_BAD_DATA; + } + + lace_sizes[0] = num; + total = num; + + for (i = 1; i < laces - 1; i++) + { + rc = ebml_read_num(context, &num, 4, 1); + if (rc < 0) + { + vod_log_debug1(VOD_LOG_DEBUG_LEVEL, context->request_context->log, 0, + "mkv_parse_laces: ebml_read_num(lace size delta) failed %i", rc); + return rc; + } + + delta = num - ((1LL << (7 * rc - 1)) - 1); + if (delta > (int64_t)(VOD_MAX_UINT32_VALUE - lace_sizes[i - 1])) + { + vod_log_error(VOD_LOG_ERR, context->request_context->log, 0, + "mkv_parse_laces: invalid 
ebml lace delta %L too big", delta); + return VOD_BAD_DATA; + } + + if (delta < -(int64_t)lace_sizes[i - 1]) + { + vod_log_error(VOD_LOG_ERR, context->request_context->log, 0, + "mkv_parse_laces: invalid ebml lace delta %L too small", delta); + return VOD_BAD_DATA; + } + + lace_sizes[i] = lace_sizes[i - 1] + delta; + total += lace_sizes[i]; + } + break; + } + + size = context->end_pos - context->cur_pos; + if (size < total) + { + vod_log_error(VOD_LOG_ERR, context->request_context->log, 0, + "mkv_parse_laces: laces total size %uz overflows size left %uD", total, size); + return VOD_BAD_DATA; + } + + lace_sizes[laces - 1] = size - total; + + return laces; +} + static vod_status_t mkv_parse_frame( ebml_context_t* context, @@ -1241,12 +1446,14 @@ mkv_parse_frame( mkv_frame_parse_track_context_t* track_context; frame_list_part_t* last_frames_part; frame_list_part_t* new_frames_part; - frame_timecode_t* gop_frame; + mkv_frame_timecode_t* gop_frame; mkv_cluster_t* cluster = dst; input_frame_t* cur_frame; uint64_t frame_timecode; uint64_t track_number; uint32_t key_frame; + uint32_t lace_sizes[256]; + intptr_t laces, i; int16_t timecode; vod_status_t rc; uint8_t flags; @@ -1308,10 +1515,10 @@ mkv_parse_frame( } gop_frame->timecode = frame_timecode; gop_frame->unsorted_timecode = frame_timecode; - gop_frame->frame = NULL; + gop_frame->frame.frame = NULL; gop_frame->unsorted_frame = NULL; - switch (flags) + switch (flags & ~0x06) { case 0: case 1: // discardable @@ -1333,7 +1540,7 @@ mkv_parse_frame( gop_frame->timecode = frame_timecode; gop_frame->unsorted_timecode = frame_timecode; - gop_frame->frame = NULL; + gop_frame->frame.frame = NULL; gop_frame->unsorted_frame = NULL; switch (frame_parse_context->state) @@ -1384,58 +1591,80 @@ mkv_parse_frame( return VOD_BAD_DATA; } - // enforce frame count limit - if (track_context->frame_count >= frame_parse_context->max_frame_count) + rc = mkv_parse_laces(context, flags, lace_sizes); + if (rc < 0) { - vod_log_error(VOD_LOG_ERR, 
context->request_context->log, 0, - "mkv_parse_frame: frame count exceeds the limit %uD", frame_parse_context->max_frame_count); - return VOD_BAD_DATA; + vod_log_debug1(VOD_LOG_DEBUG_LEVEL, context->request_context->log, 0, + "mkv_parse_frame: failed to parse lace sizes %i", rc); + return rc; } - last_frames_part = track_context->last_frames_part; - - if (last_frames_part->last_frame >= last_frames_part->first_frame + FRAMES_PER_PART) + laces = rc; + for (i = 0; i < laces; i++) { - // allocate a new part - new_frames_part = vod_alloc(context->request_context->pool, - sizeof(*new_frames_part) + FRAMES_PER_PART * sizeof(input_frame_t)); - if (new_frames_part == NULL) + // enforce frame count limit + if (track_context->frame_count >= frame_parse_context->max_frame_count) { - vod_log_debug0(VOD_LOG_DEBUG_LEVEL, context->request_context->log, 0, - "mkv_parse_frame: vod_alloc failed"); - return VOD_ALLOC_FAILED; + vod_log_error(VOD_LOG_ERR, context->request_context->log, 0, + "mkv_parse_frame: frame count exceeds the limit %uD", frame_parse_context->max_frame_count); + return VOD_BAD_DATA; } - new_frames_part->first_frame = (void*)(new_frames_part + 1); - new_frames_part->last_frame = new_frames_part->first_frame; - new_frames_part->frames_source = last_frames_part->frames_source; - new_frames_part->frames_source_context = last_frames_part->frames_source_context; - new_frames_part->clip_to = UINT_MAX; // XXXXX fix this + last_frames_part = track_context->last_frames_part; - last_frames_part->next = new_frames_part; - track_context->last_frames_part = new_frames_part; - last_frames_part = new_frames_part; - } + if (last_frames_part->last_frame >= last_frames_part->first_frame + FRAMES_PER_PART) + { + // allocate a new part + new_frames_part = vod_alloc(context->request_context->pool, + sizeof(*new_frames_part) + FRAMES_PER_PART * sizeof(input_frame_t)); + if (new_frames_part == NULL) + { + vod_log_debug0(VOD_LOG_DEBUG_LEVEL, context->request_context->log, 0, + 
"mkv_parse_frame: vod_alloc failed"); + return VOD_ALLOC_FAILED; + } - // initialize the new frame (duration & pts delay are initialized later) - cur_frame = last_frames_part->last_frame++; - cur_frame->key_frame = key_frame; - cur_frame->offset = (uintptr_t)context->cur_pos; - cur_frame->size = context->end_pos - context->cur_pos; + new_frames_part->first_frame = (void*)(new_frames_part + 1); + new_frames_part->last_frame = new_frames_part->first_frame; + new_frames_part->frames_source = last_frames_part->frames_source; + new_frames_part->frames_source_context = last_frames_part->frames_source_context; + new_frames_part->clip_to = UINT_MAX; // XXXXX fix this - // add the frame to the gop frames - gop_frame->frame = cur_frame; - gop_frame->unsorted_frame = cur_frame; + last_frames_part->next = new_frames_part; + track_context->last_frames_part = new_frames_part; + last_frames_part = new_frames_part; + } - // update the track context - if (track_context->frame_count == 0) - { - track_context->first_timecode = frame_timecode; + // initialize the new frame (duration & pts delay are initialized later) + cur_frame = last_frames_part->last_frame++; + cur_frame->key_frame = key_frame; + cur_frame->duration = 0; + cur_frame->pts_delay = 0; + + cur_frame->offset = (uintptr_t)context->cur_pos; + cur_frame->size = lace_sizes[i]; + context->cur_pos += cur_frame->size; + + if (i == 0) + { + // add the frame to the gop frames + gop_frame->frame.frame = cur_frame; + gop_frame->frame.part = last_frames_part; + gop_frame->frame.laces = laces; + + gop_frame->unsorted_frame = cur_frame; + + // update the track context + if (track_context->frame_count == 0) + { + track_context->first_timecode = frame_timecode; + } + } + + track_context->frame_count++; + track_context->key_frame_count += key_frame; + track_context->total_frames_size += cur_frame->size; } - track_context->frame_count++; - track_context->key_frame_count += key_frame; - track_context->total_frames_size += cur_frame->size; 
- track_context->total_frames_duration += cur_frame->duration; return VOD_OK; } @@ -1449,9 +1678,9 @@ mkv_parse_frames( { mkv_frame_parse_track_context_t* track_context; mkv_frame_parse_context_t frame_parse_context; + mkv_frame_timecode_t* gop_frame; mkv_base_metadata_t* metadata = vod_container_of(base, mkv_base_metadata_t, base); frame_list_part_t* part; - frame_timecode_t* gop_frame; input_frame_t* last_frame; input_frame_t* cur_frame; media_track_t* cur_track; @@ -1509,7 +1738,7 @@ mkv_parse_frames( } // initialize the gop frames array - if (vod_array_init(&track_context->gop_frames, request_context->pool, 60, sizeof(frame_timecode_t)) != VOD_OK) + if (vod_array_init(&track_context->gop_frames, request_context->pool, 60, sizeof(mkv_frame_timecode_t)) != VOD_OK) { vod_log_debug0(VOD_LOG_DEBUG_LEVEL, request_context->log, 0, "mkv_parse_frames: vod_array_init failed"); @@ -1549,7 +1778,7 @@ mkv_parse_frames( } gop_frame->timecode = base->duration; gop_frame->unsorted_timecode = base->duration; - gop_frame->frame = NULL; + gop_frame->frame.frame = NULL; gop_frame->unsorted_frame = NULL; // close the last gop @@ -1643,7 +1872,7 @@ mkv_prepare_read_frames_request( metadata->end_time = rescale_time(parse_params->range->end, 1000, metadata->base.timescale); metadata->max_frame_count = parse_params->max_frame_count; metadata->parse_frames = TRUE; - end_margin = segmenter->max_segment_duration; + end_margin = 1000; } else { From 31dbddf70cc5d2b14d46bdcd836b7928658b3437 Mon Sep 17 00:00:00 2001 From: erankor Date: Sun, 31 Dec 2023 08:56:15 +0200 Subject: [PATCH 2/9] fix compilation warning --- vod/mkv/mkv_format.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vod/mkv/mkv_format.c b/vod/mkv/mkv_format.c index 987d3403..8f148ece 100644 --- a/vod/mkv/mkv_format.c +++ b/vod/mkv/mkv_format.c @@ -1302,12 +1302,12 @@ static vod_status_t mkv_parse_laces(ebml_context_t* context, uint8_t flags, uint32_t* lace_sizes) { vod_status_t rc; - size_t total; 
uint64_t num; + int64_t delta; + size_t total; uint32_t laces; uint32_t size; uint32_t i; - int64_t delta; uint8_t lace_type; u_char cur; @@ -1330,12 +1330,13 @@ mkv_parse_laces(ebml_context_t* context, uint8_t flags, uint32_t* lace_sizes) laces = *context->cur_pos + 1; context->cur_pos++; + total = 0; + switch (lace_type) { case 0x1: // xiph vod_memzero(lace_sizes, (laces - 1) * sizeof(lace_sizes[0])); - total = 0; for (i = 0; i < laces - 1; i++) { do From aa86fd6e64b206230b5bd0e050b4b6ec1257eb28 Mon Sep 17 00:00:00 2001 From: erankor Date: Fri, 12 Jan 2024 23:17:03 +0200 Subject: [PATCH 3/9] additional mkv fixes --- vod/mkv/mkv_defs.c | 10 +- vod/mkv/mkv_defs.h | 2 + vod/mkv/mkv_format.c | 202 ++++++++++++++++++++++++++++++++----- vod/mp4/mp4_init_segment.c | 104 +++++++++++++++---- 4 files changed, 271 insertions(+), 47 deletions(-) diff --git a/vod/mkv/mkv_defs.c b/vod/mkv/mkv_defs.c index d70f7aee..c4197415 100644 --- a/vod/mkv/mkv_defs.c +++ b/vod/mkv/mkv_defs.c @@ -6,7 +6,7 @@ mkv_codec_type_t mkv_codec_types[] = { // video { vod_string("V_MPEG4/ISO/AVC"), VOD_CODEC_ID_AVC, FORMAT_AVC1, TRUE }, - { vod_string("V_MPEGH/ISO/HEVC"), VOD_CODEC_ID_HEVC, FORMAT_HEV1, TRUE }, + { vod_string("V_MPEGH/ISO/HEVC"), VOD_CODEC_ID_HEVC, FORMAT_HVC1, TRUE }, { vod_string("V_VP8"), VOD_CODEC_ID_VP8, 0, FALSE }, { vod_string("V_VP9"), VOD_CODEC_ID_VP9, 0, FALSE }, { vod_string("V_AV1"), VOD_CODEC_ID_AV1, 0, FALSE }, @@ -15,7 +15,11 @@ mkv_codec_type_t mkv_codec_types[] = { { vod_string("A_AAC"), VOD_CODEC_ID_AAC, FORMAT_MP4A, TRUE }, { vod_string("A_MPEG/L3"), VOD_CODEC_ID_MP3, FORMAT_MP4A, FALSE }, { vod_string("A_VORBIS"), VOD_CODEC_ID_VORBIS,0, TRUE }, - { vod_string("A_OPUS"), VOD_CODEC_ID_OPUS, 0, TRUE }, - + { vod_string("A_OPUS"), VOD_CODEC_ID_OPUS, FORMAT_OPUS, TRUE }, + { vod_string("A_AC3"), VOD_CODEC_ID_AC3, FORMAT_AC3, FALSE }, + { vod_string("A_EAC3"), VOD_CODEC_ID_EAC3, FORMAT_EAC3, FALSE }, + { vod_string("A_DTS"), VOD_CODEC_ID_DTS, 0, TRUE }, + { 
vod_string("A_FLAC"), VOD_CODEC_ID_FLAC, FORMAT_FLAC, TRUE }, + { vod_null_string, 0, 0, FALSE } }; diff --git a/vod/mkv/mkv_defs.h b/vod/mkv/mkv_defs.h index 35005595..ac3c0cdf 100644 --- a/vod/mkv/mkv_defs.h +++ b/vod/mkv/mkv_defs.h @@ -64,6 +64,8 @@ // cluster #define MKV_ID_CLUSTERTIMECODE (0xE7) #define MKV_ID_SIMPLEBLOCK (0xA3) +#define MKV_ID_BLOCKGROUP (0xA0) +#define MKV_ID_BLOCK (0xA1) #define MKV_ID_CLUSTER (0x1F43B675) // sections diff --git a/vod/mkv/mkv_format.c b/vod/mkv/mkv_format.c index 8f148ece..56219534 100644 --- a/vod/mkv/mkv_format.c +++ b/vod/mkv/mkv_format.c @@ -20,13 +20,15 @@ MKV_ID_CLUSTERTIMECODE (1) size (1) value (8) - MKV_SIMPLEBLOCK (1) + MKV_BLOCKGROUP (1) + size (8) + MKV_BLOCK (1) size (8) track number (8) timecode (2) flags (1) */ -#define READ_FRAMES_EXTRA_SIZE (42) +#define READ_FRAMES_EXTRA_SIZE (51) // prototypes static vod_status_t mkv_parse_seek_entry(ebml_context_t* context, ebml_spec_t* spec, void* dst); @@ -74,7 +76,7 @@ typedef struct { uint64_t track; uint64_t time; uint64_t cluster_pos; - uint64_t relative_pos; // XXXXX needed ? 
+ uint64_t relative_pos; } mkv_index_t; typedef struct { @@ -129,7 +131,7 @@ static ebml_spec_t mkv_spec_track_fields[] = { { MKV_ID_TRACKDEFAULTDURATION, EBML_UINT, offsetof(mkv_track_t, default_duration), NULL }, { MKV_ID_TRACKCODECDELAY, EBML_UINT, offsetof(mkv_track_t, codec_delay), NULL }, { MKV_ID_TRACKLANGUAGE, EBML_STRING, offsetof(mkv_track_t, language), NULL }, - { MKV_ID_TRACKNAME, EBML_STRING, offsetof(mkv_track_t, name), NULL }, + { MKV_ID_TRACKNAME, EBML_STRING, offsetof(mkv_track_t, name), NULL }, { MKV_ID_TRACKVIDEO, EBML_MASTER, offsetof(mkv_track_t, u.video), mkv_spec_track_video }, { MKV_ID_TRACKAUDIO, EBML_MASTER, offsetof(mkv_track_t, u.audio), mkv_spec_track_audio }, { 0, EBML_NONE, 0, NULL } @@ -160,9 +162,15 @@ static ebml_spec_t mkv_spec_index[] = { }; // cluster +static ebml_spec_t mkv_spec_block_group[] = { + { MKV_ID_BLOCK, EBML_CUSTOM | EBML_TRUNCATE_SIZE, 0, mkv_parse_frame }, + { 0, EBML_NONE, 0, NULL } +}; + static ebml_spec_t mkv_spec_cluster_fields[] = { { MKV_ID_CLUSTERTIMECODE, EBML_UINT, offsetof(mkv_cluster_t, timecode), NULL }, { MKV_ID_SIMPLEBLOCK, EBML_CUSTOM | EBML_TRUNCATE_SIZE, 0, mkv_parse_frame }, + { MKV_ID_BLOCKGROUP, EBML_MASTER | EBML_TRUNCATE_SIZE, 0, mkv_spec_block_group }, { 0, EBML_NONE, 0, NULL } }; @@ -171,9 +179,15 @@ static ebml_spec_t mkv_spec_cluster[] = { { 0, EBML_NONE, 0, NULL } }; +static ebml_spec_t mkv_spec_bitrate_estimate_block_group[] = { + { MKV_ID_BLOCK, EBML_CUSTOM | EBML_TRUNCATE_SIZE, 0, mkv_parse_frame_estimate_bitrate }, + { 0, EBML_NONE, 0, NULL } +}; + static ebml_spec_t mkv_spec_bitrate_estimate_cluster_fields[] = { { MKV_ID_CLUSTERTIMECODE, EBML_UINT, offsetof(mkv_cluster_t, timecode), NULL }, { MKV_ID_SIMPLEBLOCK, EBML_CUSTOM | EBML_TRUNCATE_SIZE, 0, mkv_parse_frame_estimate_bitrate }, + { MKV_ID_BLOCKGROUP, EBML_MASTER | EBML_TRUNCATE_SIZE, 0, mkv_spec_bitrate_estimate_block_group }, { 0, EBML_NONE, 0, NULL } }; @@ -302,11 +316,24 @@ static vod_str_t mkv_supported_doctypes[] = { 
vod_null_string }; +// XXXXX avoid using hardcoded extra data - build according to the first frame instead +static u_char mkv_extra_data_ac3[] = { + 0x50, 0x11, 0xe0 +}; + +static u_char mkv_extra_data_eac3[] = { + 0x07, 0x00, 0x20, 0x0f, 0x00, 0x00 +}; + +static u_char mkv_extra_data_opus[] = { + 0x00, 0x02, 0x01, 0x38, 0x00, 0x00, 0xbb, 0x80, 0x00, 0x00, 0x00 +}; + static bool_t mkv_is_doctype_supported(vod_str_t* doctype) { vod_str_t* cur_doctype; - + for (cur_doctype = mkv_supported_doctypes; cur_doctype->len; cur_doctype++) { if (doctype->len == cur_doctype->len && @@ -920,6 +947,24 @@ mkv_metadata_parse( cur_track->media_info.duration_millis = rescale_time(cur_track->media_info.duration, timescale, 1000); cur_track->media_info.extra_data = track.codec_private; + switch (cur_track->media_info.codec_id) + { + case VOD_CODEC_ID_AC3: + cur_track->media_info.extra_data.data = mkv_extra_data_ac3; + cur_track->media_info.extra_data.len = sizeof(mkv_extra_data_ac3); + break; + + case VOD_CODEC_ID_EAC3: + cur_track->media_info.extra_data.data = mkv_extra_data_eac3; + cur_track->media_info.extra_data.len = sizeof(mkv_extra_data_eac3); + break; + + case VOD_CODEC_ID_OPUS: + cur_track->media_info.extra_data.data = mkv_extra_data_opus; + cur_track->media_info.extra_data.len = sizeof(mkv_extra_data_opus); + break; + } + cur_track->index = track_index; rc = media_format_finalize_track( @@ -955,27 +1000,39 @@ mkv_get_read_frames_request( media_format_read_request_t* read_req) { ebml_context_t context; - uint64_t prev_cluster_pos; - uint64_t end_time; + media_track_t* cur_track; + vod_uint_t i; + uint64_t segment_duration; + uint64_t seen_tracks_mask; + uint64_t done_tracks_mask; + uint64_t all_tracks_mask; + uint64_t cur_track_mask; + uint64_t initial_time; + mkv_index_t prev_index; mkv_index_t index; vod_status_t rc; + size_t extra_read_size; bool_t done = FALSE; - // Note: adding a second to the end time, to make sure we get a frame following the last frame - // this is 
required since there is no duration per frame - end_time = metadata->end_time + rescale_time(end_margin, 1000, metadata->base.timescale); - read_req->read_offset = ULLONG_MAX; read_req->flags = 0; - prev_cluster_pos = ULLONG_MAX; + prev_index.cluster_pos = ULLONG_MAX; + seen_tracks_mask = 0; + done_tracks_mask = 0; context.request_context = request_context; context.cur_pos = metadata->cues.data; context.end_pos = context.cur_pos + metadata->cues.len; + // XXXXX optimize this - it may be possible to use the cuetime as the cluster timestamp, and start mid-cluster + // another possible optimization is to read in fixed sizes until the segment is complete (may reduce the total read size) + for (;;) { + vod_memzero(&index, sizeof(index)); + cur_track_mask = 0; + if (context.cur_pos < context.end_pos) { rc = ebml_parse_single(&context, mkv_spec_index, &index); @@ -985,6 +1042,23 @@ mkv_get_read_frames_request( "mkv_get_read_frames_request: ebml_parse_single failed %i", rc); return rc; } + + for (i = 0; i < metadata->base.tracks.nelts; i++) + { + cur_track = (media_track_t*)metadata->base.tracks.elts + i; + + if (index.track == cur_track->media_info.track_id) + { + cur_track_mask = 1 << i; + break; + } + } + + seen_tracks_mask |= cur_track_mask; + + vod_log_debug4(VOD_LOG_DEBUG_LEVEL, request_context->log, 0, + "mkv_get_read_frames_request: track=%uL, time=%uL, cluster_pos=%uL, relative_pos=%uL", + index.track, index.time, index.cluster_pos, index.relative_pos); } else { @@ -995,17 +1069,27 @@ mkv_get_read_frames_request( if (read_req->read_offset == ULLONG_MAX && metadata->start_time < index.time && - prev_cluster_pos != ULLONG_MAX) + prev_index.cluster_pos != ULLONG_MAX) { - read_req->read_offset = prev_cluster_pos; + read_req->read_offset = prev_index.cluster_pos; + initial_time = prev_index.time; } - if (end_time <= index.time || done) + if (done) { break; } - prev_cluster_pos = index.cluster_pos; + if (metadata->end_time <= index.time) + { + done_tracks_mask |= 
cur_track_mask; + if ((seen_tracks_mask & ~done_tracks_mask) == 0) + { + break; + } + } + + prev_index = index; } if (read_req->read_offset == ULLONG_MAX) @@ -1022,9 +1106,36 @@ mkv_get_read_frames_request( return VOD_BAD_DATA; } - read_req->read_size = index.cluster_pos + READ_FRAMES_EXTRA_SIZE - read_req->read_offset; + read_req->read_size = index.cluster_pos + index.relative_pos - read_req->read_offset; read_req->read_offset += metadata->base_layout.position_reference; + // since mkv does not contain a duration field per frame, we need to read the timestamp of + // one additional frame per track + all_tracks_mask = (1 << metadata->base.tracks.nelts) - 1; + if (all_tracks_mask & ~seen_tracks_mask) + { + // some needed tracks were not included in the index - increase the read size by 1 second + // in order to read another frame for each track + segment_duration = 1000; + if (index.time > initial_time + segment_duration) + { + segment_duration = index.time - initial_time; + } + extra_read_size = read_req->read_size * 1000 / segment_duration; + } + else + { + // all required tracks were found in the index, increase the read size by a fixed amount + // in order to contain the headers of the next frame + extra_read_size = READ_FRAMES_EXTRA_SIZE; + } + + read_req->read_size += extra_read_size; + + vod_log_debug4(VOD_LOG_DEBUG_LEVEL, request_context->log, 0, + "mkv_get_read_frames_request: reading offset=%uL, size=%uz, extra=%uz, pos_ref=%uL", + read_req->read_offset, read_req->read_size, extra_read_size, metadata->base_layout.position_reference); + return VOD_AGAIN; } @@ -1156,9 +1267,7 @@ mkv_update_frame_timestamps(mkv_frame_parse_track_context_t* context) else { // this gop is not included in the parsed frames, only find the min pts delay - for (cur_frame = context->gop_frames.elts; - cur_frame < last_frame; - cur_frame++) + for (; cur_frame < last_frame; cur_frame++) { pts_delay = cur_frame->unsorted_timecode - cur_frame->timecode; @@ -1173,6 +1282,47 @@ 
mkv_update_frame_timestamps(mkv_frame_parse_track_context_t* context) context->gop_frames.nelts = 0; } +static uint64_t +mkv_estimate_next_frame_timecode( + request_context_t* request_context, + mkv_frame_parse_track_context_t* context) +{ + mkv_frame_timecode_t* cur_frame; + mkv_frame_timecode_t* last_frame; + uint32_t laces; + uint64_t max_timecode; + uint64_t result; + + cur_frame = context->gop_frames.elts; + last_frame = cur_frame + (context->gop_frames.nelts - 1); + + // get the number of pending laces + max timecode + laces = 0; + max_timecode = 0; + for (; cur_frame < last_frame; cur_frame++) + { + laces += cur_frame->frame.laces; + if (max_timecode < cur_frame->timecode) + { + max_timecode = cur_frame->timecode; + } + } + + // estimate the next frame timecode using the average lace duration + result = max_timecode; + if (context->frame_count > laces) + { + result += context->total_frames_duration / (context->frame_count - laces) * laces; + } + + vod_log_error(VOD_LOG_WARN, request_context->log, 0, + "mkv_estimate_next_frame_timecode: estimating next frame timecode, " + "track_number=%uL, result=%uL, max=%uL, laces=%uD", + context->track_number, result, max_timecode, laces); + + return result; +} + static vod_status_t mkv_parse_frame_estimate_bitrate( ebml_context_t* context, @@ -1506,7 +1656,11 @@ mkv_parse_frame( } frame_timecode = cluster->timecode + timecode; - + + vod_log_debug3(VOD_LOG_DEBUG_LEVEL, context->request_context->log, 0, + "mkv_parse_frame: track=%uL, timecode=%uL, flags=0x%uxD", + track_number, frame_timecode, (uint32_t)flags); + gop_frame = vod_array_push(&track_context->gop_frames); if (gop_frame == NULL) { @@ -1538,7 +1692,7 @@ mkv_parse_frame( // repush the gop frame following the reset of the array gop_frame = vod_array_push(&track_context->gop_frames); // cant fail - + gop_frame->timecode = frame_timecode; gop_frame->unsorted_timecode = frame_timecode; gop_frame->frame.frame = NULL; @@ -1777,8 +1931,8 @@ mkv_parse_frames( 
"mkv_parse_frames: vod_array_push failed"); return VOD_ALLOC_FAILED; } - gop_frame->timecode = base->duration; - gop_frame->unsorted_timecode = base->duration; + gop_frame->timecode = mkv_estimate_next_frame_timecode(request_context, track_context); + gop_frame->unsorted_timecode = gop_frame->timecode; gop_frame->frame.frame = NULL; gop_frame->unsorted_frame = NULL; diff --git a/vod/mp4/mp4_init_segment.c b/vod/mp4/mp4_init_segment.c index 0ee432b1..34986f72 100644 --- a/vod/mp4/mp4_init_segment.c +++ b/vod/mp4/mp4_init_segment.c @@ -488,13 +488,24 @@ mp4_init_segment_write_avcc_atom(u_char* p, media_track_t* track) return p; } +static u_char* +mp4_init_segment_write_hvcc_atom(u_char* p, media_track_t* track) +{ + size_t atom_size = ATOM_HEADER_SIZE + track->media_info.extra_data.len; + + write_atom_header(p, atom_size, 'h', 'v', 'c', 'C'); + p = vod_copy(p, track->media_info.extra_data.data, track->media_info.extra_data.len); + return p; +} + static u_char* mp4_init_segment_write_stsd_video_entry(u_char* p, media_track_t* track) { size_t atom_size = ATOM_HEADER_SIZE + sizeof(sample_entry_t) + sizeof(stsd_video_t) + ATOM_HEADER_SIZE + track->media_info.extra_data.len; - write_atom_header(p, atom_size, 'a', 'v', 'c', '1'); + write_be32(p, atom_size); + p = ngx_copy(p, &track->media_info.format, sizeof(track->media_info.format)); // sample_entry_t write_be32(p, 0); // reserved @@ -518,15 +529,24 @@ mp4_init_segment_write_stsd_video_entry(u_char* p, media_track_t* track) write_be16(p, 0x18); // depth write_be16(p, 0xffff); // pre defined - p = mp4_init_segment_write_avcc_atom(p, track); + switch (track->media_info.codec_id) + { + case VOD_CODEC_ID_AVC: + p = mp4_init_segment_write_avcc_atom(p, track); + break; + + case VOD_CODEC_ID_HEVC: + p = mp4_init_segment_write_hvcc_atom(p, track); + break; + } return p; } static u_char* -mp4_init_segment_write_esds_atom(u_char* p, media_track_t* track) +mp4_init_segment_write_esds_atom(u_char* p, media_info_t* media_info) { - 
size_t extra_data_len = track->media_info.extra_data.len; + size_t extra_data_len = media_info->extra_data.len; size_t atom_size = mp4_esds_atom_size(extra_data_len); write_atom_header(p, atom_size, 'e', 's', 'd', 's'); @@ -541,15 +561,15 @@ mp4_init_segment_write_esds_atom(u_char* p, media_track_t* track) *p++ = MP4DecConfigDescrTag; // tag *p++ = sizeof(config_descr_t) + // len sizeof(descr_header_t) + extra_data_len; - *p++ = track->media_info.u.audio.object_type_id; + *p++ = media_info->u.audio.object_type_id; *p++ = 0x15; // stream type write_be24(p, 0); // buffer size - write_be32(p, track->media_info.bitrate); // max bitrate - write_be32(p, track->media_info.bitrate); // avg bitrate + write_be32(p, media_info->bitrate); // max bitrate + write_be32(p, media_info->bitrate); // avg bitrate *p++ = MP4DecSpecificDescrTag; // tag *p++ = extra_data_len; // len - p = vod_copy(p, track->media_info.extra_data.data, extra_data_len); + p = vod_copy(p, media_info->extra_data.data, extra_data_len); *p++ = MP4SLDescrTag; // tag *p++ = 1; // len @@ -558,13 +578,34 @@ mp4_init_segment_write_esds_atom(u_char* p, media_track_t* track) return p; } +static vod_inline size_t +mp4_init_segment_get_stsd_audio_entry_size(media_info_t* media_info) +{ + size_t size; + + size = ATOM_HEADER_SIZE + sizeof(sample_entry_t) + sizeof(stsd_audio_t); + + if (media_info->format == FORMAT_MP4A) + { + size += mp4_esds_atom_size(media_info->extra_data.len); + } + else + { + size += ATOM_HEADER_SIZE + media_info->extra_data.len; + } + + return size; +} + static u_char* -mp4_init_segment_write_stsd_audio_entry(u_char* p, media_track_t* track) +mp4_init_segment_write_stsd_audio_entry(u_char* p, media_info_t* media_info) { - size_t atom_size = ATOM_HEADER_SIZE + sizeof(sample_entry_t) + sizeof(stsd_audio_t) + - mp4_esds_atom_size(track->media_info.extra_data.len); + size_t atom_size; - write_atom_header(p, atom_size, 'm', 'p', '4', 'a'); + atom_size = 
mp4_init_segment_get_stsd_audio_entry_size(media_info); + + write_be32(p, atom_size); + p = ngx_copy(p, &media_info->format, sizeof(media_info->format)); // sample_entry_t write_be32(p, 0); // reserved @@ -574,14 +615,38 @@ mp4_init_segment_write_stsd_audio_entry(u_char* p, media_track_t* track) // stsd_audio_t write_be32(p, 0); // reserved write_be32(p, 0); // reserved - write_be16(p, track->media_info.u.audio.channels); - write_be16(p, track->media_info.u.audio.bits_per_sample); + write_be16(p, media_info->u.audio.channels); + write_be16(p, media_info->u.audio.bits_per_sample); write_be16(p, 0); // pre defined write_be16(p, 0); // reserved - write_be16(p, track->media_info.u.audio.sample_rate); + write_be16(p, media_info->u.audio.sample_rate); write_be16(p, 0); - p = mp4_init_segment_write_esds_atom(p, track); + if (media_info->format == FORMAT_MP4A) + { + p = mp4_init_segment_write_esds_atom(p, media_info); + } + else + { + atom_size = ATOM_HEADER_SIZE + media_info->extra_data.len; + + switch (media_info->codec_id) + { + case VOD_CODEC_ID_AC3: + write_atom_header(p, atom_size, 'd', 'a', 'c', '3'); + break; + + case VOD_CODEC_ID_EAC3: + write_atom_header(p, atom_size, 'd', 'e', 'c', '3'); + break; + + case VOD_CODEC_ID_OPUS: + write_atom_header(p, atom_size, 'd', 'O', 'p', 's'); + break; + } + + p = vod_copy(p, media_info->extra_data.data, media_info->extra_data.len); + } return p; } @@ -594,13 +659,12 @@ mp4_init_segment_get_stsd_atom_size(media_track_t* track) switch (track->media_info.media_type) { case MEDIA_TYPE_VIDEO: - atom_size += ATOM_HEADER_SIZE + sizeof(sample_entry_t) + sizeof(stsd_video_t)+ + atom_size += ATOM_HEADER_SIZE + sizeof(sample_entry_t) + sizeof(stsd_video_t) + ATOM_HEADER_SIZE + track->media_info.extra_data.len; break; case MEDIA_TYPE_AUDIO: - atom_size += ATOM_HEADER_SIZE + sizeof(sample_entry_t) + sizeof(stsd_audio_t)+ - mp4_esds_atom_size(track->media_info.extra_data.len); + atom_size += 
mp4_init_segment_get_stsd_audio_entry_size(&track->media_info); break; } @@ -620,7 +684,7 @@ mp4_init_segment_write_stsd_atom(u_char* p, size_t atom_size, media_track_t* tra break; case MEDIA_TYPE_AUDIO: - p = mp4_init_segment_write_stsd_audio_entry(p, track); + p = mp4_init_segment_write_stsd_audio_entry(p, &track->media_info); break; } return p; From 5820421ec40f30a072db8eab63e4cc0c9db40f0b Mon Sep 17 00:00:00 2001 From: erankor Date: Fri, 12 Jan 2024 23:27:17 +0200 Subject: [PATCH 4/9] silence compilation warning --- vod/mkv/mkv_format.c | 1 + 1 file changed, 1 insertion(+) diff --git a/vod/mkv/mkv_format.c b/vod/mkv/mkv_format.c index 56219534..bb344b93 100644 --- a/vod/mkv/mkv_format.c +++ b/vod/mkv/mkv_format.c @@ -1016,6 +1016,7 @@ mkv_get_read_frames_request( read_req->read_offset = ULLONG_MAX; read_req->flags = 0; + initial_time = 0; prev_index.cluster_pos = ULLONG_MAX; seen_tracks_mask = 0; From 675fcc4aa7bf0ca8411d8fe7f9d1157142f676f1 Mon Sep 17 00:00:00 2001 From: erankor Date: Fri, 12 Jan 2024 23:39:32 +0200 Subject: [PATCH 5/9] move to decl --- vod/mkv/mkv_format.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vod/mkv/mkv_format.c b/vod/mkv/mkv_format.c index bb344b93..67ef3e5b 100644 --- a/vod/mkv/mkv_format.c +++ b/vod/mkv/mkv_format.c @@ -1007,7 +1007,7 @@ mkv_get_read_frames_request( uint64_t done_tracks_mask; uint64_t all_tracks_mask; uint64_t cur_track_mask; - uint64_t initial_time; + uint64_t initial_time = 0; mkv_index_t prev_index; mkv_index_t index; vod_status_t rc; @@ -1016,7 +1016,6 @@ mkv_get_read_frames_request( read_req->read_offset = ULLONG_MAX; read_req->flags = 0; - initial_time = 0; prev_index.cluster_pos = ULLONG_MAX; seen_tracks_mask = 0; From 38265b4671a4d9950f498e028e75078dd5cafbd4 Mon Sep 17 00:00:00 2001 From: erankor Date: Mon, 15 Jan 2024 07:34:33 +0200 Subject: [PATCH 6/9] fix warning --- vod/mkv/mkv_format.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vod/mkv/mkv_format.c 
b/vod/mkv/mkv_format.c index 67ef3e5b..e461d1b2 100644 --- a/vod/mkv/mkv_format.c +++ b/vod/mkv/mkv_format.c @@ -1007,7 +1007,7 @@ mkv_get_read_frames_request( uint64_t done_tracks_mask; uint64_t all_tracks_mask; uint64_t cur_track_mask; - uint64_t initial_time = 0; + uint64_t initial_time; mkv_index_t prev_index; mkv_index_t index; vod_status_t rc; @@ -1017,7 +1017,10 @@ mkv_get_read_frames_request( read_req->read_offset = ULLONG_MAX; read_req->flags = 0; + initial_time = 0; prev_index.cluster_pos = ULLONG_MAX; + prev_index.time = 0; + seen_tracks_mask = 0; done_tracks_mask = 0; From 0cc1378028d31dbfdba8322f2563a9b09596d7c5 Mon Sep 17 00:00:00 2001 From: erankor Date: Sat, 20 Jan 2024 13:24:18 +0200 Subject: [PATCH 7/9] fix key block group key frame detection - a block group is a key frame if it has no block references (unlike simple groups which use flags & 0x80) - also improved debug logs - added per-lace debug log, including the absolute file offset, size, key frame etc --- vod/common.h | 12 ++++ vod/mkv/ebml.c | 12 ++-- vod/mkv/ebml.h | 1 + vod/mkv/mkv_defs.h | 1 + vod/mkv/mkv_format.c | 157 +++++++++++++++++++++++++++++++++++-------- 5 files changed, 148 insertions(+), 35 deletions(-) diff --git a/vod/common.h b/vod/common.h index e06eb992..043a82eb 100644 --- a/vod/common.h +++ b/vod/common.h @@ -80,6 +80,9 @@ #define vod_log_debug2(level, log, err, fmt, arg1, arg2) #define vod_log_debug3(level, log, err, fmt, arg1, arg2, arg3) #define vod_log_debug4(level, log, err, fmt, arg1, arg2, arg3, arg4) +#define vod_log_debug5(level, log, err, fmt, arg1, arg2, arg3, arg4, arg5) +#define vod_log_debug6(level, log, err, fmt, arg1, arg2, arg3, arg4, arg5, arg6) +#define vod_log_debug7(level, log, err, fmt, arg1, arg2, arg3, arg4, arg5, arg6, arg7) typedef int bool_t; typedef int vod_status_t; @@ -264,6 +267,15 @@ void vod_log_error(vod_uint_t level, vod_log_t *log, int err, #define vod_log_debug4(level, log, err, fmt, arg1, arg2, arg3, arg4) \ ngx_log_debug4(level, 
log, err, fmt, arg1, arg2, arg3, arg4) +#define vod_log_debug5(level, log, err, fmt, arg1, arg2, arg3, arg4, arg5) \ + ngx_log_debug5(level, log, err, fmt, arg1, arg2, arg3, arg4, arg5) + +#define vod_log_debug6(level, log, err, fmt, arg1, arg2, arg3, arg4, arg5, arg6) \ + ngx_log_debug6(level, log, err, fmt, arg1, arg2, arg3, arg4, arg5, arg6) + +#define vod_log_debug7(level, log, err, fmt, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + ngx_log_debug7(level, log, err, fmt, arg1, arg2, arg3, arg4, arg5, arg6, arg7) + #define vod_errno ngx_errno typedef intptr_t bool_t; diff --git a/vod/mkv/ebml.c b/vod/mkv/ebml.c index 447f8c6b..aa1b9ec7 100644 --- a/vod/mkv/ebml.c +++ b/vod/mkv/ebml.c @@ -261,9 +261,9 @@ ebml_parse_element(ebml_context_t* context, ebml_spec_t* spec, void* dest) break; case EBML_MASTER: - next_context.request_context = context->request_context; - next_context.cur_pos = context->cur_pos + size; - next_context.end_pos = context->end_pos; + next_context = *context; + next_context.cur_pos += size; + context->end_pos = next_context.cur_pos; rc = ebml_parse_master(context, spec->child, cur_dest); if (rc != VOD_OK) @@ -276,9 +276,9 @@ ebml_parse_element(ebml_context_t* context, ebml_spec_t* spec, void* dest) return VOD_OK; case EBML_CUSTOM: - next_context.request_context = context->request_context; - next_context.cur_pos = context->cur_pos + size; - next_context.end_pos = context->end_pos; + next_context = *context; + next_context.cur_pos += size; + context->end_pos = next_context.cur_pos; parser = spec->child; rc = parser(context, spec, cur_dest); diff --git a/vod/mkv/ebml.h b/vod/mkv/ebml.h index 3e0cf05a..e3d877a4 100644 --- a/vod/mkv/ebml.h +++ b/vod/mkv/ebml.h @@ -24,6 +24,7 @@ typedef struct { request_context_t* request_context; const u_char* cur_pos; const u_char* end_pos; + int64_t offset_delta; } ebml_context_t; typedef struct { diff --git a/vod/mkv/mkv_defs.h b/vod/mkv/mkv_defs.h index ac3c0cdf..1ffb5883 100644 --- a/vod/mkv/mkv_defs.h +++ 
b/vod/mkv/mkv_defs.h @@ -66,6 +66,7 @@ #define MKV_ID_SIMPLEBLOCK (0xA3) #define MKV_ID_BLOCKGROUP (0xA0) #define MKV_ID_BLOCK (0xA1) +#define MKV_ID_REFERENCEBLOCK (0xFB) #define MKV_ID_CLUSTER (0x1F43B675) // sections diff --git a/vod/mkv/mkv_format.c b/vod/mkv/mkv_format.c index e461d1b2..f9e65b2a 100644 --- a/vod/mkv/mkv_format.c +++ b/vod/mkv/mkv_format.c @@ -32,8 +32,10 @@ // prototypes static vod_status_t mkv_parse_seek_entry(ebml_context_t* context, ebml_spec_t* spec, void* dst); -static vod_status_t mkv_parse_frame(ebml_context_t* context, ebml_spec_t* spec, void* dst); +static vod_status_t mkv_simple_block(ebml_context_t* context, ebml_spec_t* spec, void* dst); static vod_status_t mkv_parse_frame_estimate_bitrate(ebml_context_t* context, ebml_spec_t* spec, void* dst); +static vod_status_t mkv_block_group(ebml_context_t* context, ebml_spec_t* spec, void* dst); +static vod_status_t mkv_reference_block(ebml_context_t* context, ebml_spec_t* spec, void* dst); // raw parsing structs typedef struct { @@ -83,6 +85,10 @@ typedef struct { uint64_t timecode; } mkv_cluster_t; +typedef struct { + vod_str_t block; +} mkv_block_group_t; + // matroksa specs // seekhead @@ -163,14 +169,15 @@ static ebml_spec_t mkv_spec_index[] = { // cluster static ebml_spec_t mkv_spec_block_group[] = { - { MKV_ID_BLOCK, EBML_CUSTOM | EBML_TRUNCATE_SIZE, 0, mkv_parse_frame }, + { MKV_ID_BLOCK, EBML_BINARY | EBML_TRUNCATE_SIZE, offsetof(mkv_block_group_t, block), NULL }, + { MKV_ID_REFERENCEBLOCK, EBML_CUSTOM, 0, mkv_reference_block }, { 0, EBML_NONE, 0, NULL } }; static ebml_spec_t mkv_spec_cluster_fields[] = { { MKV_ID_CLUSTERTIMECODE, EBML_UINT, offsetof(mkv_cluster_t, timecode), NULL }, - { MKV_ID_SIMPLEBLOCK, EBML_CUSTOM | EBML_TRUNCATE_SIZE, 0, mkv_parse_frame }, - { MKV_ID_BLOCKGROUP, EBML_MASTER | EBML_TRUNCATE_SIZE, 0, mkv_spec_block_group }, + { MKV_ID_SIMPLEBLOCK, EBML_CUSTOM | EBML_TRUNCATE_SIZE, 0, mkv_simple_block }, + { MKV_ID_BLOCKGROUP, EBML_CUSTOM | EBML_TRUNCATE_SIZE, 0, 
mkv_block_group }, { 0, EBML_NONE, 0, NULL } }; @@ -246,6 +253,7 @@ typedef struct { uint64_t end_time; uint32_t max_frame_count; bool_t parse_frames; + uint64_t read_offset; } mkv_base_metadata_t; typedef struct { @@ -256,6 +264,7 @@ typedef struct { vod_str_t sections[SECTION_COUNT]; mkv_file_layout_t layout; mkv_base_metadata_t result; + uint64_t read_offset; } mkv_metadata_reader_state_t; typedef struct { @@ -297,6 +306,11 @@ typedef struct { mkv_frame_parse_track_context_t* last_track; } mkv_frame_parse_context_t; +typedef struct { + ebml_context_t context; + uint32_t references; +} mkv_block_group_context_t; + typedef struct { uint64_t track_number; uint64_t min_frame_timecode; @@ -361,6 +375,7 @@ mkv_metadata_reader_init( context.request_context = request_context; context.cur_pos = buffer->data; context.end_pos = buffer->data + buffer->len; + context.offset_delta = -(intptr_t)buffer->data; rc = ebml_parse_header(&context, &header); if (rc != VOD_OK) @@ -469,6 +484,7 @@ mkv_get_file_layout( context.request_context = request_context; context.cur_pos = buffer; context.end_pos = buffer + size; + context.offset_delta = -(intptr_t)buffer; // ebml header rc = ebml_parse_header(&context, &header); @@ -585,6 +601,8 @@ mkv_metadata_reader_read( } state->state = MRS_READ_SECTION_HEADER; + state->read_offset = position->pos; + result->read_req.read_offset = position->pos; result->read_req.read_size = 0; return VOD_AGAIN; @@ -595,6 +613,7 @@ mkv_metadata_reader_read( context.request_context = state->request_context; context.cur_pos = start_pos; context.end_pos = buffer->data + buffer->len; + context.offset_delta = state->read_offset - (intptr_t)buffer->data; // section id rc = ebml_read_id(&context, &id); @@ -648,6 +667,8 @@ mkv_metadata_reader_read( } state->state = MRS_READ_SECTION_DATA; + state->read_offset = position->pos; + result->read_req.read_offset = position->pos; result->read_req.read_size = size; return VOD_AGAIN; @@ -694,6 +715,7 @@ mkv_metadata_parse( 
context.request_context = request_context; context.cur_pos = metadata_parts[SECTION_INFO].data; context.end_pos = context.cur_pos + metadata_parts[SECTION_INFO].len; + context.offset_delta = -1; vod_memzero(&info, sizeof(info)); rc = ebml_parse_master(&context, mkv_spec_info, &info); @@ -1027,6 +1049,7 @@ mkv_get_read_frames_request( context.request_context = request_context; context.cur_pos = metadata->cues.data; context.end_pos = context.cur_pos + metadata->cues.len; + context.offset_delta = -1; // XXXXX optimize this - it may be possible to use the cuetime as the cluster timestamp, and start mid-cluster // another possible optimization is to read in fixed sizes until the segment is complete (may reduce the total read size) @@ -1135,6 +1158,8 @@ mkv_get_read_frames_request( read_req->read_size += extra_read_size; + metadata->read_offset = read_req->read_offset; + vod_log_debug4(VOD_LOG_DEBUG_LEVEL, request_context->log, 0, "mkv_get_read_frames_request: reading offset=%uL, size=%uz, extra=%uz, pos_ref=%uL", read_req->read_offset, read_req->read_size, extra_read_size, metadata->base_layout.position_reference); @@ -1401,6 +1426,7 @@ mkv_parse_frames_estimate_bitrate( { mkv_estimate_bitrate_track_context_t* track_context; mkv_estimate_bitrate_context_t context; + mkv_base_metadata_t* metadata = vod_container_of(base, mkv_base_metadata_t, base); media_track_t* cur_track; mkv_cluster_t cluster; vod_uint_t i; @@ -1419,6 +1445,7 @@ mkv_parse_frames_estimate_bitrate( context.context.request_context = request_context; context.context.cur_pos = frame_data->data; context.context.end_pos = frame_data->data + frame_data->len; + context.context.offset_delta = metadata->read_offset - (intptr_t)frame_data->data; for (i = 0; i < base->tracks.nelts; i++) { @@ -1592,20 +1619,18 @@ mkv_parse_laces(ebml_context_t* context, uint8_t flags, uint32_t* lace_sizes) static vod_status_t mkv_parse_frame( + mkv_frame_parse_context_t* frame_parse_context, ebml_context_t* context, - ebml_spec_t* 
spec, - void* dst) + mkv_cluster_t* cluster, + int key_frame) { - mkv_frame_parse_context_t* frame_parse_context = vod_container_of(context, mkv_frame_parse_context_t, context); mkv_frame_parse_track_context_t* track_context; frame_list_part_t* last_frames_part; frame_list_part_t* new_frames_part; mkv_frame_timecode_t* gop_frame; - mkv_cluster_t* cluster = dst; input_frame_t* cur_frame; uint64_t frame_timecode; uint64_t track_number; - uint32_t key_frame; uint32_t lace_sizes[256]; intptr_t laces, i; int16_t timecode; @@ -1660,10 +1685,6 @@ mkv_parse_frame( frame_timecode = cluster->timecode + timecode; - vod_log_debug3(VOD_LOG_DEBUG_LEVEL, context->request_context->log, 0, - "mkv_parse_frame: track=%uL, timecode=%uL, flags=0x%uxD", - track_number, frame_timecode, (uint32_t)flags); - gop_frame = vod_array_push(&track_context->gop_frames); if (gop_frame == NULL) { @@ -1676,21 +1697,22 @@ mkv_parse_frame( gop_frame->frame.frame = NULL; gop_frame->unsorted_frame = NULL; - switch (flags & ~0x06) + if (key_frame == -1) + { + key_frame = (flags & 0x80) ? 
1 : 0; + } + + if (!key_frame) { - case 0: - case 1: // discardable // XXXXX should not cross the clip offset if (frame_parse_context->state == FRS_WAIT_START_KEY_FRAME) { return VOD_OK; } - - key_frame = 0; - break; - - case 0x80: + } + else + { mkv_update_frame_timestamps(track_context); // repush the gop frame following the reset of the array @@ -1726,6 +1748,10 @@ mkv_parse_frame( case FRS_DONE: track_context->done = TRUE; + vod_log_debug3(VOD_LOG_DEBUG_LEVEL, context->request_context->log, 0, + "mkv_parse_frame: track=%uL, timecode=%uL, key=1, pos=%uL", + track_number, frame_timecode, (uint64_t)(uintptr_t)context->cur_pos + context->offset_delta); + // check whether all tracks are done for (track_context = frame_parse_context->first_track; track_context < frame_parse_context->last_track; @@ -1739,14 +1765,6 @@ mkv_parse_frame( return VOD_DONE; } - - key_frame = 1; - break; - - default: - vod_log_error(VOD_LOG_ERR, context->request_context->log, 0, - "mkv_parse_frame: unsupported frame flags 0x%uxD", (uint32_t)flags); - return VOD_BAD_DATA; } rc = mkv_parse_laces(context, flags, lace_sizes); @@ -1822,11 +1840,91 @@ mkv_parse_frame( track_context->frame_count++; track_context->key_frame_count += key_frame; track_context->total_frames_size += cur_frame->size; + + if (laces > 1) + { + vod_log_debug7(VOD_LOG_DEBUG_LEVEL, context->request_context->log, 0, + "mkv_parse_frame: track=%uL, timecode=%uL, key=%d, pos=%uL, size=%uD, lace=%i/%i", + track_number, frame_timecode, key_frame, cur_frame->offset + context->offset_delta, cur_frame->size, i + 1, laces); + } + else + { + vod_log_debug5(VOD_LOG_DEBUG_LEVEL, context->request_context->log, 0, + "mkv_parse_frame: track=%uL, timecode=%uL, key=%d, pos=%uL, size=%uD", + track_number, frame_timecode, key_frame, cur_frame->offset + context->offset_delta, cur_frame->size); + } + } + + return VOD_OK; +} + +static vod_status_t +mkv_block_group( + ebml_context_t* context, + ebml_spec_t* spec, + void* dst) +{ + 
mkv_frame_parse_context_t* frame_parse_context = vod_container_of(context, mkv_frame_parse_context_t, context); + mkv_block_group_context_t block_group_context; + mkv_block_group_t block_group; + ebml_context_t block_context; + mkv_cluster_t* cluster = dst; + vod_status_t rc; + + block_group_context.context = *context; + block_group_context.references = 0; + + vod_memzero(&block_group, sizeof(block_group)); + + rc = ebml_parse_master(&block_group_context.context, mkv_spec_block_group, &block_group); + if (rc != VOD_OK) + { + vod_log_debug0(VOD_LOG_DEBUG_LEVEL, context->request_context->log, 0, + "mkv_block_group: ebml_parse_master(block group) failed"); + return rc; + } + + if (block_group.block.len <= 0) + { + vod_log_error(VOD_LOG_ERR, context->request_context->log, 0, + "mkv_block_group: block group without block element"); + return VOD_BAD_DATA; } + block_context.request_context = context->request_context; + block_context.cur_pos = block_group.block.data; + block_context.end_pos = block_group.block.data + block_group.block.len; + block_context.offset_delta = context->offset_delta; + + return mkv_parse_frame(frame_parse_context, &block_context, cluster, block_group_context.references == 0); + +} + +static vod_status_t +mkv_reference_block( + ebml_context_t* context, + ebml_spec_t* spec, + void* dst) +{ + mkv_block_group_context_t* block_group_context = vod_container_of(context, mkv_block_group_context_t, context); + + block_group_context->references++; + return VOD_OK; } +static vod_status_t +mkv_simple_block( + ebml_context_t* context, + ebml_spec_t* spec, + void* dst) +{ + mkv_frame_parse_context_t* frame_parse_context = vod_container_of(context, mkv_frame_parse_context_t, context); + mkv_cluster_t* cluster = dst; + + return mkv_parse_frame(frame_parse_context, context, cluster, -1); +} + static vod_status_t mkv_parse_frames( request_context_t* request_context, @@ -1863,6 +1961,7 @@ mkv_parse_frames( frame_parse_context.context.request_context = 
request_context; frame_parse_context.context.cur_pos = frame_data->data; frame_parse_context.context.end_pos = frame_data->data + frame_data->len; + frame_parse_context.context.offset_delta = metadata->read_offset - (intptr_t)frame_data->data; frame_parse_context.start_time = metadata->start_time; frame_parse_context.end_time = metadata->end_time; frame_parse_context.max_frame_count = metadata->max_frame_count; From e1a8f0fb45b49966623daff282363a0131e3eeea Mon Sep 17 00:00:00 2001 From: erankor Date: Sat, 20 Jan 2024 17:19:49 +0200 Subject: [PATCH 8/9] align the segment start/end times to the index can significantly reduce the read overhead - on some sample content that was tested, the read overhead (=total_frame_size_read - total_output_size) was reduced by 44%. --- vod/mkv/mkv_format.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/vod/mkv/mkv_format.c b/vod/mkv/mkv_format.c index f9e65b2a..0a4b5024 100644 --- a/vod/mkv/mkv_format.c +++ b/vod/mkv/mkv_format.c @@ -1034,6 +1034,7 @@ mkv_get_read_frames_request( mkv_index_t index; vod_status_t rc; size_t extra_read_size; + bool_t align_timestamps; bool_t done = FALSE; read_req->read_offset = ULLONG_MAX; @@ -1051,6 +1052,8 @@ mkv_get_read_frames_request( context.end_pos = context.cur_pos + metadata->cues.len; context.offset_delta = -1; + align_timestamps = TRUE; // XXXX conf param + // XXXXX optimize this - it may be possible to use the cuetime as the cluster timestamp, and start mid-cluster // another possible optimization is to read in fixed sizes until the segment is complete (may reduce the total read size) @@ -1093,12 +1096,21 @@ mkv_get_read_frames_request( done = TRUE; } - if (read_req->read_offset == ULLONG_MAX && - metadata->start_time < index.time && - prev_index.cluster_pos != ULLONG_MAX) + if (read_req->read_offset == ULLONG_MAX) { - read_req->read_offset = prev_index.cluster_pos; - initial_time = prev_index.time; + if (align_timestamps && + 
metadata->start_time <= index.time) + { + metadata->start_time = index.time; + read_req->read_offset = index.cluster_pos; + initial_time = index.time; + } + else if (metadata->start_time < index.time && + prev_index.cluster_pos != ULLONG_MAX) + { + read_req->read_offset = prev_index.cluster_pos; + initial_time = prev_index.time; + } } if (done) @@ -1108,6 +1120,12 @@ mkv_get_read_frames_request( if (metadata->end_time <= index.time) { + if (align_timestamps) + { + metadata->end_time = index.time; + align_timestamps = FALSE; + } + done_tracks_mask |= cur_track_mask; if ((seen_tracks_mask & ~done_tracks_mask) == 0) { @@ -1160,9 +1178,11 @@ mkv_get_read_frames_request( metadata->read_offset = read_req->read_offset; - vod_log_debug4(VOD_LOG_DEBUG_LEVEL, request_context->log, 0, - "mkv_get_read_frames_request: reading offset=%uL, size=%uz, extra=%uz, pos_ref=%uL", - read_req->read_offset, read_req->read_size, extra_read_size, metadata->base_layout.position_reference); + vod_log_debug6(VOD_LOG_DEBUG_LEVEL, request_context->log, 0, + "mkv_get_read_frames_request: reading offsets=%uL..%uL (size=%uz, extra=%uz), time=%uL..%uL", + read_req->read_offset, read_req->read_offset + read_req->read_size, + read_req->read_size, extra_read_size, + metadata->start_time, metadata->end_time); return VOD_AGAIN; } From 484c498651b7ed7b8c65657a705b506a78143781 Mon Sep 17 00:00:00 2001 From: erankor Date: Sun, 28 Jan 2024 08:32:54 +0200 Subject: [PATCH 9/9] style --- vod/mkv/mkv_format.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/vod/mkv/mkv_format.c b/vod/mkv/mkv_format.c index 0a4b5024..09991ee9 100644 --- a/vod/mkv/mkv_format.c +++ b/vod/mkv/mkv_format.c @@ -1297,18 +1297,14 @@ mkv_update_frame_timestamps(mkv_frame_parse_track_context_t* context) for (; cur_frame < last_frame; cur_frame++) { pts_delay = cur_frame->unsorted_timecode - cur_frame->timecode; - + cur_frame->unsorted_frame->pts_delay = pts_delay; if (pts_delay < context->min_pts_delay) { 
context->min_pts_delay = pts_delay; } - cur_frame->unsorted_frame->pts_delay = pts_delay; - duration = cur_frame[1].timecode - cur_frame[0].timecode; - mkv_update_laces_duration(&cur_frame->frame, duration); - context->total_frames_duration += duration; } } @@ -1341,12 +1337,12 @@ mkv_estimate_next_frame_timecode( uint64_t max_timecode; uint64_t result; - cur_frame = context->gop_frames.elts; - last_frame = cur_frame + (context->gop_frames.nelts - 1); - // get the number of pending laces + max timecode laces = 0; max_timecode = 0; + + cur_frame = context->gop_frames.elts; + last_frame = cur_frame + (context->gop_frames.nelts - 1); for (; cur_frame < last_frame; cur_frame++) { laces += cur_frame->frame.laces;