diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/TestValidateSchemaAndBuildDictMapper.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/TestValidateSchemaAndBuildDictMapper.java index a6e669604e..66a8b637da 100644 --- a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/TestValidateSchemaAndBuildDictMapper.java +++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/TestValidateSchemaAndBuildDictMapper.java @@ -70,7 +70,7 @@ protected Configuration getDefaultJobConfiguration() { // Add extra configuration for this mapper File inputDir = Utils.getTempDataDirectory(); try { - TestWriteUtils.writeSimpleAvroFileWithUserSchema(inputDir, true, 100); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/TestValidateSchemaAndBuildDictMapperOutputReader.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/TestValidateSchemaAndBuildDictMapperOutputReader.java index 03000cb3cd..8447890e33 100644 --- a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/TestValidateSchemaAndBuildDictMapperOutputReader.java +++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/TestValidateSchemaAndBuildDictMapperOutputReader.java @@ -61,7 +61,7 @@ public void testGetWithNoFile() throws Exception { public void testGetWithEmptyFile() throws Exception { File inputDir = Utils.getTempDataDirectory(); String avroOutputFile = "empty_file.avro"; - TestWriteUtils.writeEmptyAvroFileWithUserSchema(inputDir, avroOutputFile, fileSchema.toString()); + TestWriteUtils.writeEmptyAvroFile(inputDir, avroOutputFile, fileSchema); ValidateSchemaAndBuildDictMapperOutputReader reader = new ValidateSchemaAndBuildDictMapperOutputReader(inputDir.getAbsolutePath(), avroOutputFile); reader.close(); @@ -94,7 +94,7 @@ public void testGetWithInvalidInputFileDataSize() throws Exception { avroOutputFile, 0, ByteBuffer.wrap("TestDictionary".getBytes()), - fileSchema.toString()); + fileSchema); ValidateSchemaAndBuildDictMapperOutputReader reader = new ValidateSchemaAndBuildDictMapperOutputReader(inputDir.getAbsolutePath(), avroOutputFile); reader.close(); @@ -109,7 +109,7 @@ public void testGetWithValidInputFileDataSize() throws Exception { avroOutputFile, 1, ByteBuffer.wrap("TestDictionary".getBytes()), - fileSchema.toString()); + fileSchema); ValidateSchemaAndBuildDictMapperOutputReader reader = new ValidateSchemaAndBuildDictMapperOutputReader(inputDir.getAbsolutePath(), avroOutputFile); ValidateSchemaAndBuildDictMapperOutput output = reader.getOutput(); @@ -128,12 +128,8 @@ public void testGetWithValidInputFileDataSize() throws Exception { public void testGetWithNoZstdDictionary() throws Exception { File inputDir = Utils.getTempDataDirectory(); String avroOutputFile = "valid_file.avro"; - TestWriteUtils.writeSimpleAvroFileForValidateSchemaAndBuildDictMapperOutput( - inputDir, - avroOutputFile, - 1, - null, - fileSchema.toString()); + TestWriteUtils + .writeSimpleAvroFileForValidateSchemaAndBuildDictMapperOutput(inputDir, avroOutputFile, 1, null, fileSchema); ValidateSchemaAndBuildDictMapperOutputReader reader = new ValidateSchemaAndBuildDictMapperOutputReader(inputDir.getAbsolutePath(), avroOutputFile); ValidateSchemaAndBuildDictMapperOutput output = reader.getOutput(); diff --git 
a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/TestVeniceAvroRecordReader.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/TestVeniceAvroRecordReader.java index 2629e50bf9..9daa6c4058 100644 --- a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/TestVeniceAvroRecordReader.java +++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/TestVeniceAvroRecordReader.java @@ -6,12 +6,11 @@ import static com.linkedin.venice.hadoop.VenicePushJob.TOPIC_PROP; import static com.linkedin.venice.hadoop.VenicePushJob.UPDATE_SCHEMA_STRING_PROP; import static com.linkedin.venice.hadoop.VenicePushJob.VALUE_FIELD_PROP; -import static com.linkedin.venice.utils.TestWriteUtils.NESTED_SCHEMA_STRING; -import static com.linkedin.venice.utils.TestWriteUtils.NESTED_SCHEMA_STRING_V2; -import static com.linkedin.venice.utils.TestWriteUtils.STRING_RECORD_SCHEMA_STRING; +import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V2_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.STRING_TO_NAME_RECORD_V1_SCHEMA; -import com.linkedin.venice.schema.AvroSchemaParseUtils; import com.linkedin.venice.schema.writecompute.WriteComputeSchemaConverter; +import com.linkedin.venice.utils.TestWriteUtils; import com.linkedin.venice.utils.VeniceProperties; import java.util.Properties; import org.apache.avro.Schema; @@ -27,10 +26,10 @@ public class TestVeniceAvroRecordReader { @Test public void testGeneratePartialUpdate() { - Schema updateSchema = WriteComputeSchemaConverter.getInstance().convert(NESTED_SCHEMA_STRING_V2); + Schema updateSchema = WriteComputeSchemaConverter.getInstance().convertFromValueRecordSchema(NAME_RECORD_V2_SCHEMA); Properties properties = new Properties(); properties.put(TOPIC_PROP, "test_store_rt"); - properties.put(SCHEMA_STRING_PROP, STRING_RECORD_SCHEMA_STRING); + properties.put(SCHEMA_STRING_PROP, STRING_TO_NAME_RECORD_V1_SCHEMA.toString()); properties.put(GENERATE_PARTIAL_UPDATE_RECORD_FROM_INPUT, true); properties.put(UPDATE_SCHEMA_STRING_PROP, updateSchema); properties.put(KEY_FIELD_PROP, "key"); @@ -38,11 +37,9 @@ public void testGeneratePartialUpdate() { VeniceProperties veniceProperties = new VeniceProperties(properties); VeniceAvroRecordReader recordReader = new VeniceAvroRecordReader(veniceProperties); - GenericRecord record = - new GenericData.Record(AvroSchemaParseUtils.parseSchemaFromJSONLooseValidation(STRING_RECORD_SCHEMA_STRING)); + GenericRecord record = new GenericData.Record(STRING_TO_NAME_RECORD_V1_SCHEMA); record.put("key", "123"); - GenericRecord valueRecord = - new GenericData.Record(AvroSchemaParseUtils.parseSchemaFromJSONLooseValidation(NESTED_SCHEMA_STRING)); + GenericRecord valueRecord = new GenericData.Record(TestWriteUtils.NAME_RECORD_V1_SCHEMA); valueRecord.put("firstName", "FN"); valueRecord.put("lastName", "LN"); record.put("value", valueRecord); diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/VenicePushJobTest.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/VenicePushJobTest.java index 1f8cf83305..6e237421c2 100644 --- a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/VenicePushJobTest.java +++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/VenicePushJobTest.java @@ -17,8 +17,8 @@ import static com.linkedin.venice.hadoop.VenicePushJob.VENICE_DISCOVER_URL_PROP; import static com.linkedin.venice.hadoop.VenicePushJob.VENICE_STORE_NAME_PROP; import static 
com.linkedin.venice.status.BatchJobHeartbeatConfigs.HEARTBEAT_ENABLED_CONFIG; -import static com.linkedin.venice.utils.TestWriteUtils.NESTED_SCHEMA_STRING; -import static com.linkedin.venice.utils.TestWriteUtils.UPDATE_SCHEMA_OF_NESTED_SCHEMA_STRING; +import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V1_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V1_UPDATE_SCHEMA; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.ArgumentMatchers.anyInt; @@ -101,8 +101,8 @@ public class VenicePushJobTest { public void testVPJcheckInputUpdateSchema() { VenicePushJob vpj = mock(VenicePushJob.class); when(vpj.isUpdateSchema(anyString())).thenCallRealMethod(); - Assert.assertTrue(vpj.isUpdateSchema(UPDATE_SCHEMA_OF_NESTED_SCHEMA_STRING)); - Assert.assertFalse(vpj.isUpdateSchema(NESTED_SCHEMA_STRING)); + Assert.assertTrue(vpj.isUpdateSchema(NAME_RECORD_V1_UPDATE_SCHEMA.toString())); + Assert.assertFalse(vpj.isUpdateSchema(NAME_RECORD_V1_SCHEMA.toString())); } @Test(expectedExceptions = VeniceException.class, expectedExceptionsMessageRegExp = ".*Repush with TTL is only supported while using Kafka Input Format.*") diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/zstd/TestZstdLibrary.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/zstd/TestZstdLibrary.java index d2c56f4b65..d91ac30ca6 100644 --- a/clients/venice-push-job/src/test/java/com/linkedin/venice/zstd/TestZstdLibrary.java +++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/zstd/TestZstdLibrary.java @@ -5,6 +5,7 @@ import static com.linkedin.venice.hadoop.DefaultInputDataInfoProvider.PATH_FILTER; import static com.linkedin.venice.utils.ByteUtils.BYTES_PER_KB; import static com.linkedin.venice.utils.ByteUtils.BYTES_PER_MB; +import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema; import com.github.luben.zstd.ZstdDictTrainer; import com.github.luben.zstd.ZstdException; @@ -12,7 +13,6 @@ import com.linkedin.venice.hadoop.InputDataInfoProvider; import com.linkedin.venice.hadoop.PushJobZstdConfig; import com.linkedin.venice.hadoop.VeniceAvroRecordReader; -import com.linkedin.venice.utils.TestWriteUtils; import com.linkedin.venice.utils.Utils; import com.linkedin.venice.utils.VeniceProperties; import java.io.File; @@ -36,7 +36,9 @@ private void runTest(int numOfFiles, int numOfRecordsPerFile, int dictSizeLimitI FileSystem fs = FileSystem.get(new Configuration()); File inputDir = Utils.getTempDataDirectory(); try { - TestWriteUtils.writeMultipleAvroFilesWithUserSchema(inputDir, numOfFiles, numOfRecordsPerFile); + for (int i = 0; i < numOfFiles; i++) { + writeSimpleAvroFileWithStringToStringSchema(inputDir, numOfRecordsPerFile, "testInput" + i + ".avro"); + } Properties props = new Properties(); props.setProperty(COMPRESSION_DICTIONARY_SIZE_LIMIT, String.valueOf(dictSizeLimitInKB * BYTES_PER_KB)); props.setProperty(COMPRESSION_DICTIONARY_SAMPLE_SIZE, String.valueOf(dictSampleSizeLimitInMB * BYTES_PER_MB)); diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/schema/TestAvroSupersetSchemaUtils.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/schema/TestAvroSupersetSchemaUtils.java index e1028ad32d..95a54e9257 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/schema/TestAvroSupersetSchemaUtils.java +++ 
b/internal/venice-client-common/src/test/java/com/linkedin/venice/schema/TestAvroSupersetSchemaUtils.java @@ -1,9 +1,9 @@ package com.linkedin.venice.schema; -import static com.linkedin.venice.utils.TestWriteUtils.NESTED_SCHEMA_STRING; -import static com.linkedin.venice.utils.TestWriteUtils.NESTED_SCHEMA_STRING_V2; -import static com.linkedin.venice.utils.TestWriteUtils.NESTED_SCHEMA_STRING_V3; -import static com.linkedin.venice.utils.TestWriteUtils.NESTED_SCHEMA_STRING_V4; +import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V1_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V2_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V3_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V4_SCHEMA; import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; import com.linkedin.venice.controllerapi.MultiSchemaResponse; @@ -459,10 +459,14 @@ public void testGetLatestUpdateSchemaFromSchemaResponse() { @Test public void testValidateSubsetSchema() { - Assert.assertTrue(AvroSupersetSchemaUtils.validateSubsetValueSchema(NESTED_SCHEMA_STRING, NESTED_SCHEMA_STRING_V2)); + Assert.assertTrue( + AvroSupersetSchemaUtils + .validateSubsetValueSchema(NAME_RECORD_V1_SCHEMA.toString(), NAME_RECORD_V2_SCHEMA.toString())); Assert.assertFalse( - AvroSupersetSchemaUtils.validateSubsetValueSchema(NESTED_SCHEMA_STRING_V2, NESTED_SCHEMA_STRING_V3)); + AvroSupersetSchemaUtils + .validateSubsetValueSchema(NAME_RECORD_V2_SCHEMA.toString(), NAME_RECORD_V3_SCHEMA.toString())); Assert.assertFalse( - AvroSupersetSchemaUtils.validateSubsetValueSchema(NESTED_SCHEMA_STRING_V3, NESTED_SCHEMA_STRING_V4)); + AvroSupersetSchemaUtils + .validateSubsetValueSchema(NAME_RECORD_V3_SCHEMA.toString(), NAME_RECORD_V4_SCHEMA.toString())); } } diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/etl/ETLUtilsTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/etl/ETLUtilsTest.java index 9e8beae58f..8d0c08b3cb 100644 --- a/internal/venice-common/src/test/java/com/linkedin/venice/etl/ETLUtilsTest.java +++ b/internal/venice-common/src/test/java/com/linkedin/venice/etl/ETLUtilsTest.java @@ -1,8 +1,8 @@ package com.linkedin.venice.etl; -import static com.linkedin.venice.utils.TestWriteUtils.ETL_UNION_VALUE_SCHEMA_STRING_WITHOUT_NULL; -import static com.linkedin.venice.utils.TestWriteUtils.ETL_UNION_VALUE_SCHEMA_STRING_WITH_NULL; -import static com.linkedin.venice.utils.TestWriteUtils.ETL_VALUE_SCHEMA_STRING; +import static com.linkedin.venice.utils.TestWriteUtils.ETL_UNION_VALUE_WITHOUT_NULL_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.ETL_UNION_VALUE_WITH_NULL_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.ETL_VALUE_SCHEMA; import java.util.ArrayList; import java.util.List; @@ -14,7 +14,7 @@ public class ETLUtilsTest { @Test public void testTransformValueSchemaForETLForRecordSchema() { - Schema schema = Schema.parse(ETL_VALUE_SCHEMA_STRING); + Schema schema = ETL_VALUE_SCHEMA; Schema etlValueSchema = ETLUtils.transformValueSchemaForETL(schema); Assert.assertEquals(Schema.Type.UNION, etlValueSchema.getType()); @@ -26,7 +26,7 @@ public void testTransformValueSchemaForETLForRecordSchema() { @Test public void testTransformValueSchemaForETLForUnionSchemaWithoutNullField() { - Schema schema = Schema.parse(ETL_UNION_VALUE_SCHEMA_STRING_WITHOUT_NULL); + Schema schema = ETL_UNION_VALUE_WITHOUT_NULL_SCHEMA; Schema etlValueSchema = ETLUtils.transformValueSchemaForETL(schema); 
Assert.assertEquals(Schema.Type.UNION, etlValueSchema.getType()); @@ -45,7 +45,7 @@ public void testTransformValueSchemaForETLForUnionSchemaWithoutNullField() { @Test public void testTransformValueSchemaForETLForUnionSchemaWithNullField() { - Schema schema = Schema.parse(ETL_UNION_VALUE_SCHEMA_STRING_WITH_NULL); + Schema schema = ETL_UNION_VALUE_WITH_NULL_SCHEMA; Schema etlValueSchema = ETLUtils.transformValueSchemaForETL(schema); Assert.assertEquals(Schema.Type.UNION, etlValueSchema.getType()); @@ -58,7 +58,7 @@ public void testTransformValueSchemaForETLForUnionSchemaWithNullField() { @Test public void testGetValueSchemaFromETLValueSchemaForRecordTypes() { - Schema valueSchema = Schema.parse(ETL_VALUE_SCHEMA_STRING); + Schema valueSchema = ETL_VALUE_SCHEMA; Schema etlValueSchema = ETLUtils.transformValueSchemaForETL(valueSchema); Schema inferredValueSchema = @@ -69,7 +69,7 @@ public void testGetValueSchemaFromETLValueSchemaForRecordTypes() { @Test public void testGetValueSchemaFromETLValueSchemaForUnionTypesWithoutNull() { - Schema valueSchema = Schema.parse(ETL_UNION_VALUE_SCHEMA_STRING_WITHOUT_NULL); + Schema valueSchema = ETL_UNION_VALUE_WITHOUT_NULL_SCHEMA; Schema etlValueSchema = ETLUtils.transformValueSchemaForETL(valueSchema); Schema inferredValueSchema = @@ -80,7 +80,7 @@ public void testGetValueSchemaFromETLValueSchemaForUnionTypesWithoutNull() { @Test public void testGetValueSchemaFromETLValueSchemaForUnionTypesWithNull() { - Schema valueSchema = Schema.parse(ETL_UNION_VALUE_SCHEMA_STRING_WITH_NULL); + Schema valueSchema = ETL_UNION_VALUE_WITH_NULL_SCHEMA; Schema etlValueSchema = ETLUtils.transformValueSchemaForETL(valueSchema); Schema inferredValueSchema = diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/etl/ETLValueSchemaTransformationTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/etl/ETLValueSchemaTransformationTest.java index 9bf362f193..d79c40ea0e 100644 --- a/internal/venice-common/src/test/java/com/linkedin/venice/etl/ETLValueSchemaTransformationTest.java +++ b/internal/venice-common/src/test/java/com/linkedin/venice/etl/ETLValueSchemaTransformationTest.java @@ -1,10 +1,9 @@ package com.linkedin.venice.etl; -import static com.linkedin.venice.utils.TestWriteUtils.ETL_UNION_VALUE_SCHEMA_STRING_WITHOUT_NULL; -import static com.linkedin.venice.utils.TestWriteUtils.ETL_UNION_VALUE_SCHEMA_STRING_WITH_NULL; -import static com.linkedin.venice.utils.TestWriteUtils.ETL_VALUE_SCHEMA_STRING; +import static com.linkedin.venice.utils.TestWriteUtils.ETL_UNION_VALUE_WITHOUT_NULL_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.ETL_UNION_VALUE_WITH_NULL_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.ETL_VALUE_SCHEMA; -import org.apache.avro.Schema; import org.testng.Assert; import org.testng.annotations.Test; @@ -12,22 +11,21 @@ public class ETLValueSchemaTransformationTest { @Test public void testRecordSchemaBecomesUnionWithNull() { - Schema valueSchema = Schema.parse(ETL_VALUE_SCHEMA_STRING); - ETLValueSchemaTransformation transformation = ETLValueSchemaTransformation.fromSchema(valueSchema); + ETLValueSchemaTransformation transformation = ETLValueSchemaTransformation.fromSchema(ETL_VALUE_SCHEMA); Assert.assertEquals(transformation, ETLValueSchemaTransformation.UNIONIZE_WITH_NULL); } @Test public void testUnionSchemaWithoutNullAddsNull() { - Schema valueSchema = Schema.parse(ETL_UNION_VALUE_SCHEMA_STRING_WITHOUT_NULL); - ETLValueSchemaTransformation transformation = 
ETLValueSchemaTransformation.fromSchema(valueSchema); + ETLValueSchemaTransformation transformation = + ETLValueSchemaTransformation.fromSchema(ETL_UNION_VALUE_WITHOUT_NULL_SCHEMA); Assert.assertEquals(transformation, ETLValueSchemaTransformation.ADD_NULL_TO_UNION); } @Test public void testUnionSchemaWithNullStaysUnchanged() { - Schema valueSchema = Schema.parse(ETL_UNION_VALUE_SCHEMA_STRING_WITH_NULL); - ETLValueSchemaTransformation transformation = ETLValueSchemaTransformation.fromSchema(valueSchema); + ETLValueSchemaTransformation transformation = + ETLValueSchemaTransformation.fromSchema(ETL_UNION_VALUE_WITH_NULL_SCHEMA); Assert.assertEquals(transformation, ETLValueSchemaTransformation.NONE); } } diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/client/store/StoreClientPerfTest.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/client/store/StoreClientPerfTest.java index e207a3a480..daf93b54b6 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/client/store/StoreClientPerfTest.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/client/store/StoreClientPerfTest.java @@ -219,7 +219,7 @@ private ResultsContainer clientStressTest( int valueSchemaId = 1; int valueSizeInBytes = 800; - String valueSchemaStr = TestWriteUtils.USER_SCHEMA_WITH_A_FLOAT_ARRAY_STRING; + String valueSchemaStr = TestWriteUtils.USER_WITH_FLOAT_ARRAY_SCHEMA.toString(); Schema valueSchema = new Schema.Parser().parse(valueSchemaStr); Set keys = new HashSet<>(); setupSchemaAndRequest(valueSchemaId, valueSchemaStr); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestFabricBuildout.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestFabricBuildout.java index f295239473..a7c112f734 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestFabricBuildout.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestFabricBuildout.java @@ -93,14 +93,15 @@ public void testStoresMetadataCopyOver() { // Create a test store only in dc0 region NewStoreResponse newStoreResponse = dc0Client.retryableRequest( 3, - c -> c.createNewStore(storeName, "", "\"string\"", TestWriteUtils.USER_SCHEMA_STRING_SIMPLE_WITH_DEFAULT)); + c -> c.createNewStore(storeName, "", "\"string\"", TestWriteUtils.USER_WITH_DEFAULT_SCHEMA.toString())); Assert.assertFalse( newStoreResponse.isError(), "The NewStoreResponse returned an error: " + newStoreResponse.getError()); // Enable read compute to test superset schema registration. 
Assert.assertFalse( dc0Client.updateStore(storeName, new UpdateStoreQueryParams().setReadComputationEnabled(true)).isError()); - Assert.assertFalse(dc0Client.addValueSchema(storeName, TestWriteUtils.USER_SCHEMA_STRING_WITH_DEFAULT).isError()); + Assert.assertFalse( + dc0Client.addValueSchema(storeName, TestWriteUtils.USER_WITH_DEFAULT_SCHEMA.toString()).isError()); checkStoreConfig(dc0Client, storeName); // Mimic source fabric store-level execution id Assert.assertFalse( @@ -167,12 +168,18 @@ public void testCompareStore() { ControllerClient childControllerClient1 = new ControllerClient(clusterName, childDatacenters.get(1).getControllerConnectString())) { String testStoreName = Utils.getUniqueString("test-store"); - NewStoreResponse newStoreResponse = childControllerClient0 - .createNewStore(testStoreName, "test", "\"string\"", TestWriteUtils.NESTED_SCHEMA_STRING); + NewStoreResponse newStoreResponse = childControllerClient0.createNewStore( + testStoreName, + "test", + TestWriteUtils.STRING_SCHEMA.toString(), + TestWriteUtils.NAME_RECORD_V1_SCHEMA.toString()); Assert.assertFalse(newStoreResponse.isError()); checkStoreConfig(childControllerClient0, testStoreName); - newStoreResponse = childControllerClient1 - .createNewStore(testStoreName, "test", "\"string\"", TestWriteUtils.NESTED_SCHEMA_STRING); + newStoreResponse = childControllerClient1.createNewStore( + testStoreName, + "test", + TestWriteUtils.STRING_SCHEMA.toString(), + TestWriteUtils.NAME_RECORD_V1_SCHEMA.toString()); Assert.assertFalse(newStoreResponse.isError()); checkStoreConfig(childControllerClient1, testStoreName); @@ -181,7 +188,7 @@ public void testCompareStore() { childControllerClient0.emptyPush(testStoreName, Utils.getUniqueString("empty-push-1"), 1L); Assert.assertFalse(versionCreationResponse.isError()); SchemaResponse schemaResponse = - childControllerClient0.addValueSchema(testStoreName, TestWriteUtils.NESTED_SCHEMA_STRING_V2); + childControllerClient0.addValueSchema(testStoreName, TestWriteUtils.NAME_RECORD_V2_SCHEMA.toString()); Assert.assertFalse(schemaResponse.isError()); StoreComparisonResponse response = parentControllerClient.compareStore(testStoreName, dcNames[0], dcNames[1]); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestParentControllerWithMultiDataCenter.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestParentControllerWithMultiDataCenter.java index 7af4c18a76..f49bfe03a4 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestParentControllerWithMultiDataCenter.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestParentControllerWithMultiDataCenter.java @@ -320,8 +320,8 @@ public void testEnableActiveActiveReplicationSchema() { String clusterName = CLUSTER_NAMES[0]; String storeName = Utils.getUniqueString("store"); String valueRecordSchemaStr1 = BASIC_USER_SCHEMA_STRING_WITH_DEFAULT; - String valueRecordSchemaStr2 = TestWriteUtils.USER_SCHEMA_STRING_SIMPLE_WITH_DEFAULT; - String valueRecordSchemaStr3 = TestWriteUtils.USER_SCHEMA_STRING_WITH_DEFAULT; + String valueRecordSchemaStr2 = TestWriteUtils.SIMPLE_USER_WITH_DEFAULT_SCHEMA.toString(); + String valueRecordSchemaStr3 = TestWriteUtils.USER_WITH_DEFAULT_SCHEMA.toString(); Schema rmdSchema1 = RmdSchemaGenerator.generateMetadataSchema(valueRecordSchemaStr1, 1); Schema rmdSchema2 = RmdSchemaGenerator.generateMetadataSchema(valueRecordSchemaStr2, 1); diff --git 
a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestVeniceHelixAdminWithSharedEnvironment.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestVeniceHelixAdminWithSharedEnvironment.java index d76f91d2e9..029ef02b05 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestVeniceHelixAdminWithSharedEnvironment.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestVeniceHelixAdminWithSharedEnvironment.java @@ -1522,7 +1522,7 @@ public void testComputationEnabled() { @Test public void testAddAndRemoveDerivedSchema() { String storeName = Utils.getUniqueString("write_compute_store"); - String recordSchemaStr = TestWriteUtils.USER_SCHEMA_STRING_WITH_DEFAULT; + String recordSchemaStr = TestWriteUtils.USER_WITH_DEFAULT_SCHEMA.toString(); Schema derivedSchema = WriteComputeSchemaConverter.getInstance().convertFromValueRecordSchemaStr(recordSchemaStr); veniceAdmin.createStore(clusterName, storeName, storeOwner, KEY_SCHEMA, recordSchemaStr); @@ -1790,7 +1790,7 @@ public void testVersionLevelActiveActiveReplicationConfig() { @Test public void testAddMetadataSchema() { String storeName = Utils.getUniqueString("aa_store"); - String recordSchemaStr = TestWriteUtils.USER_SCHEMA_STRING_WITH_DEFAULT; + String recordSchemaStr = TestWriteUtils.USER_WITH_DEFAULT_SCHEMA.toString(); int replicationMetadataVersionId = multiClusterConfig.getCommonConfig().getReplicationMetadataVersion(); Schema metadataSchema = RmdSchemaGenerator.generateMetadataSchema(recordSchemaStr, replicationMetadataVersionId); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/VeniceParentHelixAdminTest.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/VeniceParentHelixAdminTest.java index 5fde82c8bf..930bb2ab9b 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/VeniceParentHelixAdminTest.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/VeniceParentHelixAdminTest.java @@ -397,13 +397,13 @@ public void testSupersetSchemaWithCustomSupersetSchemaGenerator() throws IOExcep final String CUSTOM_PROP = "custom_prop"; // Contains f0, f1 Schema valueSchemaV1 = - AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("supersetschemas/ValueV1.avsc")); + AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("valueSchema/supersetschemas/ValueV1.avsc")); // Contains f2, f3 Schema valueSchemaV4 = - AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("supersetschemas/ValueV4.avsc")); + AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("valueSchema/supersetschemas/ValueV4.avsc")); // Contains f0 Schema valueSchemaV6 = - AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("supersetschemas/ValueV6.avsc")); + AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("valueSchema/supersetschemas/ValueV6.avsc")); Properties properties = new Properties(); // This cluster setup don't have server, we cannot perform push here. 
properties.setProperty(CONTROLLER_AUTO_MATERIALIZE_META_SYSTEM_STORE, String.valueOf(false)); @@ -768,15 +768,15 @@ private void testSupersetSchemaRegistration(ControllerClient parentControllerCli String owner = "test_owner"; String keySchemaStr = "\"long\""; Schema valueSchemaV1 = - AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("supersetschemas/ValueV1.avsc")); + AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("valueSchema/supersetschemas/ValueV1.avsc")); Schema valueSchemaV2 = - AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("supersetschemas/ValueV2.avsc")); + AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("valueSchema/supersetschemas/ValueV2.avsc")); Schema valueSchemaV3 = - AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("supersetschemas/ValueV3.avsc")); + AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("valueSchema/supersetschemas/ValueV3.avsc")); Schema valueSchemaV4 = - AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("supersetschemas/ValueV4.avsc")); + AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("valueSchema/supersetschemas/ValueV4.avsc")); Schema valueSchemaV5 = - AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("supersetschemas/ValueV5.avsc")); + AvroCompatibilityHelper.parse(TestWriteUtils.loadFileAsString("valueSchema/supersetschemas/ValueV5.avsc")); NewStoreResponse newStoreResponse = parentControllerClient.createNewStore(storeName, owner, keySchemaStr, valueSchemaV1.toString()); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/ActiveActiveReplicationForHybridTest.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/ActiveActiveReplicationForHybridTest.java index 4387fcf9c1..6f97dc4a76 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/ActiveActiveReplicationForHybridTest.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/ActiveActiveReplicationForHybridTest.java @@ -282,7 +282,9 @@ public void testEnableNRisRequiredBeforeEnablingAA() { String storeName = Utils.getUniqueString("test-store"); String anotherStoreName = Utils.getUniqueString("test-store"); try { - assertCommand(parentControllerClient.createNewStore(storeName, "owner", STRING_SCHEMA, STRING_SCHEMA)); + assertCommand( + parentControllerClient + .createNewStore(storeName, "owner", STRING_SCHEMA.toString(), STRING_SCHEMA.toString())); // Expect the request to fail since AA cannot be enabled without enabling NR try { @@ -302,7 +304,9 @@ public void testEnableNRisRequiredBeforeEnablingAA() { updateStoreToHybrid(storeName, parentControllerClient, Optional.of(true), Optional.of(true), Optional.of(false)); // Create a new store - assertCommand(parentControllerClient.createNewStore(anotherStoreName, "owner", STRING_SCHEMA, STRING_SCHEMA)); + assertCommand( + parentControllerClient + .createNewStore(anotherStoreName, "owner", STRING_SCHEMA.toString(), STRING_SCHEMA.toString())); // Enable NR updateStoreToHybrid( @@ -344,7 +348,9 @@ public void testAAReplicationCanConsumeFromAllRegions(boolean isChunkingEnabled, String clusterName = CLUSTER_NAMES[0]; String storeName = Utils.getUniqueString("test-store"); try { - assertCommand(parentControllerClient.createNewStore(storeName, "owner", STRING_SCHEMA, STRING_SCHEMA)); + assertCommand( + parentControllerClient + .createNewStore(storeName, "owner", STRING_SCHEMA.toString(), 
STRING_SCHEMA.toString())); updateStoreToHybrid( storeName, parentControllerClient, @@ -541,7 +547,9 @@ public void testAAReplicationCanConsumeFromAllRegions(boolean isChunkingEnabled, public void controllerClientCanGetStoreReplicationMetadataSchema() { String storeName = Utils.getUniqueString("test-store"); try { - assertCommand(parentControllerClient.createNewStore(storeName, "owner", STRING_SCHEMA, STRING_SCHEMA)); + assertCommand( + parentControllerClient + .createNewStore(storeName, "owner", STRING_SCHEMA.toString(), STRING_SCHEMA.toString())); updateStoreToHybrid(storeName, parentControllerClient, Optional.of(true), Optional.of(true), Optional.of(false)); // Empty push to create a version @@ -563,7 +571,9 @@ public void testAAReplicationCanResolveConflicts(boolean useLogicalTimestamp, bo String clusterName = CLUSTER_NAMES[0]; String storeName = Utils.getUniqueString("test-store"); try { - assertCommand(parentControllerClient.createNewStore(storeName, "owner", STRING_SCHEMA, STRING_SCHEMA)); + assertCommand( + parentControllerClient + .createNewStore(storeName, "owner", STRING_SCHEMA.toString(), STRING_SCHEMA.toString())); updateStoreToHybrid( storeName, parentControllerClient, @@ -755,7 +765,9 @@ public void testHelixReplicationFactorConfigChange() { String kafkaTopic; try { - assertCommand(parentControllerClient.createNewStore(storeName, "owner", STRING_SCHEMA, STRING_SCHEMA)); + assertCommand( + parentControllerClient + .createNewStore(storeName, "owner", STRING_SCHEMA.toString(), STRING_SCHEMA.toString())); updateStoreToHybrid(storeName, parentControllerClient, Optional.of(true), Optional.of(true), Optional.of(true)); // Empty push to create a version ControllerResponse response = assertCommand( diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/DaVinciClientMemoryLimitTest.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/DaVinciClientMemoryLimitTest.java index 9ff5cbbe17..9e0c2095fa 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/DaVinciClientMemoryLimitTest.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/DaVinciClientMemoryLimitTest.java @@ -24,7 +24,6 @@ import static com.linkedin.venice.utils.IntegrationTestPushUtils.runVPJ; import static com.linkedin.venice.utils.IntegrationTestPushUtils.sendCustomSizeStreamingRecord; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -55,6 +54,7 @@ import com.linkedin.venice.utils.DataProviderUtils; import com.linkedin.venice.utils.PropertyBuilder; import com.linkedin.venice.utils.TestUtils; +import com.linkedin.venice.utils.TestWriteUtils; import com.linkedin.venice.utils.Utils; import com.linkedin.venice.utils.VeniceProperties; import io.tehuti.metrics.MetricsRepository; @@ -139,7 +139,7 @@ public void testDaVinciMemoryLimitShouldFailLargeDataPush(boolean ingestionIsola // Test a small push File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir, true, 100, 100); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, 100, 100); Properties vpjProperties = 
defaultVPJProps(venice, inputDirPath, storeName); String storeNameWithoutMemoryEnforcement = Utils.getUniqueString("store_without_memory_enforcement"); @@ -197,7 +197,7 @@ public void testDaVinciMemoryLimitShouldFailLargeDataPush(boolean ingestionIsola // Run a bigger push and the push should fail inputDir = getTempDataDirectory(); inputDirPath = "file://" + inputDir.getAbsolutePath(); - writeSimpleAvroFileWithUserSchema(inputDir, true, 1000, 100000); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, 1000, 100000); final Properties vpjPropertiesForV2 = defaultVPJProps(venice, inputDirPath, storeName); VeniceException exception = @@ -240,7 +240,7 @@ public void testDaVinciMemoryLimitShouldFailLargeDataPushAndResumeHybridStore( // Test a small push File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir, true, 100, 100); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, 100, 100); Properties vpjProperties = defaultVPJProps(venice, inputDirPath, batchOnlyStoreName); try (ControllerClient controllerClient = createStoreForJob(venice.getClusterName(), recordSchema, vpjProperties); @@ -348,7 +348,7 @@ public void testDaVinciMemoryLimitShouldFailLargeDataPushAndResumeHybridStore( // Run a bigger push and the push should fail inputDir = getTempDataDirectory(); inputDirPath = "file://" + inputDir.getAbsolutePath(); - writeSimpleAvroFileWithUserSchema(inputDir, true, 1000, 100000); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, 1000, 100000); final Properties vpjPropertiesForV2 = defaultVPJProps(venice, inputDirPath, batchOnlyStoreName); VeniceException exception = @@ -382,7 +382,7 @@ public void testHybridStoreHittingMemoryLimiterShouldResumeAfterFreeUpResource(/ // Test a medium push close to the memory limit File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir, true, 190, 100000); // ~19MB + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, 190, 100000); // ~19MB Properties vpjProperties = defaultVPJProps(venice, inputDirPath, batchOnlyStoreName); try (ControllerClient controllerClient = createStoreForJob(venice.getClusterName(), recordSchema, vpjProperties); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/DaVinciClientTest.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/DaVinciClientTest.java index 2e229e40b2..cba05dfef6 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/DaVinciClientTest.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/DaVinciClientTest.java @@ -991,7 +991,7 @@ private void setUpStore( // Produce input data. File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - writeSimpleAvroFileWithIntToStringSchema(inputDir, true); + writeSimpleAvroFileWithIntToStringSchema(inputDir); // Setup VPJ job properties. 
Properties vpjProperties = defaultVPJProps(cluster, inputDirPath, storeName); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/DataRecoveryTest.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/DataRecoveryTest.java index 38b40d5a0f..68521cff3f 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/DataRecoveryTest.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/DataRecoveryTest.java @@ -264,8 +264,8 @@ public void testBatchOnlyDataRecovery() throws Exception { Map additionalConfigs = PubSubBrokerWrapper.getBrokerDetailsForClients(pubSubBrokerWrappers); TestUtils.writeBatchData( versionCreationResponse, - STRING_SCHEMA, - STRING_SCHEMA, + STRING_SCHEMA.toString(), + STRING_SCHEMA.toString(), IntStream.range(0, 10).mapToObj(i -> new AbstractMap.SimpleEntry<>(String.valueOf(i), String.valueOf(i))), HelixReadOnlySchemaRepository.VALUE_SCHEMA_STARTING_ID, pubSubProducerAdapterFactory, diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/NearlineE2ELatencyTest.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/NearlineE2ELatencyTest.java index a6af7cc011..f60a2eb93d 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/NearlineE2ELatencyTest.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/NearlineE2ELatencyTest.java @@ -134,8 +134,8 @@ public void testEndToEndNearlineMetric() { Map additionalConfigs = PubSubBrokerWrapper.getBrokerDetailsForClients(pubSubBrokerWrappers); TestUtils.writeBatchData( versionCreationResponse, - STRING_SCHEMA, - STRING_SCHEMA, + STRING_SCHEMA.toString(), + STRING_SCHEMA.toString(), IntStream.range(0, 10).mapToObj(i -> new AbstractMap.SimpleEntry<>(String.valueOf(i), String.valueOf(i))), HelixReadOnlySchemaRepository.VALUE_SCHEMA_STARTING_ID, pubSubProducerAdapterFactory, diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/OneTouchDataRecoveryTest.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/OneTouchDataRecoveryTest.java index ba44cd7e56..e4f80c5d11 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/OneTouchDataRecoveryTest.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/OneTouchDataRecoveryTest.java @@ -130,8 +130,8 @@ public void testBatchOnlyDataRecoveryAPIs() { Map additionalConfigs = PubSubBrokerWrapper.getBrokerDetailsForClients(pubSubBrokerWrappers); TestUtils.writeBatchData( versionCreationResponse, - STRING_SCHEMA, - STRING_SCHEMA, + STRING_SCHEMA.toString(), + STRING_SCHEMA.toString(), IntStream.range(0, 10).mapToObj(i -> new AbstractMap.SimpleEntry<>(String.valueOf(i), String.valueOf(i))), HelixReadOnlySchemaRepository.VALUE_SCHEMA_STARTING_ID, pubSubProducerAdapterFactory, diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PartialUpdateTest.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PartialUpdateTest.java index 3f44b29a65..142ce64b2a 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PartialUpdateTest.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PartialUpdateTest.java @@ -19,8 
+19,8 @@ import static com.linkedin.venice.utils.IntegrationTestPushUtils.sendStreamingDeleteRecord; import static com.linkedin.venice.utils.IntegrationTestPushUtils.sendStreamingRecord; import static com.linkedin.venice.utils.TestUtils.assertCommand; -import static com.linkedin.venice.utils.TestWriteUtils.NESTED_SCHEMA_STRING; -import static com.linkedin.venice.utils.TestWriteUtils.NESTED_SCHEMA_STRING_V2; +import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V1_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V2_SCHEMA; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; import static com.linkedin.venice.utils.TestWriteUtils.loadFileAsString; import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithStringToPartialUpdateOpRecordSchema; @@ -283,7 +283,7 @@ public void testIncrementalPushPartialUpdateClassicFormat() throws IOException { final String storeName = Utils.getUniqueString("inc_push_update_classic_format"); String parentControllerUrl = parentController.getControllerUrl(); File inputDir = getTempDataDirectory(); - Schema recordSchema = writeSimpleAvroFileWithStringToPartialUpdateOpRecordSchema(inputDir, true); + Schema recordSchema = writeSimpleAvroFileWithStringToPartialUpdateOpRecordSchema(inputDir); String keySchemaStr = recordSchema.getField(DEFAULT_KEY_FIELD_PROP).schema().toString(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); Properties vpjProperties = @@ -292,7 +292,9 @@ public void testIncrementalPushPartialUpdateClassicFormat() throws IOException { vpjProperties.put(INCREMENTAL_PUSH, true); try (ControllerClient parentControllerClient = new ControllerClient(CLUSTER_NAME, parentControllerUrl)) { - assertCommand(parentControllerClient.createNewStore(storeName, "test_owner", keySchemaStr, NESTED_SCHEMA_STRING)); + assertCommand( + parentControllerClient + .createNewStore(storeName, "test_owner", keySchemaStr, TestWriteUtils.NAME_RECORD_V1_SCHEMA.toString())); UpdateStoreQueryParams updateStoreParams = new UpdateStoreQueryParams().setStorageQuotaInByte(Store.UNLIMITED_STORAGE_QUOTA) .setCompressionStrategy(CompressionStrategy.NO_OP) @@ -344,7 +346,7 @@ public void testIncrementalPushPartialUpdateNewFormat() throws IOException { final String storeName = Utils.getUniqueString("inc_push_update_new_format"); String parentControllerUrl = parentController.getControllerUrl(); File inputDir = getTempDataDirectory(); - Schema recordSchema = writeSimpleAvroFileWithStringToRecordSchema(inputDir, true); + Schema recordSchema = writeSimpleAvroFileWithStringToRecordSchema(inputDir); String keySchemaStr = recordSchema.getField(DEFAULT_KEY_FIELD_PROP).schema().toString(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); Properties vpjProperties = @@ -354,7 +356,8 @@ public void testIncrementalPushPartialUpdateNewFormat() throws IOException { try (ControllerClient parentControllerClient = new ControllerClient(CLUSTER_NAME, parentControllerUrl)) { assertCommand( - parentControllerClient.createNewStore(storeName, "test_owner", keySchemaStr, NESTED_SCHEMA_STRING_V2)); + parentControllerClient + .createNewStore(storeName, "test_owner", keySchemaStr, NAME_RECORD_V2_SCHEMA.toString())); UpdateStoreQueryParams updateStoreParams = new UpdateStoreQueryParams().setStorageQuotaInByte(Store.UNLIMITED_STORAGE_QUOTA) .setCompressionStrategy(CompressionStrategy.NO_OP) @@ -412,7 +415,7 @@ public void testPartialUpdateOnBatchPushedKeys(CompressionStrategy compressionSt final String storeName = 
Utils.getUniqueString("updateBatch"); String parentControllerUrl = parentController.getControllerUrl(); File inputDir = getTempDataDirectory(); - Schema recordSchema = writeSimpleAvroFileWithStringToRecordSchema(inputDir, true); + Schema recordSchema = writeSimpleAvroFileWithStringToRecordSchema(inputDir); String keySchemaStr = recordSchema.getField(DEFAULT_KEY_FIELD_PROP).schema().toString(); String valueSchemaStr = recordSchema.getField(DEFAULT_VALUE_FIELD_PROP).schema().toString(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); @@ -979,7 +982,7 @@ public void testWriteComputeWithHybridLeaderFollowerLargeRecord( String inputDirPath = "file://" + inputDir.getAbsolutePath(); String parentControllerURL = parentController.getControllerUrl(); // Records 1-100, id string to name record - Schema recordSchema = writeSimpleAvroFileWithStringToRecordSchema(inputDir, true); + Schema recordSchema = writeSimpleAvroFileWithStringToRecordSchema(inputDir); VeniceClusterWrapper veniceClusterWrapper = childDatacenters.get(0).getClusters().get(CLUSTER_NAME); Properties vpjProperties = IntegrationTestPushUtils.defaultVPJProps(multiRegionMultiClusterWrapper, inputDirPath, storeName); @@ -1003,15 +1006,16 @@ public void testWriteComputeWithHybridLeaderFollowerLargeRecord( assertFalse(response.isError()); // Add a new value schema v2 to store - SchemaResponse schemaResponse = controllerClient.addValueSchema(storeName, NESTED_SCHEMA_STRING_V2); + SchemaResponse schemaResponse = controllerClient.addValueSchema(storeName, NAME_RECORD_V2_SCHEMA.toString()); assertFalse(schemaResponse.isError()); - // Add WC (Write Compute) schema associated to v2. - // Note that Write Compute schema needs to be registered manually here because the integration test harness - // does not create any parent controller. In production, when a value schema is added to a WC-enabled store via - // a parent controller, it will automatically generate and register its WC schema. - Schema writeComputeSchema = WriteComputeSchemaConverter.getInstance() - .convertFromValueRecordSchema(AvroCompatibilityHelper.parse(NESTED_SCHEMA_STRING_V2)); + // Add partial update schema associated to v2. + // Note that partial update schema needs to be registered manually here because the integration test harness + // does not create any parent controller. In production, when a value schema is added to a partial update + // enabled + // store via a parent controller, it will automatically generate and register its WC schema. 
+ Schema writeComputeSchema = + WriteComputeSchemaConverter.getInstance().convertFromValueRecordSchema(NAME_RECORD_V2_SCHEMA); schemaResponse = controllerClient.addDerivedSchema(storeName, schemaResponse.getId(), writeComputeSchema.toString()); assertFalse(schemaResponse.isError()); @@ -1052,8 +1056,7 @@ public void testWriteComputeWithHybridLeaderFollowerLargeRecord( // Do not send large record to RT; RT doesn't support chunking veniceProducer = getSamzaProducer(veniceClusterWrapper, storeName, Version.PushType.STREAM); String key = String.valueOf(101); - Schema valueSchema = AvroCompatibilityHelper.parse(NESTED_SCHEMA_STRING); - GenericRecord value = new GenericData.Record(valueSchema); + GenericRecord value = new GenericData.Record(NAME_RECORD_V1_SCHEMA); char[] chars = new char[100]; Arrays.fill(chars, 'f'); String firstName = new String(chars); @@ -1196,7 +1199,7 @@ public void testWriteComputeWithSamzaBatchJob() throws Exception { File inputDir = getTempDataDirectory(); String parentControllerURL = parentController.getControllerUrl(); // Records 1-100, id string to name record - Schema recordSchema = writeSimpleAvroFileWithStringToRecordSchema(inputDir, true); + Schema recordSchema = writeSimpleAvroFileWithStringToRecordSchema(inputDir); VeniceClusterWrapper veniceClusterWrapper = childDatacenters.get(0).getClusters().get(CLUSTER_NAME); try (ControllerClient controllerClient = new ControllerClient(CLUSTER_NAME, parentControllerURL)) { @@ -1217,13 +1220,13 @@ public void testWriteComputeWithSamzaBatchJob() throws Exception { assertFalse(response.isError()); // Add a new value schema v2 to store - SchemaResponse schemaResponse = controllerClient.addValueSchema(storeName, NESTED_SCHEMA_STRING_V2); + SchemaResponse schemaResponse = controllerClient.addValueSchema(storeName, NAME_RECORD_V2_SCHEMA.toString()); assertFalse(schemaResponse.isError()); // Add WC (Write Compute) schema associated to v2. 
// (this is a test environment only needed step since theres no parent) - Schema writeComputeSchema = WriteComputeSchemaConverter.getInstance() - .convertFromValueRecordSchema(AvroCompatibilityHelper.parse(NESTED_SCHEMA_STRING_V2)); + Schema writeComputeSchema = + WriteComputeSchemaConverter.getInstance().convertFromValueRecordSchema(NAME_RECORD_V2_SCHEMA); schemaResponse = controllerClient.addDerivedSchema(storeName, schemaResponse.getId(), writeComputeSchema.toString()); assertFalse(schemaResponse.isError()); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PushJobDetailsTest.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PushJobDetailsTest.java index 5c0bf971c1..85c29564e1 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PushJobDetailsTest.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PushJobDetailsTest.java @@ -19,7 +19,6 @@ import static com.linkedin.venice.pushmonitor.ExecutionStatus.WARNING; import static com.linkedin.venice.utils.IntegrationTestPushUtils.defaultVPJProps; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -48,6 +47,7 @@ import com.linkedin.venice.status.protocol.PushJobDetailsStatusTuple; import com.linkedin.venice.status.protocol.PushJobStatusRecordKey; import com.linkedin.venice.utils.TestUtils; +import com.linkedin.venice.utils.TestWriteUtils; import com.linkedin.venice.utils.Time; import com.linkedin.venice.utils.Utils; import java.io.File; @@ -114,7 +114,7 @@ public void setUp() throws IOException { TimeUnit.MINUTES); File inputDir = getTempDataDirectory(); inputDirPath = "file://" + inputDir.getAbsolutePath(); - recordSchema = writeSimpleAvroFileWithUserSchema(inputDir, false); + recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); for (int i = 1; i <= latestSchemaId; i++) { schemaVersionMap.put(i, Utils.getSchemaFromResource("avro/PushJobDetails/v" + i + "/PushJobDetails.avsc")); } diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PushStatusStoreTest.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PushStatusStoreTest.java index 1ea0c180a2..fab2603934 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PushStatusStoreTest.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PushStatusStoreTest.java @@ -320,7 +320,7 @@ private Properties getVPJProperties() throws Exception { // Produce input data. 
File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - writeSimpleAvroFileWithIntToStringSchema(inputDir, true); + writeSimpleAvroFileWithIntToStringSchema(inputDir); return defaultVPJProps(cluster, inputDirPath, storeName); } diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/StoragePersonaTest.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/StoragePersonaTest.java index 7879d3956e..172ee9bf4e 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/StoragePersonaTest.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/StoragePersonaTest.java @@ -67,7 +67,8 @@ public void cleanUp() { private Store setUpTestStoreAndAddToRepo(long quota) { Store testStore = TestUtils.createTestStore(Utils.getUniqueString("testStore"), "testStoreOwner", 100); - controllerClient.createNewStore(testStore.getName(), testStore.getOwner(), STRING_SCHEMA, STRING_SCHEMA); + controllerClient + .createNewStore(testStore.getName(), testStore.getOwner(), STRING_SCHEMA.toString(), STRING_SCHEMA.toString()); controllerClient.updateStore(testStore.getName(), new UpdateStoreQueryParams().setStorageQuotaInByte(quota)); return testStore; } diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestActiveActiveIngestion.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestActiveActiveIngestion.java index 475c204f93..5f0f404da0 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestActiveActiveIngestion.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestActiveActiveIngestion.java @@ -29,7 +29,6 @@ import static com.linkedin.venice.utils.IntegrationTestPushUtils.sendStreamingRecord; import static com.linkedin.venice.utils.TestUtils.generateInput; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; import com.linkedin.davinci.consumer.ChangeEvent; @@ -208,7 +207,7 @@ public void testLeaderLagWithIgnoredData() throws Exception { // create a active-active enabled store and run batch push job // batch job contains 100 records File inputDir = getTempDataDirectory(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); String inputDirPath = "file:" + inputDir.getAbsolutePath(); String storeName = Utils.getUniqueString("store"); Properties props = @@ -283,7 +282,7 @@ public void testKIFRepushActiveActiveStore(boolean isChunkingEnabled) throws Exc // create a active-active enabled store and run batch push job // batch job contains 100 records File inputDir = getTempDataDirectory(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); String inputDirPath = "file:" + inputDir.getAbsolutePath(); String storeName = Utils.getUniqueString("store-kif-repush"); Properties props = @@ -456,7 +455,7 @@ public void testKIFRepushActiveActiveStore(boolean isChunkingEnabled) throws Exc public void testActiveActiveStoreRestart() throws Exception { // create a active-active 
enabled store and run batch push job File inputDir = getTempDataDirectory(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); String inputDirPath = "file:" + inputDir.getAbsolutePath(); String storeName = Utils.getUniqueString("store"); Properties props = @@ -537,7 +536,7 @@ public void testAAIngestionWithStoreView() throws Exception { // create a active-active enabled store and run batch push job // batch job contains 100 records File inputDir = getTempDataDirectory(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); String inputDirPath = "file:" + inputDir.getAbsolutePath(); String storeName = Utils.getUniqueString("store"); Properties props = diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestActiveActiveReplicationForIncPush.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestActiveActiveReplicationForIncPush.java index 542092b4af..8faa55d38a 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestActiveActiveReplicationForIncPush.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestActiveActiveReplicationForIncPush.java @@ -145,17 +145,17 @@ public void testAAReplicationForIncrementalPushToRT() throws Exception { IntegrationTestPushUtils.defaultVPJProps(multiRegionMultiClusterWrapper, inputDirPathInc2, storeName); propsInc2.put(SEND_CONTROL_MESSAGES_DIRECTLY, true); - Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithUserSchema(inputDirBatch, true, 100); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDirBatch); String keySchemaStr = recordSchema.getField(VenicePushJob.DEFAULT_KEY_FIELD_PROP).schema().toString(); String valueSchemaStr = recordSchema.getField(VenicePushJob.DEFAULT_VALUE_FIELD_PROP).schema().toString(); propsInc1.setProperty(INCREMENTAL_PUSH, "true"); propsInc1.put(SOURCE_GRID_FABRIC, dcNames[2]); - TestWriteUtils.writeSimpleAvroFileWithUserSchema2(inputDirInc1); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema2(inputDirInc1); propsInc2.setProperty(INCREMENTAL_PUSH, "true"); propsInc2.put(SOURCE_GRID_FABRIC, dcNames[1]); - TestWriteUtils.writeSimpleAvroFileWithUserSchema3(inputDirInc2); + TestWriteUtils.writeSimpleAvroFileWithString2StringSchema3(inputDirInc2); TestUtils.assertCommand(parentControllerClient.createNewStore(storeName, "owner", keySchemaStr, valueSchemaStr)); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestActiveActiveReplicationWithDownRegion.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestActiveActiveReplicationWithDownRegion.java index 25a940ffb2..e81837fc06 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestActiveActiveReplicationWithDownRegion.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestActiveActiveReplicationWithDownRegion.java @@ -142,7 +142,7 @@ public void testDownedKafka() throws Exception { String storeName = Utils.getUniqueString("test-store"); String parentControllerUrls = multiRegionMultiClusterWrapper.getControllerConnectString(); try (ControllerClient parentControllerClient = new 
ControllerClient(clusterName, parentControllerUrls)) { - parentControllerClient.createNewStore(storeName, "owner", INT_SCHEMA, STRING_SCHEMA); + parentControllerClient.createNewStore(storeName, "owner", INT_SCHEMA.toString(), STRING_SCHEMA.toString()); TestUtils.updateStoreToHybrid( storeName, parentControllerClient, diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBackupVersionDatabaseOptimization.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBackupVersionDatabaseOptimization.java index 3cb799073a..3982140ef9 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBackupVersionDatabaseOptimization.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBackupVersionDatabaseOptimization.java @@ -8,7 +8,6 @@ import static com.linkedin.venice.utils.IntegrationTestPushUtils.defaultVPJProps; import static com.linkedin.venice.utils.TestWriteUtils.DEFAULT_USER_DATA_RECORD_COUNT; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertTrue; @@ -23,6 +22,7 @@ import com.linkedin.venice.integration.utils.VeniceClusterWrapper; import com.linkedin.venice.integration.utils.VeniceServerWrapper; import com.linkedin.venice.utils.TestUtils; +import com.linkedin.venice.utils.TestWriteUtils; import com.linkedin.venice.utils.Utils; import io.tehuti.Metric; import io.tehuti.metrics.MetricsRepository; @@ -81,7 +81,7 @@ public void verifyBackupVersionDatabaseOptimizationOccurs() throws IOException { String storeName = Utils.getUniqueString("backup-version-optimization-validation-store"); File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); // records 1-100 + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // records 1-100 Properties vpjProperties = defaultVPJProps(venice, inputDirPath, storeName); try (ControllerClient controllerClient = createStoreForJob(venice.getClusterName(), recordSchema, vpjProperties); @@ -122,7 +122,7 @@ public void execute() { int recordCountOf2ndRun = DEFAULT_USER_DATA_RECORD_COUNT * 2; File inputDir2 = getTempDataDirectory(); String inputDirPath2 = "file://" + inputDir2.getAbsolutePath(); - writeSimpleAvroFileWithUserSchema(inputDir2, true, recordCountOf2ndRun); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir2, recordCountOf2ndRun); Properties vpjProperties2 = defaultVPJProps(venice, inputDirPath2, storeName); runVPJ(vpjProperties2, 2, controllerClient); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java index 7d666f6d0c..f6869554c5 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java @@ -22,25 +22,25 @@ import static com.linkedin.venice.utils.IntegrationTestPushUtils.createStoreForJob; import static com.linkedin.venice.utils.IntegrationTestPushUtils.defaultVPJProps; import 
static com.linkedin.venice.utils.IntegrationTestPushUtils.updateStore; -import static com.linkedin.venice.utils.TestWriteUtils.ETL_KEY_SCHEMA_STRING; -import static com.linkedin.venice.utils.TestWriteUtils.ETL_UNION_VALUE_SCHEMA_STRING_WITHOUT_NULL; -import static com.linkedin.venice.utils.TestWriteUtils.ETL_UNION_VALUE_SCHEMA_STRING_WITH_NULL; -import static com.linkedin.venice.utils.TestWriteUtils.ETL_VALUE_SCHEMA_STRING; +import static com.linkedin.venice.utils.TestWriteUtils.ETL_KEY_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.ETL_UNION_VALUE_WITHOUT_NULL_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.ETL_UNION_VALUE_WITH_NULL_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.ETL_VALUE_SCHEMA; import static com.linkedin.venice.utils.TestWriteUtils.TestRecordType; +import static com.linkedin.venice.utils.TestWriteUtils.USER_SCHEMA; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; import static com.linkedin.venice.utils.TestWriteUtils.loadFileAsString; -import static com.linkedin.venice.utils.TestWriteUtils.writeAlternateSimpleAvroFileWithUserSchema; +import static com.linkedin.venice.utils.TestWriteUtils.writeAlternateSimpleAvroFileWithStringToStringSchema; import static com.linkedin.venice.utils.TestWriteUtils.writeAvroFileWithManyFloatsAndCustomTotalSize; import static com.linkedin.venice.utils.TestWriteUtils.writeETLFileWithUnionWithNullSchema; import static com.linkedin.venice.utils.TestWriteUtils.writeETLFileWithUnionWithoutNullSchema; import static com.linkedin.venice.utils.TestWriteUtils.writeETLFileWithUserSchema; -import static com.linkedin.venice.utils.TestWriteUtils.writeEmptyAvroFileWithUserSchema; +import static com.linkedin.venice.utils.TestWriteUtils.writeEmptyAvroFile; import static com.linkedin.venice.utils.TestWriteUtils.writeSchemaWithUnknownFieldIntoAvroFile; import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithASchemaWithAWrongDefaultValue; import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithCustomSize; import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithDuplicateKey; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema2; +import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema2; import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; import com.linkedin.venice.client.exceptions.VeniceClientException; @@ -196,7 +196,7 @@ private void testStoreWithDuplicateKeys(boolean isDuplicateKeyAllowed) throws Ex @Test(timeOut = TEST_TIMEOUT) public void testEmptyPush() throws Exception { testBatchStore( - inputDir -> new KeyAndValueSchemas(writeEmptyAvroFileWithUserSchema(inputDir)), + inputDir -> new KeyAndValueSchemas(writeEmptyAvroFile(inputDir, USER_SCHEMA)), properties -> {}, (avroClient, vsonClient, metricsRepository) -> {}); } @@ -257,7 +257,7 @@ public void testCompressingRecord(boolean compressionMetricCollectionEnabled, bo } }; String storeName = testBatchStore( - inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema(inputDir, false)), + inputDir -> new KeyAndValueSchemas(TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir)), properties -> { properties .setProperty(COMPRESSION_METRIC_COLLECTION_ENABLED, String.valueOf(compressionMetricCollectionEnabled)); @@ -273,7 +273,7 @@ public void 
testCompressingRecord(boolean compressionMetricCollectionEnabled, bo @Test(timeOut = TEST_TIMEOUT) public void testZstdCompressingAvroRecordCanFailWhenNoFallbackAvailable() throws Exception { testBatchStore( - inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema(inputDir, false)), + inputDir -> new KeyAndValueSchemas(TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir)), properties -> {}, (avroClient, vsonClient, metricsRepository) -> { // test single get. Can throw exception since no fallback available @@ -330,7 +330,7 @@ static VPJValidator getSimpleFileWithUserSchemaValidatorForZstd() { @Test(timeOut = TEST_TIMEOUT) public void testZstdCompressingAvroRecordWhenNoFallbackAvailableWithSleep() throws Exception { testBatchStore( - inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema(inputDir, false)), + inputDir -> new KeyAndValueSchemas(TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir)), properties -> properties.setProperty(ZSTD_COMPRESSION_LEVEL, String.valueOf(17)), getSimpleFileWithUserSchemaValidatorForZstd(), new UpdateStoreQueryParams().setCompressionStrategy(CompressionStrategy.ZSTD_WITH_DICT)); @@ -342,7 +342,7 @@ public void testZstdCompressingAvroRecordWhenFallbackAvailable( boolean useMapperToBuildDict) throws Exception { // Running a batch push first. String storeName = testBatchStore( - inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema(inputDir, false)), + inputDir -> new KeyAndValueSchemas(TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir)), properties -> { properties .setProperty(COMPRESSION_METRIC_COLLECTION_ENABLED, String.valueOf(compressionMetricCollectionEnabled)); @@ -378,7 +378,7 @@ public void testZstdCompressingAvroRecordWhenFallbackAvailable( } }; testBatchStore( - inputDir -> new KeyAndValueSchemas(writeAlternateSimpleAvroFileWithUserSchema(inputDir, false)), + inputDir -> new KeyAndValueSchemas(writeAlternateSimpleAvroFileWithStringToStringSchema(inputDir)), properties -> { properties .setProperty(COMPRESSION_METRIC_COLLECTION_ENABLED, String.valueOf(compressionMetricCollectionEnabled)); @@ -411,7 +411,7 @@ public void testZstdCompressingAvroRecordWhenFallbackAvailable( @Test(timeOut = TEST_TIMEOUT) public void testEarlyDeleteBackupStore() throws Exception { String storeName = testBatchStoreMultiVersionPush( - inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema(inputDir, false)), + inputDir -> new KeyAndValueSchemas(TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir)), properties -> {}, (avroClient, vsonClient, metricsRepository) -> { // test single get @@ -440,7 +440,7 @@ public void testEarlyDeleteBackupStore() throws Exception { @Test(timeOut = TEST_TIMEOUT) public void testIncrementalPush() throws Exception { String storeName = testBatchStore( - inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema(inputDir)), + inputDir -> new KeyAndValueSchemas(TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir)), properties -> {}, (avroClient, vsonClient, metricsRepository) -> { for (int i = 1; i <= 100; i++) { @@ -450,7 +450,7 @@ public void testIncrementalPush() throws Exception { new UpdateStoreQueryParams().setIncrementalPushEnabled(true)); testBatchStore( - inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema2(inputDir)), + inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithStringToStringSchema2(inputDir)), properties -> properties.setProperty(INCREMENTAL_PUSH, "true"), 
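// Illustrative note (not part of the diff): the renamed helpers keep the data layout the
// assertions in these tests rely on. TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir)
// writes string keys "1".."100" with values "test_name_" + i, while the *Schema2 variant used for
// the incremental pushes covers keys "51".."150" with values "test_name_" + (i * 2).
// A minimal read-side sketch of what that implies after a full push plus an incremental push
// (client setup elided, names assumed from the surrounding tests):
//
//   for (int i = 1; i <= 50; i++) {      // keys untouched by the incremental push
//     Assert.assertEquals(avroClient.get(Integer.toString(i)).get().toString(), "test_name_" + i);
//   }
//   for (int i = 51; i <= 150; i++) {    // keys overwritten or added by the incremental push
//     Assert.assertEquals(avroClient.get(Integer.toString(i)).get().toString(), "test_name_" + (i * 2));
//   }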
(avroClient, vsonClient, metricsRepository) -> { // Original data from the full push @@ -471,7 +471,7 @@ public void testIncrementalPushWithCompression( boolean compressionMetricCollectionEnabled, boolean useMapperToBuildDict) throws Exception { String storeName = testBatchStore( - inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema(inputDir, false)), + inputDir -> new KeyAndValueSchemas(TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir)), properties -> { properties .setProperty(COMPRESSION_METRIC_COLLECTION_ENABLED, String.valueOf(compressionMetricCollectionEnabled)); @@ -483,20 +483,26 @@ public void testIncrementalPushWithCompression( .setHybridOffsetLagThreshold(10) .setHybridRewindSeconds(0)); - testBatchStore(inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema2(inputDir)), properties -> { - properties.setProperty(INCREMENTAL_PUSH, "true"); - properties.setProperty(COMPRESSION_METRIC_COLLECTION_ENABLED, String.valueOf(compressionMetricCollectionEnabled)); - properties.setProperty(USE_MAPPER_TO_BUILD_DICTIONARY, String.valueOf(useMapperToBuildDict)); - }, (avroClient, vsonClient, metricsRepository) -> { - // Original data from the full push - for (int i = 1; i <= 50; i++) { - Assert.assertEquals(avroClient.get(Integer.toString(i)).get().toString(), "test_name_" + i); - } - // Modified data from the inc push - for (int i = 51; i <= 150; i++) { - Assert.assertEquals(avroClient.get(Integer.toString(i)).get().toString(), "test_name_" + (i * 2)); - } - }, storeName, null); + testBatchStore( + inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithStringToStringSchema2(inputDir)), + properties -> { + properties.setProperty(INCREMENTAL_PUSH, "true"); + properties + .setProperty(COMPRESSION_METRIC_COLLECTION_ENABLED, String.valueOf(compressionMetricCollectionEnabled)); + properties.setProperty(USE_MAPPER_TO_BUILD_DICTIONARY, String.valueOf(useMapperToBuildDict)); + }, + (avroClient, vsonClient, metricsRepository) -> { + // Original data from the full push + for (int i = 1; i <= 50; i++) { + Assert.assertEquals(avroClient.get(Integer.toString(i)).get().toString(), "test_name_" + i); + } + // Modified data from the inc push + for (int i = 51; i <= 150; i++) { + Assert.assertEquals(avroClient.get(Integer.toString(i)).get().toString(), "test_name_" + (i * 2)); + } + }, + storeName, + null); } @Test(timeOut = TEST_TIMEOUT) @@ -507,7 +513,7 @@ public void testIncrementalPushWritesToRealTimeTopicWithPolicy() throws Exceptio LOGGER.info("Start of {}", uniqueTestId); try { String storeName = testBatchStore( - inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema(inputDir)), + inputDir -> new KeyAndValueSchemas(TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir)), properties -> {}, (avroClient, vsonClient, metricsRepository) -> { for (int i = 1; i <= 100; i++) { @@ -521,7 +527,7 @@ public void testIncrementalPushWritesToRealTimeTopicWithPolicy() throws Exceptio .setHybridRewindSeconds(0)); testBatchStore( - inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema2(inputDir)), + inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithStringToStringSchema2(inputDir)), properties -> properties.setProperty(INCREMENTAL_PUSH, "true"), (avroClient, vsonClient, metricsRepository) -> { for (int i = 51; i <= 150; i++) { @@ -532,7 +538,7 @@ public void testIncrementalPushWritesToRealTimeTopicWithPolicy() throws Exceptio null); testBatchStore( - inputDir -> new 
KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema(inputDir)), + inputDir -> new KeyAndValueSchemas(TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir)), properties -> {}, (avroClient, vsonClient, metricsRepository) -> { TestUtils.waitForNonDeterministicAssertion(30, TimeUnit.SECONDS, true, () -> { @@ -556,7 +562,7 @@ public void testIncrementalPushWritesToRealTimeTopicWithPolicy() throws Exceptio @Test(timeOut = TEST_TIMEOUT) public void testMetaStoreSchemaValidation() throws Exception { String storeName = testBatchStore( - inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema(inputDir, false)), + inputDir -> new KeyAndValueSchemas(TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir)), properties -> {}, (avroClient, vsonClient, metricsRepository) -> { // test single get @@ -601,7 +607,7 @@ public void testKafkaInputBatchJob() throws Exception { } }; String storeName = testBatchStore( - inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema(inputDir, false)), + inputDir -> new KeyAndValueSchemas(TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir)), properties -> {}, validator); // Re-push with Kafka Input @@ -617,7 +623,7 @@ public void testKafkaInputAAStore() throws Exception { } }; String storeName = testBatchStore( - inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema(inputDir, false)), + inputDir -> new KeyAndValueSchemas(TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir)), properties -> {}, validator, new UpdateStoreQueryParams().setActiveActiveReplicationEnabled(true) @@ -637,7 +643,7 @@ public void testReducerCountValidation() throws Exception { } }; String storeName = testBatchStore( - inputDir -> new KeyAndValueSchemas(writeSimpleAvroFileWithUserSchema(inputDir, false, 1)), + inputDir -> new KeyAndValueSchemas(TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, 1)), properties -> {}, validator, new UpdateStoreQueryParams().setPartitionCount(3)); @@ -663,25 +669,21 @@ public void testReducerCountValidation() throws Exception { @Test(timeOut = TEST_TIMEOUT) public void testBatchFromETL() throws Exception { testBatchStore(inputDir -> { - writeETLFileWithUserSchema(inputDir, false); - return new KeyAndValueSchemas(Schema.parse(ETL_KEY_SCHEMA_STRING), Schema.parse(ETL_VALUE_SCHEMA_STRING)); + writeETLFileWithUserSchema(inputDir); + return new KeyAndValueSchemas(ETL_KEY_SCHEMA, ETL_VALUE_SCHEMA); }, properties -> properties.setProperty(SOURCE_ETL, "true"), (avroClient, vsonClient, metricsRepository) -> { // test single get for (int i = 1; i <= 50; i++) { - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - GenericRecord value = new GenericData.Record(Schema.parse(ETL_VALUE_SCHEMA_STRING)); - + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); + GenericRecord value = new GenericData.Record(ETL_VALUE_SCHEMA); key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); value.put(DEFAULT_VALUE_FIELD_PROP, "test_name_" + i); - Assert.assertEquals(avroClient.get(key).get().toString(), value.toString()); } for (int i = 51; i <= 100; i++) { - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - Assert.assertNull(avroClient.get(key).get()); } }); @@ -690,33 +692,25 @@ public void testBatchFromETL() throws Exception { @Test(timeOut = TEST_TIMEOUT) public void 
testBatchFromETLWithForUnionWithNullSchema() throws Exception { testBatchStore(inputDir -> { - writeETLFileWithUnionWithNullSchema(inputDir, false); - return new KeyAndValueSchemas( - Schema.parse(ETL_KEY_SCHEMA_STRING), - Schema.parse(ETL_UNION_VALUE_SCHEMA_STRING_WITH_NULL)); + writeETLFileWithUnionWithNullSchema(inputDir); + return new KeyAndValueSchemas(ETL_KEY_SCHEMA, ETL_UNION_VALUE_WITH_NULL_SCHEMA); }, properties -> properties.setProperty(SOURCE_ETL, "true"), (avroClient, vsonClient, metricsRepository) -> { // test single get for (int i = 1; i <= 25; i++) { - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - Assert.assertEquals(avroClient.get(key).get().toString(), "string_" + i); } for (int i = 26; i <= 50; i++) { - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - Assert.assertEquals(avroClient.get(key).get(), i); } for (int i = 51; i <= 100; i++) { - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - Assert.assertNull(avroClient.get(key).get()); } }); @@ -725,33 +719,25 @@ public void testBatchFromETLWithForUnionWithNullSchema() throws Exception { @Test(timeOut = TEST_TIMEOUT) public void testBatchFromETLWithForUnionWithoutNullSchema() throws Exception { testBatchStore(inputDir -> { - writeETLFileWithUnionWithoutNullSchema(inputDir, false); - return new KeyAndValueSchemas( - Schema.parse(ETL_KEY_SCHEMA_STRING), - Schema.parse(ETL_UNION_VALUE_SCHEMA_STRING_WITHOUT_NULL)); + writeETLFileWithUnionWithoutNullSchema(inputDir); + return new KeyAndValueSchemas(ETL_KEY_SCHEMA, ETL_UNION_VALUE_WITHOUT_NULL_SCHEMA); }, properties -> properties.setProperty(SOURCE_ETL, "true"), (avroClient, vsonClient, metricsRepository) -> { // test single get for (int i = 1; i <= 25; i++) { - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - Assert.assertEquals(avroClient.get(key).get().toString(), "string_" + i); } for (int i = 26; i <= 50; i++) { - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - Assert.assertEquals(avroClient.get(key).get(), i); } for (int i = 51; i <= 100; i++) { - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - Assert.assertNull(avroClient.get(key).get()); } }); @@ -1286,7 +1272,7 @@ public void testKafkaInputBatchJobSucceedsWhenSourceTopicIsEmpty() throws Except // Run an Empty Push String storeName = testBatchStore( - inputDir -> new KeyAndValueSchemas(writeEmptyAvroFileWithUserSchema(inputDir)), + inputDir -> new KeyAndValueSchemas(writeEmptyAvroFile(inputDir, USER_SCHEMA)), properties -> {}, emptyValidator); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestChangeCaptureIngestion.java 
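// Illustrative note (not part of the diff): the ETL_* constants are likewise assumed to be
// pre-parsed org.apache.avro.Schema objects now, so the ETL batch tests can build generic records
// against them directly instead of re-parsing the JSON schema string inside every loop iteration.
// A minimal sketch of the pattern used above:
//
//   GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA);
//   key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i));
//   GenericRecord value = new GenericData.Record(ETL_VALUE_SCHEMA);
//   value.put(DEFAULT_VALUE_FIELD_PROP, "test_name_" + i);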
b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestChangeCaptureIngestion.java index 5eb1e68145..7921ac16c5 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestChangeCaptureIngestion.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestChangeCaptureIngestion.java @@ -26,8 +26,8 @@ import static com.linkedin.venice.utils.IntegrationTestPushUtils.createStoreForJob; import static com.linkedin.venice.utils.IntegrationTestPushUtils.sendStreamingDeleteRecord; import static com.linkedin.venice.utils.IntegrationTestPushUtils.sendStreamingRecord; +import static com.linkedin.venice.utils.TestWriteUtils.DEFAULT_USER_DATA_RECORD_COUNT; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; import com.linkedin.davinci.consumer.ChangeEvent; import com.linkedin.davinci.consumer.ChangelogClientConfig; @@ -159,7 +159,8 @@ public void testAAIngestionWithStoreView(CompressionStrategy compressionStrategy // TODO: Something seems to be wrong in the test set up or code that makes it so that the push job // will error if we publish records which exceed the chunking threshold (something about getting a cluster // lock when making the system stores?) - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir, LARGE_RECORD_SIZE); + Schema recordSchema = TestWriteUtils + .writeSimpleAvroFileWithStringToStringSchema(inputDir, DEFAULT_USER_DATA_RECORD_COUNT, LARGE_RECORD_SIZE); // Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); String inputDirPath = "file:" + inputDir.getAbsolutePath(); String storeName = Utils.getUniqueString("store"); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestEmptyPush.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestEmptyPush.java index 2cc33e4b6c..250011dc55 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestEmptyPush.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestEmptyPush.java @@ -8,8 +8,9 @@ import static com.linkedin.venice.utils.IntegrationTestPushUtils.runVPJ; import static com.linkedin.venice.utils.IntegrationTestPushUtils.sendStreamingRecord; import static com.linkedin.venice.utils.TestWriteUtils.STRING_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.USER_SCHEMA; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeEmptyAvroFileWithUserSchema; +import static com.linkedin.venice.utils.TestWriteUtils.writeEmptyAvroFile; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotEquals; import static org.testng.Assert.assertNotNull; @@ -80,14 +81,14 @@ public void testEmptyPushWithZstdWithDictCompression(boolean sendControlMessageD String storeName = Utils.getUniqueString("test_empty_push_store"); try (ControllerClient controllerClient = new ControllerClient(venice.getClusterName(), venice.getAllControllersURLs())) { - controllerClient.createNewStore(storeName, "owner", STRING_SCHEMA, STRING_SCHEMA); + controllerClient.createNewStore(storeName, "owner", STRING_SCHEMA.toString(), STRING_SCHEMA.toString()); controllerClient.updateStore( storeName, new 
UpdateStoreQueryParams().setStorageQuotaInByte(Store.UNLIMITED_STORAGE_QUOTA) .setCompressionStrategy(CompressionStrategy.ZSTD_WITH_DICT)); File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - writeEmptyAvroFileWithUserSchema(inputDir); + writeEmptyAvroFile(inputDir, USER_SCHEMA); Properties vpjProperties = defaultVPJProps(venice, inputDirPath, storeName); vpjProperties.setProperty(SEND_CONTROL_MESSAGES_DIRECTLY, Boolean.toString(sendControlMessageDirectly)); @@ -113,7 +114,7 @@ public void testEmptyPushByChangingCompressionStrategyForHybridStore() throws IO venice.getPubSubBrokerWrapper(), venice.getPubSubTopicRepository()) .getTopicManager()) { - controllerClient.createNewStore(storeName, "owner", STRING_SCHEMA, STRING_SCHEMA); + controllerClient.createNewStore(storeName, "owner", STRING_SCHEMA.toString(), STRING_SCHEMA.toString()); controllerClient.updateStore( storeName, new UpdateStoreQueryParams().setStorageQuotaInByte(Store.UNLIMITED_STORAGE_QUOTA) @@ -123,7 +124,7 @@ public void testEmptyPushByChangingCompressionStrategyForHybridStore() throws IO File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - writeEmptyAvroFileWithUserSchema(inputDir); + writeEmptyAvroFile(inputDir, USER_SCHEMA); // First empty push with dict compression enabled. Properties vpjProperties = defaultVPJProps(venice, inputDirPath, storeName); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestHybrid.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestHybrid.java index e9675b5e51..04a22a24b2 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestHybrid.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestHybrid.java @@ -27,7 +27,6 @@ import static com.linkedin.venice.utils.IntegrationTestPushUtils.sendStreamingRecord; import static com.linkedin.venice.utils.TestWriteUtils.STRING_SCHEMA; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotEquals; @@ -101,6 +100,7 @@ import com.linkedin.venice.utils.Pair; import com.linkedin.venice.utils.TestMockTime; import com.linkedin.venice.utils.TestUtils; +import com.linkedin.venice.utils.TestWriteUtils; import com.linkedin.venice.utils.Time; import com.linkedin.venice.utils.Utils; import com.linkedin.venice.utils.VeniceProperties; @@ -209,7 +209,7 @@ public void testHybridInitializationOnMultiColo() throws IOException { final String storeName = Utils.getUniqueString("multi-colo-hybrid-store"); // Create store at parent, make it a hybrid store - controllerClient.createNewStore(storeName, "owner", STRING_SCHEMA, STRING_SCHEMA); + controllerClient.createNewStore(storeName, "owner", STRING_SCHEMA.toString(), STRING_SCHEMA.toString()); controllerClient.updateStore( storeName, new UpdateStoreQueryParams().setStorageQuotaInByte(Store.UNLIMITED_STORAGE_QUOTA) @@ -322,7 +322,7 @@ public void testHybridEndToEnd(boolean multiDivStream, boolean chunkingEnabled, String storeName = Utils.getUniqueString("hybrid-store"); File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - Schema recordSchema = 
writeSimpleAvroFileWithUserSchema(inputDir); // records 1-100 + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // records 1-100 Properties vpjProperties = defaultVPJProps(venice, inputDirPath, storeName); try (ControllerClient controllerClient = createStoreForJob(venice.getClusterName(), recordSchema, vpjProperties); @@ -843,7 +843,7 @@ public void testLeaderHonorLastTopicSwitchMessage() throws Exception { String storeName = Utils.getUniqueString("hybrid-store"); // Create store , make it a hybrid store - controllerClient.createNewStore(storeName, "owner", STRING_SCHEMA, STRING_SCHEMA); + controllerClient.createNewStore(storeName, "owner", STRING_SCHEMA.toString(), STRING_SCHEMA.toString()); controllerClient.updateStore( storeName, new UpdateStoreQueryParams().setStorageQuotaInByte(Store.UNLIMITED_STORAGE_QUOTA) @@ -878,7 +878,7 @@ public void testLeaderHonorLastTopicSwitchMessage() throws Exception { veniceWriterProperties.put(KAFKA_BOOTSTRAP_SERVERS, venice.getPubSubBrokerWrapper().getAddress()); veniceWriterProperties.putAll( PubSubBrokerWrapper.getBrokerDetailsForClients(Collections.singletonList(venice.getPubSubBrokerWrapper()))); - AvroSerializer stringSerializer = new AvroSerializer(Schema.parse(STRING_SCHEMA)); + AvroSerializer stringSerializer = new AvroSerializer(STRING_SCHEMA); PubSubProducerAdapterFactory pubSubProducerAdapterFactory = venice.getPubSubBrokerWrapper().getPubSubClientsFactory().getProducerAdapterFactory(); @@ -971,7 +971,7 @@ public void testLeaderCanReleaseLatch(boolean isIngestionIsolationEnabled) { SystemProducer producer = null; try (ControllerClient controllerClient = new ControllerClient(clusterName, veniceClusterWrapper.getAllControllersURLs())) { - controllerClient.createNewStore(storeName, "owner", STRING_SCHEMA, STRING_SCHEMA); + controllerClient.createNewStore(storeName, "owner", STRING_SCHEMA.toString(), STRING_SCHEMA.toString()); controllerClient.updateStore( storeName, new UpdateStoreQueryParams().setStorageQuotaInByte(Store.UNLIMITED_STORAGE_QUOTA) @@ -1156,7 +1156,7 @@ public void testHybridStoreTimeLagThresholdWithEmptyRT(boolean isRealTimeTopicEm String storeName = Utils.getUniqueString("hybrid-store"); File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); // records 1-100 + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // records 1-100 Properties vpjProperties = defaultVPJProps(venice, inputDirPath, storeName); try (ControllerClient controllerClient = createStoreForJob(venice.getClusterName(), recordSchema, vpjProperties); @@ -1277,7 +1277,7 @@ public void testDuplicatedMessagesWontBePersisted( String storeName = Utils.getUniqueString("hybrid-store"); File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); // records 1-100 + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // records 1-100 Properties vpjProperties = defaultVPJProps(venice, inputDirPath, storeName); try ( @@ -1309,9 +1309,9 @@ public void testDuplicatedMessagesWontBePersisted( veniceWriterProperties.put(KAFKA_BOOTSTRAP_SERVERS, venice.getPubSubBrokerWrapper().getAddress()); veniceWriterProperties.putAll( PubSubBrokerWrapper.getBrokerDetailsForClients(Collections.singletonList(venice.getPubSubBrokerWrapper()))); - 
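// Illustrative note (not part of the diff): TestWriteUtils.STRING_SCHEMA (and the other schema
// constants such as INT_SCHEMA and USER_SCHEMA) is assumed here to now be an org.apache.avro.Schema
// rather than a JSON String, which explains the two call-site patterns in this change:
//
//   // APIs that accept a Schema object can drop the Schema.parse(...) wrapper:
//   AvroSerializer stringSerializer = new AvroSerializer(STRING_SCHEMA);
//   AvroGenericDeserializer stringDeserializer = new AvroGenericDeserializer<>(STRING_SCHEMA, STRING_SCHEMA);
//
//   // APIs that still take a schema string (e.g. ControllerClient#createNewStore) need an explicit toString():
//   controllerClient.createNewStore(storeName, "owner", STRING_SCHEMA.toString(), STRING_SCHEMA.toString());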
AvroSerializer stringSerializer = new AvroSerializer(Schema.parse(STRING_SCHEMA)); + AvroSerializer stringSerializer = new AvroSerializer(STRING_SCHEMA); AvroGenericDeserializer stringDeserializer = - new AvroGenericDeserializer<>(Schema.parse(STRING_SCHEMA), Schema.parse(STRING_SCHEMA)); + new AvroGenericDeserializer<>(STRING_SCHEMA, STRING_SCHEMA); try (VeniceWriter realTimeTopicWriter = TestUtils.getVeniceWriterFactory(veniceWriterProperties, pubSubProducerAdapterFactory) .createVeniceWriter(new VeniceWriterOptions.Builder(Version.composeRealTimeTopic(storeName)).build())) { @@ -1463,7 +1463,7 @@ public void testOffsetRecordSyncedForIngestionIsolationHandover() throws Excepti String storeName = Utils.getUniqueString("hybrid-store"); File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); // records 1-100 + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // records 1-100 Properties vpjProperties = defaultVPJProps(venice, inputDirPath, storeName); try (ControllerClient controllerClient = createStoreForJob(venice.getClusterName(), recordSchema, vpjProperties); @@ -1571,7 +1571,7 @@ public void testVersionSwapDeferredWithHybrid() throws Exception { String storeName = Utils.getUniqueString("hybrid-store"); File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); // records 1-100 + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // records 1-100 Properties vpjProperties = defaultVPJProps(venice, inputDirPath, storeName); try (ControllerClient controllerClient = createStoreForJob(venice.getClusterName(), recordSchema, vpjProperties); AvroGenericStoreClient client = ClientFactory.getAndStartGenericAvroClient( @@ -1598,7 +1598,7 @@ public void testVersionSwapDeferredWithHybrid() throws Exception { veniceWriterProperties.put(VeniceWriter.MAX_ELAPSED_TIME_FOR_SEGMENT_IN_MS, "0"); veniceWriterProperties.putAll( PubSubBrokerWrapper.getBrokerDetailsForClients(Collections.singletonList(venice.getPubSubBrokerWrapper()))); - AvroSerializer stringSerializer = new AvroSerializer(Schema.parse(STRING_SCHEMA)); + AvroSerializer stringSerializer = new AvroSerializer(STRING_SCHEMA); String prefix = "foo_object_"; PubSubProducerAdapterFactory pubSubProducerAdapterFactory = venice.getPubSubBrokerWrapper().getPubSubClientsFactory().getProducerAdapterFactory(); @@ -1645,7 +1645,7 @@ public void testHybridDIVEnhancement() throws Exception { String storeName = Utils.getUniqueString("hybrid-store"); File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); // records 1-100 + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // records 1-100 Properties vpjProperties = defaultVPJProps(venice, inputDirPath, storeName); try (ControllerClient controllerClient = createStoreForJob(venice.getClusterName(), recordSchema, vpjProperties); AvroGenericStoreClient client = ClientFactory.getAndStartGenericAvroClient( @@ -1667,7 +1667,7 @@ public void testHybridDIVEnhancement() throws Exception { veniceWriterProperties.put(VeniceWriter.MAX_ELAPSED_TIME_FOR_SEGMENT_IN_MS, "0"); veniceWriterProperties.putAll( 
PubSubBrokerWrapper.getBrokerDetailsForClients(Collections.singletonList(venice.getPubSubBrokerWrapper()))); - AvroSerializer stringSerializer = new AvroSerializer(Schema.parse(STRING_SCHEMA)); + AvroSerializer stringSerializer = new AvroSerializer(STRING_SCHEMA); String prefix = "hybrid_DIV_enhancement_"; PubSubProducerAdapterFactory pubSubProducerAdapterFactory = venice.getPubSubBrokerWrapper().getPubSubClientsFactory().getProducerAdapterFactory(); @@ -1717,7 +1717,8 @@ public void testHybridWithAmplificationFactor(boolean enableIngestionIsolation) try (ControllerClient controllerClient = new ControllerClient(cluster.getClusterName(), cluster.getAllControllersURLs())) { - TestUtils.assertCommand(controllerClient.createNewStore(storeName, "owner", STRING_SCHEMA, STRING_SCHEMA)); + TestUtils.assertCommand( + controllerClient.createNewStore(storeName, "owner", STRING_SCHEMA.toString(), STRING_SCHEMA.toString())); TestUtils.assertCommand(controllerClient.updateStore(storeName, params)); TestUtils.waitForNonDeterministicAssertion(30, TimeUnit.SECONDS, true, () -> { StoreResponse storeResponse = TestUtils.assertCommand(controllerClient.getStore(storeName)); @@ -1733,8 +1734,8 @@ public void testHybridWithAmplificationFactor(boolean enableIngestionIsolation) cluster.createVersion( storeName, - STRING_SCHEMA, - STRING_SCHEMA, + STRING_SCHEMA.toString(), + STRING_SCHEMA.toString(), IntStream.range(0, keyCount) .mapToObj(i -> new AbstractMap.SimpleEntry<>(String.valueOf(i), String.valueOf(i)))); @@ -1757,8 +1758,8 @@ public void testHybridWithAmplificationFactor(boolean enableIngestionIsolation) // Create a new version with updated amplification factor cluster.createVersion( storeName, - STRING_SCHEMA, - STRING_SCHEMA, + STRING_SCHEMA.toString(), + STRING_SCHEMA.toString(), IntStream.range(0, keyCount) .mapToObj(i -> new AbstractMap.SimpleEntry<>(String.valueOf(i), String.valueOf(i)))); TestUtils.waitForNonDeterministicAssertion(60, TimeUnit.SECONDS, true, true, () -> { @@ -1785,8 +1786,8 @@ public void testHybridWithAmplificationFactor(boolean enableIngestionIsolation) // Create a new version with updated amplification factor cluster.createVersion( storeName, - STRING_SCHEMA, - STRING_SCHEMA, + STRING_SCHEMA.toString(), + STRING_SCHEMA.toString(), IntStream.range(0, keyCount) .mapToObj(i -> new AbstractMap.SimpleEntry<>(String.valueOf(i), String.valueOf(i)))); watermarkOfSuccessfullyVerifiedKeys.set(0); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestHybridQuota.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestHybridQuota.java index 3c241125a5..06066a4f3c 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestHybridQuota.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestHybridQuota.java @@ -14,7 +14,6 @@ import static com.linkedin.venice.utils.IntegrationTestPushUtils.sendCustomSizeStreamingRecord; import static com.linkedin.venice.utils.IntegrationTestPushUtils.sendStreamingRecord; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; @@ -38,6 +37,7 @@ import com.linkedin.venice.pushmonitor.HybridStoreQuotaStatus; import com.linkedin.venice.utils.IntegrationTestPushUtils; import 
com.linkedin.venice.utils.TestUtils; +import com.linkedin.venice.utils.TestWriteUtils; import com.linkedin.venice.utils.Time; import com.linkedin.venice.utils.Utils; import com.linkedin.venice.utils.locks.ClusterLockManager; @@ -122,7 +122,7 @@ public void testHybridStoreQuota(boolean chunkingEnabled, boolean isStreamReproc String storeName = Utils.getUniqueString("test-store") + "_v1"; File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); // records 1-100 + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // records 1-100 Properties vpjProperties = defaultVPJProps(sharedVenice, inputDirPath, storeName); SafeHelixManager readManager = null; diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestLeaderReplicaFailover.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestLeaderReplicaFailover.java index 3ef8ba0904..56516b4ef1 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestLeaderReplicaFailover.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestLeaderReplicaFailover.java @@ -5,7 +5,6 @@ import static com.linkedin.venice.utils.IntegrationTestPushUtils.createStoreForJob; import static com.linkedin.venice.utils.IntegrationTestPushUtils.defaultVPJProps; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; import static org.testng.Assert.*; import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; @@ -29,6 +28,7 @@ import com.linkedin.venice.serializer.AvroSerializer; import com.linkedin.venice.utils.IntegrationTestPushUtils; import com.linkedin.venice.utils.TestUtils; +import com.linkedin.venice.utils.TestWriteUtils; import com.linkedin.venice.utils.Time; import com.linkedin.venice.utils.Utils; import com.linkedin.venice.writer.VeniceWriter; @@ -104,7 +104,7 @@ public void testLeaderReplicaFailover() throws Exception { VeniceUserStoreType.BATCH_ONLY.toString(), Optional.empty())); File inputDir = getTempDataDirectory(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); String inputDirPath = "file:" + inputDir.getAbsolutePath(); String storeName = Utils.getUniqueString("store"); Properties props = defaultVPJProps(clusterWrapper, inputDirPath, storeName); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobVersionCleanup.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobVersionCleanup.java index ec282dad8c..e0937dde8a 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobVersionCleanup.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobVersionCleanup.java @@ -76,7 +76,7 @@ public void cleanUp() { public void testMultipleBatchPushWithVersionCleanup() throws Exception { String clusterName = CLUSTER_NAMES[0]; File inputDir = getTempDataDirectory(); - Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithUserSchema(inputDir, true, 50); + Schema recordSchema = 
TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, 50); String inputDirPath = "file:" + inputDir.getAbsolutePath(); String storeName = Utils.getUniqueString("store"); String parentControllerUrls = multiRegionMultiClusterWrapper.getControllerConnectString(); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithEmergencySourceRegionSelection.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithEmergencySourceRegionSelection.java index ce729dabc6..41a79d0d6c 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithEmergencySourceRegionSelection.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithEmergencySourceRegionSelection.java @@ -118,7 +118,7 @@ public void testNativeReplicationForBatchPushWithEmergencySourceOverride(int rec throws Exception { String clusterName = CLUSTER_NAMES[0]; File inputDir = getTempDataDirectory(); - Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithUserSchema(inputDir, true, recordCount); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, recordCount); String inputDirPath = "file:" + inputDir.getAbsolutePath(); String storeName = Utils.getUniqueString("store"); String parentControllerUrls = multiRegionMultiClusterWrapper.getControllerConnectString(); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithNativeReplication.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithNativeReplication.java index 598795d3dc..d2dd60928c 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithNativeReplication.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithNativeReplication.java @@ -45,7 +45,6 @@ import static com.linkedin.venice.utils.TestUtils.assertCommand; import static com.linkedin.venice.utils.TestWriteUtils.STRING_SCHEMA; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; import static org.testng.Assert.assertFalse; import com.linkedin.d2.balancer.D2Client; @@ -492,7 +491,7 @@ public void testNativeReplicationForIncrementalPush() throws Exception { props.put(INPUT_PATH_PROP, inputDirInc); props.put(SEND_CONTROL_MESSAGES_DIRECTLY, true); - TestWriteUtils.writeSimpleAvroFileWithUserSchema2(inputDirInc); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema2(inputDirInc); try (VenicePushJob job = new VenicePushJob("Incremental Push", props)) { job.run(); } @@ -582,8 +581,8 @@ public void testClusterLevelAdminCommandForNativeReplication() throws Exception (parentControllerClient, clusterName, batchOnlyStoreName, props, inputDir) -> { // Create a hybrid store String hybridStoreName = Utils.getUniqueString("hybrid-store"); - NewStoreResponse newStoreResponse = - parentControllerClient.createNewStore(hybridStoreName, "", STRING_SCHEMA, STRING_SCHEMA); + NewStoreResponse newStoreResponse = parentControllerClient + .createNewStore(hybridStoreName, "", STRING_SCHEMA.toString(), STRING_SCHEMA.toString()); Assert.assertFalse(newStoreResponse.isError()); UpdateStoreQueryParams updateStoreParams = new 
UpdateStoreQueryParams().setHybridRewindSeconds(10).setHybridOffsetLagThreshold(2); @@ -593,8 +592,8 @@ public void testClusterLevelAdminCommandForNativeReplication() throws Exception * Create an incremental push enabled store */ String incrementPushStoreName = Utils.getUniqueString("incremental-push-store"); - newStoreResponse = - parentControllerClient.createNewStore(incrementPushStoreName, "", STRING_SCHEMA, STRING_SCHEMA); + newStoreResponse = parentControllerClient + .createNewStore(incrementPushStoreName, "", STRING_SCHEMA.toString(), STRING_SCHEMA.toString()); Assert.assertFalse(newStoreResponse.isError()); updateStoreParams = new UpdateStoreQueryParams().setIncrementalPushEnabled(true); assertCommand(parentControllerClient.updateStore(incrementPushStoreName, updateStoreParams)); @@ -856,7 +855,7 @@ public void testPushDirectlyToChildRegion() throws IOException { // In multi-region setup, the batch push to child controller should be disabled. String clusterName = CLUSTER_NAMES[0]; File inputDir = getTempDataDirectory(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); String inputDirPath = "file:" + inputDir.getAbsolutePath(); String storeName = Utils.getUniqueString("testPushDirectlyToChildColo"); Properties props = IntegrationTestPushUtils.defaultVPJProps(childDatacenters.get(0), inputDirPath, storeName); @@ -1020,7 +1019,7 @@ public void testTargetedRegionPushJobFullConsumptionForBatchStore() throws Excep } props.put(TARGETED_REGION_PUSH_ENABLED, true); props.put(POST_VALIDATION_CONSUMPTION_ENABLED, true); - TestWriteUtils.writeSimpleAvroFileWithUserSchema(inputDir, true, 20); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, 20); try (VenicePushJob job = new VenicePushJob("Test push job 2", props)) { job.run(); // the job should succeed @@ -1095,7 +1094,7 @@ private void motherOfAllTests( UpdateStoreQueryParams updateStoreParams = updateStoreParamsTransformer .apply(new UpdateStoreQueryParams().setStorageQuotaInByte(Store.UNLIMITED_STORAGE_QUOTA)); - Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithUserSchema(inputDir, true, recordCount); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, recordCount); String keySchemaStr = recordSchema.getField(DEFAULT_KEY_FIELD_PROP).schema().toString(); String valueSchemaStr = recordSchema.getField(DEFAULT_VALUE_FIELD_PROP).schema().toString(); @@ -1238,8 +1237,8 @@ private VeniceWriter startIncrementalPush( Assert.assertNotNull(response.getKafkaTopic()); VeniceWriter veniceWriter = veniceWriterFactory.createVeniceWriter( new VeniceWriterOptions.Builder(response.getKafkaTopic()) - .setKeySerializer(new VeniceAvroKafkaSerializer(STRING_SCHEMA)) - .setValueSerializer(new VeniceAvroKafkaSerializer(STRING_SCHEMA)) + .setKeySerializer(new VeniceAvroKafkaSerializer(STRING_SCHEMA.toString())) + .setValueSerializer(new VeniceAvroKafkaSerializer(STRING_SCHEMA.toString())) .build()); veniceWriter.broadcastStartOfIncrementalPush(incrementalPushVersion, new HashMap<>()); return veniceWriter; diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithNativeReplicationSharedProducer.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithNativeReplicationSharedProducer.java index cda45fd834..f8584ec6ef 100644 --- 
a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithNativeReplicationSharedProducer.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithNativeReplicationSharedProducer.java @@ -130,7 +130,7 @@ public void testNativeReplicationForBatchPush(int recordCount, int partitionCoun storeProps[i] = props; props.put(SEND_CONTROL_MESSAGES_DIRECTLY, true); - Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithUserSchema(inputDir, true, recordCount); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, recordCount); String keySchemaStr = recordSchema.getField(VenicePushJob.DEFAULT_KEY_FIELD_PROP).schema().toString(); String valueSchemaStr = recordSchema.getField(VenicePushJob.DEFAULT_VALUE_FIELD_PROP).schema().toString(); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithSourceGridFabricSelection.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithSourceGridFabricSelection.java index f0320d00cf..330e1f8b57 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithSourceGridFabricSelection.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestPushJobWithSourceGridFabricSelection.java @@ -101,7 +101,7 @@ public void cleanUp() { public void testPushJobWithSourceGridFabricSelection(int recordCount, int partitionCount) throws Exception { String clusterName = clusterNames[0]; File inputDir = getTempDataDirectory(); - Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithUserSchema(inputDir, true, recordCount); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, recordCount); String inputDirPath = "file:" + inputDir.getAbsolutePath(); String storeName = Utils.getUniqueString("store"); String parentControllerUrls = multiRegionMultiClusterWrapper.getControllerConnectString(); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestStaleDataVisibility.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestStaleDataVisibility.java index 90e59c4e54..7116447ecd 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestStaleDataVisibility.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestStaleDataVisibility.java @@ -2,7 +2,6 @@ import static com.linkedin.venice.utils.IntegrationTestPushUtils.createStoreForJob; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; import com.linkedin.venice.ConfigKeys; import com.linkedin.venice.common.VeniceSystemStoreType; @@ -18,6 +17,7 @@ import com.linkedin.venice.meta.Version; import com.linkedin.venice.utils.IntegrationTestPushUtils; import com.linkedin.venice.utils.TestUtils; +import com.linkedin.venice.utils.TestWriteUtils; import com.linkedin.venice.utils.Time; import com.linkedin.venice.utils.Utils; import java.io.File; @@ -103,7 +103,7 @@ public void cleanUp() { public void testGetClusterStaleStores() throws Exception { String clusterName = CLUSTER_NAMES[0]; File inputDir = getTempDataDirectory(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); + Schema recordSchema = 
TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); String inputDirPath = "file:" + inputDir.getAbsolutePath(); String storeName = Utils.getUniqueString("store"); String parentControllerUrls = multiRegionMultiClusterWrapper.getControllerConnectString(); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestStoreMigration.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestStoreMigration.java index 28f98ff8bd..9d8b326f62 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestStoreMigration.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestStoreMigration.java @@ -424,7 +424,7 @@ private Properties createAndPushStore(String clusterName, String storeName) thro Properties props = IntegrationTestPushUtils.defaultVPJProps(twoLayerMultiRegionMultiClusterWrapper, inputDirPath, storeName); props.put(SEND_CONTROL_MESSAGES_DIRECTLY, true); - Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithUserSchema(inputDir, true, RECORD_COUNT); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, RECORD_COUNT); String keySchemaStr = recordSchema.getField(DEFAULT_KEY_FIELD_PROP).schema().toString(); String valueSchemaStr = recordSchema.getField(DEFAULT_VALUE_FIELD_PROP).schema().toString(); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestStoreUpdateStoragePersona.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestStoreUpdateStoragePersona.java index be0ba2573a..1a923386d6 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestStoreUpdateStoragePersona.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestStoreUpdateStoragePersona.java @@ -73,7 +73,8 @@ private StoragePersona addPersonaToRepoAndWait(long quota, Optional> private Store setUpTestStoreAndAddToRepo(long quota) { Store testStore = TestUtils.createTestStore(Utils.getUniqueString("testStore"), "testStoreOwner", 100); testStore.setStorageQuotaInByte(quota); - controllerClient.createNewStore(testStore.getName(), testStore.getOwner(), STRING_SCHEMA, STRING_SCHEMA); + controllerClient + .createNewStore(testStore.getName(), testStore.getOwner(), STRING_SCHEMA.toString(), STRING_SCHEMA.toString()); controllerClient.updateStore(testStore.getName(), new UpdateStoreQueryParams().setStorageQuotaInByte(quota)); return testStore; } @@ -136,8 +137,8 @@ void testUpdatePersonaFailedAlreadyHasPersona() { controllerClient.createNewStoreWithParameters( testStore.getName(), testStore.getOwner(), - STRING_SCHEMA, - STRING_SCHEMA, + STRING_SCHEMA.toString(), + STRING_SCHEMA.toString(), new UpdateStoreQueryParams().setStoragePersona(persona.getName()).setStorageQuotaInByte(quota)); ControllerResponse response = controllerClient .updateStore(testStore.getName(), new UpdateStoreQueryParams().setStoragePersona(persona2.getName())); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestTopicWiseSharedConsumerPoolResilience.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestTopicWiseSharedConsumerPoolResilience.java index 3af34eb459..47745322f3 100644 --- 
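// Illustrative note (not part of the diff): taken together, the call sites in this change suggest
// three overloads of the renamed input-file helper (exact signatures assumed from usage only):
//
//   TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir);               // default record count (records 1-100)
//   TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, recordCount);  // explicit record count
//   TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(
//       inputDir, DEFAULT_USER_DATA_RECORD_COUNT, LARGE_RECORD_SIZE);                   // record count plus per-record value size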
a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestTopicWiseSharedConsumerPoolResilience.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestTopicWiseSharedConsumerPoolResilience.java @@ -9,7 +9,6 @@ import static com.linkedin.venice.utils.IntegrationTestPushUtils.createStoreForJob; import static com.linkedin.venice.utils.IntegrationTestPushUtils.defaultVPJProps; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; import com.linkedin.davinci.kafka.consumer.KafkaConsumerService; import com.linkedin.venice.controller.Admin; @@ -20,6 +19,7 @@ import com.linkedin.venice.integration.utils.VeniceControllerWrapper; import com.linkedin.venice.meta.Version; import com.linkedin.venice.utils.TestUtils; +import com.linkedin.venice.utils.TestWriteUtils; import com.linkedin.venice.utils.Utils; import java.io.File; import java.io.IOException; @@ -71,7 +71,7 @@ public void testConsumerPoolShouldNotExhaustDuringRegularDataPushes() throws IOE String storeName = Utils.getUniqueString("batch-store"); File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); // records 1-100 + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // records 1-100 Properties vpjProperties = defaultVPJProps(veniceCluster, inputDirPath, storeName); VeniceControllerWrapper controllerWrapper = veniceCluster.getRandomVeniceController(); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/hadoop/TestVenicePushJob.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/hadoop/TestVenicePushJob.java index 272aa7dd45..bbb2b3a118 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/hadoop/TestVenicePushJob.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/hadoop/TestVenicePushJob.java @@ -21,8 +21,7 @@ import static com.linkedin.venice.utils.IntegrationTestPushUtils.defaultVPJProps; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema2; +import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema2; import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleVsonFileWithUserSchema; import com.linkedin.venice.client.store.AvroGenericStoreClient; @@ -167,7 +166,7 @@ public void cleanUp() { @Test(timeOut = TEST_TIMEOUT, expectedExceptions = VeniceException.class, expectedExceptionsMessageRegExp = ".*Inconsistent file.* schema found.*") public void testRunJobWithInputHavingDifferentSchema() throws Exception { File inputDir = getTempDataDirectory(); - writeSimpleAvroFileWithUserSchema(inputDir); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); writeSimpleAvroFileWithDifferentUserSchema(inputDir); // Setup job properties @@ -187,7 +186,7 @@ public void testRunJobWithInputHavingDifferentSchema() throws Exception { @Test(expectedExceptions = VeniceSchemaFieldNotFoundException.class, expectedExceptionsMessageRegExp = 
".*Could not find field: id1.*") public void testRunJobWithInvalidKeyField() throws Exception { File inputDir = getTempDataDirectory(); - writeSimpleAvroFileWithUserSchema(inputDir); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // Setup job properties String storeName = Utils.getUniqueString("store"); veniceCluster.getNewStore(storeName); @@ -208,7 +207,7 @@ public void testRunJobWithInvalidKeyField() throws Exception { @Test(timeOut = TEST_TIMEOUT, expectedExceptions = VeniceSchemaFieldNotFoundException.class, expectedExceptionsMessageRegExp = ".*Could not find field: name1.*") public void testRunJobWithInvalidValueField() throws Exception { File inputDir = getTempDataDirectory(); - writeSimpleAvroFileWithUserSchema(inputDir); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // Setup job properties String inputDirPath = "file://" + inputDir.getAbsolutePath(); @@ -251,7 +250,7 @@ public void testRunJobWithInvalidValueFieldVson() throws Exception { @Test(timeOut = TEST_TIMEOUT, expectedExceptions = VeniceException.class, expectedExceptionsMessageRegExp = ".*should not have sub directory.*") public void testRunJobWithSubDirInInputDir() throws Exception { File inputDir = getTempDataDirectory(); - writeSimpleAvroFileWithUserSchema(inputDir); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // Create sub directory File subDir = new File(inputDir, "sub-dir"); subDir.mkdir(); @@ -319,12 +318,11 @@ public void testRunJobByPickingUpLatestFolder() throws Exception { public void testRunJobWithDifferentKeySchemaConfig() throws Exception { File inputDir = getTempDataDirectory(); String storeName = Utils.getUniqueString("store"); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringWithExtraSchema(inputDir); String inputDirPath = "file://" + inputDir.getAbsolutePath(); Properties props = defaultVPJProps(veniceCluster, inputDirPath, storeName); createStoreForJob(veniceCluster.getClusterName(), recordSchema, props).close(); - String jobName = "Test push job"; // Run job with different key schema (from 'string' to 'int') props.setProperty(KEY_FIELD_PROP, "age"); @@ -339,12 +337,11 @@ public void testRunJobWithDifferentKeySchemaConfig() throws Exception { @Test(timeOut = TEST_TIMEOUT, expectedExceptions = VeniceException.class, expectedExceptionsMessageRegExp = ".*Failed to validate value schema.*") public void testRunJobMultipleTimesWithInCompatibleValueSchemaConfig() throws Exception { File inputDir = getTempDataDirectory(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringWithExtraSchema(inputDir); String inputDirPath = "file://" + inputDir.getAbsolutePath(); String storeName = Utils.getUniqueString("store"); Properties props = defaultVPJProps(veniceCluster, inputDirPath, storeName); createStoreForJob(veniceCluster.getClusterName(), recordSchema, props).close(); - String jobName = "Test push job"; // Run job with different value schema (from 'string' to 'int') props.setProperty(VALUE_FIELD_PROP, "age"); props.setProperty(CONTROLLER_REQUEST_RETRY_ATTEMPTS, "2"); @@ -358,7 +355,7 @@ public void testRunJobWithEOPSuppressed() throws Exception { String storeName = Utils.getUniqueString("store"); String routerUrl = veniceCluster.getRandomRouterURL(); ControllerClient controllerClient = new ControllerClient(veniceCluster.getClusterName(), routerUrl); - 
Schema recordSchema = writeSimpleAvroFileWithStringToStringSchema(inputDir, false); + Schema recordSchema = writeSimpleAvroFileWithStringToStringSchema(inputDir); String inputDirPath = "file://" + inputDir.getAbsolutePath(); Properties props = defaultVPJProps(veniceCluster, inputDirPath, storeName); props.setProperty(SUPPRESS_END_OF_PUSH_MESSAGE, "true"); @@ -384,7 +381,7 @@ public void testRunJobWithDeferredVersionSwap() throws Exception { String storeName = Utils.getUniqueString("store"); String routerUrl = veniceCluster.getRandomRouterURL(); ControllerClient controllerClient = new ControllerClient(veniceCluster.getClusterName(), routerUrl); - Schema recordSchema = writeSimpleAvroFileWithStringToStringSchema(inputDir, false); + Schema recordSchema = writeSimpleAvroFileWithStringToStringSchema(inputDir); String inputDirPath = "file://" + inputDir.getAbsolutePath(); Properties props = defaultVPJProps(veniceCluster, inputDirPath, storeName); props.setProperty(DEFER_VERSION_SWAP, "true"); @@ -489,7 +486,7 @@ public void testWCBatchJob() throws Exception { @Test(timeOut = TEST_TIMEOUT, expectedExceptions = VeniceException.class, expectedExceptionsMessageRegExp = ".*Exception or error caught during VenicePushJob.*") public void testRunJobWithBuggySprayingMapReduceShufflePartitioner() throws Exception { File inputDir = getTempDataDirectory(); - writeSimpleAvroFileWithUserSchema(inputDir); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // Setup job properties String inputDirPath = "file://" + inputDir.getAbsolutePath(); @@ -511,7 +508,7 @@ public void testRunJobWithBuggySprayingMapReduceShufflePartitioner() throws Exce @Test(timeOut = TEST_TIMEOUT, expectedExceptions = VeniceException.class, expectedExceptionsMessageRegExp = ".*Exception or error caught during VenicePushJob.*") public void testRunJobWithBuggyOffsettingMapReduceShufflePartitioner() throws Exception { File inputDir = getTempDataDirectory(); - writeSimpleAvroFileWithUserSchema(inputDir); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // Setup job properties String inputDirPath = "file://" + inputDir.getAbsolutePath(); @@ -533,7 +530,7 @@ public void testRunJobWithBuggyOffsettingMapReduceShufflePartitioner() throws Ex @Test(timeOut = TEST_TIMEOUT, expectedExceptions = VeniceException.class, expectedExceptionsMessageRegExp = ".*Exception or error caught during VenicePushJob.*") public void testRunJobWithNonDeterministicPartitioner() throws Exception { File inputDir = getTempDataDirectory(); - writeSimpleAvroFileWithUserSchema(inputDir); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // Setup job properties String inputDirPath = "file://" + inputDir.getAbsolutePath(); @@ -556,7 +553,7 @@ public void testRunJobWithNonDeterministicPartitioner() throws Exception { @Test(timeOut = TEST_TIMEOUT, description = "KIF repush should copy all data including recent incPush2RT to new VT") public void testKIFRepushForIncrementalPushStores() throws Exception { File inputDir = getTempDataDirectory(); - writeSimpleAvroFileWithUserSchema(inputDir); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // Setup job properties String inputDirPath = "file://" + inputDir.getAbsolutePath(); String storeName = Utils.getUniqueString("store"); @@ -576,7 +573,7 @@ public void testKIFRepushForIncrementalPushStores() throws Exception { TimeUnit.SECONDS, () -> Assert.assertEquals(controllerClient.getStore(storeName).getStore().getCurrentVersion(), 1)); - 
writeSimpleAvroFileWithUserSchema2(inputDir); + writeSimpleAvroFileWithStringToStringSchema2(inputDir); props.setProperty(INCREMENTAL_PUSH, "true"); TestWriteUtils.runPushJob("Test push job", props); TestUtils.waitForNonDeterministicAssertion( @@ -625,7 +622,7 @@ public void testKIFRepushForIncrementalPushStores() throws Exception { @Test(timeOut = TEST_TIMEOUT, dataProvider = "True-and-False", dataProviderClass = DataProviderUtils.class) public void testKIFRepushFetch(boolean chunkingEnabled) throws Exception { File inputDir = getTempDataDirectory(); - writeSimpleAvroFileWithUserSchema(inputDir); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // Setup job properties String inputDirPath = "file://" + inputDir.getAbsolutePath(); String storeName = Utils.getUniqueString("store"); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/kafka/ssl/TestProduceWithSSL.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/kafka/ssl/TestProduceWithSSL.java index e58efbf8a4..1590b88843 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/kafka/ssl/TestProduceWithSSL.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/kafka/ssl/TestProduceWithSSL.java @@ -3,7 +3,6 @@ import static com.linkedin.venice.utils.IntegrationTestPushUtils.createStoreForJob; import static com.linkedin.venice.utils.IntegrationTestPushUtils.sslVPJProps; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; import com.linkedin.venice.client.store.AvroGenericStoreClient; import com.linkedin.venice.client.store.ClientConfig; @@ -108,7 +107,7 @@ public void testVenicePushJobSupportSSL() throws Exception { VeniceClusterWrapper cluster = this.cluster; File inputDir = getTempDataDirectory(); String storeName = Utils.getUniqueString("store"); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); String inputDirPath = "file://" + inputDir.getAbsolutePath(); Properties props = sslVPJProps(cluster, inputDirPath, storeName); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/multicluster/TestMetadataOperationInMultiCluster.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/multicluster/TestMetadataOperationInMultiCluster.java index 178db1669f..f1e3c2af69 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/multicluster/TestMetadataOperationInMultiCluster.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/multicluster/TestMetadataOperationInMultiCluster.java @@ -2,7 +2,6 @@ import static com.linkedin.venice.hadoop.VenicePushJob.VENICE_STORE_NAME_PROP; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; import com.linkedin.venice.controllerapi.ControllerClient; import com.linkedin.venice.controllerapi.ControllerResponse; @@ -19,6 +18,7 @@ import com.linkedin.venice.meta.Version; import com.linkedin.venice.utils.IntegrationTestPushUtils; import com.linkedin.venice.utils.TestUtils; +import com.linkedin.venice.utils.TestWriteUtils; import com.linkedin.venice.utils.Time; import com.linkedin.venice.utils.Utils; import java.io.File; @@ -141,7 
+141,7 @@ public void testRunVPJInMultiCluster() throws Exception { String storeNameSuffix = "-testStore"; File inputDir = getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); Map propertiesMap = new HashMap<>(); for (String clusterName: clusterNames) { diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/restart/TestRestartServerAfterDeletingSstFilesWithActiveActiveIngestion.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/restart/TestRestartServerAfterDeletingSstFilesWithActiveActiveIngestion.java index 4337a7ad86..48a553425f 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/restart/TestRestartServerAfterDeletingSstFilesWithActiveActiveIngestion.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/restart/TestRestartServerAfterDeletingSstFilesWithActiveActiveIngestion.java @@ -9,7 +9,6 @@ import static com.linkedin.venice.integration.utils.VeniceClusterWrapperConstants.DEFAULT_PARENT_DATA_CENTER_REGION_NAME; import static com.linkedin.venice.utils.IntegrationTestPushUtils.createStoreForJob; import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory; -import static com.linkedin.venice.utils.TestWriteUtils.writeSimpleAvroFileWithUserSchema; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -50,6 +49,7 @@ import com.linkedin.venice.utils.Pair; import com.linkedin.venice.utils.SslUtils; import com.linkedin.venice.utils.TestUtils; +import com.linkedin.venice.utils.TestWriteUtils; import com.linkedin.venice.utils.Time; import com.linkedin.venice.utils.Utils; import com.linkedin.venice.view.TestView; @@ -141,7 +141,7 @@ public void setUp() throws Exception { Optional.empty())); // create a active-active enabled store File inputDir = getTempDataDirectory(); - Schema recordSchema = writeSimpleAvroFileWithUserSchema(inputDir); + Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); String inputDirPath = "file:" + inputDir.getAbsolutePath(); Properties props = IntegrationTestPushUtils.defaultVPJProps(multiRegionMultiClusterWrapper, inputDirPath, storeName); diff --git a/internal/venice-test-common/src/integrationtest/java/com/linkedin/venice/fastclient/grpc/VeniceGrpcEndToEndTest.java b/internal/venice-test-common/src/integrationtest/java/com/linkedin/venice/fastclient/grpc/VeniceGrpcEndToEndTest.java index 1aae271af4..e44148ff3e 100644 --- a/internal/venice-test-common/src/integrationtest/java/com/linkedin/venice/fastclient/grpc/VeniceGrpcEndToEndTest.java +++ b/internal/venice-test-common/src/integrationtest/java/com/linkedin/venice/fastclient/grpc/VeniceGrpcEndToEndTest.java @@ -103,7 +103,7 @@ public String writeData(String storeName) throws IOException { // 2. Write data to the store w/ writeSimpleAvroFileWithUserSchema File inputDir = TestWriteUtils.getTempDataDirectory(); String inputDirPath = "file://" + inputDir.getAbsolutePath(); - TestWriteUtils.writeSimpleAvroFileWithUserSchema(inputDir, false, recordCnt); + TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir, recordCnt); // 3. 
Run a push job to push the data to Venice (VPJ)
     Properties vpjProps = TestWriteUtils.defaultVPJProps(cluster.getRandomRouterURL(), inputDirPath, storeName);
diff --git a/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/PushInputSchemaBuilder.java b/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/PushInputSchemaBuilder.java
new file mode 100644
index 0000000000..d5be8da614
--- /dev/null
+++ b/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/PushInputSchemaBuilder.java
@@ -0,0 +1,55 @@
+package com.linkedin.venice.utils;
+
+import static com.linkedin.venice.hadoop.VenicePushJob.DEFAULT_KEY_FIELD_PROP;
+import static com.linkedin.venice.hadoop.VenicePushJob.DEFAULT_VALUE_FIELD_PROP;
+
+import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.avro.Schema;
+
+
+/**
+ * This class is a simple schema builder to generate Venice Push Job input file schemas. The builder allows users to set
+ * up the key schema, the value schema and additional unrelated fields. It performs a sanity check to make sure the
+ * generated schema is valid for push jobs in integration tests.
+ */
+public class PushInputSchemaBuilder {
+  private static final String namespace = "example.avro";
+  private static final String name = "AvroFileRecord";
+  private static final String doc = "File Schema For Test Push";
+  private final Schema fileSchema = Schema.createRecord(name, doc, namespace, false);
+  private final Map<String, Schema.Field> nameToFieldMap = new HashMap<>();
+
+  public PushInputSchemaBuilder() {
+  }
+
+  public PushInputSchemaBuilder setKeySchema(Schema schema) {
+    return setFieldSchema(DEFAULT_KEY_FIELD_PROP, schema);
+  }
+
+  public PushInputSchemaBuilder setValueSchema(Schema schema) {
+    return setFieldSchema(DEFAULT_VALUE_FIELD_PROP, schema);
+  }
+
+  public PushInputSchemaBuilder setFieldSchema(String fieldName, Schema fieldSchema) {
+    if (nameToFieldMap.containsKey(fieldName)) {
+      throw new IllegalStateException(
+          "Field has been set: " + fieldName + " with schema: " + nameToFieldMap.get(fieldName).toString());
+    }
+    nameToFieldMap.put(fieldName, AvroCompatibilityHelper.createSchemaField(fieldName, fieldSchema, "", null));
+    return this;
+  }
+
+  public Schema build() {
+    if (!nameToFieldMap.containsKey(DEFAULT_KEY_FIELD_PROP)) {
+      throw new IllegalStateException("Key field schema has not been setup.");
+    }
+    if (!nameToFieldMap.containsKey(DEFAULT_VALUE_FIELD_PROP)) {
+      throw new IllegalStateException("Value field schema has not been setup.");
+    }
+    fileSchema.setFields(new ArrayList<>(nameToFieldMap.values()));
+    return fileSchema;
+  }
+}
diff --git a/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/TestUtils.java b/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/TestUtils.java
index b2abdd5768..bdcccdf9ce 100644
--- a/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/TestUtils.java
+++ b/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/TestUtils.java
@@ -5,7 +5,6 @@ import static com.linkedin.venice.ConfigKeys.PARTITIONER_CLASS;
 import static com.linkedin.venice.ConfigKeys.SERVER_FORKED_PROCESS_JVM_ARGUMENT_LIST;
 import static com.linkedin.venice.ConfigKeys.SERVER_INGESTION_MODE;
-import static com.linkedin.venice.utils.TestWriteUtils.STRING_SCHEMA;
 import static com.linkedin.venice.utils.Utils.getUniqueString;
 import static org.mockito.ArgumentMatchers.anyString;
 import static 
org.mockito.Mockito.anyInt; @@ -658,8 +657,7 @@ public static void createAndVerifyStoreInAllRegions( String storeName, ControllerClient parentControllerClient, List controllerClientList) { - Assert - .assertFalse(parentControllerClient.createNewStore(storeName, "owner", STRING_SCHEMA, STRING_SCHEMA).isError()); + Assert.assertFalse(parentControllerClient.createNewStore(storeName, "owner", "\"string\"", "\"string\"").isError()); TestUtils.waitForNonDeterministicAssertion(60, TimeUnit.SECONDS, () -> { for (ControllerClient client: controllerClientList) { Assert.assertFalse(client.getStore(storeName).isError()); diff --git a/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/TestWriteUtils.java b/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/TestWriteUtils.java index 01e63af15b..3080b3ea97 100644 --- a/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/TestWriteUtils.java +++ b/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/TestWriteUtils.java @@ -24,10 +24,13 @@ import com.linkedin.venice.controllerapi.ControllerResponse; import com.linkedin.venice.controllerapi.UpdateStoreQueryParams; import com.linkedin.venice.etl.ETLUtils; +import com.linkedin.venice.exceptions.VeniceException; import com.linkedin.venice.hadoop.VenicePushJob; +import com.linkedin.venice.schema.AvroSchemaParseUtils; import com.linkedin.venice.schema.vson.VsonAvroSchemaAdapter; import com.linkedin.venice.schema.vson.VsonAvroSerializer; import com.linkedin.venice.schema.vson.VsonSchema; +import com.linkedin.venice.schema.writecompute.WriteComputeSchemaConverter; import com.linkedin.venice.writer.VeniceWriter; import com.linkedin.venice.writer.update.UpdateBuilderImpl; import java.io.File; @@ -63,562 +66,201 @@ public class TestWriteUtils { public static final Logger LOGGER = LogManager.getLogger(TestWriteUtils.class); - public static final String USER_SCHEMA_STRING = - "{" + " \"namespace\" : \"example.avro\", " + " \"type\": \"record\", " + " \"name\": \"User\", " - + " \"fields\": [ " + " { \"name\": \"" + DEFAULT_KEY_FIELD_PROP - + "\", \"type\": \"string\"}, " + " { \"name\": \"" + DEFAULT_VALUE_FIELD_PROP - + "\", \"type\": \"string\"}, " + " { \"name\": \"age\", \"type\": \"int\" }" + " ] " + " } "; - - public static final String ETL_KEY_SCHEMA_STRING = "{\n" + " \"type\":\"record\",\n" + " \"name\":\"key\",\n" - + " \"namespace\":\"com.linkedin.venice.testkey\",\n" + " \"fields\":[\n" + " {\n" - + " \"name\":\"" + DEFAULT_KEY_FIELD_PROP + "\",\n" + " \"type\":\"string\"\n" - + " }\n" + " ]\n" + "}"; - - public static final String ETL_VALUE_SCHEMA_STRING = "{\n" + " \"type\":\"record\",\n" - + " \"name\":\"value\",\n" + " \"namespace\":\"com.linkedin.venice.testvalue\",\n" + " \"fields\":[\n" - + " {\n" + " \"name\":\"" + DEFAULT_VALUE_FIELD_PROP + "\",\n" - + " \"type\":\"string\"\n" + " }\n" + " ],\n" + " \"version\":10\n" + "}"; - - public static final String ETL_UNION_VALUE_SCHEMA_STRING_WITHOUT_NULL = "[\"int\", \"string\"]"; - - public static final String ETL_UNION_VALUE_SCHEMA_STRING_WITH_NULL = "[\"int\", \"string\", \"null\"]"; - - public static final String USER_SCHEMA_STRING_SIMPLE_WITH_DEFAULT = - "{" + " \"namespace\" : \"example.avro\", " + " \"type\": \"record\", " + " \"name\": \"User\", " - + " \"fields\": [ " + " { \"name\": \"" + DEFAULT_KEY_FIELD_PROP - + "\", \"type\": \"string\", \"default\": \"\"}, " + " { \"name\": \"" + DEFAULT_VALUE_FIELD_PROP - + "\", \"type\": \"string\", \"default\": \"\"}" + " ] " + " } "; - - 
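For illustration, a minimal sketch of how the schema-string constants removed here are replaced: the new PushInputSchemaBuilder (added above) assembles the equivalent push-input record schema from Schema objects, which is how the STRING_TO_STRING_WITH_EXTRA_FIELD_SCHEMA constant below is built.

  Schema fileSchema = new PushInputSchemaBuilder()
      .setKeySchema(TestWriteUtils.STRING_SCHEMA) // "key": string
      .setValueSchema(TestWriteUtils.STRING_SCHEMA) // "value": string
      .setFieldSchema("age", TestWriteUtils.INT_SCHEMA) // extra field, mirroring the old user schema
      .build();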
public static final String USER_SCHEMA_STRING_WITH_DEFAULT = "{" + " \"namespace\" : \"example.avro\", " - + " \"type\": \"record\", " + " \"name\": \"User\", " + " \"fields\": [ " - + " { \"name\": \"" + DEFAULT_KEY_FIELD_PROP + "\", \"type\": \"string\", \"default\": \"\"}, " - + " { \"name\": \"" + DEFAULT_VALUE_FIELD_PROP + "\", \"type\": \"string\", \"default\": \"\"}, " - + " { \"name\": \"age\", \"type\": \"int\", \"default\": 1 }" + " ] " + " } "; - - public static final String USER_SCHEMA_WITH_A_FLOAT_ARRAY_STRING = "{" + " \"namespace\" : \"example.avro\", " - + " \"type\": \"record\", " + " \"name\": \"ManyFloats\", " + " \"fields\": [ " - + " { \"name\": \"" + DEFAULT_KEY_FIELD_PROP + "\", \"type\": \"string\" }, " + " { \"name\": \"" - + DEFAULT_VALUE_FIELD_PROP + "\", \"type\": {\"type\": \"array\", \"items\": \"float\"} }, " - + " { \"name\": \"age\", \"type\": \"int\" }" + " ] " + " } "; - - public static final String INT_STRING_SCHEMA_STRING = "{" + " \"namespace\" : \"example.avro\", " - + " \"type\": \"record\", " + " \"name\": \"IntToString\", " + " \"fields\": [ " - + " { \"name\": \"" + DEFAULT_KEY_FIELD_PROP + "\", \"type\": \"int\"}, " + " { \"name\": \"" - + DEFAULT_VALUE_FIELD_PROP + "\", \"type\": \"string\"} " + " ] " + " } "; - - public static final String STRING_STRING_SCHEMA_STRING = "{" + " \"namespace\" : \"example.avro\", " - + " \"type\": \"record\", " + " \"name\": \"StringToString\", " + " \"fields\": [ " - + " { \"name\": \"" + DEFAULT_KEY_FIELD_PROP + "\", \"type\": \"string\"}, " + " { \"name\": \"" - + DEFAULT_VALUE_FIELD_PROP + "\", \"type\": \"string\"} " + " ] " + " } "; - - public static final String NESTED_SCHEMA_STRING = "{" + " \"namespace\" : \"example.avro\", " - + " \"type\": \"record\", " + " \"name\": \"nameRecord\", " + " \"fields\": [ " - + " { \"name\": \"firstName\", \"type\": \"string\", \"default\": \"\" }, " - + " { \"name\": \"lastName\", \"type\": \"string\", \"default\": \"\" } " + " ]" + " } "; - public static final String NESTED_SCHEMA_STRING_V2 = "{" + " \"namespace\" : \"example.avro\", " - + " \"type\": \"record\", " + " \"name\": \"nameRecord\", " + " \"fields\": [ " - + " { \"name\": \"firstName\", \"type\": \"string\", \"default\": \"\" }, " - + " { \"name\": \"lastName\", \"type\": \"string\", \"default\": \"\" }, " - + " { \"name\": \"age\", \"type\": \"int\", \"default\": -1 } " + " ]" + " } "; - - public static final String NESTED_SCHEMA_STRING_V3 = "{" + " \"namespace\" : \"example.avro\", " - + " \"type\": \"record\", " + " \"name\": \"nameRecord\", " + " \"fields\": [ " - + " { \"name\": \"firstName\", \"type\": \"string\", \"default\": \"\" }, " - + " { \"name\": \"lastName\", \"type\": \"string\", \"default\": \"\" }, " - + " { \"name\": \"height\", \"type\": \"int\", \"default\": -1 } " + " ]" + " } "; - - public static final String NESTED_SCHEMA_STRING_V4 = "{" + " \"namespace\" : \"example.avro\", " - + " \"type\": \"record\", " + " \"name\": \"nameRecord\", " + " \"fields\": [ " - + " { \"name\": \"firstName\", \"type\": \"string\", \"default\": \"\" }, " - + " { \"name\": \"lastName\", \"type\": \"string\", \"default\": \"\" }, " - + " { \"name\": \"height\", \"type\": \"long\", \"default\": -1 } " + " ]" + " } "; - - public static final String STRING_RECORD_SCHEMA_STRING = - "{" + " \"namespace\" : \"example.avro\", " + " \"type\": \"record\", " - + " \"name\": \"StringToRecord\", " + " \"fields\": [ " + " { \"name\": \"" - + DEFAULT_KEY_FIELD_PROP + "\", \"type\": \"string\", \"default\": \"\"}, " + " { 
\"name\": \"" - + DEFAULT_VALUE_FIELD_PROP + "\", \"type\": " + NESTED_SCHEMA_STRING + " } " + " ] " + " } "; - - public static final String UPDATE_SCHEMA_OF_NESTED_SCHEMA_STRING = "{\n" + " \"type\" : \"record\",\n" - + " \"name\" : \"nameRecordWriteOpRecord\",\n" + " \"namespace\" : \"example.avro\",\n" + " \"fields\" : [ {\n" - + " \"name\" : \"firstName\",\n" + " \"type\" : [ {\n" + " \"type\" : \"record\",\n" - + " \"name\" : \"NoOp\",\n" + " \"fields\" : [ ]\n" + " }, \"string\" ],\n" - + " \"default\" : { }\n" + " }, {\n" + " \"name\" : \"lastName\",\n" - + " \"type\" : [ \"NoOp\", \"string\" ],\n" + " \"default\" : { }\n" + " } ]\n" + "}"; - - public static final String PARTIAL_UPDATE_STRING_RECORD_SCHEMA_STRING = "{" + " \"namespace\" : \"example.avro\", " - + " \"type\": \"record\", " + " \"name\": \"StringToRecord\", " + " \"fields\": [ " - + " { \"name\": \"" + DEFAULT_KEY_FIELD_PROP + "\", \"type\": \"string\", \"default\": \"\"}, " - + " { \"name\": \"" + DEFAULT_VALUE_FIELD_PROP + "\", \"type\": " + UPDATE_SCHEMA_OF_NESTED_SCHEMA_STRING - + " } " + " ] " + " } "; - - public static final String STRING_SCHEMA = "\"string\""; - - public static final String INT_SCHEMA = "\"int\""; - public static final int DEFAULT_USER_DATA_RECORD_COUNT = 100; - public static final String DEFAULT_USER_DATA_VALUE_PREFIX = "test_name_"; + // Key / Value Schema + public static final Schema STRING_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/primitive/String.avsc")); + public static final Schema INT_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/primitive/Int.avsc")); + public static final Schema USER_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/User.avsc")); + public static final Schema USER_WITH_DEFAULT_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/UserWithDefault.avsc")); + public static final Schema SIMPLE_USER_WITH_DEFAULT_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/SimpleUserWithDefault.avsc")); + public static final Schema USER_WITH_FLOAT_ARRAY_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/UserWithFloatArray.avsc")); + public static final Schema NAME_RECORD_V1_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/NameV1.avsc")); + public static final Schema NAME_RECORD_V2_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/NameV2.avsc")); + public static final Schema NAME_RECORD_V3_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/NameV3.avsc")); + public static final Schema NAME_RECORD_V4_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/NameV4.avsc")); + + // ETL Schema + public static final Schema ETL_KEY_SCHEMA = AvroCompatibilityHelper.parse(loadSchemaFileFromResource("etl/Key.avsc")); + public static final Schema ETL_VALUE_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("etl/Value.avsc")); + public static final Schema ETL_UNION_VALUE_WITH_NULL_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("etl/UnionValueWithNull.avsc")); + public static final Schema ETL_UNION_VALUE_WITHOUT_NULL_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("etl/UnionValueWithoutNull.avsc")); + + // Partial Update Schema + public static final Schema NAME_RECORD_V1_UPDATE_SCHEMA = + 
WriteComputeSchemaConverter.getInstance().convertFromValueRecordSchema(NAME_RECORD_V1_SCHEMA); + + // Push Input Folder Schema + public static final Schema INT_TO_STRING_SCHEMA = + new PushInputSchemaBuilder().setKeySchema(INT_SCHEMA).setValueSchema(STRING_SCHEMA).build(); + public static final Schema STRING_TO_STRING_SCHEMA = + new PushInputSchemaBuilder().setKeySchema(STRING_SCHEMA).setValueSchema(STRING_SCHEMA).build(); + public static final Schema STRING_TO_NAME_RECORD_V1_SCHEMA = + new PushInputSchemaBuilder().setKeySchema(STRING_SCHEMA).setValueSchema(NAME_RECORD_V1_SCHEMA).build(); + public static final Schema STRING_TO_NAME_RECORD_V1_UPDATE_SCHEMA = + new PushInputSchemaBuilder().setKeySchema(STRING_SCHEMA).setValueSchema(NAME_RECORD_V1_UPDATE_SCHEMA).build(); + public static final Schema STRING_TO_STRING_WITH_EXTRA_FIELD_SCHEMA = + new PushInputSchemaBuilder().setKeySchema(STRING_SCHEMA) + .setValueSchema(STRING_SCHEMA) + .setFieldSchema("age", INT_SCHEMA) + .build(); + public static File getTempDataDirectory() { return Utils.getTempDataDirectory(); } - /** - * This function is used to generate a small avro file with 'user' schema. - * - * @param parentDir - * @return the Schema object for the avro file - * @throws IOException - */ - public static Schema writeSimpleAvroFileWithUserSchema(File parentDir) throws IOException { - return writeSimpleAvroFileWithUserSchema(parentDir, true); - } - - public static Schema writeSimpleAvroFileWithUserSchema(File parentDir, int recordLength) throws IOException { - return writeSimpleAvroFileWithUserSchema(parentDir, true, DEFAULT_USER_DATA_RECORD_COUNT, recordLength); - } - - public static Schema writeSimpleAvroFileWithUserSchema(File parentDir, boolean fileNameWithAvroSuffix) - throws IOException { - return writeSimpleAvroFileWithUserSchema(parentDir, fileNameWithAvroSuffix, DEFAULT_USER_DATA_RECORD_COUNT); + public static Schema writeSimpleAvroFileWithStringToStringSchema(File parentDir) throws IOException { + return writeSimpleAvroFileWithStringToStringSchema(parentDir, DEFAULT_USER_DATA_RECORD_COUNT); } - public static Schema writeSimpleAvroFileWithUserSchema( - File parentDir, - boolean fileNameWithAvroSuffix, - int recordCount) throws IOException { - String fileName; - if (fileNameWithAvroSuffix) { - fileName = "simple_user.avro"; - } else { - fileName = "simple_user"; - } - return writeSimpleAvroFileWithUserSchema(parentDir, recordCount, fileName); + public static Schema writeSimpleAvroFileWithStringToStringSchema(File parentDir, int recordCount) throws IOException { + return writeSimpleAvroFileWithStringToStringSchema(parentDir, recordCount, "string2string.avro"); } - public static Schema writeSimpleAvroFileWithUserSchema(File parentDir, int recordCount, String fileName) + public static Schema writeSimpleAvroFileWithStringToStringSchema(File parentDir, int recordCount, String fileName) throws IOException { - return writeAvroFile(parentDir, fileName, USER_SCHEMA_STRING, (recordSchema, writer) -> { + return writeAvroFile(parentDir, fileName, STRING_TO_STRING_SCHEMA, (recordSchema, writer) -> { for (int i = 1; i <= recordCount; ++i) { GenericRecord user = new GenericData.Record(recordSchema); user.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); user.put(DEFAULT_VALUE_FIELD_PROP, DEFAULT_USER_DATA_VALUE_PREFIX + i); - user.put("age", i); writer.append(user); } }); } - public static Schema writeSimpleAvroFileWithUserSchema( - File parentDir, - boolean fileNameWithAvroSuffix, - int recordCount, - int recordSizeMin) throws IOException { - 
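For illustration, a condensed sketch of the call pattern the renamed write helpers support in the integration tests above; veniceCluster and storeName are assumed to come from the surrounding test setup.

  File inputDir = getTempDataDirectory();
  Schema recordSchema = TestWriteUtils.writeSimpleAvroFileWithStringToStringSchema(inputDir); // records 1-100
  String inputDirPath = "file://" + inputDir.getAbsolutePath();
  Properties props = defaultVPJProps(veniceCluster, inputDirPath, storeName);
  createStoreForJob(veniceCluster.getClusterName(), recordSchema, props).close();
  TestWriteUtils.runPushJob("Test push job", props);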
String fileName; - if (fileNameWithAvroSuffix) { - fileName = "simple_user.avro"; - } else { - fileName = "simple_user"; - } - char[] chars = new char[recordSizeMin]; - return writeAvroFile(parentDir, fileName, USER_SCHEMA_STRING, (recordSchema, writer) -> { - for (int i = 1; i <= recordCount; ++i) { - GenericRecord user = new GenericData.Record(recordSchema); - user.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - Arrays.fill(chars, String.valueOf(i).charAt(0)); - user.put(DEFAULT_VALUE_FIELD_PROP, String.copyValueOf(chars)); - user.put("age", i); - writer.append(user); - } - }); - } - - public static void writeMultipleAvroFilesWithUserSchema(File parentDir, int fileCount, int recordCount) - throws IOException { - for (int i = 0; i < fileCount; i++) { - writeSimpleAvroFileWithUserSchema(parentDir, recordCount, "testInput" + i + ".avro"); - } - } - - public static Schema writeSimpleAvroFileForValidateSchemaAndBuildDictMapperOutput( - File parentDir, - String file, - long inputFileDataSize, - ByteBuffer zstdDictionary, - String avroSchema) throws IOException { - return writeAvroFile(parentDir, file, avroSchema, (recordSchema, writer) -> { - GenericRecord user = new GenericData.Record(recordSchema); - user.put(KEY_INPUT_FILE_DATA_SIZE, inputFileDataSize); - if (zstdDictionary != null) { - user.put(KEY_ZSTD_COMPRESSION_DICTIONARY, zstdDictionary); - } - writer.append(user); - }); - } - - public static Schema writeETLFileWithUserSchema(File parentDir, boolean fileNameWithAvroSuffix) throws IOException { - String fileName; - if (fileNameWithAvroSuffix) { - fileName = "simple_etl_user.avro"; - } else { - fileName = "simple_etl_user"; - } - + public static Schema writeSimpleAvroFileWithStringToStringWithExtraSchema(File parentDir) throws IOException { return writeAvroFile( parentDir, - fileName, - getETLStoreSchemaString(ETL_KEY_SCHEMA_STRING, ETL_VALUE_SCHEMA_STRING), + "string2string_extra_field.avro", + STRING_TO_STRING_WITH_EXTRA_FIELD_SCHEMA, (recordSchema, writer) -> { - for (int i = 1; i <= 50; ++i) { - GenericRecord user = new GenericData.Record(recordSchema); - - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - GenericRecord value = new GenericData.Record(Schema.parse(ETL_VALUE_SCHEMA_STRING)); - - key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - value.put(DEFAULT_VALUE_FIELD_PROP, DEFAULT_USER_DATA_VALUE_PREFIX + i); - - user.put("opalSegmentIdPart", 0); - user.put("opalSegmentIdSeq", 0); - user.put("opalSegmentOffset", (long) 0); - user.put("metadata", new HashMap<>()); - - user.put("key", key); - user.put("value", value); - user.put("offset", (long) i); - user.put("DELETED_TS", null); - - writer.append(user); - } - - for (int i = 51; i <= 100; ++i) { + for (int i = 1; i <= DEFAULT_USER_DATA_RECORD_COUNT; ++i) { GenericRecord user = new GenericData.Record(recordSchema); - - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - - key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - - user.put("opalSegmentIdPart", 0); - user.put("opalSegmentIdSeq", 0); - user.put("opalSegmentOffset", (long) 0); - user.put("metadata", new HashMap<>()); - - user.put("key", key); - user.put("value", null); - user.put("offset", (long) i); - user.put("DELETED_TS", (long) i); - + user.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); + user.put(DEFAULT_VALUE_FIELD_PROP, DEFAULT_USER_DATA_VALUE_PREFIX + i); + user.put("age", i); writer.append(user); } }); } - public static Schema writeETLFileWithUnionWithNullSchema(File parentDir, 
boolean fileNameWithAvroSuffix) + public static Schema writeSimpleAvroFileWithStringToStringSchema(File parentDir, int recordCount, int recordSizeMin) throws IOException { - String fileName; - if (fileNameWithAvroSuffix) { - fileName = "simple_etl_union_with_null.avro"; - } else { - fileName = "simple_etl_union_with_null"; - } - - return writeAvroFile( - parentDir, - fileName, - getETLStoreSchemaString(ETL_KEY_SCHEMA_STRING, ETL_UNION_VALUE_SCHEMA_STRING_WITH_NULL), - (recordSchema, writer) -> { - for (int i = 1; i <= 25; ++i) { - GenericRecord user = new GenericData.Record(recordSchema); - - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - - key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - - user.put("opalSegmentIdPart", 0); - user.put("opalSegmentIdSeq", 0); - user.put("opalSegmentOffset", (long) 0); - user.put("metadata", new HashMap<>()); - - user.put("key", key); - user.put("value", "string_" + i); - user.put("offset", (long) i); - user.put("DELETED_TS", null); - - writer.append(user); - } - - for (int i = 26; i <= 50; ++i) { - GenericRecord user = new GenericData.Record(recordSchema); - - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - - key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - - user.put("opalSegmentIdPart", 0); - user.put("opalSegmentIdSeq", 0); - user.put("opalSegmentOffset", (long) 0); - user.put("metadata", new HashMap<>()); - - user.put("key", key); - user.put("value", i); - user.put("offset", (long) i); - user.put("DELETED_TS", null); - - writer.append(user); - } - - for (int i = 51; i <= 100; ++i) { - GenericRecord user = new GenericData.Record(recordSchema); - - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - - key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - - user.put("opalSegmentIdPart", 0); - user.put("opalSegmentIdSeq", 0); - user.put("opalSegmentOffset", (long) 0); - user.put("metadata", new HashMap<>()); - - user.put("key", key); - user.put("value", null); - user.put("offset", (long) i); - user.put("DELETED_TS", (long) i); - - writer.append(user); - } - }); - } - - public static Schema writeETLFileWithUnionWithoutNullSchema(File parentDir, boolean fileNameWithAvroSuffix) - throws IOException { - String fileName; - if (fileNameWithAvroSuffix) { - fileName = "simple_etl_union_without_null.avro"; - } else { - fileName = "simple_etl_union_without_null"; - } - - return writeAvroFile( - parentDir, - fileName, - getETLStoreSchemaString(ETL_KEY_SCHEMA_STRING, ETL_UNION_VALUE_SCHEMA_STRING_WITHOUT_NULL), - (recordSchema, writer) -> { - for (int i = 1; i <= 25; ++i) { - GenericRecord user = new GenericData.Record(recordSchema); - - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - - key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - - user.put("opalSegmentIdPart", 0); - user.put("opalSegmentIdSeq", 0); - user.put("opalSegmentOffset", (long) 0); - user.put("metadata", new HashMap<>()); - - user.put("key", key); - user.put("value", "string_" + i); - user.put("offset", (long) i); - user.put("DELETED_TS", null); - - writer.append(user); - } - - for (int i = 26; i <= 50; ++i) { - GenericRecord user = new GenericData.Record(recordSchema); - - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - - key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - - user.put("opalSegmentIdPart", 0); - user.put("opalSegmentIdSeq", 0); - user.put("opalSegmentOffset", (long) 0); - user.put("metadata", 
new HashMap<>()); - - user.put("key", key); - user.put("value", i); - user.put("offset", (long) i); - user.put("DELETED_TS", null); - - writer.append(user); - } - - for (int i = 51; i <= 100; ++i) { - GenericRecord user = new GenericData.Record(recordSchema); - - GenericRecord key = new GenericData.Record(Schema.parse(ETL_KEY_SCHEMA_STRING)); - - key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - - user.put("opalSegmentIdPart", 0); - user.put("opalSegmentIdSeq", 0); - user.put("opalSegmentOffset", (long) 0); - user.put("metadata", new HashMap<>()); - - user.put("key", key); - user.put("value", null); - user.put("offset", (long) i); - user.put("DELETED_TS", (long) i); - - writer.append(user); - } - }); - } - - public static Schema writeAlternateSimpleAvroFileWithUserSchema(File parentDir, boolean fileNameWithAvroSuffix) - throws IOException { - String fileName; - if (fileNameWithAvroSuffix) { - fileName = "simple_user.avro"; - } else { - fileName = "simple_user"; - } - return writeAvroFile(parentDir, fileName, USER_SCHEMA_STRING, (recordSchema, writer) -> { - String name = "alternate_test_name_"; - for (int i = 1; i <= 100; ++i) { + char[] chars = new char[recordSizeMin]; + return writeAvroFile(parentDir, "string2string.avro", STRING_TO_STRING_SCHEMA, (recordSchema, writer) -> { + for (int i = 1; i <= recordCount; ++i) { GenericRecord user = new GenericData.Record(recordSchema); user.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - user.put(DEFAULT_VALUE_FIELD_PROP, name + i); - user.put("age", i); + Arrays.fill(chars, String.valueOf(i).charAt(0)); + user.put(DEFAULT_VALUE_FIELD_PROP, String.copyValueOf(chars)); writer.append(user); } }); } - public static Schema writeSimpleAvroFileWithIntToStringSchema(File parentDir, boolean fileNameWithAvroSuffix) - throws IOException { - return writeSimpleAvroFileWithIntToStringSchema(parentDir, fileNameWithAvroSuffix, 100); - } - - public static Schema writeSimpleAvroFileWithIntToStringSchema( - File parentDir, - boolean fileNameWithAvroSuffix, - int recordCount) throws IOException { - String fileName; - if (fileNameWithAvroSuffix) { - fileName = "simple_int2string.avro"; - } else { - fileName = "simple_int2string"; - } - return writeAvroFile(parentDir, fileName, INT_STRING_SCHEMA_STRING, (recordSchema, writer) -> { - for (int i = 1; i <= recordCount; ++i) { - GenericRecord i2i = new GenericData.Record(recordSchema); - i2i.put(DEFAULT_KEY_FIELD_PROP, i); - i2i.put(DEFAULT_VALUE_FIELD_PROP, "name " + Integer.toString(i)); - writer.append(i2i); - } - }); - } - - public static Schema writeSimpleAvroFileWithStringToStringSchema(File parentDir, boolean fileNameWithAvroSuffix) - throws IOException { - String fileName; - if (fileNameWithAvroSuffix) { - fileName = "simple_string2string.avro"; - } else { - fileName = "simple_string2string"; - } - return writeAvroFile(parentDir, fileName, STRING_STRING_SCHEMA_STRING, (recordSchema, writer) -> { - for (int i = 1; i <= 100; ++i) { - GenericRecord s2s = new GenericData.Record(recordSchema); - s2s.put(DEFAULT_KEY_FIELD_PROP, "jobPosting:" + i); - s2s.put(DEFAULT_VALUE_FIELD_PROP, String.valueOf(i)); - writer.append(s2s); - } - }); - } - - public static Schema writeSimpleAvroFileWithStringToRecordSchema(File parentDir, boolean fileNameWithAvroSuffix) - throws IOException { - String fileName; - if (fileNameWithAvroSuffix) { - fileName = "simple_string2record.avro"; - } else { - fileName = "simple_string2record"; - } - return writeAvroFile(parentDir, fileName, STRING_RECORD_SCHEMA_STRING, (recordSchema, writer) 
-> { - Schema valueSchema = AvroCompatibilityHelper.parse(NESTED_SCHEMA_STRING); - String firstName = "first_name_"; - String lastName = "last_name_"; - for (int i = 1; i <= 100; ++i) { - GenericRecord keyValueRecord = new GenericData.Record(recordSchema); - keyValueRecord.put(DEFAULT_KEY_FIELD_PROP, String.valueOf(i)); // Key - GenericRecord valueRecord = new GenericData.Record(valueSchema); - valueRecord.put("firstName", firstName + i); - valueRecord.put("lastName", lastName + i); - keyValueRecord.put(DEFAULT_VALUE_FIELD_PROP, valueRecord); // Value - writer.append(keyValueRecord); - } - }); - } - - public static Schema writeSimpleAvroFileWithStringToPartialUpdateOpRecordSchema( - File parentDir, - boolean fileNameWithAvroSuffix) throws IOException { - String fileName; - if (fileNameWithAvroSuffix) { - fileName = "simple_string2record.avro"; - } else { - fileName = "simple_string2record"; - } - return writeAvroFile(parentDir, fileName, PARTIAL_UPDATE_STRING_RECORD_SCHEMA_STRING, (recordSchema, writer) -> { - Schema valueSchema = AvroCompatibilityHelper.parse(UPDATE_SCHEMA_OF_NESTED_SCHEMA_STRING); - String firstName = "first_name_"; - String lastName = "last_name_"; - for (int i = 1; i <= 100; ++i) { - GenericRecord keyValueRecord = new GenericData.Record(recordSchema); - keyValueRecord.put(DEFAULT_KEY_FIELD_PROP, String.valueOf(i)); // Key - GenericRecord valueRecord = new UpdateBuilderImpl(valueSchema).setNewFieldValue("firstName", firstName + i) - .setNewFieldValue("lastName", lastName + i) - .build(); - keyValueRecord.put(DEFAULT_VALUE_FIELD_PROP, valueRecord); // Value - writer.append(keyValueRecord); + public static Schema writeAlternateSimpleAvroFileWithStringToStringSchema(File parentDir) throws IOException { + return writeAvroFile(parentDir, "string2string.avro", STRING_TO_STRING_SCHEMA, (recordSchema, writer) -> { + for (int i = 1; i <= DEFAULT_USER_DATA_RECORD_COUNT; ++i) { + GenericRecord user = new GenericData.Record(recordSchema); + user.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); + user.put(DEFAULT_VALUE_FIELD_PROP, "alternate_test_name_" + i); + writer.append(user); } }); } /** - * This file overrides half of the value in {@link #writeSimpleAvroFileWithUserSchema(File)} + * This file overrides half of the value in {@link #writeSimpleAvroFileWithStringToStringSchema(File)} * and add some new values. 
* It's designed to test incremental push */ - public static Schema writeSimpleAvroFileWithUserSchema2(File parentDir) throws IOException { - return writeAvroFile(parentDir, "simple_user.avro", USER_SCHEMA_STRING, (recordSchema, writer) -> { + public static Schema writeSimpleAvroFileWithStringToStringSchema2(File parentDir) throws IOException { + return writeAvroFile(parentDir, "string2string.avro", STRING_TO_STRING_SCHEMA, (recordSchema, writer) -> { for (int i = 51; i <= 150; ++i) { GenericRecord user = new GenericData.Record(recordSchema); user.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); user.put(DEFAULT_VALUE_FIELD_PROP, DEFAULT_USER_DATA_VALUE_PREFIX + (i * 2)); - user.put("age", i * 2); writer.append(user); } }); } /** - * This file add some new value in {@link #writeSimpleAvroFileWithUserSchema(File)} + * This file add some new value in {@link #writeSimpleAvroFileWithStringToStringSchema(File)} * It's designed to test incremental push */ - public static Schema writeSimpleAvroFileWithUserSchema3(File parentDir) throws IOException { - return writeAvroFile(parentDir, "simple_user.avro", USER_SCHEMA_STRING, (recordSchema, writer) -> { + public static Schema writeSimpleAvroFileWithString2StringSchema3(File parentDir) throws IOException { + return writeAvroFile(parentDir, "string2string.avro", STRING_TO_STRING_SCHEMA, (recordSchema, writer) -> { for (int i = 51; i <= 200; ++i) { GenericRecord user = new GenericData.Record(recordSchema); user.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); user.put(DEFAULT_VALUE_FIELD_PROP, DEFAULT_USER_DATA_VALUE_PREFIX + (i * 3)); - user.put("age", i * 3); writer.append(user); } }); } - public static Schema writeSimpleAvroFileWithDuplicateKey(File parentDir) throws IOException { - return writeAvroFile(parentDir, "duplicate_key_user.avro", USER_SCHEMA_STRING, (recordSchema, avroFileWriter) -> { - for (int i = 0; i < 100; i++) { - GenericRecord user = new GenericData.Record(recordSchema); - user.put(DEFAULT_KEY_FIELD_PROP, i % 10 == 0 ? "0" : Integer.toString(i)); // DEFAULT_KEY_FIELD_PROP is the key - user.put(DEFAULT_VALUE_FIELD_PROP, "test_name" + i); - user.put("age", i); - avroFileWriter.append(user); + public static Schema writeSimpleAvroFileWithDuplicateKey(File parentDir) throws IOException { + return writeAvroFile( + parentDir, + "duplicate_key_user.avro", + STRING_TO_STRING_SCHEMA, + (recordSchema, avroFileWriter) -> { + for (int i = 0; i < 100; i++) { + GenericRecord user = new GenericData.Record(recordSchema); + user.put(DEFAULT_KEY_FIELD_PROP, i % 10 == 0 ? 
"0" : Integer.toString(i)); + user.put(DEFAULT_VALUE_FIELD_PROP, "test_name" + i); + avroFileWriter.append(user); + } + }); + } + + public static Schema writeSimpleAvroFileWithCustomSize( + File parentDir, + int numberOfRecords, + int minValueSize, + int maxValueSize) throws IOException { + return writeAvroFile( + parentDir, + "string2string_large.avro", + STRING_TO_STRING_SCHEMA, + (recordSchema, avroFileWriter) -> { + int sizeRange = maxValueSize - minValueSize; + for (int i = 0; i < numberOfRecords; i++) { + int sizeForThisRecord = minValueSize + sizeRange / numberOfRecords * (i + 1); + GenericRecord user = new GenericData.Record(recordSchema); + user.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); + char[] chars = new char[sizeForThisRecord]; + Arrays.fill(chars, Integer.toString(i).charAt(0)); + Utf8 utf8Value = new Utf8(new String(chars)); + user.put(DEFAULT_VALUE_FIELD_PROP, utf8Value); + avroFileWriter.append(user); + } + }); + } + + public static Schema writeSimpleAvroFileWithIntToStringSchema(File parentDir) throws IOException { + return writeAvroFile(parentDir, "int2string.avro", INT_TO_STRING_SCHEMA, (recordSchema, writer) -> { + for (int i = 1; i <= DEFAULT_USER_DATA_RECORD_COUNT; ++i) { + GenericRecord i2s = new GenericData.Record(recordSchema); + i2s.put(DEFAULT_KEY_FIELD_PROP, i); + i2s.put(DEFAULT_VALUE_FIELD_PROP, "name " + i); + writer.append(i2s); } }); } @@ -629,38 +271,53 @@ public static void writeInvalidAvroFile(File parentDir, String fileName) throws writer.close(); } - public static Schema writeEmptyAvroFileWithUserSchema(File parentDir, String fileName, String schema) - throws IOException { + public static Schema writeEmptyAvroFile(File parentDir, Schema schema) throws IOException { + return writeEmptyAvroFile(parentDir, "empty_file.avro", schema); + } + + public static Schema writeEmptyAvroFile(File parentDir, String fileName, Schema schema) throws IOException { return writeAvroFile(parentDir, fileName, schema, (recordSchema, avroFileWriter) -> { // No-op so that the file is empty }); } - public static Schema writeEmptyAvroFileWithUserSchema(File parentDir) throws IOException { - return writeEmptyAvroFileWithUserSchema(parentDir, "empty_file.avro", USER_SCHEMA_STRING); - } - - public static Schema writeSimpleAvroFileWithCustomSize( - File parentDir, - int numberOfRecords, - int minValueSize, - int maxValueSize) throws IOException { - return writeAvroFile(parentDir, "large_values.avro", USER_SCHEMA_STRING, (recordSchema, avroFileWriter) -> { - int sizeRange = maxValueSize - minValueSize; - for (int i = 0; i < numberOfRecords; i++) { - int sizeForThisRecord = minValueSize + sizeRange / numberOfRecords * (i + 1); - GenericRecord user = new GenericData.Record(recordSchema); - user.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); // DEFAULT_KEY_FIELD_PROP is the key - char[] chars = new char[sizeForThisRecord]; - Arrays.fill(chars, Integer.toString(i).charAt(0)); - Utf8 utf8Value = new Utf8(new String(chars)); - user.put(DEFAULT_VALUE_FIELD_PROP, utf8Value); - user.put("age", i); - avroFileWriter.append(user); + public static Schema writeSimpleAvroFileWithStringToRecordSchema(File parentDir) throws IOException { + return writeAvroFile(parentDir, "string2record.avro", STRING_TO_NAME_RECORD_V1_SCHEMA, (recordSchema, writer) -> { + String firstName = "first_name_"; + String lastName = "last_name_"; + for (int i = 1; i <= DEFAULT_USER_DATA_RECORD_COUNT; ++i) { + GenericRecord keyValueRecord = new GenericData.Record(recordSchema); + 
keyValueRecord.put(DEFAULT_KEY_FIELD_PROP, String.valueOf(i)); // Key + GenericRecord valueRecord = new GenericData.Record(NAME_RECORD_V1_SCHEMA); + valueRecord.put("firstName", firstName + i); + valueRecord.put("lastName", lastName + i); + keyValueRecord.put(DEFAULT_VALUE_FIELD_PROP, valueRecord); // Value + writer.append(keyValueRecord); } }); } + public static Schema writeSimpleAvroFileWithStringToPartialUpdateOpRecordSchema(File parentDir) throws IOException { + return writeAvroFile( + parentDir, + "string2record.avro", + STRING_TO_NAME_RECORD_V1_UPDATE_SCHEMA, + (recordSchema, writer) -> { + String firstName = "first_name_"; + String lastName = "last_name_"; + for (int i = 1; i <= 100; ++i) { + GenericRecord keyValueRecord = new GenericData.Record(recordSchema); + keyValueRecord.put(DEFAULT_KEY_FIELD_PROP, String.valueOf(i)); // Key + GenericRecord valueRecord = + new UpdateBuilderImpl(NAME_RECORD_V1_UPDATE_SCHEMA).setNewFieldValue("firstName", firstName + i) + .setNewFieldValue("lastName", lastName + i) + .build(); + keyValueRecord.put(DEFAULT_VALUE_FIELD_PROP, valueRecord); // Value + writer.append(keyValueRecord); + } + }); + } + public static Schema writeSimpleAvroFileWithASchemaWithAWrongDefaultValue(File parentDir, int numberOfRecords) throws IOException { final String schemaWithWrongDefaultValue = "{\n" + " \"namespace\": \"example.avro\",\n" @@ -670,21 +327,18 @@ public static Schema writeSimpleAvroFileWithASchemaWithAWrongDefaultValue(File p + " \"fields\": [\n" + " {\"name\": \"" + DEFAULT_KEY_FIELD_PROP + "\", \"type\": \"string\"},\n" + " {\"name\": \"score\", \"type\": \"float\", \"default\": 0}\n" + " ]}\n" + " }\n" + " ]\n" + "}"; - return writeAvroFile( - parentDir, - "record_with_wrong_default.avro", - schemaWithWrongDefaultValue, - (recordSchema, avroFileWriter) -> { - for (int i = 0; i < numberOfRecords; i++) { - GenericRecord simpleRecord = new GenericData.Record(recordSchema); - simpleRecord.put("key", Integer.toString(i)); - GenericRecord value = new GenericData.Record(recordSchema.getField("value").schema()); - value.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); - value.put("score", 100.0f); - simpleRecord.put("value", value); - avroFileWriter.append(simpleRecord); - } - }); + Schema schema = AvroSchemaParseUtils.parseSchemaFromJSON(schemaWithWrongDefaultValue, false); + return writeAvroFile(parentDir, "record_with_wrong_default.avro", schema, (recordSchema, avroFileWriter) -> { + for (int i = 0; i < numberOfRecords; i++) { + GenericRecord simpleRecord = new GenericData.Record(recordSchema); + simpleRecord.put("key", Integer.toString(i)); + GenericRecord value = new GenericData.Record(recordSchema.getField("value").schema()); + value.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); + value.put("score", 100.0f); + simpleRecord.put("value", value); + avroFileWriter.append(simpleRecord); + } + }); } public static Schema writeAvroFileWithManyFloatsAndCustomTotalSize( @@ -695,7 +349,7 @@ public static Schema writeAvroFileWithManyFloatsAndCustomTotalSize( return writeAvroFile( parentDir, "many_floats.avro", - USER_SCHEMA_WITH_A_FLOAT_ARRAY_STRING, + USER_WITH_FLOAT_ARRAY_SCHEMA, (recordSchema, avroFileWriter) -> { int sizeRange = maxValueSize - minValueSize; for (int i = 0; i < numberOfRecords; i++) { @@ -718,12 +372,8 @@ public static GenericRecord getRecordWithFloatArray(Schema recordSchema, int ind return user; } - private static Schema writeAvroFile( - File parentDir, - String fileName, - String recordSchemaStr, - AvroFileWriter fileWriter) throws IOException 
{ - Schema recordSchema = AvroCompatibilityHelper.parse(recordSchemaStr); + private static Schema writeAvroFile(File parentDir, String fileName, Schema recordSchema, AvroFileWriter fileWriter) + throws IOException { File file = new File(parentDir, fileName); DatumWriter datumWriter = new GenericDatumWriter<>(recordSchema); @@ -731,7 +381,6 @@ private static Schema writeAvroFile( dataFileWriter.create(recordSchema, file); fileWriter.write(recordSchema, dataFileWriter); } - return recordSchema; } @@ -1008,10 +657,24 @@ private static Properties defaultVPJPropsInternal(Properties props, String input return props; } - public static String loadFileAsString(String fileName) throws IOException { - return IOUtils.toString( - Objects.requireNonNull(Thread.currentThread().getContextClassLoader().getResourceAsStream(fileName)), - StandardCharsets.UTF_8); + public static String loadFileAsString(String fileName) { + try { + return IOUtils.toString( + Objects.requireNonNull(Thread.currentThread().getContextClassLoader().getResourceAsStream(fileName)), + StandardCharsets.UTF_8); + } catch (IOException e) { + throw new VeniceException(e); + } + } + + public static String loadSchemaFileFromResource(String fileName) { + try { + return IOUtils.toString( + Objects.requireNonNull(TestWriteUtils.class.getClassLoader().getResourceAsStream(fileName)), + StandardCharsets.UTF_8); + } catch (IOException e) { + throw new VeniceException(e); + } } public static String loadFileAsStringQuietlyWithErrorLogged(String fileName) { @@ -1032,10 +695,214 @@ public static void updateStore(String storeName, ControllerClient controllerClie "The UpdateStore response returned an error: " + controllerResponse.getError()); } - public static String getETLStoreSchemaString(String keySchema, String valueSchema) { + public static Schema writeSimpleAvroFileForValidateSchemaAndBuildDictMapperOutput( + File parentDir, + String file, + long inputFileDataSize, + ByteBuffer zstdDictionary, + Schema avroSchema) throws IOException { + return writeAvroFile(parentDir, file, avroSchema, (recordSchema, writer) -> { + GenericRecord user = new GenericData.Record(recordSchema); + user.put(KEY_INPUT_FILE_DATA_SIZE, inputFileDataSize); + if (zstdDictionary != null) { + user.put(KEY_ZSTD_COMPRESSION_DICTIONARY, zstdDictionary); + } + writer.append(user); + }); + } + + public static Schema writeETLFileWithUserSchema(File parentDir) throws IOException { + String fileName = "simple_etl_user.avro"; + return writeAvroFile( + parentDir, + fileName, + getETLStoreSchemaString(ETL_KEY_SCHEMA.toString(), ETL_VALUE_SCHEMA.toString()), + (recordSchema, writer) -> { + for (int i = 1; i <= 50; ++i) { + GenericRecord user = new GenericData.Record(recordSchema); + + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); + GenericRecord value = new GenericData.Record(ETL_VALUE_SCHEMA); + + key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); + value.put(DEFAULT_VALUE_FIELD_PROP, DEFAULT_USER_DATA_VALUE_PREFIX + i); + + user.put("opalSegmentIdPart", 0); + user.put("opalSegmentIdSeq", 0); + user.put("opalSegmentOffset", (long) 0); + user.put("metadata", new HashMap<>()); + + user.put("key", key); + user.put("value", value); + user.put("offset", (long) i); + user.put("DELETED_TS", null); + + writer.append(user); + } + + for (int i = 51; i <= 100; ++i) { + GenericRecord user = new GenericData.Record(recordSchema); + + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); + + key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); + + user.put("opalSegmentIdPart", 
0); + user.put("opalSegmentIdSeq", 0); + user.put("opalSegmentOffset", (long) 0); + user.put("metadata", new HashMap<>()); + + user.put("key", key); + user.put("value", null); + user.put("offset", (long) i); + user.put("DELETED_TS", (long) i); + + writer.append(user); + } + }); + } + + public static Schema writeETLFileWithUnionWithNullSchema(File parentDir) throws IOException { + return writeAvroFile( + parentDir, + "simple_etl_union_with_null.avro", + getETLStoreSchemaString(ETL_KEY_SCHEMA.toString(), ETL_UNION_VALUE_WITH_NULL_SCHEMA.toString()), + (recordSchema, writer) -> { + for (int i = 1; i <= 25; ++i) { + GenericRecord user = new GenericData.Record(recordSchema); + + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); + + key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); + + user.put("opalSegmentIdPart", 0); + user.put("opalSegmentIdSeq", 0); + user.put("opalSegmentOffset", (long) 0); + user.put("metadata", new HashMap<>()); + + user.put("key", key); + user.put("value", "string_" + i); + user.put("offset", (long) i); + user.put("DELETED_TS", null); + + writer.append(user); + } + + for (int i = 26; i <= 50; ++i) { + GenericRecord user = new GenericData.Record(recordSchema); + + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); + + key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); + + user.put("opalSegmentIdPart", 0); + user.put("opalSegmentIdSeq", 0); + user.put("opalSegmentOffset", (long) 0); + user.put("metadata", new HashMap<>()); + + user.put("key", key); + user.put("value", i); + user.put("offset", (long) i); + user.put("DELETED_TS", null); + + writer.append(user); + } + + for (int i = 51; i <= 100; ++i) { + GenericRecord user = new GenericData.Record(recordSchema); + + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); + + key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); + + user.put("opalSegmentIdPart", 0); + user.put("opalSegmentIdSeq", 0); + user.put("opalSegmentOffset", (long) 0); + user.put("metadata", new HashMap<>()); + + user.put("key", key); + user.put("value", null); + user.put("offset", (long) i); + user.put("DELETED_TS", (long) i); + + writer.append(user); + } + }); + } + + public static Schema writeETLFileWithUnionWithoutNullSchema(File parentDir) throws IOException { + + return writeAvroFile( + parentDir, + "simple_etl_union_without_null.avro", + getETLStoreSchemaString(ETL_KEY_SCHEMA.toString(), ETL_UNION_VALUE_WITHOUT_NULL_SCHEMA.toString()), + (recordSchema, writer) -> { + for (int i = 1; i <= 25; ++i) { + GenericRecord user = new GenericData.Record(recordSchema); + + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); + + key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); + + user.put("opalSegmentIdPart", 0); + user.put("opalSegmentIdSeq", 0); + user.put("opalSegmentOffset", (long) 0); + user.put("metadata", new HashMap<>()); + + user.put("key", key); + user.put("value", "string_" + i); + user.put("offset", (long) i); + user.put("DELETED_TS", null); + + writer.append(user); + } + + for (int i = 26; i <= 50; ++i) { + GenericRecord user = new GenericData.Record(recordSchema); + + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); + + key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); + + user.put("opalSegmentIdPart", 0); + user.put("opalSegmentIdSeq", 0); + user.put("opalSegmentOffset", (long) 0); + user.put("metadata", new HashMap<>()); + + user.put("key", key); + user.put("value", i); + user.put("offset", (long) i); + user.put("DELETED_TS", null); + + writer.append(user); + } + + for (int i = 51; i 
<= 100; ++i) { + GenericRecord user = new GenericData.Record(recordSchema); + + GenericRecord key = new GenericData.Record(ETL_KEY_SCHEMA); + + key.put(DEFAULT_KEY_FIELD_PROP, Integer.toString(i)); + + user.put("opalSegmentIdPart", 0); + user.put("opalSegmentIdSeq", 0); + user.put("opalSegmentOffset", (long) 0); + user.put("metadata", new HashMap<>()); + + user.put("key", key); + user.put("value", null); + user.put("offset", (long) i); + user.put("DELETED_TS", (long) i); + + writer.append(user); + } + }); + } + + public static Schema getETLStoreSchemaString(String keySchema, String valueSchema) { String finalValueSchema = ETLUtils.transformValueSchemaForETL(AvroCompatibilityHelper.parse(valueSchema)).toString(); - return "{\n" + " \"type\": \"record\",\n" + " \"name\": \"storeName_v1\",\n" + String fileSchema = "{\n" + " \"type\": \"record\",\n" + " \"name\": \"storeName_v1\",\n" + " \"namespace\": \"com.linkedin.gobblin.venice.model\",\n" + " \"fields\": [\n" + " {\n" + " \"name\": \"opalSegmentIdPart\",\n" + " \"type\": \"int\",\n" + " \"doc\": \"Opal segment id partition\"\n" + " },\n" + " {\n" @@ -1056,6 +923,7 @@ public static String getETLStoreSchemaString(String keySchema, String valueSchem + " \"avro.java.string\": \"String\"\n" + " },\n" + " \"doc\": \"Metadata of the record; currently it contains the schemaId of the record\",\n" + " \"default\": {}\n" + " }\n" + " ]\n" + "}"; + return AvroCompatibilityHelper.parse(fileSchema); } public static void runPushJob(String jobId, Properties props) { diff --git a/internal/venice-test-common/src/main/resources/etl/Key.avsc b/internal/venice-test-common/src/main/resources/etl/Key.avsc new file mode 100644 index 0000000000..2f2bc4f0ce --- /dev/null +++ b/internal/venice-test-common/src/main/resources/etl/Key.avsc @@ -0,0 +1,9 @@ +{ + "type" : "record", + "name" : "key", + "namespace" : "com.linkedin.venice.testkey", + "fields" : [ { + "name" : "key", + "type" : "string" + } ] +} diff --git a/internal/venice-test-common/src/main/resources/etl/UnionValueWithNull.avsc b/internal/venice-test-common/src/main/resources/etl/UnionValueWithNull.avsc new file mode 100644 index 0000000000..d9cdbf2822 --- /dev/null +++ b/internal/venice-test-common/src/main/resources/etl/UnionValueWithNull.avsc @@ -0,0 +1 @@ +[ "int", "string", "null" ] diff --git a/internal/venice-test-common/src/main/resources/etl/UnionValueWithoutNull.avsc b/internal/venice-test-common/src/main/resources/etl/UnionValueWithoutNull.avsc new file mode 100644 index 0000000000..f97481b691 --- /dev/null +++ b/internal/venice-test-common/src/main/resources/etl/UnionValueWithoutNull.avsc @@ -0,0 +1 @@ +[ "int", "string" ] diff --git a/internal/venice-test-common/src/main/resources/etl/Value.avsc b/internal/venice-test-common/src/main/resources/etl/Value.avsc new file mode 100644 index 0000000000..635c2ca2e4 --- /dev/null +++ b/internal/venice-test-common/src/main/resources/etl/Value.avsc @@ -0,0 +1,10 @@ +{ + "type" : "record", + "name" : "value", + "namespace" : "com.linkedin.venice.testvalue", + "fields" : [ { + "name" : "value", + "type" : "string" + } ], + "version" : 10 +} diff --git a/internal/venice-test-common/src/main/resources/valueSchema/NameV1.avsc b/internal/venice-test-common/src/main/resources/valueSchema/NameV1.avsc new file mode 100644 index 0000000000..48d6954c5c --- /dev/null +++ b/internal/venice-test-common/src/main/resources/valueSchema/NameV1.avsc @@ -0,0 +1,14 @@ +{ + "type" : "record", + "name" : "nameRecord", + "namespace" : "example.avro", + "fields" : [ { + "name" 
: "firstName", + "type" : "string", + "default" : "" + }, { + "name" : "lastName", + "type" : "string", + "default" : "" + } ] +} diff --git a/internal/venice-test-common/src/main/resources/valueSchema/NameV2.avsc b/internal/venice-test-common/src/main/resources/valueSchema/NameV2.avsc new file mode 100644 index 0000000000..382a73be56 --- /dev/null +++ b/internal/venice-test-common/src/main/resources/valueSchema/NameV2.avsc @@ -0,0 +1,18 @@ +{ + "type" : "record", + "name" : "nameRecord", + "namespace" : "example.avro", + "fields" : [ { + "name" : "firstName", + "type" : "string", + "default" : "" + }, { + "name" : "lastName", + "type" : "string", + "default" : "" + }, { + "name" : "age", + "type" : "int", + "default" : -1 + } ] +} diff --git a/internal/venice-test-common/src/main/resources/valueSchema/NameV3.avsc b/internal/venice-test-common/src/main/resources/valueSchema/NameV3.avsc new file mode 100644 index 0000000000..b49686ef5a --- /dev/null +++ b/internal/venice-test-common/src/main/resources/valueSchema/NameV3.avsc @@ -0,0 +1,18 @@ +{ + "type" : "record", + "name" : "nameRecord", + "namespace" : "example.avro", + "fields" : [ { + "name" : "firstName", + "type" : "string", + "default" : "" + }, { + "name" : "lastName", + "type" : "string", + "default" : "" + }, { + "name" : "height", + "type" : "int", + "default" : -1 + } ] +} diff --git a/internal/venice-test-common/src/main/resources/valueSchema/NameV4.avsc b/internal/venice-test-common/src/main/resources/valueSchema/NameV4.avsc new file mode 100644 index 0000000000..2410915355 --- /dev/null +++ b/internal/venice-test-common/src/main/resources/valueSchema/NameV4.avsc @@ -0,0 +1,18 @@ +{ + "type" : "record", + "name" : "nameRecord", + "namespace" : "example.avro", + "fields" : [ { + "name" : "firstName", + "type" : "string", + "default" : "" + }, { + "name" : "lastName", + "type" : "string", + "default" : "" + }, { + "name" : "height", + "type" : "long", + "default" : -1 + } ] +} diff --git a/internal/venice-test-common/src/main/resources/valueSchema/SimpleUserWithDefault.avsc b/internal/venice-test-common/src/main/resources/valueSchema/SimpleUserWithDefault.avsc new file mode 100644 index 0000000000..3159648d20 --- /dev/null +++ b/internal/venice-test-common/src/main/resources/valueSchema/SimpleUserWithDefault.avsc @@ -0,0 +1,14 @@ +{ + "type" : "record", + "name" : "User", + "namespace" : "example.avro", + "fields" : [ { + "name" : "key", + "type" : "string", + "default" : "" + }, { + "name" : "value", + "type" : "string", + "default" : "" + } ] +} diff --git a/internal/venice-test-common/src/main/resources/valueSchema/User.avsc b/internal/venice-test-common/src/main/resources/valueSchema/User.avsc new file mode 100644 index 0000000000..f931971f8f --- /dev/null +++ b/internal/venice-test-common/src/main/resources/valueSchema/User.avsc @@ -0,0 +1,15 @@ +{ + "type" : "record", + "name" : "User", + "namespace" : "example.avro", + "fields" : [ { + "name" : "key", + "type" : "string" + }, { + "name" : "value", + "type" : "string" + }, { + "name" : "age", + "type" : "int" + } ] +} \ No newline at end of file diff --git a/internal/venice-test-common/src/main/resources/valueSchema/UserWithDefault.avsc b/internal/venice-test-common/src/main/resources/valueSchema/UserWithDefault.avsc new file mode 100644 index 0000000000..87a20bfb45 --- /dev/null +++ b/internal/venice-test-common/src/main/resources/valueSchema/UserWithDefault.avsc @@ -0,0 +1,18 @@ +{ + "type" : "record", + "name" : "User", + "namespace" : "example.avro", + "fields" : [ { 
+ "name" : "key", + "type" : "string", + "default" : "" + }, { + "name" : "value", + "type" : "string", + "default" : "" + }, { + "name" : "age", + "type" : "int", + "default" : 1 + } ] +} diff --git a/internal/venice-test-common/src/main/resources/valueSchema/UserWithFloatArray.avsc b/internal/venice-test-common/src/main/resources/valueSchema/UserWithFloatArray.avsc new file mode 100644 index 0000000000..b74eef604f --- /dev/null +++ b/internal/venice-test-common/src/main/resources/valueSchema/UserWithFloatArray.avsc @@ -0,0 +1,18 @@ +{ + "type" : "record", + "name" : "ManyFloats", + "namespace" : "example.avro", + "fields" : [ { + "name" : "key", + "type" : "string" + }, { + "name" : "value", + "type" : { + "type" : "array", + "items" : "float" + } + }, { + "name" : "age", + "type" : "int" + } ] +} diff --git a/internal/venice-test-common/src/main/resources/valueSchema/primitive/Int.avsc b/internal/venice-test-common/src/main/resources/valueSchema/primitive/Int.avsc new file mode 100644 index 0000000000..f08c4dfdee --- /dev/null +++ b/internal/venice-test-common/src/main/resources/valueSchema/primitive/Int.avsc @@ -0,0 +1 @@ +"int" \ No newline at end of file diff --git a/internal/venice-test-common/src/main/resources/valueSchema/primitive/String.avsc b/internal/venice-test-common/src/main/resources/valueSchema/primitive/String.avsc new file mode 100644 index 0000000000..1f13d5d498 --- /dev/null +++ b/internal/venice-test-common/src/main/resources/valueSchema/primitive/String.avsc @@ -0,0 +1 @@ +"string" \ No newline at end of file diff --git a/internal/venice-test-common/src/integrationTest/resources/supersetschemas/ValueV1.avsc b/internal/venice-test-common/src/main/resources/valueSchema/supersetschemas/ValueV1.avsc similarity index 100% rename from internal/venice-test-common/src/integrationTest/resources/supersetschemas/ValueV1.avsc rename to internal/venice-test-common/src/main/resources/valueSchema/supersetschemas/ValueV1.avsc diff --git a/internal/venice-test-common/src/integrationTest/resources/supersetschemas/ValueV2.avsc b/internal/venice-test-common/src/main/resources/valueSchema/supersetschemas/ValueV2.avsc similarity index 100% rename from internal/venice-test-common/src/integrationTest/resources/supersetschemas/ValueV2.avsc rename to internal/venice-test-common/src/main/resources/valueSchema/supersetschemas/ValueV2.avsc diff --git a/internal/venice-test-common/src/integrationTest/resources/supersetschemas/ValueV3.avsc b/internal/venice-test-common/src/main/resources/valueSchema/supersetschemas/ValueV3.avsc similarity index 100% rename from internal/venice-test-common/src/integrationTest/resources/supersetschemas/ValueV3.avsc rename to internal/venice-test-common/src/main/resources/valueSchema/supersetschemas/ValueV3.avsc diff --git a/internal/venice-test-common/src/integrationTest/resources/supersetschemas/ValueV4.avsc b/internal/venice-test-common/src/main/resources/valueSchema/supersetschemas/ValueV4.avsc similarity index 100% rename from internal/venice-test-common/src/integrationTest/resources/supersetschemas/ValueV4.avsc rename to internal/venice-test-common/src/main/resources/valueSchema/supersetschemas/ValueV4.avsc diff --git a/internal/venice-test-common/src/integrationTest/resources/supersetschemas/ValueV5.avsc b/internal/venice-test-common/src/main/resources/valueSchema/supersetschemas/ValueV5.avsc similarity index 100% rename from internal/venice-test-common/src/integrationTest/resources/supersetschemas/ValueV5.avsc rename to 
internal/venice-test-common/src/main/resources/valueSchema/supersetschemas/ValueV5.avsc diff --git a/internal/venice-test-common/src/integrationTest/resources/supersetschemas/ValueV6.avsc b/internal/venice-test-common/src/main/resources/valueSchema/supersetschemas/ValueV6.avsc similarity index 100% rename from internal/venice-test-common/src/integrationTest/resources/supersetschemas/ValueV6.avsc rename to internal/venice-test-common/src/main/resources/valueSchema/supersetschemas/ValueV6.avsc
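Reviewer note (not part of the patch): a minimal usage sketch of the schema-object-based helpers added above. It assumes only what the diff shows, namely that writeSimpleAvroFileWithStringToPartialUpdateOpRecordSchema(File) writes 100 string-to-update-record rows via UpdateBuilderImpl and returns the record Schema it wrote with; the class name and main() wrapper are illustrative only.

import java.io.File;
import java.io.IOException;

import org.apache.avro.Schema;

import com.linkedin.venice.utils.TestWriteUtils;
import com.linkedin.venice.utils.Utils;

public class PartialUpdateInputFileSketch {
  public static void main(String[] args) throws IOException {
    // Temp directory for the generated "string2record.avro" input file.
    File inputDir = Utils.getTempDataDirectory();

    // The helper builds each value against NAME_RECORD_V1_UPDATE_SCHEMA and returns
    // the file's record schema, so callers no longer parse schema strings themselves.
    Schema fileSchema = TestWriteUtils.writeSimpleAvroFileWithStringToPartialUpdateOpRecordSchema(inputDir);

    System.out.println("Wrote partial-update input with schema: " + fileSchema.toString(true));
  }
}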
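Similarly, a hedged sketch of consuming the new .avsc resources through the loader added above. loadSchemaFileFromResource and AvroSchemaParseUtils.parseSchemaFromJSON(String, boolean) are taken from the diff, and the resource path matches valueSchema/NameV1.avsc; the meaning of the boolean flag (extended schema validation) is an assumption.

import org.apache.avro.Schema;

import com.linkedin.venice.schema.AvroSchemaParseUtils;
import com.linkedin.venice.utils.TestWriteUtils;

public class ResourceSchemaSketch {
  public static void main(String[] args) {
    // Reads the schema text from TestWriteUtils' own class loader; IOExceptions are
    // rethrown as VeniceException per the refactored loader above.
    String nameV1Json = TestWriteUtils.loadSchemaFileFromResource("valueSchema/NameV1.avsc");

    // Parse it the same way the wrong-default-value helper does in the diff
    // (false is assumed to disable the extended validity check).
    Schema nameV1 = AvroSchemaParseUtils.parseSchemaFromJSON(nameV1Json, false);

    System.out.println(nameV1.getFields());
  }
}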
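And a sketch of exercising the relocated writeSimpleAvroFileForValidateSchemaAndBuildDictMapperOutput, which now takes the output Schema directly instead of a schema string. Here fileSchema stands in for whatever ValidateSchemaAndBuildDictMapperOutput schema the calling test already holds, and the literal values mirror the existing tests rather than prescribing anything new.

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.avro.Schema;

import com.linkedin.venice.utils.TestWriteUtils;
import com.linkedin.venice.utils.Utils;

public class DictMapperOutputFileSketch {
  public static void writeSampleOutput(Schema fileSchema) throws IOException {
    File outputDir = Utils.getTempDataDirectory();

    // Appends a single record carrying KEY_INPUT_FILE_DATA_SIZE and, when the
    // ByteBuffer is non-null, KEY_ZSTD_COMPRESSION_DICTIONARY.
    TestWriteUtils.writeSimpleAvroFileForValidateSchemaAndBuildDictMapperOutput(
        outputDir,
        "valid_file.avro", // file name used by the existing reader tests
        1L, // inputFileDataSize
        ByteBuffer.wrap("TestDictionary".getBytes()), // pass null to omit the dictionary
        fileSchema);
  }
}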