OOM error when ingesting data from a MySQL table into Apache Druid using a JSON task

A DigitalOcean droplet with 8 GB of RAM was spun up to run Apache Druid (v28.0.1). I installed MySQL on the same droplet, created a table with 11 columns (all varchar, most of them nullable), and imported 4.8 million records into that table.

I wanted to ingest all 4.8 million rows into Druid by submitting a JSON task, but the task fails with an out-of-memory error.

However, the same JSON task works when I limit it to 30 thousand rows. (That leaves me the option of submitting the task 160 times to cover all 4.8 million records, but there must be a better way.)
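
From my reading of the SQL input source docs, each query in the sqls array becomes its own split, so one alternative might be to break the single SELECT into range-bounded chunks inside a single task instead of submitting 160 tasks. A rough sketch (the id column and the ranges are made up for illustration, and maxNumConcurrentSubTasks would presumably need to be raised above 1 for the splits to run in parallel):

"inputSource": {
  "type": "sql",
  "sqls": [
    "SELECT * FROM mydatatable WHERE id BETWEEN 1 AND 1000000",
    "SELECT * FROM mydatatable WHERE id BETWEEN 1000001 AND 2000000",
    "SELECT * FROM mydatatable WHERE id BETWEEN 2000001 AND 3000000",
    "SELECT * FROM mydatatable WHERE id BETWEEN 3000001 AND 4000000",
    "SELECT * FROM mydatatable WHERE id BETWEEN 4000001 AND 4800000"
  ],
  ...
}

Would splitting like this actually bound the memory used per sub-task?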

My assumption is that this happens because of the memory parameters defined in the start-druid script in the bin directory. Or do I need to change something in the submitted JSON task itself?
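
If it is JVM sizing, my understanding is that the ingestion actually runs in a peon process forked by the MiddleManager, whose heap is controlled by druid.indexer.runner.javaOptsArray rather than by the main services' jvm.config. This is what I would try in the MiddleManager runtime.properties (the values are guesses for an 8 GB box, and I am not sure where this file lives in the start-druid auto layout):

druid.indexer.runner.javaOptsArray=["-server","-Xms512m","-Xmx2g","-XX:MaxDirectMemorySize=1g","-Duser.timezone=UTC","-Dfile.encoding=UTF-8"]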

What exactly should I change in the configuration?
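
One specific suspicion: as far as I know, the MySQL JDBC driver buffers the entire result set in memory by default unless streaming is enabled, so a SELECT over 4.8 million rows could blow the heap regardless of Druid's own settings. I do not know whether Druid's SQL input source sets a fetch size itself; if it does not, would appending the standard Connector/J cursor-fetch properties to the connect URI help (the fetch size of 10000 is a guess)?

"connectURI": "jdbc:mysql://localhost:3306/mydb?useCursorFetch=true&defaultFetchSize=10000"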

Here is the JSON task that I submitted:

{
  "type": "index_parallel",
  "spec": {
    "dataSchema": {
      "dataSource": "mydata_sql",
      "timestampSpec": {
        "column": "__time",
        "format": "iso",
        "missingValue": "2024-02-18T00:00:00.000Z"
      },
      "dimensionsSpec": {
        "dimensions": [],
        "dimensionExclusions": [
          "__time"
        ],
        "includeAllDimensions": false,
        "useSchemaDiscovery": false
      },
      "metricsSpec": [],
      "granularitySpec": {
        "type": "uniform",
        "segmentGranularity": "DAY",
        "queryGranularity": {
          "type": "none"
        },
        "rollup": false,
        "intervals": []
      },
      "transformSpec": {
        "filter": null,
        "transforms": []
      }
    },
    "ioConfig": {
      "type": "index_parallel",
      "inputSource": {
        "type": "sql",
        "sqls": [
          "SELECT * FROM mydatatable"
        ],
        "foldCase": false,
        "database": {
          "type": "mysql",
          "connectorConfig": {
            "createTables": true,
            "host": "localhost",
            "port": 1527,
            "connectURI": "jdbc:mysql://localhost:3306/mydb",
            "user": "mydbuser",
            "password": "mydbpass",
            "dbcp": null
          },
          "driverClassName": null
        }
      },
      "inputFormat": null,
      "appendToExisting": false,
      "dropExisting": false
    },
    "tuningConfig": {
      "type": "index_parallel",
      "maxRowsPerSegment": null,
      "appendableIndexSpec": {
        "type": "onheap",
        "preserveExistingMetrics": false
      },
      "maxRowsInMemory": 1000000,
      "maxBytesInMemory": 0,
      "skipBytesInMemoryOverheadCheck": false,
      "maxTotalRows": null,
      "numShards": null,
      "splitHintSpec": null,
      "partitionsSpec": null,
      "indexSpec": {
        "bitmap": {
          "type": "roaring"
        },
        "dimensionCompression": "lz4",
        "stringDictionaryEncoding": {
          "type": "utf8"
        },
        "metricCompression": "lz4",
        "longEncoding": "longs"
      },
      "indexSpecForIntermediatePersists": {
        "bitmap": {
          "type": "roaring"
        },
        "dimensionCompression": "lz4",
        "stringDictionaryEncoding": {
          "type": "utf8"
        },
        "metricCompression": "lz4",
        "longEncoding": "longs"
      },
      "maxPendingPersists": 0,
      "forceGuaranteedRollup": false,
      "reportParseExceptions": false,
      "pushTimeout": 0,
      "segmentWriteOutMediumFactory": null,
      "maxNumConcurrentSubTasks": 1,
      "maxRetry": 3,
      "taskStatusCheckPeriodMs": 1000,
      "chatHandlerTimeout": "PT10S",
      "chatHandlerNumRetries": 5,
      "maxNumSegmentsToMerge": 100,
      "totalNumMergeTasks": 10,
      "logParseExceptions": false,
      "maxParseExceptions": 2147483647,
      "maxSavedParseExceptions": 0,
      "maxColumnsToMerge": -1,
      "awaitSegmentAvailabilityTimeoutMillis": 0,
      "maxAllowedLockCount": -1,
      "partitionDimensions": []
    }
  },
  "context": {
    "forceTimeChunkLock": true,
    "useLineageBasedSegmentAllocation": true
  }
}
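
For reference, I left the tuningConfig memory settings at their defaults: maxRowsInMemory is 1,000,000 and maxBytesInMemory of 0 means (per the docs) one-sixth of the JVM heap, so the task holds a lot of rows in memory before persisting to disk. Would lowering them force earlier spills? A sketch (the values are guesses for this droplet):

"tuningConfig": {
  "type": "index_parallel",
  "maxRowsInMemory": 150000,
  "maxBytesInMemory": 100000000,
  ...
}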