Schema Info

Schema Name: sfdl_test_rds.random_userdata_1_pq
  • created: 2022-05-23 17:02:00
  • disabled: False
  • format: raw
  • id: 1800
  • is_partitioned: False
  • is_schema_datatyped: False
  • name:
    {
        "database": "sfdl_test_rds",
        "table": "random_userdata_1_pq"
    }
  • type: rds
Schema Versions: 2
    • created: 2022-05-24 12:44:20
    • ddl:
      [
          "CREATE DATABASE IF NOT EXISTS `sfdl_test_rds`",
          "DROP TABLE IF EXISTS `sfdl_test_rds.random_userdata_1_pq`",
          "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_test_rds.random_userdata_1_pq`\n(\n `birthdate`  string , `cc`  string , `comments`  string , `country`  string , `email`  string , `first_name`  string , `gender`  string , `id`  int , `ip_address`  string , `last_name`  string , `registration_dttm`  timestamp , `salary`  double , `title`  string \n)\nROW FORMAT SERDE\n'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-rds-testing-dev/raw/random_datasets/userdata_1/2022-05-23'\n",
          "MSCK REPAIR TABLE sfdl_test_rds.random_userdata_1_pq"
      ]
    • hive_path: s3://sfdl-rds-testing-dev/raw/random_datasets/userdata_1/2022-05-23
    • id: 4689200
    • processed: 1
    • raw_data:
      {
          "birthdate": "string",
          "cc": "string",
          "comments": "string",
          "country": "string",
          "email": "string",
          "first_name": "string",
          "gender": "string",
          "id": "int32",
          "ip_address": "string",
          "last_name": "string",
          "registration_dttm": "timestamp[ns]",
          "salary": "double",
          "title": "string"
      }
    • schema_attributes:
      {
          "birthdate": "string",
          "cc": "string",
          "comments": "string",
          "country": "string",
          "email": "string",
          "first_name": "string",
          "gender": "string",
          "id": "int",
          "ip_address": "string",
          "last_name": "string",
          "registration_dttm": "timestamp",
          "salary": "double",
          "title": "string"
      }
    • schema_name_id: 1800
    • schema_scan_id: 5925338
    • updated: 2022-05-24 12:44:35
    • created: 2022-05-24 12:08:08
    • ddl:
      [
          "CREATE DATABASE IF NOT EXISTS `sfdl_test_rds`",
          "DROP TABLE IF EXISTS `sfdl_test_rds.random_userdata_1_pq`",
          "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_test_rds.random_userdata_1_pq`\n(\n `birthdate`  string , `cc`  string , `comments`  string , `country`  string , `email`  string , `first_name`  string , `gender`  string , `id`  int , `ip_address`  string , `last_name`  string , `registration_dttm`  timestamp[ns] , `salary`  double , `title`  string \n)\nROW FORMAT SERDE\n'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-rds-testing-dev/raw/random_datasets/userdata_1/2022-05-23'\n",
          "MSCK REPAIR TABLE sfdl_test_rds.random_userdata_1_pq"
      ]
    • hive_path: s3://sfdl-rds-testing-dev/raw/random_datasets/userdata_1/2022-05-23
    • id: 4689199
    • processed: 0
    • raw_data:
      {
          "birthdate": "string",
          "cc": "string",
          "comments": "string",
          "country": "string",
          "email": "string",
          "first_name": "string",
          "gender": "string",
          "id": "int32",
          "ip_address": "string",
          "last_name": "string",
          "registration_dttm": "timestamp[ns]",
          "salary": "double",
          "title": "string"
      }
    • schema_attributes:
      {
          "birthdate": "string",
          "cc": "string",
          "comments": "string",
          "country": "string",
          "email": "string",
          "first_name": "string",
          "gender": "string",
          "id": "int",
          "ip_address": "string",
          "last_name": "string",
          "registration_dttm": "timestamp[ns]",
          "salary": "double",
          "title": "string"
      }
    • schema_name_id: 1800
    • schema_scan_id: 5925285
    • updated: None
Schema Scans: 2
Last at 2022-05-24 12:44:13
    • duration: 0:00:25.201782
    • exit_message:
      {
          "ddl_changed": true,
          "ingested_partitions": 0,
          "partitions_applied": false,
          "success": true
      }
    • id: 5925338
    • payload:
      {
          "datatype_dict": false,
          "file_format": "parquet",
          "is_schema_datatyped": false,
          "partition": false,
          "s3": {
              "bucket": "sfdl-rds-testing-dev",
              "hive_path": "s3://sfdl-rds-testing-dev/raw/random_datasets/userdata_1/2022-05-23",
              "key": "raw/random_datasets/userdata_1/2022-05-23/userdata1.parquet"
          },
          "schema": {
              "name": "sfdl_test_rds.random_userdata_1_pq",
              "type": "rds"
          }
      }
    • running: False
    • schema_name_id: 1800
    • start_time: 2022-05-24 12:44:13
    • success: True
    • trace_id: 2982596475669719431
    • duration: 0:00:23.740994
    • exit_message:
      {
          "exception": "failed to apply schema `Invalid ddl? `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_test_rds.random_userdata_1_pq`\n(\n `birthdate`  string , `cc`  string , `comments`  string , `country`  string , `email`  string , `first_name`  string , `gender`  string , `id`  int , `ip_address`  string , `last_name`  string , `registration_dttm`  timestamp[ns] , `salary`  double , `title`  string \n)\nROW FORMAT SERDE\n'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-rds-testing-dev/raw/random_datasets/userdata_1/2022-05-23'\n` An error occurred (InvalidRequestException) when calling the StartQueryExecution operation: line 1:8: mismatched input 'EXTERNAL'. Expecting: 'OR', 'SCHEMA', 'TABLE', 'VIEW'`: ['CREATE DATABASE IF NOT EXISTS `sfdl_test_rds`', 'DROP TABLE IF EXISTS `sfdl_test_rds.random_userdata_1_pq`', \"CREATE EXTERNAL TABLE IF NOT EXISTS\\n`sfdl_test_rds.random_userdata_1_pq`\\n(\\n `birthdate`  string , `cc`  string , `comments`  string , `country`  string , `email`  string , `first_name`  string , `gender`  string , `id`  int , `ip_address`  string , `last_name`  string , `registration_dttm`  timestamp[ns] , `salary`  double , `title`  string \\n)\\nROW FORMAT SERDE\\n'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'\\n \\nSTORED AS INPUTFORMAT\\n  'org.apache.hadoop.mapred.TextInputFormat'\\nOUTPUTFORMAT\\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\\nLOCATION\\n  's3://sfdl-rds-testing-dev/raw/random_datasets/userdata_1/2022-05-23'\\n\", 'MSCK REPAIR TABLE sfdl_test_rds.random_userdata_1_pq']\nTraceback (most recent call last):\n  File \"/src/athena-runner/athena_runner/runner.py\", line 93, in _run_single_query\n    query_id_response = self.client.start_query_execution(**athena_args)\n  File \"/usr/local/lib/python3.7/site-packages/botocore/client.py\", line 386, in _api_call\n    return self._make_api_call(operation_name, kwargs)\n  File \"/usr/local/lib/python3.7/site-packages/botocore/client.py\", line 705, in _make_api_call\n    raise error_class(parsed_response, operation_name)\nbotocore.errorfactory.InvalidRequestException: An error occurred (InvalidRequestException) when calling the StartQueryExecution operation: line 1:8: mismatched input 'EXTERNAL'. Expecting: 'OR', 'SCHEMA', 'TABLE', 'VIEW'\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n  File \"/schema_manager/schema_generator/table_generator.py\", line 24, in apply_latest_version\n    self.runner.run_multi_query(schema_version.ddl, timeout=DDLQueueWorker.TIMEOUT)\n  File \"/src/athena-runner/athena_runner/runner.py\", line 133, in run_multi_query\n    result = self.run_single_query(query, timeout, database)\n  File \"/src/athena-runner/athena_runner/runner.py\", line 51, in run_single_query\n    return self._run_single_query(query, timeout, database)\n  File \"/src/athena-runner/athena_runner/runner.py\", line 119, in _run_single_query\n    raise AthenaRunnerQueryException('Invalid ddl? `{}` {}'.format(query, e))\nathena_runner.exceptions.AthenaRunnerQueryException: Invalid ddl? `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_test_rds.random_userdata_1_pq`\n(\n `birthdate`  string , `cc`  string , `comments`  string , `country`  string , `email`  string , `first_name`  string , `gender`  string , `id`  int , `ip_address`  string , `last_name`  string , `registration_dttm`  timestamp[ns] , `salary`  double , `title`  string \n)\nROW FORMAT SERDE\n'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-rds-testing-dev/raw/random_datasets/userdata_1/2022-05-23'\n` An error occurred (InvalidRequestException) when calling the StartQueryExecution operation: line 1:8: mismatched input 'EXTERNAL'. Expecting: 'OR', 'SCHEMA', 'TABLE', 'VIEW'\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n  File \"/schema_manager/schema_generator/schema_scanner.py\", line 102, in scan\n    response = service.run()\n  File \"/schema_manager/schema_generator/process_hive_table.py\", line 99, in run\n    response['ddl_changed'] = table_generator.run()\n  File \"/schema_manager/schema_generator/table_generator.py\", line 35, in run\n    self.apply_latest_version()\n  File \"/schema_manager/schema_generator/table_generator.py\", line 26, in apply_latest_version\n    raise Exception('failed to apply schema `{}`: {}'.format(e, schema_version.ddl))\nException: failed to apply schema `Invalid ddl? `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_test_rds.random_userdata_1_pq`\n(\n `birthdate`  string , `cc`  string , `comments`  string , `country`  string , `email`  string , `first_name`  string , `gender`  string , `id`  int , `ip_address`  string , `last_name`  string , `registration_dttm`  timestamp[ns] , `salary`  double , `title`  string \n)\nROW FORMAT SERDE\n'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-rds-testing-dev/raw/random_datasets/userdata_1/2022-05-23'\n` An error occurred (InvalidRequestException) when calling the StartQueryExecution operation: line 1:8: mismatched input 'EXTERNAL'. Expecting: 'OR', 'SCHEMA', 'TABLE', 'VIEW'`: ['CREATE DATABASE IF NOT EXISTS `sfdl_test_rds`', 'DROP TABLE IF EXISTS `sfdl_test_rds.random_userdata_1_pq`', \"CREATE EXTERNAL TABLE IF NOT EXISTS\\n`sfdl_test_rds.random_userdata_1_pq`\\n(\\n `birthdate`  string , `cc`  string , `comments`  string , `country`  string , `email`  string , `first_name`  string , `gender`  string , `id`  int , `ip_address`  string , `last_name`  string , `registration_dttm`  timestamp[ns] , `salary`  double , `title`  string \\n)\\nROW FORMAT SERDE\\n'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'\\n \\nSTORED AS INPUTFORMAT\\n  'org.apache.hadoop.mapred.TextInputFormat'\\nOUTPUTFORMAT\\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\\nLOCATION\\n  's3://sfdl-rds-testing-dev/raw/random_datasets/userdata_1/2022-05-23'\\n\", 'MSCK REPAIR TABLE sfdl_test_rds.random_userdata_1_pq']\n: Expecting value: line 1 column 1 (char 0)"
      }
    • id: 5925285
    • payload:
      {
          "datatype_dict": null,
          "file_format": "parquet",
          "is_schema_datatyped": false,
          "partition": false,
          "s3": {
              "bucket": "sfdl-rds-testing-dev",
              "hive_path": "s3://sfdl-rds-testing-dev/raw/random_datasets/userdata_1/2022-05-23",
              "key": "raw/random_datasets/userdata_1/2022-05-23/userdata1.parquet"
          },
          "schema": {
              "name": "sfdl_test_rds.random_userdata_1_pq",
              "type": "rds"
          }
      }
    • running: False
    • schema_name_id: 1800
    • start_time: 2022-05-24 12:07:53
    • success: False
    • trace_id: 16115858325177331618
Partitions:
  • count: 0
  • ddl: None
  • name: None
  • state:
    []
  • type: None
Schema:
  • attributes:
    {
        "birthdate": "string",
        "cc": "string",
        "comments": "string",
        "country": "string",
        "email": "string",
        "first_name": "string",
        "gender": "string",
        "id": "int",
        "ip_address": "string",
        "last_name": "string",
        "registration_dttm": "timestamp",
        "salary": "double",
        "title": "string"
    }
  • created: 2022-05-24 12:44:20
  • ddl:
    {
        "create_database": "CREATE DATABASE IF NOT EXISTS `sfdl_test_rds`",
        "create_table": "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_test_rds.random_userdata_1_pq`\n(\n `birthdate`  string , `cc`  string , `comments`  string , `country`  string , `email`  string , `first_name`  string , `gender`  string , `id`  int , `ip_address`  string , `last_name`  string , `registration_dttm`  timestamp , `salary`  double , `title`  string \n)\nROW FORMAT SERDE\n'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-rds-testing-dev/raw/random_datasets/userdata_1/2022-05-23'\n",
        "drop_table": "DROP TABLE IF EXISTS `sfdl_test_rds.random_userdata_1_pq`",
        "repair_table": "MSCK REPAIR TABLE sfdl_test_rds.random_userdata_1_pq"
    }
Server:
  • hostname: schemamanager-5447774fc6-4w58v
  • name: schema_manager
  • process_start: 2026-04-02 21:43:44
  • sf_env: dev
  • version: 2427328685
Raw:
{
    "name": {
        "created": "2022-05-23 17:02:00",
        "disabled": false,
        "format": "raw",
        "id": 1800,
        "is_partitioned": false,
        "is_schema_datatyped": false,
        "name": {
            "database": "sfdl_test_rds",
            "table": "random_userdata_1_pq"
        },
        "type": "rds"
    },
    "partition": {
        "count": 0,
        "ddl": null,
        "name": null,
        "state": [],
        "type": null
    },
    "schema": {
        "attributes": {
            "birthdate": "string",
            "cc": "string",
            "comments": "string",
            "country": "string",
            "email": "string",
            "first_name": "string",
            "gender": "string",
            "id": "int",
            "ip_address": "string",
            "last_name": "string",
            "registration_dttm": "timestamp",
            "salary": "double",
            "title": "string"
        },
        "created": "2022-05-24 12:44:20",
        "ddl": {
            "create_database": "CREATE DATABASE IF NOT EXISTS `sfdl_test_rds`",
            "create_table": "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_test_rds.random_userdata_1_pq`\n(\n `birthdate`  string , `cc`  string , `comments`  string , `country`  string , `email`  string , `first_name`  string , `gender`  string , `id`  int , `ip_address`  string , `last_name`  string , `registration_dttm`  timestamp , `salary`  double , `title`  string \n)\nROW FORMAT SERDE\n'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-rds-testing-dev/raw/random_datasets/userdata_1/2022-05-23'\n",
            "drop_table": "DROP TABLE IF EXISTS `sfdl_test_rds.random_userdata_1_pq`",
            "repair_table": "MSCK REPAIR TABLE sfdl_test_rds.random_userdata_1_pq"
        }
    },
    "server": {
        "hostname": "schemamanager-5447774fc6-4w58v",
        "name": "schema_manager",
        "process_start": "2026-04-02 21:43:44",
        "sf_env": "dev",
        "version": "2427328685"
    }
}