{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "technicalSpecifications": {
    "productType": "Database Transfer and Import Tool",
    "description": "FastTransfer is a high-performance data transfer engine that enables fast, parallel data movement between databases (database-to-database transfer) or from files to databases (file import using DuckDB integration for Parquet, CSV, JSON, Excel, BSON)",
    "version": "0.29",
    "cliInterface": {
      "description": "FastTransfer command-line interface for database-to-database transfers and file-to-database imports",
      "executable": "FastTransfer.exe (Windows) or FastTransfer (Linux)",
      "platforms": ["Windows x64", "Linux x64"],
      "basicSyntax": "FastTransfer --sourceconnectiontype <TYPE> --sourceserver <HOST> --targetconnectiontype <TYPE> --targetserver <HOST> [OPTIONS]",
      "executionModes": [
        "Database to Database Transfer (with parallel processing)",
        "File to Database Import (Parquet, CSV, JSON, Excel, BSON via DuckDB)",
        "Custom Query Transfer",
        "Parallel Transfer with Various Methods (Ntile, RangeId, Ctid, Rowid, DataDriven)"
      ],
      "returnCodes": {
        "0": "Success - Transfer completed successfully",
        "1": "General error - Unspecified failure"
      }
    },
    "sourceConnectionTypes": {
      "description": "Supported source connection types for database transfers and file imports",
      "connectionTypes": [
        {
          "type": "pgcopy",
          "name": "PostgreSQL (Binary Copy)",
          "description": "Native PostgreSQL binary copy protocol for maximum speed",
          "databases": ["PostgreSQL", "CedarDB", "Citus Data"],
          "usage": "--sourceconnectiontype pgcopy"
        },
        {
          "type": "pgsql",
          "name": "PostgreSQL (Standard)",
          "description": "Standard PostgreSQL driver",
          "databases": ["PostgreSQL"],
          "usage": "--sourceconnectiontype pgsql"
        },
        {
          "type": "mysql",
          "name": "MySQL/MariaDB",
          "description": "Native MySQL driver supporting MySQL and MariaDB",
          "databases": ["MySQL", "MariaDB"],
          "usage": "--sourceconnectiontype mysql"
        },
        {
          "type": "mssql",
          "name": "Microsoft SQL Server",
          "description": "Native SQL Server driver for Windows and Linux",
          "databases": ["SQL Server"],
          "usage": "--sourceconnectiontype mssql"
        },
        {
          "type": "oraodp",
          "name": "Oracle Database",
          "description": "Oracle Data Provider for high-performance Oracle transfers",
          "databases": ["Oracle"],
          "usage": "--sourceconnectiontype oraodp"
        },
        {
          "type": "clickhouse",
          "name": "ClickHouse",
          "description": "Native ClickHouse driver for analytical database transfers",
          "databases": ["ClickHouse"],
          "usage": "--sourceconnectiontype clickhouse"
        },
        {
          "type": "hana",
          "name": "SAP HANA",
          "description": "SAP HANA driver for in-memory database transfers",
          "databases": ["SAP HANA"],
          "usage": "--sourceconnectiontype hana"
        },
        {
          "type": "teradata",
          "name": "Teradata",
          "description": "Teradata enterprise data warehouse driver",
          "databases": ["Teradata"],
          "usage": "--sourceconnectiontype teradata"
        },
        {
          "type": "nzsql",
          "name": "IBM Netezza",
          "description": "Native Netezza driver for data warehouse transfers",
          "databases": ["Netezza"],
          "usage": "--sourceconnectiontype nzsql"
        },
        {
          "type": "duckdb",
          "name": "DuckDB",
          "description": "DuckDB embedded analytical database",
          "databases": ["DuckDB"],
          "usage": "--sourceconnectiontype duckdb"
        },
        {
          "type": "duckdbstream",
          "name": "DuckDB Stream (File Import)",
          "description": "DuckDB streaming mode for importing Parquet, CSV, JSON, Excel files to databases",
          "databases": ["Files: Parquet, CSV, JSON, XLSX, BSON"],
          "usage": "--sourceconnectiontype duckdbstream --sourceserver ':memory:' --query \"SELECT * FROM read_parquet('path/*.parquet')\"",
          "note": "This is how FastTransfer imports files to databases - using DuckDB to read files"
        },
        {
          "type": "odbc",
          "name": "ODBC (Generic)",
          "description": "Generic ODBC driver for any database with ODBC support",
          "databases": ["Any ODBC-compatible database"],
          "usage": "--sourceconnectiontype odbc",
          "note": "Requires ODBC driver to be installed on the system"
        },
        {
          "type": "oledb",
          "name": "OLE DB (Generic)",
          "description": "Generic OLE DB provider for Windows-based databases",
          "databases": ["Any OLE DB-compatible database"],
          "usage": "--sourceconnectiontype oledb",
          "note": "Requires OLE DB provider to be installed on the system"
        }
      ]
    },
    "targetConnectionTypes": {
      "description": "Supported target connection types for database loads",
      "connectionTypes": [
        {
          "type": "pgcopy",
          "name": "PostgreSQL Binary Copy",
          "description": "PostgreSQL COPY command for high-speed bulk loading",
          "databases": ["PostgreSQL", "CedarDB", "Citus Data"],
          "usage": "--targetconnectiontype pgcopy"
        },
        {
          "type": "mysqlbulk",
          "name": "MySQL Bulk Insert",
          "description": "MySQL bulk insert for fast data loading",
          "databases": ["MySQL", "MariaDB"],
          "usage": "--targetconnectiontype mysqlbulk"
        },
        {
          "type": "msbulk",
          "name": "SQL Server Bulk Insert",
          "description": "SQL Server bulk copy API for maximum performance",
          "databases": ["SQL Server"],
          "usage": "--targetconnectiontype msbulk"
        },
        {
          "type": "orabulk",
          "name": "Oracle Bulk Load",
          "description": "Oracle direct path or conventional path loading",
          "databases": ["Oracle"],
          "usage": "--targetconnectiontype orabulk"
        },
        {
          "type": "clickhousebulk",
          "name": "ClickHouse Bulk Insert",
          "description": "ClickHouse bulk loading for columnar storage",
          "databases": ["ClickHouse"],
          "usage": "--targetconnectiontype clickhousebulk"
        },
        {
          "type": "hanabulk",
          "name": "SAP HANA Bulk Load",
          "description": "SAP HANA bulk insert for in-memory database",
          "databases": ["SAP HANA"],
          "usage": "--targetconnectiontype hanabulk"
        },
        {
          "type": "teradata",
          "name": "Teradata Bulk Load",
          "description": "Teradata FastLoad protocol for bulk loading",
          "databases": ["Teradata"],
          "usage": "--targetconnectiontype teradata"
        },
        {
          "type": "duckdb",
          "name": "DuckDB Appender",
          "description": "DuckDB fast appender API for bulk insert",
          "databases": ["DuckDB"],
          "usage": "--targetconnectiontype duckdb"
        }
      ],
      "note": "All target connection types use bulk loading APIs for maximum performance. Targets are ALWAYS databases - files cannot be targets."
    },
    "coreParameters": [
      {
        "group": "Source Connection",
        "parameters": [
          {
            "parameter": "--sourceconnectiontype",
            "required": true,
            "valueType": "string",
            "description": "Type of source database connection (or duckdbstream for file imports)",
            "allowedValues": ["pgcopy", "pgsql", "mysql", "mssql", "oraodp", "clickhouse", "hana", "teradata", "nzsql", "duckdb", "duckdbstream", "odbc", "oledb"],
            "example": "--sourceconnectiontype pgcopy"
          },
          {
            "parameter": "--sourceserver",
            "required": true,
            "valueType": "string",
            "description": "Source database server hostname:port. Use ':memory:' for duckdbstream file imports",
            "example": "--sourceserver pghost.domain:5432"
          },
          {
            "parameter": "--sourcedatabase",
            "required": true,
            "valueType": "string",
            "description": "Source database name",
            "example": "--sourcedatabase tpch"
          },
          {
            "parameter": "--sourceuser",
            "required": "conditional",
            "valueType": "string",
            "description": "Source database username (not needed for duckdbstream or --sourcetrusted)",
            "example": "--sourceuser FastUser"
          },
          {
            "parameter": "--sourcepassword",
            "required": "conditional",
            "valueType": "string",
            "description": "Source database password",
            "example": "--sourcepassword FastPassword",
            "securityNote": "Password is obfuscated in logs"
          },
          {
            "parameter": "--sourcetrusted",
            "required": false,
            "valueType": "boolean (flag)",
            "description": "Use Windows Authentication for SQL Server source",
            "example": "--sourcetrusted"
          },
          {
            "parameter": "--sourceschema",
            "required": false,
            "valueType": "string",
            "description": "Source schema name (if not default)",
            "example": "--sourceschema public"
          }
        ]
      },
      {
        "group": "Target Connection",
        "parameters": [
          {
            "parameter": "--targetconnectiontype",
            "required": true,
            "valueType": "string",
            "description": "Type of target database connection for bulk loading",
            "allowedValues": ["pgcopy", "mysqlbulk", "msbulk", "orabulk", "clickhousebulk", "hanabulk", "teradata", "duckdb"],
            "example": "--targetconnectiontype msbulk"
          },
          {
            "parameter": "--targetserver",
            "required": true,
            "valueType": "string",
            "description": "Target database server hostname:port or hostname\\instance",
            "example": "--targetserver mssqlhost.domain\\Instance"
          },
          {
            "parameter": "--targetdatabase",
            "required": true,
            "valueType": "string",
            "description": "Target database name",
            "example": "--targetdatabase tpch10"
          },
          {
            "parameter": "--targetuser",
            "required": "conditional",
            "valueType": "string",
            "description": "Target database username (not needed for --targettrusted)",
            "example": "--targetuser FastUser"
          },
          {
            "parameter": "--targetpassword",
            "required": "conditional",
            "valueType": "string",
            "description": "Target database password",
            "example": "--targetpassword FastPassword"
          },
          {
            "parameter": "--targettrusted",
            "required": false,
            "valueType": "boolean (flag)",
            "description": "Use Windows Authentication for SQL Server target",
            "example": "--targettrusted"
          },
          {
            "parameter": "--targettable",
            "required": true,
            "valueType": "string",
            "description": "Target table name where data will be loaded",
            "example": "--targettable orders"
          },
          {
            "parameter": "--targetschema",
            "required": false,
            "valueType": "string",
            "description": "Target schema name (if not default)",
            "example": "--targetschema dbo"
          }
        ]
      },
      {
        "group": "Data Selection",
        "parameters": [
          {
            "parameter": "--query",
            "required": "conditional",
            "valueType": "string",
            "description": "SQL query to select data from source. For file imports with duckdbstream, use DuckDB SQL (e.g., SELECT * FROM read_parquet('*.parquet'))",
            "example": "--query \"SELECT * FROM tpch_10.orders\"",
            "note": "Either --query or --sourcetable must be specified, not both"
          },
          {
            "parameter": "--sourcetable",
            "required": "conditional",
            "valueType": "string",
            "description": "Source table name (alternative to --query)",
            "example": "--sourcetable orders"
          }
        ]
      },
      {
        "group": "Parallel Processing",
        "parameters": [
          {
            "parameter": "--parallelmethod",
            "required": true,
            "valueType": "string",
            "description": "Parallel processing method. 'None' for single-threaded, or parallel method name",
            "allowedValues": ["None", "DataDriven", "Ntile", "RangeId", "Random", "Ctid", "NZDataSlice", "Physloc", "Rowid"],
            "default": "None",
            "example": "--parallelmethod Ntile",
            "details": {
              "None": "No parallelism - single thread",
              "DataDriven": "Split based on distinct values from a column",
              "Ntile": "Split using NTILE function on a column (even chunks)",
              "RangeId": "Split by min/max range on numeric column",
              "Random": "Random distribution on integer column",
              "Ctid": "PostgreSQL CTID-based splitting (automatic)",
              "NZDataSlice": "Netezza dataslice-based splitting (automatic)",
              "Physloc": "SQL Server physical location splitting (automatic)",
              "Rowid": "Oracle ROWID-based splitting (automatic)"
            }
          },
          {
            "parameter": "--paralleldegree",
            "required": false,
            "valueType": "integer",
            "description": "Number of parallel threads. Use -1 for CPU count, -2 for CPU count × 2",
            "default": "1",
            "example": "--paralleldegree 8",
            "note": "Only used when --parallelmethod is not 'None'"
          },
          {
            "parameter": "--distributekeycolumn",
            "required": "conditional",
            "valueType": "string",
            "description": "Column name for parallel methods that need distribution key (DataDriven, Ntile, RangeId, Random). Can include expressions like YEAR(date_column)",
            "example": "--distributekeycolumn o_orderkey",
            "note": "Required for DataDriven, Ntile, RangeId, Random methods. Not needed for automatic methods (Ctid, Rowid, etc.)"
          },
          {
            "parameter": "--datadrivenquery",
            "required": "conditional",
            "valueType": "string",
            "description": "Query that returns distinct values for DataDriven parallel method",
            "example": "--datadrivenquery \"select file from glob('D:\\\\temp\\\\*.parquet')\"",
            "note": "Only used with --parallelmethod DataDriven for file-based parallelism"
          }
        ]
      },
      {
        "group": "Load Options",
        "parameters": [
          {
            "parameter": "--loadmode",
            "required": false,
            "valueType": "string",
            "description": "Target table load mode",
            "allowedValues": ["Append", "Truncate"],
            "default": "Append",
            "example": "--loadmode Truncate",
            "details": {
              "Append": "Add rows to existing table",
              "Truncate": "Delete all rows before inserting (full refresh)"
            }
          },
          {
            "parameter": "--mapmethod",
            "required": false,
            "valueType": "string",
            "description": "Column mapping method between source and target",
            "allowedValues": ["Name", "Position"],
            "default": "Name",
            "example": "--mapmethod Name",
            "details": {
              "Name": "Map columns by name",
              "Position": "Map columns by ordinal position"
            }
          }
        ]
      },
      {
        "group": "Monitoring and Logging",
        "parameters": [
          {
            "parameter": "--runid",
            "required": false,
            "valueType": "string",
            "description": "Unique identifier for this transfer run (useful for logging and monitoring)",
            "example": "--runid pg_to_mssql_parallel_ntile"
          },
          {
            "parameter": "--loglevel",
            "required": false,
            "valueType": "string",
            "description": "Logging verbosity level",
            "allowedValues": ["Debug", "Info", "Warning", "Error"],
            "default": "Info",
            "example": "--loglevel Debug"
          },
          {
            "parameter": "--logfile",
            "required": false,
            "valueType": "string",
            "description": "Path to log file (if not specified, logs to console)",
            "example": "--logfile C:\\logs\\fasttransfer.log"
          }
        ]
      },
      {
        "group": "License",
        "parameters": [
          {
            "parameter": "--license",
            "required": false,
            "valueType": "string",
            "description": "Path to license file or license content. If not specified, looks for FastTransfer.lic in executable directory",
            "example": "--license C:\\licenses\\FastTransfer.lic"
          }
        ]
      },
      {
        "group": "Help",
        "parameters": [
          {
            "parameter": "--help",
            "required": false,
            "valueType": "boolean (flag)",
            "description": "Display help information",
            "example": "--help"
          },
          {
            "parameter": "--version",
            "required": false,
            "valueType": "boolean (flag)",
            "description": "Display version information",
            "example": "--version"
          }
        ]
      }
    ],
    "realExamples": [
      {
        "scenario": "PostgreSQL to SQL Server transfer with parallel processing",
        "description": "High-performance parallel database-to-database transfer using Ntile method",
        "command": ".\\FastTransfer.exe `\n--sourceconnectiontype \"pgcopy\" `\n--sourceserver \"pghost.domain:5432\" `\n--sourcedatabase \"tpch\" `\n--sourceuser \"FastUser\" `\n--sourcepassword \"FastPassword\" `\n--query \"SELECT * FROM tpch_10.orders\" `\n--targetconnectiontype \"msbulk\" `\n--targetserver \"mssqlhost.domain\\Instance\" `\n--targetdatabase \"tpch10\" `\n--targetuser \"FastUser\" `\n--targetpassword \"FastPassword\" `\n--targettable \"orders\" `\n--parallelmethod \"Ntile\" `\n--distributekeycolumn \"o_orderkey\" `\n--paralleldegree 8 `\n--runid \"pg_to_mssql_parallel_ntile\"",
        "source": "PostgreSQL",
        "target": "SQL Server",
        "parallelMethod": "Ntile",
        "threads": 8
      },
      {
        "scenario": "Parquet file import to SQL Server",
        "description": "Import multiple Parquet files using DuckDB streaming with DataDriven parallel processing",
        "command": ".\\FastTransfer.exe `\n--sourceconnectiontype \"duckdbstream\" `\n--sourceserver \":memory:\" `\n--query \"SELECT * exclude filename FROM read_parquet('D:\\\\temp\\\\TEST_100C_1MRows\\\\*.parquet', filename=true)\" `\n--targetconnectiontype \"msbulk\" `\n--targetserver \"localhost\\\\SS2025\" `\n--targettrusted `\n--targetdatabase \"FastImport\" `\n--targetschema \"dbo\" `\n--targettable \"TEST_100C_1MRows\" `\n--parallelmethod \"DataDriven\" `\n--distributekeycolumn \"filename\" `\n--datadrivenquery \"select file from glob('D:\\\\temp\\\\TEST_100C_1MRows\\\\*.parquet')\" `\n--loadmode \"Truncate\" `\n--paralleldegree 10 `\n--mapmethod \"Name\" `\n--runid \"parquet_2_MSSQL_PARALLEL_10\"",
        "source": "Parquet Files",
        "target": "SQL Server",
        "parallelMethod": "DataDriven",
        "threads": 10,
        "note": "This shows how FastTransfer imports files - using duckdbstream as source type"
      },
      {
        "scenario": "Oracle to ClickHouse transfer with automatic parallel",
        "description": "Transfer Oracle table to ClickHouse using Oracle ROWID for automatic parallelism",
        "command": ".\\FastTransfer.exe `\n--sourceconnectiontype \"oraodp\" `\n--sourceserver \"oraclehost.domain:1521/OraService\" `\n--sourcedatabase \"tpch\" `\n--sourceuser \"FastUser\" `\n--sourcepassword \"FastPassword\" `\n--query \"SELECT * FROM TPCH.ORDERS\" `\n--targetconnectiontype \"clickhousebulk\" `\n--targetserver \"clickhouse.domain:8443\" `\n--targetdatabase \"analytics\" `\n--targetuser \"FastUser\" `\n--targetpassword \"FastPassword\" `\n--targettable \"orders\" `\n--parallelmethod \"Rowid\" `\n--loadmode \"Truncate\" `\n--paralleldegree -2 `\n--runid \"oracle_to_clickhouse_parallel_rowid\"",
        "source": "Oracle",
        "target": "ClickHouse",
        "parallelMethod": "Rowid (automatic)",
        "threads": "CPU count × 2 (-2)",
        "note": "Rowid method works only with Oracle sources"
      },
      {
        "scenario": "MySQL to PostgreSQL with Truncate mode",
        "description": "Full refresh transfer from MySQL to PostgreSQL with table truncation",
        "command": ".\\FastTransfer.exe `\n--sourceconnectiontype \"mysql\" `\n--sourceserver \"mysqlhost:3306\" `\n--sourcedatabase \"sales\" `\n--sourceuser \"myuser\" `\n--sourcepassword \"mypass\" `\n--query \"SELECT * FROM customers\" `\n--targetconnectiontype \"pgcopy\" `\n--targetserver \"pghost:5432\" `\n--targetdatabase \"warehouse\" `\n--targetuser \"pguser\" `\n--targetpassword \"pgpass\" `\n--targettable \"customers\" `\n--loadmode \"Truncate\" `\n--parallelmethod \"None\" `\n--runid \"mysql_to_pg_customers\"",
        "source": "MySQL",
        "target": "PostgreSQL",
        "parallelMethod": "None (single-threaded)",
        "note": "Truncate mode deletes all rows before insert"
      },
      {
        "scenario": "CSV file import to PostgreSQL",
        "description": "Import CSV files using DuckDB read_csv function",
        "command": ".\\FastTransfer.exe `\n--sourceconnectiontype \"duckdbstream\" `\n--sourceserver \":memory:\" `\n--query \"SELECT * FROM read_csv('D:\\\\data\\\\customers\\\\*.csv', header=true)\" `\n--targetconnectiontype \"pgcopy\" `\n--targetserver \"localhost:5432\" `\n--targetdatabase \"import_db\" `\n--targetuser \"postgres\" `\n--targetpassword \"postgres\" `\n--targettable \"customers\" `\n--parallelmethod \"DataDriven\" `\n--distributekeycolumn \"customer_id\" `\n--paralleldegree 4 `\n--loadmode \"Append\" `\n--runid \"csv_import_customers\"",
        "source": "CSV Files",
        "target": "PostgreSQL",
        "parallelMethod": "DataDriven",
        "threads": 4
      },
      {
        "scenario": "SQL Server to SQL Server with Windows Authentication",
        "description": "Transfer between SQL Server instances using Windows Authentication",
        "command": ".\\FastTransfer.exe `\n--sourceconnectiontype \"mssql\" `\n--sourceserver \"source-sql\\PROD\" `\n--sourcedatabase \"Orders\" `\n--sourcetrusted `\n--query \"SELECT * FROM dbo.orders WHERE order_date >= '2026-01-01'\" `\n--targetconnectiontype \"msbulk\" `\n--targetserver \"target-sql\\DEV\" `\n--targetdatabase \"Orders_Dev\" `\n--targettrusted `\n--targettable \"orders\" `\n--targetschema \"dbo\" `\n--parallelmethod \"Physloc\" `\n--paralleldegree 12 `\n--loadmode \"Append\" `\n--runid \"mssql_to_mssql_incremental\"",
        "source": "SQL Server",
        "target": "SQL Server",
        "parallelMethod": "Physloc (automatic for SQL Server)",
        "threads": 12,
        "note": "Physloc method works only with SQL Server sources"
      },
      {
        "scenario": "PostgreSQL to PostgreSQL with Ctid parallel",
        "description": "Fast PostgreSQL to PostgreSQL transfer using Ctid automatic parallelism",
        "command": ".\\FastTransfer.exe `\n--sourceconnectiontype \"pgcopy\" `\n--sourceserver \"pg-source:5432\" `\n--sourcedatabase \"production\" `\n--sourceuser \"app_user\" `\n--sourcepassword \"secret\" `\n--query \"SELECT * FROM public.events\" `\n--targetconnectiontype \"pgcopy\" `\n--targetserver \"pg-target:5432\" `\n--targetdatabase \"analytics\" `\n--targetuser \"etl_user\" `\n--targetpassword \"secret\" `\n--targettable \"events\" `\n--targetschema \"public\" `\n--parallelmethod \"Ctid\" `\n--paralleldegree 16 `\n--loadmode \"Truncate\" `\n--mapmethod \"Name\" `\n--runid \"pg_to_pg_events\"",
        "source": "PostgreSQL",
        "target": "PostgreSQL",
        "parallelMethod": "Ctid (automatic for PostgreSQL)",
        "threads": 16,
        "note": "Ctid method works only with PostgreSQL sources. Binary copy mode (pgcopy) for maximum speed."
      },
      {
        "scenario": "JSON file import to ClickHouse",
        "description": "Import JSON files to ClickHouse using DuckDB JSON reader",
        "command": ".\\FastTransfer.exe `\n--sourceconnectiontype \"duckdbstream\" `\n--sourceserver \":memory:\" `\n--query \"SELECT * FROM read_json('D:\\\\logs\\\\app_*.json', format='auto')\" `\n--targetconnectiontype \"clickhousebulk\" `\n--targetserver \"clickhouse:8443\" `\n--targetdatabase \"logs\" `\n--targetuser \"logger\" `\n--targetpassword \"logpass\" `\n--targettable \"app_logs\" `\n--parallelmethod \"DataDriven\" `\n--distributekeycolumn \"log_date\" `\n--paralleldegree 8 `\n--loadmode \"Append\" `\n--runid \"json_logs_import\"",
        "source": "JSON Files",
        "target": "ClickHouse",
        "parallelMethod": "DataDriven",
        "threads": 8
      },
      {
        "scenario": "Excel file import to MySQL",
        "description": "Import Excel spreadsheet to MySQL using DuckDB Excel reader",
        "command": ".\\FastTransfer.exe `\n--sourceconnectiontype \"duckdbstream\" `\n--sourceserver \":memory:\" `\n--query \"SELECT * FROM st_read('D:\\\\reports\\\\sales_report.xlsx', layer='Sales Data')\" `\n--targetconnectiontype \"mysqlbulk\" `\n--targetserver \"mysql-server:3306\" `\n--targetdatabase \"reports\" `\n--targetuser \"reporter\" `\n--targetpassword \"report123\" `\n--targettable \"sales_data\" `\n--parallelmethod \"None\" `\n--loadmode \"Truncate\" `\n--runid \"excel_sales_import\"",
        "source": "Excel (XLSX)",
        "target": "MySQL",
        "parallelMethod": "None",
        "note": "DuckDB can read Excel files using st_read function"
      },
      {
        "scenario": "Netezza to SAP HANA with Netezza-specific parallel",
        "description": "Transfer from Netezza to SAP HANA using NZDataSlice for optimal Netezza parallelism",
        "command": ".\\FastTransfer.exe `\n--sourceconnectiontype \"nzsql\" `\n--sourceserver \"netezza-host\" `\n--sourcedatabase \"warehouse\" `\n--sourceuser \"nzuser\" `\n--sourcepassword \"nzpass\" `\n--query \"SELECT * FROM dbo.fact_sales\" `\n--targetconnectiontype \"hanabulk\" `\n--targetserver \"hana-server:30015\" `\n--targetdatabase \"ANALYTICS\" `\n--targetuser \"HANAUSER\" `\n--targetpassword \"hanapass\" `\n--targettable \"fact_sales\" `\n--parallelmethod \"NZDataSlice\" `\n--paralleldegree 24 `\n--loadmode \"Append\" `\n--runid \"netezza_to_hana_sales\"",
        "source": "Netezza",
        "target": "SAP HANA",
        "parallelMethod": "NZDataSlice (automatic for Netezza)",
        "threads": 24,
        "note": "NZDataSlice method works only with Netezza sources"
      }
    ],
    "performanceOptimization": {
      "description": "Best practices and recommendations for maximizing transfer performance",
      "parallelProcessing": {
        "description": "Parallel transfer configuration for maximum throughput",
        "recommendations": [
          {
            "scenario": "Large tables (> 10M rows)",
            "threadCount": "12-24 threads",
            "splitByColumn": "Indexed integer or date column (primary key ideal)",
            "expectedThroughput": "20-24M cells/second",
            "notes": "Linear scaling with CPU cores up to database connection limits"
          },
          {
            "scenario": "Medium tables (1M-10M rows)",
            "threadCount": "8-12 threads",
            "splitByColumn": "Any indexed numeric column",
            "expectedThroughput": "15-20M cells/second",
            "notes": "Optimal for most production workloads"
          },
          {
            "scenario": "Small tables (< 1M rows)",
            "threadCount": "4-8 threads or single-threaded",
            "splitByColumn": "Not required for < 1M rows",
            "expectedThroughput": "10-15M cells/second",
            "notes": "Parallel overhead may not justify multi-threading"
          },
          {
            "scenario": "Cross-datacenter transfers",
            "threadCount": "16-24 threads",
            "splitByColumn": "Indexed column with good distribution",
            "expectedThroughput": "Limited by network bandwidth (typically 1-5 Gbps)",
            "notes": "Higher parallelism compensates for network latency"
          }
        ],
        "splitByColumnGuidelines": [
          "Use primary key or unique indexed integer column",
          "Ensure even data distribution across column values",
          "Avoid columns with high skew (e.g., status flags with 99% same value)",
          "Date/timestamp columns work well for time-series data",
          "Composite keys are not supported - use single column only"
        ]
      },

      "databaseSpecificTips": [
        {
          "database": "PostgreSQL",
          "tips": [
            "Ensure sufficient max_connections for parallel transfers",
            "Use pgcopy connection type for fastest PostgreSQL binary protocol",
            "Monitor pg_stat_activity to track concurrent transfer connections",
            "Consider increasing work_mem for large query processing"
          ]
        },
        {
          "database": "SQL Server",
          "tips": [
            "Use msbulk connection type for fastest bulk insert API",
            "Ensure READ_COMMITTED_SNAPSHOT is enabled to avoid blocking",
            "Use --targettrusted true for Windows Authentication (faster than SQL auth)",
            "Monitor sys.dm_exec_requests for transfer query progress"
          ]
        },
        {
          "database": "Oracle",
          "tips": [
            "Use oraodp (Oracle Data Provider) for best performance",
            "Ensure sufficient PROCESSES and SESSIONS parameters",
            "Consider parallel hint in --query for large table scans",
            "Use Rowid parallel method for automatic parallel processing without distribution key"
          ]
        },
        {
          "database": "MySQL",
          "tips": [
            "Increase max_connections for parallel transfers",
            "Use mysqlbulk target type for high-speed bulk loading",
            "Consider innodb_buffer_pool_size for large table transfers",
            "Monitor SHOW PROCESSLIST to track transfer connections"
          ]
        },
        {
          "database": "SAP HANA",
          "tips": [
            "Leverage HANA's in-memory architecture for extremely fast transfers",
            "Use hanabulk target type for optimized bulk loading",
            "Parallel transfers scale linearly with HANA cores",
            "Column-store tables provide best loading performance"
          ]
        },
        {
          "database": "Teradata",
          "tips": [
            "Use high parallelism (24-32 threads) to leverage Teradata's parallel architecture",
            "Split by primary index column for optimal data distribution",
            "Monitor AMP utilization to ensure balanced workload",
            "Teradata connection type supports both source and target"
          ]
        },
        {
          "database": "ClickHouse",
          "tips": [
            "Use clickhousebulk target type for native bulk insert protocol",
            "Parallel transfers leverage ClickHouse's distributed table architecture",
            "Consider distribution key column alignment with ClickHouse sharding key",
            "High parallelism (16-24 threads) maximizes ClickHouse ingest capacity"
          ]
        },
        {
          "database": "Netezza",
          "tips": [
            "Use NZDataSlice parallel method for automatic Netezza-optimized distribution",
            "No distribution key needed with NZDataSlice - automatically uses Netezza dataslices",
            "High parallelism matches Netezza SPU architecture",
            "Monitor Netezza workload manager for resource allocation"
          ]
        }
      ]
    },
    "errorHandling": {
      "description": "Error handling, logging, and troubleshooting for transfer operations",
      "features": [
        {
          "feature": "Automatic retry on transient errors",
          "description": "Automatically retries failed chunks/batches up to 3 times before failing",
          "applicableTo": "Network errors, temporary connection issues, timeout errors"
        },
        {
          "feature": "Detailed error logging",
          "description": "Comprehensive error messages with context including row ranges, SQL queries, and stack traces",
          "configuration": "Use --log-level debug for maximum verbosity"
        },
        {
          "feature": "Password obfuscation in logs",
          "description": "Passwords are automatically masked in log files and console output",
          "securityNote": "Prevents credential exposure in logs and monitoring systems"
        },
        {
          "feature": "Progress tracking",
          "description": "Real-time progress display showing rows transferred, speed, and ETA",
          "configuration": "Enable with --progress true"
        },
        {
          "feature": "Graceful shutdown on interruption",
          "description": "Handles Ctrl+C and system signals gracefully, closing connections and files properly",
          "note": "Partial transfers may result in incomplete data in target database"
        }
      ],
      "commonErrors": [
        {
          "error": "Connection timeout",
          "errorCode": 2,
          "cause": "Cannot establish connection to source database - network issues, firewall, or database offline",
          "solutions": [
            "Verify database server is running and accessible",
            "Check firewall rules allow connection from FastTransfer host",
            "Verify server hostname/IP and port are correct",
            "Test connectivity with database native tools (psql, sqlcmd, etc.)",
            "Increase --timeout if database is slow to respond"
          ]
        },
        {
          "error": "Authentication failed",
          "errorCode": 3,
          "cause": "Invalid database credentials - wrong username, password, or insufficient permissions",
          "solutions": [
            "Verify username and password are correct",
            "Ensure user has SELECT permissions on source tables/schemas",
            "For SQL Server, verify Windows Authentication with --sourcetrusted true if applicable",
            "Check database-specific authentication requirements (Oracle service name, etc.)",
            "Use --log-level debug to see detailed authentication error messages"
          ]
        },
        {
          "error": "Query timeout",
          "errorCode": 4,
          "cause": "Database query execution exceeded timeout limit",
          "solutions": [
            "Increase --timeout to allow more time for large queries",
            "Add indexes to distribution key column for faster parallel splits",
            "Reduce --paralleldegree thread count to decrease database load",
            "Use --query with WHERE clause to filter and reduce row count",
            "Check database performance and query execution plans"
          ]
        },
        {
          "error": "License validation failed",
          "errorCode": 1,
          "cause": "License file not found, expired, or invalid",
          "solutions": [
            "Ensure FastTransfer.lic file is in same directory as executable",
            "Use --license parameter to specify alternate license path",
            "Verify license has not expired",
            "Contact ARPE support for license renewal or issues",
            "For trial, ensure trial period has not ended"
          ]
        },
        {
          "error": "Permission denied on table",
          "errorCode": 5,
          "cause": "Database user lacks SELECT permission on specified table",
          "solutions": [
            "Grant SELECT permission to user on table/schema",
            "Use database administrator account for transfers",
            "Verify schema and table name are correct (case-sensitive)",
            "Check database-specific security policies or row-level security"
          ]
        }
      ],
      "troubleshootingTips": [
        "Enable --log-level debug to see detailed execution steps and SQL queries",
        "Use monitoring tools to track transfer progress in real-time",
        "Test with small table or --query with LIMIT clause before full transfer",
        "Verify database connectivity with native tools before using FastTransfer",
        "Check system resources (CPU, memory, network) during transfers",
        "Monitor both source and target database server load",
        "For performance issues, reduce --paralleldegree to decrease database load",
        "Verify distribution key column has good data distribution for parallel methods"
      ]
    },
    "systemRequirements": {
      "minimum": {
        "cpu": "2 cores, 2.0 GHz",
        "ram": "4 GB",
        "disk": "50 MB for executable (installation)",
        "network": "100 Mbps for database transfers",
        "os": "Windows 10 (x64), Linux kernel 3.10+ (x64)",
        "notes": "Sufficient for small table transfers (< 10M rows) and basic workloads"
      },
      "recommended": {
        "cpu": "8-16 cores, 3.0+ GHz",
        "ram": "16-32 GB for large parallel transfers",
        "disk": "SSD recommended for best performance",
        "network": "1 Gbps LAN for database transfers",
        "os": "Windows 11 (x64), Windows Server 2019+, Ubuntu 20.04+, RHEL 8+, CentOS 8+",
        "notes": "Optimal for enterprise workloads with parallel transfers (16-24 threads) and large tables (100M+ rows)"
      },
      "scalabilityLimits": {
        "maxParallelThreads": 64,
        "maxRowsPerTransfer": "Unlimited (tested with 10+ billion rows)",
        "memoryUsage": "Streaming architecture uses constant memory (~200-500 MB) regardless of data volume"
      },
      "operatingSystemDetails": {
        "windows": {
          "supportedVersions": ["Windows 10 (x64)", "Windows 11 (x64)", "Windows Server 2016+"],
          "executable": "FastTransfer.exe",
          "dependencies": "None - all drivers embedded",
          "authentication": "Supports Windows Authentication (--sourcetrusted/--targettrusted true) for SQL Server"
        },
        "linux": {
          "supportedDistributions": ["Ubuntu 18.04+", "RHEL 7+", "CentOS 7+", "Debian 10+", "Amazon Linux 2"],
          "executable": "FastTransfer",
          "dependencies": "None - statically linked with all drivers embedded",
          "permissions": "Requires execute permission (chmod +x FastTransfer)"
        }
      },
      "licenseRequirements": {
        "trial": "30-day full-featured trial - no limitations on features or data volume",
        "production": "Company-based annual license covering entire legal entity - unlimited servers, CPUs, users, and data volume",
        "licenseFile": "FastTransfer.lic file must be in executable directory or specified with --license parameter",
        "activation": "Online license validation (requires internet connectivity for initial activation)"
      }
    }
  },
  "metadata": {
    "lastUpdated": "2026-02-26",
    "dataFormat": "JSON",
    "purpose": "Technical specifications and CLI reference for AI agents and LLMs",
    "audience": ["AI agents", "LLMs", "chatbots", "developers", "data engineers"],
    "schemaVersion": "3.0",
    "documentationType": "Database Transfer and Import Tool Technical Reference",
    "aeoOptimized": true,
    "productVersion": "0.29",
    "keywords": [
      "FastTransfer",
      "database transfer",
      "database migration",
      "PostgreSQL to SQL Server",
      "Oracle to ClickHouse",
      "MySQL to PostgreSQL",
      "parallel data transfer",
      "bulk data loading",
      "DuckDB integration",
      "Parquet import",
      "CSV import",
      "file to database import",
      "high-performance ETL",
      "database replication",
      "data synchronization"
    ],
    "semanticTags": [
      "data-migration",
      "etl-tool",
      "database-transfer",
      "parallel-processing",
      "bulk-loading",
      "file-import",
      "high-performance",
      "multi-platform",
      "cross-database",
      "database-to-database"
    ],
    "coreCapabilities": [
      "Database-to-database transfers with parallel processing",
      "File-to-database imports (Parquet, CSV, JSON, Excel, BSON)",
      "Support for 13+ source connection types including PostgreSQL, Oracle, MySQL, SQL Server, SAP HANA, Teradata, ClickHouse, Netezza",
      "Support for 8+ target bulk loading APIs for high-speed inserts",
      "9 parallel processing methods including automatic methods (Ctid, Rowid, Physloc, NZDataSlice)",
      "DuckDB integration for reading files with SQL queries",
      "Streaming architecture with constant memory usage",
      "Up to 64 parallel threads for maximum throughput"
    ],
    "useCases": [
      "PostgreSQL to SQL Server migration",
      "Oracle to ClickHouse analytics migration",
      "MySQL to PostgreSQL modernization",
      "Parquet file data lake to database import",
      "CSV bulk import to any database",
      "Cross-database ETL pipelines",
      "Database replication and synchronization",
      "Data warehouse loading from files",
      "Cloud-to-on-premise database transfers",
      "Multi-database consolidation"
    ],
    "changeLog": [
      {
        "version": "3.0",
        "date": "2026-02-26",
        "changes": "Major correction: FastTransfer is database-to-database transfer OR file-to-database import tool (NOT export tool). Removed all invented parameters for file output (--format, --output) and cloud storage (--s3-bucket, --azure-container, --gcs-bucket, --onelake-workspace). Verified all parameters against actual codebase. Replaced all examples with real commands from actual implementation."
      },
      {
        "version": "2.0",
        "date": "2026-02-26",
        "changes": "(DEPRECATED) Incorrect version that described FastTransfer as export tool - see version 3.0 for corrections."
      },
      {
        "version": "1.0",
        "date": "2026-02-20",
        "changes": "Initial version (contained inaccuracies about product capabilities)"
      }
    ]
  }
}
