I have about 2TB+ of XML data that I need to load into MSSQL DB (in the shortest time possible). Issue is that the data in each XML files ranges from 5MB to 100MB with all text in the first line. One file had about 44 million+ characters in the first line.
I wrote a script to read the XML files and populate the DB, currently it says it will take about 300 days to process. I am now trying SQLXML 4.0. I used XmlSchemaInference to generate an XSD from multiple XML files. I used XSD2DB tool to generate the database from the XSD. I noticed that the tool created primary keys and foreign keys from the parent tables to the child tables. I thereafter inlcuded the relationships created by the tool into the XSD, but these IDs are not in the XML files.
I run the SQLXML code and I get an error that I cannot insert into the primary keys columns. If I remove the primary keys and make them nullable, the data is inserted but I am unable to relate the data across the different tables.
I need to either persist the unique ID from the main parent table to the children or try and get SQLXML to ignore the primary keys, but insert them into the child nodes (related tables). If you look below at the XSD, you will notice in the relationship tags, I have included the primary and foreign keys generated by the XSD2DB tool. Is there a way to get SQLXML to populate these or allow MSSQL to self increment? I refer to here for bulk loading into identity columns. Any assistance is greatly appreciated!
XSD Sample (Apologies had to replace a common word with Foo):
<?xml version="1.0" encoding="utf-8"?>
<xs:schema xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" attributeFormDefault="unqualified" elementFormDefault="qualified" xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:sql="urn:schemas-microsoft-com:mapping-schema">
<xs:annotation>
<xs:appinfo>
<sql:relationship name="FoosFoo"
parent="Foos"
parent-key="Foos_Id"
child="Foo"
child-key="Foos_Id"
/>
<sql:relationship name="FooFooIdentification"
parent="Foo"
parent-key="Foo_Id"
child="FooIdentification"
child-key="Foo_Id"
/>
<sql:relationship name="FooIdentificationFooFlags"
parent="FooIdentification"
parent-key="FooIdentification_Id"
child="FooFlags"
child-key="FooIdentification_Id"
/>
<sql:relationship name="FooFlagsFooFlag"
parent="FooFlags"
parent-key="FooFlags_Id"
child="FooFlag"
child-key="FooFlags_Id"
/>
<sql:relationship name="FooFlagFooFlagDetails"
parent="FooFlag"
parent-key="FooFlag_Id"
child="FooFlagDetails"
child-key="FooFlag_Id"
/>
<sql:relationship name="FooContacts"
parent="Foo"
parent-key="Foo_Id"
child="Contacts"
child-key="Foo_Id"
/>
<sql:relationship name="ContactsContact"
parent="Contacts"
parent-key="Contacts_Id"
child="Contact"
child-key="Contacts_Id"
/>
<sql:relationship name="ContactContactTypes"
parent="Contact"
parent-key="Contact_Id"
child="ContactTypes"
child-key="Contact_Id"
/>
<sql:relationship name="ContactTypesContactType"
parent="ContactTypes"
parent-key="ContactTypes_Id"
child="ContactType"
child-key="ContactTypes_Id"
/>
<sql:relationship name="ContactTypeContactTypeCode"
parent="ContactType"
parent-key="ContactType_Id"
child="ContactTypeCode"
child-key="ContactType_Id"
/>
<sql:relationship name="FooAddresses"
parent="Foo"
parent-key="Foo_Id"
child="Addresses"
child-key="Foo_Id"
/>
<sql:relationship name="AddressesAddress"
parent="Addresses"
parent-key="Addresses_Id"
child="Address"
child-key="Addresses_Id"
/>
</xs:appinfo>
</xs:annotation>
<xs:element name="FooBulkXMLFile" sql:is-constant="1">
<xs:complexType>
<xs:sequence>
<xs:element minOccurs="0" name="Header" sql:relation="Header">
<xs:complexType>
<xs:sequence>
<xs:element minOccurs="0" name="DateTimeCreated" type="xs:string" />
<xs:element minOccurs="0" name="FileType" type="xs:string" />
<xs:element minOccurs="0" name="MonthlyDate" nillable="true" />
<xs:element minOccurs="0" name="DailyDate" type="xs:string" />
<xs:element minOccurs="0" name="FooSourceSystem" type="xs:string" />
<xs:element minOccurs="0" name="FooSourceVersion" type="xs:string" />
<xs:element minOccurs="0" name="FooCount" type="xs:long" />
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element minOccurs="0" name="Foos" sql:relation="Foos">
<xs:complexType>
<xs:sequence>
<xs:element minOccurs="0" maxOccurs="unbounded" name="Foo" sql:relation="Foo" sql:relationship="FoosFoo">
<xs:complexType>
<xs:sequence>
<xs:element minOccurs="0" name="FooIdentification" sql:relation="FooIdentification" sql:relationship="FooFooIdentification">
<xs:complexType>
<xs:sequence>
<xs:element minOccurs="0" name="FooNumber" type="xs:string" />
<xs:element minOccurs="0" name="IsActive" type="xs:boolean" />
<xs:element minOccurs="0" name="CountryOfOriginCode" type="xs:string" />
<xs:element minOccurs="0" name="FooName" type="xs:string" />
<xs:element minOccurs="0" name="CreatedDate" type="xs:string" />
<xs:element minOccurs="0" name="EditDate" type="xs:string" />
<xs:element minOccurs="0" name="FooFlags" sql:relation="FooFlags" sql:relationship="FooIdentificationFooFlags">
<xs:complexType>
<xs:sequence>
<xs:element minOccurs="0" maxOccurs="unbounded" name="FooFlag" sql:relation="FooFlag" sql:relationship="FooFlagsFooFlag">
<xs:complexType>
<xs:sequence>
<xs:element minOccurs="0" name="FooFlagType" type="xs:string" />
<xs:element minOccurs="0" name="FooFlagDescription" type="xs:string" />
<xs:element minOccurs="0" name="FooFlagValue" type="xs:boolean" />
<xs:element minOccurs="0" name="FooFlagLastVerificationDate" type="xs:string" />
<xs:element minOccurs="0" name="FooFlagDetails" sql:relation="FooFlagDetails" sql:relationship="FooFlagFooFlagDetails">
<xs:complexType>
<xs:sequence>
<xs:element minOccurs="0" name="FooFlagDetail" />
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element minOccurs="0" name="Contacts" sql:relation="Contacts" sql:relationship="FooContacts">
<xs:complexType>
<xs:sequence>
<xs:element minOccurs="0" maxOccurs="unbounded" name="Contact" sql:relation="Contact" sql:relationship="ContactsContact">
<xs:complexType>
<xs:sequence>
<xs:element minOccurs="0" name="ContactID" type="xs:long" />
<xs:element minOccurs="0" name="ContactTypes" sql:relation="ContactTypes" sql:relationship="ContactContactTypes">
<xs:complexType>
<xs:sequence>
<xs:element minOccurs="0" name="ContactType" sql:relation="ContactType" sql:relationship="ContactTypesContactType">
<xs:complexType>
<xs:sequence>
<xs:element minOccurs="0" maxOccurs="unbounded" name="ContactTypeCode" type="xs:string" sql:field="ContactTypeCode_Column" sql:relation="ContactTypeCode" sql:relationship="ContactTypeContactTypeCode" />
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element minOccurs="0" name="Name" type="xs:string" />
<xs:element minOccurs="0" name="Surname" type="xs:string" />
<xs:element minOccurs="0" name="EmailAddress" type="xs:string" /><xs:element minOccurs="0" name="CreatedDate" type="xs:string" />
<xs:element minOccurs="0" name="EditDate" type="xs:string" />
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element minOccurs="0" name="Addresses" sql:relation="Addresses" sql:relationship="FooAddresses">
<xs:complexType>
<xs:sequence>
<xs:element minOccurs="0" maxOccurs="unbounded" name="Address" sql:relation="Address" sql:relationship="AddressesAddress">
<xs:complexType>
<xs:sequence>
<xs:element minOccurs="0" name="AddressID" type="xs:long" />
<xs:element minOccurs="0" name="IsPreferred" type="xs:boolean" />
<xs:element minOccurs="0" name="IsActive" type="xs:boolean" />
<xs:element minOccurs="0" name="AddressTypeCode" type="xs:string" />
<xs:element minOccurs="0" name="AddressLine1" type="xs:string" />
<xs:element minOccurs="0" name="AddressLine2" type="xs:string" />
<xs:element minOccurs="0" name="CountryCode" type="xs:string" />
<xs:element minOccurs="0" name="ProvinceCode" nillable="true" type="xs:string" />
<xs:element minOccurs="0" name="DistrictCode" nillable="true" type="xs:string" />
<xs:element minOccurs="0" name="MunicipalityCode" nillable="true" type="xs:string" />
<xs:element minOccurs="0" name="CityCode" nillable="true" type="xs:string" />
<xs:element minOccurs="0" name="SuburbCode" nillable="true" type="xs:string" />
<xs:element minOccurs="0" name="WardCode" nillable="true" type="xs:string" />
<xs:element minOccurs="0" name="PostalCode" nillable="true" type="xs:string" />
<xs:element minOccurs="0" name="IsPostalAddress" type="xs:boolean" />
<xs:element minOccurs="0" name="IsDeliveryAddress" type="xs:boolean" />
<xs:element minOccurs="0" name="IsPhysicalAddress" type="xs:boolean" />
<xs:element minOccurs="0" name="IsPaymentAddress" type="xs:boolean" />
<xs:element minOccurs="0" name="Field1" type="xs:string" />
<xs:element minOccurs="0" name="Field2" type="xs:string" />
<xs:element minOccurs="0" name="Field3" type="xs:string" />
<xs:element minOccurs="0" name="CreatedDate" type="xs:string" />
<xs:element minOccurs="0" name="EditDate" type="xs:string" />
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>