I added a Text Split skill (SplitSkill) to my skillset using the skillset blade in the Azure portal, but I get an empty array as its output. After mapping the skill output to the index through the indexer, the corresponding field in the indexed document comes out empty, i.e. "mypages": []
This is my skillset.json:
{
  "@odata.context": "https://msserv.search.windows.net/$metadata#skillsets/$entity",
  "@odata.etag": "\"0x8DCA023C2F4ABF8\"",
  "name": "terminal-skillset",
  "description": "Skillset created from the portal. skillsetName: terminal-skillset; contentField: merged_content; enrichmentGranularity: document; knowledgeStoreStorageAccount: ;",
  "skills": [
    {
      "@odata.type": "#Microsoft.Skills.Text.V3.EntityRecognitionSkill",
      "name": "#1",
      "description": null,
      "context": "/document/merged_content",
      "categories": [
        "Person",
        "Address",
        "PersonType",
        "URL",
        "Event",
        "Skill",
        "DateTime",
        "Organization",
        "Quantity",
        "Product",
        "Email",
        "Location",
        "IPAddress",
        "PhoneNumber"
      ],
      "defaultLanguageCode": "en",
      "minimumPrecision": null,
      "modelVersion": null,
      "inputs": [
        {
          "name": "text",
          "source": "/document/merged_content"
        },
        {
          "name": "languageCode",
          "source": "/document/language"
        }
      ],
      "outputs": [
        {
          "name": "locations",
          "targetName": "locations"
        },
        {
          "name": "organizations",
          "targetName": "organizations"
        }
      ]
    },
    {
      "@odata.type": "#Microsoft.Skills.Text.KeyPhraseExtractionSkill",
      "name": "#2",
      "description": null,
      "context": "/document/merged_content",
      "defaultLanguageCode": "en",
      "maxKeyPhraseCount": null,
      "modelVersion": null,
      "inputs": [
        {
          "name": "text",
          "source": "/document/merged_content"
        },
        {
          "name": "languageCode",
          "source": "/document/language"
        }
      ],
      "outputs": [
        {
          "name": "keyPhrases",
          "targetName": "keyphrases"
        }
      ]
    },
    {
      "@odata.type": "#Microsoft.Skills.Text.LanguageDetectionSkill",
      "name": "#3",
      "description": null,
      "context": "/document",
      "defaultCountryHint": null,
      "modelVersion": null,
      "inputs": [
        {
          "name": "text",
          "source": "/document/merged_content"
        }
      ],
      "outputs": [
        {
          "name": "languageCode",
          "targetName": "language"
        }
      ]
    },
    {
      "@odata.type": "#Microsoft.Skills.Text.MergeSkill",
      "name": "#4",
      "description": null,
      "context": "/document",
      "insertPreTag": " ",
      "insertPostTag": " ",
      "inputs": [
        {
          "name": "text",
          "source": "/document/content"
        },
        {
          "name": "itemsToInsert",
          "source": "/document/normalized_images/*/text"
        },
        {
          "name": "offsets",
          "source": "/document/normalized_images/*/contentOffset"
        }
      ],
      "outputs": [
        {
          "name": "mergedText",
          "targetName": "merged_content"
        }
      ]
    },
    {
      "@odata.type": "#Microsoft.Skills.Vision.OcrSkill",
      "name": "#5",
      "description": null,
      "context": "/document/normalized_images/*",
      "textExtractionAlgorithm": null,
      "lineEnding": "Space",
      "defaultLanguageCode": "en",
      "detectOrientation": true,
      "inputs": [
        {
          "name": "image",
          "source": "/document/normalized_images/*"
        }
      ],
      "outputs": [
        {
          "name": "text",
          "targetName": "text"
        },
        {
          "name": "layoutText",
          "targetName": "layoutText"
        }
      ]
    },
    {
      "@odata.type": "#Microsoft.Skills.Text.SplitSkill",
      "name": "Split Skill",
      "description": "Splits text into pages",
      "context": "/document/merged_content",
      "defaultLanguageCode": "en",
      "textSplitMode": "pages",
      "maximumPageLength": 1000,
      "pageOverlapLength": 100,
      "maximumPagesToTake": 1,
      "inputs": [
        {
          "name": "text",
          "source": "/document/merged_content"
        },
        {
          "name": "languageCode",
          "source": "/document/language"
        }
      ],
      "outputs": [
        {
          "name": "textItems",
          "targetName": "mypages"
        }
      ]
    }
  ],
  "cognitiveServices": null,
  "knowledgeStore": null,
  "indexProjections": null,
  "encryptionKey": null
}
This is my indexer.json:
{
  "@odata.context": "https://msserv.search.windows.net/$metadata#indexers/$entity",
  "@odata.etag": "\"0x8DCA024A59D4EFC\"",
  "name": "terminal-indexer",
  "description": "",
  "dataSourceName": "terminal-data",
  "skillsetName": "terminal-skillset",
  "targetIndexName": "terminal-index",
  "disabled": null,
  "schedule": null,
  "parameters": {
    "batchSize": null,
    "maxFailedItems": 0,
    "maxFailedItemsPerBatch": 0,
    "base64EncodeKeys": null,
    "configuration": {
      "dataToExtract": "contentAndMetadata",
      "parsingMode": "default",
      "imageAction": "generateNormalizedImages"
    }
  },
  "fieldMappings": [
    {
      "sourceFieldName": "metadata_storage_path",
      "targetFieldName": "metadata_storage_path",
      "mappingFunction": {
        "name": "base64Encode",
        "parameters": null
      }
    }
  ],
  "outputFieldMappings": [
    {
      "sourceFieldName": "/document/merged_content/locations",
      "targetFieldName": "locations"
    },
    {
      "sourceFieldName": "/document/merged_content/organizations",
      "targetFieldName": "organizations"
    },
    {
      "sourceFieldName": "/document/merged_content/keyphrases",
      "targetFieldName": "keyphrases"
    },
    {
      "sourceFieldName": "/document/language",
      "targetFieldName": "language"
    },
    {
      "sourceFieldName": "/document/merged_content",
      "targetFieldName": "merged_content"
    },
    {
      "sourceFieldName": "/document/normalized_images/*/text",
      "targetFieldName": "text"
    },
    {
      "sourceFieldName": "/document/normalized_images/*/layoutText",
      "targetFieldName": "layoutText"
    },
    {
      "sourceFieldName": "/document/merged_content/mypages",
      "targetFieldName": "mypages"
    }
  ],
  "cache": null,
  "encryptionKey": null
}