diff --git a/src/main/kotlin/com/rak/config/model/CardScrapeTargetConfig.kt b/src/main/kotlin/com/rak/config/model/CardScrapeTargetConfig.kt index ce983fb..85f25aa 100644 --- a/src/main/kotlin/com/rak/config/model/CardScrapeTargetConfig.kt +++ b/src/main/kotlin/com/rak/config/model/CardScrapeTargetConfig.kt @@ -4,13 +4,13 @@ import io.smallrye.config.WithName interface CardScrapeTargetConfig : AbstractScrapeTargetConfig { @WithName("name") - fun getEnglishName(): ScrapeTargetFieldConfig + fun getEnglishNameConfig(): ScrapeTargetFieldConfig @WithName("description") - fun getDescription(): ScrapeTargetFieldConfig + fun getDescriptionConfig(): ScrapeTargetFieldConfig @WithName("type") - fun getCardType(): ScrapeTargetFieldConfig + fun getCardTypeConfig(): ScrapeTargetFieldConfig @WithName("attack") - fun getAttack(): ScrapeTargetFieldConfig + fun getAttackConfig(): ScrapeTargetFieldConfig @WithName("defense") - fun getDefense(): ScrapeTargetFieldConfig + fun getDefenseConfig(): ScrapeTargetFieldConfig } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/model/SourceConfig.kt b/src/main/kotlin/com/rak/config/model/ProviderConfig.kt similarity index 93% rename from src/main/kotlin/com/rak/config/model/SourceConfig.kt rename to src/main/kotlin/com/rak/config/model/ProviderConfig.kt index 06e63db..919f0a4 100644 --- a/src/main/kotlin/com/rak/config/model/SourceConfig.kt +++ b/src/main/kotlin/com/rak/config/model/ProviderConfig.kt @@ -3,7 +3,7 @@ package com.rak.config.model import io.smallrye.config.WithName import java.util.* -interface SourceConfig { +interface ProviderConfig { @WithName("id") fun getId(): String diff --git a/src/main/kotlin/com/rak/config/model/RegionalSetScrapeTargetConfig.kt b/src/main/kotlin/com/rak/config/model/RegionalSetScrapeTargetConfig.kt index c7f22c6..ec947b0 100644 --- a/src/main/kotlin/com/rak/config/model/RegionalSetScrapeTargetConfig.kt +++ b/src/main/kotlin/com/rak/config/model/RegionalSetScrapeTargetConfig.kt @@ -4,9 +4,9 @@ import io.smallrye.config.WithName interface RegionalSetScrapeTargetConfig : AbstractScrapeTargetConfig { @WithName("id") - fun idSelector(): ScrapeTargetFieldConfig + fun getIdConfig(): ScrapeTargetFieldConfig @WithName("language") - fun languageSelector(): ScrapeTargetFieldConfig + fun getLanguageConfig(): ScrapeTargetFieldConfig @WithName("region-key") - fun regionKeySelector(): ScrapeTargetFieldConfig + fun getRegionKeyConfig(): ScrapeTargetFieldConfig } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/model/ScrapeTargetFieldConfig.kt b/src/main/kotlin/com/rak/config/model/ScrapeTargetFieldConfig.kt index b7389df..0a0ffd3 100644 --- a/src/main/kotlin/com/rak/config/model/ScrapeTargetFieldConfig.kt +++ b/src/main/kotlin/com/rak/config/model/ScrapeTargetFieldConfig.kt @@ -7,6 +7,7 @@ interface ScrapeTargetFieldConfig { @WithName("root") fun getRootConfig(): Optional @WithName("steps") - fun getSteps(): List - fun transform(): Optional> + fun getExtractionSteps(): List + @WithName("transform") + fun getOptionalTransformationSteps(): Optional> } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/model/SourcesConfig.kt b/src/main/kotlin/com/rak/config/model/SourcesConfig.kt index dd828ad..1ac206d 100644 --- a/src/main/kotlin/com/rak/config/model/SourcesConfig.kt +++ b/src/main/kotlin/com/rak/config/model/SourcesConfig.kt @@ -7,6 +7,6 @@ import io.smallrye.config.WithName interface SourcesConfig { @WithName("sources") - fun getSources(): MutableList + fun getSources(): MutableList } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/service/ExtractionService.kt b/src/main/kotlin/com/rak/service/ExtractionService.kt index 42d0044..b92fd38 100644 --- a/src/main/kotlin/com/rak/service/ExtractionService.kt +++ b/src/main/kotlin/com/rak/service/ExtractionService.kt @@ -1,11 +1,13 @@ package com.rak.service import com.rak.config.model.ExtractConfig +import com.rak.config.model.ScrapeTargetFieldConfig import com.rak.model.Selector import com.rak.model.card.Card import com.rak.model.exception.ElementNotFoundException import com.rak.model.set.CardSet import com.rak.model.set.RegionalSet +import com.rak.model.transform.TransformationRegistry import com.rak.util.XPathUtil import jakarta.enterprise.context.ApplicationScoped import org.jsoup.nodes.Document @@ -17,6 +19,8 @@ class ExtractionService( private val sourceService: SourceService, ) { + private val transformationRegistry = TransformationRegistry() + fun extractSet(setName: String, root: Element, provider: String): CardSet { return CardSet( name = setName, @@ -29,17 +33,18 @@ class ExtractionService( val regionalSetSelector = source.getTargets().regionalSet().get() if (regionalSetSelector.getRootConfig().isPresent) { - val setId: String = extractTextFromElementBySteps( + val setId: String = extractTextFromElementByTargetFieldConfig( root, - regionalSetSelector.idSelector().getSteps() + regionalSetSelector.getIdConfig(), + ) ?: throw IllegalStateException("Parameter 'id' could not be found") - val setLanguage: String = extractTextFromElementBySteps( + val setLanguage: String = extractTextFromElementByTargetFieldConfig( root, - regionalSetSelector.languageSelector().getSteps() + regionalSetSelector.getLanguageConfig() ) ?: throw IllegalStateException("Parameter 'language' could not be found") - val setKey: String = extractTextFromElementBySteps( + val setKey: String = extractTextFromElementByTargetFieldConfig( root, - regionalSetSelector.regionKeySelector().getSteps() + regionalSetSelector.getRegionKeyConfig() ) ?: throw IllegalStateException("Parameter 'key' could not be found") return RegionalSet( @@ -48,32 +53,32 @@ class ExtractionService( setKey ) } else { - val setIdConfiguration = regionalSetSelector.idSelector() + val setIdConfiguration = regionalSetSelector.getIdConfig() if (!setIdConfiguration.getRootConfig().isPresent) { throw RuntimeException("as[po") // TODO fix me } val rootConfiguration = setIdConfiguration.getRootConfig().get() val setIdRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this") - val setId: String = extractTextFromElementBySteps( + val setId: String = extractTextFromElementByTargetFieldConfig( setIdRoot, - setIdConfiguration.getSteps() + setIdConfiguration ) ?: throw IllegalStateException("Parameter 'id' could not be found") - val setLanguageConfiguration = regionalSetSelector.idSelector() + val setLanguageConfiguration = regionalSetSelector.getIdConfig() val setLanguageRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this") - val setLanguage: String = extractTextFromElementBySteps( + val setLanguage: String = extractTextFromElementByTargetFieldConfig( setLanguageRoot, - setLanguageConfiguration.getSteps() + setLanguageConfiguration ) ?: throw IllegalStateException("Parameter 'language' could not be found") - val setKeyConfiguration = regionalSetSelector.idSelector() + val setKeyConfiguration = regionalSetSelector.getIdConfig() val setKeyRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this") - val setKey: String = extractTextFromElementBySteps( + val setKey: String = extractTextFromElementByTargetFieldConfig( setKeyRoot, - setKeyConfiguration.getSteps() + setKeyConfiguration ) ?: throw IllegalStateException("Parameter 'key' could not be found") return RegionalSet( @@ -102,32 +107,31 @@ class ExtractionService( ) }.toSet() } else { - val setIdConfiguration = regionalSetSelector.idSelector() - try { + val setIdConfiguration = regionalSetSelector.getIdConfig() val setIdRoot = getElementsFromDocumentByExtractConfig(root, setIdConfiguration.getRootConfig().get()) val setIds = setIdRoot.map { - extractTextFromElementBySteps( + extractTextFromElementByTargetFieldConfig( it, - setIdConfiguration.getSteps() + setIdConfiguration ) ?: throw IllegalStateException("Parameter 'id' could not be found") } - val languageConfiguration = regionalSetSelector.languageSelector() + val languageConfiguration = regionalSetSelector.getLanguageConfig() val languageRoot = getElementsFromDocumentByExtractConfig(root, languageConfiguration.getRootConfig().get()) val languages = languageRoot.map { - extractTextFromElementBySteps( + extractTextFromElementByTargetFieldConfig( it, - languageConfiguration.getSteps() + languageConfiguration ) ?: throw IllegalStateException("Parameter 'id' could not be found") } - val setKeyConfiguration = regionalSetSelector.regionKeySelector() + val setKeyConfiguration = regionalSetSelector.getRegionKeyConfig() val setKeyRoot = getElementsFromDocumentByExtractConfig(root, setKeyConfiguration.getRootConfig().get()) val setKeys = setKeyRoot.map { - extractTextFromElementBySteps( + extractTextFromElementByTargetFieldConfig( it, - setKeyConfiguration.getSteps() + setKeyConfiguration ) ?: throw IllegalStateException("Parameter 'id' could not be found") } @@ -155,19 +159,19 @@ class ExtractionService( rootConfiguration ) ?: throw ElementNotFoundException("TODO make this better") - val englishCardName: String = extractTextFromElementBySteps( + val englishCardName: String = extractTextFromElementByTargetFieldConfig( rootElement, - cardSelector.getEnglishName().getSteps() + cardSelector.getEnglishNameConfig() ) ?: throw IllegalStateException("Parameter 'name' could not be found") - val cardType: String = extractTextFromElementBySteps( + val cardType: String = extractTextFromElementByTargetFieldConfig( rootElement, - cardSelector.getEnglishName().getSteps() + cardSelector.getEnglishNameConfig() ) ?: throw IllegalStateException("Parameter 'name' could not be found") - val description: String = extractTextFromElementBySteps( + val description: String = extractTextFromElementByTargetFieldConfig( rootElement, - cardSelector.getEnglishName().getSteps() + cardSelector.getEnglishNameConfig() ) ?: throw IllegalStateException("Parameter 'name' could not be found") return null @@ -191,7 +195,7 @@ class ExtractionService( private fun getElementFromDocumentByExtractConfig( document: Element, - step: ExtractConfig + step: ExtractConfig, ): Element? { return if (step.selectorType() == Selector.CSS) { document.select(step.getQueryString()).firstOrNull() ?: throw ElementNotFoundException("") @@ -200,21 +204,23 @@ class ExtractionService( } } - private fun extractTextFromElementBySteps( + private fun extractTextFromElementByTargetFieldConfig( root: Element, - steps: List + extractionConfig: ScrapeTargetFieldConfig ): String? { - val stepsInCorrectOrder = steps.reversed() + val extractionSteps = extractionConfig.getExtractionSteps() + val transformationSteps = extractionConfig.getOptionalTransformationSteps() + var currentElement: Element? = root.clone() var result: String? = null - for (index in 0 until steps.size) { - val currentStep = steps.elementAtOrNull(index) ?: return null + for (index in 0 until extractionSteps.size) { + val currentStep = extractionSteps.elementAtOrNull(index) ?: return null if (currentElement == null) { throw IllegalStateException() } - if (index == steps.size - 1) { + if (index == extractionSteps.size - 1) { result = XPathUtil.extractResult(currentElement, currentStep.getQueryString()) } else { @@ -222,6 +228,14 @@ class ExtractionService( } } + if (result == null) { + throw ElementNotFoundException("Result could not be extracted") + } + + if (transformationSteps.isPresent) { + result = transformationRegistry.applyTransformations(result, transformationSteps.get()) + } + return result } diff --git a/src/main/kotlin/com/rak/service/SourceService.kt b/src/main/kotlin/com/rak/service/SourceService.kt index c332c5e..5153a69 100644 --- a/src/main/kotlin/com/rak/service/SourceService.kt +++ b/src/main/kotlin/com/rak/service/SourceService.kt @@ -2,10 +2,9 @@ package com.rak.service import com.rak.config.model.CardScrapeTargetConfig import com.rak.config.model.RegionalSetScrapeTargetConfig -import com.rak.config.model.SourceConfig +import com.rak.config.model.ProviderConfig import com.rak.config.model.SourcesConfig import com.rak.model.exception.InvalidConfigurationException -import io.quarkus.logging.Log import io.quarkus.runtime.Startup import jakarta.annotation.PostConstruct import jakarta.enterprise.context.ApplicationScoped @@ -21,9 +20,9 @@ class SourceService( sourcesConfiguration.getSources().forEach { validateSource(it) } } - private fun validateSource(sourceConfig: SourceConfig) { - val optionalRegionalSetConfig = sourceConfig.getTargets().regionalSet() - val optionalCardConfig = sourceConfig.getTargets().card() + private fun validateSource(providerConfig: ProviderConfig) { + val optionalRegionalSetConfig = providerConfig.getTargets().regionalSet() + val optionalCardConfig = providerConfig.getTargets().card() if (optionalRegionalSetConfig.isPresent) { validateSetExtractConfig(optionalRegionalSetConfig.get()) @@ -36,9 +35,9 @@ class SourceService( private fun validateSetExtractConfig(setExtractConfig: RegionalSetScrapeTargetConfig) { val selectors = listOf( - setExtractConfig.languageSelector(), - setExtractConfig.idSelector(), - setExtractConfig.regionKeySelector() + setExtractConfig.getLanguageConfig(), + setExtractConfig.getIdConfig(), + setExtractConfig.getRegionKeyConfig() ) // If global root is present, dedicated roots may not exist @@ -59,11 +58,11 @@ class SourceService( private fun validateCardExtractConfig(cardScrapeTargetConfig: CardScrapeTargetConfig) { val selectors = listOf( - cardScrapeTargetConfig.getEnglishName(), - cardScrapeTargetConfig.getDescription(), - cardScrapeTargetConfig.getCardType(), - cardScrapeTargetConfig.getAttack(), - cardScrapeTargetConfig.getDefense(), + cardScrapeTargetConfig.getEnglishNameConfig(), + cardScrapeTargetConfig.getDescriptionConfig(), + cardScrapeTargetConfig.getCardTypeConfig(), + cardScrapeTargetConfig.getAttackConfig(), + cardScrapeTargetConfig.getDefenseConfig(), ) if (cardScrapeTargetConfig.getRootConfig().isPresent) { @@ -81,7 +80,7 @@ class SourceService( } } - fun getSources(): Set = sourcesConfiguration.getSources().toSet() - fun getSourceById(id: String): SourceConfig? = getSources().firstOrNull { it.getId() == id } + fun getSources(): Set = sourcesConfiguration.getSources().toSet() + fun getSourceById(id: String): ProviderConfig? = getSources().firstOrNull { it.getId() == id } } \ No newline at end of file