diff --git a/src/main/kotlin/com/rak/config/converter/EmptyStringConverter.kt b/src/main/kotlin/com/rak/config/converter/EmptyStringConverter.kt deleted file mode 100644 index 237fed8..0000000 --- a/src/main/kotlin/com/rak/config/converter/EmptyStringConverter.kt +++ /dev/null @@ -1,11 +0,0 @@ -package com.rak.config.converter - -import jakarta.annotation.Priority -import org.eclipse.microprofile.config.spi.Converter - -@Priority(1) -class EmptyStringConverter : Converter { - override fun convert(value: String): String { - return value - } -} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/model/FieldConfigFallback.kt b/src/main/kotlin/com/rak/config/model/FieldConfigFallback.kt new file mode 100644 index 0000000..dbedd5e --- /dev/null +++ b/src/main/kotlin/com/rak/config/model/FieldConfigFallback.kt @@ -0,0 +1,15 @@ +package com.rak.config.model + +import io.smallrye.config.WithDefault +import io.smallrye.config.WithName +import java.util.Optional + +interface FieldConfigFallback { + @WithName("steps") + fun getOptionalFallbackExtractionSteps(): Optional> + @WithName("transform") + fun getOptionalTransformationSteps(): Optional> + @WithName("default") + @WithDefault("N/A") + fun getOptionalDefaultValue(): String +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/model/ScrapeTargetFieldConfig.kt b/src/main/kotlin/com/rak/config/model/ScrapeTargetFieldConfig.kt index a6dc79e..7adb033 100644 --- a/src/main/kotlin/com/rak/config/model/ScrapeTargetFieldConfig.kt +++ b/src/main/kotlin/com/rak/config/model/ScrapeTargetFieldConfig.kt @@ -10,4 +10,6 @@ interface ScrapeTargetFieldConfig : AbstractScrapeTargetFieldConfig { fun getExtractionSteps(): List @WithName("transform") fun getOptionalTransformationSteps(): Optional> + @WithName("fallback") + fun getFallbackConfiguration(): Optional } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/model/TransformationStepConfig.kt b/src/main/kotlin/com/rak/config/model/TransformationStepConfig.kt index 1ad9321..b34c99d 100644 --- a/src/main/kotlin/com/rak/config/model/TransformationStepConfig.kt +++ b/src/main/kotlin/com/rak/config/model/TransformationStepConfig.kt @@ -1,10 +1,8 @@ package com.rak.config.model -import com.rak.config.converter.EmptyStringConverter -import io.smallrye.config.WithConverter +import java.util.Optional interface TransformationStepConfig { fun name(): String - @WithConverter(EmptyStringConverter::class) - fun parameters(): MutableList + fun parameters(): Optional> } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/transform/ParameterizedTransformation.kt b/src/main/kotlin/com/rak/model/transform/ParameterizedTransformation.kt index 3630e2e..efa7134 100644 --- a/src/main/kotlin/com/rak/model/transform/ParameterizedTransformation.kt +++ b/src/main/kotlin/com/rak/model/transform/ParameterizedTransformation.kt @@ -2,5 +2,5 @@ package com.rak.model.transform @FunctionalInterface fun interface ParameterizedTransformation : AbstractTransformation { - fun apply(input: String, parameters: List): String + fun apply(input: String, parameters: MutableList): String } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/transform/TransformationRegistry.kt b/src/main/kotlin/com/rak/model/transform/TransformationRegistry.kt index b23bc9c..28ce33d 100644 --- a/src/main/kotlin/com/rak/model/transform/TransformationRegistry.kt +++ b/src/main/kotlin/com/rak/model/transform/TransformationRegistry.kt @@ -11,9 +11,13 @@ class TransformationRegistry { init { register("trim") { it.trim() } + register("removeInnerQuotes") { it.replace("\"", "") } register("replace") { input, parameters -> - require(parameters.size == 2) { - "'replace' requires exactly 2 parameters" + require(parameters.size == 1 || parameters.size == 2) { + "'replace' requires either 1 or 2 parameters" + } + if (parameters.size == 1) { + parameters.add("") } input.replace(parameters[0], parameters[1]) } @@ -39,14 +43,14 @@ class TransformationRegistry { val parameters = transformationStep.parameters() return when { transformations.containsKey(name) -> { - if (parameters.isNotEmpty()) { + if (parameters.isPresent && parameters.get().isNotEmpty()) { throw IllegalArgumentException("'$name' doesn't accept parameters") } else { transformations[name]!! } } parameterizedTransformation.containsKey(name) -> { - if (parameters.isEmpty()) { + if (parameters.isPresent && parameters.get().isEmpty()) { throw IllegalArgumentException("'$name' requires parameters") } else { parameterizedTransformation[name]!! @@ -65,7 +69,7 @@ class TransformationRegistry { ?: throw IllegalArgumentException("Unknown transformation: ${step.name()}") is ParameterizedTransformation -> - parameterizedTransformation[step.name()]?.apply(current, step.parameters()) + parameterizedTransformation[step.name()]?.apply(current, step.parameters().get()) ?: throw IllegalArgumentException("Unknown transformation: ${step.name()}") else -> throw IllegalStateException("Invalid transformation type") diff --git a/src/main/kotlin/com/rak/service/AbstractExtractionService.kt b/src/main/kotlin/com/rak/service/AbstractExtractionService.kt index 1a1c276..da92332 100644 --- a/src/main/kotlin/com/rak/service/AbstractExtractionService.kt +++ b/src/main/kotlin/com/rak/service/AbstractExtractionService.kt @@ -4,7 +4,7 @@ import com.rak.config.model.AbstractScrapeTargetConfig import com.rak.config.model.ExtractConfig import com.rak.config.model.ProviderConfig import com.rak.config.model.ScrapeTargetFieldConfig -import com.rak.model.DiscriminatorDirection +import com.rak.config.model.TransformationStepConfig import com.rak.model.Selector import com.rak.model.exception.ElementNotFoundException import com.rak.model.exception.InvalidConfigurationException @@ -35,7 +35,13 @@ abstract class AbstractExtractionService { element: Element, providerConfig: ProviderConfig, extractionConfig: T - ): Collection + ): List + + abstract fun extractNestedMultiples( + element: Element, + providerConfig: ProviderConfig, + extractionConfig: T + ): List> fun getRootElement( element: Element, @@ -143,37 +149,6 @@ abstract class AbstractExtractionService { return resultList } - fun extractMulti( - elements: Elements, - extractionConfig: T - ): List> { - val resultList = mutableListOf>() - - - // refactor this - extractionConfig.getItems().forEach { (identifier, fieldConfig) -> - for(index in 0..elements.size - 1) { - val rootElement = elements[index] - val extractedText = extractTextFromElementByTargetFieldConfig( - rootElement, - fieldConfig - ) ?: throw ElementNotFoundException("Could not find element for '$identifier'") - - val mapToModify: MutableMap = try { - resultList[index] - } catch (_: IndexOutOfBoundsException) { - val newMap = mutableMapOf() - resultList.add(newMap) - newMap - } - - mapToModify.put(identifier, extractedText) - } - } - - return resultList - } - fun extractMultiWithDiscriminator( element: Element, extractionConfig: T @@ -206,33 +181,48 @@ abstract class AbstractExtractionService { var currentElement: Element? = root.clone() var result: String? = null - for (index in 0 until extractionSteps.size) { - val currentStep = extractionSteps.elementAtOrNull(index) ?: return null - if (currentElement == null) { - throw IllegalStateException() - } + try { + for (index in 0 until extractionSteps.size) { + val currentStep = extractionSteps.elementAtOrNull(index) ?: return null + if (currentElement == null) { + throw IllegalStateException() + } - if (index == extractionSteps.size - 1) { - result = when (currentStep.selectorType()) { - Selector.CSS -> CssUtil.extractResult(currentElement, currentStep.getQueryString()) - Selector.XPATH -> XPathUtil.extractResult(currentElement, currentStep.getQueryString()) + if (index == extractionSteps.size - 1) { + result = when (currentStep.selectorType()) { + Selector.CSS -> CssUtil.extractResult(currentElement, currentStep.getQueryString()) + Selector.XPATH -> XPathUtil.extractResult(currentElement, currentStep.getQueryString()) + } + } + else { + currentElement = when (currentStep.selectorType()) { + Selector.CSS -> CssUtil.getNextElement(currentElement, currentStep.getQueryString()) + Selector.XPATH -> XPathUtil.getNextElement(currentElement, currentStep.getQueryString()) + } } } - else { - currentElement = when (currentStep.selectorType()) { - Selector.CSS -> CssUtil.getNextElement(currentElement, currentStep.getQueryString()) - Selector.XPATH -> XPathUtil.getNextElement(currentElement, currentStep.getQueryString()) + + if (result == null) { + throw ElementNotFoundException("Result could not be extracted") + } + + if (transformationSteps.isPresent) { + result = transformationRegistry.applyTransformations(result, transformationSteps.get()) + } + } catch (ex: RuntimeException) { + when (ex) { + is ElementNotFoundException, + is IllegalStateException -> { + if (extractionConfig.getFallbackConfiguration().isPresent) { + result = extractionConfig.getFallbackConfiguration().get().getOptionalDefaultValue() + } else { + throw ex + } } + else -> throw ex } } - if (result == null) { - throw ElementNotFoundException("Result could not be extracted") - } - - if (transformationSteps.isPresent) { - result = transformationRegistry.applyTransformations(result, transformationSteps.get()) - } return result } diff --git a/src/main/kotlin/com/rak/service/CardPrintExtractionService.kt b/src/main/kotlin/com/rak/service/CardPrintExtractionService.kt index 778799e..17a627d 100644 --- a/src/main/kotlin/com/rak/service/CardPrintExtractionService.kt +++ b/src/main/kotlin/com/rak/service/CardPrintExtractionService.kt @@ -32,11 +32,22 @@ class CardPrintExtractionService : AbstractExtractionService { - val objectAsListOfMaps = extractMultiWithDiscriminator(element, extractionConfig) + ): List { + throw NotImplementedException("Not implemented") + } - return objectAsListOfMaps.map { - CardPrint.fromMap(it[0]) + override fun extractNestedMultiples( + element: Element, + providerConfig: ProviderConfig, + extractionConfig: CardPrintScrapeTargetConfig + ): List> { + val objectAsListOfMaps: List>> = extractMultiWithDiscriminator( + element, + extractionConfig + ) + + return objectAsListOfMaps.map { innerList -> + innerList.map { map -> CardPrint.fromMap(map) } } } } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/service/RegionalSetExtractionService.kt b/src/main/kotlin/com/rak/service/RegionalSetExtractionService.kt index 30c4b67..75a4a3a 100644 --- a/src/main/kotlin/com/rak/service/RegionalSetExtractionService.kt +++ b/src/main/kotlin/com/rak/service/RegionalSetExtractionService.kt @@ -4,6 +4,7 @@ import com.rak.config.model.ProviderConfig import com.rak.config.model.ScrapeTargetFieldConfig import com.rak.config.model.SetScrapeTargetConfig import com.rak.config.model.SourcesConfig +import com.rak.model.card.CardPrint import com.rak.model.exception.NotImplementedException import com.rak.model.set.RegionalSet import jakarta.enterprise.context.ApplicationScoped @@ -35,18 +36,27 @@ class RegionalSetExtractionService( element: Element, providerConfig: ProviderConfig, extractionConfig: SetScrapeTargetConfig - ): Collection { + ): List { val regionalSetList = extractMulti(element, extractionConfig) - val cardPrintsInRegionalSet = extractMulti(element, extractionConfig) - val cardPrints = cardPrintExtractionService.extractMultiple( + val cardPrintGroups: List> = cardPrintExtractionService.extractNestedMultiples( element, providerConfig, providerConfig.getTargets().getCardPrintConfiguration().get() ) - return regionalSetList.map { - RegionalSet.fromMap(it, cardPrints) + // Pair each RegionalSet with its CardPrint group by index + return regionalSetList.mapIndexed { index, regionalSetMap -> + val cardPrintsForSet = cardPrintGroups.getOrElse(index) { emptyList() } + RegionalSet.fromMap(regionalSetMap, cardPrintsForSet) } } + + override fun extractNestedMultiples( + element: Element, + providerConfig: ProviderConfig, + extractionConfig: SetScrapeTargetConfig + ): List> { + throw NotImplementedException("Not implemented") + } } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/service/SetExtractionService.kt b/src/main/kotlin/com/rak/service/SetExtractionService.kt index b1140a5..f2a4e32 100644 --- a/src/main/kotlin/com/rak/service/SetExtractionService.kt +++ b/src/main/kotlin/com/rak/service/SetExtractionService.kt @@ -36,7 +36,15 @@ class SetExtractionService( element: Element, providerConfig: ProviderConfig, extractionConfig: SetScrapeTargetConfig - ): Collection { + ): List { + throw NotImplementedException("Not implemented") + } + + override fun extractNestedMultiples( + element: Element, + providerConfig: ProviderConfig, + extractionConfig: SetScrapeTargetConfig + ): List> { throw NotImplementedException("Not implemented") } } \ No newline at end of file diff --git a/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter b/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter index c7a4762..752f336 100644 --- a/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter +++ b/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter @@ -1,2 +1,2 @@ com.rak.config.converter.TypeSelectorConverter -com.rak.config.converter.DiscriminatorDirectionConverter +com.rak.config.converter.DiscriminatorDirectionConverter \ No newline at end of file diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 7170a34..640d7c8 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -41,9 +41,10 @@ scraper: value: "//li/text()" transform: - name: "replace" - parameters: - - " (" - - "" + parameters: [ + " (", + "" + ] language: steps: - type: xpath @@ -76,12 +77,19 @@ scraper: - type: xpath value: "./text()" regional-name: + fallback: + default: "N/A" steps: - type: xpath - value: "./td/a[2]" + value: "./td[2]" - type: xpath value: "./text()" + transform: + - name: "removeInnerQuotes" + parameters: [] rarity: + fallback: + default: "N/A" steps: - type: xpath value: "./td/a[3]"