From 39c0ebfc7c0a40bdb1c4139cc6c6c58ecca5ebfe Mon Sep 17 00:00:00 2001 From: Katarina Date: Wed, 25 Jun 2025 21:06:34 +0200 Subject: [PATCH] Attempt to implement CardPrints --- ...kotlin-compiler-2216708898994798099.salive | 0 .../DiscriminatorDirectionConverter.kt | 10 + .../model/AbstractScrapeTargetConfig.kt | 8 +- .../model/AbstractScrapeTargetFieldConfig.kt | 3 + .../model/CardPrintScrapeTargetConfig.kt | 14 + .../rak/config/model/DiscriminatorConfig.kt | 12 + .../config/model/ScrapeTargetFieldConfig.kt | 2 +- .../com/rak/config/model/TargetsConfig.kt | 2 + .../com/rak/controller/ScrapeController.kt | 17 ++ .../com/rak/model/DiscriminatorDirection.kt | 17 ++ .../kotlin/com/rak/model/card/CardPrint.kt | 23 ++ .../exception/NotImplementedException.kt | 3 + src/main/kotlin/com/rak/model/set/CardSet.kt | 6 +- .../kotlin/com/rak/model/set/RegionalSet.kt | 20 +- .../rak/service/AbstractExtractionService.kt | 279 ++++++++++++++++++ .../rak/service/CardPrintExtractionService.kt | 46 +++ .../com/rak/service/ExtractionService.kt | 41 ++- .../service/RegionalSetExtractionService.kt | 52 ++++ .../kotlin/com/rak/service/ScrapeService.kt | 18 +- .../com/rak/service/SetExtractionService.kt | 42 +++ src/main/kotlin/com/rak/util/CssUtil.kt | 19 ++ ....eclipse.microprofile.config.spi.Converter | 1 + src/main/resources/application.yml | 29 ++ 23 files changed, 653 insertions(+), 11 deletions(-) delete mode 100644 .kotlin/sessions/kotlin-compiler-2216708898994798099.salive create mode 100644 src/main/kotlin/com/rak/config/converter/DiscriminatorDirectionConverter.kt create mode 100644 src/main/kotlin/com/rak/config/model/AbstractScrapeTargetFieldConfig.kt create mode 100644 src/main/kotlin/com/rak/config/model/CardPrintScrapeTargetConfig.kt create mode 100644 src/main/kotlin/com/rak/config/model/DiscriminatorConfig.kt create mode 100644 src/main/kotlin/com/rak/model/DiscriminatorDirection.kt create mode 100644 src/main/kotlin/com/rak/model/card/CardPrint.kt create mode 100644 src/main/kotlin/com/rak/model/exception/NotImplementedException.kt create mode 100644 src/main/kotlin/com/rak/service/AbstractExtractionService.kt create mode 100644 src/main/kotlin/com/rak/service/CardPrintExtractionService.kt create mode 100644 src/main/kotlin/com/rak/service/RegionalSetExtractionService.kt create mode 100644 src/main/kotlin/com/rak/service/SetExtractionService.kt create mode 100644 src/main/kotlin/com/rak/util/CssUtil.kt diff --git a/.kotlin/sessions/kotlin-compiler-2216708898994798099.salive b/.kotlin/sessions/kotlin-compiler-2216708898994798099.salive deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/kotlin/com/rak/config/converter/DiscriminatorDirectionConverter.kt b/src/main/kotlin/com/rak/config/converter/DiscriminatorDirectionConverter.kt new file mode 100644 index 0000000..bc5646c --- /dev/null +++ b/src/main/kotlin/com/rak/config/converter/DiscriminatorDirectionConverter.kt @@ -0,0 +1,10 @@ +package com.rak.config.converter + +import com.rak.model.DiscriminatorDirection +import org.eclipse.microprofile.config.spi.Converter + +class DiscriminatorDirectionConverter : Converter { + override fun convert(value: String): DiscriminatorDirection? { + return DiscriminatorDirection.from(value) + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/model/AbstractScrapeTargetConfig.kt b/src/main/kotlin/com/rak/config/model/AbstractScrapeTargetConfig.kt index fc0fd26..59b3dd9 100644 --- a/src/main/kotlin/com/rak/config/model/AbstractScrapeTargetConfig.kt +++ b/src/main/kotlin/com/rak/config/model/AbstractScrapeTargetConfig.kt @@ -1,9 +1,15 @@ package com.rak.config.model +import io.smallrye.config.WithDefault import io.smallrye.config.WithName -import java.util.Optional +import java.util.* interface AbstractScrapeTargetConfig { @WithName("root") fun getRootConfig(): Optional + @WithName("multi") + @WithDefault("false") + fun isMulti(): Boolean + @WithName("discriminator") + fun getDiscriminator(): Optional } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/model/AbstractScrapeTargetFieldConfig.kt b/src/main/kotlin/com/rak/config/model/AbstractScrapeTargetFieldConfig.kt new file mode 100644 index 0000000..138ccc7 --- /dev/null +++ b/src/main/kotlin/com/rak/config/model/AbstractScrapeTargetFieldConfig.kt @@ -0,0 +1,3 @@ +package com.rak.config.model + +interface AbstractScrapeTargetFieldConfig \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/model/CardPrintScrapeTargetConfig.kt b/src/main/kotlin/com/rak/config/model/CardPrintScrapeTargetConfig.kt new file mode 100644 index 0000000..853ba91 --- /dev/null +++ b/src/main/kotlin/com/rak/config/model/CardPrintScrapeTargetConfig.kt @@ -0,0 +1,14 @@ +package com.rak.config.model + +import io.smallrye.config.WithName + +interface CardPrintScrapeTargetConfig : AbstractScrapeTargetConfig { + @WithName("id") + fun getIdConfig(): ScrapeTargetFieldConfig + @WithName("name") + fun getNameConfig(): ScrapeTargetFieldConfig + @WithName("regional-name") + fun getRegionNameConfig(): ScrapeTargetFieldConfig + @WithName("rarity") + fun getRarityConfig(): ScrapeTargetFieldConfig +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/model/DiscriminatorConfig.kt b/src/main/kotlin/com/rak/config/model/DiscriminatorConfig.kt new file mode 100644 index 0000000..dc384b0 --- /dev/null +++ b/src/main/kotlin/com/rak/config/model/DiscriminatorConfig.kt @@ -0,0 +1,12 @@ +package com.rak.config.model + +import com.rak.config.converter.DiscriminatorDirectionConverter +import com.rak.model.DiscriminatorDirection +import io.smallrye.config.WithConverter +import io.smallrye.config.WithName + +interface DiscriminatorConfig : ScrapeTargetFieldConfig { + @WithName("direction") + @WithConverter(DiscriminatorDirectionConverter::class) + fun getDiscriminatorDirection(): DiscriminatorDirection +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/model/ScrapeTargetFieldConfig.kt b/src/main/kotlin/com/rak/config/model/ScrapeTargetFieldConfig.kt index 0a0ffd3..a6dc79e 100644 --- a/src/main/kotlin/com/rak/config/model/ScrapeTargetFieldConfig.kt +++ b/src/main/kotlin/com/rak/config/model/ScrapeTargetFieldConfig.kt @@ -3,7 +3,7 @@ package com.rak.config.model import io.smallrye.config.WithName import java.util.* -interface ScrapeTargetFieldConfig { +interface ScrapeTargetFieldConfig : AbstractScrapeTargetFieldConfig { @WithName("root") fun getRootConfig(): Optional @WithName("steps") diff --git a/src/main/kotlin/com/rak/config/model/TargetsConfig.kt b/src/main/kotlin/com/rak/config/model/TargetsConfig.kt index d73d1f9..1ddc725 100644 --- a/src/main/kotlin/com/rak/config/model/TargetsConfig.kt +++ b/src/main/kotlin/com/rak/config/model/TargetsConfig.kt @@ -8,4 +8,6 @@ interface TargetsConfig { fun getCardConfig(): Optional @WithName("set") fun getSetConfig(): Optional + @WithName("card-print") + fun getCardPrintConfiguration(): Optional } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/controller/ScrapeController.kt b/src/main/kotlin/com/rak/controller/ScrapeController.kt index 8ef7f0f..86eeb61 100644 --- a/src/main/kotlin/com/rak/controller/ScrapeController.kt +++ b/src/main/kotlin/com/rak/controller/ScrapeController.kt @@ -2,6 +2,7 @@ package com.rak.controller import com.rak.model.card.Card import com.rak.model.set.CardSet +import com.rak.model.set.RegionalSet import com.rak.service.ScrapeService import jakarta.ws.rs.Consumes import jakarta.ws.rs.GET @@ -35,6 +36,22 @@ class ScrapeController( ) } + @GET + @Path("/{provider}/regionalSet") + @Produces(MediaType.APPLICATION_JSON) + @Consumes(MediaType.APPLICATION_JSON) + fun scrapeRegionalSet( + @RestPath + provider: String, + @RestQuery + setName: String + ): RegionalSet { + return scrapeService.scrapeRegionalSet( + provider, + setName + ) + } + @GET @Path("/{provider}/card") @Produces(MediaType.APPLICATION_JSON) diff --git a/src/main/kotlin/com/rak/model/DiscriminatorDirection.kt b/src/main/kotlin/com/rak/model/DiscriminatorDirection.kt new file mode 100644 index 0000000..5681cd8 --- /dev/null +++ b/src/main/kotlin/com/rak/model/DiscriminatorDirection.kt @@ -0,0 +1,17 @@ +package com.rak.model + +enum class DiscriminatorDirection(val value: String) { + ASC("asc"), + DESC("desc"); + + companion object { + fun from(value: String): DiscriminatorDirection? { + for (discriminatorDirection in DiscriminatorDirection.entries) { + if (discriminatorDirection.value == value) { + return discriminatorDirection + } + } + return null + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/card/CardPrint.kt b/src/main/kotlin/com/rak/model/card/CardPrint.kt new file mode 100644 index 0000000..a2547c9 --- /dev/null +++ b/src/main/kotlin/com/rak/model/card/CardPrint.kt @@ -0,0 +1,23 @@ +package com.rak.model.card + +import com.rak.model.set.RegionalSet + +data class CardPrint( + val id: String, + val name: String, + val regionalName: String? = null, + val rarity: String +) { + + companion object { + fun fromMap(map: Map): CardPrint { + return CardPrint( + map["id"] ?: throw IllegalStateException("Parameter 'prefix' not found"), + map["name"] ?: throw IllegalStateException("Parameter 'region' not found"), + map["regionalName"], + map["rarity"] ?: throw IllegalStateException("Parameter 'regionCode' not found"), + ) + } + } + +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/exception/NotImplementedException.kt b/src/main/kotlin/com/rak/model/exception/NotImplementedException.kt new file mode 100644 index 0000000..c134d7b --- /dev/null +++ b/src/main/kotlin/com/rak/model/exception/NotImplementedException.kt @@ -0,0 +1,3 @@ +package com.rak.model.exception + +class NotImplementedException(message: String) : RuntimeException(message) \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/set/CardSet.kt b/src/main/kotlin/com/rak/model/set/CardSet.kt index a63c1dc..2685bed 100644 --- a/src/main/kotlin/com/rak/model/set/CardSet.kt +++ b/src/main/kotlin/com/rak/model/set/CardSet.kt @@ -5,4 +5,8 @@ import kotlin.collections.Set data class CardSet( val name: String, val regionalSets: Set -) \ No newline at end of file +) { + companion object { + + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/set/RegionalSet.kt b/src/main/kotlin/com/rak/model/set/RegionalSet.kt index 683325e..bfe04f5 100644 --- a/src/main/kotlin/com/rak/model/set/RegionalSet.kt +++ b/src/main/kotlin/com/rak/model/set/RegionalSet.kt @@ -1,13 +1,27 @@ package com.rak.model.set +import com.rak.model.card.CardPrint + data class RegionalSet( val prefix: String, val region: String, - val regionCode: String + val regionCode: String, + val cardPrints: Collection, + val numberOfCards: Int ) { companion object { + fun fromMap(map: Map, cardPrints: Collection): RegionalSet { + return RegionalSet( + map["prefix"] ?: throw IllegalStateException("Parameter 'prefix' not found"), + map["region"] ?: throw IllegalStateException("Parameter 'region' not found"), + map["regionCode"] ?: throw IllegalStateException("Parameter 'regionCode' not found"), + cardPrints, + cardPrints.size + ) + } + fun flattenFromMemberLists( idList: List, languageList: List, @@ -22,7 +36,9 @@ data class RegionalSet( regionalSetList.add(RegionalSet( prefix = idList[index], region = languageList[index], - regionCode = regionKeyAliasList[index] + regionCode = regionKeyAliasList[index], + listOf(), + numberOfCards = -1 )) } return regionalSetList diff --git a/src/main/kotlin/com/rak/service/AbstractExtractionService.kt b/src/main/kotlin/com/rak/service/AbstractExtractionService.kt new file mode 100644 index 0000000..1a7b1b2 --- /dev/null +++ b/src/main/kotlin/com/rak/service/AbstractExtractionService.kt @@ -0,0 +1,279 @@ +package com.rak.service + +import com.rak.config.model.AbstractScrapeTargetConfig +import com.rak.config.model.ExtractConfig +import com.rak.config.model.ProviderConfig +import com.rak.config.model.ScrapeTargetFieldConfig +import com.rak.model.DiscriminatorDirection +import com.rak.model.Selector +import com.rak.model.exception.ElementNotFoundException +import com.rak.model.exception.InvalidConfigurationException +import com.rak.model.transform.TransformationRegistry +import com.rak.util.CssUtil +import com.rak.util.XPathUtil +import org.jsoup.nodes.Element +import org.jsoup.select.Elements +import java.util.Optional +import kotlin.jvm.optionals.getOrElse + +// find root element from global or node config +// get field target configs as list +// extract item from root element via field config + +abstract class AbstractExtractionService { + + private val transformationRegistry = TransformationRegistry() + + abstract fun T.getItems(): Map + abstract fun extract( + element: Element, + providerConfig: ProviderConfig, + extractionConfig: T + ): E + + abstract fun extractMultiple( + element: Element, + providerConfig: ProviderConfig, + extractionConfig: T + ): Collection + + fun getRootElement( + element: Element, + globalRootExtractConfig: Optional, + nodeRootExtractConfig: Optional + ): Element { + return getRootElements( + element, + globalRootExtractConfig, + nodeRootExtractConfig + ).firstOrNull() ?: throw ElementNotFoundException("") + } + + fun getRootElements( + element: Element, + globalRootExtractConfig: Optional, + nodeRootExtractConfig: Optional + ): Elements { + val rootExtractConfig: ExtractConfig = globalRootExtractConfig.getOrElse { + nodeRootExtractConfig.orElseThrow { + InvalidConfigurationException("") + } + } + + return getElementsFromElementByExtractConfig( + element, + rootExtractConfig + ) + } + + protected fun getElementFromDocumentByExtractConfig( + element: Element, + step: ExtractConfig, + ): Element? { + return getElementsFromElementByExtractConfig(element, step).firstOrNull() + } + + protected fun getElementsFromElementByExtractConfig( + element: Element, + step: ExtractConfig, + ): Elements { + return if (step.selectorType() == Selector.CSS) { + element.select(step.getQueryString()) + } else { + element.selectXpath(step.getQueryString()) + } + } + + protected fun extractAsMap( + document: Element, + extractionConfig: T + ): Map { + val result = mutableMapOf() + + extractionConfig.getItems().forEach { (identifier, fieldConfig) -> + val rootElement = getRootElement( + document, + extractionConfig.getRootConfig(), + fieldConfig.getRootConfig() + ) + + val extractedText = extractTextFromElementByTargetFieldConfig( + rootElement, + fieldConfig + ) ?: throw ElementNotFoundException("Could not find element for '$identifier'") + + result.put(identifier, extractedText) + } + + return result + } + + fun extractAsListOfMaps( + element: Element, + extractionConfig: T + ): List> { + val resultList = mutableListOf>() + + extractionConfig.getItems().forEach { (identifier, fieldConfig) -> + val rootElements = getRootElements( + element, + extractionConfig.getRootConfig(), + fieldConfig.getRootConfig() + ) + + for(index in 0..rootElements.size - 1) { + val rootElement = rootElements[index] + val extractedText = extractTextFromElementByTargetFieldConfig( + rootElement, + fieldConfig + ) ?: throw ElementNotFoundException("Could not find element for '$identifier'") + + val mapToModify: MutableMap = try { + resultList[index] + } catch (_: IndexOutOfBoundsException) { + val newMap = mutableMapOf() + resultList.add(newMap) + newMap + } + + mapToModify.put(identifier, extractedText) + } + } + + return resultList + } + + fun extractAsListOfMaps( + elements: Elements, + extractionConfig: T + ): List> { + val resultList = mutableListOf>() + + + // refactor this + extractionConfig.getItems().forEach { (identifier, fieldConfig) -> + for(index in 0..elements.size - 1) { + val rootElement = elements[index] + val extractedText = extractTextFromElementByTargetFieldConfig( + rootElement, + fieldConfig + ) ?: throw ElementNotFoundException("Could not find element for '$identifier'") + + val mapToModify: MutableMap = try { + resultList[index] + } catch (_: IndexOutOfBoundsException) { + val newMap = mutableMapOf() + resultList.add(newMap) + newMap + } + + mapToModify.put(identifier, extractedText) + } + } + + return resultList + } + + fun extractWithDiscriminator( + element: Element, + extractionConfig: T + ): List>>{ + val rootElement = getRootElement( + element, + extractionConfig.getRootConfig(), + Optional.empty() + ) + + var rootElements = getRootElements( + element, + extractionConfig.getRootConfig(), + Optional.empty() + ) + + val discriminatedElements = getElementsFromElementByExtractConfig( + rootElement, + extractionConfig.getDiscriminator().get().getRootConfig().get(), + ) + + val discriminations = mutableListOf() + val result = mutableListOf>>() + + for (element in discriminatedElements) { + val discriminatorValue: String = extractTextFromElementByTargetFieldConfig( + element, + extractionConfig.getDiscriminator().get() + ) ?: throw ElementNotFoundException("") + + discriminations.add(discriminatorValue) + } + + val definitiveElements = if (discriminations.size < rootElements.size) { + if (extractionConfig.getDiscriminator().get().getDiscriminatorDirection() == DiscriminatorDirection.DESC) { + rootElements = Elements(rootElements.reversed()) + } + + while (discriminations.size < rootElements.size) { + rootElements.removeFirst() + } + + if (extractionConfig.getDiscriminator().get().getDiscriminatorDirection() == DiscriminatorDirection.DESC) { + rootElements = Elements(rootElements.reversed()) + } + + rootElements + } else { + rootElements + } + + result.add(extractAsListOfMaps( + definitiveElements, + extractionConfig + )) + + + + return result + } + + private fun extractTextFromElementByTargetFieldConfig( + root: Element, + extractionConfig: ScrapeTargetFieldConfig + ): String? { + val extractionSteps = extractionConfig.getExtractionSteps() + val transformationSteps = extractionConfig.getOptionalTransformationSteps() + + var currentElement: Element? = root.clone() + var result: String? = null + + for (index in 0 until extractionSteps.size) { + val currentStep = extractionSteps.elementAtOrNull(index) ?: return null + if (currentElement == null) { + throw IllegalStateException() + } + + if (index == extractionSteps.size - 1) { + result = when (currentStep.selectorType()) { + Selector.CSS -> CssUtil.extractResult(currentElement, currentStep.getQueryString()) + Selector.XPATH -> XPathUtil.extractResult(currentElement, currentStep.getQueryString()) + } + } + else { + currentElement = when (currentStep.selectorType()) { + Selector.CSS -> CssUtil.getNextElement(currentElement, currentStep.getQueryString()) + Selector.XPATH -> XPathUtil.getNextElement(currentElement, currentStep.getQueryString()) + } + } + } + + if (result == null) { + throw ElementNotFoundException("Result could not be extracted") + } + + if (transformationSteps.isPresent) { + result = transformationRegistry.applyTransformations(result, transformationSteps.get()) + } + + return result + } + +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/service/CardPrintExtractionService.kt b/src/main/kotlin/com/rak/service/CardPrintExtractionService.kt new file mode 100644 index 0000000..a3e13f6 --- /dev/null +++ b/src/main/kotlin/com/rak/service/CardPrintExtractionService.kt @@ -0,0 +1,46 @@ +package com.rak.service + +import com.rak.config.model.CardPrintScrapeTargetConfig +import com.rak.config.model.ProviderConfig +import com.rak.config.model.ScrapeTargetFieldConfig +import com.rak.config.model.SetScrapeTargetConfig +import com.rak.model.card.CardPrint +import com.rak.model.exception.NotImplementedException +import com.rak.model.set.CardSet +import com.rak.model.set.RegionalSet +import jakarta.enterprise.context.ApplicationScoped +import org.jsoup.nodes.Document +import org.jsoup.nodes.Element + +@ApplicationScoped +class CardPrintExtractionService : AbstractExtractionService() { + + override fun CardPrintScrapeTargetConfig.getItems(): Map { + return mapOf( + Pair("id", this.getIdConfig()), + Pair("name", this.getNameConfig()), + Pair("regionalName", this.getRegionNameConfig()), + Pair("rarity", this.getRarityConfig()), + ) + } + + override fun extract( + element: Element, + providerConfig: ProviderConfig, + extractionConfig: CardPrintScrapeTargetConfig + ): CardPrint { + throw NotImplementedException("Not implemented") + } + + override fun extractMultiple( + element: Element, + providerConfig: ProviderConfig, + extractionConfig: CardPrintScrapeTargetConfig + ): Collection { + val objectAsListOfMaps = extractWithDiscriminator(element, extractionConfig) + + return objectAsListOfMaps.map { + CardPrint.fromMap(it[0]) + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/service/ExtractionService.kt b/src/main/kotlin/com/rak/service/ExtractionService.kt index 1d7e902..3f8e881 100644 --- a/src/main/kotlin/com/rak/service/ExtractionService.kt +++ b/src/main/kotlin/com/rak/service/ExtractionService.kt @@ -1,10 +1,13 @@ package com.rak.service +import com.rak.config.model.CardPrintScrapeTargetConfig import com.rak.config.model.ExtractConfig import com.rak.config.model.ScrapeTargetFieldConfig import com.rak.model.Selector import com.rak.model.card.Card +import com.rak.model.card.CardPrint import com.rak.model.exception.ElementNotFoundException +import com.rak.model.exception.InvalidConfigurationException import com.rak.model.set.CardSet import com.rak.model.set.RegionalSet import com.rak.model.transform.TransformationRegistry @@ -13,6 +16,7 @@ import jakarta.enterprise.context.ApplicationScoped import org.jsoup.nodes.Document import org.jsoup.nodes.Element import org.jsoup.select.Elements +import java.util.Optional @ApplicationScoped class ExtractionService( @@ -28,6 +32,32 @@ class ExtractionService( ) } + fun getRootElement( + document: Document, + globalRootExtractConfig: Optional, + nodeRootExtractConfig: Optional + ): Element { + val rootExtractConfig: ExtractConfig = globalRootExtractConfig.orElse( + nodeRootExtractConfig.orElseThrow { + InvalidConfigurationException("") + }) + + return getElementFromDocumentByExtractConfig(document, rootExtractConfig) ?: throw ElementNotFoundException("No root could be found") + } + + fun extractCardPrint(document: Document, cardPrintConfig: CardPrintScrapeTargetConfig): CardPrint? { + val cardName = extractTextFromElementByTargetFieldConfig( + getRootElement( + document, + cardPrintConfig.getRootConfig(), + cardPrintConfig.getNameConfig().getRootConfig() + ), + cardPrintConfig.getNameConfig() + ) + + return null + } + fun extractRegionalSet(root: Element, provider: String): RegionalSet { val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found") val setExtractionConfig = source.getTargets().getSetConfig().get() @@ -50,13 +80,12 @@ class ExtractionService( return RegionalSet( setId, setLanguage, - setKey + setKey, + listOf(), + -1 ) } else { val setIdConfiguration = setExtractionConfig.getIdConfig() - if (!setIdConfiguration.getRootConfig().isPresent) { - throw RuntimeException("as[po") // TODO fix me - } val rootConfiguration = setIdConfiguration.getRootConfig().get() val setIdRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this") @@ -84,7 +113,9 @@ class ExtractionService( return RegionalSet( setId, setLanguage, - setKey + setKey, + listOf(), + -1 ) } } diff --git a/src/main/kotlin/com/rak/service/RegionalSetExtractionService.kt b/src/main/kotlin/com/rak/service/RegionalSetExtractionService.kt new file mode 100644 index 0000000..b475400 --- /dev/null +++ b/src/main/kotlin/com/rak/service/RegionalSetExtractionService.kt @@ -0,0 +1,52 @@ +package com.rak.service + +import com.rak.config.model.ProviderConfig +import com.rak.config.model.ScrapeTargetFieldConfig +import com.rak.config.model.SetScrapeTargetConfig +import com.rak.config.model.SourcesConfig +import com.rak.model.exception.NotImplementedException +import com.rak.model.set.RegionalSet +import jakarta.enterprise.context.ApplicationScoped +import org.jsoup.nodes.Element + +@ApplicationScoped +class RegionalSetExtractionService( + private val cardPrintExtractionService: CardPrintExtractionService, + private val sourcesConfig: SourcesConfig +) : AbstractExtractionService() { + + override fun SetScrapeTargetConfig.getItems(): Map { + return mapOf( + Pair("prefix", this.getIdConfig()), + Pair("regionCode", this.getRegionKeyConfig()), + Pair("region", this.getLanguageConfig()), + ) + } + + override fun extract( + element: Element, + providerConfig: ProviderConfig, + extractionConfig: SetScrapeTargetConfig + ): RegionalSet { + throw NotImplementedException("Not implemented") + } + + override fun extractMultiple( + element: Element, + providerConfig: ProviderConfig, + extractionConfig: SetScrapeTargetConfig + ): Collection { + val regionalSetList = extractAsListOfMaps(element, extractionConfig) + val cardPrintsInRegionalSet = extractAsListOfMaps(element, extractionConfig) + + val cardPrints = cardPrintExtractionService.extractMultiple( + element, + providerConfig, + providerConfig.getTargets().getCardPrintConfiguration().get() + ) + + return regionalSetList.map { + RegionalSet.fromMap(it, cardPrints) + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/service/ScrapeService.kt b/src/main/kotlin/com/rak/service/ScrapeService.kt index 73d6936..403a7ea 100644 --- a/src/main/kotlin/com/rak/service/ScrapeService.kt +++ b/src/main/kotlin/com/rak/service/ScrapeService.kt @@ -2,6 +2,7 @@ package com.rak.service import com.rak.model.card.Card import com.rak.model.set.CardSet +import com.rak.model.set.RegionalSet import jakarta.enterprise.context.ApplicationScoped import org.jsoup.Jsoup import org.jsoup.nodes.Document @@ -10,6 +11,8 @@ import org.jsoup.nodes.Document class ScrapeService( private val sourceService: SourceService, private val extractionService: ExtractionService, + private val setExtractionService: SetExtractionService, + private val regionalSetExtractionService: RegionalSetExtractionService ) { @@ -22,7 +25,20 @@ class ScrapeService( val path: String = normalizePath(setName) val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get() - return extractionService.extractSet(setName, document, provider) +// return extractionService.extractSet(setName, document, provider) + return setExtractionService.extract(document, source, source.getTargets().getSetConfig().get()) + } + + fun scrapeRegionalSet( + provider: String, + setName: String, + ): RegionalSet { + val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found") + + val path: String = normalizePath(setName) + val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get() + + return regionalSetExtractionService.extract(document, source, source.getTargets().getSetConfig().get()) } fun scrapeCard( diff --git a/src/main/kotlin/com/rak/service/SetExtractionService.kt b/src/main/kotlin/com/rak/service/SetExtractionService.kt new file mode 100644 index 0000000..b1140a5 --- /dev/null +++ b/src/main/kotlin/com/rak/service/SetExtractionService.kt @@ -0,0 +1,42 @@ +package com.rak.service + +import com.rak.config.model.ProviderConfig +import com.rak.config.model.ScrapeTargetFieldConfig +import com.rak.config.model.SetScrapeTargetConfig +import com.rak.model.exception.NotImplementedException +import com.rak.model.set.CardSet +import jakarta.enterprise.context.ApplicationScoped +import org.jsoup.nodes.Element + +@ApplicationScoped +class SetExtractionService( + private val regionalSetExtractionService: RegionalSetExtractionService +) : AbstractExtractionService() { + + override fun SetScrapeTargetConfig.getItems(): Map { + return mapOf( + Pair("prefix", this.getIdConfig()), + Pair("regionCode", this.getRegionKeyConfig()), + Pair("region", this.getLanguageConfig()), + ) + } + + override fun extract( + element: Element, + providerConfig: ProviderConfig, + extractionConfig: SetScrapeTargetConfig + ): CardSet { + return CardSet( + "test", + regionalSetExtractionService.extractMultiple(element, providerConfig, extractionConfig).toSet() + ) + } + + override fun extractMultiple( + element: Element, + providerConfig: ProviderConfig, + extractionConfig: SetScrapeTargetConfig + ): Collection { + throw NotImplementedException("Not implemented") + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/util/CssUtil.kt b/src/main/kotlin/com/rak/util/CssUtil.kt new file mode 100644 index 0000000..e9dc45b --- /dev/null +++ b/src/main/kotlin/com/rak/util/CssUtil.kt @@ -0,0 +1,19 @@ +package com.rak.util + +import org.jsoup.nodes.Element + +class CssUtil private constructor() { + + companion object { + fun getNextElement(element: Element, path: String): Element? { + return element.select(path).firstOrNull() + } + + fun extractResult(root: Element, path: String): String? { + return root + .select(path) + .firstOrNull()?.text() + } + } + +} \ No newline at end of file diff --git a/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter b/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter index 2658a71..c7a4762 100644 --- a/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter +++ b/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter @@ -1 +1,2 @@ com.rak.config.converter.TypeSelectorConverter +com.rak.config.converter.DiscriminatorDirectionConverter diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 943051b..e1f4b39 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -54,6 +54,35 @@ scraper: steps: - type: xpath value: "//li/abbr/text()" + card-print: + multi: true + root: + type: css + value: ".tabber.wds-tabber > div" + discriminator: + direction: asc + root: + type: css + value: ".wds-tabs__tab" + steps: + - type: xpath + value: "//li/div/a/text()" + id: + steps: + - type: xpath + value: ".//table/tbody/tr[2]/td[1]/a/text()" + name: + steps: + - type: xpath + value: ".//table/tbody/tr[2]/td[1]/a/text()" + regional-name: + steps: + - type: xpath + value: ".//table/tbody/tr[2]/td[2]/a/text()" + rarity: + steps: + - type: xpath + value: ".//table/tbody/tr[2]/td[3]/a/text()" card: name: root: