From 5930da7a4c35738deb7c8311de62822c48737166 Mon Sep 17 00:00:00 2001 From: Katarina Date: Sun, 29 Jun 2025 16:49:19 +0200 Subject: [PATCH] Split Set/RegionalSet properly --- .../model/RegionalSetScrapeTargetConfig.kt | 12 ++++++++++++ .../rak/config/model/SetScrapeTargetConfig.kt | 8 ++------ .../com/rak/config/model/TargetsConfig.kt | 2 ++ src/main/kotlin/com/rak/model/set/CardSet.kt | 9 ++++++--- .../kotlin/com/rak/service/ScrapeService.kt | 12 +++++------- .../kotlin/com/rak/service/SourceService.kt | 6 +++--- ...nService.kt => CardSetExtractionService.kt} | 18 +++++++++++------- .../extract/RegionalSetExtractionService.kt | 12 ++++++------ src/main/resources/application.yml | 16 ++++++++++++++++ 9 files changed, 63 insertions(+), 32 deletions(-) create mode 100644 src/main/kotlin/com/rak/config/model/RegionalSetScrapeTargetConfig.kt rename src/main/kotlin/com/rak/service/extract/{SetExtractionService.kt => CardSetExtractionService.kt} (76%) diff --git a/src/main/kotlin/com/rak/config/model/RegionalSetScrapeTargetConfig.kt b/src/main/kotlin/com/rak/config/model/RegionalSetScrapeTargetConfig.kt new file mode 100644 index 0000000..ec947b0 --- /dev/null +++ b/src/main/kotlin/com/rak/config/model/RegionalSetScrapeTargetConfig.kt @@ -0,0 +1,12 @@ +package com.rak.config.model + +import io.smallrye.config.WithName + +interface RegionalSetScrapeTargetConfig : AbstractScrapeTargetConfig { + @WithName("id") + fun getIdConfig(): ScrapeTargetFieldConfig + @WithName("language") + fun getLanguageConfig(): ScrapeTargetFieldConfig + @WithName("region-key") + fun getRegionKeyConfig(): ScrapeTargetFieldConfig +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/model/SetScrapeTargetConfig.kt b/src/main/kotlin/com/rak/config/model/SetScrapeTargetConfig.kt index e0e3c37..959037b 100644 --- a/src/main/kotlin/com/rak/config/model/SetScrapeTargetConfig.kt +++ b/src/main/kotlin/com/rak/config/model/SetScrapeTargetConfig.kt @@ -3,10 +3,6 @@ package com.rak.config.model import io.smallrye.config.WithName interface SetScrapeTargetConfig : AbstractScrapeTargetConfig { - @WithName("id") - fun getIdConfig(): ScrapeTargetFieldConfig - @WithName("language") - fun getLanguageConfig(): ScrapeTargetFieldConfig - @WithName("region-key") - fun getRegionKeyConfig(): ScrapeTargetFieldConfig + @WithName("name") + fun getNameConfig(): ScrapeTargetFieldConfig } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/model/TargetsConfig.kt b/src/main/kotlin/com/rak/config/model/TargetsConfig.kt index 1ddc725..acdc3c0 100644 --- a/src/main/kotlin/com/rak/config/model/TargetsConfig.kt +++ b/src/main/kotlin/com/rak/config/model/TargetsConfig.kt @@ -8,6 +8,8 @@ interface TargetsConfig { fun getCardConfig(): Optional @WithName("set") fun getSetConfig(): Optional + @WithName("regional-set") + fun getRegionalSetConfig(): Optional @WithName("card-print") fun getCardPrintConfiguration(): Optional } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/set/CardSet.kt b/src/main/kotlin/com/rak/model/set/CardSet.kt index 0a9fd49..7045fd2 100644 --- a/src/main/kotlin/com/rak/model/set/CardSet.kt +++ b/src/main/kotlin/com/rak/model/set/CardSet.kt @@ -1,12 +1,15 @@ package com.rak.model.set -import kotlin.collections.Set - data class CardSet( var name: String, val regionalSets: Set ) { companion object { - + fun fromMap(map: Map, regionalSet: Set): CardSet { + return CardSet( + map["name"] ?: throw IllegalStateException("Parameter 'name' not found"), + regionalSet + ) + } } } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/service/ScrapeService.kt b/src/main/kotlin/com/rak/service/ScrapeService.kt index e9bc480..c368a06 100644 --- a/src/main/kotlin/com/rak/service/ScrapeService.kt +++ b/src/main/kotlin/com/rak/service/ScrapeService.kt @@ -7,7 +7,7 @@ import com.rak.model.exception.TargetNotFoundException import com.rak.model.set.CardSet import com.rak.model.set.RegionalSet import com.rak.service.extract.RegionalSetExtractionService -import com.rak.service.extract.SetExtractionService +import com.rak.service.extract.CardSetExtractionService import io.quarkus.logging.Log import jakarta.enterprise.context.ApplicationScoped import org.jsoup.Jsoup @@ -17,7 +17,7 @@ import java.lang.Exception @ApplicationScoped class ScrapeService( private val sourceService: SourceService, - private val setExtractionService: SetExtractionService, + private val cardSetExtractionService: CardSetExtractionService, private val regionalSetExtractionService: RegionalSetExtractionService, private val commonCrawlService: CommonCrawlService ) { @@ -59,13 +59,11 @@ class ScrapeService( } } - return setExtractionService.extract( + return cardSetExtractionService.extract( document, source, source.getTargets().getSetConfig().get() - ).apply { - name = setName - } + ) } fun scrapeRegionalSet( @@ -77,7 +75,7 @@ class ScrapeService( val path: String = normalizePath(setName) val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get() - return regionalSetExtractionService.extract(document, source, source.getTargets().getSetConfig().get()) + return regionalSetExtractionService.extract(document, source, source.getTargets().getRegionalSetConfig().get()) } fun scrapeCard( diff --git a/src/main/kotlin/com/rak/service/SourceService.kt b/src/main/kotlin/com/rak/service/SourceService.kt index d49fa52..60728d1 100644 --- a/src/main/kotlin/com/rak/service/SourceService.kt +++ b/src/main/kotlin/com/rak/service/SourceService.kt @@ -1,7 +1,7 @@ package com.rak.service import com.rak.config.model.CardScrapeTargetConfig -import com.rak.config.model.SetScrapeTargetConfig +import com.rak.config.model.RegionalSetScrapeTargetConfig import com.rak.config.model.ProviderConfig import com.rak.config.model.SourcesConfig import com.rak.model.exception.InvalidConfigurationException @@ -21,7 +21,7 @@ class SourceService( } private fun validateSource(providerConfig: ProviderConfig) { - val optionalRegionalSetConfig = providerConfig.getTargets().getSetConfig() + val optionalRegionalSetConfig = providerConfig.getTargets().getRegionalSetConfig() val optionalCardConfig = providerConfig.getTargets().getCardConfig() if (optionalRegionalSetConfig.isPresent) { @@ -33,7 +33,7 @@ class SourceService( } } - private fun validateSetExtractConfig(setExtractConfig: SetScrapeTargetConfig) { + private fun validateSetExtractConfig(setExtractConfig: RegionalSetScrapeTargetConfig) { val selectors = listOf( setExtractConfig.getLanguageConfig(), setExtractConfig.getIdConfig(), diff --git a/src/main/kotlin/com/rak/service/extract/SetExtractionService.kt b/src/main/kotlin/com/rak/service/extract/CardSetExtractionService.kt similarity index 76% rename from src/main/kotlin/com/rak/service/extract/SetExtractionService.kt rename to src/main/kotlin/com/rak/service/extract/CardSetExtractionService.kt index 9aa9d28..8dc2d0a 100644 --- a/src/main/kotlin/com/rak/service/extract/SetExtractionService.kt +++ b/src/main/kotlin/com/rak/service/extract/CardSetExtractionService.kt @@ -9,15 +9,13 @@ import jakarta.enterprise.context.ApplicationScoped import org.jsoup.nodes.Element @ApplicationScoped -class SetExtractionService( +class CardSetExtractionService( private val regionalSetExtractionService: RegionalSetExtractionService ) : AbstractExtractionService() { override fun SetScrapeTargetConfig.getItems(): Map { return mapOf( - Pair("prefix", this.getIdConfig()), - Pair("regionCode", this.getRegionKeyConfig()), - Pair("region", this.getLanguageConfig()), + Pair("name", this.getNameConfig()), ) } @@ -26,9 +24,15 @@ class SetExtractionService( providerConfig: ProviderConfig, extractionConfig: SetScrapeTargetConfig ): CardSet { - return CardSet( - "test", - regionalSetExtractionService.extractMultiple(element, providerConfig, extractionConfig).toSet() + val set = extractSingle(element, extractionConfig) + + return CardSet.fromMap( + set, + regionalSetExtractionService.extractMultiple( + element, + providerConfig, + providerConfig.getTargets().getRegionalSetConfig().get() + ).toSet() ) } diff --git a/src/main/kotlin/com/rak/service/extract/RegionalSetExtractionService.kt b/src/main/kotlin/com/rak/service/extract/RegionalSetExtractionService.kt index 1144f19..35ea97c 100644 --- a/src/main/kotlin/com/rak/service/extract/RegionalSetExtractionService.kt +++ b/src/main/kotlin/com/rak/service/extract/RegionalSetExtractionService.kt @@ -2,7 +2,7 @@ package com.rak.service.extract import com.rak.config.model.ProviderConfig import com.rak.config.model.ScrapeTargetFieldConfig -import com.rak.config.model.SetScrapeTargetConfig +import com.rak.config.model.RegionalSetScrapeTargetConfig import com.rak.config.model.SourcesConfig import com.rak.model.card.CardPrint import com.rak.model.exception.NotImplementedException @@ -14,9 +14,9 @@ import org.jsoup.nodes.Element class RegionalSetExtractionService( private val cardPrintExtractionService: CardPrintExtractionService, private val sourcesConfig: SourcesConfig -) : AbstractExtractionService() { +) : AbstractExtractionService() { - override fun SetScrapeTargetConfig.getItems(): Map { + override fun RegionalSetScrapeTargetConfig.getItems(): Map { return mapOf( Pair("prefix", this.getIdConfig()), Pair("regionCode", this.getRegionKeyConfig()), @@ -27,7 +27,7 @@ class RegionalSetExtractionService( override fun extract( element: Element, providerConfig: ProviderConfig, - extractionConfig: SetScrapeTargetConfig + extractionConfig: RegionalSetScrapeTargetConfig ): RegionalSet { throw NotImplementedException("Not implemented") } @@ -35,7 +35,7 @@ class RegionalSetExtractionService( override fun extractMultiple( element: Element, providerConfig: ProviderConfig, - extractionConfig: SetScrapeTargetConfig + extractionConfig: RegionalSetScrapeTargetConfig ): List { val regionalSetList = extractMulti(element, extractionConfig) @@ -55,7 +55,7 @@ class RegionalSetExtractionService( override fun extractNestedMultiples( element: Element, providerConfig: ProviderConfig, - extractionConfig: SetScrapeTargetConfig + extractionConfig: RegionalSetScrapeTargetConfig ): List> { throw NotImplementedException("Not implemented") } diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 223bca6..6662507 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -30,6 +30,16 @@ scraper: url-pattern: "https://yugioh.fandom.com/wiki/%s" targets: set: + root: + type: css + value: "aside > .pi-title" + name: + type: string + extractors: + - steps: + - type: xpath + value: "//h2/text()" + regional-set: root: type: css value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li" @@ -80,6 +90,12 @@ scraper: - steps: - type: xpath value: "./td/span/text()" + transform: + - name: "regexReplace" + parameters: [ + " .+", + "" + ] validation: pattern: "^.+-.+\\\\d.+$" name: