Split Set/RegionalSet properly

This commit is contained in:
2025-06-29 16:49:19 +02:00
parent 8a0777e557
commit 5930da7a4c
9 changed files with 63 additions and 32 deletions

View File

@@ -0,0 +1,12 @@
package com.rak.config.model
import io.smallrye.config.WithName
/**
 * Scrape-target configuration for a regional set.
 *
 * Extends [AbstractScrapeTargetConfig] with three field configurations, each bound to a
 * configuration key through `@WithName`.
 */
interface RegionalSetScrapeTargetConfig : AbstractScrapeTargetConfig {
/** Field configuration bound to the `id` key. */
@WithName("id")
fun getIdConfig(): ScrapeTargetFieldConfig
/** Field configuration bound to the `language` key. */
@WithName("language")
fun getLanguageConfig(): ScrapeTargetFieldConfig
/** Field configuration bound to the `region-key` key. */
@WithName("region-key")
fun getRegionKeyConfig(): ScrapeTargetFieldConfig
}

View File

@@ -3,10 +3,6 @@ package com.rak.config.model
import io.smallrye.config.WithName import io.smallrye.config.WithName
interface SetScrapeTargetConfig : AbstractScrapeTargetConfig { interface SetScrapeTargetConfig : AbstractScrapeTargetConfig {
@WithName("id") @WithName("name")
fun getIdConfig(): ScrapeTargetFieldConfig fun getNameConfig(): ScrapeTargetFieldConfig
@WithName("language")
fun getLanguageConfig(): ScrapeTargetFieldConfig
@WithName("region-key")
fun getRegionKeyConfig(): ScrapeTargetFieldConfig
} }

View File

@@ -8,6 +8,8 @@ interface TargetsConfig {
fun getCardConfig(): Optional<CardScrapeTargetConfig> fun getCardConfig(): Optional<CardScrapeTargetConfig>
@WithName("set") @WithName("set")
fun getSetConfig(): Optional<SetScrapeTargetConfig> fun getSetConfig(): Optional<SetScrapeTargetConfig>
@WithName("regional-set")
fun getRegionalSetConfig(): Optional<RegionalSetScrapeTargetConfig>
@WithName("card-print") @WithName("card-print")
fun getCardPrintConfiguration(): Optional<CardPrintScrapeTargetConfig> fun getCardPrintConfiguration(): Optional<CardPrintScrapeTargetConfig>
} }

View File

@@ -1,12 +1,15 @@
package com.rak.model.set package com.rak.model.set
import kotlin.collections.Set
data class CardSet( data class CardSet(
var name: String, var name: String,
val regionalSets: Set<RegionalSet> val regionalSets: Set<RegionalSet>
) { ) {
companion object { companion object {
/**
 * Builds a [CardSet] from extracted key/value fields.
 *
 * @param map extracted fields; must contain a `name` entry
 * @param regionalSet regional sets to attach to the resulting card set
 * @return a [CardSet] whose name is `map["name"]` and whose regional sets are [regionalSet]
 * @throws IllegalStateException if [map] has no `name` entry
 */
fun fromMap(map: Map<String, String>, regionalSet: Set<RegionalSet>): CardSet {
    // checkNotNull raises IllegalStateException with the supplied message when the key is absent.
    val setName = checkNotNull(map["name"]) { "Parameter 'name' not found" }
    return CardSet(name = setName, regionalSets = regionalSet)
}
} }
} }

View File

@@ -7,7 +7,7 @@ import com.rak.model.exception.TargetNotFoundException
import com.rak.model.set.CardSet import com.rak.model.set.CardSet
import com.rak.model.set.RegionalSet import com.rak.model.set.RegionalSet
import com.rak.service.extract.RegionalSetExtractionService import com.rak.service.extract.RegionalSetExtractionService
import com.rak.service.extract.SetExtractionService import com.rak.service.extract.CardSetExtractionService
import io.quarkus.logging.Log import io.quarkus.logging.Log
import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.Jsoup import org.jsoup.Jsoup
@@ -17,7 +17,7 @@ import java.lang.Exception
@ApplicationScoped @ApplicationScoped
class ScrapeService( class ScrapeService(
private val sourceService: SourceService, private val sourceService: SourceService,
private val setExtractionService: SetExtractionService, private val cardSetExtractionService: CardSetExtractionService,
private val regionalSetExtractionService: RegionalSetExtractionService, private val regionalSetExtractionService: RegionalSetExtractionService,
private val commonCrawlService: CommonCrawlService private val commonCrawlService: CommonCrawlService
) { ) {
@@ -59,13 +59,11 @@ class ScrapeService(
} }
} }
return setExtractionService.extract( return cardSetExtractionService.extract(
document, document,
source, source,
source.getTargets().getSetConfig().get() source.getTargets().getSetConfig().get()
).apply { )
name = setName
}
} }
fun scrapeRegionalSet( fun scrapeRegionalSet(
@@ -77,7 +75,7 @@ class ScrapeService(
val path: String = normalizePath(setName) val path: String = normalizePath(setName)
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get() val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
return regionalSetExtractionService.extract(document, source, source.getTargets().getSetConfig().get()) return regionalSetExtractionService.extract(document, source, source.getTargets().getRegionalSetConfig().get())
} }
fun scrapeCard( fun scrapeCard(

View File

@@ -1,7 +1,7 @@
package com.rak.service package com.rak.service
import com.rak.config.model.CardScrapeTargetConfig import com.rak.config.model.CardScrapeTargetConfig
import com.rak.config.model.SetScrapeTargetConfig import com.rak.config.model.RegionalSetScrapeTargetConfig
import com.rak.config.model.ProviderConfig import com.rak.config.model.ProviderConfig
import com.rak.config.model.SourcesConfig import com.rak.config.model.SourcesConfig
import com.rak.model.exception.InvalidConfigurationException import com.rak.model.exception.InvalidConfigurationException
@@ -21,7 +21,7 @@ class SourceService(
} }
private fun validateSource(providerConfig: ProviderConfig) { private fun validateSource(providerConfig: ProviderConfig) {
val optionalRegionalSetConfig = providerConfig.getTargets().getSetConfig() val optionalRegionalSetConfig = providerConfig.getTargets().getRegionalSetConfig()
val optionalCardConfig = providerConfig.getTargets().getCardConfig() val optionalCardConfig = providerConfig.getTargets().getCardConfig()
if (optionalRegionalSetConfig.isPresent) { if (optionalRegionalSetConfig.isPresent) {
@@ -33,7 +33,7 @@ class SourceService(
} }
} }
private fun validateSetExtractConfig(setExtractConfig: SetScrapeTargetConfig) { private fun validateSetExtractConfig(setExtractConfig: RegionalSetScrapeTargetConfig) {
val selectors = listOf( val selectors = listOf(
setExtractConfig.getLanguageConfig(), setExtractConfig.getLanguageConfig(),
setExtractConfig.getIdConfig(), setExtractConfig.getIdConfig(),

View File

@@ -9,15 +9,13 @@ import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.nodes.Element import org.jsoup.nodes.Element
@ApplicationScoped @ApplicationScoped
class SetExtractionService( class CardSetExtractionService(
private val regionalSetExtractionService: RegionalSetExtractionService private val regionalSetExtractionService: RegionalSetExtractionService
) : AbstractExtractionService<CardSet, SetScrapeTargetConfig>() { ) : AbstractExtractionService<CardSet, SetScrapeTargetConfig>() {
override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> { override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
return mapOf( return mapOf(
Pair("prefix", this.getIdConfig()), Pair("name", this.getNameConfig()),
Pair("regionCode", this.getRegionKeyConfig()),
Pair("region", this.getLanguageConfig()),
) )
} }
@@ -26,9 +24,15 @@ class SetExtractionService(
providerConfig: ProviderConfig, providerConfig: ProviderConfig,
extractionConfig: SetScrapeTargetConfig extractionConfig: SetScrapeTargetConfig
): CardSet { ): CardSet {
return CardSet( val set = extractSingle(element, extractionConfig)
"test",
regionalSetExtractionService.extractMultiple(element, providerConfig, extractionConfig).toSet() return CardSet.fromMap(
set,
regionalSetExtractionService.extractMultiple(
element,
providerConfig,
providerConfig.getTargets().getRegionalSetConfig().get()
).toSet()
) )
} }

View File

@@ -2,7 +2,7 @@ package com.rak.service.extract
import com.rak.config.model.ProviderConfig import com.rak.config.model.ProviderConfig
import com.rak.config.model.ScrapeTargetFieldConfig import com.rak.config.model.ScrapeTargetFieldConfig
import com.rak.config.model.SetScrapeTargetConfig import com.rak.config.model.RegionalSetScrapeTargetConfig
import com.rak.config.model.SourcesConfig import com.rak.config.model.SourcesConfig
import com.rak.model.card.CardPrint import com.rak.model.card.CardPrint
import com.rak.model.exception.NotImplementedException import com.rak.model.exception.NotImplementedException
@@ -14,9 +14,9 @@ import org.jsoup.nodes.Element
class RegionalSetExtractionService( class RegionalSetExtractionService(
private val cardPrintExtractionService: CardPrintExtractionService, private val cardPrintExtractionService: CardPrintExtractionService,
private val sourcesConfig: SourcesConfig private val sourcesConfig: SourcesConfig
) : AbstractExtractionService<RegionalSet, SetScrapeTargetConfig>() { ) : AbstractExtractionService<RegionalSet, RegionalSetScrapeTargetConfig>() {
override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> { override fun RegionalSetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
return mapOf( return mapOf(
Pair("prefix", this.getIdConfig()), Pair("prefix", this.getIdConfig()),
Pair("regionCode", this.getRegionKeyConfig()), Pair("regionCode", this.getRegionKeyConfig()),
@@ -27,7 +27,7 @@ class RegionalSetExtractionService(
override fun extract( override fun extract(
element: Element, element: Element,
providerConfig: ProviderConfig, providerConfig: ProviderConfig,
extractionConfig: SetScrapeTargetConfig extractionConfig: RegionalSetScrapeTargetConfig
): RegionalSet { ): RegionalSet {
throw NotImplementedException("Not implemented") throw NotImplementedException("Not implemented")
} }
@@ -35,7 +35,7 @@ class RegionalSetExtractionService(
override fun extractMultiple( override fun extractMultiple(
element: Element, element: Element,
providerConfig: ProviderConfig, providerConfig: ProviderConfig,
extractionConfig: SetScrapeTargetConfig extractionConfig: RegionalSetScrapeTargetConfig
): List<RegionalSet> { ): List<RegionalSet> {
val regionalSetList = extractMulti(element, extractionConfig) val regionalSetList = extractMulti(element, extractionConfig)
@@ -55,7 +55,7 @@ class RegionalSetExtractionService(
override fun extractNestedMultiples( override fun extractNestedMultiples(
element: Element, element: Element,
providerConfig: ProviderConfig, providerConfig: ProviderConfig,
extractionConfig: SetScrapeTargetConfig extractionConfig: RegionalSetScrapeTargetConfig
): List<List<RegionalSet>> { ): List<List<RegionalSet>> {
throw NotImplementedException("Not implemented") throw NotImplementedException("Not implemented")
} }

View File

@@ -30,6 +30,16 @@ scraper:
url-pattern: "https://yugioh.fandom.com/wiki/%s" url-pattern: "https://yugioh.fandom.com/wiki/%s"
targets: targets:
set: set:
root:
type: css
value: "aside > .pi-title"
name:
type: string
extractors:
- steps:
- type: xpath
value: "//h2/text()"
regional-set:
root: root:
type: css type: css
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li" value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
@@ -80,6 +90,12 @@ scraper:
- steps: - steps:
- type: xpath - type: xpath
value: "./td/span/text()" value: "./td/span/text()"
transform:
- name: "regexReplace"
parameters: [
" .+",
""
]
validation: validation:
pattern: "^.+-.+\\\\d.+$" pattern: "^.+-.+\\\\d.+$"
name: name: