Refactor extraction logic

Add required models
This commit is contained in:
2025-06-25 13:57:44 +02:00
parent 284723c978
commit ce64f90a66
45 changed files with 586 additions and 141 deletions
@@ -1,8 +0,0 @@
package com.rak.config
import io.smallrye.config.WithName
/**
 * Base contract for model definitions that expose a root selector.
 */
interface AbstractModelDefinition {
/** Root selection step; bound to the `root` key through [WithName]. */
@WithName("root")
fun rootSelector(): Step
}
@@ -1,12 +0,0 @@
package com.rak.config
import io.smallrye.config.WithName
/**
 * Selector definitions for the fields of a card.
 * Each accessor is bound to the key given in its [WithName] annotation.
 */
interface CardDefinition {
/** Selector definition bound to the `name` key. */
@WithName("name")
fun nameSelector(): SelectorDefinition
/** Selector definition bound to the `attack` key. */
@WithName("attack")
fun attackSelector(): SelectorDefinition
/** Selector definition bound to the `effect` key. */
@WithName("effect")
fun effectSelector(): SelectorDefinition
}
-8
View File
@@ -1,8 +0,0 @@
package com.rak.config
import java.util.*
/**
 * Groups the item definitions available for a source; each one may be absent.
 */
interface Items {
/** Card definition, or an empty [Optional] when none is present. */
fun card(): Optional<CardDefinition>
/** Regional set definition, or an empty [Optional] when none is present. */
fun regionalSet(): Optional<RegionalSetDefinition>
}
@@ -1,12 +0,0 @@
package com.rak.config
import io.smallrye.config.WithName
/**
 * Selector definitions for the fields of a regional set.
 * Inherits the root selector from [AbstractModelDefinition].
 */
interface RegionalSetDefinition : AbstractModelDefinition {
/** Selector definition bound to the `id` key. */
@WithName("id")
fun idSelector(): SelectorDefinition
/** Selector definition bound to the `language` key. */
@WithName("language")
fun languageSelector(): SelectorDefinition
/** Selector definition bound to the `region-key` key. */
@WithName("region-key")
fun regionKeySelector(): SelectorDefinition
}
@@ -1,8 +0,0 @@
package com.rak.config
import java.util.*
/**
 * Describes how a single value is selected: a set of steps plus optional transformations.
 */
interface SelectorDefinition {
/** Selection steps that make up this selector. */
fun steps(): Set<Step>
/** Transformation steps for the selected value; empty [Optional] when none are given. */
fun transform(): Optional<List<TransformationStep>>
}
@@ -1,6 +1,6 @@
package com.rak.config.converter package com.rak.config.converter
import com.rak.model.scrape.selector.Selector import com.rak.model.Selector
import org.eclipse.microprofile.config.spi.Converter import org.eclipse.microprofile.config.spi.Converter
class TypeSelectorConverter : Converter<Selector> { class TypeSelectorConverter : Converter<Selector> {
@@ -0,0 +1,11 @@
package com.rak.config.model
import io.smallrye.config.WithName
import java.util.Optional
/**
 * Settings shared by all scrape target configurations.
 */
interface AbstractScrapeTargetConfig {
/** Target-wide root extraction config bound to the `root` key; may be absent. */
@WithName("root")
fun getRootConfig(): Optional<ExtractConfig>
/** Flag bound to the `multi` key; may be absent. NOTE(review): presumably marks targets yielding several results - confirm. */
@WithName("multi")
fun isMulti(): Optional<Boolean>
}
@@ -0,0 +1,16 @@
package com.rak.config.model
import io.smallrye.config.WithName
/**
 * Scrape target configuration for a card.
 * Each accessor is bound to the key given in its [WithName] annotation.
 */
interface CardScrapeTargetConfig : AbstractScrapeTargetConfig {
/** Field config bound to the `name` key. */
@WithName("name")
fun getEnglishName(): ScrapeTargetFieldConfig
/** Field config bound to the `description` key. */
@WithName("description")
fun getDescription(): ScrapeTargetFieldConfig
/** Field config bound to the `type` key. */
@WithName("type")
fun getCardType(): ScrapeTargetFieldConfig
/** Field config bound to the `attack` key. */
@WithName("attack")
fun getAttack(): ScrapeTargetFieldConfig
/** Field config bound to the `defense` key. */
@WithName("defense")
fun getDefense(): ScrapeTargetFieldConfig
}
@@ -1,13 +1,14 @@
package com.rak.config package com.rak.config.model
import com.rak.config.converter.TypeSelectorConverter import com.rak.config.converter.TypeSelectorConverter
import com.rak.model.scrape.selector.Selector import com.rak.model.Selector
import io.smallrye.config.WithConverter import io.smallrye.config.WithConverter
import io.smallrye.config.WithName import io.smallrye.config.WithName
interface Step { interface ExtractConfig {
@WithConverter(TypeSelectorConverter::class) @WithConverter(TypeSelectorConverter::class)
@WithName("type") @WithName("type")
fun selectorType(): Selector // e.g. css or xpath fun selectorType(): Selector
fun value(): String @WithName("value")
fun getQueryString(): String
} }
@@ -0,0 +1,12 @@
package com.rak.config.model
import io.smallrye.config.WithName
/**
 * Scrape target configuration for a regional set.
 * Each accessor is bound to the key given in its [WithName] annotation.
 */
interface RegionalSetScrapeTargetConfig : AbstractScrapeTargetConfig {
/** Field config bound to the `id` key. */
@WithName("id")
fun idSelector(): ScrapeTargetFieldConfig
/** Field config bound to the `language` key. */
@WithName("language")
fun languageSelector(): ScrapeTargetFieldConfig
/** Field config bound to the `region-key` key. */
@WithName("region-key")
fun regionKeySelector(): ScrapeTargetFieldConfig
}
@@ -0,0 +1,12 @@
package com.rak.config.model
import io.smallrye.config.WithName
import java.util.*
/**
 * Configuration for extracting a single field of a scrape target.
 */
interface ScrapeTargetFieldConfig {
/** Field-specific root extraction config bound to the `root` key; may be absent. */
@WithName("root")
fun getRootConfig(): Optional<ExtractConfig>
/** Extraction steps bound to the `steps` key. */
@WithName("steps")
fun getSteps(): List<ExtractConfig>
/** Transformation steps for the extracted value; empty [Optional] when none are given. */
fun transform(): Optional<List<TransformationStepConfig>>
}
@@ -1,4 +1,4 @@
package com.rak.config package com.rak.config.model
import io.smallrye.config.WithName import io.smallrye.config.WithName
import java.util.* import java.util.*
@@ -13,7 +13,7 @@ interface SourceConfig {
fun getDomain(): String fun getDomain(): String
@WithName("url-patterns") @WithName("url-patterns")
fun getUrlPatterns(): Optional<MutableSet<String>> fun getUrlPatterns(): Optional<MutableSet<String>>
@WithName("selectors") @WithName("targets")
fun getItems(): Items fun getTargets(): TargetsConfig
} }
@@ -1,10 +1,10 @@
package com.rak.config package com.rak.config.model
import io.smallrye.config.ConfigMapping import io.smallrye.config.ConfigMapping
import io.smallrye.config.WithName import io.smallrye.config.WithName
@ConfigMapping(prefix = "scraper") @ConfigMapping(prefix = "scraper")
interface SourcesConfiguration { interface SourcesConfig {
@WithName("sources") @WithName("sources")
fun getSources(): MutableList<SourceConfig> fun getSources(): MutableList<SourceConfig>
@@ -0,0 +1,8 @@
package com.rak.config.model
import java.util.*
/**
 * Groups the scrape targets of a source; each target may be absent.
 */
interface TargetsConfig {
/** Card target configuration, or an empty [Optional] when none is present. */
fun card(): Optional<CardScrapeTargetConfig>
/** Regional set target configuration, or an empty [Optional] when none is present. */
fun regionalSet(): Optional<RegionalSetScrapeTargetConfig>
}
@@ -1,9 +1,9 @@
package com.rak.config package com.rak.config.model
import com.rak.config.converter.EmptyStringConverter import com.rak.config.converter.EmptyStringConverter
import io.smallrye.config.WithConverter import io.smallrye.config.WithConverter
interface TransformationStep { interface TransformationStepConfig {
fun name(): String fun name(): String
@WithConverter(EmptyStringConverter::class) @WithConverter(EmptyStringConverter::class)
fun parameters(): MutableList<String> fun parameters(): MutableList<String>
@@ -1,6 +1,7 @@
package com.rak.controller package com.rak.controller
import com.rak.model.RegionalSet import com.rak.model.card.Card
import com.rak.model.set.CardSet
import com.rak.service.ScrapeService import com.rak.service.ScrapeService
import jakarta.ws.rs.Consumes import jakarta.ws.rs.Consumes
import jakarta.ws.rs.GET import jakarta.ws.rs.GET
@@ -27,7 +28,7 @@ class ScrapeController(
provider: String, provider: String,
@RestQuery @RestQuery
setName: String setName: String
): List<RegionalSet> { ): CardSet {
return scrapeService.scrapeSet( return scrapeService.scrapeSet(
provider, provider,
setName setName
@@ -43,7 +44,7 @@ class ScrapeController(
provider: String, provider: String,
@RestQuery @RestQuery
cardName: String cardName: String
): Map<String, String> { ): Card? {
return scrapeService.scrapeCard( return scrapeService.scrapeCard(
provider, provider,
cardName cardName
@@ -1,7 +0,0 @@
package com.rak.model
/**
 * Immutable value holder for a regional set, made up of an id, a language and a key.
 */
data class RegionalSet(
val id: String,
val language: String,
val key: String
)
@@ -1,4 +1,4 @@
package com.rak.model.scrape.selector package com.rak.model
enum class Selector { enum class Selector {
CSS, CSS,
@@ -0,0 +1,11 @@
package com.rak.model.card
/**
 * Attribute values; used as the type of the `attribute` property of [MonsterCard].
 */
enum class Attribute {
WIND,
WATER,
FIRE,
EARTH,
LIGHT,
DARK,
DIVINE;
}
@@ -0,0 +1,8 @@
package com.rak.model.card
/**
 * Base class for all card models; subclasses must supply the properties declared here.
 */
abstract class Card {
/** Numeric identifier of the card. */
abstract val id: Int
/** Category of the card, see [CardType]. */
abstract val cardType: CardType
/** Description text of the card. */
abstract val description: String
/** Name of the card. */
abstract val name: String
}
@@ -0,0 +1,8 @@
package com.rak.model.card
/**
 * Top-level card categories, including an [UNKNOWN] entry.
 */
enum class CardType {
MONSTER,
SPELL,
TRAP,
UNKNOWN
}
@@ -0,0 +1,3 @@
package com.rak.model.card
/** Marker interface without members; implemented by [MonsterCardType]. */
interface ICardType
@@ -0,0 +1,12 @@
package com.rak.model.card
/**
 * The eight link arrow directions; used in the `linkArrows` set of [MonsterCard].
 */
enum class LinkArrow {
TOP_LEFT,
TOP,
TOP_RIGHT,
LEFT,
RIGHT,
BOTTOM_LEFT,
BOTTOM,
BOTTOM_RIGHT;
}
@@ -0,0 +1,20 @@
package com.rak.model.card
/**
 * [Card] implementation for monster cards.
 *
 * The nullable properties ([monsterEffect], [attack], [defense], [level], [pendulumScale],
 * [pendulumEffect], [linkValue]) default to `null` and [isPendulum] defaults to `false`.
 * [subType], [monsterType], [attribute] and [linkArrows] have no defaults and must always be supplied.
 */
data class MonsterCard(
override val id: Int,
override val cardType: CardType,
override val description: String,
override val name: String,
val monsterEffect: String? = null,
val attack: Int? = null,
val defense: Int? = null,
val level: Int? = null,
val isPendulum: Boolean = false,
val pendulumScale: Int? = null,
val pendulumEffect: String? = null,
val linkValue: Int? = null,
val subType: MonsterCardType,
val monsterType: MonsterType,
val attribute: Attribute,
val linkArrows: Set<LinkArrow>
) : Card()
@@ -0,0 +1,11 @@
package com.rak.model.card
/**
 * Sub types of a monster card; used as the `subType` of [MonsterCard].
 */
enum class MonsterCardType : ICardType {
NORMAL,
EFFECT,
RITUAL,
FUSION,
SYNCHRO,
XYZ,
LINK
}
@@ -0,0 +1,32 @@
package com.rak.model.card
// TODO string value for proper names
// TODO consider adding unknown type
/**
 * Monster types; used as the `monsterType` of [MonsterCard].
 * There is currently no entry for an unknown type.
 */
enum class MonsterType {
AQUA,
BEAST,
BEAST_WARRIOR,
CREATOR_GOD,
CYBERSE,
DINOSAUR,
DIVINE_BEAST,
DRAGON,
FAIRY,
FIEND,
FISH,
INSECT,
ILLUSION,
MACHINE,
PLANT,
PSYCHIC,
PYRO,
REPTILE,
ROCK,
SEA_SERPENT,
SPELLCASTER,
THUNDER,
WARRIOR,
WINGED_BEAST,
WYRM,
ZOMBIE
}
@@ -0,0 +1,9 @@
package com.rak.model.card
/**
 * [Card] implementation for spell cards; adds the spell-specific [subType].
 */
data class SpellCard(
override val id: Int,
override val cardType: CardType,
override val description: String,
override val name: String,
val subType: SpellCardType
) : Card()
@@ -0,0 +1,11 @@
package com.rak.model.card
// TODO fix underscore for all types with string value
/**
 * Sub types of a spell card; used as the `subType` of [SpellCard].
 */
enum class SpellCardType {
NORMAL,
CONTINUOUS,
EQUIP,
QUICK_PLAY,
FIELD,
RITUAL
}
@@ -0,0 +1,9 @@
package com.rak.model.card
/**
 * [Card] implementation for trap cards; adds the trap-specific [subType].
 */
data class TrapCard(
override val id: Int,
override val cardType: CardType,
override val description: String,
override val name: String,
val subType: TrapCardType
) : Card()
@@ -0,0 +1,7 @@
package com.rak.model.card
/**
 * Sub types of a trap card; used as the `subType` of [TrapCard].
 */
enum class TrapCardType {
NORMAL,
CONTINUOUS,
COUNTER
}
@@ -0,0 +1,3 @@
package com.rak.model.exception
/** Unchecked exception carrying a [message]; by its name, signals that an expected element was not found. */
class ElementNotFoundException(message: String) : RuntimeException(message)
@@ -0,0 +1,3 @@
package com.rak.model.exception
/** Unchecked exception carrying a [message]; by its name, signals an invalid configuration. */
class InvalidConfigurationException(message: String) : RuntimeException(message)
@@ -0,0 +1,7 @@
package com.rak.model.exception
import java.lang.RuntimeException
/**
 * Unchecked exception carrying a [message].
 * NOTE(review): presumably raised when a provider cannot serve a requested query - no usage is visible here, confirm.
 */
class UnsupportedQueryForProviderException(
message: String,
) : RuntimeException(message)
@@ -1,4 +0,0 @@
package com.rak.model.scrape
/** Empty abstract base class for scrapers; declares no members. */
abstract class AbstractScraper{
}
@@ -1,6 +0,0 @@
package com.rak.model.scrape
/** [AbstractScraper] subclass with an empty body; adds no behaviour of its own. */
class JsoupScraper : AbstractScraper() {
}
@@ -1,5 +0,0 @@
package com.rak.model.scrape
/** Immutable value holder for a scrape job, identified only by its [url]. */
data class ScrapeJob(
val url: String,
)
@@ -0,0 +1,8 @@
package com.rak.model.set
import kotlin.collections.Set
/**
 * A named card set together with its regional sets.
 */
data class CardSet(
val name: String,
val regionalSets: Set<RegionalSet>
)
@@ -0,0 +1,33 @@
package com.rak.model.set
/**
 * A single regional variant of a card set.
 *
 * @property prefix set prefix; filled from the id list in [flattenFromMemberLists]
 * @property region region value; filled from the language list in [flattenFromMemberLists]
 * @property regionCode region code; filled from the region key list in [flattenFromMemberLists]
 */
data class RegionalSet(
    val prefix: String,
    val region: String,
    val regionCode: String
) {
    companion object {
        /**
         * Combines three parallel lists into [RegionalSet] instances, pairing the entries by index.
         *
         * @param idList values used as [RegionalSet.prefix]
         * @param languageList values used as [RegionalSet.region]
         * @param regionKeyAliasList values used as [RegionalSet.regionCode]
         * @return a mutable set with one entry per index (duplicates collapse, as in any set)
         * @throws IllegalArgumentException if the three lists do not all have the same size
         */
        fun flattenFromMemberLists(
            idList: List<String>,
            languageList: List<String>,
            regionKeyAliasList: List<String>,
        ): MutableSet<RegionalSet> {
            // All three sizes must match. Using `&&` here would only reject input when BOTH
            // other lists differ from idList, letting a single mismatched list slip through
            // and fail later with an IndexOutOfBoundsException (or silently drop entries).
            if (idList.size != languageList.size || idList.size != regionKeyAliasList.size) {
                throw IllegalArgumentException("Lists have to be the same size")
            }

            return idList.indices.mapTo(mutableSetOf<RegionalSet>()) { index ->
                RegionalSet(
                    prefix = idList[index],
                    region = languageList[index],
                    regionCode = regionKeyAliasList[index]
                )
            }
        }
    }
}
@@ -1,6 +1,6 @@
package com.rak.model.transform package com.rak.model.transform
import com.rak.config.TransformationStep import com.rak.config.model.TransformationStepConfig
import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.ConcurrentHashMap
class TransformationRegistry { class TransformationRegistry {
@@ -34,7 +34,7 @@ class TransformationRegistry {
parameterizedTransformation.put(name, transformation) parameterizedTransformation.put(name, transformation)
} }
fun getTransformation(transformationStep: TransformationStep): AbstractTransformation { fun getTransformation(transformationStep: TransformationStepConfig): AbstractTransformation {
val name = transformationStep.name() val name = transformationStep.name()
val parameters = transformationStep.parameters() val parameters = transformationStep.parameters()
return when { return when {
@@ -56,7 +56,7 @@ class TransformationRegistry {
} }
} }
fun applyTransformations(input: String, steps: List<TransformationStep>): String { fun applyTransformations(input: String, steps: List<TransformationStepConfig>): String {
return steps.fold(input) { current, step -> return steps.fold(input) { current, step ->
val actualStep = getTransformation(step) val actualStep = getTransformation(step)
when (actualStep) { when (actualStep) {
@@ -1,44 +1,82 @@
package com.rak.service package com.rak.service
import com.rak.config.Step import com.rak.config.model.ExtractConfig
import com.rak.model.RegionalSet import com.rak.model.Selector
import com.rak.model.transform.TransformationRegistry import com.rak.model.card.Card
import com.rak.model.exception.ElementNotFoundException
import com.rak.model.set.CardSet
import com.rak.model.set.RegionalSet
import com.rak.util.XPathUtil import com.rak.util.XPathUtil
import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.nodes.Document import org.jsoup.nodes.Document
import org.jsoup.nodes.Element import org.jsoup.nodes.Element
import org.jsoup.select.Elements
@ApplicationScoped @ApplicationScoped
class ExtractionService( class ExtractionService(
private val sourceService: SourceService, private val sourceService: SourceService,
) { ) {
private val transformationRegistry: TransformationRegistry = TransformationRegistry() fun extractSet(setName: String, root: Element, provider: String): CardSet {
return CardSet(
name = setName,
regionalSets = extractRegionalSets(root, provider)
)
}
fun extractSet(document: Document, provider: String): List<RegionalSet> { fun extractRegionalSet(root: Element, provider: String): RegionalSet {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found") val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val regionalSetSelector = source.getItems().regionalSet().get() val regionalSetSelector = source.getTargets().regionalSet().get()
val regionalSetRoot = document.select(regionalSetSelector.rootSelector().value()) if (regionalSetSelector.getRootConfig().isPresent) {
val setId: String = extractTextFromElementBySteps(
return regionalSetRoot.map { root,
var setId: String = extractTextFromRootBySteps( regionalSetSelector.idSelector().getSteps()
it,
regionalSetSelector.idSelector().steps()
) ?: throw IllegalStateException("Parameter 'id' could not be found") ) ?: throw IllegalStateException("Parameter 'id' could not be found")
val setLanguage: String = extractTextFromElementBySteps(
setId = transformationRegistry.applyTransformations(setId, regionalSetSelector.idSelector().transform().get()) root,
regionalSetSelector.languageSelector().getSteps()
val setLanguage: String = extractTextFromRootBySteps(
it,
regionalSetSelector.languageSelector().steps()
) ?: throw IllegalStateException("Parameter 'language' could not be found") ) ?: throw IllegalStateException("Parameter 'language' could not be found")
val setKey: String = extractTextFromRootBySteps( val setKey: String = extractTextFromElementBySteps(
it, root,
regionalSetSelector.regionKeySelector().steps() regionalSetSelector.regionKeySelector().getSteps()
) ?: throw IllegalStateException("Parameter 'key' could not be found") ) ?: throw IllegalStateException("Parameter 'key' could not be found")
RegionalSet( return RegionalSet(
setId,
setLanguage,
setKey
)
} else {
val setIdConfiguration = regionalSetSelector.idSelector()
if (!setIdConfiguration.getRootConfig().isPresent) {
throw RuntimeException("as[po") // TODO fix me
}
val rootConfiguration = setIdConfiguration.getRootConfig().get()
val setIdRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
val setId: String = extractTextFromElementBySteps(
setIdRoot,
setIdConfiguration.getSteps()
) ?: throw IllegalStateException("Parameter 'id' could not be found")
val setLanguageConfiguration = regionalSetSelector.idSelector()
val setLanguageRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
val setLanguage: String = extractTextFromElementBySteps(
setLanguageRoot,
setLanguageConfiguration.getSteps()
) ?: throw IllegalStateException("Parameter 'language' could not be found")
val setKeyConfiguration = regionalSetSelector.idSelector()
val setKeyRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
val setKey: String = extractTextFromElementBySteps(
setKeyRoot,
setKeyConfiguration.getSteps()
) ?: throw IllegalStateException("Parameter 'key' could not be found")
return RegionalSet(
setId, setId,
setLanguage, setLanguage,
setKey setKey
@@ -46,10 +84,127 @@ class ExtractionService(
} }
} }
private fun extractTextFromRootBySteps( fun extractRegionalSets(root: Element, provider: String): Set<RegionalSet> {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val regionalSetSelector = source.getTargets().regionalSet().get()
if (regionalSetSelector.getRootConfig().isPresent) {
val rootConfiguration = regionalSetSelector.getRootConfig().get()
val regionalSetRoots: Elements = getElementsFromDocumentByExtractConfig(
root,
rootConfiguration
)
return regionalSetRoots.map {
extractRegionalSet(
it,
provider
)
}.toSet()
} else {
val setIdConfiguration = regionalSetSelector.idSelector()
try {
val setIdRoot = getElementsFromDocumentByExtractConfig(root, setIdConfiguration.getRootConfig().get())
val setIds = setIdRoot.map {
extractTextFromElementBySteps(
it,
setIdConfiguration.getSteps()
) ?: throw IllegalStateException("Parameter 'id' could not be found")
}
val languageConfiguration = regionalSetSelector.languageSelector()
val languageRoot = getElementsFromDocumentByExtractConfig(root, languageConfiguration.getRootConfig().get())
val languages = languageRoot.map {
extractTextFromElementBySteps(
it,
languageConfiguration.getSteps()
) ?: throw IllegalStateException("Parameter 'id' could not be found")
}
val setKeyConfiguration = regionalSetSelector.regionKeySelector()
val setKeyRoot = getElementsFromDocumentByExtractConfig(root, setKeyConfiguration.getRootConfig().get())
val setKeys = setKeyRoot.map {
extractTextFromElementBySteps(
it,
setKeyConfiguration.getSteps()
) ?: throw IllegalStateException("Parameter 'id' could not be found")
}
return RegionalSet.flattenFromMemberLists(
setIds,
languages,
setKeys
)
} catch (ex: NoSuchElementException) {
throw RuntimeException("sdfgs") // TODO handle me
}
}
}
/**
 * Extracts the card fields configured for [provider] from [root].
 *
 * Only the "global root" configuration is handled: when the card target has no root
 * config, nothing is extracted and `null` is returned. The extracted values are not
 * yet mapped onto a [Card], so the method currently returns `null` in every case.
 *
 * @param root parsed document to extract from
 * @param provider id of the configured source
 * @return currently always `null`
 * @throws IllegalArgumentException if no source is configured for [provider]
 * @throws ElementNotFoundException if the configured root element cannot be located
 * @throws IllegalStateException if the name, type or description cannot be extracted
 */
fun extractCard(root: Document, provider: String): Card? {
    val source = sourceService.getSourceById(provider)
        ?: throw IllegalArgumentException("Provider $provider not found")
    val cardSelector = source.getTargets().card().get()
    val rootConfigurationOptional = cardSelector.getRootConfig()

    if (!rootConfigurationOptional.isPresent) {
        return null
    }

    val rootElement: Element = getElementFromDocumentByExtractConfig(
        root,
        rootConfigurationOptional.get()
    ) ?: throw ElementNotFoundException("TODO make this better")

    val englishCardName: String = extractTextFromElementBySteps(
        rootElement,
        cardSelector.getEnglishName().getSteps()
    ) ?: throw IllegalStateException("Parameter 'name' could not be found")

    // Each field uses its own configured steps; previously the name steps were
    // reused for type and description (copy-paste), as were the error messages.
    val cardType: String = extractTextFromElementBySteps(
        rootElement,
        cardSelector.getCardType().getSteps()
    ) ?: throw IllegalStateException("Parameter 'type' could not be found")

    val description: String = extractTextFromElementBySteps(
        rootElement,
        cardSelector.getDescription().getSteps()
    ) ?: throw IllegalStateException("Parameter 'description' could not be found")

    // TODO build a concrete Card from englishCardName, cardType and description
    return null
}
/**
 * Selects all elements below [document] that match the query of [step].
 *
 * A `CSS` selector type is evaluated with `select`; every other selector type is
 * evaluated with `selectXpath`.
 */
private fun getElementsFromDocumentByExtractConfig(
    document: Element,
    step: ExtractConfig
): Elements = when (step.selectorType()) {
    Selector.CSS -> document.select(step.getQueryString())
    else -> document.selectXpath(step.getQueryString())
}
/**
 * Returns the first element below [document] that matches the query of [step].
 *
 * A `CSS` selector type is evaluated with `select`; every other selector type is
 * evaluated with `selectXpath`.
 *
 * The return type stays nullable for compatibility with existing callers, but a
 * missing element is reported by throwing, so `null` is never actually returned.
 *
 * @throws ElementNotFoundException if the query matches no element
 */
private fun getElementFromDocumentByExtractConfig(
    document: Element,
    step: ExtractConfig
): Element? {
    val selectorType = step.selectorType()
    val query = step.getQueryString()
    val matches = if (selectorType == Selector.CSS) {
        document.select(query)
    } else {
        document.selectXpath(query)
    }
    // Name the failing query: the exception used to carry an empty message.
    return matches.firstOrNull()
        ?: throw ElementNotFoundException("No element found for $selectorType query '$query'")
}
private fun extractTextFromElementBySteps(
root: Element, root: Element,
steps: Set<Step> steps: List<ExtractConfig>
): String? { ): String? {
val stepsInCorrectOrder = steps.reversed()
var currentElement: Element? = root.clone() var currentElement: Element? = root.clone()
var result: String? = null var result: String? = null
@@ -60,10 +215,10 @@ class ExtractionService(
} }
if (index == steps.size - 1) { if (index == steps.size - 1) {
result = XPathUtil.extractResult(currentElement, currentStep.value()) result = XPathUtil.extractResult(currentElement, currentStep.getQueryString())
} }
else { else {
currentElement = XPathUtil.getNextElement(currentElement, currentStep.value()) currentElement = XPathUtil.getNextElement(currentElement, currentStep.getQueryString())
} }
} }
@@ -1,6 +1,7 @@
package com.rak.service package com.rak.service
import com.rak.model.RegionalSet import com.rak.model.card.Card
import com.rak.model.set.CardSet
import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.Jsoup import org.jsoup.Jsoup
import org.jsoup.nodes.Document import org.jsoup.nodes.Document
@@ -15,21 +16,25 @@ class ScrapeService(
fun scrapeSet( fun scrapeSet(
provider: String, provider: String,
setName: String, setName: String,
): List<RegionalSet> { ): CardSet {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found") val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val path: String = normalizePath(setName) val path: String = normalizePath(setName)
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get() val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
return extractionService.extractSet(document, provider) return extractionService.extractSet(setName, document, provider)
} }
fun scrapeCard( fun scrapeCard(
provider: String, provider: String,
cardName: String, cardName: String,
): Map<String, String> { ): Card? {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val path: String = normalizePath(cardName) val path: String = normalizePath(cardName)
return mapOf() val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
return extractionService.extractCard(document, provider)
} }
private fun normalizePath(path: String): String = path private fun normalizePath(path: String): String = path
@@ -1,14 +1,86 @@
package com.rak.service package com.rak.service
import com.rak.config.SourceConfig import com.rak.config.model.CardScrapeTargetConfig
import com.rak.config.SourcesConfiguration import com.rak.config.model.RegionalSetScrapeTargetConfig
import com.rak.config.model.SourceConfig
import com.rak.config.model.SourcesConfig
import com.rak.model.exception.InvalidConfigurationException
import io.quarkus.logging.Log
import io.quarkus.runtime.Startup
import jakarta.annotation.PostConstruct
import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.context.ApplicationScoped
@Startup
@ApplicationScoped @ApplicationScoped
class SourceService ( class SourceService(
val sourcesConfiguration: SourcesConfiguration val sourcesConfiguration: SourcesConfig
) { ) {
/**
 * Validates every configured source; annotated with [PostConstruct].
 *
 * @throws InvalidConfigurationException if a source combines global and dedicated roots incorrectly
 */
@PostConstruct
fun init() {
    for (source in sourcesConfiguration.getSources()) {
        validateSource(source)
    }
}
/**
 * Validates the regional set and card targets of [sourceConfig], skipping targets that are absent.
 */
private fun validateSource(sourceConfig: SourceConfig) {
    val regionalSetTarget = sourceConfig.getTargets().regionalSet()
    val cardTarget = sourceConfig.getTargets().card()

    regionalSetTarget.ifPresent { validateSetExtractConfig(it) }
    cardTarget.ifPresent { validateCardExtractConfig(it) }
}
/**
 * Checks that a regional set target uses either one global extraction root or a
 * dedicated root on every field, but never a mix of both.
 *
 * @throws InvalidConfigurationException if a global root is combined with any dedicated root,
 * or if there is no global root and at least one field lacks a dedicated root
 */
private fun validateSetExtractConfig(setExtractConfig: RegionalSetScrapeTargetConfig) {
    val fieldConfigs = listOf(
        setExtractConfig.languageSelector(),
        setExtractConfig.idSelector(),
        setExtractConfig.regionKeySelector(),
    )
    val hasGlobalRoot = setExtractConfig.getRootConfig().isPresent

    val violation: String? = when {
        // A global root excludes dedicated roots on the fields
        hasGlobalRoot && fieldConfigs.any { it.getRootConfig().isPresent } ->
            "Dedicated extraction roots cannot be set when a global extraction root is configured"
        !hasGlobalRoot && !fieldConfigs.all { it.getRootConfig().isPresent } ->
            "Dedicated extraction roots have to be set when a global extraction root is not configured"
        else -> null
    }

    if (violation != null) {
        throw InvalidConfigurationException(violation)
    }
}
/**
 * Checks that a card target uses either one global extraction root or a dedicated
 * root on every field (name, description, type, attack, defense), never a mix.
 *
 * @throws InvalidConfigurationException if a global root is combined with any dedicated root,
 * or if there is no global root and at least one field lacks a dedicated root
 */
private fun validateCardExtractConfig(cardScrapeTargetConfig: CardScrapeTargetConfig) {
    val fieldConfigs = listOf(
        cardScrapeTargetConfig.getEnglishName(),
        cardScrapeTargetConfig.getDescription(),
        cardScrapeTargetConfig.getCardType(),
        cardScrapeTargetConfig.getAttack(),
        cardScrapeTargetConfig.getDefense(),
    )

    if (cardScrapeTargetConfig.getRootConfig().isPresent) {
        val anyDedicatedRoot = fieldConfigs.any { field -> field.getRootConfig().isPresent }
        if (anyDedicatedRoot) {
            throw InvalidConfigurationException(
                "Dedicated extraction roots cannot be set when a global extraction root is configured"
            )
        }
        return
    }

    val everyFieldHasRoot = fieldConfigs.all { field -> field.getRootConfig().isPresent }
    if (!everyFieldHasRoot) {
        throw InvalidConfigurationException(
            "Dedicated extraction roots have to be set when a global extraction root is not configured"
        )
    }
}
fun getSources(): Set<SourceConfig> = sourcesConfiguration.getSources().toSet() fun getSources(): Set<SourceConfig> = sourcesConfiguration.getSources().toSet()
fun getSourceById(id: String): SourceConfig? = getSources().firstOrNull { it.getId() == id } fun getSourceById(id: String): SourceConfig? = getSources().firstOrNull { it.getId() == id }
@@ -1,13 +0,0 @@
package com.rak.service
import com.rak.model.transform.TransformationRegistry
import jakarta.enterprise.context.ApplicationScoped
/**
 * Application-scoped service holding a [TransformationRegistry]; a new registry is created by default.
 * The class body is empty, so no operations are exposed.
 */
@ApplicationScoped
class TransformService(
private val transformationRegistry: TransformationRegistry = TransformationRegistry()
) {
}
+42 -2
View File
@@ -9,8 +9,11 @@ scraper:
domain: "yugioh-card.com" domain: "yugioh-card.com"
url-patterns: url-patterns:
- "^https://www\\.yugioh-card\\.com/[a-z]{2}/products/.*$" - "^https://www\\.yugioh-card\\.com/[a-z]{2}/products/.*$"
selectors: targets:
card: card:
root:
type: css
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
name: name:
steps: steps:
- type: "css" - type: "css"
@@ -27,8 +30,9 @@ scraper:
domain: "yugioh.fandom.com" domain: "yugioh.fandom.com"
url-patterns: url-patterns:
- "^https://yugioh\\.fandom\\.com/wiki/.*$" - "^https://yugioh\\.fandom\\.com/wiki/.*$"
selectors: targets:
regional-set: regional-set:
multi: true
root: root:
type: css type: css
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li" value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
@@ -51,3 +55,39 @@ scraper:
steps: steps:
- type: xpath - type: xpath
value: "//li/abbr/text()" value: "//li/abbr/text()"
card:
name:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "./tbody/tr[3]/th/text()"
description:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "b:contains(Card descriptions)"
type:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "b:contains(Card descriptions)"
attack:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "b:contains(Card descriptions)"
defense:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "b:contains(Card descriptions)"