Refactor extraction logic

Add required models
This commit is contained in:
2025-06-25 13:57:44 +02:00
parent 284723c978
commit ce64f90a66
45 changed files with 586 additions and 141 deletions

View File

@@ -1,8 +0,0 @@
package com.rak.config
import io.smallrye.config.WithName
interface AbstractModelDefinition {
@WithName("root")
fun rootSelector(): Step
}

View File

@@ -1,12 +0,0 @@
package com.rak.config
import io.smallrye.config.WithName
interface CardDefinition {
@WithName("name")
fun nameSelector(): SelectorDefinition
@WithName("attack")
fun attackSelector(): SelectorDefinition
@WithName("effect")
fun effectSelector(): SelectorDefinition
}

View File

@@ -1,8 +0,0 @@
package com.rak.config
import java.util.*
interface Items {
fun card(): Optional<CardDefinition>
fun regionalSet(): Optional<RegionalSetDefinition>
}

View File

@@ -1,12 +0,0 @@
package com.rak.config
import io.smallrye.config.WithName
interface RegionalSetDefinition : AbstractModelDefinition {
@WithName("id")
fun idSelector(): SelectorDefinition
@WithName("language")
fun languageSelector(): SelectorDefinition
@WithName("region-key")
fun regionKeySelector(): SelectorDefinition
}

View File

@@ -1,8 +0,0 @@
package com.rak.config
import java.util.*
interface SelectorDefinition {
fun steps(): Set<Step>
fun transform(): Optional<List<TransformationStep>>
}

View File

@@ -1,6 +1,6 @@
package com.rak.config.converter package com.rak.config.converter
import com.rak.model.scrape.selector.Selector import com.rak.model.Selector
import org.eclipse.microprofile.config.spi.Converter import org.eclipse.microprofile.config.spi.Converter
class TypeSelectorConverter : Converter<Selector> { class TypeSelectorConverter : Converter<Selector> {

View File

@@ -0,0 +1,11 @@
package com.rak.config.model
import io.smallrye.config.WithName
import java.util.Optional
/**
 * Settings shared by every scrape target.
 *
 * Both values are optional and are read from the `root` and `multi` keys.
 */
interface AbstractScrapeTargetConfig {

    /** Target-wide extraction root (`root` key), if one is configured. */
    @WithName("root")
    fun getRootConfig(): Optional<ExtractConfig>

    /**
     * Value of the `multi` key, if set.
     * NOTE(review): not read anywhere in the visible code — confirm intended meaning.
     */
    @WithName("multi")
    fun isMulti(): Optional<Boolean>
}

View File

@@ -0,0 +1,16 @@
package com.rak.config.model
import io.smallrye.config.WithName
/**
 * Scrape-target configuration for a card.
 *
 * Each accessor maps one configuration key to the field configuration used to
 * extract that value.
 */
interface CardScrapeTargetConfig : AbstractScrapeTargetConfig {

    /** Field configuration under the `name` key. */
    @WithName("name")
    fun getEnglishName(): ScrapeTargetFieldConfig

    /** Field configuration under the `description` key. */
    @WithName("description")
    fun getDescription(): ScrapeTargetFieldConfig

    /** Field configuration under the `type` key. */
    @WithName("type")
    fun getCardType(): ScrapeTargetFieldConfig

    /** Field configuration under the `attack` key. */
    @WithName("attack")
    fun getAttack(): ScrapeTargetFieldConfig

    /** Field configuration under the `defense` key. */
    @WithName("defense")
    fun getDefense(): ScrapeTargetFieldConfig
}

View File

@@ -1,13 +1,14 @@
package com.rak.config package com.rak.config.model
import com.rak.config.converter.TypeSelectorConverter import com.rak.config.converter.TypeSelectorConverter
import com.rak.model.scrape.selector.Selector import com.rak.model.Selector
import io.smallrye.config.WithConverter import io.smallrye.config.WithConverter
import io.smallrye.config.WithName import io.smallrye.config.WithName
interface Step { interface ExtractConfig {
@WithConverter(TypeSelectorConverter::class) @WithConverter(TypeSelectorConverter::class)
@WithName("type") @WithName("type")
fun selectorType(): Selector // e.g. css or xpath fun selectorType(): Selector
fun value(): String @WithName("value")
fun getQueryString(): String
} }

View File

@@ -0,0 +1,12 @@
package com.rak.config.model
import io.smallrye.config.WithName
/**
 * Scrape-target configuration for a regional set.
 *
 * Each accessor maps one configuration key to the field configuration used to
 * extract that value.
 */
interface RegionalSetScrapeTargetConfig : AbstractScrapeTargetConfig {

    /** Field configuration under the `id` key. */
    @WithName("id")
    fun idSelector(): ScrapeTargetFieldConfig

    /** Field configuration under the `language` key. */
    @WithName("language")
    fun languageSelector(): ScrapeTargetFieldConfig

    /** Field configuration under the `region-key` key. */
    @WithName("region-key")
    fun regionKeySelector(): ScrapeTargetFieldConfig
}

View File

@@ -0,0 +1,12 @@
package com.rak.config.model
import io.smallrye.config.WithName
import java.util.*
/**
 * Configuration describing how a single field of a scrape target is extracted.
 */
interface ScrapeTargetFieldConfig {

    /** Field-specific extraction root (`root` key), if one is configured. */
    @WithName("root")
    fun getRootConfig(): Optional<ExtractConfig>

    /** Ordered extraction steps read from the `steps` key. */
    @WithName("steps")
    fun getSteps(): List<ExtractConfig>

    /** Optional list of transformation steps for the extracted value. */
    fun transform(): Optional<List<TransformationStepConfig>>
}

View File

@@ -1,4 +1,4 @@
package com.rak.config package com.rak.config.model
import io.smallrye.config.WithName import io.smallrye.config.WithName
import java.util.* import java.util.*
@@ -13,7 +13,7 @@ interface SourceConfig {
fun getDomain(): String fun getDomain(): String
@WithName("url-patterns") @WithName("url-patterns")
fun getUrlPatterns(): Optional<MutableSet<String>> fun getUrlPatterns(): Optional<MutableSet<String>>
@WithName("selectors") @WithName("targets")
fun getItems(): Items fun getTargets(): TargetsConfig
} }

View File

@@ -1,10 +1,10 @@
package com.rak.config package com.rak.config.model
import io.smallrye.config.ConfigMapping import io.smallrye.config.ConfigMapping
import io.smallrye.config.WithName import io.smallrye.config.WithName
@ConfigMapping(prefix = "scraper") @ConfigMapping(prefix = "scraper")
interface SourcesConfiguration { interface SourcesConfig {
@WithName("sources") @WithName("sources")
fun getSources(): MutableList<SourceConfig> fun getSources(): MutableList<SourceConfig>

View File

@@ -0,0 +1,8 @@
package com.rak.config.model
import java.util.*
/**
 * The scrape targets a source may define; each one is optional.
 */
interface TargetsConfig {

    /** Card target configuration, if the source defines one. */
    fun card(): Optional<CardScrapeTargetConfig>

    /** Regional-set target configuration, if the source defines one. */
    fun regionalSet(): Optional<RegionalSetScrapeTargetConfig>
}

View File

@@ -1,9 +1,9 @@
package com.rak.config package com.rak.config.model
import com.rak.config.converter.EmptyStringConverter import com.rak.config.converter.EmptyStringConverter
import io.smallrye.config.WithConverter import io.smallrye.config.WithConverter
interface TransformationStep { interface TransformationStepConfig {
fun name(): String fun name(): String
@WithConverter(EmptyStringConverter::class) @WithConverter(EmptyStringConverter::class)
fun parameters(): MutableList<String> fun parameters(): MutableList<String>

View File

@@ -1,6 +1,7 @@
package com.rak.controller package com.rak.controller
import com.rak.model.RegionalSet import com.rak.model.card.Card
import com.rak.model.set.CardSet
import com.rak.service.ScrapeService import com.rak.service.ScrapeService
import jakarta.ws.rs.Consumes import jakarta.ws.rs.Consumes
import jakarta.ws.rs.GET import jakarta.ws.rs.GET
@@ -27,7 +28,7 @@ class ScrapeController(
provider: String, provider: String,
@RestQuery @RestQuery
setName: String setName: String
): List<RegionalSet> { ): CardSet {
return scrapeService.scrapeSet( return scrapeService.scrapeSet(
provider, provider,
setName setName
@@ -43,7 +44,7 @@ class ScrapeController(
provider: String, provider: String,
@RestQuery @RestQuery
cardName: String cardName: String
): Map<String, String> { ): Card? {
return scrapeService.scrapeCard( return scrapeService.scrapeCard(
provider, provider,
cardName cardName

View File

@@ -1,7 +0,0 @@
package com.rak.model
data class RegionalSet(
val id: String,
val language: String,
val key: String
)

View File

@@ -1,4 +1,4 @@
package com.rak.model.scrape.selector package com.rak.model
enum class Selector { enum class Selector {
CSS, CSS,

View File

@@ -0,0 +1,11 @@
package com.rak.model.card
/** Attribute values a monster card can carry. */
enum class Attribute {
    WIND,
    WATER,
    FIRE,
    EARTH,
    LIGHT,
    DARK,
    DIVINE
}

View File

@@ -0,0 +1,8 @@
package com.rak.model.card
/**
 * Base type for card models; declares the properties every concrete card
 * must provide.
 */
abstract class Card {

    /** Numeric identifier of the card. */
    abstract val id: Int

    /** Top-level category of the card. */
    abstract val cardType: CardType

    /** Description text of the card. */
    abstract val description: String

    /** Name of the card. */
    abstract val name: String
}

View File

@@ -0,0 +1,8 @@
package com.rak.model.card
/** Top-level card categories, including a fallback for unrecognised cards. */
enum class CardType {
    MONSTER,
    SPELL,
    TRAP,
    UNKNOWN,
}

View File

@@ -0,0 +1,3 @@
package com.rak.model.card
/** Marker interface without members for card sub-type enums. */
interface ICardType

View File

@@ -0,0 +1,12 @@
package com.rak.model.card
/** The eight directions a link arrow can point in. */
enum class LinkArrow {
    TOP_LEFT,
    TOP,
    TOP_RIGHT,
    LEFT,
    RIGHT,
    BOTTOM_LEFT,
    BOTTOM,
    BOTTOM_RIGHT
}

View File

@@ -0,0 +1,20 @@
package com.rak.model.card
/**
 * Card model for monsters.
 *
 * The stat-like and pendulum/link properties are optional and default to
 * `null` (or `false` for [isPendulum]); [subType], [monsterType], [attribute]
 * and [linkArrows] must always be supplied.
 */
data class MonsterCard(
    override val id: Int,
    override val cardType: CardType,
    override val description: String,
    override val name: String,
    val monsterEffect: String? = null,
    val attack: Int? = null,
    val defense: Int? = null,
    val level: Int? = null,
    val isPendulum: Boolean = false,
    val pendulumScale: Int? = null,
    val pendulumEffect: String? = null,
    val linkValue: Int? = null,
    val subType: MonsterCardType,
    val monsterType: MonsterType,
    val attribute: Attribute,
    val linkArrows: Set<LinkArrow>,
) : Card()

View File

@@ -0,0 +1,11 @@
package com.rak.model.card
/** Sub-types of monster cards. */
enum class MonsterCardType : ICardType {
    NORMAL,
    EFFECT,
    RITUAL,
    FUSION,
    SYNCHRO,
    XYZ,
    LINK,
}

View File

@@ -0,0 +1,32 @@
package com.rak.model.card
// TODO string value for proper names
// TODO consider adding unknown type
/** Monster types, listed as upper-case identifiers with underscores between words. */
enum class MonsterType {
    AQUA,
    BEAST,
    BEAST_WARRIOR,
    CREATOR_GOD,
    CYBERSE,
    DINOSAUR,
    DIVINE_BEAST,
    DRAGON,
    FAIRY,
    FIEND,
    FISH,
    INSECT,
    ILLUSION,
    MACHINE,
    PLANT,
    PSYCHIC,
    PYRO,
    REPTILE,
    ROCK,
    SEA_SERPENT,
    SPELLCASTER,
    THUNDER,
    WARRIOR,
    WINGED_BEAST,
    WYRM,
    ZOMBIE,
}

View File

@@ -0,0 +1,9 @@
package com.rak.model.card
/** Card model for spells; adds the spell [subType] to the common card properties. */
data class SpellCard(
    override val id: Int,
    override val cardType: CardType,
    override val description: String,
    override val name: String,
    val subType: SpellCardType,
) : Card()

View File

@@ -0,0 +1,11 @@
package com.rak.model.card
// TODO fix underscore for all types with string value
/** Sub-types of spell cards. */
enum class SpellCardType {
    NORMAL,
    CONTINUOUS,
    EQUIP,
    QUICK_PLAY,
    FIELD,
    RITUAL,
}

View File

@@ -0,0 +1,9 @@
package com.rak.model.card
/** Card model for traps; adds the trap [subType] to the common card properties. */
data class TrapCard(
    override val id: Int,
    override val cardType: CardType,
    override val description: String,
    override val name: String,
    val subType: TrapCardType,
) : Card()

View File

@@ -0,0 +1,7 @@
package com.rak.model.card
/** Sub-types of trap cards. */
enum class TrapCardType {
    NORMAL,
    CONTINUOUS,
    COUNTER,
}

View File

@@ -0,0 +1,3 @@
package com.rak.model.exception
/**
 * Unchecked exception carrying a [message]; thrown by the extraction code when
 * a query matches no element.
 */
class ElementNotFoundException(
    message: String,
) : RuntimeException(message)

View File

@@ -0,0 +1,3 @@
package com.rak.model.exception
/**
 * Unchecked exception carrying a [message]; thrown when a source configuration
 * fails validation.
 */
class InvalidConfigurationException(
    message: String,
) : RuntimeException(message)

View File

@@ -0,0 +1,7 @@
package com.rak.model.exception
import java.lang.RuntimeException
/** Unchecked exception carrying a [message] about a query a provider does not support. */
class UnsupportedQueryForProviderException(message: String) : RuntimeException(message)

View File

@@ -1,4 +0,0 @@
package com.rak.model.scrape
abstract class AbstractScraper{
}

View File

@@ -1,6 +0,0 @@
package com.rak.model.scrape
class JsoupScraper : AbstractScraper() {
}

View File

@@ -1,5 +0,0 @@
package com.rak.model.scrape
data class ScrapeJob(
val url: String,
)

View File

@@ -0,0 +1,8 @@
package com.rak.model.set
import kotlin.collections.Set
/**
 * A named card set together with its regional variants.
 *
 * @property name name of the set
 * @property regionalSets regional sets belonging to this set
 */
data class CardSet(
    val name: String,
    val regionalSets: Set<RegionalSet>,
)

View File

@@ -0,0 +1,33 @@
package com.rak.model.set
/**
 * One regional variant of a card set.
 *
 * @property prefix set identifier/prefix value
 * @property region region (language) value
 * @property regionCode region key value
 */
data class RegionalSet(
    val prefix: String,
    val region: String,
    val regionCode: String
) {
    companion object {
        /**
         * Combines three parallel lists into regional sets, pairing the entries
         * that share an index.
         *
         * The result keeps first-seen order and drops duplicate entries because
         * it is a set of data-class instances.
         *
         * @param idList values used as [RegionalSet.prefix]
         * @param languageList values used as [RegionalSet.region]
         * @param regionKeyAliasList values used as [RegionalSet.regionCode]
         * @return one [RegionalSet] per index
         * @throws IllegalArgumentException if the three lists differ in size
         */
        fun flattenFromMemberLists(
            idList: List<String>,
            languageList: List<String>,
            regionKeyAliasList: List<String>,
        ): MutableSet<RegionalSet> {
            // All three sizes must agree. The previous `&&` check let a mismatch in
            // only one list through, which later failed with an index error or
            // silently ignored surplus entries.
            require(idList.size == languageList.size && idList.size == regionKeyAliasList.size) {
                "Lists have to be the same size"
            }
            return idList.indices.mapTo(mutableSetOf<RegionalSet>()) { index ->
                RegionalSet(
                    prefix = idList[index],
                    region = languageList[index],
                    regionCode = regionKeyAliasList[index]
                )
            }
        }
    }
}

View File

@@ -1,6 +1,6 @@
package com.rak.model.transform package com.rak.model.transform
import com.rak.config.TransformationStep import com.rak.config.model.TransformationStepConfig
import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.ConcurrentHashMap
class TransformationRegistry { class TransformationRegistry {
@@ -34,7 +34,7 @@ class TransformationRegistry {
parameterizedTransformation.put(name, transformation) parameterizedTransformation.put(name, transformation)
} }
fun getTransformation(transformationStep: TransformationStep): AbstractTransformation { fun getTransformation(transformationStep: TransformationStepConfig): AbstractTransformation {
val name = transformationStep.name() val name = transformationStep.name()
val parameters = transformationStep.parameters() val parameters = transformationStep.parameters()
return when { return when {
@@ -56,7 +56,7 @@ class TransformationRegistry {
} }
} }
fun applyTransformations(input: String, steps: List<TransformationStep>): String { fun applyTransformations(input: String, steps: List<TransformationStepConfig>): String {
return steps.fold(input) { current, step -> return steps.fold(input) { current, step ->
val actualStep = getTransformation(step) val actualStep = getTransformation(step)
when (actualStep) { when (actualStep) {

View File

@@ -1,44 +1,82 @@
package com.rak.service package com.rak.service
import com.rak.config.Step import com.rak.config.model.ExtractConfig
import com.rak.model.RegionalSet import com.rak.model.Selector
import com.rak.model.transform.TransformationRegistry import com.rak.model.card.Card
import com.rak.model.exception.ElementNotFoundException
import com.rak.model.set.CardSet
import com.rak.model.set.RegionalSet
import com.rak.util.XPathUtil import com.rak.util.XPathUtil
import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.nodes.Document import org.jsoup.nodes.Document
import org.jsoup.nodes.Element import org.jsoup.nodes.Element
import org.jsoup.select.Elements
@ApplicationScoped @ApplicationScoped
class ExtractionService( class ExtractionService(
private val sourceService: SourceService, private val sourceService: SourceService,
) { ) {
private val transformationRegistry: TransformationRegistry = TransformationRegistry() fun extractSet(setName: String, root: Element, provider: String): CardSet {
return CardSet(
name = setName,
regionalSets = extractRegionalSets(root, provider)
)
}
fun extractSet(document: Document, provider: String): List<RegionalSet> { fun extractRegionalSet(root: Element, provider: String): RegionalSet {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found") val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val regionalSetSelector = source.getItems().regionalSet().get() val regionalSetSelector = source.getTargets().regionalSet().get()
val regionalSetRoot = document.select(regionalSetSelector.rootSelector().value()) if (regionalSetSelector.getRootConfig().isPresent) {
val setId: String = extractTextFromElementBySteps(
return regionalSetRoot.map { root,
var setId: String = extractTextFromRootBySteps( regionalSetSelector.idSelector().getSteps()
it,
regionalSetSelector.idSelector().steps()
) ?: throw IllegalStateException("Parameter 'id' could not be found") ) ?: throw IllegalStateException("Parameter 'id' could not be found")
val setLanguage: String = extractTextFromElementBySteps(
setId = transformationRegistry.applyTransformations(setId, regionalSetSelector.idSelector().transform().get()) root,
regionalSetSelector.languageSelector().getSteps()
val setLanguage: String = extractTextFromRootBySteps(
it,
regionalSetSelector.languageSelector().steps()
) ?: throw IllegalStateException("Parameter 'language' could not be found") ) ?: throw IllegalStateException("Parameter 'language' could not be found")
val setKey: String = extractTextFromRootBySteps( val setKey: String = extractTextFromElementBySteps(
it, root,
regionalSetSelector.regionKeySelector().steps() regionalSetSelector.regionKeySelector().getSteps()
) ?: throw IllegalStateException("Parameter 'key' could not be found") ) ?: throw IllegalStateException("Parameter 'key' could not be found")
RegionalSet( return RegionalSet(
setId,
setLanguage,
setKey
)
} else {
val setIdConfiguration = regionalSetSelector.idSelector()
if (!setIdConfiguration.getRootConfig().isPresent) {
throw RuntimeException("as[po") // TODO fix me
}
val rootConfiguration = setIdConfiguration.getRootConfig().get()
val setIdRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
val setId: String = extractTextFromElementBySteps(
setIdRoot,
setIdConfiguration.getSteps()
) ?: throw IllegalStateException("Parameter 'id' could not be found")
val setLanguageConfiguration = regionalSetSelector.idSelector()
val setLanguageRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
val setLanguage: String = extractTextFromElementBySteps(
setLanguageRoot,
setLanguageConfiguration.getSteps()
) ?: throw IllegalStateException("Parameter 'language' could not be found")
val setKeyConfiguration = regionalSetSelector.idSelector()
val setKeyRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
val setKey: String = extractTextFromElementBySteps(
setKeyRoot,
setKeyConfiguration.getSteps()
) ?: throw IllegalStateException("Parameter 'key' could not be found")
return RegionalSet(
setId, setId,
setLanguage, setLanguage,
setKey setKey
@@ -46,10 +84,127 @@ class ExtractionService(
} }
} }
private fun extractTextFromRootBySteps( fun extractRegionalSets(root: Element, provider: String): Set<RegionalSet> {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val regionalSetSelector = source.getTargets().regionalSet().get()
if (regionalSetSelector.getRootConfig().isPresent) {
val rootConfiguration = regionalSetSelector.getRootConfig().get()
val regionalSetRoots: Elements = getElementsFromDocumentByExtractConfig(
root,
rootConfiguration
)
return regionalSetRoots.map {
extractRegionalSet(
it,
provider
)
}.toSet()
} else {
val setIdConfiguration = regionalSetSelector.idSelector()
try {
val setIdRoot = getElementsFromDocumentByExtractConfig(root, setIdConfiguration.getRootConfig().get())
val setIds = setIdRoot.map {
extractTextFromElementBySteps(
it,
setIdConfiguration.getSteps()
) ?: throw IllegalStateException("Parameter 'id' could not be found")
}
val languageConfiguration = regionalSetSelector.languageSelector()
val languageRoot = getElementsFromDocumentByExtractConfig(root, languageConfiguration.getRootConfig().get())
val languages = languageRoot.map {
extractTextFromElementBySteps(
it,
languageConfiguration.getSteps()
) ?: throw IllegalStateException("Parameter 'id' could not be found")
}
val setKeyConfiguration = regionalSetSelector.regionKeySelector()
val setKeyRoot = getElementsFromDocumentByExtractConfig(root, setKeyConfiguration.getRootConfig().get())
val setKeys = setKeyRoot.map {
extractTextFromElementBySteps(
it,
setKeyConfiguration.getSteps()
) ?: throw IllegalStateException("Parameter 'id' could not be found")
}
return RegionalSet.flattenFromMemberLists(
setIds,
languages,
setKeys
)
} catch (ex: NoSuchElementException) {
throw RuntimeException("sdfgs") // TODO handle me
}
}
}
/**
 * Extracts card fields from [root] using the card target configured for [provider].
 *
 * Work in progress: the extracted values are not yet assembled into a [Card],
 * so every path currently returns `null`.
 *
 * @param root parsed document to extract from
 * @param provider id of the configured source
 * @return always `null` for now
 * @throws IllegalArgumentException if no source is registered under [provider]
 * @throws NoSuchElementException if the source defines no card target
 * @throws ElementNotFoundException if the configured root matches no element
 * @throws IllegalStateException if the steps of a field yield no text
 */
fun extractCard(root: Document, provider: String): Card? {
    val source = sourceService.getSourceById(provider)
        ?: throw IllegalArgumentException("Provider $provider not found")
    val cardSelector = source.getTargets().card().get()

    val rootConfigurationOptional = cardSelector.getRootConfig()
    if (!rootConfigurationOptional.isPresent) {
        // TODO: targets that only define per-field roots are not handled yet.
        return null
    }

    val rootElement: Element = getElementFromDocumentByExtractConfig(
        root,
        rootConfigurationOptional.get()
    ) ?: throw ElementNotFoundException("TODO make this better")

    val englishCardName: String = extractTextFromElementBySteps(
        rootElement,
        cardSelector.getEnglishName().getSteps()
    ) ?: throw IllegalStateException("Parameter 'name' could not be found")

    // Each field is read with its own configured steps; previously the type and
    // description were both extracted with the name steps (copy-paste slip).
    val cardType: String = extractTextFromElementBySteps(
        rootElement,
        cardSelector.getCardType().getSteps()
    ) ?: throw IllegalStateException("Parameter 'type' could not be found")

    val description: String = extractTextFromElementBySteps(
        rootElement,
        cardSelector.getDescription().getSteps()
    ) ?: throw IllegalStateException("Parameter 'description' could not be found")

    // TODO: build the concrete Card from the extracted values.
    return null
}
/**
 * Selects every element below [document] that matches [step].
 *
 * The query string is evaluated as a CSS selector when the step's selector type
 * is [Selector.CSS], and as an XPath expression for any other type.
 */
private fun getElementsFromDocumentByExtractConfig(
    document: Element,
    step: ExtractConfig
): Elements = when (step.selectorType()) {
    Selector.CSS -> document.select(step.getQueryString())
    else -> document.selectXpath(step.getQueryString())
}
/**
 * Returns the first element below [document] that matches [step].
 *
 * The query string is evaluated as a CSS selector when the step's selector type
 * is [Selector.CSS], and as an XPath expression for any other type.
 *
 * The return type is nullable, but this function never returns `null`: a query
 * without matches raises an exception instead.
 *
 * @throws ElementNotFoundException if the query matches no element
 */
private fun getElementFromDocumentByExtractConfig(
    document: Element,
    step: ExtractConfig
): Element? {
    val selectorType = step.selectorType()
    val query = step.getQueryString()
    val matches = if (selectorType == Selector.CSS) {
        document.select(query)
    } else {
        document.selectXpath(query)
    }
    // Name the failing query in the message; it used to be an empty string.
    return matches.firstOrNull()
        ?: throw ElementNotFoundException("No element matched $selectorType query '$query'")
}
private fun extractTextFromElementBySteps(
root: Element, root: Element,
steps: Set<Step> steps: List<ExtractConfig>
): String? { ): String? {
val stepsInCorrectOrder = steps.reversed()
var currentElement: Element? = root.clone() var currentElement: Element? = root.clone()
var result: String? = null var result: String? = null
@@ -60,10 +215,10 @@ class ExtractionService(
} }
if (index == steps.size - 1) { if (index == steps.size - 1) {
result = XPathUtil.extractResult(currentElement, currentStep.value()) result = XPathUtil.extractResult(currentElement, currentStep.getQueryString())
} }
else { else {
currentElement = XPathUtil.getNextElement(currentElement, currentStep.value()) currentElement = XPathUtil.getNextElement(currentElement, currentStep.getQueryString())
} }
} }

View File

@@ -1,6 +1,7 @@
package com.rak.service package com.rak.service
import com.rak.model.RegionalSet import com.rak.model.card.Card
import com.rak.model.set.CardSet
import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.Jsoup import org.jsoup.Jsoup
import org.jsoup.nodes.Document import org.jsoup.nodes.Document
@@ -15,21 +16,25 @@ class ScrapeService(
fun scrapeSet( fun scrapeSet(
provider: String, provider: String,
setName: String, setName: String,
): List<RegionalSet> { ): CardSet {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found") val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val path: String = normalizePath(setName) val path: String = normalizePath(setName)
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get() val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
return extractionService.extractSet(document, provider) return extractionService.extractSet(setName, document, provider)
} }
fun scrapeCard( fun scrapeCard(
provider: String, provider: String,
cardName: String, cardName: String,
): Map<String, String> { ): Card? {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val path: String = normalizePath(cardName) val path: String = normalizePath(cardName)
return mapOf() val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
return extractionService.extractCard(document, provider)
} }
private fun normalizePath(path: String): String = path private fun normalizePath(path: String): String = path

View File

@@ -1,14 +1,86 @@
package com.rak.service package com.rak.service
import com.rak.config.SourceConfig import com.rak.config.model.CardScrapeTargetConfig
import com.rak.config.SourcesConfiguration import com.rak.config.model.RegionalSetScrapeTargetConfig
import com.rak.config.model.SourceConfig
import com.rak.config.model.SourcesConfig
import com.rak.model.exception.InvalidConfigurationException
import io.quarkus.logging.Log
import io.quarkus.runtime.Startup
import jakarta.annotation.PostConstruct
import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.context.ApplicationScoped
@Startup
@ApplicationScoped @ApplicationScoped
class SourceService ( class SourceService(
val sourcesConfiguration: SourcesConfiguration val sourcesConfiguration: SourcesConfig
) { ) {
/** Validates every configured source after construction. */
@PostConstruct
fun init() {
    for (source in sourcesConfiguration.getSources()) {
        validateSource(source)
    }
}
/**
 * Validates the targets of one source; a target that is not configured is skipped.
 *
 * @throws InvalidConfigurationException if a configured target is invalid
 */
private fun validateSource(sourceConfig: SourceConfig) {
    val regionalSetTarget = sourceConfig.getTargets().regionalSet()
    val cardTarget = sourceConfig.getTargets().card()

    regionalSetTarget.ifPresent { validateSetExtractConfig(it) }
    cardTarget.ifPresent { validateCardExtractConfig(it) }
}
/**
 * Checks the extraction-root placement of a regional-set target.
 *
 * @throws InvalidConfigurationException if global and dedicated roots are mixed,
 * or if neither a global root nor a complete set of dedicated roots exists
 */
private fun validateSetExtractConfig(setExtractConfig: RegionalSetScrapeTargetConfig) {
    validateRootPlacement(
        globalRootPresent = setExtractConfig.getRootConfig().isPresent,
        dedicatedRootsPresent = listOf(
            setExtractConfig.languageSelector(),
            setExtractConfig.idSelector(),
            setExtractConfig.regionKeySelector(),
        ).map { it.getRootConfig().isPresent },
    )
}

/**
 * Checks the extraction-root placement of a card target.
 *
 * @throws InvalidConfigurationException if global and dedicated roots are mixed,
 * or if neither a global root nor a complete set of dedicated roots exists
 */
private fun validateCardExtractConfig(cardScrapeTargetConfig: CardScrapeTargetConfig) {
    validateRootPlacement(
        globalRootPresent = cardScrapeTargetConfig.getRootConfig().isPresent,
        dedicatedRootsPresent = listOf(
            cardScrapeTargetConfig.getEnglishName(),
            cardScrapeTargetConfig.getDescription(),
            cardScrapeTargetConfig.getCardType(),
            cardScrapeTargetConfig.getAttack(),
            cardScrapeTargetConfig.getDefense(),
        ).map { it.getRootConfig().isPresent },
    )
}

/**
 * Shared rule for both target kinds: a target uses either one global root or a
 * dedicated root on every field, never a mix of the two.
 *
 * @param globalRootPresent whether the target itself configures a root
 * @param dedicatedRootsPresent for each field, whether it configures its own root
 */
private fun validateRootPlacement(globalRootPresent: Boolean, dedicatedRootsPresent: List<Boolean>) {
    if (globalRootPresent) {
        // If global root is present, dedicated roots may not exist
        if (dedicatedRootsPresent.any { it }) {
            throw InvalidConfigurationException(
                "Dedicated extraction roots cannot be set when a global extraction root is configured"
            )
        }
    } else if (dedicatedRootsPresent.any { !it }) {
        throw InvalidConfigurationException(
            "Dedicated extraction roots have to be set when a global extraction root is not configured"
        )
    }
}
fun getSources(): Set<SourceConfig> = sourcesConfiguration.getSources().toSet() fun getSources(): Set<SourceConfig> = sourcesConfiguration.getSources().toSet()
fun getSourceById(id: String): SourceConfig? = getSources().firstOrNull { it.getId() == id } fun getSourceById(id: String): SourceConfig? = getSources().firstOrNull { it.getId() == id }

View File

@@ -1,13 +0,0 @@
package com.rak.service
import com.rak.model.transform.TransformationRegistry
import jakarta.enterprise.context.ApplicationScoped
@ApplicationScoped
class TransformService(
private val transformationRegistry: TransformationRegistry = TransformationRegistry()
) {
}

View File

@@ -9,8 +9,11 @@ scraper:
domain: "yugioh-card.com" domain: "yugioh-card.com"
url-patterns: url-patterns:
- "^https://www\\.yugioh-card\\.com/[a-z]{2}/products/.*$" - "^https://www\\.yugioh-card\\.com/[a-z]{2}/products/.*$"
selectors: targets:
card: card:
root:
type: css
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
name: name:
steps: steps:
- type: "css" - type: "css"
@@ -27,8 +30,9 @@ scraper:
domain: "yugioh.fandom.com" domain: "yugioh.fandom.com"
url-patterns: url-patterns:
- "^https://yugioh\\.fandom\\.com/wiki/.*$" - "^https://yugioh\\.fandom\\.com/wiki/.*$"
selectors: targets:
regional-set: regional-set:
multi: true
root: root:
type: css type: css
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li" value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
@@ -50,4 +54,40 @@ scraper:
region-key: region-key:
steps: steps:
- type: xpath - type: xpath
value: "//li/abbr/text()" value: "//li/abbr/text()"
# NOTE(review): every step of this `card` target is declared `xpath`, but the
# `description`, `type`, `attack` and `defense` steps all carry the same
# `b:contains(Card descriptions)` value, which uses the `:contains(...)` form
# seen in the `css` roots of this file. These look like copied placeholders;
# confirm the real per-field queries before relying on them.
card:
name:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "./tbody/tr[3]/th/text()"
description:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "b:contains(Card descriptions)"
type:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "b:contains(Card descriptions)"
attack:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "b:contains(Card descriptions)"
defense:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "b:contains(Card descriptions)"