Compare commits

..

8 Commits

Author SHA1 Message Date
39c0ebfc7c Attempt to implement CardPrints 2025-06-25 21:06:34 +02:00
e0330e7baa Remove isMulti 2025-06-25 14:17:58 +02:00
3808fe153e Amend naming schema 2025-06-25 14:17:35 +02:00
0196308c10 Amend naming schema 2025-06-25 14:17:13 +02:00
72af626e54 Amend naming schema 2025-06-25 14:10:04 +02:00
ce64f90a66 Refactor extraction logic
Add required models
2025-06-25 13:57:44 +02:00
284723c978 Implement transformation application 2025-06-24 15:53:20 +02:00
8cc9a64111 Add Transformation model 2025-06-24 15:23:12 +02:00
62 changed files with 1394 additions and 247 deletions

View File

@@ -30,6 +30,11 @@ scraper:
steps: steps:
- type: "xpath" - type: "xpath"
value: "//li/text()" value: "//li/text()"
transform:
- name: "replace"
parameters:
- " ("
- ""
language: language:
steps: steps:
- type: "xpath" - type: "xpath"

View File

@@ -1,38 +0,0 @@
package com.rak.model.transform;
import jakarta.enterprise.context.ApplicationScoped;
import java.time.LocalDate;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Application-scoped registry that maps a transformation name to a {@code Transformation}.
 * The backing map is a {@link ConcurrentHashMap}; the constructor pre-registers the
 * built-in names "trim", "upper", "parseInt", "parseFloat", "parseDate" and "extract".
 */
@ApplicationScoped
public class TestRegistry {
private final Map<String, Transformation> registry = new ConcurrentHashMap<>();
public TestRegistry() {
// Register built-in transformations
register("trim", string -> string.trim());
register("upper", String::toUpperCase);
register("parseInt", s -> Integer.parseInt((String) s));
register("parseFloat", s -> Float.parseFloat((String) s));
register("parseDate", s -> LocalDate.parse((String) s));
// NOTE(review): extract(...) takes two arguments while the lambdas above take one;
// whether this method reference fits Transformation depends on that interface,
// which is not visible here - confirm.
register("extract", this::extract);
}
/**
 * Stores {@code transformation} under {@code name}, replacing any previous entry with that name.
 */
public void register(String name, Transformation transformation) {
registry.put(name, transformation);
}
/**
 * Looks up a transformation by name.
 *
 * @return the registered transformation, or {@code null} if the name is unknown
 */
public Transformation get(String name) {
return registry.get(name);
}
/**
 * Compiles {@code pattern}, searches {@code input} (cast to String) for the first match and
 * returns capture group 1 of that match; returns {@code input} unchanged when nothing matches.
 */
private Object extract(Object input, String pattern) {
Pattern regex = Pattern.compile(pattern);
Matcher matcher = regex.matcher((String) input);
// NOTE(review): group(1) throws IndexOutOfBoundsException when the pattern has no capturing group.
return matcher.find() ? matcher.group(1) : input;
}
}

View File

@@ -1,12 +0,0 @@
package com.rak.config
import io.smallrye.config.WithName
/**
 * Configuration view of the selectors used for a card.
 * Each accessor is mapped to the configuration key given in its `@WithName` annotation.
 */
interface CardDefinition {
// Mapped to the "name" key.
@WithName("name")
fun nameSelector(): SelectorDefinition
// Mapped to the "attack" key.
@WithName("attack")
fun attackSelector(): SelectorDefinition
// Mapped to the "effect" key.
@WithName("effect")
fun effectSelector(): SelectorDefinition
}

View File

@@ -1,8 +0,0 @@
package com.rak.config
import java.util.*
/**
 * Groups the optional item definitions of a source; either definition may be absent.
 */
interface Items {
// Card definition, empty when not configured.
fun card(): Optional<CardDefinition>
// Regional set definition, empty when not configured.
fun regionalSet(): Optional<RegionalSetDefinition>
}

View File

@@ -1,13 +0,0 @@
package com.rak.config
import com.rak.config.converter.AbstractModelDefinition
import io.smallrye.config.WithName
/**
 * Configuration view of the selectors used for a regional set.
 * Extends [AbstractModelDefinition]; each accessor is mapped to the key in its `@WithName` annotation.
 */
interface RegionalSetDefinition : AbstractModelDefinition {
// Mapped to the "id" key.
@WithName("id")
fun idSelector(): SelectorDefinition
// Mapped to the "language" key.
@WithName("language")
fun languageSelector(): SelectorDefinition
// Mapped to the "region-key" key.
@WithName("region-key")
fun regionKeySelector(): SelectorDefinition
}

View File

@@ -1,5 +0,0 @@
package com.rak.config
/**
 * A selector described as a set of [Step]s.
 */
interface SelectorDefinition {
// NOTE(review): declared as a Set, so the order of the steps is not expressed by the type - confirm this is intended.
fun steps(): Set<Step>
}

View File

@@ -1,9 +0,0 @@
package com.rak.config.converter
import com.rak.config.Step
import io.smallrye.config.WithName
/**
 * Base configuration for model definitions that declare a root selector.
 */
interface AbstractModelDefinition {
// Mapped to the "root" key.
@WithName("root")
fun rootSelector(): Step
}

View File

@@ -0,0 +1,10 @@
package com.rak.config.converter
import com.rak.model.DiscriminatorDirection
import org.eclipse.microprofile.config.spi.Converter
/**
 * Config converter that turns a raw string into a [DiscriminatorDirection]
 * by delegating to [DiscriminatorDirection.from].
 */
class DiscriminatorDirectionConverter : Converter<DiscriminatorDirection> {

    /** Returns whatever [DiscriminatorDirection.from] yields for [value], which may be `null`. */
    override fun convert(value: String): DiscriminatorDirection? = DiscriminatorDirection.from(value)
}

View File

@@ -0,0 +1,11 @@
package com.rak.config.converter
import jakarta.annotation.Priority
import org.eclipse.microprofile.config.spi.Converter
/**
 * Identity converter for `String` values, registered with priority 1.
 *
 * NOTE(review): presumably exists so that empty strings survive config conversion - confirm.
 */
@Priority(1)
class EmptyStringConverter : Converter<String> {

    /** Returns [value] exactly as it was received. */
    override fun convert(value: String): String = value
}

View File

@@ -1,6 +1,6 @@
package com.rak.config.converter package com.rak.config.converter
import com.rak.model.scrape.selector.Selector import com.rak.model.Selector
import org.eclipse.microprofile.config.spi.Converter import org.eclipse.microprofile.config.spi.Converter
class TypeSelectorConverter : Converter<Selector> { class TypeSelectorConverter : Converter<Selector> {

View File

@@ -0,0 +1,15 @@
package com.rak.config.model
import io.smallrye.config.WithDefault
import io.smallrye.config.WithName
import java.util.*
/**
 * Settings shared by every scrape target configuration.
 * Each accessor is mapped to the configuration key given in its `@WithName` annotation.
 */
interface AbstractScrapeTargetConfig {
// Optional target-level root selector ("root" key).
@WithName("root")
fun getRootConfig(): Optional<ExtractConfig>
// "multi" key; falls back to false when the key is not set.
@WithName("multi")
@WithDefault("false")
fun isMulti(): Boolean
// Optional discriminator settings ("discriminator" key).
@WithName("discriminator")
fun getDiscriminator(): Optional<DiscriminatorConfig>
}

View File

@@ -0,0 +1,3 @@
package com.rak.config.model
/** Marker interface for field-level scrape target configurations; declares no members. */
interface AbstractScrapeTargetFieldConfig

View File

@@ -0,0 +1,14 @@
package com.rak.config.model
import io.smallrye.config.WithName
/**
 * Scrape target configuration for card prints: one field configuration per extracted value.
 */
interface CardPrintScrapeTargetConfig : AbstractScrapeTargetConfig {
// Mapped to the "id" key.
@WithName("id")
fun getIdConfig(): ScrapeTargetFieldConfig
// Mapped to the "name" key.
@WithName("name")
fun getNameConfig(): ScrapeTargetFieldConfig
// Mapped to the "regional-name" key (note the accessor is spelled "RegionName").
@WithName("regional-name")
fun getRegionNameConfig(): ScrapeTargetFieldConfig
// Mapped to the "rarity" key.
@WithName("rarity")
fun getRarityConfig(): ScrapeTargetFieldConfig
}

View File

@@ -0,0 +1,16 @@
package com.rak.config.model
import io.smallrye.config.WithName
/**
 * Scrape target configuration for cards: one field configuration per extracted value.
 */
interface CardScrapeTargetConfig : AbstractScrapeTargetConfig {
// Mapped to the "name" key.
@WithName("name")
fun getEnglishNameConfig(): ScrapeTargetFieldConfig
// Mapped to the "description" key.
@WithName("description")
fun getDescriptionConfig(): ScrapeTargetFieldConfig
// Mapped to the "type" key.
@WithName("type")
fun getCardTypeConfig(): ScrapeTargetFieldConfig
// Mapped to the "attack" key.
@WithName("attack")
fun getAttackConfig(): ScrapeTargetFieldConfig
// Mapped to the "defense" key.
@WithName("defense")
fun getDefenseConfig(): ScrapeTargetFieldConfig
}

View File

@@ -0,0 +1,12 @@
package com.rak.config.model
import com.rak.config.converter.DiscriminatorDirectionConverter
import com.rak.model.DiscriminatorDirection
import io.smallrye.config.WithConverter
import io.smallrye.config.WithName
/**
 * Field configuration for a discriminator: everything from [ScrapeTargetFieldConfig] plus a direction.
 */
interface DiscriminatorConfig : ScrapeTargetFieldConfig {
// "direction" key, converted with DiscriminatorDirectionConverter.
// NOTE(review): that converter can return null for an unrecognized value while this accessor
// is declared non-null - confirm how the config framework handles that case.
@WithName("direction")
@WithConverter(DiscriminatorDirectionConverter::class)
fun getDiscriminatorDirection(): DiscriminatorDirection
}

View File

@@ -1,13 +1,14 @@
package com.rak.config package com.rak.config.model
import com.rak.config.converter.TypeSelectorConverter import com.rak.config.converter.TypeSelectorConverter
import com.rak.model.scrape.selector.Selector import com.rak.model.Selector
import io.smallrye.config.WithConverter import io.smallrye.config.WithConverter
import io.smallrye.config.WithName import io.smallrye.config.WithName
interface Step { interface ExtractConfig {
@WithConverter(TypeSelectorConverter::class) @WithConverter(TypeSelectorConverter::class)
@WithName("type") @WithName("type")
fun selectorType(): Selector // e.g. css or xpath fun selectorType(): Selector
fun value(): String @WithName("value")
fun getQueryString(): String
} }

View File

@@ -1,9 +1,9 @@
package com.rak.config package com.rak.config.model
import io.smallrye.config.WithName import io.smallrye.config.WithName
import java.util.* import java.util.*
interface SourceConfig { interface ProviderConfig {
@WithName("id") @WithName("id")
fun getId(): String fun getId(): String
@@ -13,7 +13,7 @@ interface SourceConfig {
fun getDomain(): String fun getDomain(): String
@WithName("url-patterns") @WithName("url-patterns")
fun getUrlPatterns(): Optional<MutableSet<String>> fun getUrlPatterns(): Optional<MutableSet<String>>
@WithName("selectors") @WithName("targets")
fun getItems(): Items fun getTargets(): TargetsConfig
} }

View File

@@ -0,0 +1,13 @@
package com.rak.config.model
import io.smallrye.config.WithName
import java.util.*
/**
 * Configuration of a single extracted field: an optional root, the extraction steps
 * and optional transformation steps.
 */
interface ScrapeTargetFieldConfig : AbstractScrapeTargetFieldConfig {
// Optional field-level root selector ("root" key).
@WithName("root")
fun getRootConfig(): Optional<ExtractConfig>
// Extraction steps ("steps" key), as an ordered list.
@WithName("steps")
fun getExtractionSteps(): List<ExtractConfig>
// Optional transformation steps ("transform" key).
@WithName("transform")
fun getOptionalTransformationSteps(): Optional<List<TransformationStepConfig>>
}

View File

@@ -0,0 +1,12 @@
package com.rak.config.model
import io.smallrye.config.WithName
/**
 * Scrape target configuration for sets: one field configuration per extracted value.
 */
interface SetScrapeTargetConfig : AbstractScrapeTargetConfig {
// Mapped to the "id" key.
@WithName("id")
fun getIdConfig(): ScrapeTargetFieldConfig
// Mapped to the "language" key.
@WithName("language")
fun getLanguageConfig(): ScrapeTargetFieldConfig
// Mapped to the "region-key" key.
@WithName("region-key")
fun getRegionKeyConfig(): ScrapeTargetFieldConfig
}

View File

@@ -1,12 +1,12 @@
package com.rak.config package com.rak.config.model
import io.smallrye.config.ConfigMapping import io.smallrye.config.ConfigMapping
import io.smallrye.config.WithName import io.smallrye.config.WithName
@ConfigMapping(prefix = "scraper") @ConfigMapping(prefix = "scraper")
interface SourcesConfiguration { interface SourcesConfig {
@WithName("sources") @WithName("sources")
fun getSources(): MutableList<SourceConfig> fun getSources(): MutableList<ProviderConfig>
} }

View File

@@ -0,0 +1,13 @@
package com.rak.config.model
import io.smallrye.config.WithName
import java.util.*
/**
 * The scrape targets a provider may configure; every target is optional.
 */
interface TargetsConfig {
// Card target ("card" key).
@WithName("card")
fun getCardConfig(): Optional<CardScrapeTargetConfig>
// Set target ("set" key).
@WithName("set")
fun getSetConfig(): Optional<SetScrapeTargetConfig>
// Card print target ("card-print" key).
@WithName("card-print")
fun getCardPrintConfiguration(): Optional<CardPrintScrapeTargetConfig>
}

View File

@@ -0,0 +1,10 @@
package com.rak.config.model
import com.rak.config.converter.EmptyStringConverter
import io.smallrye.config.WithConverter
/**
 * One configured transformation step: the transformation's name and its string parameters.
 */
interface TransformationStepConfig {
// Name of the transformation to apply.
fun name(): String
// Parameters for the transformation, converted with EmptyStringConverter (an identity converter).
@WithConverter(EmptyStringConverter::class)
fun parameters(): MutableList<String>
}

View File

@@ -1,8 +1,9 @@
package com.rak.controller package com.rak.controller
import com.rak.config.SourcesConfiguration import com.rak.model.card.Card
import com.rak.model.set.CardSet
import com.rak.model.set.RegionalSet
import com.rak.service.ScrapeService import com.rak.service.ScrapeService
import com.rak.service.SourceService
import jakarta.ws.rs.Consumes import jakarta.ws.rs.Consumes
import jakarta.ws.rs.GET import jakarta.ws.rs.GET
import jakarta.ws.rs.Path import jakarta.ws.rs.Path
@@ -13,15 +14,11 @@ import org.jboss.resteasy.reactive.RestQuery
@Path("/api") @Path("/api")
class ExampleResource( class ScrapeController(
private val sourcesConfiguration: SourcesConfiguration,
private val scrapeService: ScrapeService, private val scrapeService: ScrapeService,
private val sourceService: SourceService
) { ) {
companion object {
private val TEXT_NODE_MATCHER: Regex = Regex("text\\(\\)$")
}
@GET @GET
@Path("/{provider}/set") @Path("/{provider}/set")
@@ -32,8 +29,24 @@ class ExampleResource(
provider: String, provider: String,
@RestQuery @RestQuery
setName: String setName: String
): List<Map<String, String>> { ): CardSet {
return scrapeService.extractSet( return scrapeService.scrapeSet(
provider,
setName
)
}
@GET
@Path("/{provider}/regionalSet")
@Produces(MediaType.APPLICATION_JSON)
@Consumes(MediaType.APPLICATION_JSON)
fun scrapeRegionalSet(
@RestPath
provider: String,
@RestQuery
setName: String
): RegionalSet {
return scrapeService.scrapeRegionalSet(
provider, provider,
setName setName
) )
@@ -48,8 +61,8 @@ class ExampleResource(
provider: String, provider: String,
@RestQuery @RestQuery
cardName: String cardName: String
): Map<String, String> { ): Card? {
return scrapeService.extractCard( return scrapeService.scrapeCard(
provider, provider,
cardName cardName
) )

View File

@@ -0,0 +1,17 @@
package com.rak.model
/**
 * Direction of a discriminator, identified in text form by its [value].
 */
enum class DiscriminatorDirection(val value: String) {
    ASC("asc"),
    DESC("desc");

    companion object {
        /**
         * Finds the constant whose [DiscriminatorDirection.value] equals [value] exactly
         * (the comparison is case-sensitive).
         *
         * @return the matching constant, or `null` when no constant matches
         */
        fun from(value: String): DiscriminatorDirection? =
            DiscriminatorDirection.entries.firstOrNull { candidate -> candidate.value == value }
    }
}

View File

@@ -1,4 +1,4 @@
package com.rak.model.scrape.selector package com.rak.model
enum class Selector { enum class Selector {
CSS, CSS,

View File

@@ -0,0 +1,11 @@
package com.rak.model.card
/** The attribute values a card can carry (see the `attribute` property of MonsterCard). */
enum class Attribute {
WIND,
WATER,
FIRE,
EARTH,
LIGHT,
DARK,
DIVINE;
}

View File

@@ -0,0 +1,8 @@
package com.rak.model.card
/**
 * Base type for all cards; concrete card classes supply the four common properties.
 */
abstract class Card {
// Numeric identifier of the card.
abstract val id: Int
// Broad category of the card.
abstract val cardType: CardType
// Description text of the card.
abstract val description: String
// Name of the card.
abstract val name: String
}

View File

@@ -0,0 +1,23 @@
package com.rak.model.card
import com.rak.model.set.RegionalSet
/**
 * A single print of a card.
 *
 * @property id identifier of the print
 * @property name name of the print
 * @property regionalName optional regional name; `null` when none is known
 * @property rarity rarity of the print
 */
data class CardPrint(
    val id: String,
    val name: String,
    val regionalName: String? = null,
    val rarity: String
) {
    companion object {
        /**
         * Builds a [CardPrint] from a map of extracted values.
         *
         * Required keys are `id`, `name` and `rarity`; `regionalName` is optional.
         *
         * @throws IllegalStateException when a required key is missing; the message names
         * the key that was actually missing
         */
        fun fromMap(map: Map<String, String>): CardPrint {
            return CardPrint(
                id = map["id"] ?: throw IllegalStateException("Parameter 'id' not found"),
                name = map["name"] ?: throw IllegalStateException("Parameter 'name' not found"),
                regionalName = map["regionalName"],
                rarity = map["rarity"] ?: throw IllegalStateException("Parameter 'rarity' not found"),
            )
        }
    }
}

View File

@@ -0,0 +1,8 @@
package com.rak.model.card
/** Broad card categories, with UNKNOWN as an additional constant. */
enum class CardType {
MONSTER,
SPELL,
TRAP,
UNKNOWN
}

View File

@@ -0,0 +1,3 @@
package com.rak.model.card
/** Marker interface for card sub-type enums; declares no members. */
interface ICardType

View File

@@ -0,0 +1,12 @@
package com.rak.model.card
/** The eight positions a link arrow can point to (see the `linkArrows` property of MonsterCard). */
enum class LinkArrow {
TOP_LEFT,
TOP,
TOP_RIGHT,
LEFT,
RIGHT,
BOTTOM_LEFT,
BOTTOM,
BOTTOM_RIGHT;
}

View File

@@ -0,0 +1,20 @@
package com.rak.model.card
/**
 * A monster card. The first four properties implement [Card]; the nullable ones
 * default to `null` and `isPendulum` defaults to `false`.
 */
data class MonsterCard(
override val id: Int,
override val cardType: CardType,
override val description: String,
override val name: String,
val monsterEffect: String? = null,
val attack: Int? = null,
val defense: Int? = null,
val level: Int? = null,
val isPendulum: Boolean = false,
val pendulumScale: Int? = null,
val pendulumEffect: String? = null,
val linkValue: Int? = null,
// The remaining properties have no defaults and must always be supplied.
val subType: MonsterCardType,
val monsterType: MonsterType,
val attribute: Attribute,
val linkArrows: Set<LinkArrow>
) : Card()

View File

@@ -0,0 +1,11 @@
package com.rak.model.card
/** Sub-types of a monster card; implements the [ICardType] marker interface. */
enum class MonsterCardType : ICardType {
NORMAL,
EFFECT,
RITUAL,
FUSION,
SYNCHRO,
XYZ,
LINK
}

View File

@@ -0,0 +1,32 @@
package com.rak.model.card
// TODO string value for proper names
// TODO consider adding unknown type
/** The monster types a monster card can have (see the `monsterType` property of MonsterCard). */
enum class MonsterType {
AQUA,
BEAST,
BEAST_WARRIOR,
CREATOR_GOD,
CYBERSE,
DINOSAUR,
DIVINE_BEAST,
DRAGON,
FAIRY,
FIEND,
FISH,
INSECT,
ILLUSION,
MACHINE,
PLANT,
PSYCHIC,
PYRO,
REPTILE,
ROCK,
SEA_SERPENT,
SPELLCASTER,
THUNDER,
WARRIOR,
WINGED_BEAST,
WYRM,
ZOMBIE
}

View File

@@ -0,0 +1,9 @@
package com.rak.model.card
/**
 * A spell card: the four common [Card] properties plus a spell sub-type.
 */
data class SpellCard(
override val id: Int,
override val cardType: CardType,
override val description: String,
override val name: String,
val subType: SpellCardType
) : Card()

View File

@@ -0,0 +1,11 @@
package com.rak.model.card
// TODO fix underscore for all types with string value
/** Sub-types of a spell card. */
enum class SpellCardType {
NORMAL,
CONTINUOUS,
EQUIP,
QUICK_PLAY,
FIELD,
RITUAL
}

View File

@@ -0,0 +1,9 @@
package com.rak.model.card
/**
 * A trap card: the four common [Card] properties plus a trap sub-type.
 */
data class TrapCard(
override val id: Int,
override val cardType: CardType,
override val description: String,
override val name: String,
val subType: TrapCardType
) : Card()

View File

@@ -0,0 +1,7 @@
package com.rak.model.card
/** Sub-types of a trap card. */
enum class TrapCardType {
NORMAL,
CONTINUOUS,
COUNTER
}

View File

@@ -0,0 +1,3 @@
package com.rak.model.exception
/** Unchecked exception carrying a message; thrown by the extraction code when an expected element or result is missing. */
class ElementNotFoundException(message: String) : RuntimeException(message)

View File

@@ -0,0 +1,3 @@
package com.rak.model.exception
/** Unchecked exception carrying a message; thrown by the extraction code when no root config is available. */
class InvalidConfigurationException(message: String) : RuntimeException(message)

View File

@@ -0,0 +1,3 @@
package com.rak.model.exception
/** Unchecked exception carrying a message; thrown by operations that have no implementation yet. */
class NotImplementedException(message: String) : RuntimeException(message)

View File

@@ -0,0 +1,7 @@
package com.rak.model.exception
import java.lang.RuntimeException
/**
 * Unchecked exception carrying a message.
 * NOTE(review): by its name, meant for queries a provider does not support - no usage is visible here.
 */
class UnsupportedQueryForProviderException(
message: String,
) : RuntimeException(message)

View File

@@ -1,4 +0,0 @@
package com.rak.model.scrape
/** Empty abstract base class for scrapers; declares no members. */
abstract class AbstractScraper{
}

View File

@@ -1,6 +0,0 @@
package com.rak.model.scrape
/** Empty [AbstractScraper] subclass; declares no members. */
class JsoupScraper : AbstractScraper() {
}

View File

@@ -1,5 +0,0 @@
package com.rak.model.scrape
/** Value holder for a scrape job; carries only the URL. */
data class ScrapeJob(
val url: String,
)

View File

@@ -0,0 +1,12 @@
package com.rak.model.set
import kotlin.collections.Set
/**
 * A card set: its name and the regional sets that belong to it.
 */
data class CardSet(
val name: String,
val regionalSets: Set<RegionalSet>
) {
// Currently empty companion object.
companion object {
}
}

View File

@@ -0,0 +1,49 @@
package com.rak.model.set
import com.rak.model.card.CardPrint
/**
 * A regional variant of a card set.
 *
 * @property prefix prefix (id) of the regional set
 * @property region region/language of the set
 * @property regionCode region key of the set
 * @property cardPrints the card prints contained in the set
 * @property numberOfCards number of cards; `-1` is used by [flattenFromMemberLists]
 * when no prints have been loaded
 */
data class RegionalSet(
    val prefix: String,
    val region: String,
    val regionCode: String,
    val cardPrints: Collection<CardPrint>,
    val numberOfCards: Int
) {
    companion object {
        /**
         * Builds a [RegionalSet] from a map of extracted values and its card prints.
         * `numberOfCards` is set to the size of [cardPrints].
         *
         * @throws IllegalStateException when `prefix`, `region` or `regionCode` is missing from [map]
         */
        fun fromMap(map: Map<String, String>, cardPrints: Collection<CardPrint>): RegionalSet {
            return RegionalSet(
                prefix = map["prefix"] ?: throw IllegalStateException("Parameter 'prefix' not found"),
                region = map["region"] ?: throw IllegalStateException("Parameter 'region' not found"),
                regionCode = map["regionCode"] ?: throw IllegalStateException("Parameter 'regionCode' not found"),
                cardPrints = cardPrints,
                numberOfCards = cardPrints.size
            )
        }

        /**
         * Combines three parallel lists into regional sets: entry `i` of each list
         * describes the `i`-th set. The resulting sets carry no card prints and
         * `numberOfCards = -1`.
         *
         * @throws IllegalArgumentException when the three lists do not all have the same size
         */
        fun flattenFromMemberLists(
            idList: List<String>,
            languageList: List<String>,
            regionKeyAliasList: List<String>,
        ): MutableSet<RegionalSet> {
            // All three lists must line up. The previous `&&` only rejected the input when BOTH
            // other lists differed in size, letting a single mismatched list through.
            if (idList.size != languageList.size || idList.size != regionKeyAliasList.size) {
                throw IllegalArgumentException("Lists have to be the same size")
            }

            return idList.indices.mapTo(mutableSetOf()) { index ->
                RegionalSet(
                    prefix = idList[index],
                    region = languageList[index],
                    regionCode = regionKeyAliasList[index],
                    cardPrints = listOf(),
                    numberOfCards = -1
                )
            }
        }
    }
}

View File

@@ -2,5 +2,5 @@ package com.rak.model.transform
@FunctionalInterface @FunctionalInterface
fun interface ParameterizedTransformation : AbstractTransformation { fun interface ParameterizedTransformation : AbstractTransformation {
fun apply(input: Any, vararg parameters: Any): Any? fun apply(input: String, parameters: List<String>): String
} }

View File

@@ -2,5 +2,5 @@ package com.rak.model.transform
@FunctionalInterface @FunctionalInterface
fun interface Transformation : AbstractTransformation { fun interface Transformation : AbstractTransformation {
fun apply(input: Any): Any? fun apply(input: String): String
} }

View File

@@ -1,27 +1,76 @@
package com.rak.model.transform package com.rak.model.transform
import jakarta.enterprise.context.ApplicationScoped import com.rak.config.model.TransformationStepConfig
import java.util.concurrent.ConcurrentHashMap
@ApplicationScoped
class TransformationRegistry { class TransformationRegistry {
private val transformations = hashMapOf<String, (input: Any) -> Any>() private val transformations: ConcurrentHashMap<String, Transformation> = ConcurrentHashMap()
private val parameterizedTransformation: ConcurrentHashMap<String, ParameterizedTransformation> =
ConcurrentHashMap()
init { init {
register<String>("trim") { register("trim") { it.trim() }
(it as String).trim() register("replace") { input, parameters ->
require(parameters.size == 2) {
"'replace' requires exactly 2 parameters"
}
input.replace(parameters[0], parameters[1])
}
register("regexReplace") { input, params ->
require(params.size == 2) {
"'regexReplace' requires exactly 2 parameters"
}
input.replace(params[0].toRegex(), params[1])
} }
register<String>("replace", { s: Any ->
Integer.
})
} }
// fun <T : Any> register(name: String, transformation: (input: Any) -> T) {
// transformations[name] = transformation
// }
fun <T : Any> register(name: String, transformation: Transformation) { fun register(name: String, transformation: Transformation) {
transformations.put(name, transformation)
}
fun register(name: String, transformation: ParameterizedTransformation) {
parameterizedTransformation.put(name, transformation)
}
fun getTransformation(transformationStep: TransformationStepConfig): AbstractTransformation {
val name = transformationStep.name()
val parameters = transformationStep.parameters()
return when {
transformations.containsKey(name) -> {
if (parameters.isNotEmpty()) {
throw IllegalArgumentException("'$name' doesn't accept parameters")
} else {
transformations[name]!!
}
}
parameterizedTransformation.containsKey(name) -> {
if (parameters.isEmpty()) {
throw IllegalArgumentException("'$name' requires parameters")
} else {
parameterizedTransformation[name]!!
}
}
else -> throw IllegalArgumentException("Unknown transformation: '$name'")
}
}
fun applyTransformations(input: String, steps: List<TransformationStepConfig>): String {
return steps.fold(input) { current, step ->
val actualStep = getTransformation(step)
when (actualStep) {
is Transformation ->
transformations[step.name()]?.apply(current)
?: throw IllegalArgumentException("Unknown transformation: ${step.name()}")
is ParameterizedTransformation ->
parameterizedTransformation[step.name()]?.apply(current, step.parameters())
?: throw IllegalArgumentException("Unknown transformation: ${step.name()}")
else -> throw IllegalStateException("Invalid transformation type")
}
}
} }
} }

View File

@@ -0,0 +1,279 @@
package com.rak.service
import com.rak.config.model.AbstractScrapeTargetConfig
import com.rak.config.model.ExtractConfig
import com.rak.config.model.ProviderConfig
import com.rak.config.model.ScrapeTargetFieldConfig
import com.rak.model.DiscriminatorDirection
import com.rak.model.Selector
import com.rak.model.exception.ElementNotFoundException
import com.rak.model.exception.InvalidConfigurationException
import com.rak.model.transform.TransformationRegistry
import com.rak.util.CssUtil
import com.rak.util.XPathUtil
import org.jsoup.nodes.Element
import org.jsoup.select.Elements
import java.util.Optional
import kotlin.jvm.optionals.getOrElse
// find root element from global or node config
// get field target configs as list
// extract item from root element via field config
abstract class AbstractExtractionService<E, T : AbstractScrapeTargetConfig> {
// Registry used to apply the configured transformation steps to extracted text.
// Created directly here, so every service instance owns its own registry.
private val transformationRegistry = TransformationRegistry()
/**
 * Returns the field configurations of this target config, keyed by the identifier
 * under which each extracted value is stored in the result maps.
 */
abstract fun T.getItems(): Map<String, ScrapeTargetFieldConfig>
/**
 * Extracts a single item of type [E] from [element] using [extractionConfig].
 */
abstract fun extract(
element: Element,
providerConfig: ProviderConfig,
extractionConfig: T
): E
/**
 * Extracts a collection of items of type [E] from [element] using [extractionConfig].
 */
abstract fun extractMultiple(
element: Element,
providerConfig: ProviderConfig,
extractionConfig: T
): Collection<E>
/**
 * Resolves the first root element below [element].
 *
 * Delegates to [getRootElements] and takes the first match.
 *
 * @param element element to search in
 * @param globalRootExtractConfig target-level root config
 * @param nodeRootExtractConfig field-level root config
 * @return the first element matched by the effective root config
 * @throws ElementNotFoundException when the effective root config matches nothing
 */
fun getRootElement(
    element: Element,
    globalRootExtractConfig: Optional<ExtractConfig>,
    nodeRootExtractConfig: Optional<ExtractConfig>
): Element {
    val candidates = getRootElements(
        element,
        globalRootExtractConfig,
        nodeRootExtractConfig
    )
    // A non-empty message makes the failure diagnosable; the exception type is unchanged.
    return candidates.firstOrNull()
        ?: throw ElementNotFoundException("No element matched the configured root selector")
}
/**
 * Resolves all root elements below [element].
 *
 * The target-level config is used when present; the field-level config is only
 * consulted when the target-level one is absent.
 *
 * @param element element to search in
 * @param globalRootExtractConfig target-level root config
 * @param nodeRootExtractConfig field-level root config
 * @return every element matched by the effective root config (possibly empty)
 * @throws InvalidConfigurationException when neither config is present
 */
fun getRootElements(
    element: Element,
    globalRootExtractConfig: Optional<ExtractConfig>,
    nodeRootExtractConfig: Optional<ExtractConfig>
): Elements {
    val rootExtractConfig: ExtractConfig = globalRootExtractConfig.getOrElse {
        nodeRootExtractConfig.orElseThrow {
            // A non-empty message makes the misconfiguration diagnosable; the exception type is unchanged.
            InvalidConfigurationException(
                "No root selector configured: neither the target nor the field defines a 'root'"
            )
        }
    }
    return getElementsFromElementByExtractConfig(element, rootExtractConfig)
}
/**
 * Returns the first element that [step] selects below [element], or `null` when nothing is selected.
 */
protected fun getElementFromDocumentByExtractConfig(
    element: Element,
    step: ExtractConfig,
): Element? {
    val matches = getElementsFromElementByExtractConfig(element, step)
    return matches.firstOrNull()
}
/**
 * Runs the query of [step] against [element]: a CSS query for [Selector.CSS],
 * an XPath query for every other selector type.
 *
 * @return all matching elements
 */
protected fun getElementsFromElementByExtractConfig(
    element: Element,
    step: ExtractConfig,
): Elements = when (step.selectorType()) {
    Selector.CSS -> element.select(step.getQueryString())
    else -> element.selectXpath(step.getQueryString())
}
/**
 * Extracts one value per configured field and returns them keyed by field identifier.
 *
 * For each field the root element is resolved from the target-level root config,
 * falling back to the field-level one, and the field's text is extracted from it.
 *
 * @throws ElementNotFoundException when a field yields no text
 */
protected fun extractAsMap(
    document: Element,
    extractionConfig: T
): Map<String, String> {
    val extracted = mutableMapOf<String, String>()

    for ((identifier, fieldConfig) in extractionConfig.getItems()) {
        val fieldRoot = getRootElement(
            document,
            extractionConfig.getRootConfig(),
            fieldConfig.getRootConfig()
        )
        val text = extractTextFromElementByTargetFieldConfig(fieldRoot, fieldConfig)
            ?: throw ElementNotFoundException("Could not find element for '$identifier'")
        extracted[identifier] = text
    }

    return extracted
}
/**
 * Extracts every configured field from every root element below [element].
 *
 * Root elements are resolved per field (target-level root config first, field-level
 * as fallback). The value found in the `i`-th root element of a field is stored in
 * the `i`-th result map under the field's identifier.
 *
 * @return one map per root element index, in root element order
 * @throws ElementNotFoundException when a field yields no text for a root element
 */
fun extractAsListOfMaps(
    element: Element,
    extractionConfig: T
): List<Map<String, String>> {
    val resultList = mutableListOf<MutableMap<String, String>>()

    extractionConfig.getItems().forEach { (identifier, fieldConfig) ->
        val rootElements = getRootElements(
            element,
            extractionConfig.getRootConfig(),
            fieldConfig.getRootConfig()
        )

        rootElements.forEachIndexed { index, rootElement ->
            val extractedText = extractTextFromElementByTargetFieldConfig(
                rootElement,
                fieldConfig
            ) ?: throw ElementNotFoundException("Could not find element for '$identifier'")

            // Reuse the row for this index or append a new one. Indices grow one at a time,
            // so a missing row is always the next one to append - no need to provoke and
            // catch an IndexOutOfBoundsException to find that out.
            val row = resultList.getOrNull(index)
                ?: mutableMapOf<String, String>().also { resultList.add(it) }
            row[identifier] = extractedText
        }
    }

    return resultList
}
/**
 * Extracts every configured field from each of the given root [elements].
 *
 * The value found in the `i`-th element is stored in the `i`-th result map under
 * the field's identifier.
 *
 * @return one map per element, in element order
 * @throws ElementNotFoundException when a field yields no text for an element
 */
fun extractAsListOfMaps(
    elements: Elements,
    extractionConfig: T
): List<Map<String, String>> {
    val resultList = mutableListOf<MutableMap<String, String>>()

    extractionConfig.getItems().forEach { (identifier, fieldConfig) ->
        elements.forEachIndexed { index, rootElement ->
            val extractedText = extractTextFromElementByTargetFieldConfig(
                rootElement,
                fieldConfig
            ) ?: throw ElementNotFoundException("Could not find element for '$identifier'")

            // Reuse the row for this index or append a new one. Indices grow one at a time,
            // so a missing row is always the next one to append - no need to provoke and
            // catch an IndexOutOfBoundsException to find that out.
            val row = resultList.getOrNull(index)
                ?: mutableMapOf<String, String>().also { resultList.add(it) }
            row[identifier] = extractedText
        }
    }

    return resultList
}
/**
 * Extracts the configured fields from the target's root elements, trimming the list of
 * root elements to the number of discriminator values found.
 *
 * The returned outer list currently always contains exactly one inner list.
 *
 * Throws NoSuchElementException (from Optional.get) when the discriminator or its root
 * config is not configured, and ElementNotFoundException when a discriminator value
 * cannot be extracted.
 */
fun extractWithDiscriminator(
element: Element,
extractionConfig: T
): List<List<Map<String, String>>>{
// First root element: the discriminator elements are searched below it.
val rootElement = getRootElement(
element,
extractionConfig.getRootConfig(),
Optional.empty<ExtractConfig>()
)
// All root elements: the candidates for the actual field extraction.
var rootElements = getRootElements(
element,
extractionConfig.getRootConfig(),
Optional.empty<ExtractConfig>()
)
// Both Optionals are unwrapped without a presence check.
val discriminatedElements = getElementsFromElementByExtractConfig(
rootElement,
extractionConfig.getDiscriminator().get().getRootConfig().get(),
)
val discriminations = mutableListOf<String>()
val result = mutableListOf<List<Map<String, String>>>()
// Note: the loop variable shadows the `element` parameter.
for (element in discriminatedElements) {
val discriminatorValue: String = extractTextFromElementByTargetFieldConfig(
element,
extractionConfig.getDiscriminator().get()
) ?: throw ElementNotFoundException("")
discriminations.add(discriminatorValue)
}
// Only the number of discriminator values is used below, not the values themselves.
val definitiveElements = if (discriminations.size < rootElements.size) {
// For DESC the list is reversed so that the removal below drops elements from the
// original end; for any other direction elements are dropped from the start.
if (extractionConfig.getDiscriminator().get().getDiscriminatorDirection() == DiscriminatorDirection.DESC) {
rootElements = Elements(rootElements.reversed())
}
// Drop surplus elements until there is one root element per discriminator value.
while (discriminations.size < rootElements.size) {
rootElements.removeFirst()
}
// Restore the original order after trimming.
if (extractionConfig.getDiscriminator().get().getDiscriminatorDirection() == DiscriminatorDirection.DESC) {
rootElements = Elements(rootElements.reversed())
}
rootElements
} else {
rootElements
}
result.add(extractAsListOfMaps(
definitiveElements,
extractionConfig
))
return result
}
/**
 * Runs the extraction steps of [extractionConfig] starting from a clone of [root]
 * and returns the extracted text, after applying the configured transformations.
 *
 * Every step except the last navigates to the next element; the last step produces
 * the text result. CSS steps are handled by `CssUtil`, XPath steps by `XPathUtil`.
 *
 * @param root element the first step is evaluated against (it is cloned, not modified)
 * @param extractionConfig field configuration supplying extraction and transformation steps
 * @return the extracted and transformed text
 * @throws IllegalStateException when a navigation step yields no element for the next step
 * @throws ElementNotFoundException when no text was produced (including an empty step list)
 */
private fun extractTextFromElementByTargetFieldConfig(
    root: Element,
    extractionConfig: ScrapeTargetFieldConfig
): String? {
    val extractionSteps = extractionConfig.getExtractionSteps()
    val transformationSteps = extractionConfig.getOptionalTransformationSteps()

    var currentElement: Element? = root.clone()
    var result: String? = null

    for ((index, currentStep) in extractionSteps.withIndex()) {
        val scope = currentElement
            ?: throw IllegalStateException(
                "Extraction step ${index - 1} matched no element, so step $index cannot run"
            )

        if (index == extractionSteps.lastIndex) {
            // Last step: produce the text result.
            result = when (currentStep.selectorType()) {
                Selector.CSS -> CssUtil.extractResult(scope, currentStep.getQueryString())
                Selector.XPATH -> XPathUtil.extractResult(scope, currentStep.getQueryString())
            }
        } else {
            // Intermediate step: narrow the scope for the following step.
            currentElement = when (currentStep.selectorType()) {
                Selector.CSS -> CssUtil.getNextElement(scope, currentStep.getQueryString())
                Selector.XPATH -> XPathUtil.getNextElement(scope, currentStep.getQueryString())
            }
        }
    }

    val extracted = result ?: throw ElementNotFoundException("Result could not be extracted")

    return if (transformationSteps.isPresent) {
        transformationRegistry.applyTransformations(extracted, transformationSteps.get())
    } else {
        extracted
    }
}
}

View File

@@ -0,0 +1,46 @@
package com.rak.service
import com.rak.config.model.CardPrintScrapeTargetConfig
import com.rak.config.model.ProviderConfig
import com.rak.config.model.ScrapeTargetFieldConfig
import com.rak.config.model.SetScrapeTargetConfig
import com.rak.model.card.CardPrint
import com.rak.model.exception.NotImplementedException
import com.rak.model.set.CardSet
import com.rak.model.set.RegionalSet
import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
/**
 * Extraction service for [CardPrint]s, driven by a [CardPrintScrapeTargetConfig].
 */
@ApplicationScoped
class CardPrintExtractionService : AbstractExtractionService<CardPrint, CardPrintScrapeTargetConfig>() {

    /**
     * Maps the result keys `id`, `name`, `regionalName` and `rarity` (the keys read by
     * [CardPrint.fromMap]) to their field configurations.
     */
    override fun CardPrintScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
        return mapOf(
            "id" to this.getIdConfig(),
            "name" to this.getNameConfig(),
            "regionalName" to this.getRegionNameConfig(),
            "rarity" to this.getRarityConfig(),
        )
    }

    /**
     * Single-item extraction is not supported yet.
     *
     * @throws NotImplementedException always
     */
    override fun extract(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: CardPrintScrapeTargetConfig
    ): CardPrint {
        throw NotImplementedException("Not implemented")
    }

    /**
     * Extracts all card prints found below [element].
     *
     * Every extracted row is converted, not just the first row of each group: the previous
     * `it[0]` silently dropped all remaining rows and threw on an empty group.
     *
     * @throws IllegalStateException when a row lacks a key required by [CardPrint.fromMap]
     */
    override fun extractMultiple(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: CardPrintScrapeTargetConfig
    ): Collection<CardPrint> {
        val groupedRows = extractWithDiscriminator(element, extractionConfig)
        return groupedRows
            .flatten()
            .map { row -> CardPrint.fromMap(row) }
    }
}

View File

@@ -0,0 +1,273 @@
package com.rak.service
import com.rak.config.model.CardPrintScrapeTargetConfig
import com.rak.config.model.ExtractConfig
import com.rak.config.model.ScrapeTargetFieldConfig
import com.rak.model.Selector
import com.rak.model.card.Card
import com.rak.model.card.CardPrint
import com.rak.model.exception.ElementNotFoundException
import com.rak.model.exception.InvalidConfigurationException
import com.rak.model.set.CardSet
import com.rak.model.set.RegionalSet
import com.rak.model.transform.TransformationRegistry
import com.rak.util.XPathUtil
import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.select.Elements
import java.util.Optional
/**
 * Extracts sets, regional sets, cards and card prints from parsed HTML, driven by the
 * scrape configuration of the provider that served the page.
 *
 * A target either declares one global extraction root, in which case every field is read
 * relative to it, or every field declares its own dedicated root.
 */
@ApplicationScoped
class ExtractionService(
    private val sourceService: SourceService,
) {
    private val transformationRegistry = TransformationRegistry()

    /**
     * Builds the [CardSet] called [setName] from all regional sets found below [root].
     *
     * @throws IllegalArgumentException if [provider] is unknown
     */
    fun extractSet(setName: String, root: Element, provider: String): CardSet {
        return CardSet(
            name = setName,
            regionalSets = extractRegionalSets(root, provider)
        )
    }

    /**
     * Resolves the element that extraction starts from. The global root configuration wins;
     * the node-specific one is consulted only when no global root is configured.
     *
     * @throws InvalidConfigurationException if neither configuration is present
     * @throws ElementNotFoundException if the selected configuration matches nothing in [document]
     */
    fun getRootElement(
        document: Document,
        globalRootExtractConfig: Optional<ExtractConfig>,
        nodeRootExtractConfig: Optional<ExtractConfig>
    ): Element {
        // orElseGet defers the fallback: with orElse the fallback expression is evaluated eagerly and
        // threw whenever the node root was absent, even though a global root was configured.
        val rootExtractConfig: ExtractConfig = globalRootExtractConfig.orElseGet {
            nodeRootExtractConfig.orElseThrow {
                InvalidConfigurationException("Neither a global nor a dedicated extraction root is configured")
            }
        }
        return getElementFromDocumentByExtractConfig(document, rootExtractConfig)
            ?: throw ElementNotFoundException("No root could be found")
    }

    /**
     * Work in progress: extracts the card name to verify it is present, but does not build
     * a [CardPrint] yet.
     *
     * @return always `null`
     */
    fun extractCardPrint(document: Document, cardPrintConfig: CardPrintScrapeTargetConfig): CardPrint? {
        val nameConfig = cardPrintConfig.getNameConfig()
        // The extracted name is not used yet; the call is kept so a missing name still fails.
        extractTextFromElementByTargetFieldConfig(
            getRootElement(document, cardPrintConfig.getRootConfig(), nameConfig.getRootConfig()),
            nameConfig
        )
        return null
    }

    /**
     * Extracts one [RegionalSet] from [root]. With a global root configured, [root] is expected
     * to already be the regional set's element; otherwise each field is looked up below its
     * own dedicated root.
     *
     * The resulting set has no card prints and a count of `-1`.
     *
     * @throws IllegalArgumentException if [provider] is unknown
     * @throws ElementNotFoundException if a dedicated root or a field value cannot be located
     */
    fun extractRegionalSet(root: Element, provider: String): RegionalSet {
        val setExtractionConfig = requireSource(provider).getTargets().getSetConfig().get()
        val idConfig = setExtractionConfig.getIdConfig()
        val languageConfig = setExtractionConfig.getLanguageConfig()
        val regionKeyConfig = setExtractionConfig.getRegionKeyConfig()

        val setId: String
        val setLanguage: String
        val setKey: String
        if (setExtractionConfig.getRootConfig().isPresent) {
            setId = extractRequiredText(root, idConfig, "id")
            setLanguage = extractRequiredText(root, languageConfig, "language")
            setKey = extractRequiredText(root, regionKeyConfig, "key")
        } else {
            // Each field uses its own configuration and its own root; previously the id
            // configuration (and the id's root) was reused for language and key.
            setId = extractRequiredTextFromDedicatedRoot(root, idConfig, "id")
            setLanguage = extractRequiredTextFromDedicatedRoot(root, languageConfig, "language")
            setKey = extractRequiredTextFromDedicatedRoot(root, regionKeyConfig, "key")
        }
        return RegionalSet(
            setId,
            setLanguage,
            setKey,
            listOf(),
            -1
        )
    }

    /**
     * Extracts every [RegionalSet] below [root].
     *
     * With a global root, each element it matches becomes one regional set. Without one, the
     * ids, languages and keys are collected separately from their dedicated roots and combined
     * by [RegionalSet.flattenFromMemberLists].
     *
     * @throws IllegalArgumentException if [provider] is unknown
     * @throws RuntimeException if a field has no dedicated root although no global root is configured
     */
    fun extractRegionalSets(root: Element, provider: String): Set<RegionalSet> {
        val setExtractionConfig = requireSource(provider).getTargets().getSetConfig().get()
        val globalRoot = setExtractionConfig.getRootConfig()
        if (globalRoot.isPresent) {
            return getElementsFromDocumentByExtractConfig(root, globalRoot.get())
                .map { extractRegionalSet(it, provider) }
                .toSet()
        }
        try {
            val setIds = extractAllFromDedicatedRoots(root, setExtractionConfig.getIdConfig(), "id")
            val languages = extractAllFromDedicatedRoots(root, setExtractionConfig.getLanguageConfig(), "language")
            val setKeys = extractAllFromDedicatedRoots(root, setExtractionConfig.getRegionKeyConfig(), "key")
            return RegionalSet.flattenFromMemberLists(
                setIds,
                languages,
                setKeys
            )
        } catch (ex: NoSuchElementException) {
            // Raised by Optional.get() on an absent dedicated root; keep the cause for diagnosis.
            throw RuntimeException(
                "Set configuration of provider $provider lacks a dedicated extraction root",
                ex
            )
        }
    }

    /**
     * Work in progress: checks that name, type and description can be extracted below the
     * global card root, but does not build a [Card] yet.
     *
     * @return always `null`
     * @throws IllegalArgumentException if [provider] is unknown
     * @throws ElementNotFoundException if the card root or one of the fields cannot be located
     */
    fun extractCard(root: Document, provider: String): Card? {
        val cardSelector = requireSource(provider).getTargets().getCardConfig().get()
        val rootConfigurationOptional = cardSelector.getRootConfig()
        if (!rootConfigurationOptional.isPresent) {
            return null
        }
        val rootElement: Element = getElementFromDocumentByExtractConfig(root, rootConfigurationOptional.get())
            ?: throw ElementNotFoundException("Card root element could not be found")

        // Each field is read through its own configuration; previously the English name
        // configuration was used for all three.
        extractRequiredText(rootElement, cardSelector.getEnglishNameConfig(), "name")
        extractRequiredText(rootElement, cardSelector.getCardTypeConfig(), "type")
        extractRequiredText(rootElement, cardSelector.getDescriptionConfig(), "description")
        return null
    }

    /** Looks up the provider configuration for [provider] or fails with [IllegalArgumentException]. */
    private fun requireSource(provider: String) =
        requireNotNull(sourceService.getSourceById(provider)) { "Provider $provider not found" }

    /** Extracts the text described by [fieldConfig] below [root], failing if it is missing. */
    private fun extractRequiredText(
        root: Element,
        fieldConfig: ScrapeTargetFieldConfig,
        parameterName: String
    ): String = extractTextFromElementByTargetFieldConfig(root, fieldConfig)
        ?: throw IllegalStateException("Parameter '$parameterName' could not be found")

    /** Resolves the dedicated root of [fieldConfig] below [root] and extracts the field's text from it. */
    private fun extractRequiredTextFromDedicatedRoot(
        root: Element,
        fieldConfig: ScrapeTargetFieldConfig,
        parameterName: String
    ): String {
        val dedicatedRoot = getElementFromDocumentByExtractConfig(root, fieldConfig.getRootConfig().get())
            ?: throw ElementNotFoundException("No root element found for parameter '$parameterName'")
        return extractRequiredText(dedicatedRoot, fieldConfig, parameterName)
    }

    /** Extracts the field's text once for every element matched by the dedicated root of [fieldConfig]. */
    private fun extractAllFromDedicatedRoots(
        root: Element,
        fieldConfig: ScrapeTargetFieldConfig,
        parameterName: String
    ): List<String> =
        getElementsFromDocumentByExtractConfig(root, fieldConfig.getRootConfig().get())
            .map { extractRequiredText(it, fieldConfig, parameterName) }

    /** Returns all elements below [document] matched by [step], as CSS or XPath depending on its selector type. */
    private fun getElementsFromDocumentByExtractConfig(
        document: Element,
        step: ExtractConfig
    ): Elements {
        return if (step.selectorType() == Selector.CSS) {
            document.select(step.getQueryString())
        } else {
            document.selectXpath(step.getQueryString())
        }
    }

    /**
     * Returns the first element below [document] matched by [step], or `null` when nothing
     * matches, so that callers can report a failure with their own context.
     */
    private fun getElementFromDocumentByExtractConfig(
        document: Element,
        step: ExtractConfig,
    ): Element? = getElementsFromDocumentByExtractConfig(document, step).firstOrNull()

    /**
     * Walks the extraction steps of [extractionConfig] starting from a copy of [root]: every
     * step but the last selects the next element, the last one yields the text. Configured
     * transformations are applied to that text afterwards.
     *
     * All steps are evaluated through [XPathUtil].
     *
     * @throws IllegalStateException if an intermediate step selects no element
     * @throws ElementNotFoundException if no text could be extracted (including when no steps are configured)
     */
    private fun extractTextFromElementByTargetFieldConfig(
        root: Element,
        extractionConfig: ScrapeTargetFieldConfig
    ): String? {
        val extractionSteps = extractionConfig.getExtractionSteps()
        val transformationSteps = extractionConfig.getOptionalTransformationSteps()
        val lastIndex = extractionSteps.size - 1

        var currentElement: Element? = root.clone()
        var result: String? = null
        for ((index, step) in extractionSteps.withIndex()) {
            val element = checkNotNull(currentElement) {
                "Extraction step ${index - 1} selected no element to continue from"
            }
            if (index == lastIndex) {
                result = XPathUtil.extractResult(element, step.getQueryString())
            } else {
                currentElement = XPathUtil.getNextElement(element, step.getQueryString())
            }
        }

        val extracted = result ?: throw ElementNotFoundException("Result could not be extracted")
        return if (transformationSteps.isPresent) {
            transformationRegistry.applyTransformations(extracted, transformationSteps.get())
        } else {
            extracted
        }
    }
}

View File

@@ -1,29 +0,0 @@
package com.rak.service
import jakarta.ws.rs.GET
import jakarta.ws.rs.Path
import jakarta.ws.rs.QueryParam
import org.eclipse.microprofile.rest.client.inject.RegisterRestClient
/**
 * REST client for the extensions API at `https://stage.code.quarkus.io/api`.
 *
 * Obtain an instance through injection:
 *
 * ```kotlin
 * @Inject
 * @RestClient
 * lateinit var myRemoteService: MyRemoteService
 *
 * fun doSomething() {
 *     val restClientExtensions = myRemoteService.getExtensionsById("io.quarkus:quarkus-rest-client")
 * }
 * ```
 */
@RegisterRestClient(baseUri = "https://stage.code.quarkus.io/api")
interface MyRemoteService {

    /** Issues `GET /extensions` with [id] passed as the `id` query parameter. */
    @GET
    @Path("/extensions")
    fun getExtensionsById(@QueryParam("id") id: String): Set<Extension>

    /** One extension entry as returned by the remote service. */
    data class Extension(
        val id: String,
        val name: String,
        val shortName: String,
        val keywords: List<String>,
    )
}

View File

@@ -0,0 +1,52 @@
package com.rak.service
import com.rak.config.model.ProviderConfig
import com.rak.config.model.ScrapeTargetFieldConfig
import com.rak.config.model.SetScrapeTargetConfig
import com.rak.config.model.SourcesConfig
import com.rak.model.exception.NotImplementedException
import com.rak.model.set.RegionalSet
import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.nodes.Element
/**
 * Extraction service producing [RegionalSet] instances from a [SetScrapeTargetConfig].
 *
 * Only bulk extraction is available; extracting a single regional set is not implemented.
 */
@ApplicationScoped
class RegionalSetExtractionService(
    private val cardPrintExtractionService: CardPrintExtractionService,
    private val sourcesConfig: SourcesConfig
) : AbstractExtractionService<RegionalSet, SetScrapeTargetConfig>() {

    /**
     * Maps the keys `prefix`, `regionCode` and `region` to the field configuration that
     * describes how each value is scraped.
     */
    override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
        return mapOf(
            "prefix" to this.getIdConfig(),
            "regionCode" to this.getRegionKeyConfig(),
            "region" to this.getLanguageConfig(),
        )
    }

    /**
     * Single-item extraction is not supported.
     *
     * @throws NotImplementedException always
     */
    override fun extract(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: SetScrapeTargetConfig
    ): RegionalSet {
        throw NotImplementedException("Not implemented")
    }

    /**
     * Extracts all regional sets below [element] and attaches the card prints extracted from
     * the same element using the provider's card-print configuration.
     *
     * @throws NoSuchElementException if the provider has no card-print configuration
     */
    override fun extractMultiple(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: SetScrapeTargetConfig
    ): Collection<RegionalSet> {
        // The field maps are extracted once; a second identical extraction whose result was
        // never read has been removed.
        val regionalSetList = extractAsListOfMaps(element, extractionConfig)
        val cardPrints = cardPrintExtractionService.extractMultiple(
            element,
            providerConfig,
            providerConfig.getTargets().getCardPrintConfiguration().get()
        )
        // NOTE(review): every regional set receives the same card print collection - confirm
        // whether prints should be partitioned per regional set.
        return regionalSetList.map { RegionalSet.fromMap(it, cardPrints) }
    }
}

View File

@@ -1,91 +1,56 @@
package com.rak.service package com.rak.service
import com.rak.config.RegionalSetDefinition import com.rak.model.card.Card
import com.rak.config.SourcesConfiguration import com.rak.model.set.CardSet
import com.rak.config.Step import com.rak.model.set.RegionalSet
import com.rak.util.XPathUtil
import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.Jsoup import org.jsoup.Jsoup
import org.jsoup.nodes.Document import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
@ApplicationScoped @ApplicationScoped
class ScrapeService( class ScrapeService(
private val sourceService: SourceService private val sourceService: SourceService,
private val extractionService: ExtractionService,
private val setExtractionService: SetExtractionService,
private val regionalSetExtractionService: RegionalSetExtractionService
) { ) {
companion object {
private val TEXT_NODE_MATCHER: Regex = Regex("text\\(\\)$")
}
private fun extractTextFromRootBySteps( fun scrapeSet(
root: Element,
steps: Set<Step>
): String? {
var currentElement: Element? = root.clone()
var result: String? = null
for (index in 0 until steps.size) {
val currentStep = steps.elementAtOrNull(index) ?: return null
if (currentElement == null) {
throw IllegalStateException()
}
if (index == steps.size - 1) {
result = XPathUtil.extractResult(currentElement, currentStep.value())
}
else {
currentElement = XPathUtil.getNextElement(currentElement, currentStep.value())
}
}
return result
}
fun extractSet(
provider: String, provider: String,
setName: String, setName: String,
): List<Map<String, String>> { ): CardSet {
val source = val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val path: String = normalizePath(setName) val path: String = normalizePath(setName)
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get() val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
val regionalSetSelector = source.getItems().regionalSet().get()
val regionalSetRoot = document.select(regionalSetSelector.rootSelector().value()) // return extractionService.extractSet(setName, document, provider)
return setExtractionService.extract(document, source, source.getTargets().getSetConfig().get())
return regionalSetRoot.map {
val setId: String? = extractTextFromRootBySteps(
it,
regionalSetSelector.idSelector().steps()
)
val setLanguage: String? = extractTextFromRootBySteps(
it,
regionalSetSelector.languageSelector().steps()
)
val setKey: String? = extractTextFromRootBySteps(
it,
regionalSetSelector.regionKeySelector().steps()
)
mapOf(
Pair("id", setId ?: "N/A"),
Pair("language", setLanguage ?: "N/A"),
Pair("key", setKey ?: "N/A"),
)
}
} }
fun scrapeRegionalSet(
provider: String,
setName: String,
): RegionalSet {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
fun extractCard( val path: String = normalizePath(setName)
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
return regionalSetExtractionService.extract(document, source, source.getTargets().getSetConfig().get())
}
fun scrapeCard(
provider: String, provider: String,
cardName: String, cardName: String,
): Map<String, String> { ): Card? {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val path: String = normalizePath(cardName) val path: String = normalizePath(cardName)
return mapOf() val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
return extractionService.extractCard(document, provider)
} }
private fun normalizePath(path: String): String = path private fun normalizePath(path: String): String = path

View File

@@ -0,0 +1,42 @@
package com.rak.service
import com.rak.config.model.ProviderConfig
import com.rak.config.model.ScrapeTargetFieldConfig
import com.rak.config.model.SetScrapeTargetConfig
import com.rak.model.exception.NotImplementedException
import com.rak.model.set.CardSet
import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.nodes.Element
/**
 * Extraction service producing a [CardSet] from a [SetScrapeTargetConfig] by delegating to
 * [RegionalSetExtractionService].
 *
 * Only single-item extraction is available; bulk extraction is not implemented.
 */
@ApplicationScoped
class SetExtractionService(
    private val regionalSetExtractionService: RegionalSetExtractionService
) : AbstractExtractionService<CardSet, SetScrapeTargetConfig>() {

    /**
     * Maps the keys `prefix`, `regionCode` and `region` to the field configuration that
     * describes how each value is scraped.
     */
    override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> = mapOf(
        "prefix" to this.getIdConfig(),
        "regionCode" to this.getRegionKeyConfig(),
        "region" to this.getLanguageConfig(),
    )

    /**
     * Builds a [CardSet] whose regional sets are everything the regional set service
     * extracts from [element].
     */
    override fun extract(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: SetScrapeTargetConfig
    ): CardSet {
        val regionalSets = regionalSetExtractionService
            .extractMultiple(element, providerConfig, extractionConfig)
            .toSet()
        // NOTE(review): the set name is the fixed literal "test" - looks like a placeholder,
        // since this signature carries no set name.
        return CardSet("test", regionalSets)
    }

    /**
     * Bulk extraction is not supported.
     *
     * @throws NotImplementedException always
     */
    override fun extractMultiple(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: SetScrapeTargetConfig
    ): Collection<CardSet> = throw NotImplementedException("Not implemented")
}

View File

@@ -1,15 +1,86 @@
package com.rak.service package com.rak.service
import com.rak.config.SourceConfig import com.rak.config.model.CardScrapeTargetConfig
import com.rak.config.SourcesConfiguration import com.rak.config.model.SetScrapeTargetConfig
import com.rak.config.model.ProviderConfig
import com.rak.config.model.SourcesConfig
import com.rak.model.exception.InvalidConfigurationException
import io.quarkus.runtime.Startup
import jakarta.annotation.PostConstruct
import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.context.ApplicationScoped
@Startup
@ApplicationScoped @ApplicationScoped
class SourceService ( class SourceService(
val sourcesConfiguration: SourcesConfiguration val sourcesConfiguration: SourcesConfig
) { ) {
fun getSources(): Set<SourceConfig> = sourcesConfiguration.getSources().toSet() @PostConstruct
fun getSourceById(id: String): SourceConfig? = getSources().firstOrNull { it.getId() == id } fun init() {
sourcesConfiguration.getSources().forEach { validateSource(it) }
}
/**
 * Validates the set and card target configurations of [providerConfig]; a target that is
 * not configured is skipped.
 */
private fun validateSource(providerConfig: ProviderConfig) {
    val setTarget = providerConfig.getTargets().getSetConfig()
    val cardTarget = providerConfig.getTargets().getCardConfig()

    setTarget.ifPresent { validateSetExtractConfig(it) }
    cardTarget.ifPresent { validateCardExtractConfig(it) }
}
/**
 * Ensures the set target uses exactly one rooting strategy: either a single global
 * extraction root, or a dedicated root on each of the language, id and region-key fields.
 *
 * @throws InvalidConfigurationException if global and dedicated roots are mixed, or if a
 * dedicated root is missing while no global root is configured
 */
private fun validateSetExtractConfig(setExtractConfig: SetScrapeTargetConfig) {
    val fieldConfigs = listOf(
        setExtractConfig.getLanguageConfig(),
        setExtractConfig.getIdConfig(),
        setExtractConfig.getRegionKeyConfig()
    )
    val hasGlobalRoot = setExtractConfig.getRootConfig().isPresent

    when {
        // A global root excludes dedicated roots on the individual fields.
        hasGlobalRoot && fieldConfigs.any { it.getRootConfig().isPresent } ->
            throw InvalidConfigurationException(
                "Dedicated extraction roots cannot be set when a global extraction root is configured"
            )
        // Without a global root every field has to bring its own.
        !hasGlobalRoot && !fieldConfigs.all { it.getRootConfig().isPresent } ->
            throw InvalidConfigurationException(
                "Dedicated extraction roots have to be set when a global extraction root is not configured"
            )
    }
}
/**
 * Ensures the card target uses exactly one rooting strategy: either a single global
 * extraction root, or a dedicated root on each of the English name, description, card type,
 * attack and defense fields.
 *
 * @throws InvalidConfigurationException if global and dedicated roots are mixed, or if a
 * dedicated root is missing while no global root is configured
 */
private fun validateCardExtractConfig(cardScrapeTargetConfig: CardScrapeTargetConfig) {
    val fieldConfigs = listOf(
        cardScrapeTargetConfig.getEnglishNameConfig(),
        cardScrapeTargetConfig.getDescriptionConfig(),
        cardScrapeTargetConfig.getCardTypeConfig(),
        cardScrapeTargetConfig.getAttackConfig(),
        cardScrapeTargetConfig.getDefenseConfig(),
    )

    if (cardScrapeTargetConfig.getRootConfig().isPresent) {
        val anyDedicatedRoot = fieldConfigs.any { it.getRootConfig().isPresent }
        if (anyDedicatedRoot) {
            throw InvalidConfigurationException(
                "Dedicated extraction roots cannot be set when a global extraction root is configured"
            )
        }
        return
    }

    val everyFieldRooted = fieldConfigs.all { it.getRootConfig().isPresent }
    if (!everyFieldRooted) {
        throw InvalidConfigurationException(
            "Dedicated extraction roots have to be set when a global extraction root is not configured"
        )
    }
}
fun getSources(): Set<ProviderConfig> = sourcesConfiguration.getSources().toSet()
fun getSourceById(id: String): ProviderConfig? = getSources().firstOrNull { it.getId() == id }
} }

View File

@@ -0,0 +1,19 @@
package com.rak.util
import org.jsoup.nodes.Element
/** CSS-selector counterparts of the element lookup helpers; not instantiable. */
class CssUtil private constructor() {
    companion object {
        /** Returns the first element below [element] matching the CSS query [path], or `null` if none matches. */
        fun getNextElement(element: Element, path: String): Element? = element.select(path).firstOrNull()

        /** Returns the text of the first element below [root] matching the CSS query [path], or `null` if none matches. */
        fun extractResult(root: Element, path: String): String? {
            val firstMatch = root.select(path).firstOrNull()
            return firstMatch?.text()
        }
    }
}

View File

@@ -26,8 +26,8 @@ class XPathUtil private constructor() {
.firstOrNull()?.text() .firstOrNull()?.text()
} }
fun getNextElement(root: Element, path: String): Element? { fun getNextElement(element: Element, path: String): Element? {
return root.selectXpath(path).firstOrNull() return element.selectXpath(path).firstOrNull()
} }
fun extractResult(root: Element, path: String): String? { fun extractResult(root: Element, path: String): String? {

View File

@@ -1 +1,2 @@
com.rak.config.converter.TypeSelectorConverter com.rak.config.converter.TypeSelectorConverter
com.rak.config.converter.DiscriminatorDirectionConverter

View File

@@ -9,8 +9,11 @@ scraper:
domain: "yugioh-card.com" domain: "yugioh-card.com"
url-patterns: url-patterns:
- "^https://www\\.yugioh-card\\.com/[a-z]{2}/products/.*$" - "^https://www\\.yugioh-card\\.com/[a-z]{2}/products/.*$"
selectors: targets:
card: card:
root:
type: css
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
name: name:
steps: steps:
- type: "css" - type: "css"
@@ -27,8 +30,8 @@ scraper:
domain: "yugioh.fandom.com" domain: "yugioh.fandom.com"
url-patterns: url-patterns:
- "^https://yugioh\\.fandom\\.com/wiki/.*$" - "^https://yugioh\\.fandom\\.com/wiki/.*$"
selectors: targets:
regional-set: set:
root: root:
type: css type: css
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li" value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
@@ -36,6 +39,11 @@ scraper:
steps: steps:
- type: xpath - type: xpath
value: "//li/text()" value: "//li/text()"
transform:
- name: "replace"
parameters:
- " ("
- ""
language: language:
steps: steps:
- type: xpath - type: xpath
@@ -45,4 +53,69 @@ scraper:
region-key: region-key:
steps: steps:
- type: xpath - type: xpath
value: "//li/abbr/text()" value: "//li/abbr/text()"
card-print:
multi: true
root:
type: css
value: ".tabber.wds-tabber > div"
discriminator:
direction: asc
root:
type: css
value: ".wds-tabs__tab"
steps:
- type: xpath
value: "//li/div/a/text()"
id:
steps:
- type: xpath
value: ".//table/tbody/tr[2]/td[1]/a/text()"
name:
steps:
- type: xpath
value: ".//table/tbody/tr[2]/td[1]/a/text()"
regional-name:
steps:
- type: xpath
value: ".//table/tbody/tr[2]/td[2]/a/text()"
rarity:
steps:
- type: xpath
value: ".//table/tbody/tr[2]/td[3]/a/text()"
card:
name:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "./tbody/tr[3]/th/text()"
description:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "b:contains(Card descriptions)"
type:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "b:contains(Card descriptions)"
attack:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "b:contains(Card descriptions)"
defense:
root:
type: css
value: ".cardTable"
steps:
- type: "xpath"
value: "b:contains(Card descriptions)"