Refactor extraction logic
Add required models
This commit is contained in:
@@ -1,8 +0,0 @@
|
|||||||
package com.rak.config
|
|
||||||
|
|
||||||
import io.smallrye.config.WithName
|
|
||||||
|
|
||||||
interface AbstractModelDefinition {
|
|
||||||
@WithName("root")
|
|
||||||
fun rootSelector(): Step
|
|
||||||
}
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
package com.rak.config
|
|
||||||
|
|
||||||
import io.smallrye.config.WithName
|
|
||||||
|
|
||||||
interface CardDefinition {
|
|
||||||
@WithName("name")
|
|
||||||
fun nameSelector(): SelectorDefinition
|
|
||||||
@WithName("attack")
|
|
||||||
fun attackSelector(): SelectorDefinition
|
|
||||||
@WithName("effect")
|
|
||||||
fun effectSelector(): SelectorDefinition
|
|
||||||
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
package com.rak.config
|
|
||||||
|
|
||||||
import java.util.*
|
|
||||||
|
|
||||||
interface Items {
|
|
||||||
fun card(): Optional<CardDefinition>
|
|
||||||
fun regionalSet(): Optional<RegionalSetDefinition>
|
|
||||||
}
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
package com.rak.config
|
|
||||||
|
|
||||||
import io.smallrye.config.WithName
|
|
||||||
|
|
||||||
interface RegionalSetDefinition : AbstractModelDefinition {
|
|
||||||
@WithName("id")
|
|
||||||
fun idSelector(): SelectorDefinition
|
|
||||||
@WithName("language")
|
|
||||||
fun languageSelector(): SelectorDefinition
|
|
||||||
@WithName("region-key")
|
|
||||||
fun regionKeySelector(): SelectorDefinition
|
|
||||||
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
package com.rak.config
|
|
||||||
|
|
||||||
import java.util.*
|
|
||||||
|
|
||||||
interface SelectorDefinition {
|
|
||||||
fun steps(): Set<Step>
|
|
||||||
fun transform(): Optional<List<TransformationStep>>
|
|
||||||
}
|
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
package com.rak.config.converter
|
package com.rak.config.converter
|
||||||
|
|
||||||
import com.rak.model.scrape.selector.Selector
|
import com.rak.model.Selector
|
||||||
import org.eclipse.microprofile.config.spi.Converter
|
import org.eclipse.microprofile.config.spi.Converter
|
||||||
|
|
||||||
class TypeSelectorConverter : Converter<Selector> {
|
class TypeSelectorConverter : Converter<Selector> {
|
||||||
|
|||||||
@@ -0,0 +1,11 @@
|
|||||||
|
package com.rak.config.model
|
||||||
|
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
import java.util.Optional
|
||||||
|
|
||||||
|
/**
 * Configuration entries shared by the scrape target definitions that extend this interface.
 *
 * Both entries are optional.
 */
interface AbstractScrapeTargetConfig {

    /** Extraction settings read from the `root` entry, when one is configured. */
    @WithName("root")
    fun getRootConfig(): Optional<ExtractConfig>

    /** Flag read from the `multi` entry, when one is configured. */
    @WithName("multi")
    fun isMulti(): Optional<Boolean>
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
package com.rak.config.model
|
||||||
|
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
|
/**
 * Scrape target configuration for a card.
 *
 * Each accessor returns the field configuration read from the entry named in its
 * [WithName] annotation.
 */
interface CardScrapeTargetConfig : AbstractScrapeTargetConfig {

    /** Field configuration read from the `name` entry. */
    @WithName("name")
    fun getEnglishName(): ScrapeTargetFieldConfig

    /** Field configuration read from the `description` entry. */
    @WithName("description")
    fun getDescription(): ScrapeTargetFieldConfig

    /** Field configuration read from the `type` entry. */
    @WithName("type")
    fun getCardType(): ScrapeTargetFieldConfig

    /** Field configuration read from the `attack` entry. */
    @WithName("attack")
    fun getAttack(): ScrapeTargetFieldConfig

    /** Field configuration read from the `defense` entry. */
    @WithName("defense")
    fun getDefense(): ScrapeTargetFieldConfig
}
|
||||||
@@ -1,13 +1,14 @@
|
|||||||
package com.rak.config
|
package com.rak.config.model
|
||||||
|
|
||||||
import com.rak.config.converter.TypeSelectorConverter
|
import com.rak.config.converter.TypeSelectorConverter
|
||||||
import com.rak.model.scrape.selector.Selector
|
import com.rak.model.Selector
|
||||||
import io.smallrye.config.WithConverter
|
import io.smallrye.config.WithConverter
|
||||||
import io.smallrye.config.WithName
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
interface Step {
|
interface ExtractConfig {
|
||||||
@WithConverter(TypeSelectorConverter::class)
|
@WithConverter(TypeSelectorConverter::class)
|
||||||
@WithName("type")
|
@WithName("type")
|
||||||
fun selectorType(): Selector // e.g. css or xpath
|
fun selectorType(): Selector
|
||||||
fun value(): String
|
@WithName("value")
|
||||||
|
fun getQueryString(): String
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
package com.rak.config.model
|
||||||
|
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
|
/**
 * Scrape target configuration for a regional set.
 *
 * Each accessor returns the field configuration read from the entry named in its
 * [WithName] annotation.
 */
interface RegionalSetScrapeTargetConfig : AbstractScrapeTargetConfig {

    /** Field configuration read from the `id` entry. */
    @WithName("id")
    fun idSelector(): ScrapeTargetFieldConfig

    /** Field configuration read from the `language` entry. */
    @WithName("language")
    fun languageSelector(): ScrapeTargetFieldConfig

    /** Field configuration read from the `region-key` entry. */
    @WithName("region-key")
    fun regionKeySelector(): ScrapeTargetFieldConfig
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
package com.rak.config.model
|
||||||
|
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
import java.util.*
|
||||||
|
|
||||||
|
/**
 * Configuration describing how a single field of a scrape target is obtained.
 */
interface ScrapeTargetFieldConfig {

    /** Extraction settings read from the `root` entry, when one is configured. */
    @WithName("root")
    fun getRootConfig(): Optional<ExtractConfig>

    /** Extraction steps read from the `steps` entry. */
    @WithName("steps")
    fun getSteps(): List<ExtractConfig>

    /** Transformation steps for the field, when any are configured. */
    fun transform(): Optional<List<TransformationStepConfig>>
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.rak.config
|
package com.rak.config.model
|
||||||
|
|
||||||
import io.smallrye.config.WithName
|
import io.smallrye.config.WithName
|
||||||
import java.util.*
|
import java.util.*
|
||||||
@@ -13,7 +13,7 @@ interface SourceConfig {
|
|||||||
fun getDomain(): String
|
fun getDomain(): String
|
||||||
@WithName("url-patterns")
|
@WithName("url-patterns")
|
||||||
fun getUrlPatterns(): Optional<MutableSet<String>>
|
fun getUrlPatterns(): Optional<MutableSet<String>>
|
||||||
@WithName("selectors")
|
@WithName("targets")
|
||||||
fun getItems(): Items
|
fun getTargets(): TargetsConfig
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -1,10 +1,10 @@
|
|||||||
package com.rak.config
|
package com.rak.config.model
|
||||||
|
|
||||||
import io.smallrye.config.ConfigMapping
|
import io.smallrye.config.ConfigMapping
|
||||||
import io.smallrye.config.WithName
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
@ConfigMapping(prefix = "scraper")
|
@ConfigMapping(prefix = "scraper")
|
||||||
interface SourcesConfiguration {
|
interface SourcesConfig {
|
||||||
|
|
||||||
@WithName("sources")
|
@WithName("sources")
|
||||||
fun getSources(): MutableList<SourceConfig>
|
fun getSources(): MutableList<SourceConfig>
|
||||||
8
src/main/kotlin/com/rak/config/model/TargetsConfig.kt
Normal file
8
src/main/kotlin/com/rak/config/model/TargetsConfig.kt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
package com.rak.config.model
|
||||||
|
|
||||||
|
import java.util.*
|
||||||
|
|
||||||
|
/**
 * The scrape targets a source may define; each one is optional.
 */
interface TargetsConfig {

    /** Card target configuration, when present. */
    fun card(): Optional<CardScrapeTargetConfig>

    /** Regional set target configuration, when present. */
    fun regionalSet(): Optional<RegionalSetScrapeTargetConfig>
}
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
package com.rak.config
|
package com.rak.config.model
|
||||||
|
|
||||||
import com.rak.config.converter.EmptyStringConverter
|
import com.rak.config.converter.EmptyStringConverter
|
||||||
import io.smallrye.config.WithConverter
|
import io.smallrye.config.WithConverter
|
||||||
|
|
||||||
interface TransformationStep {
|
interface TransformationStepConfig {
|
||||||
fun name(): String
|
fun name(): String
|
||||||
@WithConverter(EmptyStringConverter::class)
|
@WithConverter(EmptyStringConverter::class)
|
||||||
fun parameters(): MutableList<String>
|
fun parameters(): MutableList<String>
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
package com.rak.controller
|
package com.rak.controller
|
||||||
|
|
||||||
import com.rak.model.RegionalSet
|
import com.rak.model.card.Card
|
||||||
|
import com.rak.model.set.CardSet
|
||||||
import com.rak.service.ScrapeService
|
import com.rak.service.ScrapeService
|
||||||
import jakarta.ws.rs.Consumes
|
import jakarta.ws.rs.Consumes
|
||||||
import jakarta.ws.rs.GET
|
import jakarta.ws.rs.GET
|
||||||
@@ -27,7 +28,7 @@ class ScrapeController(
|
|||||||
provider: String,
|
provider: String,
|
||||||
@RestQuery
|
@RestQuery
|
||||||
setName: String
|
setName: String
|
||||||
): List<RegionalSet> {
|
): CardSet {
|
||||||
return scrapeService.scrapeSet(
|
return scrapeService.scrapeSet(
|
||||||
provider,
|
provider,
|
||||||
setName
|
setName
|
||||||
@@ -43,7 +44,7 @@ class ScrapeController(
|
|||||||
provider: String,
|
provider: String,
|
||||||
@RestQuery
|
@RestQuery
|
||||||
cardName: String
|
cardName: String
|
||||||
): Map<String, String> {
|
): Card? {
|
||||||
return scrapeService.scrapeCard(
|
return scrapeService.scrapeCard(
|
||||||
provider,
|
provider,
|
||||||
cardName
|
cardName
|
||||||
|
|||||||
@@ -1,7 +0,0 @@
|
|||||||
package com.rak.model
|
|
||||||
|
|
||||||
data class RegionalSet(
|
|
||||||
val id: String,
|
|
||||||
val language: String,
|
|
||||||
val key: String
|
|
||||||
)
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.rak.model.scrape.selector
|
package com.rak.model
|
||||||
|
|
||||||
enum class Selector {
|
enum class Selector {
|
||||||
CSS,
|
CSS,
|
||||||
11
src/main/kotlin/com/rak/model/card/Attribute.kt
Normal file
11
src/main/kotlin/com/rak/model/card/Attribute.kt
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
package com.rak.model.card
|
||||||
|
|
||||||
|
/**
 * The attribute values a card can carry.
 *
 * The declaration order is kept as is because it determines each constant's ordinal.
 */
enum class Attribute {
    WIND,
    WATER,
    FIRE,
    EARTH,
    LIGHT,
    DARK,
    DIVINE,
}
|
||||||
8
src/main/kotlin/com/rak/model/card/Card.kt
Normal file
8
src/main/kotlin/com/rak/model/card/Card.kt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
package com.rak.model.card
|
||||||
|
|
||||||
|
/**
 * Base type for the card models; concrete card classes supply every property.
 *
 * @property id numeric identifier of the card
 * @property cardType top-level kind of the card
 * @property description description text of the card
 * @property name name of the card
 */
abstract class Card {

    abstract val id: Int

    abstract val cardType: CardType

    abstract val description: String

    abstract val name: String
}
|
||||||
8
src/main/kotlin/com/rak/model/card/CardType.kt
Normal file
8
src/main/kotlin/com/rak/model/card/CardType.kt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
package com.rak.model.card
|
||||||
|
|
||||||
|
/**
 * Top-level kind of a card.
 *
 * The declaration order is kept as is because it determines each constant's ordinal.
 */
enum class CardType {
    MONSTER,
    SPELL,
    TRAP,
    UNKNOWN,
}
|
||||||
3
src/main/kotlin/com/rak/model/card/ICardType.kt
Normal file
3
src/main/kotlin/com/rak/model/card/ICardType.kt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
package com.rak.model.card
|
||||||
|
|
||||||
|
/** Marker interface without members, implemented by card sub-type enums. */
interface ICardType
|
||||||
12
src/main/kotlin/com/rak/model/card/LinkArrow.kt
Normal file
12
src/main/kotlin/com/rak/model/card/LinkArrow.kt
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
package com.rak.model.card
|
||||||
|
|
||||||
|
/**
 * The eight directions a link arrow can point to.
 *
 * The declaration order is kept as is because it determines each constant's ordinal.
 */
enum class LinkArrow {
    TOP_LEFT,
    TOP,
    TOP_RIGHT,
    LEFT,
    RIGHT,
    BOTTOM_LEFT,
    BOTTOM,
    BOTTOM_RIGHT,
}
|
||||||
20
src/main/kotlin/com/rak/model/card/MonsterCard.kt
Normal file
20
src/main/kotlin/com/rak/model/card/MonsterCard.kt
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
package com.rak.model.card
|
||||||
|
|
||||||
|
/**
 * Card model for monster cards.
 *
 * The numeric stats, the effect texts and the pendulum/link values are nullable and
 * default to `null`; [isPendulum] defaults to `false`.
 *
 * @property monsterEffect effect text of the monster, if any
 * @property attack attack value, if any
 * @property defense defense value, if any
 * @property level level value, if any
 * @property isPendulum pendulum flag
 * @property pendulumScale pendulum scale value, if any
 * @property pendulumEffect pendulum effect text, if any
 * @property linkValue link value, if any
 * @property subType monster card sub-type
 * @property monsterType monster type
 * @property attribute attribute of the monster
 * @property linkArrows link arrows of the card
 */
data class MonsterCard(
    override val id: Int,
    override val cardType: CardType,
    override val description: String,
    override val name: String,
    val monsterEffect: String? = null,
    val attack: Int? = null,
    val defense: Int? = null,
    val level: Int? = null,
    val isPendulum: Boolean = false,
    val pendulumScale: Int? = null,
    val pendulumEffect: String? = null,
    val linkValue: Int? = null,
    val subType: MonsterCardType,
    val monsterType: MonsterType,
    val attribute: Attribute,
    val linkArrows: Set<LinkArrow>,
) : Card()
|
||||||
11
src/main/kotlin/com/rak/model/card/MonsterCardType.kt
Normal file
11
src/main/kotlin/com/rak/model/card/MonsterCardType.kt
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
package com.rak.model.card
|
||||||
|
|
||||||
|
/**
 * Sub-types of a monster card.
 *
 * The declaration order is kept as is because it determines each constant's ordinal.
 */
enum class MonsterCardType : ICardType {
    NORMAL,
    EFFECT,
    RITUAL,
    FUSION,
    SYNCHRO,
    XYZ,
    LINK,
}
|
||||||
32
src/main/kotlin/com/rak/model/card/MonsterType.kt
Normal file
32
src/main/kotlin/com/rak/model/card/MonsterType.kt
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
package com.rak.model.card
|
||||||
|
|
||||||
|
// TODO string value for proper names
|
||||||
|
// TODO consider adding unknown type
|
||||||
|
/**
 * The monster types a monster card can have.
 *
 * The declaration order is kept as is because it determines each constant's ordinal.
 */
enum class MonsterType {
    AQUA,
    BEAST,
    BEAST_WARRIOR,
    CREATOR_GOD,
    CYBERSE,
    DINOSAUR,
    DIVINE_BEAST,
    DRAGON,
    FAIRY,
    FIEND,
    FISH,
    INSECT,
    ILLUSION,
    MACHINE,
    PLANT,
    PSYCHIC,
    PYRO,
    REPTILE,
    ROCK,
    SEA_SERPENT,
    SPELLCASTER,
    THUNDER,
    WARRIOR,
    WINGED_BEAST,
    WYRM,
    ZOMBIE,
}
|
||||||
9
src/main/kotlin/com/rak/model/card/SpellCard.kt
Normal file
9
src/main/kotlin/com/rak/model/card/SpellCard.kt
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
package com.rak.model.card
|
||||||
|
|
||||||
|
/**
 * Card model for spell cards.
 *
 * @property subType spell card sub-type
 */
data class SpellCard(
    override val id: Int,
    override val cardType: CardType,
    override val description: String,
    override val name: String,
    val subType: SpellCardType,
) : Card()
|
||||||
11
src/main/kotlin/com/rak/model/card/SpellCardType.kt
Normal file
11
src/main/kotlin/com/rak/model/card/SpellCardType.kt
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
package com.rak.model.card
|
||||||
|
|
||||||
|
// TODO fix underscore for all types with string value
|
||||||
|
/**
 * Sub-types of a spell card.
 *
 * The declaration order is kept as is because it determines each constant's ordinal.
 */
enum class SpellCardType {
    NORMAL,
    CONTINUOUS,
    EQUIP,
    QUICK_PLAY,
    FIELD,
    RITUAL,
}
|
||||||
9
src/main/kotlin/com/rak/model/card/TrapCard.kt
Normal file
9
src/main/kotlin/com/rak/model/card/TrapCard.kt
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
package com.rak.model.card
|
||||||
|
|
||||||
|
/**
 * Card model for trap cards.
 *
 * @property subType trap card sub-type
 */
data class TrapCard(
    override val id: Int,
    override val cardType: CardType,
    override val description: String,
    override val name: String,
    val subType: TrapCardType,
) : Card()
|
||||||
7
src/main/kotlin/com/rak/model/card/TrapCardType.kt
Normal file
7
src/main/kotlin/com/rak/model/card/TrapCardType.kt
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
package com.rak.model.card
|
||||||
|
|
||||||
|
/**
 * Sub-types of a trap card.
 *
 * The declaration order is kept as is because it determines each constant's ordinal.
 */
enum class TrapCardType {
    NORMAL,
    CONTINUOUS,
    COUNTER,
}
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
package com.rak.model.exception
|
||||||
|
|
||||||
|
/**
 * Unchecked exception signalling that an expected element could not be found.
 *
 * @param message description of the failure
 */
class ElementNotFoundException(
    message: String,
) : RuntimeException(message)
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
package com.rak.model.exception
|
||||||
|
|
||||||
|
/**
 * Unchecked exception signalling an invalid configuration.
 *
 * @param message description of the failure
 */
class InvalidConfigurationException(
    message: String,
) : RuntimeException(message)
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
package com.rak.model.exception
|
||||||
|
|
||||||
|
import java.lang.RuntimeException
|
||||||
|
|
||||||
|
/**
 * Unchecked exception signalling that a provider does not support a requested query.
 *
 * @param message description of the failure
 */
class UnsupportedQueryForProviderException(message: String) : RuntimeException(message)
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
package com.rak.model.scrape
|
|
||||||
|
|
||||||
abstract class AbstractScraper{
|
|
||||||
}
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
package com.rak.model.scrape
|
|
||||||
|
|
||||||
class JsoupScraper : AbstractScraper() {
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
package com.rak.model.scrape
|
|
||||||
|
|
||||||
data class ScrapeJob(
|
|
||||||
val url: String,
|
|
||||||
)
|
|
||||||
8
src/main/kotlin/com/rak/model/set/CardSet.kt
Normal file
8
src/main/kotlin/com/rak/model/set/CardSet.kt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
package com.rak.model.set
|
||||||
|
|
||||||
|
import kotlin.collections.Set
|
||||||
|
|
||||||
|
/**
 * A card set together with its regional sets.
 *
 * @property name name of the set
 * @property regionalSets regional sets belonging to this set
 */
data class CardSet(
    val name: String,
    val regionalSets: Set<RegionalSet>,
)
|
||||||
33
src/main/kotlin/com/rak/model/set/RegionalSet.kt
Normal file
33
src/main/kotlin/com/rak/model/set/RegionalSet.kt
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
package com.rak.model.set
|
||||||
|
|
||||||
|
/**
 * A regional set, described by three string values.
 *
 * @property prefix value taken from the id list in [flattenFromMemberLists]
 * @property region value taken from the language list in [flattenFromMemberLists]
 * @property regionCode value taken from the region key alias list in [flattenFromMemberLists]
 */
data class RegionalSet(
    val prefix: String,
    val region: String,
    val regionCode: String,
) {

    companion object {

        /**
         * Combines three parallel lists into regional sets, pairing the entries by position.
         *
         * Entry `i` of the result is built from `idList[i]`, `languageList[i]` and
         * `regionKeyAliasList[i]`. Because the result is a set, positions that produce equal
         * regional sets collapse into a single entry.
         *
         * @param idList values used as [RegionalSet.prefix]
         * @param languageList values used as [RegionalSet.region]
         * @param regionKeyAliasList values used as [RegionalSet.regionCode]
         * @return a mutable set holding the combined regional sets; empty for empty input
         * @throws IllegalArgumentException if the three lists do not all have the same size
         */
        fun flattenFromMemberLists(
            idList: List<String>,
            languageList: List<String>,
            regionKeyAliasList: List<String>,
        ): MutableSet<RegionalSet> {
            // All three sizes must agree. The previous `&&` between the two inequality checks
            // only rejected input when BOTH other lists differed from idList, so a single
            // mismatching list ended in an index error further down.
            require(idList.size == languageList.size && idList.size == regionKeyAliasList.size) {
                "Lists have to be the same size"
            }

            return idList.indices.mapTo(mutableSetOf<RegionalSet>()) { index ->
                RegionalSet(
                    prefix = idList[index],
                    region = languageList[index],
                    regionCode = regionKeyAliasList[index],
                )
            }
        }
    }
}
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
package com.rak.model.transform
|
package com.rak.model.transform
|
||||||
|
|
||||||
import com.rak.config.TransformationStep
|
import com.rak.config.model.TransformationStepConfig
|
||||||
import java.util.concurrent.ConcurrentHashMap
|
import java.util.concurrent.ConcurrentHashMap
|
||||||
|
|
||||||
class TransformationRegistry {
|
class TransformationRegistry {
|
||||||
@@ -34,7 +34,7 @@ class TransformationRegistry {
|
|||||||
parameterizedTransformation.put(name, transformation)
|
parameterizedTransformation.put(name, transformation)
|
||||||
}
|
}
|
||||||
|
|
||||||
fun getTransformation(transformationStep: TransformationStep): AbstractTransformation {
|
fun getTransformation(transformationStep: TransformationStepConfig): AbstractTransformation {
|
||||||
val name = transformationStep.name()
|
val name = transformationStep.name()
|
||||||
val parameters = transformationStep.parameters()
|
val parameters = transformationStep.parameters()
|
||||||
return when {
|
return when {
|
||||||
@@ -56,7 +56,7 @@ class TransformationRegistry {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fun applyTransformations(input: String, steps: List<TransformationStep>): String {
|
fun applyTransformations(input: String, steps: List<TransformationStepConfig>): String {
|
||||||
return steps.fold(input) { current, step ->
|
return steps.fold(input) { current, step ->
|
||||||
val actualStep = getTransformation(step)
|
val actualStep = getTransformation(step)
|
||||||
when (actualStep) {
|
when (actualStep) {
|
||||||
|
|||||||
@@ -1,44 +1,82 @@
|
|||||||
package com.rak.service
|
package com.rak.service
|
||||||
|
|
||||||
import com.rak.config.Step
|
import com.rak.config.model.ExtractConfig
|
||||||
import com.rak.model.RegionalSet
|
import com.rak.model.Selector
|
||||||
import com.rak.model.transform.TransformationRegistry
|
import com.rak.model.card.Card
|
||||||
|
import com.rak.model.exception.ElementNotFoundException
|
||||||
|
import com.rak.model.set.CardSet
|
||||||
|
import com.rak.model.set.RegionalSet
|
||||||
import com.rak.util.XPathUtil
|
import com.rak.util.XPathUtil
|
||||||
import jakarta.enterprise.context.ApplicationScoped
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
import org.jsoup.nodes.Document
|
import org.jsoup.nodes.Document
|
||||||
import org.jsoup.nodes.Element
|
import org.jsoup.nodes.Element
|
||||||
|
import org.jsoup.select.Elements
|
||||||
|
|
||||||
@ApplicationScoped
|
@ApplicationScoped
|
||||||
class ExtractionService(
|
class ExtractionService(
|
||||||
private val sourceService: SourceService,
|
private val sourceService: SourceService,
|
||||||
) {
|
) {
|
||||||
|
|
||||||
private val transformationRegistry: TransformationRegistry = TransformationRegistry()
|
/**
 * Builds a [CardSet] named [setName] whose regional sets are extracted from [root]
 * using the configuration of [provider].
 *
 * @param setName name stored on the resulting set
 * @param root element the regional sets are extracted from
 * @param provider identifier of the source whose configuration is used
 * @return the card set with all extracted regional sets
 */
fun extractSet(setName: String, root: Element, provider: String): CardSet {
    val extractedRegionalSets = extractRegionalSets(root, provider)
    return CardSet(name = setName, regionalSets = extractedRegionalSets)
}
|
||||||
|
|
||||||
fun extractSet(document: Document, provider: String): List<RegionalSet> {
|
fun extractRegionalSet(root: Element, provider: String): RegionalSet {
|
||||||
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
||||||
val regionalSetSelector = source.getItems().regionalSet().get()
|
val regionalSetSelector = source.getTargets().regionalSet().get()
|
||||||
|
|
||||||
val regionalSetRoot = document.select(regionalSetSelector.rootSelector().value())
|
if (regionalSetSelector.getRootConfig().isPresent) {
|
||||||
|
val setId: String = extractTextFromElementBySteps(
|
||||||
return regionalSetRoot.map {
|
root,
|
||||||
var setId: String = extractTextFromRootBySteps(
|
regionalSetSelector.idSelector().getSteps()
|
||||||
it,
|
|
||||||
regionalSetSelector.idSelector().steps()
|
|
||||||
) ?: throw IllegalStateException("Parameter 'id' could not be found")
|
) ?: throw IllegalStateException("Parameter 'id' could not be found")
|
||||||
|
val setLanguage: String = extractTextFromElementBySteps(
|
||||||
setId = transformationRegistry.applyTransformations(setId, regionalSetSelector.idSelector().transform().get())
|
root,
|
||||||
|
regionalSetSelector.languageSelector().getSteps()
|
||||||
val setLanguage: String = extractTextFromRootBySteps(
|
|
||||||
it,
|
|
||||||
regionalSetSelector.languageSelector().steps()
|
|
||||||
) ?: throw IllegalStateException("Parameter 'language' could not be found")
|
) ?: throw IllegalStateException("Parameter 'language' could not be found")
|
||||||
val setKey: String = extractTextFromRootBySteps(
|
val setKey: String = extractTextFromElementBySteps(
|
||||||
it,
|
root,
|
||||||
regionalSetSelector.regionKeySelector().steps()
|
regionalSetSelector.regionKeySelector().getSteps()
|
||||||
) ?: throw IllegalStateException("Parameter 'key' could not be found")
|
) ?: throw IllegalStateException("Parameter 'key' could not be found")
|
||||||
|
|
||||||
RegionalSet(
|
return RegionalSet(
|
||||||
|
setId,
|
||||||
|
setLanguage,
|
||||||
|
setKey
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
val setIdConfiguration = regionalSetSelector.idSelector()
|
||||||
|
if (!setIdConfiguration.getRootConfig().isPresent) {
|
||||||
|
throw RuntimeException("as[po") // TODO fix me
|
||||||
|
}
|
||||||
|
val rootConfiguration = setIdConfiguration.getRootConfig().get()
|
||||||
|
|
||||||
|
val setIdRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
|
||||||
|
val setId: String = extractTextFromElementBySteps(
|
||||||
|
setIdRoot,
|
||||||
|
setIdConfiguration.getSteps()
|
||||||
|
) ?: throw IllegalStateException("Parameter 'id' could not be found")
|
||||||
|
|
||||||
|
|
||||||
|
val setLanguageConfiguration = regionalSetSelector.idSelector()
|
||||||
|
val setLanguageRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
|
||||||
|
val setLanguage: String = extractTextFromElementBySteps(
|
||||||
|
setLanguageRoot,
|
||||||
|
setLanguageConfiguration.getSteps()
|
||||||
|
) ?: throw IllegalStateException("Parameter 'language' could not be found")
|
||||||
|
|
||||||
|
|
||||||
|
val setKeyConfiguration = regionalSetSelector.idSelector()
|
||||||
|
val setKeyRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
|
||||||
|
val setKey: String = extractTextFromElementBySteps(
|
||||||
|
setKeyRoot,
|
||||||
|
setKeyConfiguration.getSteps()
|
||||||
|
) ?: throw IllegalStateException("Parameter 'key' could not be found")
|
||||||
|
|
||||||
|
return RegionalSet(
|
||||||
setId,
|
setId,
|
||||||
setLanguage,
|
setLanguage,
|
||||||
setKey
|
setKey
|
||||||
@@ -46,10 +84,127 @@ class ExtractionService(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun extractTextFromRootBySteps(
|
/**
 * Extracts all regional sets below [root] using the configuration of [provider].
 *
 * When the regional set target has a root configured, every element matched by that root is
 * passed to [extractRegionalSet]. Otherwise the id, language and key values are collected
 * independently, each below the root configured on its own field, and then combined by
 * position through [RegionalSet.flattenFromMemberLists].
 *
 * @param root element the extraction starts from
 * @param provider identifier of the source whose configuration is used
 * @return the extracted regional sets
 * @throws IllegalArgumentException if no source is registered for [provider]
 * @throws IllegalStateException if a field value cannot be extracted from a matched element
 * @throws RuntimeException if a `NoSuchElementException` is raised while collecting the
 * per-field values, e.g. because a field has no root configured
 */
fun extractRegionalSets(root: Element, provider: String): Set<RegionalSet> {
    val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
    val regionalSetSelector = source.getTargets().regionalSet().get()

    if (regionalSetSelector.getRootConfig().isPresent) {
        val rootConfiguration = regionalSetSelector.getRootConfig().get()
        val regionalSetRoots: Elements = getElementsFromDocumentByExtractConfig(root, rootConfiguration)

        return regionalSetRoots.map { extractRegionalSet(it, provider) }.toSet()
    }

    try {
        // The field name is only used for the error message; it previously read 'id'
        // for all three fields.
        val (setIds, languages, setKeys) = listOf(
            "id" to regionalSetSelector.idSelector(),
            "language" to regionalSetSelector.languageSelector(),
            "key" to regionalSetSelector.regionKeySelector(),
        ).map { (fieldName, fieldConfiguration) ->
            getElementsFromDocumentByExtractConfig(root, fieldConfiguration.getRootConfig().get()).map { element ->
                extractTextFromElementBySteps(element, fieldConfiguration.getSteps())
                    ?: throw IllegalStateException("Parameter '$fieldName' could not be found")
            }
        }

        return RegionalSet.flattenFromMemberLists(setIds, languages, setKeys)
    } catch (ex: NoSuchElementException) {
        // Still a RuntimeException for callers, but now with a usable message and the cause.
        throw RuntimeException(
            "Regional set configuration of provider $provider is incomplete: ${ex.message}",
            ex,
        )
    }
}
|
||||||
|
|
||||||
|
/**
 * Reads the card fields from [root] using the card configuration of [provider].
 *
 * Card construction is not implemented yet: the function extracts the name, type and
 * description to verify that they are present and then returns `null`. It also returns
 * `null` when the card target has no root configured.
 *
 * @param root document the card is extracted from
 * @param provider identifier of the source whose configuration is used
 * @return currently always `null`
 * @throws IllegalArgumentException if no source is registered for [provider]
 * @throws ElementNotFoundException if the configured card root matches no element
 * @throws IllegalStateException if one of the extracted fields cannot be found
 */
fun extractCard(root: Document, provider: String): Card? {
    val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
    val cardSelector = source.getTargets().card().get()

    val rootConfigurationOptional = cardSelector.getRootConfig()
    if (!rootConfigurationOptional.isPresent) {
        return null
    }

    val rootElement: Element = getElementFromDocumentByExtractConfig(
        root,
        rootConfigurationOptional.get(),
    ) ?: throw ElementNotFoundException("Card root element could not be found for provider $provider")

    val englishCardName: String = extractTextFromElementBySteps(
        rootElement,
        cardSelector.getEnglishName().getSteps(),
    ) ?: throw IllegalStateException("Parameter 'name' could not be found")

    // Type and description previously reused the name selector and the 'name' message.
    val cardType: String = extractTextFromElementBySteps(
        rootElement,
        cardSelector.getCardType().getSteps(),
    ) ?: throw IllegalStateException("Parameter 'type' could not be found")

    val description: String = extractTextFromElementBySteps(
        rootElement,
        cardSelector.getDescription().getSteps(),
    ) ?: throw IllegalStateException("Parameter 'description' could not be found")

    // TODO build the Card from englishCardName, cardType and description
    return null
}
|
||||||
|
|
||||||
|
/**
 * Selects all elements below [document] that match the query of [step].
 *
 * The query string is evaluated as a CSS query when the selector type is [Selector.CSS]
 * and as an XPath expression for every other selector type.
 *
 * @param document element the query is evaluated against
 * @param step extraction settings providing the selector type and the query string
 * @return the matching elements
 */
private fun getElementsFromDocumentByExtractConfig(
    document: Element,
    step: ExtractConfig,
): Elements = when (step.selectorType()) {
    Selector.CSS -> document.select(step.getQueryString())
    else -> document.selectXpath(step.getQueryString())
}
|
||||||
|
|
||||||
|
/**
 * Returns the first element below [document] that matches the query of [step].
 *
 * The query string is evaluated as a CSS query when the selector type is [Selector.CSS]
 * and as an XPath expression for every other selector type.
 *
 * @param document element the query is evaluated against
 * @param step extraction settings providing the selector type and the query string
 * @return the first matching element; the return type stays nullable for existing callers,
 * although a missing match is reported by exception
 * @throws ElementNotFoundException if the query matches no element
 */
private fun getElementFromDocumentByExtractConfig(
    document: Element,
    step: ExtractConfig,
): Element? {
    val selectorType = step.selectorType()
    val query = step.getQueryString()
    val matches = if (selectorType == Selector.CSS) {
        document.select(query)
    } else {
        document.selectXpath(query)
    }

    // The exception used to carry an empty message, which made failures untraceable.
    return matches.firstOrNull()
        ?: throw ElementNotFoundException("No element found for $selectorType query '$query'")
}
|
||||||
|
|
||||||
|
private fun extractTextFromElementBySteps(
|
||||||
root: Element,
|
root: Element,
|
||||||
steps: Set<Step>
|
steps: List<ExtractConfig>
|
||||||
): String? {
|
): String? {
|
||||||
|
val stepsInCorrectOrder = steps.reversed()
|
||||||
var currentElement: Element? = root.clone()
|
var currentElement: Element? = root.clone()
|
||||||
var result: String? = null
|
var result: String? = null
|
||||||
|
|
||||||
@@ -60,10 +215,10 @@ class ExtractionService(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (index == steps.size - 1) {
|
if (index == steps.size - 1) {
|
||||||
result = XPathUtil.extractResult(currentElement, currentStep.value())
|
result = XPathUtil.extractResult(currentElement, currentStep.getQueryString())
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
currentElement = XPathUtil.getNextElement(currentElement, currentStep.value())
|
currentElement = XPathUtil.getNextElement(currentElement, currentStep.getQueryString())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package com.rak.service
|
package com.rak.service
|
||||||
|
|
||||||
import com.rak.model.RegionalSet
|
import com.rak.model.card.Card
|
||||||
|
import com.rak.model.set.CardSet
|
||||||
import jakarta.enterprise.context.ApplicationScoped
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
import org.jsoup.Jsoup
|
import org.jsoup.Jsoup
|
||||||
import org.jsoup.nodes.Document
|
import org.jsoup.nodes.Document
|
||||||
@@ -15,21 +16,25 @@ class ScrapeService(
|
|||||||
fun scrapeSet(
|
fun scrapeSet(
|
||||||
provider: String,
|
provider: String,
|
||||||
setName: String,
|
setName: String,
|
||||||
): List<RegionalSet> {
|
): CardSet {
|
||||||
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
||||||
|
|
||||||
val path: String = normalizePath(setName)
|
val path: String = normalizePath(setName)
|
||||||
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
||||||
|
|
||||||
return extractionService.extractSet(document, provider)
|
return extractionService.extractSet(setName, document, provider)
|
||||||
}
|
}
|
||||||
|
|
||||||
fun scrapeCard(
|
fun scrapeCard(
|
||||||
provider: String,
|
provider: String,
|
||||||
cardName: String,
|
cardName: String,
|
||||||
): Map<String, String> {
|
): Card? {
|
||||||
|
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
||||||
|
|
||||||
val path: String = normalizePath(cardName)
|
val path: String = normalizePath(cardName)
|
||||||
return mapOf()
|
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
||||||
|
|
||||||
|
return extractionService.extractCard(document, provider)
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun normalizePath(path: String): String = path
|
private fun normalizePath(path: String): String = path
|
||||||
|
|||||||
@@ -1,14 +1,86 @@
|
|||||||
package com.rak.service
|
package com.rak.service
|
||||||
|
|
||||||
import com.rak.config.SourceConfig
|
import com.rak.config.model.CardScrapeTargetConfig
|
||||||
import com.rak.config.SourcesConfiguration
|
import com.rak.config.model.RegionalSetScrapeTargetConfig
|
||||||
|
import com.rak.config.model.SourceConfig
|
||||||
|
import com.rak.config.model.SourcesConfig
|
||||||
|
import com.rak.model.exception.InvalidConfigurationException
|
||||||
|
import io.quarkus.logging.Log
|
||||||
|
import io.quarkus.runtime.Startup
|
||||||
|
import jakarta.annotation.PostConstruct
|
||||||
import jakarta.enterprise.context.ApplicationScoped
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
|
|
||||||
|
@Startup
|
||||||
@ApplicationScoped
|
@ApplicationScoped
|
||||||
class SourceService (
|
class SourceService(
|
||||||
val sourcesConfiguration: SourcesConfiguration
|
val sourcesConfiguration: SourcesConfig
|
||||||
) {
|
) {
|
||||||
|
|
||||||
|
/**
 * Lifecycle callback that checks every configured source.
 *
 * Each entry returned by `sourcesConfiguration.getSources()` is handed to
 * [validateSource]; nothing is caught here, so a failing source aborts the
 * callback with whatever exception the validation raised.
 */
@PostConstruct
fun init() {
    for (source in sourcesConfiguration.getSources()) {
        validateSource(source)
    }
}
|
||||||
|
|
||||||
|
/**
 * Validates the scrape targets declared by a single source.
 *
 * The regional-set and card targets are both optional; each one is only
 * validated when it is present. The regional-set target is checked first.
 *
 * @param sourceConfig the source whose targets are inspected
 */
private fun validateSource(sourceConfig: SourceConfig) {
    // Both lookups happen up front, before any validation runs.
    val regionalSetTarget = sourceConfig.getTargets().regionalSet()
    val cardTarget = sourceConfig.getTargets().card()

    regionalSetTarget.ifPresent { target -> validateSetExtractConfig(target) }
    cardTarget.ifPresent { target -> validateCardExtractConfig(target) }
}
|
||||||
|
|
||||||
|
/**
 * Validates the extraction roots of a regional-set scrape target.
 *
 * The language, id and region-key selectors are checked against the target's
 * global root, see [requireConsistentRoots].
 *
 * @param setExtractConfig the regional-set target to validate
 * @throws InvalidConfigurationException if global and dedicated roots are mixed,
 *   or if neither a global root nor a complete set of dedicated roots is configured
 */
private fun validateSetExtractConfig(setExtractConfig: RegionalSetScrapeTargetConfig) {
    val selectors = listOf(
        setExtractConfig.languageSelector(),
        setExtractConfig.idSelector(),
        setExtractConfig.regionKeySelector(),
    )

    requireConsistentRoots(setExtractConfig.getRootConfig().isPresent, selectors) { selector ->
        selector.getRootConfig().isPresent
    }
}

/**
 * Validates the extraction roots of a card scrape target.
 *
 * The English-name, description, card-type, attack and defense selectors are
 * checked against the target's global root, see [requireConsistentRoots].
 *
 * @param cardScrapeTargetConfig the card target to validate
 * @throws InvalidConfigurationException if global and dedicated roots are mixed,
 *   or if neither a global root nor a complete set of dedicated roots is configured
 */
private fun validateCardExtractConfig(cardScrapeTargetConfig: CardScrapeTargetConfig) {
    val selectors = listOf(
        cardScrapeTargetConfig.getEnglishName(),
        cardScrapeTargetConfig.getDescription(),
        cardScrapeTargetConfig.getCardType(),
        cardScrapeTargetConfig.getAttack(),
        cardScrapeTargetConfig.getDefense(),
    )

    requireConsistentRoots(cardScrapeTargetConfig.getRootConfig().isPresent, selectors) { selector ->
        selector.getRootConfig().isPresent
    }
}

/**
 * Enforces that a target uses either one global extraction root or a dedicated
 * root on every selector, never a mix of the two.
 *
 * - Global root present: no selector may carry a dedicated root.
 * - Global root absent: every selector must carry a dedicated root.
 *
 * Selectors are inspected lazily and checking stops at the first violation.
 *
 * @param globalRootPresent whether the target declares a global root
 * @param selectors the selectors belonging to the target
 * @param hasDedicatedRoot tells whether a given selector declares its own root
 * @throws InvalidConfigurationException when the rule above is violated
 */
private fun <T> requireConsistentRoots(
    globalRootPresent: Boolean,
    selectors: List<T>,
    hasDedicatedRoot: (T) -> Boolean,
) {
    if (globalRootPresent) {
        if (selectors.any { hasDedicatedRoot(it) }) {
            throw InvalidConfigurationException(
                "Dedicated extraction roots cannot be set when a global extraction root is configured"
            )
        }
    } else {
        if (selectors.any { !hasDedicatedRoot(it) }) {
            throw InvalidConfigurationException(
                "Dedicated extraction roots have to be set when a global extraction root is not configured"
            )
        }
    }
}
|
||||||
|
|
||||||
fun getSources(): Set<SourceConfig> = sourcesConfiguration.getSources().toSet()
|
fun getSources(): Set<SourceConfig> = sourcesConfiguration.getSources().toSet()
|
||||||
fun getSourceById(id: String): SourceConfig? = getSources().firstOrNull { it.getId() == id }
|
fun getSourceById(id: String): SourceConfig? = getSources().firstOrNull { it.getId() == id }
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +0,0 @@
|
|||||||
package com.rak.service
|
|
||||||
|
|
||||||
import com.rak.model.transform.TransformationRegistry
|
|
||||||
import jakarta.enterprise.context.ApplicationScoped
|
|
||||||
|
|
||||||
@ApplicationScoped
|
|
||||||
class TransformService(
|
|
||||||
private val transformationRegistry: TransformationRegistry = TransformationRegistry()
|
|
||||||
) {
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -9,8 +9,11 @@ scraper:
|
|||||||
domain: "yugioh-card.com"
|
domain: "yugioh-card.com"
|
||||||
url-patterns:
|
url-patterns:
|
||||||
- "^https://www\\.yugioh-card\\.com/[a-z]{2}/products/.*$"
|
- "^https://www\\.yugioh-card\\.com/[a-z]{2}/products/.*$"
|
||||||
selectors:
|
targets:
|
||||||
card:
|
card:
|
||||||
|
root:
|
||||||
|
type: css
|
||||||
|
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
||||||
name:
|
name:
|
||||||
steps:
|
steps:
|
||||||
- type: "css"
|
- type: "css"
|
||||||
@@ -27,8 +30,9 @@ scraper:
|
|||||||
domain: "yugioh.fandom.com"
|
domain: "yugioh.fandom.com"
|
||||||
url-patterns:
|
url-patterns:
|
||||||
- "^https://yugioh\\.fandom\\.com/wiki/.*$"
|
- "^https://yugioh\\.fandom\\.com/wiki/.*$"
|
||||||
selectors:
|
targets:
|
||||||
regional-set:
|
regional-set:
|
||||||
|
multi: true
|
||||||
root:
|
root:
|
||||||
type: css
|
type: css
|
||||||
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
||||||
@@ -51,3 +55,39 @@ scraper:
|
|||||||
steps:
|
steps:
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "//li/abbr/text()"
|
value: "//li/abbr/text()"
|
||||||
|
card:
|
||||||
|
name:
|
||||||
|
root:
|
||||||
|
type: css
|
||||||
|
value: ".cardTable"
|
||||||
|
steps:
|
||||||
|
- type: "xpath"
|
||||||
|
value: "./tbody/tr[3]/th/text()"
|
||||||
|
description:
|
||||||
|
root:
|
||||||
|
type: css
|
||||||
|
value: ".cardTable"
|
||||||
|
steps:
|
||||||
|
- type: "xpath"
|
||||||
|
value: "b:contains(Card descriptions)"
|
||||||
|
type:
|
||||||
|
root:
|
||||||
|
type: css
|
||||||
|
value: ".cardTable"
|
||||||
|
steps:
|
||||||
|
- type: "xpath"
|
||||||
|
value: "b:contains(Card descriptions)"
|
||||||
|
attack:
|
||||||
|
root:
|
||||||
|
type: css
|
||||||
|
value: ".cardTable"
|
||||||
|
steps:
|
||||||
|
- type: "xpath"
|
||||||
|
value: "b:contains(Card descriptions)"
|
||||||
|
defense:
|
||||||
|
root:
|
||||||
|
type: css
|
||||||
|
value: ".cardTable"
|
||||||
|
steps:
|
||||||
|
- type: "xpath"
|
||||||
|
value: "b:contains(Card descriptions)"
|
||||||
Reference in New Issue
Block a user