Compare commits
8 Commits
9db3753105
...
39c0ebfc7c
| Author | SHA1 | Date | |
|---|---|---|---|
| 39c0ebfc7c | |||
| e0330e7baa | |||
| 3808fe153e | |||
| 0196308c10 | |||
| 72af626e54 | |||
| ce64f90a66 | |||
| 284723c978 | |||
| 8cc9a64111 |
@@ -30,6 +30,11 @@ scraper:
|
||||
steps:
|
||||
- type: "xpath"
|
||||
value: "//li/text()"
|
||||
transform:
|
||||
- name: "replace"
|
||||
parameters:
|
||||
- " ("
|
||||
- ""
|
||||
language:
|
||||
steps:
|
||||
- type: "xpath"
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
package com.rak.model.transform;
|
||||
|
||||
import jakarta.enterprise.context.ApplicationScoped;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@ApplicationScoped
|
||||
public class TestRegistry {
|
||||
private final Map<String, Transformation> registry = new ConcurrentHashMap<>();
|
||||
|
||||
public TestRegistry() {
|
||||
// Register built-in transformations
|
||||
register("trim", string -> string.trim());
|
||||
register("upper", String::toUpperCase);
|
||||
register("parseInt", s -> Integer.parseInt((String) s));
|
||||
register("parseFloat", s -> Float.parseFloat((String) s));
|
||||
register("parseDate", s -> LocalDate.parse((String) s));
|
||||
register("extract", this::extract);
|
||||
}
|
||||
|
||||
public void register(String name, Transformation transformation) {
|
||||
registry.put(name, transformation);
|
||||
}
|
||||
|
||||
public Transformation get(String name) {
|
||||
return registry.get(name);
|
||||
}
|
||||
|
||||
private Object extract(Object input, String pattern) {
|
||||
Pattern regex = Pattern.compile(pattern);
|
||||
Matcher matcher = regex.matcher((String) input);
|
||||
return matcher.find() ? matcher.group(1) : input;
|
||||
}
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
package com.rak.config
|
||||
|
||||
import io.smallrye.config.WithName
|
||||
|
||||
interface CardDefinition {
|
||||
@WithName("name")
|
||||
fun nameSelector(): SelectorDefinition
|
||||
@WithName("attack")
|
||||
fun attackSelector(): SelectorDefinition
|
||||
@WithName("effect")
|
||||
fun effectSelector(): SelectorDefinition
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
package com.rak.config
|
||||
|
||||
import java.util.*
|
||||
|
||||
interface Items {
|
||||
fun card(): Optional<CardDefinition>
|
||||
fun regionalSet(): Optional<RegionalSetDefinition>
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
package com.rak.config
|
||||
|
||||
import com.rak.config.converter.AbstractModelDefinition
|
||||
import io.smallrye.config.WithName
|
||||
|
||||
interface RegionalSetDefinition : AbstractModelDefinition {
|
||||
@WithName("id")
|
||||
fun idSelector(): SelectorDefinition
|
||||
@WithName("language")
|
||||
fun languageSelector(): SelectorDefinition
|
||||
@WithName("region-key")
|
||||
fun regionKeySelector(): SelectorDefinition
|
||||
}
|
||||
@@ -1,5 +0,0 @@
|
||||
package com.rak.config
|
||||
|
||||
interface SelectorDefinition {
|
||||
fun steps(): Set<Step>
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
package com.rak.config.converter
|
||||
|
||||
import com.rak.config.Step
|
||||
import io.smallrye.config.WithName
|
||||
|
||||
interface AbstractModelDefinition {
|
||||
@WithName("root")
|
||||
fun rootSelector(): Step
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
package com.rak.config.converter
|
||||
|
||||
import com.rak.model.DiscriminatorDirection
|
||||
import org.eclipse.microprofile.config.spi.Converter
|
||||
|
||||
/**
 * Config converter that turns a raw configuration string into a [DiscriminatorDirection].
 *
 * The lookup is delegated to [DiscriminatorDirection.from]; whatever that returns
 * (including `null` when nothing matches) is passed straight back to the caller.
 */
class DiscriminatorDirectionConverter : Converter<DiscriminatorDirection> {
    override fun convert(value: String): DiscriminatorDirection? = DiscriminatorDirection.from(value)
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.rak.config.converter
|
||||
|
||||
import jakarta.annotation.Priority
|
||||
import org.eclipse.microprofile.config.spi.Converter
|
||||
|
||||
/**
 * Identity converter for string configuration values: the raw value is returned unchanged.
 *
 * NOTE(review): presumably exists so that empty-string parameters (e.g. an empty replacement
 * text for a transformation step) are kept instead of being dropped by the default string
 * handling of the config library — confirm against the config library's documentation.
 */
@Priority(1)
class EmptyStringConverter : Converter<String> {
    override fun convert(value: String): String = value
}
|
||||
@@ -1,6 +1,6 @@
|
||||
package com.rak.config.converter
|
||||
|
||||
import com.rak.model.scrape.selector.Selector
|
||||
import com.rak.model.Selector
|
||||
import org.eclipse.microprofile.config.spi.Converter
|
||||
|
||||
class TypeSelectorConverter : Converter<Selector> {
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
package com.rak.config.model
|
||||
|
||||
import io.smallrye.config.WithDefault
|
||||
import io.smallrye.config.WithName
|
||||
import java.util.*
|
||||
|
||||
interface AbstractScrapeTargetConfig {
|
||||
@WithName("root")
|
||||
fun getRootConfig(): Optional<ExtractConfig>
|
||||
@WithName("multi")
|
||||
@WithDefault("false")
|
||||
fun isMulti(): Boolean
|
||||
@WithName("discriminator")
|
||||
fun getDiscriminator(): Optional<DiscriminatorConfig>
|
||||
}
|
||||
@@ -0,0 +1,3 @@
|
||||
package com.rak.config.model
|
||||
|
||||
interface AbstractScrapeTargetFieldConfig
|
||||
@@ -0,0 +1,14 @@
|
||||
package com.rak.config.model
|
||||
|
||||
import io.smallrye.config.WithName
|
||||
|
||||
interface CardPrintScrapeTargetConfig : AbstractScrapeTargetConfig {
|
||||
@WithName("id")
|
||||
fun getIdConfig(): ScrapeTargetFieldConfig
|
||||
@WithName("name")
|
||||
fun getNameConfig(): ScrapeTargetFieldConfig
|
||||
@WithName("regional-name")
|
||||
fun getRegionNameConfig(): ScrapeTargetFieldConfig
|
||||
@WithName("rarity")
|
||||
fun getRarityConfig(): ScrapeTargetFieldConfig
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.rak.config.model
|
||||
|
||||
import io.smallrye.config.WithName
|
||||
|
||||
interface CardScrapeTargetConfig : AbstractScrapeTargetConfig {
|
||||
@WithName("name")
|
||||
fun getEnglishNameConfig(): ScrapeTargetFieldConfig
|
||||
@WithName("description")
|
||||
fun getDescriptionConfig(): ScrapeTargetFieldConfig
|
||||
@WithName("type")
|
||||
fun getCardTypeConfig(): ScrapeTargetFieldConfig
|
||||
@WithName("attack")
|
||||
fun getAttackConfig(): ScrapeTargetFieldConfig
|
||||
@WithName("defense")
|
||||
fun getDefenseConfig(): ScrapeTargetFieldConfig
|
||||
}
|
||||
12
src/main/kotlin/com/rak/config/model/DiscriminatorConfig.kt
Normal file
12
src/main/kotlin/com/rak/config/model/DiscriminatorConfig.kt
Normal file
@@ -0,0 +1,12 @@
|
||||
package com.rak.config.model
|
||||
|
||||
import com.rak.config.converter.DiscriminatorDirectionConverter
|
||||
import com.rak.model.DiscriminatorDirection
|
||||
import io.smallrye.config.WithConverter
|
||||
import io.smallrye.config.WithName
|
||||
|
||||
interface DiscriminatorConfig : ScrapeTargetFieldConfig {
|
||||
@WithName("direction")
|
||||
@WithConverter(DiscriminatorDirectionConverter::class)
|
||||
fun getDiscriminatorDirection(): DiscriminatorDirection
|
||||
}
|
||||
@@ -1,13 +1,14 @@
|
||||
package com.rak.config
|
||||
package com.rak.config.model
|
||||
|
||||
import com.rak.config.converter.TypeSelectorConverter
|
||||
import com.rak.model.scrape.selector.Selector
|
||||
import com.rak.model.Selector
|
||||
import io.smallrye.config.WithConverter
|
||||
import io.smallrye.config.WithName
|
||||
|
||||
interface Step {
|
||||
interface ExtractConfig {
|
||||
@WithConverter(TypeSelectorConverter::class)
|
||||
@WithName("type")
|
||||
fun selectorType(): Selector // e.g. css or xpath
|
||||
fun value(): String
|
||||
fun selectorType(): Selector
|
||||
@WithName("value")
|
||||
fun getQueryString(): String
|
||||
}
|
||||
@@ -1,9 +1,9 @@
|
||||
package com.rak.config
|
||||
package com.rak.config.model
|
||||
|
||||
import io.smallrye.config.WithName
|
||||
import java.util.*
|
||||
|
||||
interface SourceConfig {
|
||||
interface ProviderConfig {
|
||||
|
||||
@WithName("id")
|
||||
fun getId(): String
|
||||
@@ -13,7 +13,7 @@ interface SourceConfig {
|
||||
fun getDomain(): String
|
||||
@WithName("url-patterns")
|
||||
fun getUrlPatterns(): Optional<MutableSet<String>>
|
||||
@WithName("selectors")
|
||||
fun getItems(): Items
|
||||
@WithName("targets")
|
||||
fun getTargets(): TargetsConfig
|
||||
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
package com.rak.config.model
|
||||
|
||||
import io.smallrye.config.WithName
|
||||
import java.util.*
|
||||
|
||||
interface ScrapeTargetFieldConfig : AbstractScrapeTargetFieldConfig {
|
||||
@WithName("root")
|
||||
fun getRootConfig(): Optional<ExtractConfig>
|
||||
@WithName("steps")
|
||||
fun getExtractionSteps(): List<ExtractConfig>
|
||||
@WithName("transform")
|
||||
fun getOptionalTransformationSteps(): Optional<List<TransformationStepConfig>>
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package com.rak.config.model
|
||||
|
||||
import io.smallrye.config.WithName
|
||||
|
||||
interface SetScrapeTargetConfig : AbstractScrapeTargetConfig {
|
||||
@WithName("id")
|
||||
fun getIdConfig(): ScrapeTargetFieldConfig
|
||||
@WithName("language")
|
||||
fun getLanguageConfig(): ScrapeTargetFieldConfig
|
||||
@WithName("region-key")
|
||||
fun getRegionKeyConfig(): ScrapeTargetFieldConfig
|
||||
}
|
||||
@@ -1,12 +1,12 @@
|
||||
package com.rak.config
|
||||
package com.rak.config.model
|
||||
|
||||
import io.smallrye.config.ConfigMapping
|
||||
import io.smallrye.config.WithName
|
||||
|
||||
@ConfigMapping(prefix = "scraper")
|
||||
interface SourcesConfiguration {
|
||||
interface SourcesConfig {
|
||||
|
||||
@WithName("sources")
|
||||
fun getSources(): MutableList<SourceConfig>
|
||||
fun getSources(): MutableList<ProviderConfig>
|
||||
|
||||
}
|
||||
13
src/main/kotlin/com/rak/config/model/TargetsConfig.kt
Normal file
13
src/main/kotlin/com/rak/config/model/TargetsConfig.kt
Normal file
@@ -0,0 +1,13 @@
|
||||
package com.rak.config.model
|
||||
|
||||
import io.smallrye.config.WithName
|
||||
import java.util.*
|
||||
|
||||
interface TargetsConfig {
|
||||
@WithName("card")
|
||||
fun getCardConfig(): Optional<CardScrapeTargetConfig>
|
||||
@WithName("set")
|
||||
fun getSetConfig(): Optional<SetScrapeTargetConfig>
|
||||
@WithName("card-print")
|
||||
fun getCardPrintConfiguration(): Optional<CardPrintScrapeTargetConfig>
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
package com.rak.config.model
|
||||
|
||||
import com.rak.config.converter.EmptyStringConverter
|
||||
import io.smallrye.config.WithConverter
|
||||
|
||||
interface TransformationStepConfig {
|
||||
fun name(): String
|
||||
@WithConverter(EmptyStringConverter::class)
|
||||
fun parameters(): MutableList<String>
|
||||
}
|
||||
@@ -1,8 +1,9 @@
|
||||
package com.rak.controller
|
||||
|
||||
import com.rak.config.SourcesConfiguration
|
||||
import com.rak.model.card.Card
|
||||
import com.rak.model.set.CardSet
|
||||
import com.rak.model.set.RegionalSet
|
||||
import com.rak.service.ScrapeService
|
||||
import com.rak.service.SourceService
|
||||
import jakarta.ws.rs.Consumes
|
||||
import jakarta.ws.rs.GET
|
||||
import jakarta.ws.rs.Path
|
||||
@@ -13,15 +14,11 @@ import org.jboss.resteasy.reactive.RestQuery
|
||||
|
||||
|
||||
@Path("/api")
|
||||
class ExampleResource(
|
||||
private val sourcesConfiguration: SourcesConfiguration,
|
||||
class ScrapeController(
|
||||
private val scrapeService: ScrapeService,
|
||||
private val sourceService: SourceService
|
||||
) {
|
||||
|
||||
companion object {
|
||||
private val TEXT_NODE_MATCHER: Regex = Regex("text\\(\\)$")
|
||||
}
|
||||
|
||||
|
||||
@GET
|
||||
@Path("/{provider}/set")
|
||||
@@ -32,8 +29,24 @@ class ExampleResource(
|
||||
provider: String,
|
||||
@RestQuery
|
||||
setName: String
|
||||
): List<Map<String, String>> {
|
||||
return scrapeService.extractSet(
|
||||
): CardSet {
|
||||
return scrapeService.scrapeSet(
|
||||
provider,
|
||||
setName
|
||||
)
|
||||
}
|
||||
|
||||
@GET
|
||||
@Path("/{provider}/regionalSet")
|
||||
@Produces(MediaType.APPLICATION_JSON)
|
||||
@Consumes(MediaType.APPLICATION_JSON)
|
||||
fun scrapeRegionalSet(
|
||||
@RestPath
|
||||
provider: String,
|
||||
@RestQuery
|
||||
setName: String
|
||||
): RegionalSet {
|
||||
return scrapeService.scrapeRegionalSet(
|
||||
provider,
|
||||
setName
|
||||
)
|
||||
@@ -48,8 +61,8 @@ class ExampleResource(
|
||||
provider: String,
|
||||
@RestQuery
|
||||
cardName: String
|
||||
): Map<String, String> {
|
||||
return scrapeService.extractCard(
|
||||
): Card? {
|
||||
return scrapeService.scrapeCard(
|
||||
provider,
|
||||
cardName
|
||||
)
|
||||
17
src/main/kotlin/com/rak/model/DiscriminatorDirection.kt
Normal file
17
src/main/kotlin/com/rak/model/DiscriminatorDirection.kt
Normal file
@@ -0,0 +1,17 @@
|
||||
package com.rak.model
|
||||
|
||||
/**
 * Direction setting of a discriminator. Each constant carries the string [value]
 * it is matched against in [from].
 */
enum class DiscriminatorDirection(val value: String) {
    ASC("asc"),
    DESC("desc");

    companion object {
        /**
         * Looks up the constant whose [DiscriminatorDirection.value] equals [value] exactly
         * (case-sensitive comparison).
         *
         * @return the matching constant, or `null` when no constant matches.
         */
        fun from(value: String): DiscriminatorDirection? =
            DiscriminatorDirection.entries.firstOrNull { candidate -> candidate.value == value }
    }
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.rak.model.scrape.selector
|
||||
package com.rak.model
|
||||
|
||||
enum class Selector {
|
||||
CSS,
|
||||
11
src/main/kotlin/com/rak/model/card/Attribute.kt
Normal file
11
src/main/kotlin/com/rak/model/card/Attribute.kt
Normal file
@@ -0,0 +1,11 @@
|
||||
package com.rak.model.card
|
||||
|
||||
enum class Attribute {
|
||||
WIND,
|
||||
WATER,
|
||||
FIRE,
|
||||
EARTH,
|
||||
LIGHT,
|
||||
DARK,
|
||||
DIVINE;
|
||||
}
|
||||
8
src/main/kotlin/com/rak/model/card/Card.kt
Normal file
8
src/main/kotlin/com/rak/model/card/Card.kt
Normal file
@@ -0,0 +1,8 @@
|
||||
package com.rak.model.card
|
||||
|
||||
abstract class Card {
|
||||
abstract val id: Int
|
||||
abstract val cardType: CardType
|
||||
abstract val description: String
|
||||
abstract val name: String
|
||||
}
|
||||
23
src/main/kotlin/com/rak/model/card/CardPrint.kt
Normal file
23
src/main/kotlin/com/rak/model/card/CardPrint.kt
Normal file
@@ -0,0 +1,23 @@
|
||||
package com.rak.model.card
|
||||
|
||||
import com.rak.model.set.RegionalSet
|
||||
|
||||
/**
 * A single printing of a card.
 *
 * @property id identifier of the print.
 * @property name name of the print.
 * @property regionalName optional regional name; `null` when not available.
 * @property rarity rarity of the print.
 */
data class CardPrint(
    val id: String,
    val name: String,
    val regionalName: String? = null,
    val rarity: String
) {

    companion object {

        /**
         * Builds a [CardPrint] from a map of extracted values.
         *
         * Reads the keys `"id"`, `"name"` and `"rarity"` (all required) and
         * `"regionalName"` (optional).
         *
         * @throws IllegalStateException when a required key is missing; the message
         * names the key that was actually looked up.
         */
        fun fromMap(map: Map<String, String>): CardPrint {
            return CardPrint(
                id = map.required("id"),
                name = map.required("name"),
                regionalName = map["regionalName"],
                rarity = map.required("rarity"),
            )
        }

        /** Returns the value stored under [key] or fails with a message naming that key. */
        private fun Map<String, String>.required(key: String): String =
            this[key] ?: throw IllegalStateException("Parameter '$key' not found")
    }
}
|
||||
8
src/main/kotlin/com/rak/model/card/CardType.kt
Normal file
8
src/main/kotlin/com/rak/model/card/CardType.kt
Normal file
@@ -0,0 +1,8 @@
|
||||
package com.rak.model.card
|
||||
|
||||
enum class CardType {
|
||||
MONSTER,
|
||||
SPELL,
|
||||
TRAP,
|
||||
UNKNOWN
|
||||
}
|
||||
3
src/main/kotlin/com/rak/model/card/ICardType.kt
Normal file
3
src/main/kotlin/com/rak/model/card/ICardType.kt
Normal file
@@ -0,0 +1,3 @@
|
||||
package com.rak.model.card
|
||||
|
||||
interface ICardType
|
||||
12
src/main/kotlin/com/rak/model/card/LinkArrow.kt
Normal file
12
src/main/kotlin/com/rak/model/card/LinkArrow.kt
Normal file
@@ -0,0 +1,12 @@
|
||||
package com.rak.model.card
|
||||
|
||||
enum class LinkArrow {
|
||||
TOP_LEFT,
|
||||
TOP,
|
||||
TOP_RIGHT,
|
||||
LEFT,
|
||||
RIGHT,
|
||||
BOTTOM_LEFT,
|
||||
BOTTOM,
|
||||
BOTTOM_RIGHT;
|
||||
}
|
||||
20
src/main/kotlin/com/rak/model/card/MonsterCard.kt
Normal file
20
src/main/kotlin/com/rak/model/card/MonsterCard.kt
Normal file
@@ -0,0 +1,20 @@
|
||||
package com.rak.model.card
|
||||
|
||||
data class MonsterCard(
|
||||
override val id: Int,
|
||||
override val cardType: CardType,
|
||||
override val description: String,
|
||||
override val name: String,
|
||||
val monsterEffect: String? = null,
|
||||
val attack: Int? = null,
|
||||
val defense: Int? = null,
|
||||
val level: Int? = null,
|
||||
val isPendulum: Boolean = false,
|
||||
val pendulumScale: Int? = null,
|
||||
val pendulumEffect: String? = null,
|
||||
val linkValue: Int? = null,
|
||||
val subType: MonsterCardType,
|
||||
val monsterType: MonsterType,
|
||||
val attribute: Attribute,
|
||||
val linkArrows: Set<LinkArrow>
|
||||
) : Card()
|
||||
11
src/main/kotlin/com/rak/model/card/MonsterCardType.kt
Normal file
11
src/main/kotlin/com/rak/model/card/MonsterCardType.kt
Normal file
@@ -0,0 +1,11 @@
|
||||
package com.rak.model.card
|
||||
|
||||
enum class MonsterCardType : ICardType {
|
||||
NORMAL,
|
||||
EFFECT,
|
||||
RITUAL,
|
||||
FUSION,
|
||||
SYNCHRO,
|
||||
XYZ,
|
||||
LINK
|
||||
}
|
||||
32
src/main/kotlin/com/rak/model/card/MonsterType.kt
Normal file
32
src/main/kotlin/com/rak/model/card/MonsterType.kt
Normal file
@@ -0,0 +1,32 @@
|
||||
package com.rak.model.card
|
||||
|
||||
// TODO string value for proper names
|
||||
// TODO consider adding unknown type
|
||||
enum class MonsterType {
|
||||
AQUA,
|
||||
BEAST,
|
||||
BEAST_WARRIOR,
|
||||
CREATOR_GOD,
|
||||
CYBERSE,
|
||||
DINOSAUR,
|
||||
DIVINE_BEAST,
|
||||
DRAGON,
|
||||
FAIRY,
|
||||
FIEND,
|
||||
FISH,
|
||||
INSECT,
|
||||
ILLUSION,
|
||||
MACHINE,
|
||||
PLANT,
|
||||
PSYCHIC,
|
||||
PYRO,
|
||||
REPTILE,
|
||||
ROCK,
|
||||
SEA_SERPENT,
|
||||
SPELLCASTER,
|
||||
THUNDER,
|
||||
WARRIOR,
|
||||
WINGED_BEAST,
|
||||
WYRM,
|
||||
ZOMBIE
|
||||
}
|
||||
9
src/main/kotlin/com/rak/model/card/SpellCard.kt
Normal file
9
src/main/kotlin/com/rak/model/card/SpellCard.kt
Normal file
@@ -0,0 +1,9 @@
|
||||
package com.rak.model.card
|
||||
|
||||
data class SpellCard(
|
||||
override val id: Int,
|
||||
override val cardType: CardType,
|
||||
override val description: String,
|
||||
override val name: String,
|
||||
val subType: SpellCardType
|
||||
) : Card()
|
||||
11
src/main/kotlin/com/rak/model/card/SpellCardType.kt
Normal file
11
src/main/kotlin/com/rak/model/card/SpellCardType.kt
Normal file
@@ -0,0 +1,11 @@
|
||||
package com.rak.model.card
|
||||
|
||||
// TODO fix underscore for all types with string value
|
||||
enum class SpellCardType {
|
||||
NORMAL,
|
||||
CONTINUOUS,
|
||||
EQUIP,
|
||||
QUICK_PLAY,
|
||||
FIELD,
|
||||
RITUAL
|
||||
}
|
||||
9
src/main/kotlin/com/rak/model/card/TrapCard.kt
Normal file
9
src/main/kotlin/com/rak/model/card/TrapCard.kt
Normal file
@@ -0,0 +1,9 @@
|
||||
package com.rak.model.card
|
||||
|
||||
data class TrapCard(
|
||||
override val id: Int,
|
||||
override val cardType: CardType,
|
||||
override val description: String,
|
||||
override val name: String,
|
||||
val subType: TrapCardType
|
||||
) : Card()
|
||||
7
src/main/kotlin/com/rak/model/card/TrapCardType.kt
Normal file
7
src/main/kotlin/com/rak/model/card/TrapCardType.kt
Normal file
@@ -0,0 +1,7 @@
|
||||
package com.rak.model.card
|
||||
|
||||
enum class TrapCardType {
|
||||
NORMAL,
|
||||
CONTINUOUS,
|
||||
COUNTER
|
||||
}
|
||||
@@ -0,0 +1,3 @@
|
||||
package com.rak.model.exception
|
||||
|
||||
class ElementNotFoundException(message: String) : RuntimeException(message)
|
||||
@@ -0,0 +1,3 @@
|
||||
package com.rak.model.exception
|
||||
|
||||
class InvalidConfigurationException(message: String) : RuntimeException(message)
|
||||
@@ -0,0 +1,3 @@
|
||||
package com.rak.model.exception
|
||||
|
||||
class NotImplementedException(message: String) : RuntimeException(message)
|
||||
@@ -0,0 +1,7 @@
|
||||
package com.rak.model.exception
|
||||
|
||||
import java.lang.RuntimeException
|
||||
|
||||
class UnsupportedQueryForProviderException(
|
||||
message: String,
|
||||
) : RuntimeException(message)
|
||||
@@ -1,4 +0,0 @@
|
||||
package com.rak.model.scrape
|
||||
|
||||
abstract class AbstractScraper{
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
package com.rak.model.scrape
|
||||
|
||||
class JsoupScraper : AbstractScraper() {
|
||||
|
||||
|
||||
}
|
||||
@@ -1,5 +0,0 @@
|
||||
package com.rak.model.scrape
|
||||
|
||||
data class ScrapeJob(
|
||||
val url: String,
|
||||
)
|
||||
12
src/main/kotlin/com/rak/model/set/CardSet.kt
Normal file
12
src/main/kotlin/com/rak/model/set/CardSet.kt
Normal file
@@ -0,0 +1,12 @@
|
||||
package com.rak.model.set
|
||||
|
||||
import kotlin.collections.Set
|
||||
|
||||
data class CardSet(
|
||||
val name: String,
|
||||
val regionalSets: Set<RegionalSet>
|
||||
) {
|
||||
companion object {
|
||||
|
||||
}
|
||||
}
|
||||
49
src/main/kotlin/com/rak/model/set/RegionalSet.kt
Normal file
49
src/main/kotlin/com/rak/model/set/RegionalSet.kt
Normal file
@@ -0,0 +1,49 @@
|
||||
package com.rak.model.set
|
||||
|
||||
import com.rak.model.card.CardPrint
|
||||
|
||||
/**
 * A regional edition of a card set.
 *
 * @property prefix set prefix.
 * @property region region of the set.
 * @property regionCode region code of the set.
 * @property cardPrints card prints belonging to this regional set.
 * @property numberOfCards number of cards; [flattenFromMemberLists] stores `-1` here
 * because it creates sets without card prints.
 */
data class RegionalSet(
    val prefix: String,
    val region: String,
    val regionCode: String,
    val cardPrints: Collection<CardPrint>,
    val numberOfCards: Int
) {

    companion object {

        /**
         * Builds a [RegionalSet] from a map of extracted values plus its card prints.
         *
         * Reads the required keys `"prefix"`, `"region"` and `"regionCode"`;
         * [numberOfCards] is set to the size of [cardPrints].
         *
         * @throws IllegalStateException when one of the required keys is missing.
         */
        fun fromMap(map: Map<String, String>, cardPrints: Collection<CardPrint>): RegionalSet {
            return RegionalSet(
                prefix = map.required("prefix"),
                region = map.required("region"),
                regionCode = map.required("regionCode"),
                cardPrints = cardPrints,
                numberOfCards = cardPrints.size
            )
        }

        /**
         * Combines three parallel lists into regional sets: the entries at the same
         * index of each list form one [RegionalSet] (without card prints and with
         * `numberOfCards = -1`).
         *
         * @throws IllegalArgumentException when the three lists do not all have the same size.
         */
        fun flattenFromMemberLists(
            idList: List<String>,
            languageList: List<String>,
            regionKeyAliasList: List<String>,
        ): MutableSet<RegionalSet> {
            // All three sizes must agree. The previous check joined the two comparisons
            // with `&&` and therefore only failed when BOTH other lists differed.
            require(idList.size == languageList.size && idList.size == regionKeyAliasList.size) {
                "Lists have to be the same size"
            }

            return idList.indices.mapTo(mutableSetOf<RegionalSet>()) { index ->
                RegionalSet(
                    prefix = idList[index],
                    region = languageList[index],
                    regionCode = regionKeyAliasList[index],
                    cardPrints = listOf(),
                    numberOfCards = -1
                )
            }
        }

        /** Returns the value stored under [key] or fails with a message naming that key. */
        private fun Map<String, String>.required(key: String): String =
            this[key] ?: throw IllegalStateException("Parameter '$key' not found")
    }
}
|
||||
@@ -2,5 +2,5 @@ package com.rak.model.transform
|
||||
|
||||
@FunctionalInterface
|
||||
fun interface ParameterizedTransformation : AbstractTransformation {
|
||||
fun apply(input: Any, vararg parameters: Any): Any?
|
||||
fun apply(input: String, parameters: List<String>): String
|
||||
}
|
||||
@@ -2,5 +2,5 @@ package com.rak.model.transform
|
||||
|
||||
@FunctionalInterface
|
||||
fun interface Transformation : AbstractTransformation {
|
||||
fun apply(input: Any): Any?
|
||||
fun apply(input: String): String
|
||||
}
|
||||
@@ -1,27 +1,76 @@
|
||||
package com.rak.model.transform
|
||||
|
||||
import jakarta.enterprise.context.ApplicationScoped
|
||||
import com.rak.config.model.TransformationStepConfig
|
||||
import java.util.concurrent.ConcurrentHashMap
|
||||
|
||||
@ApplicationScoped
|
||||
class TransformationRegistry {
|
||||
|
||||
private val transformations = hashMapOf<String, (input: Any) -> Any>()
|
||||
private val transformations: ConcurrentHashMap<String, Transformation> = ConcurrentHashMap()
|
||||
private val parameterizedTransformation: ConcurrentHashMap<String, ParameterizedTransformation> =
|
||||
ConcurrentHashMap()
|
||||
|
||||
init {
|
||||
register<String>("trim") {
|
||||
(it as String).trim()
|
||||
register("trim") { it.trim() }
|
||||
register("replace") { input, parameters ->
|
||||
require(parameters.size == 2) {
|
||||
"'replace' requires exactly 2 parameters"
|
||||
}
|
||||
input.replace(parameters[0], parameters[1])
|
||||
}
|
||||
register("regexReplace") { input, params ->
|
||||
require(params.size == 2) {
|
||||
"'regexReplace' requires exactly 2 parameters"
|
||||
}
|
||||
input.replace(params[0].toRegex(), params[1])
|
||||
}
|
||||
register<String>("replace", { s: Any ->
|
||||
Integer.
|
||||
})
|
||||
}
|
||||
|
||||
// fun <T : Any> register(name: String, transformation: (input: Any) -> T) {
|
||||
// transformations[name] = transformation
|
||||
// }
|
||||
|
||||
fun <T : Any> register(name: String, transformation: Transformation) {
|
||||
fun register(name: String, transformation: Transformation) {
|
||||
transformations.put(name, transformation)
|
||||
}
|
||||
|
||||
fun register(name: String, transformation: ParameterizedTransformation) {
|
||||
parameterizedTransformation.put(name, transformation)
|
||||
}
|
||||
|
||||
/**
 * Resolves the transformation registered under the step's name and validates that the
 * step's parameter list fits the kind of transformation found.
 *
 * Parameterless transformations are looked up first, then parameterized ones.
 *
 * @throws IllegalArgumentException when a parameterless transformation is given parameters,
 * when a parameterized transformation is given none, or when the name is not registered.
 */
fun getTransformation(transformationStep: TransformationStepConfig): AbstractTransformation {
    val name = transformationStep.name()
    val parameters = transformationStep.parameters()

    // A single lookup per map avoids the containsKey-then-`!!` double access.
    val plain = transformations[name]
    if (plain != null) {
        require(parameters.isEmpty()) { "'$name' doesn't accept parameters" }
        return plain
    }

    val parameterized = parameterizedTransformation[name]
    if (parameterized != null) {
        require(parameters.isNotEmpty()) { "'$name' requires parameters" }
        return parameterized
    }

    throw IllegalArgumentException("Unknown transformation: '$name'")
}
|
||||
|
||||
/**
 * Runs [input] through every step in [steps], in order; the output of one step is the
 * input of the next. With an empty [steps] list, [input] is returned unchanged.
 *
 * Each step is resolved (and its parameters validated) by [getTransformation].
 *
 * @throws IllegalArgumentException when a step is unknown or its parameters do not fit.
 * @throws IllegalStateException when the resolved transformation is of an unsupported kind.
 */
fun applyTransformations(input: String, steps: List<TransformationStepConfig>): String {
    return steps.fold(input) { current, step ->
        // Use the instance that was just resolved instead of looking it up a second time.
        when (val transformation = getTransformation(step)) {
            is Transformation -> transformation.apply(current)
            is ParameterizedTransformation -> transformation.apply(current, step.parameters())
            else -> throw IllegalStateException("Invalid transformation type")
        }
    }
}
|
||||
|
||||
}
|
||||
279
src/main/kotlin/com/rak/service/AbstractExtractionService.kt
Normal file
279
src/main/kotlin/com/rak/service/AbstractExtractionService.kt
Normal file
@@ -0,0 +1,279 @@
|
||||
package com.rak.service
|
||||
|
||||
import com.rak.config.model.AbstractScrapeTargetConfig
|
||||
import com.rak.config.model.ExtractConfig
|
||||
import com.rak.config.model.ProviderConfig
|
||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||
import com.rak.model.DiscriminatorDirection
|
||||
import com.rak.model.Selector
|
||||
import com.rak.model.exception.ElementNotFoundException
|
||||
import com.rak.model.exception.InvalidConfigurationException
|
||||
import com.rak.model.transform.TransformationRegistry
|
||||
import com.rak.util.CssUtil
|
||||
import com.rak.util.XPathUtil
|
||||
import org.jsoup.nodes.Element
|
||||
import org.jsoup.select.Elements
|
||||
import java.util.Optional
|
||||
import kotlin.jvm.optionals.getOrElse
|
||||
|
||||
// find root element from global or node config
|
||||
// get field target configs as list
|
||||
// extract item from root element via field config
|
||||
|
||||
abstract class AbstractExtractionService<E, T : AbstractScrapeTargetConfig> {
|
||||
|
||||
private val transformationRegistry = TransformationRegistry()
|
||||
|
||||
abstract fun T.getItems(): Map<String, ScrapeTargetFieldConfig>
|
||||
abstract fun extract(
|
||||
element: Element,
|
||||
providerConfig: ProviderConfig,
|
||||
extractionConfig: T
|
||||
): E
|
||||
|
||||
abstract fun extractMultiple(
|
||||
element: Element,
|
||||
providerConfig: ProviderConfig,
|
||||
extractionConfig: T
|
||||
): Collection<E>
|
||||
|
||||
/**
 * Returns the first element matched by the effective root selector (see [getRootElements]).
 *
 * @param element element the root selector is evaluated against.
 * @param globalRootExtractConfig root selector that is used when present.
 * @param nodeRootExtractConfig root selector that is used when the global one is absent.
 * @throws ElementNotFoundException when the root selector matches nothing.
 */
fun getRootElement(
    element: Element,
    globalRootExtractConfig: Optional<ExtractConfig>,
    nodeRootExtractConfig: Optional<ExtractConfig>
): Element {
    val candidates = getRootElements(
        element,
        globalRootExtractConfig,
        nodeRootExtractConfig
    )
    return candidates.firstOrNull()
        ?: throw ElementNotFoundException("No element matched the configured root selector")
}
|
||||
|
||||
/**
 * Returns every element matched by the effective root selector.
 *
 * The effective selector is [globalRootExtractConfig] when present, otherwise
 * [nodeRootExtractConfig].
 *
 * @param element element the root selector is evaluated against.
 * @throws InvalidConfigurationException when neither root selector is present.
 */
fun getRootElements(
    element: Element,
    globalRootExtractConfig: Optional<ExtractConfig>,
    nodeRootExtractConfig: Optional<ExtractConfig>
): Elements {
    val rootExtractConfig: ExtractConfig = globalRootExtractConfig.getOrElse {
        nodeRootExtractConfig.orElseThrow {
            InvalidConfigurationException(
                "No 'root' selector configured: neither the global nor the node root config is present"
            )
        }
    }

    return getElementsFromElementByExtractConfig(
        element,
        rootExtractConfig
    )
}
|
||||
|
||||
/**
 * Evaluates [step] against [element] and returns the first match,
 * or `null` when the selector matches nothing.
 */
protected fun getElementFromDocumentByExtractConfig(
    element: Element,
    step: ExtractConfig,
): Element? {
    val matches = getElementsFromElementByExtractConfig(element, step)
    return matches.firstOrNull()
}
|
||||
|
||||
/**
 * Evaluates the query string of [step] against [element].
 *
 * A step whose selector type is [Selector.CSS] is run as a CSS query; every other
 * selector type is run as an XPath query.
 */
protected fun getElementsFromElementByExtractConfig(
    element: Element,
    step: ExtractConfig,
): Elements {
    val isCssQuery = step.selectorType() == Selector.CSS
    val query = step.getQueryString()

    if (isCssQuery) {
        return element.select(query)
    }
    return element.selectXpath(query)
}
|
||||
|
||||
/**
 * Extracts one text value per configured field and returns them keyed by field identifier.
 *
 * For every field the root element is resolved from the target-level root config, falling
 * back to the field's own root config, and the field's text is then extracted from it.
 *
 * @throws ElementNotFoundException when a root element or a field's text cannot be found.
 */
protected fun extractAsMap(
    document: Element,
    extractionConfig: T
): Map<String, String> {
    val targetRootConfig = { extractionConfig.getRootConfig() }

    return extractionConfig.getItems().mapValues { (identifier, fieldConfig) ->
        val fieldRoot = getRootElement(
            document,
            targetRootConfig(),
            fieldConfig.getRootConfig()
        )

        extractTextFromElementByTargetFieldConfig(fieldRoot, fieldConfig)
            ?: throw ElementNotFoundException("Could not find element for '$identifier'")
    }
}
|
||||
|
||||
/**
 * Extracts a list of rows from [element]: for every configured field the root elements are
 * resolved (target-level root config first, field-level root config as fallback) and the
 * value extracted from the n-th root element is stored in the n-th row under the field's
 * identifier.
 *
 * @throws ElementNotFoundException when a field's text cannot be extracted from a root element.
 */
fun extractAsListOfMaps(
    element: Element,
    extractionConfig: T
): List<Map<String, String>> {
    val resultList = mutableListOf<MutableMap<String, String>>()

    extractionConfig.getItems().forEach { (identifier, fieldConfig) ->
        val rootElements = getRootElements(
            element,
            extractionConfig.getRootConfig(),
            fieldConfig.getRootConfig()
        )
        putFieldValuesIntoRows(resultList, identifier, fieldConfig, rootElements)
    }

    return resultList
}

/**
 * Extracts a list of rows from already-resolved root [elements]: the value every configured
 * field yields for the n-th element is stored in the n-th row under the field's identifier.
 *
 * @throws ElementNotFoundException when a field's text cannot be extracted from an element.
 */
fun extractAsListOfMaps(
    elements: Elements,
    extractionConfig: T
): List<Map<String, String>> {
    val resultList = mutableListOf<MutableMap<String, String>>()

    extractionConfig.getItems().forEach { (identifier, fieldConfig) ->
        putFieldValuesIntoRows(resultList, identifier, fieldConfig, elements)
    }

    return resultList
}

/**
 * Extracts [fieldConfig]'s text from each of [rootElements] and writes it under [identifier]
 * into the row with the same index, appending a new row when that index does not exist yet.
 */
private fun putFieldValuesIntoRows(
    rows: MutableList<MutableMap<String, String>>,
    identifier: String,
    fieldConfig: ScrapeTargetFieldConfig,
    rootElements: List<Element>,
) {
    rootElements.forEachIndexed { index, rootElement ->
        val extractedText = extractTextFromElementByTargetFieldConfig(
            rootElement,
            fieldConfig
        ) ?: throw ElementNotFoundException("Could not find element for '$identifier'")

        // Look the row up without relying on IndexOutOfBoundsException for control flow.
        val row = rows.getOrNull(index)
            ?: mutableMapOf<String, String>().also { rows.add(it) }

        row[identifier] = extractedText
    }
}
|
||||
|
||||
/**
 * Extracts rows from the target's root elements after aligning their number with the number
 * of discriminator elements found under the first root element.
 *
 * When there are fewer discriminator elements than root elements, surplus root elements are
 * dropped: with direction [DiscriminatorDirection.DESC] the trailing ones, otherwise the
 * leading ones. The remaining elements are passed to [extractAsListOfMaps].
 *
 * @return a list that currently always contains exactly one entry: the extracted rows.
 * @throws InvalidConfigurationException when the target has no root config.
 * @throws ElementNotFoundException when no root element is found or a discriminator value
 * cannot be extracted.
 * @throws NoSuchElementException when the discriminator or its root config is not configured.
 */
fun extractWithDiscriminator(
    element: Element,
    extractionConfig: T
): List<List<Map<String, String>>> {
    // Query the root selector once; the first match doubles as the discriminator's scope.
    val rootElements = getRootElements(
        element,
        extractionConfig.getRootConfig(),
        Optional.empty<ExtractConfig>()
    )
    val rootElement = rootElements.firstOrNull()
        ?: throw ElementNotFoundException("No root element found for discriminator extraction")

    val discriminator = extractionConfig.getDiscriminator().orElseThrow {
        NoSuchElementException("No 'discriminator' is configured for this target")
    }
    val discriminatorRoot = discriminator.getRootConfig().orElseThrow {
        NoSuchElementException("The 'discriminator' has no 'root' selector configured")
    }

    val discriminatedElements = getElementsFromElementByExtractConfig(
        rootElement,
        discriminatorRoot,
    )

    // Every discriminator element must yield a value; only their count is used afterwards.
    discriminatedElements.forEach { discriminatedElement ->
        extractTextFromElementByTargetFieldConfig(discriminatedElement, discriminator)
            ?: throw ElementNotFoundException("Could not extract a discriminator value")
    }
    val discriminatorCount = discriminatedElements.size

    val definitiveElements = if (discriminatorCount < rootElements.size) {
        val kept = if (discriminator.getDiscriminatorDirection() == DiscriminatorDirection.DESC) {
            rootElements.take(discriminatorCount)
        } else {
            rootElements.takeLast(discriminatorCount)
        }
        Elements(kept)
    } else {
        rootElements
    }

    return listOf(
        extractAsListOfMaps(
            definitiveElements,
            extractionConfig
        )
    )
}
|
||||
|
||||
/**
 * Walks the configured extraction steps starting at a clone of [root] and returns the text
 * produced by the last step, after applying the optional transformation steps.
 *
 * Every step but the last selects the next element to continue from; the last step extracts
 * the text. Each step is evaluated with the CSS or XPath helper matching its selector type.
 *
 * @throws IllegalStateException if an intermediate step matched no element
 * @throws ElementNotFoundException if no text could be extracted
 */
private fun extractTextFromElementByTargetFieldConfig(
    root: Element,
    extractionConfig: ScrapeTargetFieldConfig
): String? {
    val steps = extractionConfig.getExtractionSteps()
    val transformations = extractionConfig.getOptionalTransformationSteps()
    val lastIndex = steps.size - 1

    var cursor: Element? = root.clone()
    var extracted: String? = null

    steps.forEachIndexed { position, step ->
        // A previous navigation step that matched nothing leaves the cursor empty.
        val current = cursor ?: throw IllegalStateException()

        if (position != lastIndex) {
            cursor = when (step.selectorType()) {
                Selector.CSS -> CssUtil.getNextElement(current, step.getQueryString())
                Selector.XPATH -> XPathUtil.getNextElement(current, step.getQueryString())
            }
        } else {
            extracted = when (step.selectorType()) {
                Selector.CSS -> CssUtil.extractResult(current, step.getQueryString())
                Selector.XPATH -> XPathUtil.extractResult(current, step.getQueryString())
            }
        }
    }

    val text = extracted ?: throw ElementNotFoundException("Result could not be extracted")

    return if (transformations.isPresent) {
        transformationRegistry.applyTransformations(text, transformations.get())
    } else {
        text
    }
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
package com.rak.service
|
||||
|
||||
import com.rak.config.model.CardPrintScrapeTargetConfig
|
||||
import com.rak.config.model.ProviderConfig
|
||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||
import com.rak.config.model.SetScrapeTargetConfig
|
||||
import com.rak.model.card.CardPrint
|
||||
import com.rak.model.exception.NotImplementedException
|
||||
import com.rak.model.set.CardSet
|
||||
import com.rak.model.set.RegionalSet
|
||||
import jakarta.enterprise.context.ApplicationScoped
|
||||
import org.jsoup.nodes.Document
|
||||
import org.jsoup.nodes.Element
|
||||
|
||||
/**
 * Extraction service that turns a card-print scrape target into [CardPrint] instances.
 */
@ApplicationScoped
class CardPrintExtractionService : AbstractExtractionService<CardPrint, CardPrintScrapeTargetConfig>() {

    /**
     * Field configurations of a card print, keyed by the identifier each extracted value is
     * stored under in the result maps.
     */
    override fun CardPrintScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
        return mapOf(
            Pair("id", this.getIdConfig()),
            Pair("name", this.getNameConfig()),
            Pair("regionalName", this.getRegionNameConfig()),
            Pair("rarity", this.getRarityConfig()),
        )
    }

    /**
     * Single-print extraction is not supported yet.
     *
     * @throws NotImplementedException always
     */
    override fun extract(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: CardPrintScrapeTargetConfig
    ): CardPrint {
        throw NotImplementedException("Not implemented")
    }

    /**
     * Extracts all card prints found below [element].
     *
     * @return one [CardPrint] per field map produced by `extractWithDiscriminator`
     */
    override fun extractMultiple(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: CardPrintScrapeTargetConfig
    ): Collection<CardPrint> {
        // Each group is a list of field maps. Converting only the first map of a group
        // would drop every other print and fail on an empty group, so convert all of them.
        return extractWithDiscriminator(element, extractionConfig)
            .flatten()
            .map { CardPrint.fromMap(it) }
    }
}
|
||||
273
src/main/kotlin/com/rak/service/ExtractionService.kt
Normal file
273
src/main/kotlin/com/rak/service/ExtractionService.kt
Normal file
@@ -0,0 +1,273 @@
|
||||
package com.rak.service
|
||||
|
||||
import com.rak.config.model.CardPrintScrapeTargetConfig
import com.rak.config.model.ExtractConfig
import com.rak.config.model.ScrapeTargetFieldConfig
import com.rak.model.Selector
import com.rak.model.card.Card
import com.rak.model.card.CardPrint
import com.rak.model.exception.ElementNotFoundException
import com.rak.model.exception.InvalidConfigurationException
import com.rak.model.set.CardSet
import com.rak.model.set.RegionalSet
import com.rak.model.transform.TransformationRegistry
import com.rak.util.CssUtil
import com.rak.util.XPathUtil
import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.select.Elements
import java.util.Optional
|
||||
|
||||
@ApplicationScoped
|
||||
class ExtractionService(
|
||||
private val sourceService: SourceService,
|
||||
) {
|
||||
|
||||
private val transformationRegistry = TransformationRegistry()
|
||||
|
||||
/**
 * Builds a [CardSet] named [setName] from the regional sets found below [root],
 * using the extraction configuration of the given [provider].
 */
fun extractSet(setName: String, root: Element, provider: String): CardSet {
    val regionalSetsOfProvider = extractRegionalSets(root, provider)
    return CardSet(name = setName, regionalSets = regionalSetsOfProvider)
}
|
||||
|
||||
/**
 * Resolves the element that extraction should start from.
 *
 * The global root configuration takes precedence; the node-specific one is only consulted
 * when no global root is configured.
 *
 * @param document the document to search
 * @param globalRootExtractConfig root configuration shared by the whole target, if any
 * @param nodeRootExtractConfig root configuration dedicated to a single field, if any
 * @return the first element matched by the selected root configuration
 * @throws InvalidConfigurationException if neither root configuration is present
 * @throws ElementNotFoundException if the selected configuration matches no element
 */
fun getRootElement(
    document: Document,
    globalRootExtractConfig: Optional<ExtractConfig>,
    nodeRootExtractConfig: Optional<ExtractConfig>
): Element {
    // orElseGet defers the fallback. With orElse(...) the argument is evaluated eagerly, so a
    // missing node root threw even when a global root was configured.
    val rootExtractConfig: ExtractConfig = globalRootExtractConfig.orElseGet {
        nodeRootExtractConfig.orElseThrow {
            InvalidConfigurationException("Neither a global nor a dedicated extraction root is configured")
        }
    }

    return getElementFromDocumentByExtractConfig(document, rootExtractConfig)
        ?: throw ElementNotFoundException("No root could be found")
}
|
||||
|
||||
/**
 * Placeholder for card-print extraction: resolves the root element and extracts the card
 * name (so configuration and lookup errors still surface), but does not build a [CardPrint]
 * yet and always returns `null`.
 */
fun extractCardPrint(document: Document, cardPrintConfig: CardPrintScrapeTargetConfig): CardPrint? {
    val printRoot = getRootElement(
        document,
        cardPrintConfig.getRootConfig(),
        cardPrintConfig.getNameConfig().getRootConfig()
    )

    // The extracted name is not used yet; the call is kept for its failure behaviour.
    extractTextFromElementByTargetFieldConfig(printRoot, cardPrintConfig.getNameConfig())

    return null
}
|
||||
|
||||
/**
 * Extracts a single [RegionalSet] (id, language and region key) from [root].
 *
 * When the set configuration has a global root, all three fields are read directly from
 * [root]. Otherwise each field is read from the element selected by that field's own
 * dedicated root configuration.
 *
 * @param root the element to extract from
 * @param provider id of the source whose set configuration is used
 * @throws IllegalArgumentException if no source with the given id exists
 * @throws ElementNotFoundException if a dedicated field root matches no element
 * @throws IllegalStateException if a field value cannot be found
 */
fun extractRegionalSet(root: Element, provider: String): RegionalSet {
    val source = sourceService.getSourceById(provider)
        ?: throw IllegalArgumentException("Provider $provider not found")
    val setExtractionConfig = source.getTargets().getSetConfig().get()
    val hasGlobalRoot = setExtractionConfig.getRootConfig().isPresent

    // Resolves the element a field is read from and extracts the field's text from it.
    fun extractField(fieldConfig: ScrapeTargetFieldConfig, parameterName: String): String {
        val fieldRoot = if (hasGlobalRoot) {
            root
        } else {
            getElementFromDocumentByExtractConfig(root, fieldConfig.getRootConfig().get())
                ?: throw ElementNotFoundException("No root element found for parameter '$parameterName'")
        }

        return extractTextFromElementByTargetFieldConfig(fieldRoot, fieldConfig)
            ?: throw IllegalStateException("Parameter '$parameterName' could not be found")
    }

    // Each field uses its own configuration; language and key must not be read with the
    // id configuration and id root.
    val setId = extractField(setExtractionConfig.getIdConfig(), "id")
    val setLanguage = extractField(setExtractionConfig.getLanguageConfig(), "language")
    val setKey = extractField(setExtractionConfig.getRegionKeyConfig(), "key")

    return RegionalSet(
        setId,
        setLanguage,
        setKey,
        listOf(),
        -1
    )
}
|
||||
|
||||
/**
 * Extracts all regional sets found below [root].
 *
 * With a global root configured, every element matched by it is turned into one
 * [RegionalSet] via [extractRegionalSet]. Without one, the ids, languages and region keys
 * are collected separately from their dedicated roots and combined by
 * `RegionalSet.flattenFromMemberLists`.
 *
 * @param root the element to extract from
 * @param provider id of the source whose set configuration is used
 * @throws IllegalArgumentException if no source with the given id exists
 * @throws IllegalStateException if a field value cannot be found
 * @throws RuntimeException if a [NoSuchElementException] occurs while collecting the fields
 */
fun extractRegionalSets(root: Element, provider: String): Set<RegionalSet> {
    val source = sourceService.getSourceById(provider)
        ?: throw IllegalArgumentException("Provider $provider not found")
    val setExtractionConfig = source.getTargets().getSetConfig().get()
    val globalRootConfig = setExtractionConfig.getRootConfig()

    if (globalRootConfig.isPresent) {
        return getElementsFromDocumentByExtractConfig(root, globalRootConfig.get())
            .map { extractRegionalSet(it, provider) }
            .toSet()
    }

    // Collects one value per element matched by the field's dedicated root.
    fun extractAll(fieldConfig: ScrapeTargetFieldConfig, parameterName: String): List<String> =
        getElementsFromDocumentByExtractConfig(root, fieldConfig.getRootConfig().get()).map {
            extractTextFromElementByTargetFieldConfig(it, fieldConfig)
                ?: throw IllegalStateException("Parameter '$parameterName' could not be found")
        }

    try {
        val setIds = extractAll(setExtractionConfig.getIdConfig(), "id")
        val languages = extractAll(setExtractionConfig.getLanguageConfig(), "language")
        val setKeys = extractAll(setExtractionConfig.getRegionKeyConfig(), "key")

        return RegionalSet.flattenFromMemberLists(
            setIds,
            languages,
            setKeys
        )
    } catch (ex: NoSuchElementException) {
        // TODO handle me: keep the cause so the missing value can be diagnosed.
        throw RuntimeException(
            "Regional sets could not be extracted for provider '$provider': a required value is missing",
            ex
        )
    }
}
|
||||
|
||||
/**
 * Placeholder for card extraction.
 *
 * When the card configuration has a global root, the English name, card type and
 * description are extracted from it (so lookup failures surface), but no [Card] is built
 * yet. The function currently always returns `null`.
 *
 * @param root the document to extract from
 * @param provider id of the source whose card configuration is used
 * @throws IllegalArgumentException if no source with the given id exists
 * @throws ElementNotFoundException if the configured root matches no element
 * @throws IllegalStateException if one of the field values cannot be found
 */
fun extractCard(root: Document, provider: String): Card? {
    val source = sourceService.getSourceById(provider)
        ?: throw IllegalArgumentException("Provider $provider not found")
    val cardSelector = source.getTargets().getCardConfig().get()
    val rootConfigurationOptional = cardSelector.getRootConfig()

    if (!rootConfigurationOptional.isPresent) {
        return null
    }

    val rootElement: Element = getElementFromDocumentByExtractConfig(
        root,
        rootConfigurationOptional.get()
    ) ?: throw ElementNotFoundException("TODO make this better")

    val englishCardName: String = extractTextFromElementByTargetFieldConfig(
        rootElement,
        cardSelector.getEnglishNameConfig()
    ) ?: throw IllegalStateException("Parameter 'name' could not be found")

    // Card type and description have their own configurations; they must not reuse the
    // English-name configuration.
    val cardType: String = extractTextFromElementByTargetFieldConfig(
        rootElement,
        cardSelector.getCardTypeConfig()
    ) ?: throw IllegalStateException("Parameter 'type' could not be found")

    val description: String = extractTextFromElementByTargetFieldConfig(
        rootElement,
        cardSelector.getDescriptionConfig()
    ) ?: throw IllegalStateException("Parameter 'description' could not be found")

    // TODO: build the Card from englishCardName, cardType and description.
    return null
}
|
||||
|
||||
/**
 * Selects all elements below [document] that match [step]: a CSS query when the step's
 * selector type is [Selector.CSS], an XPath query for any other selector type.
 */
private fun getElementsFromDocumentByExtractConfig(
    document: Element,
    step: ExtractConfig
): Elements {
    val isCssQuery = step.selectorType() == Selector.CSS
    val query = step.getQueryString()

    if (isCssQuery) {
        return document.select(query)
    }
    return document.selectXpath(query)
}
|
||||
|
||||
/**
 * Returns the first element below [document] that matches [step] (CSS for
 * [Selector.CSS], XPath otherwise).
 *
 * Although the return type is nullable, a missing match is reported by throwing.
 *
 * @throws ElementNotFoundException if nothing matches
 */
private fun getElementFromDocumentByExtractConfig(
    document: Element,
    step: ExtractConfig,
): Element? {
    val matches = getElementsFromDocumentByExtractConfig(document, step)
    return matches.firstOrNull() ?: throw ElementNotFoundException("")
}
|
||||
|
||||
/**
 * Walks the configured extraction steps starting at a clone of [root] and returns the text
 * produced by the last step, after applying the optional transformation steps.
 *
 * Every step but the last selects the element to continue from; the last step extracts the
 * text. Each step is evaluated with the helper matching its selector type, so CSS steps are
 * no longer run as XPath queries.
 *
 * @param root the element the first step is evaluated against
 * @param extractionConfig the field configuration providing extraction and transformation steps
 * @return the extracted and transformed text
 * @throws IllegalStateException if an intermediate step matches no element
 * @throws ElementNotFoundException if no text could be extracted (including when no steps are configured)
 */
private fun extractTextFromElementByTargetFieldConfig(
    root: Element,
    extractionConfig: ScrapeTargetFieldConfig
): String? {
    val extractionSteps = extractionConfig.getExtractionSteps()
    val transformationSteps = extractionConfig.getOptionalTransformationSteps()
    val lastIndex = extractionSteps.size - 1

    var currentElement: Element = root.clone()
    var result: String? = null

    extractionSteps.forEachIndexed { index, currentStep ->
        val query = currentStep.getQueryString()

        if (index == lastIndex) {
            result = when (currentStep.selectorType()) {
                Selector.CSS -> CssUtil.extractResult(currentElement, query)
                Selector.XPATH -> XPathUtil.extractResult(currentElement, query)
            }
        } else {
            val nextElement = when (currentStep.selectorType()) {
                Selector.CSS -> CssUtil.getNextElement(currentElement, query)
                Selector.XPATH -> XPathUtil.getNextElement(currentElement, query)
            }
            // Without an element there is nothing to evaluate the following step against.
            currentElement = nextElement
                ?: throw IllegalStateException("Extraction step ${index + 1} matched no element for query '$query'")
        }
    }

    val extractedText = result ?: throw ElementNotFoundException("Result could not be extracted")

    return if (transformationSteps.isPresent) {
        transformationRegistry.applyTransformations(extractedText, transformationSteps.get())
    } else {
        extractedText
    }
}
|
||||
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
package com.rak.service
|
||||
|
||||
import jakarta.ws.rs.GET
|
||||
import jakarta.ws.rs.Path
|
||||
import jakarta.ws.rs.QueryParam
|
||||
import org.eclipse.microprofile.rest.client.inject.RegisterRestClient
|
||||
|
||||
/**
|
||||
* To use it via injection.
|
||||
*
|
||||
* ```kotlin
|
||||
* @Inject
|
||||
* @RestClient
|
||||
* lateinit var myRemoteService: MyRemoteService
|
||||
*
|
||||
* fun doSomething() {
|
||||
* val restClientExtensions = myRemoteService.getExtensionsById("io.quarkus:quarkus-rest-client")
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
@RegisterRestClient(baseUri = "https://stage.code.quarkus.io/api")
|
||||
interface MyRemoteService {
|
||||
|
||||
@GET
|
||||
@Path("/extensions")
|
||||
fun getExtensionsById(@QueryParam("id") id: String): Set<Extension>
|
||||
|
||||
data class Extension(val id: String, val name: String, val shortName: String, val keywords: List<String>)
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
package com.rak.service
|
||||
|
||||
import com.rak.config.model.ProviderConfig
|
||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||
import com.rak.config.model.SetScrapeTargetConfig
|
||||
import com.rak.config.model.SourcesConfig
|
||||
import com.rak.model.exception.NotImplementedException
|
||||
import com.rak.model.set.RegionalSet
|
||||
import jakarta.enterprise.context.ApplicationScoped
|
||||
import org.jsoup.nodes.Element
|
||||
|
||||
/**
 * Extraction service that turns a set scrape target into [RegionalSet] instances, including
 * the card prints extracted by [CardPrintExtractionService].
 */
@ApplicationScoped
class RegionalSetExtractionService(
    private val cardPrintExtractionService: CardPrintExtractionService,
    private val sourcesConfig: SourcesConfig
) : AbstractExtractionService<RegionalSet, SetScrapeTargetConfig>() {

    /**
     * Field configurations of a regional set, keyed by the identifier each extracted value
     * is stored under in the result maps.
     */
    override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
        return mapOf(
            Pair("prefix", this.getIdConfig()),
            Pair("regionCode", this.getRegionKeyConfig()),
            Pair("region", this.getLanguageConfig()),
        )
    }

    /**
     * Single-set extraction is not supported yet.
     *
     * @throws NotImplementedException always
     */
    override fun extract(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: SetScrapeTargetConfig
    ): RegionalSet {
        throw NotImplementedException("Not implemented")
    }

    /**
     * Extracts all regional sets below [element]. Every regional set receives the same
     * collection of card prints, extracted once with the provider's card-print configuration.
     */
    override fun extractMultiple(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: SetScrapeTargetConfig
    ): Collection<RegionalSet> {
        // The set fields are extracted once; a second identical extraction whose result was
        // never read has been removed.
        val regionalSetList = extractAsListOfMaps(element, extractionConfig)

        val cardPrints = cardPrintExtractionService.extractMultiple(
            element,
            providerConfig,
            providerConfig.getTargets().getCardPrintConfiguration().get()
        )

        return regionalSetList.map {
            RegionalSet.fromMap(it, cardPrints)
        }
    }
}
|
||||
@@ -1,91 +1,56 @@
|
||||
package com.rak.service
|
||||
|
||||
import com.rak.config.RegionalSetDefinition
|
||||
import com.rak.config.SourcesConfiguration
|
||||
import com.rak.config.Step
|
||||
import com.rak.util.XPathUtil
|
||||
import com.rak.model.card.Card
|
||||
import com.rak.model.set.CardSet
|
||||
import com.rak.model.set.RegionalSet
|
||||
import jakarta.enterprise.context.ApplicationScoped
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.nodes.Document
|
||||
import org.jsoup.nodes.Element
|
||||
|
||||
@ApplicationScoped
|
||||
class ScrapeService(
|
||||
private val sourceService: SourceService
|
||||
private val sourceService: SourceService,
|
||||
private val extractionService: ExtractionService,
|
||||
private val setExtractionService: SetExtractionService,
|
||||
private val regionalSetExtractionService: RegionalSetExtractionService
|
||||
) {
|
||||
|
||||
companion object {
|
||||
private val TEXT_NODE_MATCHER: Regex = Regex("text\\(\\)$")
|
||||
}
|
||||
|
||||
private fun extractTextFromRootBySteps(
|
||||
root: Element,
|
||||
steps: Set<Step>
|
||||
): String? {
|
||||
var currentElement: Element? = root.clone()
|
||||
var result: String? = null
|
||||
|
||||
for (index in 0 until steps.size) {
|
||||
val currentStep = steps.elementAtOrNull(index) ?: return null
|
||||
if (currentElement == null) {
|
||||
throw IllegalStateException()
|
||||
}
|
||||
|
||||
if (index == steps.size - 1) {
|
||||
result = XPathUtil.extractResult(currentElement, currentStep.value())
|
||||
}
|
||||
else {
|
||||
currentElement = XPathUtil.getNextElement(currentElement, currentStep.value())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
fun extractSet(
|
||||
fun scrapeSet(
|
||||
provider: String,
|
||||
setName: String,
|
||||
): List<Map<String, String>> {
|
||||
val source =
|
||||
sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
||||
|
||||
): CardSet {
|
||||
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
||||
|
||||
val path: String = normalizePath(setName)
|
||||
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
||||
val regionalSetSelector = source.getItems().regionalSet().get()
|
||||
|
||||
val regionalSetRoot = document.select(regionalSetSelector.rootSelector().value())
|
||||
|
||||
return regionalSetRoot.map {
|
||||
val setId: String? = extractTextFromRootBySteps(
|
||||
it,
|
||||
regionalSetSelector.idSelector().steps()
|
||||
)
|
||||
val setLanguage: String? = extractTextFromRootBySteps(
|
||||
it,
|
||||
regionalSetSelector.languageSelector().steps()
|
||||
)
|
||||
val setKey: String? = extractTextFromRootBySteps(
|
||||
it,
|
||||
regionalSetSelector.regionKeySelector().steps()
|
||||
)
|
||||
|
||||
mapOf(
|
||||
Pair("id", setId ?: "N/A"),
|
||||
Pair("language", setLanguage ?: "N/A"),
|
||||
Pair("key", setKey ?: "N/A"),
|
||||
)
|
||||
}
|
||||
// return extractionService.extractSet(setName, document, provider)
|
||||
return setExtractionService.extract(document, source, source.getTargets().getSetConfig().get())
|
||||
}
|
||||
|
||||
fun scrapeRegionalSet(
|
||||
provider: String,
|
||||
setName: String,
|
||||
): RegionalSet {
|
||||
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
||||
|
||||
fun extractCard(
|
||||
val path: String = normalizePath(setName)
|
||||
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
||||
|
||||
return regionalSetExtractionService.extract(document, source, source.getTargets().getSetConfig().get())
|
||||
}
|
||||
|
||||
fun scrapeCard(
|
||||
provider: String,
|
||||
cardName: String,
|
||||
): Map<String, String> {
|
||||
): Card? {
|
||||
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
||||
|
||||
val path: String = normalizePath(cardName)
|
||||
return mapOf()
|
||||
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
||||
|
||||
return extractionService.extractCard(document, provider)
|
||||
}
|
||||
|
||||
/** Returns the URL path used for scraping; currently the input is passed through unchanged. */
private fun normalizePath(path: String): String {
    return path
}
|
||||
|
||||
42
src/main/kotlin/com/rak/service/SetExtractionService.kt
Normal file
42
src/main/kotlin/com/rak/service/SetExtractionService.kt
Normal file
@@ -0,0 +1,42 @@
|
||||
package com.rak.service
|
||||
|
||||
import com.rak.config.model.ProviderConfig
|
||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||
import com.rak.config.model.SetScrapeTargetConfig
|
||||
import com.rak.model.exception.NotImplementedException
|
||||
import com.rak.model.set.CardSet
|
||||
import jakarta.enterprise.context.ApplicationScoped
|
||||
import org.jsoup.nodes.Element
|
||||
|
||||
/**
 * Extraction service that assembles a [CardSet] from the regional sets produced by
 * [RegionalSetExtractionService].
 */
@ApplicationScoped
class SetExtractionService(
    private val regionalSetExtractionService: RegionalSetExtractionService
) : AbstractExtractionService<CardSet, SetScrapeTargetConfig>() {

    /**
     * Field configurations of a set, keyed by the identifier each extracted value is stored
     * under in the result maps.
     */
    override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> = mapOf(
        "prefix" to this.getIdConfig(),
        "regionCode" to this.getRegionKeyConfig(),
        "region" to this.getLanguageConfig(),
    )

    /**
     * Builds a [CardSet] from all regional sets extracted below [element].
     * NOTE(review): the set name is the fixed literal "test"; presumably a placeholder.
     */
    override fun extract(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: SetScrapeTargetConfig
    ): CardSet {
        val regionalSets = regionalSetExtractionService
            .extractMultiple(element, providerConfig, extractionConfig)
            .toSet()

        return CardSet("test", regionalSets)
    }

    /**
     * Extraction of several sets at once is not supported yet.
     *
     * @throws NotImplementedException always
     */
    override fun extractMultiple(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: SetScrapeTargetConfig
    ): Collection<CardSet> = throw NotImplementedException("Not implemented")
}
|
||||
@@ -1,15 +1,86 @@
|
||||
package com.rak.service
|
||||
|
||||
import com.rak.config.SourceConfig
|
||||
import com.rak.config.SourcesConfiguration
|
||||
import com.rak.config.model.CardScrapeTargetConfig
|
||||
import com.rak.config.model.SetScrapeTargetConfig
|
||||
import com.rak.config.model.ProviderConfig
|
||||
import com.rak.config.model.SourcesConfig
|
||||
import com.rak.model.exception.InvalidConfigurationException
|
||||
import io.quarkus.runtime.Startup
|
||||
import jakarta.annotation.PostConstruct
|
||||
import jakarta.enterprise.context.ApplicationScoped
|
||||
|
||||
@Startup
|
||||
@ApplicationScoped
|
||||
class SourceService(
|
||||
val sourcesConfiguration: SourcesConfiguration
|
||||
val sourcesConfiguration: SourcesConfig
|
||||
) {
|
||||
|
||||
fun getSources(): Set<SourceConfig> = sourcesConfiguration.getSources().toSet()
|
||||
fun getSourceById(id: String): SourceConfig? = getSources().firstOrNull { it.getId() == id }
|
||||
/** Validates every configured source once the bean has been constructed. */
@PostConstruct
fun init() {
    for (providerConfig in sourcesConfiguration.getSources()) {
        validateSource(providerConfig)
    }
}
|
||||
|
||||
/**
 * Validates the set and card target configurations of a source; targets that are not
 * configured are skipped.
 */
private fun validateSource(providerConfig: ProviderConfig) {
    val targets = providerConfig.getTargets()
    val setConfig = targets.getSetConfig()
    val cardConfig = targets.getCardConfig()

    setConfig.ifPresent { validateSetExtractConfig(it) }
    cardConfig.ifPresent { validateCardExtractConfig(it) }
}
|
||||
|
||||
/**
 * Checks that a set configuration uses either one global extraction root or a dedicated
 * root on every field (language, id, region key), never a mix of both.
 *
 * @throws InvalidConfigurationException if the root configuration is inconsistent
 */
private fun validateSetExtractConfig(setExtractConfig: SetScrapeTargetConfig) {
    val fieldConfigs = listOf(
        setExtractConfig.getLanguageConfig(),
        setExtractConfig.getIdConfig(),
        setExtractConfig.getRegionKeyConfig()
    )
    val hasGlobalRoot = setExtractConfig.getRootConfig().isPresent

    // A global root excludes dedicated roots on the individual fields.
    if (hasGlobalRoot && fieldConfigs.any { it.getRootConfig().isPresent }) {
        throw InvalidConfigurationException(
            "Dedicated extraction roots cannot be set when a global extraction root is configured"
        )
    }

    // Without a global root every field needs its own root.
    if (!hasGlobalRoot && !fieldConfigs.all { it.getRootConfig().isPresent }) {
        throw InvalidConfigurationException(
            "Dedicated extraction roots have to be set when a global extraction root is not configured"
        )
    }
}
|
||||
|
||||
/**
 * Checks that a card configuration uses either one global extraction root or a dedicated
 * root on every field (English name, description, card type, attack, defense), never a mix.
 *
 * @throws InvalidConfigurationException if the root configuration is inconsistent
 */
private fun validateCardExtractConfig(cardScrapeTargetConfig: CardScrapeTargetConfig) {
    val fieldConfigs = listOf(
        cardScrapeTargetConfig.getEnglishNameConfig(),
        cardScrapeTargetConfig.getDescriptionConfig(),
        cardScrapeTargetConfig.getCardTypeConfig(),
        cardScrapeTargetConfig.getAttackConfig(),
        cardScrapeTargetConfig.getDefenseConfig(),
    )
    val hasGlobalRoot = cardScrapeTargetConfig.getRootConfig().isPresent

    // A global root excludes dedicated roots on the individual fields.
    if (hasGlobalRoot && fieldConfigs.any { it.getRootConfig().isPresent }) {
        throw InvalidConfigurationException(
            "Dedicated extraction roots cannot be set when a global extraction root is configured"
        )
    }

    // Without a global root every field needs its own root.
    if (!hasGlobalRoot && !fieldConfigs.all { it.getRootConfig().isPresent }) {
        throw InvalidConfigurationException(
            "Dedicated extraction roots have to be set when a global extraction root is not configured"
        )
    }
}
|
||||
|
||||
/** Returns the configured sources as a set. */
fun getSources(): Set<ProviderConfig> {
    return sourcesConfiguration.getSources().toSet()
}
|
||||
/** Returns the first configured source whose id equals [id], or `null` if there is none. */
fun getSourceById(id: String): ProviderConfig? {
    return getSources().find { source -> source.getId() == id }
}
|
||||
|
||||
}
|
||||
19
src/main/kotlin/com/rak/util/CssUtil.kt
Normal file
19
src/main/kotlin/com/rak/util/CssUtil.kt
Normal file
@@ -0,0 +1,19 @@
|
||||
package com.rak.util
|
||||
|
||||
import org.jsoup.nodes.Element
|
||||
|
||||
/** CSS-selector helpers for navigating and reading jsoup elements. Not instantiable. */
class CssUtil private constructor() {

    companion object {
        /** Returns the first element below [element] matching the CSS query [path], or `null`. */
        fun getNextElement(element: Element, path: String): Element? =
            element.select(path).firstOrNull()

        /** Returns the text of the first element below [root] matching [path], or `null` if nothing matches. */
        fun extractResult(root: Element, path: String): String? {
            val firstMatch = root.select(path).firstOrNull()
            return firstMatch?.text()
        }
    }

}
|
||||
@@ -26,8 +26,8 @@ class XPathUtil private constructor() {
|
||||
.firstOrNull()?.text()
|
||||
}
|
||||
|
||||
fun getNextElement(root: Element, path: String): Element? {
|
||||
return root.selectXpath(path).firstOrNull()
|
||||
fun getNextElement(element: Element, path: String): Element? {
|
||||
return element.selectXpath(path).firstOrNull()
|
||||
}
|
||||
|
||||
fun extractResult(root: Element, path: String): String? {
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
com.rak.config.converter.TypeSelectorConverter
|
||||
com.rak.config.converter.DiscriminatorDirectionConverter
|
||||
|
||||
@@ -9,8 +9,11 @@ scraper:
|
||||
domain: "yugioh-card.com"
|
||||
url-patterns:
|
||||
- "^https://www\\.yugioh-card\\.com/[a-z]{2}/products/.*$"
|
||||
selectors:
|
||||
targets:
|
||||
card:
|
||||
root:
|
||||
type: css
|
||||
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
||||
name:
|
||||
steps:
|
||||
- type: "css"
|
||||
@@ -27,8 +30,8 @@ scraper:
|
||||
domain: "yugioh.fandom.com"
|
||||
url-patterns:
|
||||
- "^https://yugioh\\.fandom\\.com/wiki/.*$"
|
||||
selectors:
|
||||
regional-set:
|
||||
targets:
|
||||
set:
|
||||
root:
|
||||
type: css
|
||||
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
||||
@@ -36,6 +39,11 @@ scraper:
|
||||
steps:
|
||||
- type: xpath
|
||||
value: "//li/text()"
|
||||
transform:
|
||||
- name: "replace"
|
||||
parameters:
|
||||
- " ("
|
||||
- ""
|
||||
language:
|
||||
steps:
|
||||
- type: xpath
|
||||
@@ -46,3 +54,68 @@ scraper:
|
||||
steps:
|
||||
- type: xpath
|
||||
value: "//li/abbr/text()"
|
||||
card-print:
|
||||
multi: true
|
||||
root:
|
||||
type: css
|
||||
value: ".tabber.wds-tabber > div"
|
||||
discriminator:
|
||||
direction: asc
|
||||
root:
|
||||
type: css
|
||||
value: ".wds-tabs__tab"
|
||||
steps:
|
||||
- type: xpath
|
||||
value: "//li/div/a/text()"
|
||||
id:
|
||||
steps:
|
||||
- type: xpath
|
||||
value: ".//table/tbody/tr[2]/td[1]/a/text()"
|
||||
name:
|
||||
steps:
|
||||
- type: xpath
|
||||
value: ".//table/tbody/tr[2]/td[1]/a/text()"
|
||||
regional-name:
|
||||
steps:
|
||||
- type: xpath
|
||||
value: ".//table/tbody/tr[2]/td[2]/a/text()"
|
||||
rarity:
|
||||
steps:
|
||||
- type: xpath
|
||||
value: ".//table/tbody/tr[2]/td[3]/a/text()"
|
||||
card:
|
||||
name:
|
||||
root:
|
||||
type: css
|
||||
value: ".cardTable"
|
||||
steps:
|
||||
- type: "xpath"
|
||||
value: "./tbody/tr[3]/th/text()"
|
||||
description:
|
||||
root:
|
||||
type: css
|
||||
value: ".cardTable"
|
||||
steps:
|
||||
- type: "xpath"
|
||||
value: "b:contains(Card descriptions)"
|
||||
type:
|
||||
root:
|
||||
type: css
|
||||
value: ".cardTable"
|
||||
steps:
|
||||
- type: "xpath"
|
||||
value: "b:contains(Card descriptions)"
|
||||
attack:
|
||||
root:
|
||||
type: css
|
||||
value: ".cardTable"
|
||||
steps:
|
||||
- type: "xpath"
|
||||
value: "b:contains(Card descriptions)"
|
||||
defense:
|
||||
root:
|
||||
type: css
|
||||
value: ".cardTable"
|
||||
steps:
|
||||
- type: "xpath"
|
||||
value: "b:contains(Card descriptions)"
|
||||
Reference in New Issue
Block a user