Compare commits

..

4 Commits

Author SHA1 Message Date
5930da7a4c Split Set/RegionalSet properly 2025-06-29 16:49:30 +02:00
8a0777e557 Minor config amend
Regards Set ID
2025-06-29 14:56:00 +02:00
2a79218a54 Add RegEx validation
Amend RegExReplace transformer
Amend transformations
2025-06-29 14:52:09 +02:00
ee4ce4fd65 Basic multi-method extraction 2025-06-29 13:21:18 +02:00
18 changed files with 342 additions and 181 deletions

View File

@@ -0,0 +1,19 @@
package com.rak.config.converter
import org.eclipse.microprofile.config.spi.Converter
import java.util.regex.Pattern
import java.util.regex.PatternSyntaxException
/**
 * MicroProfile Config [Converter] that compiles a configuration value into a
 * [Pattern].
 *
 * Both failure modes are reported as [IllegalArgumentException], the exception
 * type the [Converter] contract uses for values that cannot be converted.
 */
class PatternConverter : Converter<Pattern> {
    /**
     * Compiles [value] into a regular-expression [Pattern].
     *
     * @param value raw configuration value holding the regular expression
     * @return the compiled pattern
     * @throws IllegalArgumentException if [value] is blank or is not a valid regular expression
     */
    override fun convert(value: String): Pattern {
        require(value.isNotBlank()) { "Pattern may not be empty" }

        return try {
            Pattern.compile(value)
        } catch (ex: PatternSyntaxException) {
            // Keep the syntax error as the cause so its description and error index are not lost.
            throw IllegalArgumentException("'$value' is not a valid RegEx pattern", ex)
        }
    }
}

View File

@@ -0,0 +1,11 @@
package com.rak.config.model
import io.smallrye.config.WithName
import java.util.Optional
/**
 * Configuration of one extraction method: a list of extraction steps plus an
 * optional list of transformation steps.
 */
interface ExtractorConfig {

    /** Extraction steps, mapped from the `steps` configuration key. */
    @WithName("steps")
    fun getExtractionSteps(): List<ExtractConfig>

    /** Optional transformation steps, mapped from the `transform` configuration key. */
    @WithName("transform")
    fun getOptionalTransformationSteps(): Optional<List<TransformationStepConfig>>
}

View File

@@ -0,0 +1,12 @@
package com.rak.config.model
import io.smallrye.config.WithName
/**
 * Scrape-target configuration for a regional set. Adds the `id`, `language`
 * and `region-key` field configurations to [AbstractScrapeTargetConfig].
 */
interface RegionalSetScrapeTargetConfig : AbstractScrapeTargetConfig {

    /** Field configuration mapped from the `id` key. */
    @WithName("id")
    fun getIdConfig(): ScrapeTargetFieldConfig

    /** Field configuration mapped from the `language` key. */
    @WithName("language")
    fun getLanguageConfig(): ScrapeTargetFieldConfig

    /** Field configuration mapped from the `region-key` key. */
    @WithName("region-key")
    fun getRegionKeyConfig(): ScrapeTargetFieldConfig
}

View File

@@ -4,12 +4,14 @@ import io.smallrye.config.WithName
import java.util.* import java.util.*
interface ScrapeTargetFieldConfig : AbstractScrapeTargetFieldConfig { interface ScrapeTargetFieldConfig : AbstractScrapeTargetFieldConfig {
@WithName("type")
fun getType(): String
@WithName("root") @WithName("root")
fun getRootConfig(): Optional<ExtractConfig> fun getRootConfig(): Optional<ExtractConfig>
@WithName("steps") @WithName("extractors")
fun getExtractionSteps(): List<ExtractConfig> fun getExtractionMethods(): List<ExtractorConfig>
@WithName("transform")
fun getOptionalTransformationSteps(): Optional<List<TransformationStepConfig>>
@WithName("fallback") @WithName("fallback")
fun getFallbackConfiguration(): Optional<FieldConfigFallback> fun getFallbackConfiguration(): Optional<FieldConfigFallback>
@WithName("validation")
fun getOptionalValidation(): Optional<ValidationConfig>
} }

View File

@@ -3,10 +3,6 @@ package com.rak.config.model
import io.smallrye.config.WithName import io.smallrye.config.WithName
interface SetScrapeTargetConfig : AbstractScrapeTargetConfig { interface SetScrapeTargetConfig : AbstractScrapeTargetConfig {
@WithName("id") @WithName("name")
fun getIdConfig(): ScrapeTargetFieldConfig fun getNameConfig(): ScrapeTargetFieldConfig
@WithName("language")
fun getLanguageConfig(): ScrapeTargetFieldConfig
@WithName("region-key")
fun getRegionKeyConfig(): ScrapeTargetFieldConfig
} }

View File

@@ -8,6 +8,8 @@ interface TargetsConfig {
fun getCardConfig(): Optional<CardScrapeTargetConfig> fun getCardConfig(): Optional<CardScrapeTargetConfig>
@WithName("set") @WithName("set")
fun getSetConfig(): Optional<SetScrapeTargetConfig> fun getSetConfig(): Optional<SetScrapeTargetConfig>
@WithName("regional-set")
fun getRegionalSetConfig(): Optional<RegionalSetScrapeTargetConfig>
@WithName("card-print") @WithName("card-print")
fun getCardPrintConfiguration(): Optional<CardPrintScrapeTargetConfig> fun getCardPrintConfiguration(): Optional<CardPrintScrapeTargetConfig>
} }

View File

@@ -0,0 +1,12 @@
package com.rak.config.model
import com.rak.config.converter.PatternConverter
import io.smallrye.config.WithConverter
import io.smallrye.config.WithName
import java.util.regex.Pattern
/**
 * Validation settings for an extracted value.
 */
interface ValidationConfig {

    /**
     * Regular expressions mapped from the `pattern` configuration key, each
     * compiled through [PatternConverter].
     *
     * NOTE(review): the list is exposed as mutable although the visible caller
     * only iterates it — consider narrowing to a read-only `List` once all
     * callers are confirmed.
     * NOTE(review): list-valued properties may be split on commas by the config
     * framework — confirm that patterns containing ',' (e.g. `{1,3}`) survive.
     */
    @WithName("pattern")
    @WithConverter(PatternConverter::class)
    fun getRegexPatterns(): MutableList<Pattern>
}

View File

@@ -0,0 +1,3 @@
package com.rak.model.exception
/** Unchecked exception carrying a [message]; thrown when an extracted value fails its configured validation. */
class ValueValidationException(message: String) : RuntimeException(message)

View File

@@ -1,12 +1,15 @@
package com.rak.model.set package com.rak.model.set
import kotlin.collections.Set
data class CardSet( data class CardSet(
var name: String, var name: String,
val regionalSets: Set<RegionalSet> val regionalSets: Set<RegionalSet>
) { ) {
companion object { companion object {
/**
 * Builds a [CardSet] from a map of extracted values.
 *
 * @param map extracted values; the entry stored under `name` becomes the set name
 * @param regionalSet regional sets to attach to the new card set
 * @return the assembled card set
 * @throws IllegalStateException if [map] holds no value for `name`
 */
fun fromMap(map: Map<String, String>, regionalSet: Set<RegionalSet>): CardSet {
    val setName = map["name"] ?: error("Parameter 'name' not found")
    return CardSet(setName, regionalSet)
}
} }
} }

View File

@@ -22,8 +22,11 @@ class TransformationRegistry {
input.replace(parameters[0], parameters[1]) input.replace(parameters[0], parameters[1])
} }
register("regexReplace") { input, params -> register("regexReplace") { input, params ->
require(params.size == 2) { require(params.size == 1 || params.size == 2) {
"'regexReplace' requires exactly 2 parameters" "'regexReplace' requires either 1 or 2 parameters"
}
if (params.size == 1) {
params.add("")
} }
input.replace(params[0].toRegex(), params[1]) input.replace(params[0].toRegex(), params[1])
} }

View File

@@ -7,7 +7,7 @@ import com.rak.model.exception.TargetNotFoundException
import com.rak.model.set.CardSet import com.rak.model.set.CardSet
import com.rak.model.set.RegionalSet import com.rak.model.set.RegionalSet
import com.rak.service.extract.RegionalSetExtractionService import com.rak.service.extract.RegionalSetExtractionService
import com.rak.service.extract.SetExtractionService import com.rak.service.extract.CardSetExtractionService
import io.quarkus.logging.Log import io.quarkus.logging.Log
import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.Jsoup import org.jsoup.Jsoup
@@ -17,7 +17,7 @@ import java.lang.Exception
@ApplicationScoped @ApplicationScoped
class ScrapeService( class ScrapeService(
private val sourceService: SourceService, private val sourceService: SourceService,
private val setExtractionService: SetExtractionService, private val cardSetExtractionService: CardSetExtractionService,
private val regionalSetExtractionService: RegionalSetExtractionService, private val regionalSetExtractionService: RegionalSetExtractionService,
private val commonCrawlService: CommonCrawlService private val commonCrawlService: CommonCrawlService
) { ) {
@@ -59,13 +59,11 @@ class ScrapeService(
} }
} }
return setExtractionService.extract( return cardSetExtractionService.extract(
document, document,
source, source,
source.getTargets().getSetConfig().get() source.getTargets().getSetConfig().get()
).apply { )
name = setName
}
} }
fun scrapeRegionalSet( fun scrapeRegionalSet(
@@ -77,7 +75,7 @@ class ScrapeService(
val path: String = normalizePath(setName) val path: String = normalizePath(setName)
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get() val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
return regionalSetExtractionService.extract(document, source, source.getTargets().getSetConfig().get()) return regionalSetExtractionService.extract(document, source, source.getTargets().getRegionalSetConfig().get())
} }
fun scrapeCard( fun scrapeCard(

View File

@@ -1,7 +1,7 @@
package com.rak.service package com.rak.service
import com.rak.config.model.CardScrapeTargetConfig import com.rak.config.model.CardScrapeTargetConfig
import com.rak.config.model.SetScrapeTargetConfig import com.rak.config.model.RegionalSetScrapeTargetConfig
import com.rak.config.model.ProviderConfig import com.rak.config.model.ProviderConfig
import com.rak.config.model.SourcesConfig import com.rak.config.model.SourcesConfig
import com.rak.model.exception.InvalidConfigurationException import com.rak.model.exception.InvalidConfigurationException
@@ -21,7 +21,7 @@ class SourceService(
} }
private fun validateSource(providerConfig: ProviderConfig) { private fun validateSource(providerConfig: ProviderConfig) {
val optionalRegionalSetConfig = providerConfig.getTargets().getSetConfig() val optionalRegionalSetConfig = providerConfig.getTargets().getRegionalSetConfig()
val optionalCardConfig = providerConfig.getTargets().getCardConfig() val optionalCardConfig = providerConfig.getTargets().getCardConfig()
if (optionalRegionalSetConfig.isPresent) { if (optionalRegionalSetConfig.isPresent) {
@@ -33,7 +33,7 @@ class SourceService(
} }
} }
private fun validateSetExtractConfig(setExtractConfig: SetScrapeTargetConfig) { private fun validateSetExtractConfig(setExtractConfig: RegionalSetScrapeTargetConfig) {
val selectors = listOf( val selectors = listOf(
setExtractConfig.getLanguageConfig(), setExtractConfig.getLanguageConfig(),
setExtractConfig.getIdConfig(), setExtractConfig.getIdConfig(),

View File

@@ -1,18 +1,17 @@
package com.rak.service.extract package com.rak.service.extract
import com.rak.config.model.AbstractScrapeTargetConfig import com.rak.config.model.*
import com.rak.config.model.ExtractConfig
import com.rak.config.model.ProviderConfig
import com.rak.config.model.ScrapeTargetFieldConfig
import com.rak.model.Selector import com.rak.model.Selector
import com.rak.model.exception.ElementNotFoundException import com.rak.model.exception.ElementNotFoundException
import com.rak.model.exception.InvalidConfigurationException import com.rak.model.exception.InvalidConfigurationException
import com.rak.model.exception.ValueValidationException
import com.rak.model.transform.TransformationRegistry import com.rak.model.transform.TransformationRegistry
import com.rak.util.CssUtil import com.rak.util.CssUtil
import com.rak.util.XPathUtil import com.rak.util.XPathUtil
import io.quarkus.logging.Log
import org.jsoup.nodes.Element import org.jsoup.nodes.Element
import org.jsoup.select.Elements import org.jsoup.select.Elements
import java.util.Optional import java.util.*
import kotlin.jvm.optionals.getOrElse import kotlin.jvm.optionals.getOrElse
// find root element from global or node config // find root element from global or node config
@@ -174,56 +173,93 @@ abstract class AbstractExtractionService<E, T : AbstractScrapeTargetConfig> {
root: Element, root: Element,
extractionConfig: ScrapeTargetFieldConfig extractionConfig: ScrapeTargetFieldConfig
): String? { ): String? {
val extractionSteps = extractionConfig.getExtractionSteps() val extractionMethods = extractionConfig.getExtractionMethods()
val transformationSteps = extractionConfig.getOptionalTransformationSteps()
var currentElement: Element? = root.clone()
var result: String? = null var result: String? = null
try {
for (index in 0 until extractionSteps.size) {
val currentStep = extractionSteps.elementAtOrNull(index) ?: return null
if (currentElement == null) {
throw IllegalStateException()
}
if (index == extractionSteps.size - 1) { for(extractionMethod in extractionMethods) {
result = when (currentStep.selectorType()) { val extractionSteps = extractionMethod.getExtractionSteps()
Selector.CSS -> CssUtil.extractResult(currentElement, currentStep.getQueryString()) val transformationSteps = extractionMethod.getOptionalTransformationSteps()
Selector.XPATH -> XPathUtil.extractResult(currentElement, currentStep.getQueryString())
var currentElement: Element? = root.clone()
var intermediateResult: String? = null
try {
for (index in 0 until extractionSteps.size) {
val currentStep = extractionSteps.elementAtOrNull(index) ?: return null
if (currentElement == null) {
throw IllegalStateException()
}
if (index == extractionSteps.size - 1) {
intermediateResult = when (currentStep.selectorType()) {
Selector.CSS -> CssUtil.extractResult(currentElement, currentStep.getQueryString())
Selector.XPATH -> XPathUtil.extractResult(currentElement, currentStep.getQueryString())
}
}
else {
currentElement = when (currentStep.selectorType()) {
Selector.CSS -> CssUtil.getNextElement(currentElement, currentStep.getQueryString())
Selector.XPATH -> XPathUtil.getNextElement(currentElement, currentStep.getQueryString())
}
} }
} }
else {
currentElement = when (currentStep.selectorType()) {
Selector.CSS -> CssUtil.getNextElement(currentElement, currentStep.getQueryString())
Selector.XPATH -> XPathUtil.getNextElement(currentElement, currentStep.getQueryString())
}
}
}
if (result == null) { if (intermediateResult == null) {
throw ElementNotFoundException("Result could not be extracted") throw ElementNotFoundException("Result could not be extracted")
} } else {
try {
if (transformationSteps.isPresent) { validateValue(intermediateResult, extractionConfig.getOptionalValidation())
result = transformationRegistry.applyTransformations(result, transformationSteps.get()) } catch (ex: ValueValidationException) {
}
} catch (ex: RuntimeException) {
when (ex) {
is ElementNotFoundException,
is IllegalStateException -> {
if (extractionConfig.getFallbackConfiguration().isPresent) {
result = extractionConfig.getFallbackConfiguration().get().getOptionalDefaultValue()
} else {
throw ex throw ex
} }
if (transformationSteps.isPresent) {
intermediateResult = transformationRegistry.applyTransformations(intermediateResult, transformationSteps.get())
}
result = intermediateResult
break
}
} catch (ex: RuntimeException) {
when (ex) {
is ElementNotFoundException,
is IllegalStateException -> {
// if (extractionConfig.getFallbackConfiguration().isPresent) {
// intermediateResult = extractionConfig.getFallbackConfiguration().get().getOptionalDefaultValue()
// } else {
// throw ex
// }
}
is ValueValidationException -> Log.warn(ex.message)
else -> throw ex
} }
else -> throw ex
} }
} }
if (result == null && extractionConfig.getFallbackConfiguration().isPresent) {
result = extractionConfig.getFallbackConfiguration().get().getOptionalDefaultValue()
}
return result return result
} }
/**
 * Checks [value] against every regular expression of the optional validation
 * configuration. Each pattern has to match the entire value.
 *
 * Returns without doing anything when no validation is configured.
 *
 * @param value the extracted value to check
 * @param validationConfig optional validation settings holding the patterns
 * @throws ValueValidationException if [value] fails to match at least one pattern
 */
private fun validateValue(value: String, validationConfig: Optional<ValidationConfig>) {
    val config = validationConfig.orElse(null) ?: return

    val matchesEveryPattern = config.getRegexPatterns().all { pattern ->
        pattern.matcher(value).matches()
    }
    if (matchesEveryPattern) {
        return
    }

    throw ValueValidationException("'$value' does not validate against RegEx(s)")
}
} }

View File

@@ -9,15 +9,13 @@ import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.nodes.Element import org.jsoup.nodes.Element
@ApplicationScoped @ApplicationScoped
class SetExtractionService( class CardSetExtractionService(
private val regionalSetExtractionService: RegionalSetExtractionService private val regionalSetExtractionService: RegionalSetExtractionService
) : AbstractExtractionService<CardSet, SetScrapeTargetConfig>() { ) : AbstractExtractionService<CardSet, SetScrapeTargetConfig>() {
override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> { override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
return mapOf( return mapOf(
Pair("prefix", this.getIdConfig()), Pair("name", this.getNameConfig()),
Pair("regionCode", this.getRegionKeyConfig()),
Pair("region", this.getLanguageConfig()),
) )
} }
@@ -26,9 +24,15 @@ class SetExtractionService(
providerConfig: ProviderConfig, providerConfig: ProviderConfig,
extractionConfig: SetScrapeTargetConfig extractionConfig: SetScrapeTargetConfig
): CardSet { ): CardSet {
return CardSet( val set = extractSingle(element, extractionConfig)
"test",
regionalSetExtractionService.extractMultiple(element, providerConfig, extractionConfig).toSet() return CardSet.fromMap(
set,
regionalSetExtractionService.extractMultiple(
element,
providerConfig,
providerConfig.getTargets().getRegionalSetConfig().get()
).toSet()
) )
} }

View File

@@ -2,7 +2,7 @@ package com.rak.service.extract
import com.rak.config.model.ProviderConfig import com.rak.config.model.ProviderConfig
import com.rak.config.model.ScrapeTargetFieldConfig import com.rak.config.model.ScrapeTargetFieldConfig
import com.rak.config.model.SetScrapeTargetConfig import com.rak.config.model.RegionalSetScrapeTargetConfig
import com.rak.config.model.SourcesConfig import com.rak.config.model.SourcesConfig
import com.rak.model.card.CardPrint import com.rak.model.card.CardPrint
import com.rak.model.exception.NotImplementedException import com.rak.model.exception.NotImplementedException
@@ -14,9 +14,9 @@ import org.jsoup.nodes.Element
class RegionalSetExtractionService( class RegionalSetExtractionService(
private val cardPrintExtractionService: CardPrintExtractionService, private val cardPrintExtractionService: CardPrintExtractionService,
private val sourcesConfig: SourcesConfig private val sourcesConfig: SourcesConfig
) : AbstractExtractionService<RegionalSet, SetScrapeTargetConfig>() { ) : AbstractExtractionService<RegionalSet, RegionalSetScrapeTargetConfig>() {
override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> { override fun RegionalSetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
return mapOf( return mapOf(
Pair("prefix", this.getIdConfig()), Pair("prefix", this.getIdConfig()),
Pair("regionCode", this.getRegionKeyConfig()), Pair("regionCode", this.getRegionKeyConfig()),
@@ -27,7 +27,7 @@ class RegionalSetExtractionService(
override fun extract( override fun extract(
element: Element, element: Element,
providerConfig: ProviderConfig, providerConfig: ProviderConfig,
extractionConfig: SetScrapeTargetConfig extractionConfig: RegionalSetScrapeTargetConfig
): RegionalSet { ): RegionalSet {
throw NotImplementedException("Not implemented") throw NotImplementedException("Not implemented")
} }
@@ -35,7 +35,7 @@ class RegionalSetExtractionService(
override fun extractMultiple( override fun extractMultiple(
element: Element, element: Element,
providerConfig: ProviderConfig, providerConfig: ProviderConfig,
extractionConfig: SetScrapeTargetConfig extractionConfig: RegionalSetScrapeTargetConfig
): List<RegionalSet> { ): List<RegionalSet> {
val regionalSetList = extractMulti(element, extractionConfig) val regionalSetList = extractMulti(element, extractionConfig)
@@ -55,7 +55,7 @@ class RegionalSetExtractionService(
override fun extractNestedMultiples( override fun extractNestedMultiples(
element: Element, element: Element,
providerConfig: ProviderConfig, providerConfig: ProviderConfig,
extractionConfig: SetScrapeTargetConfig extractionConfig: RegionalSetScrapeTargetConfig
): List<List<RegionalSet>> { ): List<List<RegionalSet>> {
throw NotImplementedException("Not implemented") throw NotImplementedException("Not implemented")
} }

View File

@@ -1,6 +1,5 @@
package com.rak.util package com.rak.util
import com.fasterxml.jackson.datatype.jsr310.JSR310Module
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule
import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
import com.rak.model.cc.CCIndexSuccessResponse import com.rak.model.cc.CCIndexSuccessResponse

View File

@@ -4,6 +4,7 @@ import com.rak.model.XPathTarget
import org.jsoup.nodes.Element import org.jsoup.nodes.Element
import org.jsoup.nodes.TextNode import org.jsoup.nodes.TextNode
import org.jsoup.select.Elements import org.jsoup.select.Elements
import java.util.regex.Pattern
import kotlin.coroutines.CoroutineContext import kotlin.coroutines.CoroutineContext
class XPathUtil private constructor() { class XPathUtil private constructor() {
@@ -40,8 +41,8 @@ class XPathUtil private constructor() {
private fun extractTextFromNode(root: Element, xpath: String): String? { private fun extractTextFromNode(root: Element, xpath: String): String? {
return root return root
.selectXpath(xpath, TextNode::class.java) .selectXpath(xpath.replace("/text()", ""))
.firstOrNull()?.text() .text()
} }
fun getNextElement(element: Element, path: String): Element? { fun getNextElement(element: Element, path: String): Element? {

View File

@@ -4,25 +4,25 @@ quarkus:
scraper: scraper:
sources: sources:
- id: konami-official # - id: konami-official
name: "Konami Official Database" # name: "Konami Official Database"
domain: "yugioh-card.com" # domain: "yugioh-card.com"
url-pattern: "^https://www\\.yugioh-card\\.com/[a-z]{2}/products/.*$" # url-pattern: "^https://www\\.yugioh-card\\.com/[a-z]{2}/products/.*$"
targets: # targets:
card: # card:
root: # root:
type: css # type: css
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li" # value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
name: # name:
steps: # steps:
- type: "css" # - type: "css"
value: "h1.product-title" # value: "h1.product-title"
- type: "xpath" # - type: "xpath"
value: "//h1[@itemprop='name']" # value: "//h1[@itemprop='name']"
attack: # attack:
steps: # steps:
- type: "css" # - type: "css"
value: ".atk-value" # value: ".atk-value"
- id: ygo-fandom - id: ygo-fandom
name: "Yu-Gi-Oh Fandom Wiki" name: "Yu-Gi-Oh Fandom Wiki"
@@ -30,102 +30,162 @@ scraper:
url-pattern: "https://yugioh.fandom.com/wiki/%s" url-pattern: "https://yugioh.fandom.com/wiki/%s"
targets: targets:
set: set:
root:
type: css
value: "aside > .pi-title"
name:
type: string
extractors:
- steps:
- type: xpath
value: "//h2/text()"
regional-set:
root: root:
type: css type: css
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li" value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
id: id:
steps: type: int
- type: xpath extractors:
value: "//li/text()" - steps:
transform: - type: xpath
- name: "replace" value: "//li/text()"
parameters: [ transform:
" (", - name: "regexReplace"
"" parameters: [
] " *\\(.+\\)",
""
]
language: language:
steps: type: int
- type: xpath extractors:
value: "//li/abbr" - steps:
- type: xpath - type: xpath
value: "//abbr/@title" value: "//li/abbr"
- type: xpath
value: "//abbr/@title"
region-key: region-key:
steps: type: int
- type: xpath extractors:
value: "//li/abbr/text()" - steps:
- type: xpath
value: "//li/abbr/text()"
card-print: card-print:
multi: true multi: true
discriminator:
root:
type: css
value: ".wds-tab__content"
root: root:
type: css type: css
value: "table > tbody > tr:has(> td)" value: "table > tbody > tr:has(> td)"
discriminator:
type: string
root:
type: css
value: ".wds-tab__content"
id: id:
steps: type: int
- type: xpath extractors:
value: "./td/a[0]" - steps:
- type: xpath - type: xpath
value: "./text()" value: "./td/a[0]"
- type: xpath
value: "./text()"
- steps:
- type: xpath
value: "./td/span/text()"
transform:
- name: "regexReplace"
parameters: [
" .+",
""
]
validation:
  # In a double-quoted YAML scalar "\\d" becomes the regex token \d (one digit).
  # The previous "\\\\d" produced the regex \\d, i.e. a literal backslash followed by 'd'.
  pattern: "^.+-.+\\d.+$"
name: name:
steps: type: int
- type: xpath extractors:
value: "./td/a[1]" - steps:
- type: xpath - type: xpath
value: "./text()" value: "./td[1]"
- type: xpath
value: "./text()"
transform:
- name: "regexReplace"
parameters: [
"\\(.+\\)",
""
]
- name: "removeInnerQuotes"
parameters: []
validation:
pattern: "^\".+\".*"
regional-name: regional-name:
fallback: fallback:
default: "N/A" default: "N/A"
steps: type: int
- type: xpath extractors:
value: "./td[2]" - steps:
- type: xpath - type: xpath
value: "./text()" value: "./td[2]"
transform: - type: xpath
- name: "removeInnerQuotes" value: "./text()"
parameters: [] transform:
- name: "removeInnerQuotes"
parameters: []
validation:
pattern: "^\".+\"$"
rarity: rarity:
fallback: fallback:
default: "N/A" default: "N/A"
steps: type: int
- type: xpath extractors:
value: "./td/a[3]" - steps:
- type: xpath - type: xpath
value: "./text()" value: "./td/a[3]"
card: - type: xpath
name: value: "./text()"
root: - steps:
type: css - type: xpath
value: ".cardTable" value: "./td/a[2]"
steps: - type: xpath
- type: "xpath" value: "./text()"
value: "./tbody/tr[3]/th/text()" - steps:
description: - type: xpath
root: value: "./td/a[1]"
type: css - type: xpath
value: ".cardTable" value: "./text()"
steps: validation:
- type: "xpath" pattern: "^.*(Common|Rare|Print).*$"
value: "b:contains(Card descriptions)" # card:
type: # name:
root: # root:
type: css # type: css
value: ".cardTable" # value: ".cardTable"
steps: # steps:
- type: "xpath" # - type: "xpath"
value: "b:contains(Card descriptions)" # value: "./tbody/tr[3]/th/text()"
attack: # description:
root: # root:
type: css # type: css
value: ".cardTable" # value: ".cardTable"
steps: # steps:
- type: "xpath" # - type: "xpath"
value: "b:contains(Card descriptions)" # value: "b:contains(Card descriptions)"
defense: # type:
root: # root:
type: css # type: css
value: ".cardTable" # value: ".cardTable"
steps: # steps:
- type: "xpath" # - type: "xpath"
value: "b:contains(Card descriptions)" # value: "b:contains(Card descriptions)"
# attack:
# root:
# type: css
# value: ".cardTable"
# steps:
# - type: "xpath"
# value: "b:contains(Card descriptions)"
# defense:
# root:
# type: css
# value: ".cardTable"
# steps:
# - type: "xpath"
# value: "b:contains(Card descriptions)"