Compare commits
9 Commits
8f934bc2b9
...
configurat
| Author | SHA1 | Date | |
|---|---|---|---|
| 7860819029 | |||
| 304490b52e | |||
| ce5b87c34e | |||
| a9f6efc818 | |||
| 5930da7a4c | |||
| 8a0777e557 | |||
| 2a79218a54 | |||
| ee4ce4fd65 | |||
| 108b4c4c19 |
32
.gitea/workflows/release.yml
Normal file
32
.gitea/workflows/release.yml
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
name: Create and Push Release
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
env:
|
||||||
|
AUTHENTIK_URL: https://auth.smoothbrain.win
|
||||||
|
REGISTRY_URL: gitea.smoothbrain.win
|
||||||
|
IMAGE_OWNER: rak
|
||||||
|
IMAGE_NAME: dex-scraper-java
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
|
||||||
|
release:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Setup JDK
|
||||||
|
uses: https://gitea.smoothbrain.win/rak/setup-java@main
|
||||||
|
with:
|
||||||
|
distribution: 'corretto'
|
||||||
|
java-version: '21.0.6'
|
||||||
|
cache: 'gradle'
|
||||||
|
|
||||||
|
- name: Build & Push Image
|
||||||
|
env:
|
||||||
|
QUARKUS_CONTAINER_IMAGE_USERNAME: ${{ secrets.CI_SERVICE_ACCOUNT }}
|
||||||
|
QUARKUS_CONTAINER_IMAGE_PASSWORD: ${{ secrets.CI_SERVICE_ACCOUNT_PASSWORD }}
|
||||||
|
run: |
|
||||||
|
./gradlew clean build \
|
||||||
|
-Dquarkus.container-image.push=true
|
||||||
@@ -22,6 +22,7 @@ dependencies {
|
|||||||
implementation("io.quarkus:quarkus-rest-client-kotlin-serialization")
|
implementation("io.quarkus:quarkus-rest-client-kotlin-serialization")
|
||||||
implementation("io.quarkus:quarkus-rest-jackson")
|
implementation("io.quarkus:quarkus-rest-jackson")
|
||||||
implementation("io.quarkus:quarkus-kotlin")
|
implementation("io.quarkus:quarkus-kotlin")
|
||||||
|
implementation("io.quarkus:quarkus-smallrye-fault-tolerance")
|
||||||
implementation("org.jetbrains.kotlin:kotlin-stdlib-jdk8")
|
implementation("org.jetbrains.kotlin:kotlin-stdlib-jdk8")
|
||||||
implementation("io.quarkus:quarkus-arc")
|
implementation("io.quarkus:quarkus-arc")
|
||||||
implementation("org.jsoup:jsoup:1.20.1")
|
implementation("org.jsoup:jsoup:1.20.1")
|
||||||
|
|||||||
19
src/main/kotlin/com/rak/config/converter/PatternConverter.kt
Normal file
19
src/main/kotlin/com/rak/config/converter/PatternConverter.kt
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
package com.rak.config.converter
|
||||||
|
|
||||||
|
import org.eclipse.microprofile.config.spi.Converter
|
||||||
|
import java.util.regex.Pattern
|
||||||
|
import java.util.regex.PatternSyntaxException
|
||||||
|
|
||||||
|
/**
 * MicroProfile Config [Converter] that compiles a configuration value into a [Pattern].
 *
 * Both failure modes (blank value, invalid regular expression) are reported as
 * [IllegalArgumentException], the exception type the `Converter` contract designates
 * for values that cannot be converted.
 */
class PatternConverter : Converter<Pattern> {
    /**
     * Compiles [value] as a regular expression.
     *
     * @param value raw configuration value
     * @return the compiled pattern
     * @throws IllegalArgumentException if [value] is blank or not a valid regular expression
     */
    override fun convert(value: String): Pattern {
        require(value.isNotBlank()) { "Pattern may not be empty" }

        return try {
            Pattern.compile(value)
        } catch (ex: PatternSyntaxException) {
            // Keep the original exception as cause so the syntax error details are not lost.
            throw IllegalArgumentException("'$value' is not a valid RegEx pattern", ex)
        }
    }
}
|
||||||
11
src/main/kotlin/com/rak/config/model/ExtractorConfig.kt
Normal file
11
src/main/kotlin/com/rak/config/model/ExtractorConfig.kt
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
package com.rak.config.model
|
||||||
|
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
import java.util.Optional
|
||||||
|
|
||||||
|
/**
 * Configuration mapping for a single extractor: its extraction steps plus
 * optional transformation steps.
 */
interface ExtractorConfig {
    /** Extraction steps, read from the `steps` key. */
    @WithName("steps")
    fun getExtractionSteps(): List<ExtractConfig>

    /** Transformation steps, read from the optional `transform` key. */
    @WithName("transform")
    fun getOptionalTransformationSteps(): Optional<List<TransformationStepConfig>>
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
package com.rak.config.model
|
||||||
|
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
|
/**
 * Scrape-target configuration for a regional set, exposing the field
 * configurations for its id, language and region key.
 */
interface RegionalSetScrapeTargetConfig : AbstractScrapeTargetConfig {
    /** Field configuration read from the `id` key. */
    @WithName("id")
    fun getIdConfig(): ScrapeTargetFieldConfig

    /** Field configuration read from the `language` key. */
    @WithName("language")
    fun getLanguageConfig(): ScrapeTargetFieldConfig

    /** Field configuration read from the `region-key` key. */
    @WithName("region-key")
    fun getRegionKeyConfig(): ScrapeTargetFieldConfig
}
|
||||||
@@ -1,15 +1,21 @@
|
|||||||
package com.rak.config.model
|
package com.rak.config.model
|
||||||
|
|
||||||
|
import io.smallrye.config.WithDefault
|
||||||
import io.smallrye.config.WithName
|
import io.smallrye.config.WithName
|
||||||
import java.util.*
|
import java.util.*
|
||||||
|
|
||||||
interface ScrapeTargetFieldConfig : AbstractScrapeTargetFieldConfig {
|
interface ScrapeTargetFieldConfig : AbstractScrapeTargetFieldConfig {
|
||||||
|
@WithName("type")
|
||||||
|
fun getType(): String
|
||||||
|
@WithName("nullable")
|
||||||
|
@WithDefault("false")
|
||||||
|
fun isNullable(): Boolean
|
||||||
@WithName("root")
|
@WithName("root")
|
||||||
fun getRootConfig(): Optional<ExtractConfig>
|
fun getRootConfig(): Optional<ExtractConfig>
|
||||||
@WithName("steps")
|
@WithName("extractors")
|
||||||
fun getExtractionSteps(): List<ExtractConfig>
|
fun getExtractionMethods(): List<ExtractorConfig>
|
||||||
@WithName("transform")
|
|
||||||
fun getOptionalTransformationSteps(): Optional<List<TransformationStepConfig>>
|
|
||||||
@WithName("fallback")
|
@WithName("fallback")
|
||||||
fun getFallbackConfiguration(): Optional<FieldConfigFallback>
|
fun getFallbackConfiguration(): Optional<FieldConfigFallback>
|
||||||
|
@WithName("validation")
|
||||||
|
fun getOptionalValidation(): Optional<ValidationConfig>
|
||||||
}
|
}
|
||||||
@@ -3,10 +3,6 @@ package com.rak.config.model
|
|||||||
import io.smallrye.config.WithName
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
interface SetScrapeTargetConfig : AbstractScrapeTargetConfig {
|
interface SetScrapeTargetConfig : AbstractScrapeTargetConfig {
|
||||||
@WithName("id")
|
@WithName("name")
|
||||||
fun getIdConfig(): ScrapeTargetFieldConfig
|
fun getNameConfig(): ScrapeTargetFieldConfig
|
||||||
@WithName("language")
|
|
||||||
fun getLanguageConfig(): ScrapeTargetFieldConfig
|
|
||||||
@WithName("region-key")
|
|
||||||
fun getRegionKeyConfig(): ScrapeTargetFieldConfig
|
|
||||||
}
|
}
|
||||||
@@ -8,6 +8,8 @@ interface TargetsConfig {
|
|||||||
fun getCardConfig(): Optional<CardScrapeTargetConfig>
|
fun getCardConfig(): Optional<CardScrapeTargetConfig>
|
||||||
@WithName("set")
|
@WithName("set")
|
||||||
fun getSetConfig(): Optional<SetScrapeTargetConfig>
|
fun getSetConfig(): Optional<SetScrapeTargetConfig>
|
||||||
|
@WithName("regional-set")
|
||||||
|
fun getRegionalSetConfig(): Optional<RegionalSetScrapeTargetConfig>
|
||||||
@WithName("card-print")
|
@WithName("card-print")
|
||||||
fun getCardPrintConfiguration(): Optional<CardPrintScrapeTargetConfig>
|
fun getCardPrintConfiguration(): Optional<CardPrintScrapeTargetConfig>
|
||||||
}
|
}
|
||||||
12
src/main/kotlin/com/rak/config/model/ValidationConfig.kt
Normal file
12
src/main/kotlin/com/rak/config/model/ValidationConfig.kt
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
package com.rak.config.model
|
||||||
|
|
||||||
|
import com.rak.config.converter.PatternConverter
|
||||||
|
import io.smallrye.config.WithConverter
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
import java.util.regex.Pattern
|
||||||
|
|
||||||
|
/** Validation settings for an extracted value. */
interface ValidationConfig {
    /** Regular expressions read from the `pattern` key, converted via [PatternConverter]. */
    @WithName("pattern")
    @WithConverter(PatternConverter::class)
    fun getRegexPatterns(): MutableList<Pattern>
}
|
||||||
5
src/main/kotlin/com/rak/model/ErrorResponse.kt
Normal file
5
src/main/kotlin/com/rak/model/ErrorResponse.kt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
package com.rak.model
|
||||||
|
|
||||||
|
/**
 * Simple error payload.
 *
 * @property message human-readable description of the error
 */
data class ErrorResponse(val message: String)
|
||||||
@@ -1,9 +1,7 @@
|
|||||||
package com.rak.model.card
|
package com.rak.model.card
|
||||||
|
|
||||||
import com.rak.model.set.RegionalSet
|
|
||||||
|
|
||||||
data class CardPrint(
|
data class CardPrint(
|
||||||
val id: String,
|
var id: Int,
|
||||||
val name: String,
|
val name: String,
|
||||||
val regionalName: String? = null,
|
val regionalName: String? = null,
|
||||||
val rarity: String
|
val rarity: String
|
||||||
@@ -11,10 +9,17 @@ data class CardPrint(
|
|||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
/**
 * Builds a [CardPrint] from a map of extracted field values.
 *
 * Reads the keys `id`, `name`, `regionalName` and `rarity`. An empty
 * `regionalName` is treated the same as a missing one and becomes `null`.
 *
 * @param map extracted values keyed by field name
 * @return the card print built from those values
 * @throws IllegalStateException if `id`, `name` or `rarity` is missing
 * @throws NumberFormatException if `id` is present but not a valid integer
 */
fun fromMap(map: Map<String, String>): CardPrint {
    val regionalName = map["regionalName"]?.takeUnless { it == "" }

    return CardPrint(
        // Error messages name the key that was actually looked up.
        map["id"]?.toInt() ?: throw IllegalStateException("Parameter 'id' not found"),
        map["name"] ?: throw IllegalStateException("Parameter 'name' not found"),
        regionalName,
        map["rarity"] ?: throw IllegalStateException("Parameter 'rarity' not found"),
    )
}
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
package com.rak.model.exception
|
||||||
|
|
||||||
|
/** Signals that a value failed validation; [message] describes the failure. */
class ValueValidationException(message: String) : RuntimeException(message)
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
package com.rak.model.exception.mapper
|
||||||
|
|
||||||
|
import com.rak.model.ErrorResponse
|
||||||
|
import com.rak.model.exception.NotImplementedException
|
||||||
|
import jakarta.ws.rs.core.Response
|
||||||
|
import jakarta.ws.rs.ext.ExceptionMapper
|
||||||
|
import jakarta.ws.rs.ext.Provider
|
||||||
|
|
||||||
|
/**
 * JAX-RS exception mapper that turns a [NotImplementedException] into an
 * HTTP 405 response whose entity is an [ErrorResponse].
 */
@Provider
class NotImplementedExceptionMapper : ExceptionMapper<NotImplementedException> {
    /**
     * @param exception the exception to translate
     * @return a 405 response carrying the exception message, or a generic text when it has none
     */
    override fun toResponse(exception: NotImplementedException): Response {
        val body = ErrorResponse(exception.message ?: "Provider does not implement this method")
        return Response.status(405).entity(body).build()
    }
}
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
package com.rak.model.exception.mapper
|
||||||
|
|
||||||
|
import com.rak.model.ErrorResponse
|
||||||
|
import com.rak.model.exception.NotImplementedException
|
||||||
|
import com.rak.model.exception.TargetNotFoundException
|
||||||
|
import jakarta.ws.rs.core.Response
|
||||||
|
import jakarta.ws.rs.ext.ExceptionMapper
|
||||||
|
import jakarta.ws.rs.ext.Provider
|
||||||
|
|
||||||
|
/**
 * JAX-RS exception mapper that turns a [TargetNotFoundException] into an
 * HTTP 404 response whose entity is an [ErrorResponse].
 */
@Provider
class TargetNotFoundExceptionMapper : ExceptionMapper<TargetNotFoundException> {
    /**
     * @param exception the exception to translate
     * @return a 404 response carrying the exception message, or a generic text when it has none
     */
    override fun toResponse(exception: TargetNotFoundException): Response {
        val body = ErrorResponse(exception.message ?: "Scrape target could not be found")
        return Response.status(404).entity(body).build()
    }
}
|
||||||
@@ -1,12 +1,15 @@
|
|||||||
package com.rak.model.set
|
package com.rak.model.set
|
||||||
|
|
||||||
import kotlin.collections.Set
|
|
||||||
|
|
||||||
data class CardSet(
|
data class CardSet(
|
||||||
var name: String,
|
var name: String,
|
||||||
val regionalSets: Set<RegionalSet>
|
val regionalSets: Set<RegionalSet>
|
||||||
) {
|
) {
|
||||||
companion object {
|
companion object {
|
||||||
|
fun fromMap(map: Map<String, String>, regionalSet: Set<RegionalSet>): CardSet {
|
||||||
|
return CardSet(
|
||||||
|
map["name"] ?: throw IllegalStateException("Parameter 'name' not found"),
|
||||||
|
regionalSet
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -22,28 +22,6 @@ data class RegionalSet(
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fun flattenFromMemberLists(
|
|
||||||
idList: List<String>,
|
|
||||||
languageList: List<String>,
|
|
||||||
regionKeyAliasList: List<String>,
|
|
||||||
): MutableSet<RegionalSet> {
|
|
||||||
if (idList.size != languageList.size && idList.size != regionKeyAliasList.size) {
|
|
||||||
throw IllegalArgumentException("Lists have to be the same size")
|
|
||||||
}
|
|
||||||
|
|
||||||
val regionalSetList: MutableSet<RegionalSet> = mutableSetOf()
|
|
||||||
for (index in 0..idList.size - 1) {
|
|
||||||
regionalSetList.add(RegionalSet(
|
|
||||||
prefix = idList[index],
|
|
||||||
region = languageList[index],
|
|
||||||
regionCode = regionKeyAliasList[index],
|
|
||||||
listOf(),
|
|
||||||
numberOfCards = -1
|
|
||||||
))
|
|
||||||
}
|
|
||||||
return regionalSetList
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -11,7 +11,7 @@ class TransformationRegistry {
|
|||||||
|
|
||||||
init {
|
init {
|
||||||
register("trim") { it.trim() }
|
register("trim") { it.trim() }
|
||||||
register("removeInnerQuotes") { it.replace("\"", "") }
|
register("removeInnerQuotes") { it.replace(Regex("^\""), "").replace(Regex("\"$"), "") }
|
||||||
register("replace") { input, parameters ->
|
register("replace") { input, parameters ->
|
||||||
require(parameters.size == 1 || parameters.size == 2) {
|
require(parameters.size == 1 || parameters.size == 2) {
|
||||||
"'replace' requires either 1 or 2 parameters"
|
"'replace' requires either 1 or 2 parameters"
|
||||||
@@ -22,8 +22,11 @@ class TransformationRegistry {
|
|||||||
input.replace(parameters[0], parameters[1])
|
input.replace(parameters[0], parameters[1])
|
||||||
}
|
}
|
||||||
// Replaces every match of the regex in params[0] with params[1]; when only the
// pattern is given, matches are replaced with the empty string (i.e. removed).
register("regexReplace") { input, params ->
    require(params.size == 1 || params.size == 2) {
        "'regexReplace' requires either 1 or 2 parameters"
    }
    // Read the default instead of appending it, so the caller's list is left untouched.
    val replacement = params.getOrElse(1) { "" }
    input.replace(params[0].toRegex(), replacement)
}
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ class CommonCrawlService(
|
|||||||
crawlName.indexName
|
crawlName.indexName
|
||||||
))
|
))
|
||||||
} catch (ex: RuntimeException) {
|
} catch (ex: RuntimeException) {
|
||||||
Log.warn("Error occurred querying crawl '${crawlName.indexName}' for URL $url")
|
Log.warn("Error occurred querying crawl '${crawlName.indexName}' for URL $url", ex)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import com.rak.model.exception.TargetNotFoundException
|
|||||||
import com.rak.model.set.CardSet
|
import com.rak.model.set.CardSet
|
||||||
import com.rak.model.set.RegionalSet
|
import com.rak.model.set.RegionalSet
|
||||||
import com.rak.service.extract.RegionalSetExtractionService
|
import com.rak.service.extract.RegionalSetExtractionService
|
||||||
import com.rak.service.extract.SetExtractionService
|
import com.rak.service.extract.CardSetExtractionService
|
||||||
import io.quarkus.logging.Log
|
import io.quarkus.logging.Log
|
||||||
import jakarta.enterprise.context.ApplicationScoped
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
import org.jsoup.Jsoup
|
import org.jsoup.Jsoup
|
||||||
@@ -17,7 +17,7 @@ import java.lang.Exception
|
|||||||
@ApplicationScoped
|
@ApplicationScoped
|
||||||
class ScrapeService(
|
class ScrapeService(
|
||||||
private val sourceService: SourceService,
|
private val sourceService: SourceService,
|
||||||
private val setExtractionService: SetExtractionService,
|
private val cardSetExtractionService: CardSetExtractionService,
|
||||||
private val regionalSetExtractionService: RegionalSetExtractionService,
|
private val regionalSetExtractionService: RegionalSetExtractionService,
|
||||||
private val commonCrawlService: CommonCrawlService
|
private val commonCrawlService: CommonCrawlService
|
||||||
) {
|
) {
|
||||||
@@ -54,18 +54,16 @@ class ScrapeService(
|
|||||||
try {
|
try {
|
||||||
document = Jsoup.connect(url).get()
|
document = Jsoup.connect(url).get()
|
||||||
} catch(ex: Exception) {
|
} catch(ex: Exception) {
|
||||||
Log.warn("Error occurred during Jsoup query")
|
Log.warn("Error occurred during Jsoup query", ex)
|
||||||
throw TargetNotFoundException("Could not find '$setName' for Provider '$provider'")
|
throw TargetNotFoundException("Could not find '$setName' for Provider '$provider'")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return setExtractionService.extract(
|
return cardSetExtractionService.extract(
|
||||||
document,
|
document,
|
||||||
source,
|
source,
|
||||||
source.getTargets().getSetConfig().get()
|
source.getTargets().getSetConfig().get()
|
||||||
).apply {
|
)
|
||||||
name = setName
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fun scrapeRegionalSet(
|
fun scrapeRegionalSet(
|
||||||
@@ -77,7 +75,7 @@ class ScrapeService(
|
|||||||
val path: String = normalizePath(setName)
|
val path: String = normalizePath(setName)
|
||||||
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
||||||
|
|
||||||
return regionalSetExtractionService.extract(document, source, source.getTargets().getSetConfig().get())
|
return regionalSetExtractionService.extract(document, source, source.getTargets().getRegionalSetConfig().get())
|
||||||
}
|
}
|
||||||
|
|
||||||
fun scrapeCard(
|
fun scrapeCard(
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
package com.rak.service
|
package com.rak.service
|
||||||
|
|
||||||
import com.rak.config.model.CardScrapeTargetConfig
|
import com.rak.config.model.CardScrapeTargetConfig
|
||||||
import com.rak.config.model.SetScrapeTargetConfig
|
import com.rak.config.model.RegionalSetScrapeTargetConfig
|
||||||
import com.rak.config.model.ProviderConfig
|
import com.rak.config.model.ProviderConfig
|
||||||
import com.rak.config.model.SourcesConfig
|
import com.rak.config.model.SourcesConfig
|
||||||
import com.rak.model.exception.InvalidConfigurationException
|
import com.rak.model.exception.InvalidConfigurationException
|
||||||
@@ -21,7 +21,7 @@ class SourceService(
|
|||||||
}
|
}
|
||||||
|
|
||||||
private fun validateSource(providerConfig: ProviderConfig) {
|
private fun validateSource(providerConfig: ProviderConfig) {
|
||||||
val optionalRegionalSetConfig = providerConfig.getTargets().getSetConfig()
|
val optionalRegionalSetConfig = providerConfig.getTargets().getRegionalSetConfig()
|
||||||
val optionalCardConfig = providerConfig.getTargets().getCardConfig()
|
val optionalCardConfig = providerConfig.getTargets().getCardConfig()
|
||||||
|
|
||||||
if (optionalRegionalSetConfig.isPresent) {
|
if (optionalRegionalSetConfig.isPresent) {
|
||||||
@@ -33,7 +33,7 @@ class SourceService(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun validateSetExtractConfig(setExtractConfig: SetScrapeTargetConfig) {
|
private fun validateSetExtractConfig(setExtractConfig: RegionalSetScrapeTargetConfig) {
|
||||||
val selectors = listOf(
|
val selectors = listOf(
|
||||||
setExtractConfig.getLanguageConfig(),
|
setExtractConfig.getLanguageConfig(),
|
||||||
setExtractConfig.getIdConfig(),
|
setExtractConfig.getIdConfig(),
|
||||||
|
|||||||
@@ -6,14 +6,17 @@ import io.netty.buffer.ByteBufInputStream
|
|||||||
import io.quarkus.rest.client.reactive.ClientQueryParam
|
import io.quarkus.rest.client.reactive.ClientQueryParam
|
||||||
import io.quarkus.rest.client.reactive.NotBody
|
import io.quarkus.rest.client.reactive.NotBody
|
||||||
import io.quarkus.rest.client.reactive.Url
|
import io.quarkus.rest.client.reactive.Url
|
||||||
|
import io.smallrye.faulttolerance.api.RateLimit
|
||||||
import jakarta.ws.rs.Consumes
|
import jakarta.ws.rs.Consumes
|
||||||
import jakarta.ws.rs.GET
|
import jakarta.ws.rs.GET
|
||||||
import jakarta.ws.rs.Path
|
import jakarta.ws.rs.Path
|
||||||
import jakarta.ws.rs.PathParam
|
import jakarta.ws.rs.PathParam
|
||||||
import jakarta.ws.rs.QueryParam
|
import jakarta.ws.rs.QueryParam
|
||||||
|
import org.eclipse.microprofile.faulttolerance.Bulkhead
|
||||||
import org.eclipse.microprofile.rest.client.annotation.ClientHeaderParam
|
import org.eclipse.microprofile.rest.client.annotation.ClientHeaderParam
|
||||||
import org.eclipse.microprofile.rest.client.annotation.RegisterProvider
|
import org.eclipse.microprofile.rest.client.annotation.RegisterProvider
|
||||||
import org.eclipse.microprofile.rest.client.inject.RegisterRestClient
|
import org.eclipse.microprofile.rest.client.inject.RegisterRestClient
|
||||||
|
import java.time.temporal.ChronoUnit
|
||||||
|
|
||||||
@RegisterRestClient(baseUri = "whatever")
|
@RegisterRestClient(baseUri = "whatever")
|
||||||
@RegisterProvider(NDJsonReader::class)
|
@RegisterProvider(NDJsonReader::class)
|
||||||
@@ -23,6 +26,11 @@ interface CommonCrawlRestClient {
|
|||||||
@ClientQueryParam(name = "output", value = ["json"])
|
@ClientQueryParam(name = "output", value = ["json"])
|
||||||
@Path("/{index}-index")
|
@Path("/{index}-index")
|
||||||
@Consumes("text/x-ndjson")
|
@Consumes("text/x-ndjson")
|
||||||
|
@RateLimit(
|
||||||
|
value = 1,
|
||||||
|
minSpacing = 5
|
||||||
|
)
|
||||||
|
@Bulkhead
|
||||||
fun queryIndex(
|
fun queryIndex(
|
||||||
@Url
|
@Url
|
||||||
baseUrl: String,
|
baseUrl: String,
|
||||||
|
|||||||
@@ -1,18 +1,17 @@
|
|||||||
package com.rak.service.extract
|
package com.rak.service.extract
|
||||||
|
|
||||||
import com.rak.config.model.AbstractScrapeTargetConfig
|
import com.rak.config.model.*
|
||||||
import com.rak.config.model.ExtractConfig
|
|
||||||
import com.rak.config.model.ProviderConfig
|
|
||||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
|
||||||
import com.rak.model.Selector
|
import com.rak.model.Selector
|
||||||
import com.rak.model.exception.ElementNotFoundException
|
import com.rak.model.exception.ElementNotFoundException
|
||||||
import com.rak.model.exception.InvalidConfigurationException
|
import com.rak.model.exception.InvalidConfigurationException
|
||||||
|
import com.rak.model.exception.ValueValidationException
|
||||||
import com.rak.model.transform.TransformationRegistry
|
import com.rak.model.transform.TransformationRegistry
|
||||||
import com.rak.util.CssUtil
|
import com.rak.util.CssUtil
|
||||||
import com.rak.util.XPathUtil
|
import com.rak.util.XPathUtil
|
||||||
|
import io.quarkus.logging.Log
|
||||||
import org.jsoup.nodes.Element
|
import org.jsoup.nodes.Element
|
||||||
import org.jsoup.select.Elements
|
import org.jsoup.select.Elements
|
||||||
import java.util.Optional
|
import java.util.*
|
||||||
import kotlin.jvm.optionals.getOrElse
|
import kotlin.jvm.optionals.getOrElse
|
||||||
|
|
||||||
// find root element from global or node config
|
// find root element from global or node config
|
||||||
@@ -131,7 +130,11 @@ abstract class AbstractExtractionService<E, T : AbstractScrapeTargetConfig> {
|
|||||||
val extractedText = extractTextFromElementByTargetFieldConfig(
|
val extractedText = extractTextFromElementByTargetFieldConfig(
|
||||||
rootElement,
|
rootElement,
|
||||||
fieldConfig
|
fieldConfig
|
||||||
) ?: throw ElementNotFoundException("Could not find element for '$identifier'")
|
) ?: if (fieldConfig.isNullable()) {
|
||||||
|
""
|
||||||
|
} else {
|
||||||
|
throw ElementNotFoundException("Could not find element for '$identifier'")
|
||||||
|
}
|
||||||
|
|
||||||
val mapToModify: MutableMap<String, String> = try {
|
val mapToModify: MutableMap<String, String> = try {
|
||||||
resultList[index]
|
resultList[index]
|
||||||
@@ -174,11 +177,16 @@ abstract class AbstractExtractionService<E, T : AbstractScrapeTargetConfig> {
|
|||||||
root: Element,
|
root: Element,
|
||||||
extractionConfig: ScrapeTargetFieldConfig
|
extractionConfig: ScrapeTargetFieldConfig
|
||||||
): String? {
|
): String? {
|
||||||
val extractionSteps = extractionConfig.getExtractionSteps()
|
val extractionMethods = extractionConfig.getExtractionMethods()
|
||||||
val transformationSteps = extractionConfig.getOptionalTransformationSteps()
|
var result: String? = null
|
||||||
|
|
||||||
|
|
||||||
|
for(extractionMethod in extractionMethods) {
|
||||||
|
val extractionSteps = extractionMethod.getExtractionSteps()
|
||||||
|
val transformationSteps = extractionMethod.getOptionalTransformationSteps()
|
||||||
|
|
||||||
var currentElement: Element? = root.clone()
|
var currentElement: Element? = root.clone()
|
||||||
var result: String? = null
|
var intermediateResult: String? = null
|
||||||
|
|
||||||
try {
|
try {
|
||||||
for (index in 0 until extractionSteps.size) {
|
for (index in 0 until extractionSteps.size) {
|
||||||
@@ -188,7 +196,7 @@ abstract class AbstractExtractionService<E, T : AbstractScrapeTargetConfig> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (index == extractionSteps.size - 1) {
|
if (index == extractionSteps.size - 1) {
|
||||||
result = when (currentStep.selectorType()) {
|
intermediateResult = when (currentStep.selectorType()) {
|
||||||
Selector.CSS -> CssUtil.extractResult(currentElement, currentStep.getQueryString())
|
Selector.CSS -> CssUtil.extractResult(currentElement, currentStep.getQueryString())
|
||||||
Selector.XPATH -> XPathUtil.extractResult(currentElement, currentStep.getQueryString())
|
Selector.XPATH -> XPathUtil.extractResult(currentElement, currentStep.getQueryString())
|
||||||
}
|
}
|
||||||
@@ -201,29 +209,55 @@ abstract class AbstractExtractionService<E, T : AbstractScrapeTargetConfig> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result == null) {
|
if (intermediateResult == null) {
|
||||||
throw ElementNotFoundException("Result could not be extracted")
|
throw ElementNotFoundException("Result could not be extracted")
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
validateValue(intermediateResult, extractionConfig.getOptionalValidation())
|
||||||
|
} catch (ex: ValueValidationException) {
|
||||||
|
throw ex
|
||||||
}
|
}
|
||||||
|
|
||||||
if (transformationSteps.isPresent) {
|
if (transformationSteps.isPresent) {
|
||||||
result = transformationRegistry.applyTransformations(result, transformationSteps.get())
|
intermediateResult = transformationRegistry.applyTransformations(intermediateResult, transformationSteps.get())
|
||||||
|
}
|
||||||
|
|
||||||
|
result = intermediateResult
|
||||||
|
break
|
||||||
}
|
}
|
||||||
} catch (ex: RuntimeException) {
|
} catch (ex: RuntimeException) {
|
||||||
when (ex) {
|
when (ex) {
|
||||||
is ElementNotFoundException,
|
is ElementNotFoundException,
|
||||||
is IllegalStateException -> {
|
is IllegalStateException,
|
||||||
if (extractionConfig.getFallbackConfiguration().isPresent) {
|
is ValueValidationException -> Log.debug(ex.message)
|
||||||
result = extractionConfig.getFallbackConfiguration().get().getOptionalDefaultValue()
|
|
||||||
} else {
|
|
||||||
throw ex
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else -> throw ex
|
else -> throw ex
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result == null && extractionConfig.getFallbackConfiguration().isPresent) {
|
||||||
|
result = extractionConfig.getFallbackConfiguration().get().getOptionalDefaultValue()
|
||||||
|
}
|
||||||
|
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Checks [value] against every pattern of the optional validation config.
 *
 * Returns without doing anything when no validation is configured.
 *
 * @param value the text to check
 * @param validationConfig optional holder of the regular expressions to match
 * @throws ValueValidationException if [value] does not fully match at least one pattern
 */
private fun validateValue(value: String, validationConfig: Optional<ValidationConfig>) {
    val config = validationConfig.orElse(null) ?: return

    val matchesEveryPattern = config.getRegexPatterns().all { pattern ->
        pattern.matcher(value).matches()
    }

    if (!matchesEveryPattern) {
        throw ValueValidationException("'$value' does not validate against RegEx(s)")
    }
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -9,15 +9,13 @@ import jakarta.enterprise.context.ApplicationScoped
|
|||||||
import org.jsoup.nodes.Element
|
import org.jsoup.nodes.Element
|
||||||
|
|
||||||
@ApplicationScoped
|
@ApplicationScoped
|
||||||
class SetExtractionService(
|
class CardSetExtractionService(
|
||||||
private val regionalSetExtractionService: RegionalSetExtractionService
|
private val regionalSetExtractionService: RegionalSetExtractionService
|
||||||
) : AbstractExtractionService<CardSet, SetScrapeTargetConfig>() {
|
) : AbstractExtractionService<CardSet, SetScrapeTargetConfig>() {
|
||||||
|
|
||||||
override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
|
override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
|
||||||
return mapOf(
|
return mapOf(
|
||||||
Pair("prefix", this.getIdConfig()),
|
Pair("name", this.getNameConfig()),
|
||||||
Pair("regionCode", this.getRegionKeyConfig()),
|
|
||||||
Pair("region", this.getLanguageConfig()),
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -26,9 +24,15 @@ class SetExtractionService(
|
|||||||
providerConfig: ProviderConfig,
|
providerConfig: ProviderConfig,
|
||||||
extractionConfig: SetScrapeTargetConfig
|
extractionConfig: SetScrapeTargetConfig
|
||||||
): CardSet {
|
): CardSet {
|
||||||
return CardSet(
|
val set = extractSingle(element, extractionConfig)
|
||||||
"test",
|
|
||||||
regionalSetExtractionService.extractMultiple(element, providerConfig, extractionConfig).toSet()
|
return CardSet.fromMap(
|
||||||
|
set,
|
||||||
|
regionalSetExtractionService.extractMultiple(
|
||||||
|
element,
|
||||||
|
providerConfig,
|
||||||
|
providerConfig.getTargets().getRegionalSetConfig().get()
|
||||||
|
).toSet()
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2,7 +2,7 @@ package com.rak.service.extract
|
|||||||
|
|
||||||
import com.rak.config.model.ProviderConfig
|
import com.rak.config.model.ProviderConfig
|
||||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||||
import com.rak.config.model.SetScrapeTargetConfig
|
import com.rak.config.model.RegionalSetScrapeTargetConfig
|
||||||
import com.rak.config.model.SourcesConfig
|
import com.rak.config.model.SourcesConfig
|
||||||
import com.rak.model.card.CardPrint
|
import com.rak.model.card.CardPrint
|
||||||
import com.rak.model.exception.NotImplementedException
|
import com.rak.model.exception.NotImplementedException
|
||||||
@@ -14,9 +14,9 @@ import org.jsoup.nodes.Element
|
|||||||
class RegionalSetExtractionService(
|
class RegionalSetExtractionService(
|
||||||
private val cardPrintExtractionService: CardPrintExtractionService,
|
private val cardPrintExtractionService: CardPrintExtractionService,
|
||||||
private val sourcesConfig: SourcesConfig
|
private val sourcesConfig: SourcesConfig
|
||||||
) : AbstractExtractionService<RegionalSet, SetScrapeTargetConfig>() {
|
) : AbstractExtractionService<RegionalSet, RegionalSetScrapeTargetConfig>() {
|
||||||
|
|
||||||
override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
|
override fun RegionalSetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
|
||||||
return mapOf(
|
return mapOf(
|
||||||
Pair("prefix", this.getIdConfig()),
|
Pair("prefix", this.getIdConfig()),
|
||||||
Pair("regionCode", this.getRegionKeyConfig()),
|
Pair("regionCode", this.getRegionKeyConfig()),
|
||||||
@@ -27,7 +27,7 @@ class RegionalSetExtractionService(
|
|||||||
override fun extract(
|
override fun extract(
|
||||||
element: Element,
|
element: Element,
|
||||||
providerConfig: ProviderConfig,
|
providerConfig: ProviderConfig,
|
||||||
extractionConfig: SetScrapeTargetConfig
|
extractionConfig: RegionalSetScrapeTargetConfig
|
||||||
): RegionalSet {
|
): RegionalSet {
|
||||||
throw NotImplementedException("Not implemented")
|
throw NotImplementedException("Not implemented")
|
||||||
}
|
}
|
||||||
@@ -35,7 +35,7 @@ class RegionalSetExtractionService(
|
|||||||
override fun extractMultiple(
|
override fun extractMultiple(
|
||||||
element: Element,
|
element: Element,
|
||||||
providerConfig: ProviderConfig,
|
providerConfig: ProviderConfig,
|
||||||
extractionConfig: SetScrapeTargetConfig
|
extractionConfig: RegionalSetScrapeTargetConfig
|
||||||
): List<RegionalSet> {
|
): List<RegionalSet> {
|
||||||
val regionalSetList = extractMulti(element, extractionConfig)
|
val regionalSetList = extractMulti(element, extractionConfig)
|
||||||
|
|
||||||
@@ -55,7 +55,7 @@ class RegionalSetExtractionService(
|
|||||||
override fun extractNestedMultiples(
|
override fun extractNestedMultiples(
|
||||||
element: Element,
|
element: Element,
|
||||||
providerConfig: ProviderConfig,
|
providerConfig: ProviderConfig,
|
||||||
extractionConfig: SetScrapeTargetConfig
|
extractionConfig: RegionalSetScrapeTargetConfig
|
||||||
): List<List<RegionalSet>> {
|
): List<List<RegionalSet>> {
|
||||||
throw NotImplementedException("Not implemented")
|
throw NotImplementedException("Not implemented")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
package com.rak.util
|
package com.rak.util
|
||||||
|
|
||||||
import com.fasterxml.jackson.datatype.jsr310.JSR310Module
|
|
||||||
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule
|
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule
|
||||||
import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
|
import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
|
||||||
import com.rak.model.cc.CCIndexSuccessResponse
|
import com.rak.model.cc.CCIndexSuccessResponse
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import com.rak.model.XPathTarget
|
|||||||
import org.jsoup.nodes.Element
|
import org.jsoup.nodes.Element
|
||||||
import org.jsoup.nodes.TextNode
|
import org.jsoup.nodes.TextNode
|
||||||
import org.jsoup.select.Elements
|
import org.jsoup.select.Elements
|
||||||
|
import java.util.regex.Pattern
|
||||||
import kotlin.coroutines.CoroutineContext
|
import kotlin.coroutines.CoroutineContext
|
||||||
|
|
||||||
class XPathUtil private constructor() {
|
class XPathUtil private constructor() {
|
||||||
@@ -40,8 +41,8 @@ class XPathUtil private constructor() {
|
|||||||
|
|
||||||
private fun extractTextFromNode(root: Element, xpath: String): String? {
|
private fun extractTextFromNode(root: Element, xpath: String): String? {
|
||||||
return root
|
return root
|
||||||
.selectXpath(xpath, TextNode::class.java)
|
.selectXpath(xpath.replace("/text()", ""))
|
||||||
.firstOrNull()?.text()
|
.text()
|
||||||
}
|
}
|
||||||
|
|
||||||
fun getNextElement(element: Element, path: String): Element? {
|
fun getNextElement(element: Element, path: String): Element? {
|
||||||
|
|||||||
@@ -1,28 +1,37 @@
|
|||||||
quarkus:
|
quarkus:
|
||||||
|
|
||||||
|
container-image:
|
||||||
|
registry: gitea.smoothbrain.win
|
||||||
|
group: rak
|
||||||
|
build: true
|
||||||
|
additional-tags: latest
|
||||||
|
|
||||||
http:
|
http:
|
||||||
port: 8081
|
port: 8081
|
||||||
|
live-reload:
|
||||||
|
instrumentation: true
|
||||||
|
|
||||||
scraper:
|
scraper:
|
||||||
sources:
|
sources:
|
||||||
- id: konami-official
|
# - id: konami-official
|
||||||
name: "Konami Official Database"
|
# name: "Konami Official Database"
|
||||||
domain: "yugioh-card.com"
|
# domain: "yugioh-card.com"
|
||||||
url-pattern: "^https://www\\.yugioh-card\\.com/[a-z]{2}/products/.*$"
|
# url-pattern: "^https://www\\.yugioh-card\\.com/[a-z]{2}/products/.*$"
|
||||||
targets:
|
# targets:
|
||||||
card:
|
# card:
|
||||||
root:
|
# root:
|
||||||
type: css
|
# type: css
|
||||||
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
# value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
||||||
name:
|
# name:
|
||||||
steps:
|
# steps:
|
||||||
- type: "css"
|
# - type: "css"
|
||||||
value: "h1.product-title"
|
# value: "h1.product-title"
|
||||||
- type: "xpath"
|
# - type: "xpath"
|
||||||
value: "//h1[@itemprop='name']"
|
# value: "//h1[@itemprop='name']"
|
||||||
attack:
|
# attack:
|
||||||
steps:
|
# steps:
|
||||||
- type: "css"
|
# - type: "css"
|
||||||
value: ".atk-value"
|
# value: ".atk-value"
|
||||||
|
|
||||||
- id: ygo-fandom
|
- id: ygo-fandom
|
||||||
name: "Yu-Gi-Oh Fandom Wiki"
|
name: "Yu-Gi-Oh Fandom Wiki"
|
||||||
@@ -30,54 +39,102 @@ scraper:
|
|||||||
url-pattern: "https://yugioh.fandom.com/wiki/%s"
|
url-pattern: "https://yugioh.fandom.com/wiki/%s"
|
||||||
targets:
|
targets:
|
||||||
set:
|
set:
|
||||||
|
root:
|
||||||
|
type: css
|
||||||
|
value: "aside > .pi-title"
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
extractors:
|
||||||
|
- steps:
|
||||||
|
- type: xpath
|
||||||
|
value: "//h2/text()"
|
||||||
|
regional-set:
|
||||||
root:
|
root:
|
||||||
type: css
|
type: css
|
||||||
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
||||||
id:
|
id:
|
||||||
steps:
|
type: int
|
||||||
|
extractors:
|
||||||
|
- steps:
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "//li/text()"
|
value: "//li/text()"
|
||||||
transform:
|
transform:
|
||||||
- name: "replace"
|
- name: "regexReplace"
|
||||||
parameters: [
|
parameters: [
|
||||||
" (",
|
" *\\(.+\\)",
|
||||||
""
|
""
|
||||||
]
|
]
|
||||||
language:
|
language:
|
||||||
steps:
|
type: int
|
||||||
|
extractors:
|
||||||
|
- steps:
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "//li/abbr"
|
value: "//li/abbr"
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "//abbr/@title"
|
value: "//abbr/@title"
|
||||||
region-key:
|
region-key:
|
||||||
steps:
|
type: int
|
||||||
|
extractors:
|
||||||
|
- steps:
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "//li/abbr/text()"
|
value: "//li/abbr/text()"
|
||||||
card-print:
|
card-print:
|
||||||
multi: true
|
multi: true
|
||||||
discriminator:
|
|
||||||
root:
|
|
||||||
type: css
|
|
||||||
value: ".wds-tab__content"
|
|
||||||
root:
|
root:
|
||||||
type: css
|
type: css
|
||||||
value: "table > tbody > tr:has(> td)"
|
value: "table > tbody > tr:has(> td)"
|
||||||
|
discriminator:
|
||||||
|
type: string
|
||||||
|
root:
|
||||||
|
type: css
|
||||||
|
value: ".wds-tab__content"
|
||||||
id:
|
id:
|
||||||
steps:
|
type: int
|
||||||
|
extractors:
|
||||||
|
- steps:
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "./td/a[0]"
|
value: "./td/a[0]"
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "./text()"
|
value: "./text()"
|
||||||
name:
|
- steps:
|
||||||
steps:
|
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "./td/a[1]"
|
value: "./td/span/text()"
|
||||||
|
transform:
|
||||||
|
- name: "regexReplace"
|
||||||
|
parameters: [
|
||||||
|
" .+",
|
||||||
|
""
|
||||||
|
]
|
||||||
|
- name: "regexReplace"
|
||||||
|
parameters: [
|
||||||
|
".+-[A-Za-z]*0?",
|
||||||
|
""
|
||||||
|
]
|
||||||
|
validation:
|
||||||
|
pattern: "^.+-.+\\\\d.+$"
|
||||||
|
name:
|
||||||
|
type: int
|
||||||
|
extractors:
|
||||||
|
- steps:
|
||||||
|
- type: xpath
|
||||||
|
value: "./td[1]"
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "./text()"
|
value: "./text()"
|
||||||
|
transform:
|
||||||
|
- name: "regexReplace"
|
||||||
|
parameters: [
|
||||||
|
" ?\\(.+\\)",
|
||||||
|
""
|
||||||
|
]
|
||||||
|
- name: "removeInnerQuotes"
|
||||||
|
parameters: []
|
||||||
|
validation:
|
||||||
|
pattern: "^\".+\".*"
|
||||||
regional-name:
|
regional-name:
|
||||||
fallback:
|
type: int
|
||||||
default: "N/A"
|
nullable: true
|
||||||
steps:
|
extractors:
|
||||||
|
- steps:
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "./td[2]"
|
value: "./td[2]"
|
||||||
- type: xpath
|
- type: xpath
|
||||||
@@ -85,47 +142,63 @@ scraper:
|
|||||||
transform:
|
transform:
|
||||||
- name: "removeInnerQuotes"
|
- name: "removeInnerQuotes"
|
||||||
parameters: []
|
parameters: []
|
||||||
|
validation:
|
||||||
|
pattern: "^\".+\"$"
|
||||||
rarity:
|
rarity:
|
||||||
fallback:
|
fallback:
|
||||||
default: "N/A"
|
default: "N/A"
|
||||||
steps:
|
type: int
|
||||||
|
extractors:
|
||||||
|
- steps:
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "./td/a[3]"
|
value: "./td/a[3]"
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "./text()"
|
value: "./text()"
|
||||||
card:
|
- steps:
|
||||||
name:
|
- type: xpath
|
||||||
root:
|
value: "./td/a[2]"
|
||||||
type: css
|
- type: xpath
|
||||||
value: ".cardTable"
|
value: "./text()"
|
||||||
steps:
|
- steps:
|
||||||
- type: "xpath"
|
- type: xpath
|
||||||
value: "./tbody/tr[3]/th/text()"
|
value: "./td/a[1]"
|
||||||
description:
|
- type: xpath
|
||||||
root:
|
value: "./text()"
|
||||||
type: css
|
validation:
|
||||||
value: ".cardTable"
|
pattern: "^.*(Common|Rare|Print).*$"
|
||||||
steps:
|
# card:
|
||||||
- type: "xpath"
|
# name:
|
||||||
value: "b:contains(Card descriptions)"
|
# root:
|
||||||
type:
|
# type: css
|
||||||
root:
|
# value: ".cardTable"
|
||||||
type: css
|
# steps:
|
||||||
value: ".cardTable"
|
# - type: "xpath"
|
||||||
steps:
|
# value: "./tbody/tr[3]/th/text()"
|
||||||
- type: "xpath"
|
# description:
|
||||||
value: "b:contains(Card descriptions)"
|
# root:
|
||||||
attack:
|
# type: css
|
||||||
root:
|
# value: ".cardTable"
|
||||||
type: css
|
# steps:
|
||||||
value: ".cardTable"
|
# - type: "xpath"
|
||||||
steps:
|
# value: "b:contains(Card descriptions)"
|
||||||
- type: "xpath"
|
# type:
|
||||||
value: "b:contains(Card descriptions)"
|
# root:
|
||||||
defense:
|
# type: css
|
||||||
root:
|
# value: ".cardTable"
|
||||||
type: css
|
# steps:
|
||||||
value: ".cardTable"
|
# - type: "xpath"
|
||||||
steps:
|
# value: "b:contains(Card descriptions)"
|
||||||
- type: "xpath"
|
# attack:
|
||||||
value: "b:contains(Card descriptions)"
|
# root:
|
||||||
|
# type: css
|
||||||
|
# value: ".cardTable"
|
||||||
|
# steps:
|
||||||
|
# - type: "xpath"
|
||||||
|
# value: "b:contains(Card descriptions)"
|
||||||
|
# defense:
|
||||||
|
# root:
|
||||||
|
# type: css
|
||||||
|
# value: ".cardTable"
|
||||||
|
# steps:
|
||||||
|
# - type: "xpath"
|
||||||
|
# value: "b:contains(Card descriptions)"
|
||||||
Reference in New Issue
Block a user