Trying out model
This commit is contained in:
@@ -1,14 +0,0 @@
|
||||
package com.rak.model.config;
|
||||
|
||||
import org.eclipse.microprofile.config.spi.Converter;
|
||||
|
||||
public class TestConverter {
|
||||
|
||||
public static class ActualConverter implements Converter<Attribute> {
|
||||
@Override
|
||||
public Attribute convert(final String value) throws IllegalArgumentException, NullPointerException {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
12
src/main/kotlin/com/rak/config/CardDefinition.kt
Normal file
12
src/main/kotlin/com/rak/config/CardDefinition.kt
Normal file
@@ -0,0 +1,12 @@
|
||||
package com.rak.config
|
||||
|
||||
import io.smallrye.config.WithName
|
||||
|
||||
/**
 * Groups the selector definitions configured for a card.
 *
 * Each accessor is bound through [WithName] to the config key given in its annotation.
 */
interface CardDefinition {

    /** Selector definition bound to the `name` key. */
    @WithName("name")
    fun nameSelector(): SelectorDefinition

    /** Selector definition bound to the `attack` key. */
    @WithName("attack")
    fun attackSelector(): SelectorDefinition

    /** Selector definition bound to the `effect` key. */
    @WithName("effect")
    fun effectSelector(): SelectorDefinition
}
|
||||
8
src/main/kotlin/com/rak/config/Items.kt
Normal file
8
src/main/kotlin/com/rak/config/Items.kt
Normal file
@@ -0,0 +1,8 @@
|
||||
package com.rak.config
|
||||
|
||||
import java.util.*
|
||||
|
||||
/**
 * Selector groups a source may define, one per scrapeable item kind.
 *
 * Both groups are wrapped in [Optional], so either may be absent.
 */
interface Items {

    /** Card selector group, if present. */
    fun card(): Optional<CardDefinition>

    /** Regional-set selector group, if present. */
    fun regionalSet(): Optional<RegionalSetDefinition>
}
|
||||
13
src/main/kotlin/com/rak/config/RegionalSetDefinition.kt
Normal file
13
src/main/kotlin/com/rak/config/RegionalSetDefinition.kt
Normal file
@@ -0,0 +1,13 @@
|
||||
package com.rak.config
|
||||
|
||||
import com.rak.config.converter.AbstractModelDefinition
|
||||
import io.smallrye.config.WithName
|
||||
|
||||
/**
 * Groups the selector definitions configured for a regional set.
 *
 * Extends [AbstractModelDefinition]; each accessor declared here is bound through
 * [WithName] to the config key given in its annotation.
 */
interface RegionalSetDefinition : AbstractModelDefinition {

    /** Selector definition bound to the `id` key. */
    @WithName("id")
    fun idSelector(): SelectorDefinition

    /** Selector definition bound to the `language` key. */
    @WithName("language")
    fun languageSelector(): SelectorDefinition

    /** Selector definition bound to the `region-key` key. */
    @WithName("region-key")
    fun regionKeySelector(): SelectorDefinition
}
|
||||
5
src/main/kotlin/com/rak/config/SelectorDefinition.kt
Normal file
5
src/main/kotlin/com/rak/config/SelectorDefinition.kt
Normal file
@@ -0,0 +1,5 @@
|
||||
package com.rak.config
|
||||
|
||||
/**
 * A selector expressed as a set of [Step]s.
 */
interface SelectorDefinition {

    /** The steps that make up this selector. */
    fun steps(): Set<Step>
}
|
||||
19
src/main/kotlin/com/rak/config/SourceConfig.kt
Normal file
19
src/main/kotlin/com/rak/config/SourceConfig.kt
Normal file
@@ -0,0 +1,19 @@
|
||||
package com.rak.config
|
||||
|
||||
import io.smallrye.config.WithName
|
||||
import java.util.*
|
||||
|
||||
/**
 * Configuration of a single scrape source.
 *
 * Every accessor is bound through [WithName] to the config key given in its annotation.
 */
interface SourceConfig {

    /** Value of the `id` key. */
    @WithName("id")
    fun getId(): String

    /** Value of the `name` key. */
    @WithName("name")
    fun getName(): String

    /** Value of the `domain` key. */
    @WithName("domain")
    fun getDomain(): String

    /** Values of the `url-patterns` key; empty [Optional] when the key is absent. */
    @WithName("url-patterns")
    fun getUrlPatterns(): Optional<MutableSet<String>>

    /** Selector groups found under the `selectors` key. */
    @WithName("selectors")
    fun getItems(): Items
}
|
||||
@@ -1,50 +1,12 @@
|
||||
package com.rak.config
|
||||
|
||||
import io.smallrye.config.ConfigMapping
|
||||
import java.util.*
|
||||
|
||||
import io.smallrye.config.WithName
|
||||
|
||||
@ConfigMapping(prefix = "scraper")
|
||||
interface SourcesConfiguration {
|
||||
|
||||
fun sources(): MutableList<SourceConfig>
|
||||
|
||||
interface SourceConfig {
|
||||
fun id(): String
|
||||
fun name(): String
|
||||
fun domain(): String
|
||||
fun urlPatterns(): Optional<MutableSet<String>>
|
||||
fun selectors(): Selectors
|
||||
|
||||
interface Selectors {
|
||||
fun card(): Optional<CardDefinition>
|
||||
fun regionalSet(): Optional<RegionalSetDefinition>
|
||||
|
||||
interface AbstractModelDefinition {
|
||||
fun root(): Optional<String>
|
||||
}
|
||||
|
||||
interface RegionalSetDefinition : AbstractModelDefinition {
|
||||
fun id(): SelectorDefinition
|
||||
fun language(): SelectorDefinition
|
||||
fun regionKey(): SelectorDefinition
|
||||
}
|
||||
|
||||
interface CardDefinition {
|
||||
fun name(): SelectorDefinition
|
||||
fun attack(): SelectorDefinition
|
||||
fun effect(): SelectorDefinition
|
||||
}
|
||||
|
||||
interface SelectorDefinition {
|
||||
fun steps(): Set<StepDefinition>
|
||||
}
|
||||
|
||||
interface StepDefinition {
|
||||
fun type(): String // e.g. css or xpath
|
||||
fun value(): String
|
||||
}
|
||||
}
|
||||
}
|
||||
@WithName("sources")
|
||||
fun getSources(): MutableList<SourceConfig>
|
||||
|
||||
}
|
||||
13
src/main/kotlin/com/rak/config/Step.kt
Normal file
13
src/main/kotlin/com/rak/config/Step.kt
Normal file
@@ -0,0 +1,13 @@
|
||||
package com.rak.config
|
||||
|
||||
import com.rak.config.converter.TypeSelectorConverter
|
||||
import com.rak.model.scrape.selector.Selector
|
||||
import io.smallrye.config.WithConverter
|
||||
import io.smallrye.config.WithName
|
||||
|
||||
/**
 * One step of a selector: a selector type plus the expression to evaluate.
 */
interface Step {

    /**
     * Selector type read from the `type` key and converted to a [Selector]
     * by [TypeSelectorConverter].
     */
    @WithConverter(TypeSelectorConverter::class)
    @WithName("type")
    fun selectorType(): Selector // e.g. css or xpath

    /** The selector expression for this step. */
    fun value(): String
}
|
||||
@@ -0,0 +1,9 @@
|
||||
package com.rak.config.converter
|
||||
|
||||
import com.rak.config.Step
|
||||
import io.smallrye.config.WithName
|
||||
|
||||
/**
 * Base for model definitions that are located through a root selector.
 */
interface AbstractModelDefinition {

    /** The [Step] bound to the `root` key. */
    @WithName("root")
    fun rootSelector(): Step
}
|
||||
@@ -0,0 +1,10 @@
|
||||
package com.rak.config.converter
|
||||
|
||||
import com.rak.model.scrape.selector.Selector
|
||||
import org.eclipse.microprofile.config.spi.Converter
|
||||
|
||||
/**
 * Config converter that turns a textual selector type (e.g. `css`, `xpath`) into a [Selector].
 *
 * Matching is case-insensitive and ignores surrounding whitespace.
 */
class TypeSelectorConverter : Converter<Selector> {

    /**
     * @param value raw configuration value
     * @return the [Selector] whose name equals the trimmed, upper-cased [value]
     * @throws IllegalArgumentException if no [Selector] matches
     */
    override fun convert(value: String): Selector {
        val normalized = value.trim().uppercase()
        return try {
            Selector.valueOf(normalized)
        } catch (e: IllegalArgumentException) {
            // Re-throw with the original config value so a misconfiguration is easy to locate.
            throw IllegalArgumentException("Unsupported selector type '$value'", e)
        }
    }
}
|
||||
@@ -2,22 +2,21 @@ package com.rak.controller
|
||||
|
||||
import com.rak.config.SourcesConfiguration
|
||||
import com.rak.service.ScrapeService
|
||||
import io.quarkus.logging.Log
|
||||
import com.rak.service.SourceService
|
||||
import jakarta.ws.rs.Consumes
|
||||
import jakarta.ws.rs.GET
|
||||
import jakarta.ws.rs.Path
|
||||
import jakarta.ws.rs.Produces
|
||||
import jakarta.ws.rs.core.MediaType
|
||||
import org.jboss.resteasy.reactive.RestPath
|
||||
import org.jboss.resteasy.reactive.RestQuery
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.nodes.Document
|
||||
import org.jsoup.nodes.Element
|
||||
import org.jsoup.nodes.TextNode
|
||||
|
||||
|
||||
@Path("/hello")
|
||||
@Path("/api")
|
||||
class ExampleResource(
|
||||
private val sourcesConfiguration: SourcesConfiguration,
|
||||
private val scrapeService: ScrapeService
|
||||
private val scrapeService: ScrapeService,
|
||||
private val sourceService: SourceService
|
||||
) {
|
||||
|
||||
companion object {
|
||||
@@ -25,38 +24,34 @@ class ExampleResource(
|
||||
}
|
||||
|
||||
@GET
|
||||
@Produces(MediaType.TEXT_PLAIN)
|
||||
fun hello(
|
||||
@RestQuery
|
||||
@Path("/{provider}/set")
|
||||
@Produces(MediaType.APPLICATION_JSON)
|
||||
@Consumes(MediaType.APPLICATION_JSON)
|
||||
fun scrapeSet(
|
||||
@RestPath
|
||||
provider: String,
|
||||
@RestQuery
|
||||
path: String
|
||||
setName: String
|
||||
): Map<String, String> {
|
||||
val sources = sourcesConfiguration
|
||||
.sources()
|
||||
.filter {
|
||||
it.id().equals(provider, ignoreCase = true)
|
||||
}
|
||||
return scrapeService.extractSet(
|
||||
provider,
|
||||
setName
|
||||
)
|
||||
}
|
||||
|
||||
val source = sources.firstOrNull() ?: throw IllegalArgumentException("Provider $provider not found")
|
||||
|
||||
val newPath: String = path
|
||||
.trim()
|
||||
.replace(" ", "_")
|
||||
|
||||
val doc: Document = Jsoup.connect("https://${source.domain()}/$newPath").get()
|
||||
|
||||
val regionalSetSelector = source.selectors().regionalSet().get()
|
||||
val regionalSetRoot = doc.selectFirst(regionalSetSelector.root().get())!!
|
||||
|
||||
val setId: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().id().steps())
|
||||
val setLanguage: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().language().steps())
|
||||
val setKey: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().regionKey().steps())
|
||||
|
||||
return mapOf(
|
||||
Pair("id", setId ?: "N/A"),
|
||||
Pair("language", setLanguage ?: "N/A"),
|
||||
Pair("key", setKey ?: "N/A"),
|
||||
/**
 * GET `/{provider}/card`: forwards the request to [ScrapeService.extractCard].
 *
 * @param provider path segment passed on as the provider
 * @param cardName query parameter passed on as the card name
 * @return whatever [ScrapeService.extractCard] returns
 */
@GET
@Path("/{provider}/card")
@Produces(MediaType.APPLICATION_JSON)
@Consumes(MediaType.APPLICATION_JSON)
fun scrapeCard(
    @RestPath provider: String,
    @RestQuery cardName: String
): Map<String, String> = scrapeService.extractCard(provider, cardName)
|
||||
}
|
||||
@@ -1,5 +0,0 @@
|
||||
package com.rak.model.config
|
||||
|
||||
/**
 * An attribute described by a set of [Step]s.
 *
 * @property steps the steps belonging to this attribute
 */
data class Attribute(val steps: Set<Step>)
|
||||
@@ -1,8 +0,0 @@
|
||||
package com.rak.model.config
|
||||
|
||||
/**
 * [ConfigModel] holding the attributes of a card.
 *
 * @property root root value required by [ConfigModel]
 * @property name attribute for the card's name
 * @property attack attribute for the card's attack
 * @property effect attribute for the card's effect
 */
data class CardConfigModel(
    override val root: String,
    val name: Attribute,
    val attack: Attribute,
    val effect: Attribute,
) : ConfigModel
|
||||
@@ -1,5 +0,0 @@
|
||||
package com.rak.model.config
|
||||
|
||||
/**
 * Common contract of config models: each exposes a [root] string.
 */
interface ConfigModel {

    /** Root value of the model. */
    val root: String
}
|
||||
@@ -1,8 +0,0 @@
|
||||
package com.rak.model.config
|
||||
|
||||
/**
 * [ConfigModel] holding the attributes of a regional set.
 *
 * @property root root value required by [ConfigModel]
 * @property id attribute for the set's id
 * @property language attribute for the set's language
 * @property regionKey attribute for the set's region key
 */
data class RegionalSetConfigModel(
    override val root: String,
    val id: Attribute,
    val language: Attribute,
    val regionKey: Attribute
) : ConfigModel
|
||||
@@ -1,8 +0,0 @@
|
||||
package com.rak.model.config
|
||||
|
||||
/**
 * Value object describing one source.
 *
 * @property id identifier of the source
 * @property name name of the source
 * @property urlPatterns URL patterns associated with the source
 * @property selectors config models attached to the source
 */
data class SourceConfiguration(
    val id: String,
    val name: String,
    val urlPatterns: Set<String>,
    val selectors: Set<ConfigModel>
)
|
||||
@@ -1,11 +0,0 @@
|
||||
package com.rak.model.config
|
||||
|
||||
/**
 * A single selector step: the kind of expression and the expression itself.
 *
 * @property type kind of the expression
 * @property value the expression text
 */
data class Step(
    val type: Type,
    val value: String
) {

    /** Supported expression kinds. */
    enum class Type {
        XPATH,
        CSS
    }
}
|
||||
@@ -1,20 +1,26 @@
|
||||
package com.rak.service
|
||||
|
||||
import com.rak.config.RegionalSetDefinition
|
||||
import com.rak.config.SourcesConfiguration
|
||||
import com.rak.model.XPathTarget
|
||||
import com.rak.config.Step
|
||||
import com.rak.util.XPathUtil
|
||||
import jakarta.enterprise.context.ApplicationScoped
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.nodes.Document
|
||||
import org.jsoup.nodes.Element
|
||||
import org.jsoup.nodes.TextNode
|
||||
import org.jsoup.select.Evaluator
|
||||
import java.util.concurrent.LinkedBlockingQueue
|
||||
|
||||
@ApplicationScoped
|
||||
class ScrapeService {
|
||||
class ScrapeService(
|
||||
private val sourceService: SourceService
|
||||
) {
|
||||
|
||||
fun extractTextFromRootBySteps(
|
||||
companion object {
|
||||
private val TEXT_NODE_MATCHER: Regex = Regex("text\\(\\)$")
|
||||
}
|
||||
|
||||
private fun extractTextFromRootBySteps(
|
||||
root: Element,
|
||||
steps: Set<SourcesConfiguration.SourceConfig.Selectors.StepDefinition>
|
||||
steps: Set<Step>
|
||||
): String? {
|
||||
var currentElement: Element? = root.clone()
|
||||
var result: String? = null
|
||||
@@ -37,4 +43,51 @@ class ScrapeService {
|
||||
return result
|
||||
}
|
||||
|
||||
/**
 * Scrapes the regional-set fields of [setName] from the given provider.
 *
 * The page is fetched from `https://<domain>/<normalized setName>`, the configured root
 * selector picks the element to start from, and every field is extracted relative to it.
 *
 * @param provider id of the configured source to scrape from
 * @param setName set name; normalized with [normalizePath] before being appended to the URL
 * @return map with the keys `id`, `language` and `key`; a field that yields no text is `"N/A"`
 * @throws IllegalArgumentException if the provider is unknown, has no regional-set selectors
 * configured, or the fetched page contains no element matching the root selector
 */
fun extractSet(
    provider: String,
    setName: String,
): Map<String, String> {
    val source = sourceService.getSourceById(provider)
        ?: throw IllegalArgumentException("Provider $provider not found")

    // The regional-set group is optional in the configuration. Validate it before any
    // network traffic and fail with a descriptive message rather than Optional.get()'s
    // bare NoSuchElementException.
    val regionalSetSelector = source.getItems().regionalSet().orElseThrow {
        IllegalArgumentException("Provider $provider has no regional-set selectors configured")
    }

    val path: String = normalizePath(setName)
    val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()

    // NOTE(review): the root step's selectorType() is not consulted; its value is always
    // passed to selectFirst() — confirm that root selectors are meant to be CSS-only.
    val rootQuery = regionalSetSelector.rootSelector().value()
    val regionalSetRoot = document.selectFirst(rootQuery)
        ?: throw IllegalArgumentException(
            "No element matching '$rootQuery' found for set '$setName' at provider $provider"
        )

    // Extracts one field relative to the root, falling back to the "N/A" marker.
    fun textOrDefault(steps: Set<Step>): String =
        extractTextFromRootBySteps(regionalSetRoot, steps) ?: "N/A"

    return mapOf(
        "id" to textOrDefault(regionalSetSelector.idSelector().steps()),
        "language" to textOrDefault(regionalSetSelector.languageSelector().steps()),
        "key" to textOrDefault(regionalSetSelector.regionKeySelector().steps()),
    )
}
|
||||
|
||||
|
||||
/**
 * Placeholder for card scraping; no extraction is implemented yet.
 *
 * @param provider id of the source to scrape from (currently unused)
 * @param cardName name of the card (currently unused)
 * @return always an empty map
 */
fun extractCard(
    provider: String,
    cardName: String,
): Map<String, String> {
    // TODO: implement card extraction. The normalized path that used to be computed here
    // was never read, so it has been dropped until it is actually needed.
    return emptyMap()
}
|
||||
|
||||
/**
 * Turns a free-text name into a URL path segment.
 *
 * Leading and trailing whitespace is removed, then every space becomes an underscore.
 */
private fun normalizePath(path: String): String {
    val trimmed = path.trim()
    return trimmed.replace(" ", "_")
}
|
||||
|
||||
}
|
||||
@@ -1,31 +1,15 @@
|
||||
package com.rak.service
|
||||
|
||||
import com.rak.config.SourceConfig
|
||||
import com.rak.config.SourcesConfiguration
|
||||
import com.rak.model.config.SourceConfiguration
|
||||
import jakarta.annotation.PostConstruct
|
||||
import jakarta.enterprise.context.ApplicationScoped
|
||||
|
||||
@ApplicationScoped
|
||||
class SourceService (
|
||||
private val sourcesConfiguration: SourcesConfiguration
|
||||
val sourcesConfiguration: SourcesConfiguration
|
||||
) {
|
||||
|
||||
private val sources: MutableSet<SourceConfiguration> = mutableSetOf()
|
||||
|
||||
@PostConstruct
|
||||
fun init() {
|
||||
sourcesConfiguration
|
||||
.sources()
|
||||
.forEach { source ->
|
||||
val config = SourceConfiguration(
|
||||
source.id(),
|
||||
source.name(),
|
||||
source.urlPatterns().orElse(mutableSetOf()),
|
||||
setOf()
|
||||
)
|
||||
|
||||
sources.add(config)
|
||||
}
|
||||
}
|
||||
/** Returns the sources from [sourcesConfiguration] as a set. */
fun getSources(): Set<SourceConfig> {
    val configured = sourcesConfiguration.getSources()
    return configured.toSet()
}
|
||||
/**
 * Looks up a configured source by its id.
 *
 * The comparison ignores case, so a request for `Konami-Official` still resolves the
 * source configured as `konami-official`.
 *
 * @param id id of the source to find
 * @return the first source whose id matches, or `null` when none does
 */
fun getSourceById(id: String): SourceConfig? =
    getSources().firstOrNull { it.getId().equals(id, ignoreCase = true) }
|
||||
|
||||
}
|
||||
@@ -1 +1 @@
|
||||
com.rak.model.config.TestConverter$ActualConverter
|
||||
com.rak.config.converter.TypeSelectorConverter
|
||||
@@ -1,3 +1,7 @@
|
||||
quarkus:
|
||||
http:
|
||||
port: 8081
|
||||
|
||||
scraper:
|
||||
sources:
|
||||
- id: konami-official
|
||||
@@ -25,20 +29,20 @@ scraper:
|
||||
- "^https://yugioh\\.fandom\\.com/wiki/.*$"
|
||||
selectors:
|
||||
regional-set:
|
||||
root: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
||||
root:
|
||||
type: css
|
||||
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
||||
id:
|
||||
steps:
|
||||
- type: "xpath"
|
||||
- type: xpath
|
||||
value: "//li/text()"
|
||||
language:
|
||||
steps:
|
||||
- type: "xpath"
|
||||
- type: xpath
|
||||
value: "//li/abbr"
|
||||
- type: "xpath"
|
||||
- type: xpath
|
||||
value: "//abbr/@title"
|
||||
region-key:
|
||||
steps:
|
||||
- type: "xpath"
|
||||
value: "//li/abbr/text()"
|
||||
testing:
|
||||
waaa: test
|
||||
- type: xpath
|
||||
value: "//li/abbr/text()"
|
||||
Reference in New Issue
Block a user