Trying out model
This commit is contained in:
@@ -1,14 +0,0 @@
|
|||||||
package com.rak.model.config;
|
|
||||||
|
|
||||||
import org.eclipse.microprofile.config.spi.Converter;
|
|
||||||
|
|
||||||
public class TestConverter {
|
|
||||||
|
|
||||||
public static class ActualConverter implements Converter<Attribute> {
|
|
||||||
@Override
|
|
||||||
public Attribute convert(final String value) throws IllegalArgumentException, NullPointerException {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
12
src/main/kotlin/com/rak/config/CardDefinition.kt
Normal file
12
src/main/kotlin/com/rak/config/CardDefinition.kt
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
package com.rak.config
|
||||||
|
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
|
/**
 * Configuration mapping that groups the selectors of a card.
 *
 * Every accessor is bound through [WithName] to the configuration key named in its annotation
 * (`name`, `attack`, `effect`) and yields the [SelectorDefinition] configured under that key.
 */
interface CardDefinition {

    /** Selector configured under the `name` key. */
    @WithName("name")
    fun nameSelector(): SelectorDefinition

    /** Selector configured under the `attack` key. */
    @WithName("attack")
    fun attackSelector(): SelectorDefinition

    /** Selector configured under the `effect` key. */
    @WithName("effect")
    fun effectSelector(): SelectorDefinition
}
|
||||||
8
src/main/kotlin/com/rak/config/Items.kt
Normal file
8
src/main/kotlin/com/rak/config/Items.kt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
package com.rak.config
|
||||||
|
|
||||||
|
import java.util.*
|
||||||
|
|
||||||
|
/**
 * Configuration mapping for the item kinds a source can describe.
 *
 * Both sections are optional: an [Optional] is empty when the section is not configured.
 */
interface Items {

    /** Selectors for a card, if configured. */
    fun card(): Optional<CardDefinition>

    /** Selectors for a regional set, if configured. */
    fun regionalSet(): Optional<RegionalSetDefinition>
}
|
||||||
13
src/main/kotlin/com/rak/config/RegionalSetDefinition.kt
Normal file
13
src/main/kotlin/com/rak/config/RegionalSetDefinition.kt
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
package com.rak.config
|
||||||
|
|
||||||
|
import com.rak.config.converter.AbstractModelDefinition
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
|
/**
 * Configuration mapping that groups the selectors of a regional set.
 *
 * Extends [AbstractModelDefinition]; its own accessors are bound through [WithName] to the
 * configuration keys `id`, `language` and `region-key`.
 */
interface RegionalSetDefinition : AbstractModelDefinition {

    /** Selector configured under the `id` key. */
    @WithName("id")
    fun idSelector(): SelectorDefinition

    /** Selector configured under the `language` key. */
    @WithName("language")
    fun languageSelector(): SelectorDefinition

    /** Selector configured under the `region-key` key. */
    @WithName("region-key")
    fun regionKeySelector(): SelectorDefinition
}
|
||||||
5
src/main/kotlin/com/rak/config/SelectorDefinition.kt
Normal file
5
src/main/kotlin/com/rak/config/SelectorDefinition.kt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
package com.rak.config
|
||||||
|
|
||||||
|
/**
 * Configuration mapping for a single selector, expressed as a set of [Step]s.
 */
interface SelectorDefinition {

    /** The steps configured for this selector. */
    fun steps(): Set<Step>
}
|
||||||
19
src/main/kotlin/com/rak/config/SourceConfig.kt
Normal file
19
src/main/kotlin/com/rak/config/SourceConfig.kt
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
package com.rak.config
|
||||||
|
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
import java.util.*
|
||||||
|
|
||||||
|
/**
 * Configuration mapping for one scrape source.
 *
 * Each accessor is bound through [WithName] to the configuration key named in its annotation.
 */
interface SourceConfig {

    /** Value of the `id` key. */
    @WithName("id")
    fun getId(): String

    /** Value of the `name` key. */
    @WithName("name")
    fun getName(): String

    /** Value of the `domain` key. */
    @WithName("domain")
    fun getDomain(): String

    /** Values of the `url-patterns` key; empty when the key is not configured. */
    @WithName("url-patterns")
    fun getUrlPatterns(): Optional<MutableSet<String>>

    /** Item definitions configured under the `selectors` key. */
    @WithName("selectors")
    fun getItems(): Items
}
|
||||||
@@ -1,50 +1,12 @@
|
|||||||
package com.rak.config
|
package com.rak.config
|
||||||
|
|
||||||
import io.smallrye.config.ConfigMapping
|
import io.smallrye.config.ConfigMapping
|
||||||
import java.util.*
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
|
|
||||||
@ConfigMapping(prefix = "scraper")
|
@ConfigMapping(prefix = "scraper")
|
||||||
interface SourcesConfiguration {
|
interface SourcesConfiguration {
|
||||||
|
|
||||||
fun sources(): MutableList<SourceConfig>
|
@WithName("sources")
|
||||||
|
fun getSources(): MutableList<SourceConfig>
|
||||||
interface SourceConfig {
|
|
||||||
fun id(): String
|
|
||||||
fun name(): String
|
|
||||||
fun domain(): String
|
|
||||||
fun urlPatterns(): Optional<MutableSet<String>>
|
|
||||||
fun selectors(): Selectors
|
|
||||||
|
|
||||||
interface Selectors {
|
|
||||||
fun card(): Optional<CardDefinition>
|
|
||||||
fun regionalSet(): Optional<RegionalSetDefinition>
|
|
||||||
|
|
||||||
interface AbstractModelDefinition {
|
|
||||||
fun root(): Optional<String>
|
|
||||||
}
|
|
||||||
|
|
||||||
interface RegionalSetDefinition : AbstractModelDefinition {
|
|
||||||
fun id(): SelectorDefinition
|
|
||||||
fun language(): SelectorDefinition
|
|
||||||
fun regionKey(): SelectorDefinition
|
|
||||||
}
|
|
||||||
|
|
||||||
interface CardDefinition {
|
|
||||||
fun name(): SelectorDefinition
|
|
||||||
fun attack(): SelectorDefinition
|
|
||||||
fun effect(): SelectorDefinition
|
|
||||||
}
|
|
||||||
|
|
||||||
interface SelectorDefinition {
|
|
||||||
fun steps(): Set<StepDefinition>
|
|
||||||
}
|
|
||||||
|
|
||||||
interface StepDefinition {
|
|
||||||
fun type(): String // e.g. css or xpath
|
|
||||||
fun value(): String
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
13
src/main/kotlin/com/rak/config/Step.kt
Normal file
13
src/main/kotlin/com/rak/config/Step.kt
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
package com.rak.config
|
||||||
|
|
||||||
|
import com.rak.config.converter.TypeSelectorConverter
|
||||||
|
import com.rak.model.scrape.selector.Selector
|
||||||
|
import io.smallrye.config.WithConverter
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
|
/**
 * Configuration mapping for one step of a selector.
 */
interface Step {

    /**
     * Kind of selector, e.g. css or xpath.
     *
     * Read from the `type` key and converted to a [Selector] by [TypeSelectorConverter].
     */
    @WithConverter(TypeSelectorConverter::class)
    @WithName("type")
    fun selectorType(): Selector

    /** The configured `value` of this step. */
    fun value(): String
}
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
package com.rak.config.converter
|
||||||
|
|
||||||
|
import com.rak.config.Step
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
|
/**
 * Base configuration mapping for model definitions that declare a root selector.
 */
interface AbstractModelDefinition {

    /** The [Step] configured under the `root` key. */
    @WithName("root")
    fun rootSelector(): Step
}
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
package com.rak.config.converter
|
||||||
|
|
||||||
|
import com.rak.model.scrape.selector.Selector
|
||||||
|
import org.eclipse.microprofile.config.spi.Converter
|
||||||
|
|
||||||
|
/**
 * Converts a configured selector type string into a [Selector].
 *
 * Surrounding whitespace is ignored and matching is case-insensitive: the value is trimmed and
 * upper-cased before being handed to `Selector.valueOf`.
 */
class TypeSelectorConverter : Converter<Selector> {

    /**
     * @param value the raw configuration value, e.g. `css` or `xpath`.
     * @return the [Selector] whose name matches the normalized value.
     * @throws IllegalArgumentException if the value does not name a [Selector]; the message
     * contains the offending raw value so the faulty configuration entry can be located.
     */
    override fun convert(value: String): Selector {
        val normalized = value.trim().uppercase()
        return try {
            Selector.valueOf(normalized)
        } catch (e: IllegalArgumentException) {
            // Re-throw with the same exception type but with the original input in the message.
            throw IllegalArgumentException("Unsupported selector type '$value'", e)
        }
    }
}
|
||||||
@@ -2,22 +2,21 @@ package com.rak.controller
|
|||||||
|
|
||||||
import com.rak.config.SourcesConfiguration
|
import com.rak.config.SourcesConfiguration
|
||||||
import com.rak.service.ScrapeService
|
import com.rak.service.ScrapeService
|
||||||
import io.quarkus.logging.Log
|
import com.rak.service.SourceService
|
||||||
|
import jakarta.ws.rs.Consumes
|
||||||
import jakarta.ws.rs.GET
|
import jakarta.ws.rs.GET
|
||||||
import jakarta.ws.rs.Path
|
import jakarta.ws.rs.Path
|
||||||
import jakarta.ws.rs.Produces
|
import jakarta.ws.rs.Produces
|
||||||
import jakarta.ws.rs.core.MediaType
|
import jakarta.ws.rs.core.MediaType
|
||||||
|
import org.jboss.resteasy.reactive.RestPath
|
||||||
import org.jboss.resteasy.reactive.RestQuery
|
import org.jboss.resteasy.reactive.RestQuery
|
||||||
import org.jsoup.Jsoup
|
|
||||||
import org.jsoup.nodes.Document
|
|
||||||
import org.jsoup.nodes.Element
|
|
||||||
import org.jsoup.nodes.TextNode
|
|
||||||
|
|
||||||
|
|
||||||
@Path("/hello")
|
@Path("/api")
|
||||||
class ExampleResource(
|
class ExampleResource(
|
||||||
private val sourcesConfiguration: SourcesConfiguration,
|
private val sourcesConfiguration: SourcesConfiguration,
|
||||||
private val scrapeService: ScrapeService
|
private val scrapeService: ScrapeService,
|
||||||
|
private val sourceService: SourceService
|
||||||
) {
|
) {
|
||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
@@ -25,38 +24,34 @@ class ExampleResource(
|
|||||||
}
|
}
|
||||||
|
|
||||||
@GET
|
@GET
|
||||||
@Produces(MediaType.TEXT_PLAIN)
|
@Path("/{provider}/set")
|
||||||
fun hello(
|
@Produces(MediaType.APPLICATION_JSON)
|
||||||
@RestQuery
|
@Consumes(MediaType.APPLICATION_JSON)
|
||||||
|
fun scrapeSet(
|
||||||
|
@RestPath
|
||||||
provider: String,
|
provider: String,
|
||||||
@RestQuery
|
@RestQuery
|
||||||
path: String
|
setName: String
|
||||||
): Map<String, String> {
|
): Map<String, String> {
|
||||||
val sources = sourcesConfiguration
|
return scrapeService.extractSet(
|
||||||
.sources()
|
provider,
|
||||||
.filter {
|
setName
|
||||||
it.id().equals(provider, ignoreCase = true)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
val source = sources.firstOrNull() ?: throw IllegalArgumentException("Provider $provider not found")
|
@GET
|
||||||
|
@Path("/{provider}/card")
|
||||||
val newPath: String = path
|
@Produces(MediaType.APPLICATION_JSON)
|
||||||
.trim()
|
@Consumes(MediaType.APPLICATION_JSON)
|
||||||
.replace(" ", "_")
|
fun scrapeCard(
|
||||||
|
@RestPath
|
||||||
val doc: Document = Jsoup.connect("https://${source.domain()}/$newPath").get()
|
provider: String,
|
||||||
|
@RestQuery
|
||||||
val regionalSetSelector = source.selectors().regionalSet().get()
|
cardName: String
|
||||||
val regionalSetRoot = doc.selectFirst(regionalSetSelector.root().get())!!
|
): Map<String, String> {
|
||||||
|
return scrapeService.extractCard(
|
||||||
val setId: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().id().steps())
|
provider,
|
||||||
val setLanguage: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().language().steps())
|
cardName
|
||||||
val setKey: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().regionKey().steps())
|
|
||||||
|
|
||||||
return mapOf(
|
|
||||||
Pair("id", setId ?: "N/A"),
|
|
||||||
Pair("language", setLanguage ?: "N/A"),
|
|
||||||
Pair("key", setKey ?: "N/A"),
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
package com.rak.model.config
|
|
||||||
|
|
||||||
data class Attribute(
|
|
||||||
val steps: Set<Step>
|
|
||||||
)
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
package com.rak.model.config
|
|
||||||
|
|
||||||
data class CardConfigModel(
|
|
||||||
override val root: String,
|
|
||||||
val name: Attribute,
|
|
||||||
val attack: Attribute,
|
|
||||||
val effect: Attribute,
|
|
||||||
) : ConfigModel
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
package com.rak.model.config
|
|
||||||
|
|
||||||
interface ConfigModel {
|
|
||||||
val root: String
|
|
||||||
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
package com.rak.model.config
|
|
||||||
|
|
||||||
data class RegionalSetConfigModel(
|
|
||||||
override val root: String,
|
|
||||||
val id: Attribute,
|
|
||||||
val language: Attribute,
|
|
||||||
val regionKey: Attribute
|
|
||||||
) : ConfigModel
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
package com.rak.model.config
|
|
||||||
|
|
||||||
data class SourceConfiguration(
|
|
||||||
val id: String,
|
|
||||||
val name: String,
|
|
||||||
val urlPatterns: Set<String>,
|
|
||||||
val selectors: Set<ConfigModel>
|
|
||||||
)
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
package com.rak.model.config
|
|
||||||
|
|
||||||
data class Step(
|
|
||||||
val type: Type,
|
|
||||||
val value: String
|
|
||||||
) {
|
|
||||||
enum class Type {
|
|
||||||
XPATH,
|
|
||||||
CSS
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,20 +1,26 @@
|
|||||||
package com.rak.service
|
package com.rak.service
|
||||||
|
|
||||||
|
import com.rak.config.RegionalSetDefinition
|
||||||
import com.rak.config.SourcesConfiguration
|
import com.rak.config.SourcesConfiguration
|
||||||
import com.rak.model.XPathTarget
|
import com.rak.config.Step
|
||||||
import com.rak.util.XPathUtil
|
import com.rak.util.XPathUtil
|
||||||
import jakarta.enterprise.context.ApplicationScoped
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
|
import org.jsoup.Jsoup
|
||||||
|
import org.jsoup.nodes.Document
|
||||||
import org.jsoup.nodes.Element
|
import org.jsoup.nodes.Element
|
||||||
import org.jsoup.nodes.TextNode
|
|
||||||
import org.jsoup.select.Evaluator
|
|
||||||
import java.util.concurrent.LinkedBlockingQueue
|
|
||||||
|
|
||||||
@ApplicationScoped
|
@ApplicationScoped
|
||||||
class ScrapeService {
|
class ScrapeService(
|
||||||
|
private val sourceService: SourceService
|
||||||
|
) {
|
||||||
|
|
||||||
fun extractTextFromRootBySteps(
|
companion object {
|
||||||
|
private val TEXT_NODE_MATCHER: Regex = Regex("text\\(\\)$")
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun extractTextFromRootBySteps(
|
||||||
root: Element,
|
root: Element,
|
||||||
steps: Set<SourcesConfiguration.SourceConfig.Selectors.StepDefinition>
|
steps: Set<Step>
|
||||||
): String? {
|
): String? {
|
||||||
var currentElement: Element? = root.clone()
|
var currentElement: Element? = root.clone()
|
||||||
var result: String? = null
|
var result: String? = null
|
||||||
@@ -37,4 +43,51 @@ class ScrapeService {
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Scrapes the regional-set data of [setName] from the source identified by [provider].
 *
 * The page is fetched from `https://<source domain>/<normalized setName>`. The configured
 * regional-set root element is located in that page, and the id, language and region-key
 * selectors are then evaluated against the root.
 *
 * @param provider id of the configured source to scrape.
 * @param setName name of the set; turned into the URL path by [normalizePath].
 * @return a map with the keys `id`, `language` and `key`; a value that could not be extracted
 * is reported as `"N/A"`.
 * @throws IllegalArgumentException if no source with id [provider] is configured.
 * @throws NoSuchElementException if the source has no regional-set selectors configured.
 * @throws IllegalStateException if the regional-set root element is not found in the page.
 */
fun extractSet(
    provider: String,
    setName: String,
): Map<String, String> {
    val source = sourceService.getSourceById(provider)
        ?: throw IllegalArgumentException("Provider $provider not found")

    // The regional-set section is optional in the configuration, so its absence is reported
    // explicitly, and before any network traffic, instead of through a bare Optional.get().
    val regionalSetSelector = source.getItems().regionalSet().orElseThrow {
        NoSuchElementException("Provider $provider has no regional-set selectors configured")
    }

    // NOTE(review): setName is placed into the URL path without URL-encoding — confirm that
    // callers only pass values that are safe as a path segment.
    val path: String = normalizePath(setName)
    val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()

    // NOTE(review): the root step's selectorType() is not consulted; its value is always
    // evaluated as a CSS query — confirm that an xpath root is not meant to be supported.
    val rootQuery = regionalSetSelector.rootSelector().value()
    val regionalSetRoot = checkNotNull(document.selectFirst(rootQuery)) {
        "Regional-set root '$rootQuery' not found for set '$setName' at provider $provider"
    }

    val setId: String? = extractTextFromRootBySteps(
        regionalSetRoot,
        regionalSetSelector.idSelector().steps(),
    )
    val setLanguage: String? = extractTextFromRootBySteps(
        regionalSetRoot,
        regionalSetSelector.languageSelector().steps(),
    )
    val setKey: String? = extractTextFromRootBySteps(
        regionalSetRoot,
        regionalSetSelector.regionKeySelector().steps(),
    )

    return mapOf(
        "id" to (setId ?: "N/A"),
        "language" to (setLanguage ?: "N/A"),
        "key" to (setKey ?: "N/A"),
    )
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Placeholder for scraping a card from the source identified by [provider].
 *
 * Not implemented yet: neither argument is used and the result is always an empty map.
 *
 * @param provider id of the configured source to scrape.
 * @param cardName name of the card to scrape.
 * @return an empty map.
 */
fun extractCard(
    provider: String,
    cardName: String,
): Map<String, String> {
    // TODO: implement card extraction; the previously computed (and unused) normalized path
    // was dropped until there is code that consumes it.
    return emptyMap()
}
|
||||||
|
|
||||||
|
/**
 * Turns a display name into a URL path segment: leading and trailing whitespace is removed
 * and every remaining space is replaced by an underscore.
 */
private fun normalizePath(path: String): String {
    val trimmed = path.trim()
    return trimmed.replace(" ", "_")
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -1,31 +1,15 @@
|
|||||||
package com.rak.service
|
package com.rak.service
|
||||||
|
|
||||||
|
import com.rak.config.SourceConfig
|
||||||
import com.rak.config.SourcesConfiguration
|
import com.rak.config.SourcesConfiguration
|
||||||
import com.rak.model.config.SourceConfiguration
|
|
||||||
import jakarta.annotation.PostConstruct
|
|
||||||
import jakarta.enterprise.context.ApplicationScoped
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
|
|
||||||
@ApplicationScoped
|
@ApplicationScoped
|
||||||
class SourceService (
|
class SourceService (
|
||||||
private val sourcesConfiguration: SourcesConfiguration
|
val sourcesConfiguration: SourcesConfiguration
|
||||||
) {
|
) {
|
||||||
|
|
||||||
private val sources: MutableSet<SourceConfiguration> = mutableSetOf()
|
/** Returns the configured sources as a set, built from the configuration on every call. */
fun getSources(): Set<SourceConfig> {
    val configured = sourcesConfiguration.getSources()
    return configured.toSet()
}
|
||||||
|
/**
 * Returns the first configured source whose id equals [id], or `null` when there is none.
 *
 * The comparison ignores case, as the provider lookup this function replaces did.
 */
fun getSourceById(id: String): SourceConfig? =
    getSources().firstOrNull { it.getId().equals(id, ignoreCase = true) }
|
||||||
@PostConstruct
|
|
||||||
fun init() {
|
|
||||||
sourcesConfiguration
|
|
||||||
.sources()
|
|
||||||
.forEach { source ->
|
|
||||||
val config = SourceConfiguration(
|
|
||||||
source.id(),
|
|
||||||
source.name(),
|
|
||||||
source.urlPatterns().orElse(mutableSetOf()),
|
|
||||||
setOf()
|
|
||||||
)
|
|
||||||
|
|
||||||
sources.add(config)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -1 +1 @@
|
|||||||
com.rak.model.config.TestConverter$ActualConverter
|
com.rak.config.converter.TypeSelectorConverter
|
||||||
@@ -1,3 +1,7 @@
|
|||||||
|
quarkus:
|
||||||
|
http:
|
||||||
|
port: 8081
|
||||||
|
|
||||||
scraper:
|
scraper:
|
||||||
sources:
|
sources:
|
||||||
- id: konami-official
|
- id: konami-official
|
||||||
@@ -25,20 +29,20 @@ scraper:
|
|||||||
- "^https://yugioh\\.fandom\\.com/wiki/.*$"
|
- "^https://yugioh\\.fandom\\.com/wiki/.*$"
|
||||||
selectors:
|
selectors:
|
||||||
regional-set:
|
regional-set:
|
||||||
root: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
root:
|
||||||
|
type: css
|
||||||
|
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
||||||
id:
|
id:
|
||||||
steps:
|
steps:
|
||||||
- type: "xpath"
|
- type: xpath
|
||||||
value: "//li/text()"
|
value: "//li/text()"
|
||||||
language:
|
language:
|
||||||
steps:
|
steps:
|
||||||
- type: "xpath"
|
- type: xpath
|
||||||
value: "//li/abbr"
|
value: "//li/abbr"
|
||||||
- type: "xpath"
|
- type: xpath
|
||||||
value: "//abbr/@title"
|
value: "//abbr/@title"
|
||||||
region-key:
|
region-key:
|
||||||
steps:
|
steps:
|
||||||
- type: "xpath"
|
- type: xpath
|
||||||
value: "//li/abbr/text()"
|
value: "//li/abbr/text()"
|
||||||
testing:
|
|
||||||
waaa: test
|
|
||||||
Reference in New Issue
Block a user