Init
This commit is contained in:
50
src/main/kotlin/com/rak/config/SourcesConfiguration.kt
Normal file
50
src/main/kotlin/com/rak/config/SourcesConfiguration.kt
Normal file
@@ -0,0 +1,50 @@
|
||||
package com.rak.config
|
||||
|
||||
import io.smallrye.config.ConfigMapping
|
||||
import java.util.*
|
||||
|
||||
|
||||
/**
 * Typed view of the configuration located under the `scraper` prefix.
 *
 * Every nested interface describes one level of the configuration tree; the
 * accessors returning [Optional] are the ones that may be left out.
 */
@ConfigMapping(prefix = "scraper")
interface SourcesConfiguration {

    /** All configured scrape sources. */
    fun sources(): MutableList<SourceConfig>

    /** A single scrape source together with the selectors used to read it. */
    interface SourceConfig {

        /** Identifier of the source. */
        fun id(): String

        /** Name of the source. */
        fun name(): String

        /** Domain of the source. */
        fun domain(): String

        /** Optional list of URL patterns for this source. */
        fun urlPatterns(): Optional<MutableList<String>>

        /** Selector definitions for the models read from this source. */
        fun selectors(): Selectors

        /** Groups the per-model selector definitions of a source. */
        interface Selectors {

            /** Optional selector definition for a card. */
            fun card(): Optional<CardDefinition>

            /** Optional selector definition for a regional set. */
            fun regionalSet(): Optional<RegionalSetDefinition>

            /** One selection step: an expression plus the kind of selector it is written in. */
            interface StepDefinition {

                /** Kind of selector, e.g. css or xpath. */
                fun type(): String

                /** The selector expression itself. */
                fun value(): String
            }

            /** A selector expressed as a collection of [StepDefinition]s. */
            interface SelectorDefinition {

                /** The steps making up this selector. */
                fun steps(): Set<StepDefinition>
            }

            /** Shared base for model definitions that may declare a root selector. */
            interface AbstractModelDefinition {

                /** Optional selector of the element that the model's selectors start from. */
                fun root(): Optional<String>
            }

            /** Selectors for the fields of a regional set. */
            interface RegionalSetDefinition : AbstractModelDefinition {

                fun id(): SelectorDefinition

                fun language(): SelectorDefinition

                fun regionKey(): SelectorDefinition
            }

            /** Selectors for the fields of a card. */
            interface CardDefinition {

                fun name(): SelectorDefinition

                fun attack(): SelectorDefinition

                fun effect(): SelectorDefinition
            }
        }
    }
}
|
||||
56
src/main/kotlin/com/rak/controller/ExampleResource.kt
Normal file
56
src/main/kotlin/com/rak/controller/ExampleResource.kt
Normal file
@@ -0,0 +1,56 @@
|
||||
package com.rak.controller
|
||||
|
||||
import com.rak.config.SourcesConfiguration
|
||||
import com.rak.service.ScrapeService
|
||||
import io.quarkus.logging.Log
|
||||
import jakarta.ws.rs.GET
|
||||
import jakarta.ws.rs.Path
|
||||
import jakarta.ws.rs.Produces
|
||||
import jakarta.ws.rs.core.MediaType
|
||||
import org.jboss.resteasy.reactive.RestQuery
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.nodes.Document
|
||||
import org.jsoup.nodes.Element
|
||||
import org.jsoup.nodes.TextNode
|
||||
|
||||
|
||||
/**
 * Demo endpoint that scrapes the regional-set id from a page of a configured source.
 *
 * The source is looked up by id in [SourcesConfiguration]; the page is fetched from
 * `https://<domain>/<path>` and the regional-set `id` selector steps are evaluated
 * through [ScrapeService].
 */
@Path("/hello")
class ExampleResource(
    private val sourcesConfiguration: SourcesConfiguration,
    private val scrapeService: ScrapeService
) {

    /**
     * Fetches a page from the given provider and extracts the regional-set id from it.
     *
     * @param provider id of the configured source, matched case-insensitively.
     * @param path page path below the source's domain; it is trimmed and spaces are
     *   replaced by underscores before the request is made.
     * @return the extracted text, or `"whoomp whoomp"` when the steps yield nothing.
     * @throws IllegalArgumentException when no source with the given id is configured.
     * @throws IllegalStateException when the source has no regional-set selectors or
     *   root selector configured, or the root selector matches nothing in the page.
     */
    @GET
    @Produces(MediaType.TEXT_PLAIN)
    fun hello(
        @RestQuery
        provider: String,
        @RestQuery
        path: String
    ): String {
        val source = sourcesConfiguration
            .sources()
            .firstOrNull { it.id().equals(provider, ignoreCase = true) }
            ?: throw IllegalArgumentException("Provider $provider not found")

        val newPath: String = path
            .trim()
            .replace(" ", "_")

        Log.info(newPath)

        // Validate the configuration before doing any network I/O so a misconfigured
        // source fails fast with a readable message instead of a bare Optional.get().
        val regionalSetSelector = source.selectors().regionalSet()
            .orElseThrow { IllegalStateException("Provider $provider has no regional set selectors configured") }
        val rootSelector = regionalSetSelector.root()
            .orElseThrow { IllegalStateException("Provider $provider has no regional set root selector configured") }

        val doc: Document = Jsoup.connect("https://${source.domain()}/$newPath").get()

        val regionalSetRoot = checkNotNull(doc.selectFirst(rootSelector)) {
            "Root selector '$rootSelector' matched nothing at https://${source.domain()}/$newPath"
        }

        return scrapeService.extractTextFromRootBySteps(regionalSetRoot, regionalSetSelector.id().steps())
            ?: "whoomp whoomp"
    }
}
|
||||
4
src/main/kotlin/com/rak/model/scrape/AbstractScraper.kt
Normal file
4
src/main/kotlin/com/rak/model/scrape/AbstractScraper.kt
Normal file
@@ -0,0 +1,4 @@
|
||||
package com.rak.model.scrape
|
||||
|
||||
/**
 * Common base type for scrapers. It currently declares no members.
 */
abstract class AbstractScraper
|
||||
6
src/main/kotlin/com/rak/model/scrape/JsoupScraper.kt
Normal file
6
src/main/kotlin/com/rak/model/scrape/JsoupScraper.kt
Normal file
@@ -0,0 +1,6 @@
|
||||
package com.rak.model.scrape
|
||||
|
||||
/**
 * [AbstractScraper] subtype; it currently adds no members of its own.
 */
class JsoupScraper : AbstractScraper()
|
||||
5
src/main/kotlin/com/rak/model/scrape/ScrapeJob.kt
Normal file
5
src/main/kotlin/com/rak/model/scrape/ScrapeJob.kt
Normal file
@@ -0,0 +1,5 @@
|
||||
package com.rak.model.scrape
|
||||
|
||||
/**
 * Value object describing one scrape job.
 *
 * @property url the URL associated with the job.
 */
data class ScrapeJob(val url: String)
|
||||
@@ -0,0 +1,6 @@
|
||||
package com.rak.model.scrape.selector
|
||||
|
||||
/**
 * The selector kinds known to the scraper: [CSS] and [XPATH].
 */
enum class Selector { CSS, XPATH }
|
||||
29
src/main/kotlin/com/rak/service/MyRemoteService.kt
Normal file
29
src/main/kotlin/com/rak/service/MyRemoteService.kt
Normal file
@@ -0,0 +1,29 @@
|
||||
package com.rak.service
|
||||
|
||||
import jakarta.ws.rs.GET
|
||||
import jakarta.ws.rs.Path
|
||||
import jakarta.ws.rs.QueryParam
|
||||
import org.eclipse.microprofile.rest.client.inject.RegisterRestClient
|
||||
|
||||
/**
 * REST client interface registered against `https://stage.code.quarkus.io/api`.
 *
 * Inject it with the `@RestClient` qualifier:
 *
 * ```kotlin
 * @Inject
 * @RestClient
 * lateinit var myRemoteService: MyRemoteService
 *
 * fun doSomething() {
 *     val restClientExtensions = myRemoteService.getExtensionsById("io.quarkus:quarkus-rest-client")
 * }
 * ```
 */
@RegisterRestClient(baseUri = "https://stage.code.quarkus.io/api")
interface MyRemoteService {

    /**
     * Issues `GET /extensions` with [id] sent as the `id` query parameter.
     *
     * @param id value of the `id` query parameter.
     * @return the extensions contained in the response.
     */
    @GET
    @Path("/extensions")
    fun getExtensionsById(@QueryParam("id") id: String): Set<Extension>

    /** One extension entry as returned by [getExtensionsById]. */
    data class Extension(
        val id: String,
        val name: String,
        val shortName: String,
        val keywords: List<String>,
    )
}
|
||||
68
src/main/kotlin/com/rak/service/ScrapeService.kt
Normal file
68
src/main/kotlin/com/rak/service/ScrapeService.kt
Normal file
@@ -0,0 +1,68 @@
|
||||
package com.rak.service
|
||||
|
||||
import com.rak.config.SourcesConfiguration
|
||||
import jakarta.enterprise.context.ApplicationScoped
|
||||
import org.jsoup.nodes.Element
|
||||
import org.jsoup.nodes.TextNode
|
||||
import java.util.concurrent.LinkedBlockingQueue
|
||||
|
||||
/**
 * Evaluates configured selector steps against a jsoup element tree and extracts
 * the text found at the end of the step chain.
 */
@ApplicationScoped
class ScrapeService {

    /**
     * Walks [steps] one after another, starting from a copy of [root], and returns
     * the text the steps lead to.
     *
     * Each step is evaluated relative to the element selected by the previous step:
     * - a step whose type is `xpath` (case-insensitive) is evaluated as XPath; every
     *   other type is evaluated as a CSS selector;
     * - an XPath step whose expression ends in `text()` terminates the walk and the
     *   first matching text node's text is returned, any remaining steps being ignored;
     * - when the steps run out without such a terminal step, the text of the element
     *   reached last is returned (with no steps at all, that is the text of [root]).
     *
     * @param root element the first step is evaluated against.
     * @param steps selector steps, applied in the set's iteration order.
     * @return the extracted text, or `null` as soon as a step matches nothing.
     */
    fun extractTextFromRootBySteps(
        root: Element,
        steps: Set<SourcesConfiguration.SourceConfig.Selectors.StepDefinition>
    ): String? {
        // NOTE(review): the steps arrive as a Set, so the walk relies on the set's
        // iteration order matching the configured order — confirm the config mapping
        // supplies an insertion-ordered set.
        // NOTE(review): the walk starts from a clone of root, as before; selection does
        // not appear to need a copy, but it is kept to leave selection semantics untouched.
        var currentElement: Element = root.clone()

        for (step in steps) {
            val expression = step.value()
            val isXpath = step.type().equals(XPATH_STEP_TYPE, ignoreCase = true)

            if (isXpath && TEXT_NODE_MATCHER.matches(expression)) {
                // Terminal step: the expression targets a text node directly.
                return evaluateXpathText(currentElement, expression)
            }

            val nextElement = if (isXpath) {
                evaluateXpath(currentElement, expression)
            } else {
                evaluateCssSelector(currentElement, expression)
            }

            // A step that matches nothing ends the walk without a result.
            currentElement = nextElement ?: return null
        }

        // No step targeted a text node explicitly: default to the text of the
        // element the steps resolved to.
        return currentElement.text()
    }

    companion object {
        /** Step type that selects XPath evaluation; any other type is treated as CSS. */
        private const val XPATH_STEP_TYPE = "xpath"

        /** Matches expressions that end in `text()`, i.e. that address a text node. */
        private val TEXT_NODE_MATCHER: Regex = Regex("^.*text\\(\\)$")

        /** Returns the first element selected by [xpath] relative to [element], or `null` if there is none. */
        private fun evaluateXpath(element: Element, xpath: String): Element? {
            return element.selectXpath(xpath).first()
        }

        /** Returns the text of the first text node selected by [xpath] relative to [element], or `null` if there is none. */
        private fun evaluateXpathText(element: Element, xpath: String): String? {
            return element.selectXpath(xpath, TextNode::class.java).firstOrNull()?.text()
        }

        /** Returns the first element selected by [cssSelector] relative to [element], or `null` if there is none. */
        private fun evaluateCssSelector(element: Element, cssSelector: String): Element? {
            return element.selectFirst(cssSelector)
        }
    }
}
|
||||
Reference in New Issue
Block a user