Merge pull request #171 from Taumer/ru_parsers

Implement parsers for Readmanga, Mintmanga and Mangachan
This commit is contained in:
inorichi 2016-03-11 13:57:11 +01:00
commit ed636d5e2f
4 changed files with 700 additions and 1 deletions

View file

@ -6,6 +6,9 @@ import eu.kanade.tachiyomi.data.source.online.english.Batoto
import eu.kanade.tachiyomi.data.source.online.english.Kissmanga
import eu.kanade.tachiyomi.data.source.online.english.Mangafox
import eu.kanade.tachiyomi.data.source.online.english.Mangahere
import eu.kanade.tachiyomi.data.source.online.russian.Mangachan;
import eu.kanade.tachiyomi.data.source.online.russian.Mintmanga;
import eu.kanade.tachiyomi.data.source.online.russian.Readmanga;
import java.util.*
open class SourceManager(private val context: Context) {
@ -16,8 +19,11 @@ open class SourceManager(private val context: Context) {
val MANGAHERE = 2
val MANGAFOX = 3
val KISSMANGA = 4
val READMANGA = 5
val MINTMANGA = 6
val MANGACHAN = 7
val LAST_SOURCE = 4
val LAST_SOURCE = 7
init {
sourcesMap = createSourcesMap()
@ -32,6 +38,9 @@ open class SourceManager(private val context: Context) {
MANGAHERE -> Mangahere(context)
MANGAFOX -> Mangafox(context)
KISSMANGA -> Kissmanga(context)
READMANGA -> Readmanga(context)
MINTMANGA -> Mintmanga(context)
MANGACHAN -> Mangachan(context)
else -> null
}

View file

@ -0,0 +1,240 @@
package eu.kanade.tachiyomi.data.source.online.russian;
import android.content.Context;
import android.net.Uri;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import eu.kanade.tachiyomi.data.database.models.Chapter;
import eu.kanade.tachiyomi.data.database.models.Manga;
import eu.kanade.tachiyomi.data.source.Language;
import eu.kanade.tachiyomi.data.source.LanguageKt;
import eu.kanade.tachiyomi.data.source.base.Source;
import eu.kanade.tachiyomi.data.source.model.MangasPage;
import eu.kanade.tachiyomi.data.source.model.Page;
import eu.kanade.tachiyomi.util.Parser;
/**
 * Online source that scrapes the Russian catalogue at mangachan.ru.
 *
 * <p>All parsing is done against the site's HTML with Jsoup selectors, so every
 * parser here is written to degrade gracefully (skip the missing field) rather
 * than throw when an expected element is absent.
 */
public class Mangachan extends Source {

    public static final String NAME = "Mangachan";
    public static final String BASE_URL = "http://mangachan.ru";
    public static final String POPULAR_MANGAS_URL = BASE_URL + "/mostfavorites";
    public static final String SEARCH_URL = BASE_URL + "/?do=search&subaction=search&story=%s";

    /** Text that immediately precedes the comma separated image list in a reader page. */
    private static final String PAGE_LIST_MARKER = "fullimg\":[";

    /** Matches the "volume_chChapter" part of urls like /online/254903-fairy-tail_v56_ch474.html */
    private static final Pattern VOLUME_CHAPTER_IN_URL = Pattern.compile("\\d+_ch[\\d.]+");

    /** Width the integer part of a chapter is padded to before it is appended to the volume. */
    private static final int CHAPTER_DIGITS = 4;

    public Mangachan(Context context) {
        super(context);
    }

    @Override
    public Language getLang() {
        return LanguageKt.getRU();
    }

    @Override
    public String getName() {
        return NAME;
    }

    @Override
    public String getBaseUrl() {
        return BASE_URL;
    }

    @Override
    protected String getInitialPopularMangasUrl() {
        return POPULAR_MANGAS_URL;
    }

    @Override
    protected String getInitialSearchUrl(String query) {
        return String.format(SEARCH_URL, Uri.encode(query));
    }

    /**
     * Builds one manga per {@code div.content_row} block of the listing page.
     *
     * @param parsedHtml the parsed listing page.
     * @return the mangas found, in document order; empty if there are none.
     */
    @Override
    protected List<Manga> parsePopularMangasFromHtml(Document parsedHtml) {
        List<Manga> mangaList = new ArrayList<>();
        for (Element currentHtmlBlock : parsedHtml.select("div.content_row")) {
            mangaList.add(constructPopularMangaFromHtml(currentHtmlBlock));
        }
        return mangaList;
    }

    /**
     * Reads url, title and thumbnail from a single listing row. Fields whose
     * element is missing are left untouched.
     */
    private Manga constructPopularMangaFromHtml(Element currentHtmlBlock) {
        Manga manga = new Manga();
        manga.source = getId();

        Element urlElement = currentHtmlBlock.getElementsByTag("h2").select("a").first();
        if (urlElement != null) {
            manga.setUrl(urlElement.attr("href"));
            manga.title = urlElement.text();
        }

        Element imgElement = currentHtmlBlock.getElementsByClass("manga_images").select("img").first();
        if (imgElement != null) {
            // The src attribute is appended to the base url as-is.
            manga.thumbnail_url = BASE_URL + imgElement.attr("src");
        }
        return manga;
    }

    /**
     * @return the url of the next listing page, or {@code null} when the page has no
     *         "Вперед" (forward) link.
     */
    @Override
    protected String parseNextPopularMangasUrl(Document parsedHtml, MangasPage page) {
        String path = Parser.href(parsedHtml, "a:contains(Вперед)");
        return path != null ? POPULAR_MANGAS_URL + path : null;
    }

    /** Search results are parsed with the same selectors as the popular listing. */
    @Override
    protected List<Manga> parseSearchFromHtml(Document parsedHtml) {
        return parsePopularMangasFromHtml(parsedHtml);
    }

    /** Search pagination is not implemented; always returns {@code null}. */
    @Override
    protected String parseNextSearchUrl(Document parsedHtml, MangasPage page, String query) {
        return null;
    }

    /**
     * Parses the details page of a manga.
     *
     * <p>The info table ({@code .mangatitle}) and the description ({@code #description})
     * are both optional: when one is missing the corresponding fields are simply not set
     * instead of failing with a {@link NullPointerException}.
     *
     * @param mangaUrl     url the manga is created with.
     * @param unparsedHtml raw html of the details page.
     * @return the manga, always flagged as initialized.
     */
    @Override
    protected Manga parseHtmlToManga(String mangaUrl, String unparsedHtml) {
        Document parsedDocument = Jsoup.parse(unparsedHtml);
        Manga manga = Manga.create(mangaUrl);

        Element infoElement = parsedDocument.getElementsByClass("mangatitle").first();
        if (infoElement != null) {
            manga.author = infoElement.select("tr:eq(2) td:eq(1)").text();
            manga.genre = infoElement.select("tr:eq(5) td:eq(1)").text();
            manga.status = parseStatus(infoElement.select("tr:eq(4) td:eq(1)").text());
        }

        Element descriptionElement = parsedDocument.getElementById("description");
        if (descriptionElement != null) {
            // Drop the "send a description" link text embedded in the block.
            manga.description = descriptionElement.text().replaceAll("Прислать описание", "");
        }

        manga.initialized = true;
        return manga;
    }

    /** Maps the site's translation status text to one of the {@code Manga} status constants. */
    private int parseStatus(String status) {
        if (status.contains("перевод продолжается")) {
            return Manga.ONGOING;
        } else if (status.contains("перевод завершен")) {
            return Manga.COMPLETED;
        } else {
            return Manga.UNKNOWN;
        }
    }

    /**
     * Builds one chapter per row of {@code table.table_cha}, skipping the rows
     * excluded by the {@code tr:gt(1)} selector.
     */
    @Override
    protected List<Chapter> parseHtmlToChapters(String unparsedHtml) {
        Document parsedDocument = Jsoup.parse(unparsedHtml);
        List<Chapter> chapterList = new ArrayList<>();
        for (Element chapterElement : parsedDocument.select("table.table_cha tr:gt(1)")) {
            chapterList.add(constructChapterFromHtmlBlock(chapterElement));
        }
        return chapterList;
    }

    /** Reads name, url and upload date from a single chapter row. */
    private Chapter constructChapterFromHtmlBlock(Element chapterElement) {
        Chapter chapter = Chapter.create();

        Element urlElement = chapterElement.select("a").first();
        if (urlElement != null) {
            chapter.name = urlElement.text();
            chapter.url = urlElement.attr("href");
        }

        String date = Parser.text(chapterElement, "div.date");
        if (date != null) {
            try {
                chapter.date_upload = new SimpleDateFormat("yyyy-MM-dd", Locale.ENGLISH).parse(date).getTime();
            } catch (ParseException ignored) {
                // Best effort: an unparsable date just leaves date_upload unset.
            }
        }
        return chapter;
    }

    /**
     * Derives a sortable {@code volume.chapter} number for the chapter.
     * Without this extra chapters are in the wrong place in the list.
     *
     * <p>Two sources are tried: the url (e.g. {@code /online/254903-fairy-tail_v56_ch474.html})
     * and, failing that, the chapter name. If neither yields a number the chapter's
     * {@code chapter_number} is left unchanged instead of throwing.
     */
    @Override
    public void parseChapterNumber(Chapter chapter) {
        if (chapter.url == null) {
            return;
        }
        try {
            String url = chapter.url.replace(".html", "");
            Matcher matcher = VOLUME_CHAPTER_IN_URL.matcher(url);
            if (matcher.find()) {
                // The pattern guarantees exactly one "_ch" with text on both sides.
                String[] parts = matcher.group().split("_ch");
                chapter.chapter_number = Float.parseFloat(parts[0] + "." + addZero(parts[1]));
                return;
            }

            // For chapters with url like /online/61216-3298.html fall back to the name.
            if (chapter.name == null) {
                return;
            }
            String name = chapter.name.replaceAll("[\\s\\d\\w\\W]+v", "");
            int separatorIndex = name.indexOf(" - ");
            if (separatorIndex < 0) {
                return;
            }
            String volume = name.substring(0, separatorIndex);
            String[] parts = name.replaceFirst("\\d+ - ", "").split(" ");
            if (parts.length == 0) {
                return;
            }
            chapter.chapter_number = Float.parseFloat(volume + "." + addZero(parts[0]));
        } catch (NumberFormatException ignored) {
            // Name/url did not follow the expected layout; keep the previous chapter number.
        }
    }

    /**
     * Left-pads the integer part of a chapter number to {@link #CHAPTER_DIGITS} digits and
     * appends its fractional digits, so that the result can be used as the decimal
     * part of a {@code volume.chapter} float: "5" becomes "0005", "474" becomes "0474"
     * and "12.5" becomes "00125" (which sorts between "0012" and "0013").
     *
     * @param num chapter number as text, optionally with a fractional part.
     * @return the padded digits, without any decimal point.
     * @throws NumberFormatException if {@code num} is not a number.
     */
    private String addZero(String num) {
        // Validation only: rejects empty strings, lone dots, "1.2.3" and the like.
        Float.parseFloat(num);

        int dotIndex = num.indexOf('.');
        String whole = dotIndex < 0 ? num : num.substring(0, dotIndex);
        String fraction = dotIndex < 0 ? "" : num.substring(dotIndex + 1);

        StringBuilder padded = new StringBuilder();
        for (int i = whole.length(); i < CHAPTER_DIGITS; i++) {
            padded.append('0');
        }
        return padded.append(whole).append(fraction).toString();
    }

    /**
     * Returns one placeholder (empty string) per page of the chapter; the real image
     * urls are filled in by {@link #parseFirstPage(List, String)}.
     *
     * @return a list with as many entries as images were found; empty if the image
     *         list could not be located.
     */
    @Override
    protected List<String> parseHtmlToPageUrls(String unparsedHtml) {
        ArrayList<String> pages = new ArrayList<>();
        String[] pageUrls = extractPageUrls(unparsedHtml);
        for (int i = 0; i < pageUrls.length; i++) {
            pages.add("");
        }
        return pages;
    }

    /**
     * Assigns the image url of every page found in the reader html.
     *
     * @param pages        pages to update; extra urls beyond its size are ignored.
     * @param unparsedHtml raw html of the reader page.
     * @return the same {@code pages} list.
     */
    @Override
    protected List<Page> parseFirstPage(List<Page> pages, String unparsedHtml) {
        String[] pageUrls = extractPageUrls(unparsedHtml);
        int count = Math.min(pages.size(), pageUrls.length);
        for (int i = 0; i < count; i++) {
            // Strip the "imN." host prefix from the url.
            pages.get(i).setImageUrl(pageUrls[i].replaceAll("im.?\\.", ""));
        }
        return pages;
    }

    /**
     * Extracts the unquoted image urls listed after {@link #PAGE_LIST_MARKER}.
     *
     * @return the urls in page order, or an empty array when the marker or its closing
     *         bracket is missing or the list is empty.
     */
    private String[] extractPageUrls(String unparsedHtml) {
        int markerIndex = unparsedHtml.indexOf(PAGE_LIST_MARKER);
        if (markerIndex < 0) {
            return new String[0];
        }
        int beginIndex = markerIndex + PAGE_LIST_MARKER.length();
        int endIndex = unparsedHtml.indexOf("]", beginIndex);
        if (endIndex < 0) {
            return new String[0];
        }
        String trimmedHtml = unparsedHtml.substring(beginIndex, endIndex).replaceAll("\"", "");
        if (trimmedHtml.trim().isEmpty()) {
            return new String[0];
        }
        return trimmedHtml.split(",");
    }

    /** Image urls are resolved in {@link #parseFirstPage(List, String)}; nothing to do here. */
    @Override
    protected String parseHtmlToImageUrl(String unparsedHtml) {
        return null;
    }
}

View file

@ -0,0 +1,225 @@
package eu.kanade.tachiyomi.data.source.online.russian;
import android.content.Context;
import android.net.Uri;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import eu.kanade.tachiyomi.data.database.models.Chapter;
import eu.kanade.tachiyomi.data.database.models.Manga;
import eu.kanade.tachiyomi.data.source.Language;
import eu.kanade.tachiyomi.data.source.LanguageKt;
import eu.kanade.tachiyomi.data.source.base.Source;
import eu.kanade.tachiyomi.data.source.model.MangasPage;
import eu.kanade.tachiyomi.data.source.model.Page;
import eu.kanade.tachiyomi.util.Parser;
/**
 * Online source that scrapes the Russian catalogue at mintmanga.com.
 *
 * <p>All parsing is done against the site's HTML and inline reader script, so the
 * parsers skip whatever they cannot find instead of throwing.
 */
public class Mintmanga extends Source {

    public static final String NAME = "Mintmanga";
    public static final String BASE_URL = "http://mintmanga.com";
    public static final String POPULAR_MANGAS_URL = BASE_URL + "/list?sortType=rate";
    public static final String SEARCH_URL = BASE_URL + "/search?q=%s";

    /** Start of the reader script call whose first argument is the array of page entries. */
    private static final String PAGE_LIST_START = "rm_h.init( [";

    /** Text that closes the array of page entries in the reader script call. */
    private static final String PAGE_LIST_END = "], 0, false);";

    /** Width the integer part of a chapter is padded to before it is appended to the volume. */
    private static final int CHAPTER_DIGITS = 4;

    public Mintmanga(Context context) {
        super(context);
    }

    @Override
    public Language getLang() {
        return LanguageKt.getRU();
    }

    @Override
    public String getName() {
        return NAME;
    }

    @Override
    public String getBaseUrl() {
        return BASE_URL;
    }

    @Override
    protected String getInitialPopularMangasUrl() {
        return POPULAR_MANGAS_URL;
    }

    @Override
    protected String getInitialSearchUrl(String query) {
        return String.format(SEARCH_URL, Uri.encode(query));
    }

    /**
     * Builds one manga per {@code div.desc} block of the listing page.
     *
     * @param parsedHtml the parsed listing page.
     * @return the mangas found, in document order; empty if there are none.
     */
    @Override
    protected List<Manga> parsePopularMangasFromHtml(Document parsedHtml) {
        List<Manga> mangaList = new ArrayList<>();
        for (Element currentHtmlBlock : parsedHtml.select("div.desc")) {
            mangaList.add(constructPopularMangaFromHtml(currentHtmlBlock));
        }
        return mangaList;
    }

    /** Reads url and title from a single listing block; both stay unset if the link is missing. */
    private Manga constructPopularMangaFromHtml(Element currentHtmlBlock) {
        Manga manga = new Manga();
        manga.source = getId();

        Element urlElement = currentHtmlBlock.getElementsByTag("h3").select("a").first();
        if (urlElement != null) {
            manga.setUrl(urlElement.attr("href"));
            manga.title = urlElement.text();
        }
        return manga;
    }

    /**
     * @return the url of the next listing page, or {@code null} when the page has no
     *         "→" link.
     */
    @Override
    protected String parseNextPopularMangasUrl(Document parsedHtml, MangasPage page) {
        String path = Parser.href(parsedHtml, "a:contains(→)");
        return path != null ? BASE_URL + path : null;
    }

    /** Search results are parsed with the same selectors as the popular listing. */
    @Override
    protected List<Manga> parseSearchFromHtml(Document parsedHtml) {
        return parsePopularMangasFromHtml(parsedHtml);
    }

    /** Search pagination is not implemented; always returns {@code null}. */
    @Override
    protected String parseNextSearchUrl(Document parsedHtml, MangasPage page, String query) {
        return null;
    }

    /**
     * Parses the details page of a manga from its {@code div.leftContent} block.
     *
     * <p>Missing pieces (the whole block, the genre list, the status paragraph or the
     * cover image) are skipped rather than dereferenced.
     *
     * @param mangaUrl     url the manga is created with.
     * @param unparsedHtml raw html of the details page.
     * @return the manga, always flagged as initialized.
     */
    @Override
    protected Manga parseHtmlToManga(String mangaUrl, String unparsedHtml) {
        Document parsedDocument = Jsoup.parse(unparsedHtml);
        Element infoElement = parsedDocument.select("div.leftContent").first();

        Manga manga = Manga.create(mangaUrl);
        if (infoElement != null) {
            manga.title = Parser.text(infoElement, "span.eng-name");
            manga.author = Parser.text(infoElement, "span.elem_author ");

            String genre = Parser.allText(infoElement, "span.elem_genre ");
            if (genre != null) {
                manga.genre = genre.replaceAll(" ,", ",");
            }
            manga.description = Parser.allText(infoElement, "div.manga-description");

            // A title marked "Сингл" (single) is treated as completed.
            if (Parser.text(infoElement, "h1.names:contains(Сингл)") != null) {
                manga.status = Manga.COMPLETED;
            } else {
                manga.status = parseStatus(Parser.text(infoElement, "p:has(b:contains(Перевод:))"));
            }

            Element coverElement = Parser.element(infoElement, "img");
            if (coverElement != null) {
                // Jsoup returns "" (never null) for an absent attribute.
                String thumbnail = coverElement.attr("data-full");
                if (!thumbnail.isEmpty()) {
                    manga.thumbnail_url = thumbnail;
                }
            }
        }

        manga.initialized = true;
        return manga;
    }

    /**
     * Maps the site's translation status text to one of the {@code Manga} status constants.
     *
     * @param status status paragraph text; may be {@code null} when the paragraph is absent.
     */
    private int parseStatus(String status) {
        if (status == null) {
            return Manga.UNKNOWN;
        }
        if (status.contains("продолжается")) {
            return Manga.ONGOING;
        }
        if (status.contains("завершен")) {
            return Manga.COMPLETED;
        }
        return Manga.UNKNOWN;
    }

    /** Builds one chapter per row matched by {@code div.chapters-link tbody tr}. */
    @Override
    protected List<Chapter> parseHtmlToChapters(String unparsedHtml) {
        Document parsedDocument = Jsoup.parse(unparsedHtml);
        List<Chapter> chapterList = new ArrayList<>();
        for (Element chapterElement : parsedDocument.select("div.chapters-link tbody tr")) {
            chapterList.add(constructChapterFromHtmlBlock(chapterElement));
        }
        return chapterList;
    }

    /** Reads url, name and upload date from a single chapter row. */
    private Chapter constructChapterFromHtmlBlock(Element chapterElement) {
        Chapter chapter = Chapter.create();

        Element urlElement = Parser.element(chapterElement, "a");
        if (urlElement != null) {
            chapter.setUrl(urlElement.attr("href") + "?mature=1");
            // Remove the " новое" (new) badge text from the name.
            chapter.name = urlElement.text().replaceAll(" новое", "");
        }

        String date = Parser.text(chapterElement, "td:eq(1)");
        if (date != null) {
            try {
                chapter.date_upload = new SimpleDateFormat("dd/MM/yy", Locale.ENGLISH).parse(date).getTime();
            } catch (ParseException ignored) {
                // Best effort: an unparsable date just leaves date_upload unset.
            }
        }
        return chapter;
    }

    /**
     * Derives a sortable {@code volume.chapter} number from the chapter url.
     * Without this extra chapters are in the wrong place in the list.
     *
     * <p>The url is reduced to {@code volume/chapter} by dropping the query flag and
     * the leading {@code /name/vol} segment. If the url is missing or does not follow
     * that layout, {@code chapter_number} is left unchanged instead of throwing.
     */
    @Override
    public void parseChapterNumber(Chapter chapter) {
        if (chapter.url == null) {
            return;
        }
        String url = chapter.url.replace("?mature=1", "");
        String[] urlParts = url.replaceAll("/[\\w\\d]+/vol", "").split("/");
        if (urlParts.length < 2) {
            return;
        }
        try {
            chapter.chapter_number = Float.parseFloat(urlParts[0] + "." + padChapter(urlParts[1]));
        } catch (NumberFormatException ignored) {
            // Url did not contain numeric volume/chapter parts; keep the previous number.
        }
    }

    /**
     * Left-pads the integer part of a chapter number to {@link #CHAPTER_DIGITS} digits and
     * appends any fractional digits, e.g. "5" becomes "0005" and "50" becomes "0050".
     *
     * @throws NumberFormatException if {@code num} is not a number.
     */
    private String padChapter(String num) {
        // Validation only: rejects empty or non-numeric text.
        Float.parseFloat(num);

        int dotIndex = num.indexOf('.');
        String whole = dotIndex < 0 ? num : num.substring(0, dotIndex);
        String fraction = dotIndex < 0 ? "" : num.substring(dotIndex + 1);

        StringBuilder padded = new StringBuilder();
        for (int i = whole.length(); i < CHAPTER_DIGITS; i++) {
            padded.append('0');
        }
        return padded.append(whole).append(fraction).toString();
    }

    /**
     * Returns one placeholder (empty string) per page of the chapter; the real image
     * urls are filled in by {@link #parseFirstPage(List, String)}.
     *
     * @return a list with as many entries as page entries were found; empty if the
     *         reader script call could not be located.
     */
    @Override
    protected List<String> parseHtmlToPageUrls(String unparsedHtml) {
        ArrayList<String> pages = new ArrayList<>();
        String[] pageEntries = extractPageEntries(unparsedHtml);
        for (int i = 0; i < pageEntries.length; i++) {
            pages.add("");
        }
        return pages;
    }

    /**
     * Assigns the image url of every page entry found in the reader html.
     *
     * @param pages        pages to update; entries beyond its size are ignored.
     * @param unparsedHtml raw html of the reader page.
     * @return the same {@code pages} list.
     */
    @Override
    protected List<Page> parseFirstPage(List<Page> pages, String unparsedHtml) {
        String[] pageEntries = extractPageEntries(unparsedHtml);
        int count = Math.min(pages.size(), pageEntries.length);
        for (int i = 0; i < count; i++) {
            String[] urlParts = pageEntries[i].split(","); // auto/06/35,http://e4.adultmanga.me/,/55/01.png
            if (urlParts.length < 3) {
                continue; // Malformed entry: leave this page without an image url.
            }
            // Entry order is directory, host, file; the url is host + directory + file.
            pages.get(i).setImageUrl(urlParts[1] + urlParts[0] + urlParts[2]);
        }
        return pages;
    }

    /**
     * Extracts the unquoted page entries passed to the reader script call.
     *
     * @return one comma separated entry per page, or an empty array when the call
     *         cannot be located or carries no entries.
     */
    private String[] extractPageEntries(String unparsedHtml) {
        int markerIndex = unparsedHtml.indexOf(PAGE_LIST_START);
        if (markerIndex < 0) {
            return new String[0];
        }
        // The extra +1 skips the '[' that opens the first inner entry.
        int beginIndex = markerIndex + PAGE_LIST_START.length() + 1;
        int endIndex = unparsedHtml.indexOf(PAGE_LIST_END, markerIndex);
        if (endIndex <= beginIndex) {
            return new String[0];
        }
        String trimmedHtml = unparsedHtml.substring(beginIndex, endIndex).replaceAll("[\"']", "");
        if (trimmedHtml.isEmpty()) {
            return new String[0];
        }
        return trimmedHtml.split("],\\[");
    }

    /** Image urls are resolved in {@link #parseFirstPage(List, String)}; nothing to do here. */
    @Override
    protected String parseHtmlToImageUrl(String unparsedHtml) {
        return null;
    }
}

View file

@ -0,0 +1,225 @@
package eu.kanade.tachiyomi.data.source.online.russian;
import android.content.Context;
import android.net.Uri;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import eu.kanade.tachiyomi.data.database.models.Chapter;
import eu.kanade.tachiyomi.data.database.models.Manga;
import eu.kanade.tachiyomi.data.source.Language;
import eu.kanade.tachiyomi.data.source.LanguageKt;
import eu.kanade.tachiyomi.data.source.base.Source;
import eu.kanade.tachiyomi.data.source.model.MangasPage;
import eu.kanade.tachiyomi.data.source.model.Page;
import eu.kanade.tachiyomi.util.Parser;
/**
 * Online source that scrapes the Russian catalogue at readmanga.me.
 *
 * <p>All parsing is done against the site's HTML and inline reader script, so the
 * parsers skip whatever they cannot find instead of throwing.
 */
public class Readmanga extends Source {

    public static final String NAME = "Readmanga";
    public static final String BASE_URL = "http://readmanga.me";
    public static final String POPULAR_MANGAS_URL = BASE_URL + "/list?sortType=rate";
    public static final String SEARCH_URL = BASE_URL + "/search?q=%s";

    /** Start of the reader script call whose first argument is the array of page entries. */
    private static final String PAGE_LIST_START = "rm_h.init( [";

    /** Text that closes the array of page entries in the reader script call. */
    private static final String PAGE_LIST_END = "], 0, false);";

    /** Width the integer part of a chapter is padded to before it is appended to the volume. */
    private static final int CHAPTER_DIGITS = 4;

    public Readmanga(Context context) {
        super(context);
    }

    @Override
    public Language getLang() {
        return LanguageKt.getRU();
    }

    @Override
    public String getName() {
        return NAME;
    }

    @Override
    public String getBaseUrl() {
        return BASE_URL;
    }

    @Override
    protected String getInitialPopularMangasUrl() {
        return POPULAR_MANGAS_URL;
    }

    @Override
    protected String getInitialSearchUrl(String query) {
        return String.format(SEARCH_URL, Uri.encode(query));
    }

    /**
     * Builds one manga per {@code div.desc} block of the listing page.
     *
     * @param parsedHtml the parsed listing page.
     * @return the mangas found, in document order; empty if there are none.
     */
    @Override
    protected List<Manga> parsePopularMangasFromHtml(Document parsedHtml) {
        List<Manga> mangaList = new ArrayList<>();
        for (Element currentHtmlBlock : parsedHtml.select("div.desc")) {
            mangaList.add(constructPopularMangaFromHtml(currentHtmlBlock));
        }
        return mangaList;
    }

    /** Reads url and title from a single listing block; both stay unset if the link is missing. */
    private Manga constructPopularMangaFromHtml(Element currentHtmlBlock) {
        Manga manga = new Manga();
        manga.source = getId();

        Element urlElement = currentHtmlBlock.getElementsByTag("h3").select("a").first();
        if (urlElement != null) {
            manga.setUrl(urlElement.attr("href"));
            manga.title = urlElement.text();
        }
        return manga;
    }

    /**
     * @return the url of the next listing page, or {@code null} when the page has no
     *         "→" link.
     */
    @Override
    protected String parseNextPopularMangasUrl(Document parsedHtml, MangasPage page) {
        String path = Parser.href(parsedHtml, "a:contains(→)");
        return path != null ? BASE_URL + path : null;
    }

    /** Search results are parsed with the same selectors as the popular listing. */
    @Override
    protected List<Manga> parseSearchFromHtml(Document parsedHtml) {
        return parsePopularMangasFromHtml(parsedHtml);
    }

    /** Search pagination is not implemented; always returns {@code null}. */
    @Override
    protected String parseNextSearchUrl(Document parsedHtml, MangasPage page, String query) {
        return null;
    }

    /**
     * Parses the details page of a manga from its {@code div.leftContent} block.
     *
     * <p>Missing pieces (the whole block, the genre list, the status paragraph or the
     * cover image) are skipped rather than dereferenced.
     *
     * @param mangaUrl     url the manga is created with.
     * @param unparsedHtml raw html of the details page.
     * @return the manga, always flagged as initialized.
     */
    @Override
    protected Manga parseHtmlToManga(String mangaUrl, String unparsedHtml) {
        Document parsedDocument = Jsoup.parse(unparsedHtml);
        Element infoElement = parsedDocument.select("div.leftContent").first();

        Manga manga = Manga.create(mangaUrl);
        if (infoElement != null) {
            manga.title = Parser.text(infoElement, "span.eng-name");
            manga.author = Parser.text(infoElement, "span.elem_author ");

            String genre = Parser.allText(infoElement, "span.elem_genre ");
            if (genre != null) {
                manga.genre = genre.replaceAll(" ,", ",");
            }
            manga.description = Parser.allText(infoElement, "div.manga-description");

            // A title marked "Сингл" (single) is treated as completed.
            if (Parser.text(infoElement, "h1.names:contains(Сингл)") != null) {
                manga.status = Manga.COMPLETED;
            } else {
                manga.status = parseStatus(Parser.text(infoElement, "p:has(b:contains(Перевод:))"));
            }

            Element coverElement = Parser.element(infoElement, "img");
            if (coverElement != null) {
                // Jsoup returns "" (never null) for an absent attribute.
                String thumbnail = coverElement.attr("data-full");
                if (!thumbnail.isEmpty()) {
                    manga.thumbnail_url = thumbnail;
                }
            }
        }

        manga.initialized = true;
        return manga;
    }

    /**
     * Maps the site's translation status text to one of the {@code Manga} status constants.
     *
     * @param status status paragraph text; may be {@code null} when the paragraph is absent.
     */
    private int parseStatus(String status) {
        if (status == null) {
            return Manga.UNKNOWN;
        }
        if (status.contains("продолжается")) {
            return Manga.ONGOING;
        }
        if (status.contains("завершен")) {
            return Manga.COMPLETED;
        }
        return Manga.UNKNOWN;
    }

    /** Builds one chapter per row matched by {@code div.chapters-link tbody tr}. */
    @Override
    protected List<Chapter> parseHtmlToChapters(String unparsedHtml) {
        Document parsedDocument = Jsoup.parse(unparsedHtml);
        List<Chapter> chapterList = new ArrayList<>();
        for (Element chapterElement : parsedDocument.select("div.chapters-link tbody tr")) {
            chapterList.add(constructChapterFromHtmlBlock(chapterElement));
        }
        return chapterList;
    }

    /** Reads url, name and upload date from a single chapter row. */
    private Chapter constructChapterFromHtmlBlock(Element chapterElement) {
        Chapter chapter = Chapter.create();

        Element urlElement = Parser.element(chapterElement, "a");
        if (urlElement != null) {
            chapter.setUrl(urlElement.attr("href") + "?mature=1");
            // Remove the " новое" (new) badge text from the name.
            chapter.name = urlElement.text().replaceAll(" новое", "");
        }

        String date = Parser.text(chapterElement, "td:eq(1)");
        if (date != null) {
            try {
                chapter.date_upload = new SimpleDateFormat("dd/MM/yy", Locale.ENGLISH).parse(date).getTime();
            } catch (ParseException ignored) {
                // Best effort: an unparsable date just leaves date_upload unset.
            }
        }
        return chapter;
    }

    /**
     * Derives a sortable {@code volume.chapter} number from the chapter url.
     * Without this extra chapters are in the wrong place in the list.
     *
     * <p>The url is reduced to {@code volume/chapter} by dropping the query flag and
     * the leading {@code /name/vol} segment. If the url is missing or does not follow
     * that layout, {@code chapter_number} is left unchanged instead of throwing.
     */
    @Override
    public void parseChapterNumber(Chapter chapter) {
        if (chapter.url == null) {
            return;
        }
        String url = chapter.url.replace("?mature=1", "");
        String[] urlParts = url.replaceAll("/[\\w\\d]+/vol", "").split("/");
        if (urlParts.length < 2) {
            return;
        }
        try {
            chapter.chapter_number = Float.parseFloat(urlParts[0] + "." + padChapter(urlParts[1]));
        } catch (NumberFormatException ignored) {
            // Url did not contain numeric volume/chapter parts; keep the previous number.
        }
    }

    /**
     * Left-pads the integer part of a chapter number to {@link #CHAPTER_DIGITS} digits and
     * appends any fractional digits, e.g. "5" becomes "0005" and "50" becomes "0050".
     *
     * @throws NumberFormatException if {@code num} is not a number.
     */
    private String padChapter(String num) {
        // Validation only: rejects empty or non-numeric text.
        Float.parseFloat(num);

        int dotIndex = num.indexOf('.');
        String whole = dotIndex < 0 ? num : num.substring(0, dotIndex);
        String fraction = dotIndex < 0 ? "" : num.substring(dotIndex + 1);

        StringBuilder padded = new StringBuilder();
        for (int i = whole.length(); i < CHAPTER_DIGITS; i++) {
            padded.append('0');
        }
        return padded.append(whole).append(fraction).toString();
    }

    /**
     * Returns one placeholder (empty string) per page of the chapter; the real image
     * urls are filled in by {@link #parseFirstPage(List, String)}.
     *
     * @return a list with as many entries as page entries were found; empty if the
     *         reader script call could not be located.
     */
    @Override
    protected List<String> parseHtmlToPageUrls(String unparsedHtml) {
        ArrayList<String> pages = new ArrayList<>();
        String[] pageEntries = extractPageEntries(unparsedHtml);
        for (int i = 0; i < pageEntries.length; i++) {
            pages.add("");
        }
        return pages;
    }

    /**
     * Assigns the image url of every page entry found in the reader html.
     *
     * @param pages        pages to update; entries beyond its size are ignored.
     * @param unparsedHtml raw html of the reader page.
     * @return the same {@code pages} list.
     */
    @Override
    protected List<Page> parseFirstPage(List<Page> pages, String unparsedHtml) {
        String[] pageEntries = extractPageEntries(unparsedHtml);
        int count = Math.min(pages.size(), pageEntries.length);
        for (int i = 0; i < count; i++) {
            String[] urlParts = pageEntries[i].split(","); // auto/12/56,http://e7.postfact.ru/,/51/01.jpg_res.jpg
            if (urlParts.length < 3) {
                continue; // Malformed entry: leave this page without an image url.
            }
            // Entry order is directory, host, file; the url is host + directory + file.
            pages.get(i).setImageUrl(urlParts[1] + urlParts[0] + urlParts[2]);
        }
        return pages;
    }

    /**
     * Extracts the unquoted page entries passed to the reader script call.
     *
     * @return one comma separated entry per page, or an empty array when the call
     *         cannot be located or carries no entries.
     */
    private String[] extractPageEntries(String unparsedHtml) {
        int markerIndex = unparsedHtml.indexOf(PAGE_LIST_START);
        if (markerIndex < 0) {
            return new String[0];
        }
        // The extra +1 skips the '[' that opens the first inner entry.
        int beginIndex = markerIndex + PAGE_LIST_START.length() + 1;
        int endIndex = unparsedHtml.indexOf(PAGE_LIST_END, markerIndex);
        if (endIndex <= beginIndex) {
            return new String[0];
        }
        String trimmedHtml = unparsedHtml.substring(beginIndex, endIndex).replaceAll("[\"']", "");
        if (trimmedHtml.isEmpty()) {
            return new String[0];
        }
        return trimmedHtml.split("],\\[");
    }

    /** Image urls are resolved in {@link #parseFirstPage(List, String)}; nothing to do here. */
    @Override
    protected String parseHtmlToImageUrl(String unparsedHtml) {
        return null;
    }
}